After studying for two days, I created the following code and shared it with each colleague. I only posted the process function. Baidu found it online. In fact, the other three also have codes, but the corresponding web page code has been changed. The data crawling is not correct. I replayed it. I wrote these three functions, especially alexa Rosso...sub GoogleRank(strurl,id)
Set R=Server.CreateObject(Microsoft.XmlHttp)
R.Open GET,Url,False,
R.SetRequestHeader Referer,Url
R.Send
str1=B2S(R.ResponseBody)
str1=replace(str1,,,)
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top=<font color=#FB5E3C>
str_bottom=</font>
reg.Pattern=&str_top&((.|/n)*?)&str_bottom&
Set matches = reg.execute(str1)
str1=
For Each match1 in matches
str1=match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1=replace(replace(str1,str_top,),str_bottom,)
conn.execute(update webtable set pr='&str1&' where id=&id)
end sub
Sub Error(str)select case str
case 1
response.write <BR> The search engine is empty, please contact <a href=mailto:[email protected]>[email protected]</a>
case 2
response.write <BR> The site name is empty, please contact <a href=mailto:[email protected]>[email protected]</a>
case 3
response.write <BR> The search engine you entered is not supported by this program, please contact <a href=mailto:[email protected]>[email protected]</a></body></html >
case 4
response.write <BR> Unknown error - data cannot be crawled, please <font color=blue><a href=javascript:location.reload();>Refresh</a></font> try again </body></html>
end select
response.end
End SubSub google(strurl,id,all) url=http://www.google.cn/search?complete=1&hl=zh-CN&q=site%3A&strUrl&&meta=
str= getHTTPPage(url)
if str= then
conn.execute(update webtable set google='0' where id=&id)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top=<td align=right nowrap><font size=-1>
str_bottom=</font></td></tr></table>
reg.Pattern=&str_top&((.)*)&str_bottom&
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
if instr(str,<html>)=1 then
str2=0
else
str=split(str,</b>)
str1=str(3)
str2=split(str1,<b>)(1)
end if
if str2= or len(str2)>200 then
conn.execute(update webtable set google='0' where id=&id)
else
conn.execute(update webtable set google='&str2&' where id=&id)
end if
end if
End SubSub baidu(str,id,all)'call print_do(baidu)if all=n then
url=http://www.baidu.com/s?wd=site%3A&str&&cl=3
else
strext=split(str,.)
url=http://www.baidu.com/s?wd=&strext(0)&&cl=3
end if
'response.Write(<br>baidu's url:&url)If IsObjInstalled(AspHTTP.Conn)=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
if str= then
Call Error(4)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top=<td align=right nowrap>
str_bottom=</td>
reg.Pattern=&str_top&((.|/n)*?)&str_bottom&
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
Set reg = Nothingresponse.write <BR>
'response.write
if str= or len(str)>200 then
conn.execute(update webtable set baidu='0' where id=&id)
else
if instr(str,approximately)=0 then
keyw=page
else
keyw=approximate
end if
str=Mid(str,(InStr(str,keyw)+1),(InStr(str, article)-InStr(str,keyw)-1))
response.write str
conn.execute(update webtable set baidu='&replace(replace(str,,,),,)&' where id=&id)
end if
end if
End SubSub alexa(strurl,id)
url=http://www.alexa.com/data/details/traffic_details?q=&url=&strurl
Set R=Server.CreateObject(Microsoft.XmlHttp)
R.Open GET,Url,False,
R.SetRequestHeader Referer,Url
R.Send
str1=Bytes2bStr(R.ResponseBody)
str1=replace(str1,,,)
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top=<!--Did you know
str_bottom=</span><br>
reg.Pattern=&str_top&((.|/n)*?)&str_bottom&
Set matches = reg.execute(str1)
str1=
For Each match1 in matches
str1=str1&match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1 = Replace(str1,<!--Did you know? Alexa offers this data programmatically. Visit http://aws.amazon.com/awis for more information about the Alexa Web Information Service.-->,)
if str1<> then
str1=replace(str1,<span class,)
str1=replace(str1,</span></span>,)
str1=replace(str1,,)
str1=replace(str1, ,)
str1=split(str1,<br>)(0)
if cstr(right(str1,7))=</span> then
str1=left(trim(str1),len(str1)-7)
end if
if isnumeric(str1) then
num=str1
else
csstxt=GetAlexaCss()
num=
str1=split(str1,</span>)
for i=0 to ubund(str1)
str2=str1(i)
if left(str2,instr(str2,=))<> then
num=num&left(str2,instr(str2,=)-1)
str2=right(str2,len(str2)-instr(str2,=))
end if
str3=split(str2,>)
for j=0 to ubund(str3)
next
if str3(0)<> then
if not isnumeric(str3(0)) then
if instr(csstxt,str3(0))=0 then
num=num&str3(1)
end if
end if
end if
next
end if
else
num=0
end if
sql=update webtable set alexa='&num&' where id=&id
response.Write(<script>alert('&sql&')</script>)
conn.execute(sql)
End SubFunction B2S(Str)
Dim O
Set O = Server.CreateObject(adodb.stream)
O.Type = 1
O.Mode =3
O.Open
O.Write Str
O.Position = 0
O.Type = 2
O.Charset = GB2312
B2S=O.ReadText
O.Close
Set O = nothing
End Function' Get Alexa's stylesheet
Function GetAlexaCss()
url=http://client.alexa.com/common/css/scramble.css
If IsObjInstalled(AspHTTP.Conn)=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
GetAlexaCss=str
end functionSub print_do(str)
response.write <script>
response.write function HiddenLoad()
response.write {
response.write parent.do&str&.style.display='none';
response.write }
response.write </script>
response.write <body leftmargin=0 topmargin=0 marginwidth=0 marginheight=0 bgcolor=#f2f2f2 onload=HiddenLoad()>
end subFunction getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject(Microsoft.XMLHTTP)
Http.open GET,url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
End functionFunction Bytes2bStr(vin)
Dim BytesStream,StringReturn
Set BytesStream = Server.CreateObject(ADODB.Stream)
BytesStream.Type = 2
BytesStream.Open
BytesStream.WriteText vin
BytesStream.Position = 0
BytesStream.Charset = GB2312
BytesStream.Position = 2
StringReturn =BytesStream.ReadText
BytesStream.close
Set BytesStream = Nothing
Bytes2bStr = StringReturn
End FunctionFunction getaspHTTPPage(url)
if url= then
exit function
end if
Set HttpObj = Server.CreateObject(AspHTTP.Conn)
'Set up a proxy server, users who use the proxy to access the Internet need to set this option
If ProxyIP=1 Then
HttpObj.Proxy=192.168.5.254:808
end if
HTTPObj.TimeOut = 45
HttpObj.Url = url
HttpObj.RequestMethod = GET
getapHTTPPage = HttpObj.GetURL
set HttpObj=nothing
End functionFunction IsObjInstalled(strClassString)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim xTestObj
Set xTestObj = Server.CreateObject(strClassString) If 0 = Err Then
If AspHttpOpen=1 Then
IsObjInstalled = True
'Response.write Current component ASPHTTP
Else
IsObjInstalled = False
'Response.write Current component XMLHTTP
End If
Else
IsObjInstalled = False
'Response.write Current component XMLHTTP
End If Set xTestObj = Nothing
Err = 0
End Function