研究了兩天搞出以下代碼,共享給各個同行,我只貼過程函數出來,百度是網上找來的,其實其他三個也有代碼,只是對應的網頁代碼改了,數據抓取不對,我重寫了這三個函數,尤其是alexa羅索....sub GoogleRank(strurl,id)
Set R=Server.CreateObject(Microsoft.XmlHttp)
R.Open GET,Url,False,,
R.SetRequestHeader Referer,Url
R.Send
str1=B2S(R.ResponseBody)
str1=replace(str1,,,)
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top=<font color=#FB5E3C>
str_bottom=</font>
reg.Pattern=&str_top&((.|/n)*?)&str_bottom&
Set matches = reg.execute(str1)
str1=
For Each match1 in matches
str1=match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1=replace(replace(str1,str_top,),str_bottom,)
conn.execute(update webtable set pr='&str1&' where id=&id)
end sub
Sub Error(str)select case str
case 1
response.write <BR> 搜索引擎為空,請聯繫<a href=mailto:[email protected]>[email protected]</a>
case 2
response.write <BR> 站點名字為空,請聯繫<a href=mailto:[email protected]>[email protected]</a>
case 3
response.write <BR> 你輸入的搜索引擎本程序不支持,請聯繫<a href=mailto:[email protected]>[email protected]</a></body></html >
case 4
response.write <BR> 未知錯誤--抓取不到數據請<font color=blue><a href=javascript:location.reload();>刷新</a></font>重試</body></html>
end select
response.end
End SubSub google(strurl,id,all) url=http://www.google.cn/search?complete=1&hl=zh-CN&q=site%3A&strUrl&&meta=
str= getHTTPPage(url)
if str= then
conn.execute(update webtable set google='0' where id=&id)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top=<td align=right nowrap><font size=-1>
str_bottom=</font></td></tr></table>
reg.Pattern=&str_top&((.)*)&str_bottom&
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
if instr(str,<html>)=1 then
str2=0
else
str=split(str,</b>)
str1=str(3)
str2=split(str1,<b>)(1)
end if
if str2= or len(str2)>200 then
conn.execute(update webtable set google='0' where id=&id)
else
conn.execute(update webtable set google='&str2&' where id=&id)
end if
end if
End SubSub baidu(str,id,all)'call print_do(baidu)if all=n then
url=http://www.baidu.com/s?wd=site%3A&str&&cl=3
else
strext=split(str,.)
url=http://www.baidu.com/s?wd=&strext(0)&&cl=3
end if
'response.Write(<br>baidu's url:&url)If IsObjInstalled(AspHTTP.Conn)=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
if str= then
Call Error(4)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top=<td align=right nowrap>
str_bottom=</td>
reg.Pattern=&str_top&((.|/n)*?)&str_bottom&
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
Set reg = Nothingresponse.write <BR>
'response.write
if str= or len(str)>200 then
conn.execute(update webtable set baidu='0' where id=&id)
else
if instr(str,約)=0 then
keyw=頁
else
keyw=約
end if
str=Mid(str,(InStr(str,keyw)+1),(InStr(str,篇)-InStr(str,keyw)-1))
response.write str
conn.execute(update webtable set baidu='&replace(replace(str,,,), ,)&' where id=&id)
end if
end if
End SubSub alexa(strurl,id)
url=http://www.alexa.com/data/details/traffic_details?q=&url=&strurl
Set R=Server.CreateObject(Microsoft.XmlHttp)
R.Open GET,Url,False,,
R.SetRequestHeader Referer,Url
R.Send
str1=Bytes2bStr(R.ResponseBody)
str1=replace(str1,,,)
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top=<!--Did you know
str_bottom=</span><br>
reg.Pattern=&str_top&((.|/n)*?)&str_bottom&
Set matches = reg.execute(str1)
str1=
For Each match1 in matches
str1=str1&match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1 = Replace(str1,<!--Did you know? Alexa offers this data programmatically. Visit http://aws.amazon.com/awis for more information about the Alexa Web Information Service.-->,)
if str1<> then
str1=replace(str1,<span class,)
str1=replace(str1,</span></span>,)
str1=replace(str1,,)
str1=replace(str1, ,)
str1=split(str1,<br>)(0)
if cstr(right(str1,7))=</span> then
str1=left(trim(str1),len(str1)-7)
end if
if isnumeric(str1) then
num=str1
else
csstxt=GetAlexaCss()
num=
str1=split(str1,</span>)
for i=0 to ubound(str1)
str2=str1(i)
if left(str2,instr(str2,=))<> then
num=num&left(str2,instr(str2,=)-1)
str2=right(str2,len(str2)-instr(str2,=))
end if
str3=split(str2,>)
for j=0 to ubound(str3)
next
if str3(0)<> then
if not isnumeric(str3(0)) then
if instr(csstxt,str3(0))=0 then
num=num&str3(1)
end if
end if
end if
next
end if
else
num=0
end if
sql=update webtable set alexa='&num&' where id=&id
response.Write(<script>alert('&sql&')</script>)
conn.execute(sql)
End SubFunction B2S(Str)
Dim O
Set O = Server.CreateObject(adodb.stream)
O.Type = 1
O.Mode =3
O.Open
O.Write Str
O.Position = 0
O.Type = 2
O.Charset = GB2312
B2S=O.ReadText
O.Close
Set O = nothing
End Function'獲取alexa的樣式表
Function GetAlexaCss()
url=http://client.alexa.com/common/css/scramble.css
If IsObjInstalled(AspHTTP.Conn)=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
GetAlexaCss=str
end functionSub print_do(str)
response.write <script>
response.write function HiddenLoad()
response.write {
response.write parent.do&str&.style.display='none';
response.write }
response.write </script>
response.write <body leftmargin=0 topmargin=0 marginwidth=0 marginheight=0 bgcolor=#f2f2f2 onload=HiddenLoad()>
end subFunction getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject(Microsoft.XMLHTTP)
Http.open GET,url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
End functionFunction Bytes2bStr(vin)
Dim BytesStream,StringReturn
Set BytesStream = Server.CreateObject(ADODB.Stream)
BytesStream.Type = 2
BytesStream.Open
BytesStream.WriteText vin
BytesStream.Position = 0
BytesStream.Charset = GB2312
BytesStream.Position = 2
StringReturn =BytesStream.ReadText
BytesStream.close
Set BytesStream = Nothing
Bytes2bStr = StringReturn
End FunctionFunction getaspHTTPPage(url)
if url= then
exit function
end if
Set HttpObj = Server.CreateObject(AspHTTP.Conn)
'設置代理服務器,通過代理上網的用戶需要設置此選項
If ProxyIP=1 Then
HttpObj.Proxy=192.168.5.254:808
end if
HTTPObj.TimeOut = 45
HttpObj.Url = url
HttpObj.RequestMethod = GET
getaspHTTPPage = HttpObj.GetURL
set HttpObj=nothing
End functionFunction IsObjInstalled(strClassString)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim xTestObj
Set xTestObj = Server.CreateObject(strClassString) If 0 = Err Then
If AspHttpOpen=1 Then
IsObjInstalled = True
'Response.write 當前組件ASPHTTP
Else
IsObjInstalled = False
'Response.write 當前組件XMLHTTP
End If
Else
IsObjInstalled = False
'Response.write 當前組件XMLHTTP
End If Set xTestObj = Nothing
Err = 0
End Function