Home > 技术爱好 > ASP获取网站alexa排名的源码

ASP获取网站alexa排名的源码

February 16th, 2009

主要是用MSXML2.XMLHTTP获取页面内容的方式采集到alexa的值

如本站的真实alexa获取地址为:http://www.alexa.com/data/details/traffic_details/baizoo.cn

具体代码如下:

<%
 
'获取主域名
Function getDomainUrl(url)
tempurl=replace(url,"<a rel="external" href="http://" target="_blank">http://</a>","")
if instr(tempurl,"/")&gt;0 then
  
tempurl=left(tempurl,instr(tempurl,"/")-1)
end If
getDomainurl=tempurl
End Function
 
'返回页面源代码内容,<
a rel="external" href="http://www.baizoo.cn/article.asp?id=322" target="_blank">MSXML2.XMLHTTP手册下载</a>
Function GetHttpPage(HttpUrl)
   If IsNull(HttpUrl)=True or Len(HttpUrl)
&lt;18 or HttpUrl="$False$" Then
      GetHttpPage="$False$"
      Exit Function
   End If
   Dim Http
   Set Http=server.createobject("MSXML2.XMLHTTP")
   Http.open "GET",HttpUrl,False
   Http.Send()
   If Http.Readystate
&lt;&gt;4 then
      Set Http=Nothing
      GetHttpPage="$False$"
      Exit function
   End if
   GetHTTPPage=Http.responseText
   Set Http=Nothing
   If Err.number
&lt;&gt;0 then
      Err.Clear
   End If
End Function
 
'==================================================
'函数名:ScriptHtml
'作  用:过滤html标记
'参  数:ConStr ------ 要过滤的字符串
'   TagName ------要过滤的标签
'   FType 1表示过滤左边标签  2表示过滤左右标签及中间的值  3表示过滤左边标签和右边标签,保留内容。
'==================================================
Function ScriptHtml(Byval ConStr,TagName,FType,includestr)
    Dim Re
    Set Re=new RegExp
    Re.IgnoreCase =true
    Re.Global=True
    Select Case FType
    Case 1
       Re.Pattern="
&lt;" &amp; TagName &amp; "([^&gt;])*("&amp;includestr&amp;"){1,}([^&gt;])*&gt;"
       ConStr=Re.Replace(ConStr,"")
    Case 2
       Re.Pattern="
&lt;" &amp; TagName &amp; "([^&gt;])*("&amp;includestr&amp;"){1,}([^&gt;])*&gt;.*?&lt;/" &amp; TagName &amp; "([^&gt;])*&gt;"
    'response.write constr
&amp;"&lt;br&gt;"
       ConStr=Re.Replace(ConStr,"")
    'response.write server.htmlencode(constr)
&amp;"&lt;br&gt;"
    Case 3
  Re.Pattern="
&lt;" &amp; TagName &amp; "([^&gt;])*("&amp;includestr&amp;"){1,}([^&gt;])*&gt;"
       ConStr=Re.Replace(ConStr,"")
       Re.Pattern="
&lt;/" &amp; TagName &amp; "([^&gt;])*&gt;"
       ConStr=Re.Replace(ConStr,"")
    End Select
    ScriptHtml=ConStr
    Set Re=Nothing
End Function
 
'==================================================
'函数名:GetBody
'作  用:截取页面源码字符串
'参  数:ConStr ------将要截取的字符串
'参  数:StartStr ------开始字符串
'参  数:OverStr ------结束字符串
'参  数:IncluL ------是否包含StartStr
'参  数:IncluR ------是否包含OverStr
'==================================================
Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
   If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True or StartStr="" or IsNull(StartStr)=True or OverStr="" or IsNull(OverStr)=True Then
      GetBody="$False$"
      Exit Function
   End If
   Dim ConStrTemp
   Dim Start,Over
   ConStrTemp=Lcase(ConStr)
   StartStr=Lcase(StartStr)
   OverStr=Lcase(OverStr)
   Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare)
   'response.write Start
&amp;"&lt;br&gt;"&amp;IncluL&amp;"&lt;br&gt;"
   'response.end
   If Start
&lt;=0 then
      GetBody="$False$"
      Exit Function
   Else
      If IncluL=False Then
         Start=Start+LenB(StartStr)
      End If
   End If
   Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare)
   'response.write Over
   'response.end
   'response.write Start
&amp;"  "&amp;Over&amp;"  "&amp;Over-Start
   'response.end
   If Over
&lt;=0 or Over&lt;=Start then
      GetBody="$False$"
      Exit Function
   Else
      If IncluR=True Then
         Over=Over+LenB(OverStr)
      End If
   End If
  
   GetBody=MidB(ConStr,Start,Over-Start)
   'response.write getBody
   'response.end
End Function
 
'==================================================
'函数名:GetArray
'作  用:提取链接地址,以$Array$分隔
'参  数:ConStr ------提取地址的原字符
'参  数:StartStr ------开始字符串
'参  数:OverStr ------结束字符串
'参  数:IncluL ------是否包含StartStr
'参  数:IncluR ------是否包含OverStr
'==================================================
Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
   If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True or StartStr="" or OverStr="" or  IsNull(StartStr)=True or IsNull(OverStr)=True Then
      GetArray="$False$"
      Exit Function
   End If
   Dim TempStr,TempStr2,objRegExp,Matches,Match
   TempStr=""
   Set objRegExp = New Regexp
   objRegExp.IgnoreCase = True
   objRegExp.Global = True
   objRegExp.Pattern = "("
&amp;StartStr&amp;").+?("&amp;OverStr&amp;")"
   Set Matches =objRegExp.Execute(ConStr)
   For Each Match in Matches
      TempStr=TempStr
&amp; "$Array$" &amp; Match.Value
   Next
   Set Matches=nothing
  
   If TempStr="" Then
      GetArray="$False$"
      Exit Function
   End If
   TempStr=Right(TempStr,Len(TempStr)-7)
   If IncluL=False then
      objRegExp.Pattern =StartStr
      TempStr=objRegExp.Replace(TempStr,"")
   End if
   If IncluR=False then
      objRegExp.Pattern =OverStr
      TempStr=objRegExp.Replace(TempStr,"")
   End if
   Set objRegExp=nothing
   Set Matches=nothing
  
   If TempStr="" then
      GetArray="$False$"
   Else
      GetArray=TempStr
   End if
End Function
 
Function getAlexaRank(weburl)
tempurl=getDomainUrl(weburl)
'读取
<a rel="external" href="http://client.alexa.com/common/css/scramble.css" target="_blank">http://client.alexa.com/common/css/scramble.css</a>中的数据
alexacss="
<a rel="external" href="http://" target="_blank">http://</a>client.alexa.com/common/css/scramble.css"
strAlexaCss=GetHttpPage(alexacss)
'response.write strAlexaCss
'response.end
alexarankqueryurl="
<a rel="external" href="http://" target="_blank">http://</a>www.alexa.com/data/details/traffic_details/"&amp;tempurl
 
strAlexaContent=GetHttpPage(alexarankqueryurl)
 
rankcontent=getBody(strAlexaContent,"Information Service.--
&gt;","&lt;!-- google_ad_section_end(name=default) --&gt;",false,false)
'注:其实没必要获取span的class并替换,后面对span的处理部分可直接去除
'获取其中的span的class
strspan=GetArray(rankcontent,"
&lt;span class=""","""",false,false)
response.write rankcontent
&amp;"&lt;br&gt;"
response.write strspan
&amp;"&lt;br&gt;"
'response.end
If strspan
&lt;&gt;"$False$" Then
  aspan=split(strspan,"$Array$")
  
  For i=0 To UBound(aspan)
   'response.write "."
&amp;aspan(i)
   '判定aspan(i)即span的class是否在alexacss中存在,如果存在,则需要将这个span和span中的数据去掉。
   If InStr(strAlexaCss,"."
&amp;aspan(i))&gt;=1 Then
    'response.write aspan(i)
&amp;"&lt;br&gt;"
    'response.end
    '表示属性为none.需要替换掉。
    rankcontent=ScriptHtml(rankcontent,"span",2,aspan(i))
   Else
    rankcontent=ScriptHtml(rankcontent,"span",1,aspan(i))
   End if
  Next
  '替换上面少去掉的右边的span标签。
  rankcontent=Replace(rankcontent,"
&lt;/span&gt;","")
 
  
End If
If rankcontent="$False$" Then
  rankcontent="No Data"
End if
getAlexaRank=Replace(rankcontent,",","")
 
End Function
 
url=request.querystring("url")
%
&gt;
 
&lt;form name="alexaform" method=get&gt;
输入网址:
&lt;input type="" name="url" value="&lt;%=url%&gt;" size=40&gt; &lt;input type="submit" value="查 询"&gt;
&lt;/form&gt;
 
&lt;%
If url
&lt;&gt;"" Then
 
response.write "您的网站在ALEXA的排名为:"
response.flush
rank=getAlexaRank(url)
response.write rank
End if

baizoo 技术爱好 , ,

  1. No comments yet.
  1. No trackbacks yet.