aspx采集alexa方法整理
January 22nd, 2010
刚学了php采集的技巧,正好手头有个aspx相关的源码,就顺便学习一下。
首先提个aspx的小技巧:VS里默认的aspx页面是界面设计和后台代码分离的,二者分别放在.aspx和.aspx.cs文件里,而把它们关联起来的,是.aspx页面顶部的下面这行指令:
<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="Default.aspx.cs" Inherits="web._Default" %>
如果要把代码直接写到aspx页面里(代码放在<script runat="server">块中),则头部代码改为:
<%@ Page Language="C#" AutoEventWireup="true" %>
<%@ Import Namespace="System.IO" %>
<%@ Import Namespace="System.Net" %>
<%@ Import Namespace="System.IO" %>
<%@ Import Namespace="System.Net" %>
言归正传,下面整理aspx采集alexa值的方法。先准备一个获取页面源码的小函数GetHtml(request和response是页面类的两个成员字段):
public HttpWebRequest request;
public HttpWebResponse response;
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET and
/// returns its body decoded as text.
/// </summary>
/// <param name="url">Absolute URL to fetch.</param>
/// <param name="encode">
/// Name of the character set used to decode the body. "UTF-8", "GBK" and
/// "GB2312" are recognised (exact, case-sensitive match); any other value,
/// including null, selects <see cref="Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded response body when the status code is 200 OK; otherwise null.
/// </returns>
/// <remarks>
/// The request and response objects are stored in the <c>request</c> and
/// <c>response</c> fields as before, but the response is closed before this
/// method returns so the underlying connection is released.
/// </remarks>
public string GetHtml(string url, string encode)
{
    // Resolve the decoder once instead of building a reader in every case.
    Encoding encoding;
    switch (encode)
    {
        case "UTF-8":
            encoding = Encoding.UTF8;
            break;
        case "GBK":
        case "GB2312":
            encoding = Encoding.GetEncoding(encode);
            break;
        default:
            encoding = Encoding.Default;
            break;
    }

    request = (HttpWebRequest)WebRequest.Create(url);
    // HTTP method tokens are case-sensitive, so send the canonical "GET".
    request.Method = "GET";
    request.ContentType = "application/x-www-form-urlencoded";

    response = (HttpWebResponse)request.GetResponse();
    try
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        // Disposing the reader also closes the response stream it wraps.
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
        {
            return reader.ReadToEnd();
        }
    }
    finally
    {
        // Always release the connection, even if reading the body throws.
        response.Close();
    }
}
public HttpWebResponse response;
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET and
/// returns its body decoded as text.
/// </summary>
/// <param name="url">Absolute URL to fetch.</param>
/// <param name="encode">
/// Name of the character set used to decode the body. "UTF-8", "GBK" and
/// "GB2312" are recognised (exact, case-sensitive match); any other value,
/// including null, selects <see cref="Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded response body when the status code is 200 OK; otherwise null.
/// </returns>
/// <remarks>
/// The request and response objects are stored in the <c>request</c> and
/// <c>response</c> fields as before, but the response is closed before this
/// method returns so the underlying connection is released.
/// </remarks>
public string GetHtml(string url, string encode)
{
    // Resolve the decoder once instead of building a reader in every case.
    Encoding encoding;
    switch (encode)
    {
        case "UTF-8":
            encoding = Encoding.UTF8;
            break;
        case "GBK":
        case "GB2312":
            encoding = Encoding.GetEncoding(encode);
            break;
        default:
            encoding = Encoding.Default;
            break;
    }

    request = (HttpWebRequest)WebRequest.Create(url);
    // HTTP method tokens are case-sensitive, so send the canonical "GET".
    request.Method = "GET";
    request.ContentType = "application/x-www-form-urlencoded";

    response = (HttpWebResponse)request.GetResponse();
    try
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        // Disposing the reader also closes the response stream it wraps.
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
        {
            return reader.ReadToEnd();
        }
    }
    finally
    {
        // Always release the connection, even if reading the body throws.
        response.Close();
    }
}
调用这个函数
/// <summary>
/// Reads the site name from the "url" query-string parameter, fetches that
/// site's Alexa info page through GetHtml, extracts the Alexa value with a
/// regular expression and writes it into the divshow control.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    string url = Request.QueryString["url"];
    if (string.IsNullOrEmpty(url))
    {
        // No site was requested, so skip the outbound request entirely.
        divshow.InnerHtml = string.Empty;
        return;
    }

    // Escape the user-supplied value so it cannot add path segments or a
    // query string to the address being fetched.
    string html = GetHtml("http://www.alexa.com/siteinfo/" + Uri.EscapeDataString(url), "UTF-8");

    string alexa = null;
    // GetHtml returns null for a non-200 response; Regex.Matches would throw on null input.
    if (html != null)
    {
        // Regular expression that isolates the Alexa value in the page markup.
        string regex = @"style=""margin-bottom:-2px;""/> (?<alexa>[^<]*)</a>";
        MatchCollection mc = Regex.Matches(html, regex, RegexOptions.IgnoreCase);
        if (mc.Count > 0)
        {
            // Keep the last match, as the original loop did.
            alexa = mc[mc.Count - 1].Groups["alexa"].Value;
        }
    }

    // Show the Alexa value in divshow (left empty when nothing matched).
    divshow.InnerHtml = alexa;
}
{
string url= Request.QueryString["url"];
string html=GetHtml("http://www.alexa.com/siteinfo/"+url,"UTF-8");
string regex = @"style=""margin-bottom:-2px;""/> (?<alexa>[^<]*)</a>";//用正则过滤得到需要的内容alexa值
MatchCollection mc = Regex.Matches(html, regex, RegexOptions.IgnoreCase);//获取正则匹配的内容
string alexa = null;
foreach (Match m in mc)
{
alexa = m.Groups["alexa"].Value.ToString();
}
divshow.InnerHtml = alexa;//把alexa值赋值给divshow
}
页面里加个显示控件
<body>
<!-- Server-side form that hosts the result container. -->
<form id="form1" runat="server">
<!-- runat="server" plus id="divshow" exposes this div to the page code, which assigns its InnerHtml in Page_Load. -->
<div runat="server" id="divshow">
</div>
</form>
</body>
最终效果浏览:http://localhost/alexa.aspx?url=baizoo.cn
<form id="form1" runat="server">
<div runat="server" id="divshow">
</div>
</form>
</body>
最终效果浏览:http://localhost/alexa.aspx?url=baizoo.cn

Recent Comments