ASP.NET 火车票源信息抓取系统
一、系统功能:
1.每隔一定时间从网络抓取一次最新的票源信息;
2.支持根据关键字筛选票源信息;
3.支持抓取时间间隔设置;
4.支持票源网址链接;
二、运行环境:
1. .NET 2.0 框架及以上;
2. IE 6.0 及以上;
三、实现思路:
1.设置抓取的地址与解析的方式
public static list
getdefaultsites()
{
listsites = new list ();
sites = new list();
sites.add(new site()
{
name = "火车票网",
url = "http://www.huochep**i*ao.com/city/search.asp?leixing=%d7%aa%c8%c3&chufa=&daoda=",
regexpattern = @"· ",
encoding = encoding.default,
keys = new string[] { "卧" }
});
sites.add(new site()
{
name = "百姓网",
url = "http://beijing.*baix**ing.com/huochepiao/?%e5%8f%91%e8%bd%a6%e6%97%a5%e6%9c%9f=&%e8%bd%a6%e6%ac%a1=&%e5%87%ba%e5%8f%91%e5%9f%8e%e5%b8%82=%e5%8c%97%e4%ba%ac&%e5%88%b0%e8%be%be%e5%9f%8e%e5%b8%82=&wanted=1",
regexpattern = @""" >",
encoding = encoding.utf8,
domain = "http://beijing.ba*ixi**ng.com/",
keys = new string[] { "卧" }
});
sites.add(new site()
{
name = "赶集网",
url = "http://bj.*ga**nji.com/piao/",
regexpattern = @"",
encoding = encoding.utf8,
domain = "http://bj.ga**nj*i.com/",
keys = new string[] { "卧" }
});sites.add(new site()
{
name = "酷讯网",
url = "http://huoche.ku*x**un.cn/zhuanrang-beijing-wuhan.html",
regexpattern = @"(.*?)