asp.net 火车票源信息抓取系统

作者:袖梨 2022-06-25

asp.net 火车票源信息抓取系统

一、系统功能:
        1.每隔一定时间从网络抓取一次最新的票源信息;

        2.支持根据关键字筛选票源信息;

        3.支持抓取时间间隔设置;

        4.支持票源网址链接;

  二、运行环境:
        1. .NET 2.0 框架及以上;

        2.ie6.0及以上;

  三、实现思路:
        1.设置抓取的地址与解析的方式

 

/// <summary>
/// Builds the built-in list of ticket-listing sites that the scraper polls.
/// Each entry carries the page address, the regular expression used to pull
/// listings out of the page, the encoding used to decode the page, and
/// optionally a domain prefix, filter keywords and the <c>ischange</c> flag.
/// </summary>
/// <returns>A new list holding the four default site definitions.</returns>
public static List<site> getdefaultsites()
{
    // NOTE(review): the regexpattern literals are reproduced exactly as published.
    // They appear to have lost their HTML markup: each contains a single capture
    // group, while the parser in this file reads Groups[1] and Groups[2].
    // Restore the real patterns from the original project before relying on them.
    List<site> sites = new List<site>();

    sites.Add(new site()
    {
        name = "火车票网",
        url = "http://www.huochepiao.com/city/search.asp?leixing=%d7%aa%c8%c3&chufa=&daoda=",
        regexpattern = @"· (.*?)",
        // NOTE(review): Encoding.Default is the machine's ANSI code page, so decoding
        // of this page varies between machines — presumably a GBK system is assumed; confirm.
        encoding = Encoding.Default,
        keys = new string[] { "卧" }
    });

    sites.Add(new site()
    {
        name = "百姓网",
        url = "http://beijing.baixing.com/huochepiao/?%e5%8f%91%e8%bd%a6%e6%97%a5%e6%9c%9f=&%e8%bd%a6%e6%ac%a1=&%e5%87%ba%e5%8f%91%e5%9f%8e%e5%b8%82=%e5%8c%97%e4%ba%ac&%e5%88%b0%e8%be%be%e5%9f%8e%e5%b8%82=&wanted=1",
        regexpattern = @""" >(.*?)",
        encoding = Encoding.UTF8,
        domain = "http://beijing.baixing.com/",
        keys = new string[] { "卧" }
    });

    sites.Add(new site()
    {
        name = "赶集网",
        url = "http://bj.ganji.com/piao/",
        regexpattern = @"

(.*?)
",
        encoding = Encoding.UTF8,
        domain = "http://bj.ganji.com/",
        keys = new string[] { "卧" }
    });

    // This entry sets ischange and no keys; the parser swaps which capture group
    // is treated as the URL and which as the content when ischange is non-empty.
    sites.Add(new site()
    {
        name = "酷讯网",
        url = "http://huoche.kuxun.cn/zhuanrang-beijing-wuhan.html",
        regexpattern = @"

(.*?)
",
        encoding = Encoding.UTF8,
        domain = "",
        ischange = "yes"
    });

    return sites;
}

        2.抓取网页信息

/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes the received
/// bytes with <paramref name="codetpye"/>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="codetpye">Encoding used to turn the downloaded bytes into text.</param>
/// <returns>
/// The decoded page text, or an empty string when downloading or decoding fails.
/// </returns>
public string getnetstring(string url, Encoding codetpye)
{
    try
    {
        // WebClient is disposable; the using statement releases it even if the download throws.
        using (WebClient client = new WebClient())
        {
            byte[] pagedata = client.DownloadData(url);
            return codetpye.GetString(pagedata);
        }
    }
    catch (System.Exception ex)
    {
        // Best-effort contract is kept (callers get "" on any failure), but the
        // failure is traced instead of being discarded without a record.
        System.Diagnostics.Trace.TraceWarning("getnetstring failed for {0}: {1}", url, ex.Message);
        return "";
    }
}

        3.解析票源信息

/// <summary>
/// Extracts ticket listings from downloaded page text and remembers which
/// listing URLs have already been returned, so that repeated polls only
/// yield listings that were not seen before.
/// </summary>
public class clsnetinfoparseserver
{
    // Every result handed out so far. The field is static, so it is shared by
    // all instances of this class.
    private static IList<getresult> lslist = new List<getresult>();

    /// <summary>
    /// Forgets all previously returned results by replacing the history list.
    /// </summary>
    public void clearls()
    {
        lslist = new List<getresult>();
    }

    /// <summary>
    /// Tells whether a result with the given URL is already in the history list.
    /// </summary>
    /// <param name="url">Listing URL to look for.</param>
    /// <returns><c>true</c> when a stored result has exactly this URL.</returns>
    private bool ishas(string url)
    {
        foreach (getresult item in lslist)
        {
            if (item.url == url)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Applies the site's regular expression to the page text and returns the
    /// matches that are new (URL not in the history) and pass the keyword filter.
    /// Every returned result is also appended to the shared history list.
    /// </summary>
    /// <param name="strnetinfo">Page text to search.</param>
    /// <param name="site">Site definition supplying the pattern, domain, name and ischange flag.</param>
    /// <param name="keys">
    /// Keywords to filter by; a result is kept when its content contains any of them.
    /// When null or empty, no filtering is applied. Only this parameter is used for
    /// filtering — the keys stored on <paramref name="site"/> are not read here.
    /// </param>
    /// <returns>The newly found results, in match order; empty when there are none.</returns>
    public IList<getresult> donetinfoparse(string strnetinfo, site site, string[] keys)
    {
        IList<getresult> list = new List<getresult>();
        MatchCollection mc = Regex.Matches(strnetinfo, site.regexpattern);
        foreach (Match m in mc)
        {
            if (m.Success)
            {
                getresult r = new getresult();
                if (!string.IsNullOrEmpty(site.ischange))
                {
                    // Swapped layout: group 1 is the content, group 2 is the URL.
                    // NOTE(review): the domain is prefixed to the content rather than
                    // to the URL in this branch — kept as published; confirm intent.
                    r.content = site.domain + m.Groups[1].Value.Trim();
                    r.url = m.Groups[2].Value.Trim();
                }
                else
                {
                    // Default layout: group 1 is the (relative) URL, group 2 is the content.
                    r.url = site.domain + m.Groups[1].Value.Trim();
                    r.content = m.Groups[2].Value.Trim();
                }
                if (!ishas(r.url))
                {
                    bool iscontainkey = false;
                    if (keys != null && keys.Length > 0)
                    {
                        foreach (string key in keys)
                        {
                            if (r.content.Contains(key))
                            {
                                iscontainkey = true;
                                break;
                            }
                        }
                    }
                    else
                    {
                        // No keywords supplied: every new result passes the filter.
                        iscontainkey = true;
                    }
                    if (!iscontainkey)
                    {
                        continue;
                    }
                    r.getdatetime = DateTime.Now.ToString();
                    r.name = site.name;
                    // Record in the history first so later polls skip this URL.
                    lslist.Add(r);
                    list.Add(r);
                }
            }
        }
        return list;
    }
}

相关文章

精彩推荐