C# 获取网页中所有新闻内容 包括图片 该怎么弄
获取内容后,保存到 SQL 数据库,求助高手
/// <summary>
/// 采集规则返回
/// </summary>
/// <param name="url">网址</param>
/// <param name="rules">正则</param>
/// <param name="title">标题_占位符</param>
/// <param name="titleid">标题ID_占位符</param>
/// <param name="content">内容_占位符</param>
/// <param name="image">图片_占位符</param>
/// <param name="imageid">图片ID_占位符</param>
/// <returns></returns>
public List<Dictionary<string, string>> Common_Rules(string url, string rules, string title, string titleid, string content, string image, string imageid)
{
    // Downloads the page at `url` (decoded as UTF-8), runs the regular expression
    // `rules` over it, and returns one dictionary per match with the fixed keys
    // "title", "titleid", "content", "image" and "imageid".
    // The title/titleid/content/image/imageid arguments are the names of the
    // capture groups in `rules` that supply each value.
    List<Dictionary<string, string>> list = new List<Dictionary<string, string>>();

    // Dispose the client and the response stream deterministically; the original
    // left both open until finalization.
    string html;
    using (WebClient web = new WebClient())
    using (StreamReader reader = new StreamReader(web.OpenRead(url), Encoding.UTF8))
    {
        html = reader.ReadToEnd();
    }

    foreach (Match item in Regex.Matches(html, rules))
    {
        // Groups[name] yields an unsuccessful group (Value == "") when the pattern
        // has no group with that name, so a missing group becomes an empty string
        // rather than an exception.
        Dictionary<string, string> result = new Dictionary<string, string>();
        result.Add("title", item.Groups[title].Value);
        result.Add("titleid", item.Groups[titleid].Value);
        result.Add("content", item.Groups[content].Value);
        result.Add("image", item.Groups[image].Value);
        result.Add("imageid", item.Groups[imageid].Value);
        list.Add(result);
    }

    return list;
}
这是我自己写的采集公共类。如果你已经接触过采集技术,那你就能看懂这段代码;不懂再追问好了。
这个直接拿去用,看注释。
页:
[1]