最近在學 Python,學了簡單的從網上抓取圖片的方法,剛好做一個 C# 版本的。下面貼代碼: using System;using System.IO;using System.Collections.Generic;using static System.Console;using System.Text; ...
最近在學 Python,學了簡單的從網上抓取圖片的方法,剛好做一個 C# 版本的。
下面貼代碼:
using System;
using System.IO;
using System.Collections.Generic;
using static System.Console;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
namespace Gaofajin.Net
{
    /// <summary>
    /// Minimal image crawler: downloads each registered page, extracts every
    /// absolute URL ending in ".jpg" from its HTML, and saves the images under
    /// <see cref="path"/> (optionally in a per-URL sub-directory).
    /// Progress is reported through <see cref="urlOver"/>; failures are reported
    /// through <see cref="onError"/>.
    /// </summary>
    public class ImageReptiles
    {
        // Matches an absolute URL that ends in ".jpg". Quotes and angle brackets are
        // excluded so that two adjacent attributes/values with no whitespace between
        // them are not swallowed into a single greedy match.
        static readonly Regex ImageUrlPattern = new Regex(@"[A-Za-z]{4,5}://[^?!\s""'<>]*\.jpg");

        // Pages registered by the caller, in insertion order.
        readonly List<ImgInfo> urls = new List<ImgInfo>();

        /// <summary>Root directory that downloaded images are stored under.</summary>
        public string path { get; set; } = "Images";

        /// <summary>Handler for progress / completion messages.</summary>
        public delegate void UrlOverEventHandler(string msg);

        /// <summary>Raised with a progress message for every step of the crawl.</summary>
        public event UrlOverEventHandler urlOver;

        /// <summary>Handler for error messages.</summary>
        public delegate void OnErrorEventHandler(string errmsg);

        /// <summary>Raised when a page or an image cannot be fetched, or when no URL was registered.</summary>
        public event OnErrorEventHandler onError;

        /// <summary>A page URL together with the sub-directory its images are stored in.</summary>
        public struct ImgInfo
        {
            /// <summary>Sub-directory (relative to the crawler's root path); may be null.</summary>
            public string Path { get; set; }

            /// <summary>Address of the page to scan for images.</summary>
            public string Url { get; set; }
        }

        /// <summary>The registered pages (the live list, not a copy).</summary>
        public List<ImgInfo> Urls { get { return urls; } }

        /// <summary>Registers a page whose images are stored in the sub-directory <paramref name="path"/>.</summary>
        /// <param name="url">Address of the page to scan.</param>
        /// <param name="path">Sub-directory below the root path.</param>
        public void AddUrl(string url, string path)
        {
            urls.Add(new ImgInfo() { Url = url, Path = path });
        }

        /// <summary>Registers a page whose images are stored directly in the root path.</summary>
        /// <param name="url">Address of the page to scan.</param>
        public void AddUrl(string url)
        {
            urls.Add(new ImgInfo() { Url = url });
        }

        /// <summary>
        /// Crawls every registered page synchronously. Raises <see cref="onError"/> and
        /// returns immediately when no page has been registered.
        /// </summary>
        public void StartGetImage()
        {
            if (urls.Count == 0)
            {
                onError?.Invoke($"傳入Url集合為空,請調用{nameof(AddUrl)}方法傳入url地址!");
                return; // nothing to do; do not announce start/completion
            }

            urlOver?.Invoke("開始抓取圖片,請稍後..........");

            // Iterate over a snapshot so an event handler may call AddUrl safely.
            foreach (ImgInfo url in urls.ToArray())
            {
                string html = GetHtml(url.Url);
                List<string> list = GetImgUrlList(html);
                urlOver?.Invoke($"url:{url.Url}" + SaveImg(list, url.Path));
            }

            urlOver?.Invoke("全部操作完成!");
        }

        /// <summary>
        /// Downloads the page at <paramref name="uri"/> and decodes it as UTF-8.
        /// </summary>
        /// <returns>The page body, or null when the request failed (the reason is sent to <see cref="onError"/>).</returns>
        string GetHtml(string uri)
        {
            try
            {
                WebRequest request = WebRequest.Create(uri);
                using (WebResponse response = request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Deliberately broad: one bad page (malformed URI, DNS failure, HTTP error...)
                // must not abort the whole crawl.
                onError?.Invoke("失敗:" + ex.Message);
                return null;
            }
        }

        /// <summary>Extracts every ".jpg" URL found in <paramref name="html"/>.</summary>
        /// <returns>The matched URLs in document order; empty (never null) when there are none.</returns>
        List<string> GetImgUrlList(string html)
        {
            List<string> list = new List<string>();
            if (string.IsNullOrEmpty(html))
            {
                return list;
            }

            foreach (Match m in ImageUrlPattern.Matches(html))
            {
                list.Add(m.Value);
            }
            return list;
        }

        /// <summary>
        /// Downloads every URL in <paramref name="list"/> into the directory formed by the
        /// root path and <paramref name="subpath"/>, creating it when necessary.
        /// </summary>
        /// <returns>A human-readable summary of how many downloads succeeded and failed.</returns>
        String SaveImg(List<string> list, string subpath)
        {
            if (list == null || list.Count == 0)
            {
                return "未解析到圖片地址!";
            }

            // Path.Combine picks the platform's separator; null parts are treated as empty.
            string dic = Path.Combine(path ?? string.Empty, subpath ?? string.Empty);
            if (dic.Length == 0)
            {
                dic = "."; // no root and no sub-directory: fall back to the current directory
            }
            Directory.CreateDirectory(dic); // no-op when the directory already exists

            int s = 0, f = 0;
            string msg = "一共抓到{0}個圖片地址,成功下載{1}張圖片,下載失敗{2}張,圖片保存路徑{3}";
            foreach (string url in list)
            {
                // File name = text between the last '/' and the trailing ".jpg".
                int slash = url.LastIndexOf('/');
                string name = url.Substring(slash + 1, url.Length - slash - 5);
                string fileName = ToSafeFileName(name) + ".jpg";

                bool downloaded;
                try
                {
                    using (WebClient wc = new WebClient())
                    {
                        wc.DownloadFile(url, Path.Combine(dic, fileName));
                    }
                    downloaded = true;
                }
                catch (Exception ex)
                {
                    // Best effort: a single failed image is counted and reported, not fatal.
                    downloaded = false;
                    onError?.Invoke("失敗:" + ex.Message);
                }

                if (downloaded)
                {
                    s++;
                    urlOver?.Invoke($"從{url}抓取圖片{fileName}成功!");
                }
                else
                {
                    f++;
                    urlOver?.Invoke($"從{url}抓取圖片{fileName}失敗!");
                }
            }
            return string.Format(msg, list.Count, s, f, dic);
        }

        /// <summary>
        /// Replaces characters that are not valid in a file name (including both path
        /// separators) with '_'. The name comes from remote HTML, so it must not be able
        /// to address a location outside the target directory.
        /// </summary>
        static string ToSafeFileName(string name)
        {
            char[] invalid = Path.GetInvalidFileNameChars();
            StringBuilder safe = new StringBuilder(name.Length);
            foreach (char c in name)
            {
                bool bad = c == '\\' || c == '/' || Array.IndexOf(invalid, c) >= 0;
                safe.Append(bad ? '_' : c);
            }
            return safe.ToString();
        }
    }
}