1、讀取網路中html網頁內容,獲取網頁中元素body內的html,處理所有img元素的src屬性後以字元串返回;2、通過HtmlAgilityPack Html操作類庫將html格式的字元串載入為html文檔對象,再對html dom進行操作。持續更新中,敬請期待……
1、讀取網路中html網頁內容,獲取網頁中元素body內的html,處理所有img元素的src屬性後以字元串返回
// Loads the HTML page at l_sWenBenHtmlFtpPath, rewrites every relative <img src>
// so that it is prefixed with the page's own directory, and stores the inner HTML
// of the <body> element in l_sWenBenHtml.
// Nothing happens when the path is null/empty or does not end in ".html".
if (!string.IsNullOrEmpty(l_sWenBenHtmlFtpPath)
    && l_sWenBenHtmlFtpPath.EndsWith(".html", System.StringComparison.OrdinalIgnoreCase))
{
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument htmlDoc = htmlWeb.Load(l_sWenBenHtmlFtpPath);
    HtmlNode htmlNode = htmlDoc.DocumentNode;

    // XPath query; the result is null (not an empty collection) when nothing matches.
    HtmlNodeCollection bodyNodes = htmlNode.SelectNodes("//body");
    if (bodyNodes != null)
    {
        // Everything up to and including the last '/' of the page address.
        string baseDirectory = l_sWenBenHtmlFtpPath.Substring(0, l_sWenBenHtmlFtpPath.LastIndexOf('/') + 1);

        // Rewrite the images exactly once. Doing it inside a per-body loop would
        // prefix the same src repeatedly if the document had more than one <body>.
        HtmlNodeCollection imgNodes = htmlNode.SelectNodes("//img");
        if (imgNodes != null)
        {
            foreach (HtmlNode imgTag in imgNodes)
            {
                // <img> without a src attribute: nothing to rewrite.
                HtmlAttribute srcAttribute = imgTag.Attributes["src"];
                if (srcAttribute == null)
                {
                    continue;
                }

                string imgHttpPath = srcAttribute.Value;
                if (string.IsNullOrEmpty(imgHttpPath))
                {
                    continue;
                }

                // Leave sources that already carry a scheme (http:, https:, data:, ...)
                // or are protocol-relative ("//host/...") untouched; prefixing them
                // with the base directory would produce a broken address.
                // File-scheme parses are excluded so plain paths are still prefixed.
                System.Uri parsedSrc;
                bool isAlreadyAbsolute =
                    imgHttpPath.StartsWith("//", System.StringComparison.Ordinal)
                    || (System.Uri.TryCreate(imgHttpPath, System.UriKind.Absolute, out parsedSrc)
                        && !parsedSrc.IsFile);
                if (isAlreadyAbsolute)
                {
                    continue;
                }

                srcAttribute.Value = baseDirectory + imgHttpPath;
            }
        }

        // Same result as the original loop: the last <body> node wins.
        l_sWenBenHtml = bodyNodes[bodyNodes.Count - 1].InnerHtml;
    }
}
2、通過HtmlAgilityPack Html操作類庫將html格式的字元串載入為html文檔對象,再對html dom進行操作
// Decodes an HTML fragment submitted by the front end, wraps it into a complete
// HTML document, reduces every <img src> to its bare file name, and stores the
// resulting markup in sHtmlString.

// 1. Decode the submitted string: URL-decode first, then HTML-decode.
string sDecodeString = HttpUtility.HtmlDecode(HttpUtility.UrlDecode(sEncodeString));

// 2. Wrap the fragment into a complete HTML document.
sDecodeString = @"<!DOCTYPE html><html><head><meta http-equiv=""content-type"" content=""text/html;charset=UTF-8""/>"
    + @"</head><body><div>"
    + sDecodeString
    + @"</div></body></html>";

// 3. Process the src attribute of every <img> through the HTML DOM.
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(sDecodeString.Replace("\n", " "));
HtmlNode node = doc.DocumentNode;

// XPath query; the result is null when there are no <img> nodes, so guard before iterating.
HtmlNodeCollection nodes = node.SelectNodes("//img");
if (nodes != null)
{
    foreach (HtmlNode imgTag in nodes)
    {
        // <img> without a src attribute: nothing to rewrite.
        HtmlAttribute srcAttribute = imgTag.Attributes["src"];
        if (srcAttribute == null)
        {
            continue;
        }

        string imgHttpPath = srcAttribute.Value;
        if (string.IsNullOrEmpty(imgHttpPath))
        {
            continue;
        }

        // Inline data: URIs may contain '/' inside their base64 payload; cutting at
        // the last slash would destroy the image, so leave them untouched.
        if (imgHttpPath.StartsWith("data:", System.StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Keep only the part after the last '/', i.e. the file name.
        srcAttribute.Value = imgHttpPath.Substring(imgHttpPath.LastIndexOf('/') + 1);
    }
}

// 4. Take the processed markup (with rewritten img src values) as a string.
sHtmlString = node.OuterHtml;

// 5. Save the string into an .html file.
// do something
持續完善更新中,敬請期待...