package com.open1111.jsoup; import org.apache.http.HttpEntity;import org.apache.http.client.methods.CloseableHttpResponse;import org.apache.http.clien ...
package com.open1111.jsoup;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Demo: downloads a web page with Apache HttpClient and queries the parsed document with
 * jsoup's DOM-style lookup methods (by tag, by id, by class, by attribute name, and by
 * attribute name/value), printing every result to standard output.
 */
public class Demo02 {

    /** Page that the demo downloads and inspects. */
    private static final String PAGE_URL = "http://www.cnblogs.com/";

    /** Line printed after each element in the list-style sections. */
    private static final String SEPARATOR = "-------------";

    /**
     * Fetches {@link #PAGE_URL}, parses it, and prints the results of several DOM lookups.
     *
     * @param args ignored
     * @throws Exception if the HTTP request or reading the response body fails
     */
    public static void main(String[] args) throws Exception {
        Document doc = Jsoup.parse(fetchPage(PAGE_URL)); // parse the page into a document

        // Lookup by tag name: text of the first <title>, or empty when the page has none
        // (indexing an empty result with get(0) would throw IndexOutOfBoundsException).
        Elements titleElements = doc.getElementsByTag("title");
        String title = titleElements.isEmpty() ? "" : titleElements.get(0).text();
        System.out.println("網頁標題是:" + title);

        // Lookup by id: getElementById yields null when no element carries the id,
        // so fall back to an empty string instead of dereferencing null.
        Element navTopElement = doc.getElementById("site_nav_top");
        String navTop = navTopElement == null ? "" : navTopElement.text();
        System.out.println("口號:" + navTop);

        // Lookup by CSS class name; this section prints each match's inner HTML.
        Elements itemElements = doc.getElementsByClass("post_item");
        System.out.println("=======輸出post_item==============");
        for (Element item : itemElements) {
            System.out.println(item.html());
            System.out.println(SEPARATOR);
        }

        // Lookup by attribute name.
        printElements("=======輸出with的DOM==============",
                doc.getElementsByAttribute("width"));

        // Lookup by attribute name and value.
        printElements("=======輸出target-_blank的DOM==============",
                doc.getElementsByAttributeValue("target", "_blank"));
    }

    /**
     * Performs an HTTP GET and returns the response body as text.
     *
     * <p>Both the client and the response are closed on every path, including when reading
     * the body throws.
     *
     * @param url address to request
     * @return the response body, or an empty string when the response carries no entity
     * @throws java.io.IOException if the request fails or the body cannot be read
     */
    private static String fetchPage(String url) throws java.io.IOException {
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
                CloseableHttpResponse response = httpclient.execute(new HttpGet(url))) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return "";
            }
            // "utf-8" is passed as the charset argument of EntityUtils.toString.
            return EntityUtils.toString(entity, "utf-8");
        }
    }

    /**
     * Prints a section header followed by the string form of each element, with a separator
     * line after every element.
     *
     * @param header section header line
     * @param matches elements to print; may be empty
     */
    private static void printElements(String header, Elements matches) {
        System.out.println(header);
        for (Element match : matches) {
            System.out.println(match.toString());
            System.out.println(SEPARATOR);
        }
    }
}