/// <summary>
/// Downloads the yearly administrative-division code tables published on
/// mca.gov.cn and shows the extracted code/name pairs in a grid.
/// NOTE(review): the original comment labels the source as the "Ministry of Finance" (財政部);
/// confirm which ministry mca.gov.cn actually belongs to.
/// http://www.mca.gov.cn/article/sj/xzqh/1980/
/// https://github.com/zzzprojects/html-agility-pack
/// https://github.com/linezero/HtmlAgilityPack
/// </summary>
public partial class Form1 : Form
{
    // Parsing settings for the page currently being processed.
    // They are overwritten from the selected McaData() row on every button click.
    int codecell = 2;            // 1-based index of the cell holding the division code
    int namecell = 3;            // 1-based index of the cell holding the division name
    int yearnmae = 2019;         // year of the selected page (identifier spelling kept as-is)
    string tableNo = "table";    // XPath fragment appended to "//" to locate the table(s)
    string trNo = "tr";          // XPath used to select rows relative to a table node
    string tdthNo = "th|td";     // XPath used to select cells relative to a row node

    /// <summary>
    /// Builds the catalogue of source pages, one row per year, together with the
    /// settings needed to parse each page.
    /// </summary>
    /// <returns>
    /// A new table with the columns year, website, codecell, namecell, tableNo, trNo and tdthNo.
    /// </returns>
    DataTable McaData()
    {
        DataTable dt = new DataTable();
        dt.Columns.Add("year", typeof(int));        // year
        dt.Columns.Add("website", typeof(string));  // URL of the page for that year
        dt.Columns.Add("codecell", typeof(int));    // which cell of a row holds the division code
        dt.Columns.Add("namecell", typeof(int));    // which cell of a row holds the division name
        dt.Columns.Add("tableNo", typeof(string));  // table selector
        dt.Columns.Add("trNo", typeof(string));     // row selector
        dt.Columns.Add("tdthNo", typeof(string));   // cell selector

        dt.Rows.Add(2019, "http://www.mca.gov.cn/article/sj/xzqh/1980/2019/202002281436.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2018, "http://www.mca.gov.cn/article/sj/xzqh/1980/201903/201903011447.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2017, "http://www.mca.gov.cn/article/sj/xzqh/1980/201803/201803131454.html", 2, 3, "table", "tr", "th|td");
        // Original note for 2016: the TBODY tags on this page are all uppercase.
        dt.Rows.Add(2016, "http://www.mca.gov.cn/article/sj/xzqh/1980/201705/201705311652.html", 2, 3, "table//tbody", "tr", "th|td");
        dt.Rows.Add(2015, "http://www.mca.gov.cn/article/sj/tjbz/a/2015/201706011127.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2014, "http://files2.mca.gov.cn/cws/201502/20150225163817214.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2013, "http://files2.mca.gov.cn/cws/201404/20140404125552372.htm", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2012, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201707271556.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2011, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201707271552.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2010, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220946.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2009, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220943.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2008, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220941.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2007, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220939.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2006, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220936.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2005, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220935.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2004, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220930.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2003, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220928.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2002, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220927.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2001, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220925.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(2000, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220923.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1999, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220921.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1998, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220918.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1997, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220916.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1996, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220914.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1995, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220913.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1994, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220911.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1993, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041023.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1992, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220910.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1991, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041020.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1990, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041018.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1989, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041017.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1988, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220903.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1987, "http://www.mca.gov.cn/article/sj/xzqh/1980/1980/201911180950.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1986, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220859.html", 2, 3, "table", "tr", "th|td");
        dt.Rows.Add(1985, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220858.html", 2, 3, "table", "tr", "th|td");
        // The 1980-1984 rows use cells 1 and 2 instead of 2 and 3.
        dt.Rows.Add(1984, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220856.html", 1, 2, "table", "tr", "th|td");
        dt.Rows.Add(1983, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708160821.html", 1, 2, "table", "tr", "th|td");
        dt.Rows.Add(1982, "http://www.mca.gov.cn/article/sj/xzqh/1980/1980/201911180942.html", 1, 2, "table", "tr", "th|td");
        dt.Rows.Add(1981, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041004.html", 1, 2, "table", "tr", "th|td");
        dt.Rows.Add(1980, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708040959.html", 1, 2, "table", "tr", "th|td");
        return dt;
    }

    /// <summary>
    /// Creates the form and initializes its designer-generated components.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Binds the year selector to the page catalogue: the year is displayed and the
    /// page URL is the bound value.
    /// </summary>
    /// <param name="sender">Event source.</param>
    /// <param name="e">Event data.</param>
    private void Form1_Load(object sender, EventArgs e)
    {
        this.comboBox1.DataSource = McaData();
        this.comboBox1.DisplayMember = "year";
        this.comboBox1.ValueMember = "website";
    }

    /// <summary>
    /// Fetches the page for the selected year, shows its lower-cased HTML in the text box
    /// and binds the extracted code/name pairs to the grid and navigator.
    /// Original author note: Geovin Du 塗聚文.
    /// </summary>
    /// <param name="sender">Event source.</param>
    /// <param name="e">Event data.</param>
    private void button1_Click(object sender, EventArgs e)
    {
        try
        {
            // The combo box is bound to the McaData() table, so the selected item already
            // carries the URL and every parsing setting for that year. Reading them from one
            // row keeps the URL and the settings in agreement and avoids rebuilding the table.
            DataRowView selected = this.comboBox1.SelectedItem as DataRowView;
            if (selected == null)
            {
                // Nothing selected: there is no page to fetch.
                return;
            }

            string website = (string)selected["website"];

            HtmlAgilityPack.HtmlWeb webClient = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = webClient.Load(website);
            this.richTextBox1.Text = doc.Text.ToLower();

            yearnmae = (int)selected["year"];
            codecell = (int)selected["codecell"];
            namecell = (int)selected["namecell"];
            tableNo = (string)selected["tableNo"];
            trNo = (string)selected["trNo"];
            tdthNo = (string)selected["tdthNo"];

            List<AreaInfo> list = ExtractAreas(doc);

            this.bindingSource1.DataSource = list;
            this.bindingNavigator1.BindingSource = this.bindingSource1;
            this.dataGridView1.DataSource = this.bindingSource1;
        }
        catch (Exception ex)
        {
            // Previously the exception was discarded; report it so that download and
            // parsing failures are visible to the user.
            MessageBox.Show(this, ex.Message, this.Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }

    /// <summary>
    /// Walks every table matched by the current settings and collects one entry per row
    /// whose code cell is not empty.
    /// </summary>
    /// <param name="doc">The loaded HTML document.</param>
    /// <returns>The extracted entries; empty when no table, row or code cell matches.</returns>
    private List<AreaInfo> ExtractAreas(HtmlAgilityPack.HtmlDocument doc)
    {
        List<AreaInfo> list = new List<AreaInfo>();

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//" + tableNo);
        if (tables == null)
        {
            return list;
        }

        foreach (HtmlNode table in tables)
        {
            HtmlNodeCollection rows = table.SelectNodes(trNo);
            if (rows == null)
            {
                continue;
            }

            foreach (HtmlNode row in rows)
            {
                HtmlNodeCollection cells = row.SelectNodes(tdthNo);
                if (cells == null)
                {
                    continue;
                }

                AreaInfo info = new AreaInfo();
                int cell = 1; // cell positions in the settings are 1-based
                foreach (HtmlNode node in cells)
                {
                    if (cell == codecell)
                    {
                        info.AreaCode = CleanCellText(node.InnerText);
                    }
                    if (cell == namecell)
                    {
                        info.AreaName = CleanCellText(node.InnerText);
                    }
                    cell++;
                }

                if (!string.IsNullOrEmpty(info.AreaCode))
                {
                    list.Add(info);
                }
            }
        }

        return list;
    }

    /// <summary>
    /// Trims surrounding whitespace and removes every space character from a cell's text.
    /// </summary>
    /// <param name="text">Raw inner text of a table cell.</param>
    /// <returns>The cleaned text.</returns>
    private static string CleanCellText(string text)
    {
        return text.Trim().Replace(" ", "");
    }