需求 某航空公司物流單信息查詢,是一個post請求。通過後臺模擬POST HTTP請求發現無法獲取頁面數據,通過查看航空公司網站後,發現網站使用了防禦CSRF攻擊的機制,直接返回40X錯誤。 關於CSRF 讀者自行百度 網站HTTP請求分析 Headers Form Data 在head里包含了cook ...
需求
某航空公司物流單信息查詢,是一個post請求。通過後臺模擬POST HTTP請求發現無法獲取頁面數據,通過查看航空公司網站後,發現網站使用了防禦CSRF攻擊的機制,對缺少有效token的請求直接返回40X錯誤。
關於CSRF
讀者自行百度
網站HTTP請求分析
Headers
Form Data
在請求頭(Headers)里包含了cookie 與 x-csrf-token,Form Data 里包含了_csrf(與請求頭里 x-csrf-token 的值是一樣的)。
這裡通過查看該網站的JS源代碼發現_csrf 來自於網頁的head標簽里
猜測cookie與 x-csrf-token是有一定的有效期,並且他們共同作用來防禦CSRF攻擊。
解決方案
1,首先請求一下該航空公司的網站,獲取cookie與_csrf
2,然後用C#模擬HTTP請求,分別在請求頭(Headers)和Form Data里加入如上參數,發起POST請求
代碼
/// <summary>
/// Fetches a page once and extracts what is needed to replay a CSRF-protected form POST:
/// the cookie value reported by the HTTP helper and the <c>content</c> value of every
/// meta tag whose <c>name</c> starts with <c>_csrf</c>.
/// </summary>
/// <remarks>
/// Construction is best-effort: a blank URL or a failed fetch/parse leaves the cookie unset
/// and the token list empty instead of throwing.
/// </remarks>
public class CSRFToken
{
    // Matches one <meta name="_csrf..." content="..."/> tag. The quantifiers are lazy so that a
    // match stays inside a single tag even when several meta tags share one line of HTML.
    private static readonly Regex CsrfMetaRegex =
        new Regex(@"<meta name=""_csrf.*?"" content="".*?""/>", RegexOptions.None);

    // Extracts the text between the quotes of a content="..." attribute.
    private static readonly Regex ContentValueRegex =
        new Regex("(?<=content=\").*?(?=\")", RegexOptions.None);

    // Cookie value returned by the initial GET; sent back with the follow-up request.
    string cookie;

    // Content values of the _csrf meta tags, in document order. Never null.
    readonly List<string> csrfs = new List<string>();

    /// <summary>
    /// Requests <paramref name="url"/> and collects the cookie and CSRF meta values from the response.
    /// </summary>
    /// <param name="url">Address of the page that carries the CSRF meta tags. Blank values are ignored.</param>
    public CSRFToken(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return;
        }

        try
        {
            // NOTE(review): HttpHelper.CreateGetHttpResponseForPC is not visible here; it is
            // presumably a GET that returns the page HTML and outputs the cookie - confirm.
            var http = new HttpHelper(url);
            string responseCookie;
            string html = http.CreateGetHttpResponseForPC(out responseCookie);
            this.cookie = responseCookie;

            if (string.IsNullOrEmpty(html))
            {
                // Nothing to parse (for example the helper reported a failed request).
                return;
            }

            foreach (Match metaTag in CsrfMetaRegex.Matches(html))
            {
                foreach (Match contentValue in ContentValueRegex.Matches(metaTag.Value))
                {
                    csrfs.Add(contentValue.Value);
                }
            }
        }
        catch (Exception e)
        {
            // Stay best-effort for callers, but leave a trace instead of hiding the failure.
            System.Diagnostics.Trace.TraceWarning("CSRFToken: failed to load token from {0}: {1}", url, e);
        }
    }

    /// <summary>Returns the cookie captured from the initial request, or null if none was captured.</summary>
    public String getCookie()
    {
        return cookie;
    }

    /// <summary>Overrides the stored cookie value.</summary>
    public void setCookie(String cookie)
    {
        this.cookie = cookie;
    }

    /// <summary>Returns the extracted CSRF meta content values; empty when nothing was found.</summary>
    public List<string> getCsrf_token()
    {
        return csrfs;
    }
}
httpHelper
/// <summary>
/// Sends a form-encoded POST to the helper's base address and returns the response body.
/// </summary>
/// <param name="headers">Extra request headers (for example Cookie and X-CSRF-TOKEN); may be null or empty.</param>
/// <param name="parameters">
/// Form fields to send. Keys and values are URL-encoded here, so pass raw (unencoded) values.
/// May be null or empty, in which case no body is written.
/// </param>
/// <returns>The response body decoded as UTF-8, or null when the request fails with a <see cref="WebException"/>.</returns>
public string CreatePostHttpResponse(IDictionary<string, string> headers, IDictionary<string, string> parameters)
{
    HttpWebRequest request = CreateConfiguredWebRequest("POST");

    if (headers != null)
    {
        foreach (KeyValuePair<string, string> header in headers)
        {
            ApplyRequestHeader(request, header.Key, header.Value);
        }
    }

    if (parameters != null && parameters.Count > 0)
    {
        byte[] data = Encoding.UTF8.GetBytes(BuildFormBody(parameters));
        using (Stream stream = request.GetRequestStream())
        {
            stream.Write(data, 0, data.Length);
        }
    }

    string ignoredCookie;
    return ReadResponseText(request, out ignoredCookie);
}

/// <summary>
/// Sends a GET to the helper's base address and returns the response body together with
/// the raw Set-Cookie header of the response.
/// </summary>
/// <param name="cookie">Receives the Set-Cookie header value; an empty string when absent or when the request fails.</param>
/// <returns>The response body decoded as UTF-8, or null when the request fails with a <see cref="WebException"/>.</returns>
public string CreateGetHttpResponse(out string cookie)
{
    HttpWebRequest request = CreateConfiguredWebRequest("GET");
    return ReadResponseText(request, out cookie);
}

// Creates a request for _baseIPAddress with the protocol settings shared by GET and POST.
private HttpWebRequest CreateConfiguredWebRequest(string method)
{
    // NOTE(review): CheckValidationResult is defined elsewhere; if it accepts every
    // certificate, TLS validation is disabled process-wide - confirm this is intended.
    ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
    // Process-wide setting; applied before the request is created.
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11;

    // A direct cast fails immediately for a non-HTTP address instead of causing a later NullReferenceException.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(_baseIPAddress);
    request.ProtocolVersion = HttpVersion.Version10;
    request.Method = method;
    request.ContentType = "application/x-www-form-urlencoded";
    request.UserAgent = DefaultUserAgent;
    return request;
}

// Sets one header, routing restricted headers to their dedicated properties because
// Headers.Add throws ArgumentException for them.
private static void ApplyRequestHeader(HttpWebRequest request, string name, string value)
{
    if (string.Equals(name, "Referer", System.StringComparison.OrdinalIgnoreCase))
    {
        request.Referer = value;
    }
    else if (string.Equals(name, "User-Agent", System.StringComparison.OrdinalIgnoreCase))
    {
        request.UserAgent = value;
    }
    else if (string.Equals(name, "Host", System.StringComparison.OrdinalIgnoreCase))
    {
        request.Host = value;
    }
    else if (string.Equals(name, "Accept", System.StringComparison.OrdinalIgnoreCase))
    {
        request.Accept = value;
    }
    else if (string.Equals(name, "Content-Type", System.StringComparison.OrdinalIgnoreCase))
    {
        request.ContentType = value;
    }
    else
    {
        request.Headers.Add(name, value);
    }
}

// Builds an application/x-www-form-urlencoded body; encoding keeps '&', '=' and '+' in
// keys or values from corrupting the field boundaries.
private static string BuildFormBody(IDictionary<string, string> parameters)
{
    StringBuilder buffer = new StringBuilder();
    foreach (KeyValuePair<string, string> field in parameters)
    {
        if (buffer.Length > 0)
        {
            buffer.Append('&');
        }
        buffer.Append(WebUtility.UrlEncode(field.Key));
        buffer.Append('=');
        buffer.Append(WebUtility.UrlEncode(field.Value));
    }
    return buffer.ToString();
}

// Executes the request and reads the whole body as UTF-8, releasing the response in every case.
private static string ReadResponseText(HttpWebRequest request, out string cookie)
{
    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
        {
            cookie = response.Headers["Set-Cookie"] ?? string.Empty;
            return reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // An error status (such as the 40X from a rejected CSRF token) still carries a
        // response; close it so the underlying connection is released.
        if (ex.Response != null)
        {
            ex.Response.Close();
        }
        cookie = "";
        return null;
    }
}
爬取程式
爬取結果
瀏覽器結果
注意事項與結論
1,不同的網站,獲取csrf token的方式不一樣,但無論怎麼做,只要信息傳到了前臺,我們都可以有相應的方法來獲取。
2,請求時候的http驗證可能不一樣,測試某航空公司物流信息的時候,https請求使用的安全協議是TLS 1.2。
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11; 還有其他參數比如UserAgent後臺可能也會驗證
3,基於如上航空公司,發現它的cookie和csrf_token一定時間內不會改變,那麼當實際爬取的時候可以考慮緩存cookie以及csrf_token,只有當請求失敗的時候,才重新獲取