#undef DEBUG
using Microsoft.Win32;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace AnfleCrawler.Common
{
    /// <summary>
    /// <see cref="IHttpClient"/> implementation that loads a page in a WinForms
    /// <see cref="WebBrowser"/> hosted on a dedicated STA thread and returns the
    /// document's outer HTML once the page (and any registered ajax blocks) settled.
    /// Original author notes: Chromium / CasperJS + PhantomJS, http://pinvoke.net/index.aspx
    /// </summary>
    public sealed partial class HttpBrowser : IHttpClient
    {
        #region NestedTypes
        /// <summary>
        /// Describes one DOM element whose text is expected to change through ajax.
        /// Instances travel inside the request form under the <see cref="AjaxBlock"/> key.
        /// </summary>
        [Serializable]
        public class AjaxBlockEntity
        {
            /// <summary>Form key that carries the JSON-serialized block array.</summary>
            internal const string AjaxBlock = "_AjaxBlock";

            /// <summary>DOM id of the watched element.</summary>
            public string ID { get; set; }

            /// <summary>Text the element shows before the ajax update.</summary>
            public string Text { get; set; }

            /// <summary>True when the block is awaited after a simulated click rather than after page load.</summary>
            public bool IsEvent { get; set; }
        }

        /// <summary>
        /// Describes a set of elements to click and the callback receiving the HTML after each click.
        /// </summary>
        public class AjaxEventEntity : MarshalByRefObject
        {
            /// <summary>Selector handed to the injected script to find the elements to click.</summary>
            public string ListenerSelector { get; set; }

            /// <summary>When true, <see cref="FinalCallback"/> is also invoked once before the first click.</summary>
            public bool EntryCall { get; set; }

            /// <summary>Receives the page's outer HTML.</summary>
            public Action<string> FinalCallback { get; set; }
        }

        /// <summary>
        /// Per-request state shared between the calling thread, the STA UI thread and the
        /// waiter threads. It is also installed as <c>WebBrowser.ObjectForScripting</c>.
        /// </summary>
        [ComVisible(true)]
        public sealed class STAContext : Disposable
        {
            #region Fields
            public volatile bool IsRedirect;
            private SynchronizedCollection<Tuple<HtmlElement, EventHandler>> _releaseSet;
            private AutoResetEvent _sendReceiveWaiter;
            private CountdownEvent _ajaxWaiter;
            private System.Threading.Timer _lazyTimer;

            internal volatile bool DoInvokeHtml;
            private volatile string _outerHtml;
            #endregion

            #region Properties
            public Uri RequestUrl { get; private set; }
            public HttpRequestContent RequestContent { get; private set; }
            internal AutoResetEvent WaitHandle { get; set; }

            /// <summary>
            /// Signalled by DocumentCompleted, awaited by STA_Wait. Both run on different
            /// threads, so creation goes through <see cref="LazyInitializer"/> to guarantee
            /// that every thread observes the same instance.
            /// </summary>
            internal AutoResetEvent SendReceiveWaiter
            {
                get
                {
                    return LazyInitializer.EnsureInitialized(ref _sendReceiveWaiter, () => new AutoResetEvent(false));
                }
            }

            internal AjaxBlockEntity[] AjaxBlocks { get; private set; }

            /// <summary>
            /// Counts outstanding ajax blocks (initial count 1). Shared between the UI thread
            /// (SetAjax) and the waiter thread (WaitAjax), hence the thread-safe lazy creation.
            /// </summary>
            internal CountdownEvent AjaxWaiter
            {
                get
                {
                    return LazyInitializer.EnsureInitialized(ref _ajaxWaiter, () => new CountdownEvent(1));
                }
            }

            internal volatile bool IsProcessEvent;
            internal AjaxEventEntity AjaxEvent { get; set; }

            /// <summary>
            /// Captured outer HTML. Note the getter has a side effect: it re-arms
            /// <see cref="DoInvokeHtml"/> so the next InvokeHtml call reads the page again.
            /// </summary>
            internal string OuterHtml
            {
                get
                {
                    DoInvokeHtml = true;
                    return _outerHtml;
                }
                set
                {
                    _outerHtml = value;
                }
            }
            #endregion

            #region Constructor
            /// <summary>
            /// Captures the request and, if the form carries an <c>_AjaxBlock</c> entry,
            /// deserializes it into <see cref="AjaxBlocks"/> and removes it from the form
            /// so it is not posted to the server.
            /// </summary>
            internal STAContext(Uri url, HttpRequestContent content)
            {
                this.RequestUrl = url;
                this.RequestContent = content;
                if (this.RequestContent != null && this.RequestContent.Form != null)
                {
                    string ablock = this.RequestContent.Form.Get(AjaxBlockEntity.AjaxBlock);
                    if (!string.IsNullOrEmpty(ablock))
                    {
                        this.AjaxBlocks = JsonConvert.DeserializeObject<AjaxBlockEntity[]>(ablock);
                        this.RequestContent.Form.Remove(AjaxBlockEntity.AjaxBlock);
                    }
                }
                DoInvokeHtml = true;
            }

            protected override void DisposeInternal(bool disposing)
            {
                if (disposing)
                {
                    if (_lazyTimer != null)
                    {
                        _lazyTimer.Dispose();
                        _lazyTimer = null;
                    }
                    if (this.WaitHandle != null)
                    {
                        this.WaitHandle.Dispose();
                        this.WaitHandle = null;
                    }

                    DisposeObject(_sendReceiveWaiter);
                    DisposeObject(_ajaxWaiter);
                }
            }
            #endregion

            #region Methods
            /// <summary>
            /// Lets the load callback supply the HTML itself; InvokeHtml will then skip
            /// reading the document until <see cref="OuterHtml"/> is read.
            /// </summary>
            public void SetHtml(string html)
            {
                _outerHtml = html;
                DoInvokeHtml = false;
            }

            /// <summary>
            /// Schedules <paramref name="func"/> to run once on a new STA thread after 2 seconds.
            /// Only the first registration takes effect.
            /// </summary>
            internal void RegisterLazyLoad(Action<object> func, object state)
            {
                if (_lazyTimer != null)
                {
                    return;
                }
                _lazyTimer = new System.Threading.Timer(x => STA_Run(func, x, this), state, 2000, Timeout.Infinite);
            }

            /// <summary>
            /// Alternative approach: push the lazy-load timer back by 2 seconds after every load.
            /// </summary>
            internal void DelayLazyLoad()
            {
                if (_lazyTimer == null)
                {
                    return;
                }
                _lazyTimer.Change(2000, Timeout.Infinite);
            }

            /// <summary>
            /// Attaches an <c>onpropertychange</c> handler to <paramref name="node"/> once and
            /// remembers it for <see cref="AjaxUnmarks"/>. Must be called on the STA thread.
            /// </summary>
            internal void AjaxMark(HtmlElement node, EventHandler e)
            {
                if (_releaseSet == null)
                {
                    _releaseSet = new SynchronizedCollection<Tuple<HtmlElement, EventHandler>>();
                }
                if (_releaseSet.Any(t => t.Item1 == node))
                {
                    return;
                }
                _releaseSet.Add(Tuple.Create(node, e));
                node.AttachEventHandler("onpropertychange", e);
            }

            /// <summary>
            /// Detaches every handler attached by <see cref="AjaxMark"/>. Must be called on the STA thread.
            /// </summary>
            internal void AjaxUnmarks()
            {
                if (_releaseSet.IsNullOrEmpty())
                {
                    return;
                }
                foreach (var item in _releaseSet)
                {
                    item.Item1.DetachEventHandler("onpropertychange", item.Item2);
                }
                _releaseSet = null;
            }

            internal void _ReleaseMemory()
            {
                // NOTE(review): the unconditional return switches the memory trim below off;
                // it is kept exactly as found - confirm whether that is still intended.
                return;
#if !DEBUG
                var proc = Process.GetCurrentProcess();
                // 128M
                if (proc.PrivateMemorySize64 <= 134217728L)
                {
                    return;
                }
                base.ReleaseMemory();
#endif
            }
            #endregion
        }
        #endregion

        #region Static
        public const string Callback_Snapshot = "_xSnapshot";

        static HttpBrowser()
        {
            SetBrowserFeatureControl();
            NativeMethods.SetErrorMode(NativeMethods.ErrorModes.SEM_FAILCRITICALERRORS | NativeMethods.ErrorModes.SEM_NOGPFAULTERRORBOX | NativeMethods.ErrorModes.SEM_NOOPENFILEERRORBOX);
        }

        /// <summary>
        /// Writes the per-process Internet Explorer feature-control values for the current
        /// executable (skipped when hosted by Visual Studio designers).
        /// http://msdn.microsoft.com/en-us/library/ee330720(v=vs.85).aspx lists further FEATURE_* keys.
        /// </summary>
        private static void SetBrowserFeatureControl()
        {
            // FeatureControl settings are per-process
            string fileName = Path.GetFileName(Process.GetCurrentProcess().MainModule.FileName);
            string[] skip = new string[] { "devenv.exe", "XDesProc.exe" };
            if (skip.Any(p => p.Equals(fileName, StringComparison.OrdinalIgnoreCase)))
            {
                return;
            }

            SetBrowserFeatureControlKey("FEATURE_BROWSER_EMULATION", fileName, GetBrowserEmulationMode());
            SetBrowserFeatureControlKey("FEATURE_MANAGE_SCRIPT_CIRCULAR_REFS", fileName, 1);
        }

        /// <summary>
        /// Maps the installed Internet Explorer major version to a FEATURE_BROWSER_EMULATION value.
        /// http://msdn.microsoft.com/en-us/library/ie/ee330730(v=vs.85).aspx
        /// </summary>
        /// <returns>9000, 10000 or 11000 for IE 9/10/11; 8000 for every other version &gt;= 8.</returns>
        /// <exception cref="ApplicationException">IE is not installed or is older than version 8.</exception>
        private static uint GetBrowserEmulationMode()
        {
            int browserVersion;
            using (var ieKey = Registry.LocalMachine.OpenSubKey(@"SOFTWARE\Microsoft\Internet Explorer",
                RegistryKeyPermissionCheck.ReadSubTree, System.Security.AccessControl.RegistryRights.QueryValues))
            {
                // OpenSubKey returns null when the key does not exist; treat that like a missing version.
                var version = ieKey == null ? null : (ieKey.GetValue("svcVersion") ?? ieKey.GetValue("Version"));
                if (version == null)
                {
                    throw new ApplicationException("Microsoft Internet Explorer is required!");
                }
                int.TryParse(version.ToString().Split('.')[0], out browserVersion);
            }
            if (browserVersion < 8)
            {
                throw new ApplicationException("Microsoft Internet Explorer 8 is required!");
            }
            switch (browserVersion)
            {
                case 9:
                    return 9000;
                case 10:
                    return 10000;
                case 11:
                    return 11000;
                default:
                    return 8000;
            }
        }

        private static void SetBrowserFeatureControlKey(string feature, string appName, uint value)
        {
            using (var key = Registry.CurrentUser.CreateSubKey(
                String.Concat(@"Software\Microsoft\Internet Explorer\Main\FeatureControl\", feature),
                RegistryKeyPermissionCheck.ReadWriteSubTree))
            {
                key.SetValue(appName, value, RegistryValueKind.DWord);
            }
        }

        /// <summary>
        /// Runs <paramref name="func"/> with <paramref name="state"/> on a new background STA thread.
        /// Exceptions thrown by <paramref name="func"/> are logged, not propagated.
        /// </summary>
        private static void STA_Run(Action<object> func, object state, STAContext context)
        {
            var sta = new Thread(arg =>
            {
                var set = (object[])arg;
                try
                {
                    var func2 = (Action<object>)set[0];
                    func2(set[1]);
                }
                catch (Exception ex)
                {
                    App.LogError(ex, "STA_Run");
                }
            }, 1024 * 512); // 512K stack; the default is 1M
            sta.IsBackground = true;
            sta.SetApartmentState(ApartmentState.STA);
            try
            {
                sta.Start(new object[2] { func, state });
            }
            catch (OutOfMemoryException ex)
            {
                HandleException(ex);
            }
        }

        /// <summary>
        /// Stores <paramref name="set"/> as JSON in <paramref name="form"/> under the <c>_AjaxBlock</c> key.
        /// </summary>
        public static void FillAjaxBlock(NameValueCollection form, AjaxBlockEntity[] set)
        {
            Contract.Requires(form != null);

            form[AjaxBlockEntity.AjaxBlock] = JsonConvert.SerializeObject(set, Formatting.None);
        }
        #endregion

        #region Fields
        private EndPoint _proxyAddr;
        private Lazy<IHttpClient> _lazyClient;
        private CookieContainer _cookieContainer;
        private Action<STAContext, HtmlDocument> _onLoad;
        #endregion

        #region Properties
        public int SendReceiveTimeout { get; set; }
        public ushort? RetryCount { get; set; }
        public TimeSpan? RetryWaitDuration { get; set; }
        public bool UseCookies { get; set; }
        public CookieContainer CookieContainer
        {
            get { return _cookieContainer; }
        }
        public string SaveFileDirectory { get; set; }

        /// <summary>
        /// Page snapshot size; set <see cref="Size.Empty"/> for a full-page screenshot.
        /// </summary>
        public Size? Snapshot { get; set; }

        /// <summary>
        /// Plain HTTP client used for downloads; its settings are synchronized with this instance on every access.
        /// </summary>
        internal IHttpClient Client
        {
            get
            {
                var client = _lazyClient.Value;
                client.SendReceiveTimeout = this.SendReceiveTimeout;
                client.RetryCount = this.RetryCount;
                client.RetryWaitDuration = this.RetryWaitDuration;
                client.UseCookies = this.UseCookies;
                client.SaveFileDirectory = this.SaveFileDirectory;
                return client;
            }
        }
        #endregion

        #region Constructors
        public HttpBrowser()
        {
            this.SendReceiveTimeout = -1;
            _lazyClient = new Lazy<IHttpClient>(() => new HttpClient(), false);
            _cookieContainer = new CookieContainer();
            this.UseCookies = true;
        }

        /// <summary>
        /// If <paramref name="crossLoad"/> involves cross-domain interaction, inherit and extend IsolateProxy.
        /// </summary>
        /// <param name="crossLoad">Invoked on every completed document with the request context.</param>
        public HttpBrowser(Action<STAContext, HtmlDocument> crossLoad)
            : this()
        {
            _onLoad = crossLoad;
        }
        #endregion

        #region Methods
        /// <summary>
        /// Sets the proxy: remembered for the child process when spawned, otherwise applied
        /// through WinInet for the current process.
        /// </summary>
        /// <exception cref="NotSupportedException"><paramref name="credential"/> is not null.</exception>
        public void SetProxy(EndPoint address, NetworkCredential credential = null)
        {
            if (credential != null)
            {
                throw new NotSupportedException("credential");
            }

            if (IsSpawned)
            {
                _proxyAddr = address;
            }
            else
            {
#if DEBUG
                App.LogInfo("SetProxy HttpBrowser {0}", address);
#endif
                if (WinInetInterop.SetConnectionProxy(address.ToString()))
                {
                    App.LogInfo("SetProxy HttpBrowser {0} succeed", address);
                }
            }
        }

        internal void RestoreSystemProxy()
        {
            if (IsSpawned)
            {
                _proxyAddr = null;
            }
            else
            {
#if DEBUG
                App.LogInfo("RestoreSystemProxy HttpBrowser");
#endif
                if (WinInetInterop.RestoreSystemProxy())
                {
                    App.LogInfo("RestoreSystemProxy HttpBrowser succeed");
                }
            }
        }

        /// <summary>
        /// Loads <paramref name="requestUrl"/> and returns the rendered outer HTML. Runs in a
        /// child process when <c>IsSpawned</c> is set, otherwise on an in-process STA thread;
        /// blocks until the load pipeline signals completion.
        /// </summary>
        public string GetHtml(Uri requestUrl, HttpRequestContent content = null)
        {
            if (IsSpawned)
            {
                return SpawnedStart(_proxyAddr, requestUrl, content);
            }
            using (var arg = new STAContext(requestUrl, content))
            {
                arg.WaitHandle = new AutoResetEvent(false);
                this.STA_Run(arg);
                arg.WaitHandle.WaitOne();
                return arg.OuterHtml;
            }
        }

        /// <summary>
        /// Same as <see cref="GetHtml(Uri, HttpRequestContent)"/> but additionally clicks the
        /// elements described by <paramref name="local"/>; always runs in-process.
        /// </summary>
        public string GetHtml(Uri requestUrl, AjaxEventEntity local, HttpRequestContent content = null)
        {
            Contract.Requires(requestUrl != null);
            if (local == null)
            {
                return GetHtml(requestUrl, content);
            }

            using (var arg = new STAContext(requestUrl, content))
            {
                arg.AjaxEvent = local;
                arg.WaitHandle = new AutoResetEvent(false);
                this.STA_Run(arg);
                arg.WaitHandle.WaitOne();
                return arg.OuterHtml;
            }
        }

        public Stream GetStream(Uri requestUrl, HttpRequestContent content = null)
        {
            return this.Client.GetStream(requestUrl, content);
        }

        public void DownloadFile(Uri fileUrl, out string fileName)
        {
            this.Client.DownloadFile(fileUrl, out fileName);
        }
        #endregion

        #region Hepler
        /// <summary>
        /// Injects a script element with the given source into the document's head.
        /// </summary>
        /// <param name="document">Target document.</param>
        /// <param name="js">JavaScript source text.</param>
        public void InjectScript(HtmlDocument document, string js)
        {
            Contract.Requires(document != null);

            if (!CheckDocument(document.Url))
            {
                App.LogInfo("HttpBrowser InjectScript Cancel");
                return;
            }
            var head = document.GetElementsByTagName("head")[0];
            var script = document.CreateElement("script");
            script.SetAttribute("type", "text/javascript");
            script.SetAttribute("text", js);
            head.AppendChild(script);
        }

        /// <summary>
        /// Returns false for Internet Explorer's built-in <c>res://ieframe.dll</c> pages.
        /// </summary>
        private bool CheckDocument(Uri documentUrl)
        {
            if (documentUrl != null && documentUrl.OriginalString.StartsWith("res://ieframe.dll", StringComparison.OrdinalIgnoreCase))
            {
                App.LogInfo("CheckDocument {0}", documentUrl);
                return false;
            }
            return true;
        }

        /// <summary>
        /// Registers a change listener for every ajax block matching <paramref name="isEvent"/>
        /// and adds one count per block to the ajax waiter.
        /// </summary>
        /// <param name="browser">Browser whose scripting object is the request context.</param>
        /// <param name="isEvent">Selects load-time (false) or click-time (true) blocks.</param>
        private void SetAjax(WebBrowser browser, bool isEvent)
        {
            var arg = (STAContext)browser.ObjectForScripting;
            if (arg.AjaxBlocks.IsNullOrEmpty())
            {
                return;
            }
            foreach (var block in arg.AjaxBlocks.Where(p => p.IsEvent == isEvent))
            {
                var node = browser.Document.GetElementById(block.ID);
                if (node == null)
                {
                    continue;
                }
                arg.AjaxWaiter.AddCount();
                arg.AjaxMark(node, (sender, e) =>
                {
                    node = browser.Document.GetElementById(block.ID);
                    if (node == null || block.Text == null
                        || (!block.Text.Equals(node.InnerText, StringComparison.OrdinalIgnoreCase)))
                    {
                        // bug: this fails if Signal happens before AddCount
                        arg.AjaxWaiter.Signal();
                    }
                });
            }
            // Releases the initial count so the waiter only depends on the registered blocks.
            arg.AjaxWaiter.Signal();
        }

        /// <summary>
        /// Waits for the registered ajax blocks to change.
        /// </summary>
        /// <returns>True when all blocks signalled in time; false when there are none or the wait timed out.</returns>
        private bool WaitAjax(STAContext arg)
        {
            if (arg.AjaxBlocks.IsNullOrEmpty())
            {
                return false;
            }
            int aTimeout = this.SendReceiveTimeout;
            if (aTimeout <= 0)
            {
                aTimeout = (int)TimeSpan.FromSeconds(60d).TotalMilliseconds;
            }
            if (!arg.AjaxWaiter.Wait(aTimeout))
            {
                App.LogInfo("HttpBrowser Ajax Timeout {0}", arg.RequestUrl);
                return false;
            }
            return true;
        }

        /// <summary>
        /// Clicks every element matched by the context's listener selector, waits for the
        /// event-type ajax blocks after each click and hands the resulting HTML to the callback.
        /// </summary>
        private void ProcessAjaxEvent(WebBrowser browser)
        {
            var arg = (STAContext)browser.ObjectForScripting;
            if (arg.AjaxEvent == null || string.IsNullOrEmpty(arg.AjaxEvent.ListenerSelector))
            {
                return;
            }

            arg.IsProcessEvent = true;
            try
            {
                if (arg.AjaxEvent.EntryCall && arg.AjaxEvent.FinalCallback != null)
                {
                    InvokeHtml(browser);
                    arg.AjaxEvent.FinalCallback(arg.OuterHtml);
                }
                object val = browser.Document.InvokeScript("Soubiscbot", new object[] { 0, arg.AjaxEvent.ListenerSelector });
                if (val == null)
                {
                    // The injected helper is missing or failed; there is nothing to click.
                    return;
                }
                foreach (string id in val.ToString().Split(','))
                {
                    var node = browser.Document.GetElementById(id);
                    if (node == null)
                    {
                        continue;
                    }
                    arg.AjaxWaiter.Reset();
                    SetAjax(browser, true);
                    node.InvokeMember("click");
                    bool isSet = WaitAjax(arg);
                    // Logged through App like everything else; stdout carries the HTML in spawned mode.
                    App.LogInfo("ProcessAjaxEvent isSet={0}", isSet);
                    if (arg.AjaxEvent.FinalCallback != null)
                    {
                        InvokeHtml(browser);
                        arg.AjaxEvent.FinalCallback(arg.OuterHtml);
                    }
                }
            }
            finally
            {
                // Reset even when a script call or callback throws.
                arg.IsProcessEvent = false;
            }
        }

        /// <summary>
        /// Reads the page's outer HTML into the context unless the context opted out via SetHtml.
        /// </summary>
        /// <exception cref="InvalidOperationException">The browser has no scripting context.</exception>
        private void InvokeHtml(WebBrowser browser)
        {
            var scripting = (STAContext)browser.ObjectForScripting;
            if (scripting == null)
            {
                throw new InvalidOperationException("InvokeHtml");
            }
            if (!scripting.DoInvokeHtml)
            {
                return;
            }
            scripting.OuterHtml = (string)browser.Document.InvokeScript("Soubiscbot");
        }
        #endregion

        #region STAThread
        /// <summary>
        /// Creates the browser on a new STA thread, applies cookies/headers/post data,
        /// starts navigation and runs the message loop until STA_Exit ends it.
        /// </summary>
        private void STA_Run(STAContext context)
        {
            context._ReleaseMemory();
            STA_Run(state =>
            {
                var browser = new WebBrowser()
                {
                    ScriptErrorsSuppressed = true,
                    IsWebBrowserContextMenuEnabled = false,
                    ObjectForScripting = state
                };
                browser.Navigating += browser_Navigating;
                browser.DocumentCompleted += browser_DocumentCompleted;
                browser.NewWindow += browser_NewWindow;
                if (this.Snapshot.HasValue)
                {
                    browser.ScrollBarsEnabled = false;
                    browser.Size = new Size(Screen.PrimaryScreen.WorkingArea.Width, 10240);
                    browser.Show();
                }
                else
                {
                    browser.Hide();
                }
                var arg = (STAContext)state;
                byte[] postData = null;
                string headers = null;
                if (arg.RequestContent != null)
                {
                    if (this.UseCookies)
                    {
                        if (arg.RequestContent.HasCookie)
                        {
                            _cookieContainer.Add(arg.RequestUrl, arg.RequestContent.Cookies);
                        }
                        string cookieHeader = arg.RequestContent.Headers[HttpRequestHeader.Cookie];
                        if (!string.IsNullOrEmpty(cookieHeader))
                        {
                            // CookieContainer.SetCookies expects comma-separated cookies.
                            _cookieContainer.SetCookies(arg.RequestUrl, cookieHeader.Replace(';', ','));
                            arg.RequestContent.Headers.Remove(HttpRequestHeader.Cookie);
                        }
                        cookieHeader = _cookieContainer.GetCookieHeader(arg.RequestUrl);
                        if (cookieHeader.Length > 0)
                        {
                            arg.RequestContent.Headers[HttpRequestHeader.Cookie] = cookieHeader.Replace(',', ';');
                        }
                    }
                    else
                    {
                        arg.RequestContent.Headers[HttpRequestHeader.Cookie] = string.Empty;
                    }
                    if (arg.RequestContent.HasBody)
                    {
                        arg.RequestContent.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
                        postData = Encoding.UTF8.GetBytes(arg.RequestContent.GetFormString());
                    }
                    headers = arg.RequestContent.GetHeadersString();
                }
                browser.Navigate(arg.RequestUrl, "_self", postData, headers);

                STA_Run(STA_Wait, browser, arg);
                // Blocks the current thread until Application.ExitThread is called.
                Application.Run();
            }, context, context);
        }

        /// <summary>
        /// Waiter-thread body: enforces the send/receive timeout, then waits for the ajax blocks.
        /// </summary>
        private void STA_Wait(object state)
        {
            var browser = (WebBrowser)state;
#if DEBUG
            App.LogInfo("STA_Wait {0}", browser.Url);
#endif
            var arg = (STAContext)browser.ObjectForScripting;
            try
            {
                int srTimeout = this.SendReceiveTimeout;
                if (srTimeout > -1 && !arg.SendReceiveWaiter.WaitOne(srTimeout))
                {
                    // Request timed out
                    browser.Invoke((Action)(() =>
                    {
                        if (browser.ReadyState != WebBrowserReadyState.Complete)
                        {
                            browser.Stop();
                            App.LogInfo("HttpBrowser SendReceive Timeout {0}", arg.RequestUrl);
                        }
                    }));
                }
                WaitAjax(arg);
            }
            catch (Exception ex)
            {
                App.LogError(ex, "HttpBrowser STA_Wait {0}", arg.RequestUrl);
                HandleException(ex);
            }
        }

        /// <summary>
        /// Keeps pop-ups in the same browser: cancels the new window and navigates to the
        /// active element's href instead.
        /// </summary>
        private void browser_NewWindow(object sender, System.ComponentModel.CancelEventArgs e)
        {
            var browser = (WebBrowser)sender;
            var node = browser.Document.ActiveElement;
            string link;
            if (node != null && !string.IsNullOrEmpty(link = node.GetAttribute("href")))
            {
                e.Cancel = true;
                browser.Navigate(link);
            }
        }

        private void browser_Navigating(object sender, WebBrowserNavigatingEventArgs e)
        {
            var browser = (WebBrowser)sender;
#if DEBUG
            App.LogInfo("browser_Navigating {0}", browser.Url);
#endif
            var arg = (STAContext)browser.ObjectForScripting;
            arg.DelayLazyLoad();
        }

        /// <summary>
        /// On a fully loaded document: syncs cookies, injects the Soubiscbot helper script,
        /// releases the send/receive waiter, registers load-time ajax blocks, runs the user
        /// callback and schedules the lazy completion.
        /// </summary>
        private void browser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            var browser = (WebBrowser)sender;
#if DEBUG
            App.LogInfo("browser_DocumentCompleted {0}", browser.Url);
#endif
            var arg = (STAContext)browser.ObjectForScripting;
            try
            {
                // e.Url does not change to res://, so browser.Url is checked instead
                if (!CheckDocument(browser.Url))
                {
                    App.LogInfo("HttpBrowser DocumentCompleted Cancel {0}", browser.Url);
                    return;
                }
                if (browser.ReadyState != WebBrowserReadyState.Complete)
                {
                    return;
                }

                // A redirect or an iframe load occurred
                if (browser.Url != e.Url)
                {
                    App.LogInfo("HttpBrowser Redirect {0} to {1}", arg.RequestUrl, e.Url);
                }
                if (this.UseCookies)
                {
                    WinInetInterop.LoadCookies(_cookieContainer, browser.Document.Url);
                }
                InjectScript(browser.Document, @"if (typeof ($) == 'undefined') {
    var script = document.createElement('script');
    script.src = 'http://libs.baidu.com/jquery/1.9.0/jquery.js';
    document.getElementsByTagName('head')[0].appendChild(script);
}
function Soubiscbot(kind) {
    switch (kind) {
        case 0:
            var set = [];
            $(arguments[1]).each(function (i, o) {
                var me = $(o);
                var id = me.attr('id');
                if (!id) {
                    id = Math.random();
                    me.attr('id', id);
                }
                set[i] = id;
            });
            return set.toString();
            break;
        case 1:
            try {
                return arguments[1]();
            }
            catch (ex) {
                return ex.toString();
            }
            break;
        default:
            return document.documentElement.outerHTML;
            break;
    }
}");

                if (this.SendReceiveTimeout > -1)
                {
                    arg.SendReceiveWaiter.Set();
                }
                SetAjax(browser, false);
                if (_onLoad != null)
                {
                    _onLoad(arg, browser.Document);
                }
                if (arg.IsRedirect)
                {
                    STA_Run(STA_Wait, browser, arg);
                }
                else
                {
                    arg.RegisterLazyLoad(x =>
                    {
                        var b = (WebBrowser)x;
                        if (b.IsDisposed)
                        {
                            return;
                        }
                        b.Invoke((Action<WebBrowser>)ProcessAjaxEvent, b);
                        b.Invoke((Action<object>)Callback, b);
                    }, browser);
                }
            }
            catch (Exception ex)
            {
                App.LogError(ex, "HttpBrowser DocumentCompleted RequestUrl={0} BrowserUrl={1}", arg.RequestUrl, browser.Url);
                HandleException(ex);
            }
        }

        /// <summary>
        /// Terminates the process on unrecoverable errors (out of memory, access violation).
        /// </summary>
        private static void HandleException(Exception ex)
        {
            if (ex is OutOfMemoryException || ex is AccessViolationException)
            {
                App.LogInfo("HttpBrowser auto exit {0}", ex.HResult);
                Environment.Exit(ex.HResult);
            }
        }
        #endregion

        #region Callback
        /// <summary>
        /// Final step of a request: optionally saves a PNG snapshot, captures the HTML and
        /// shuts the STA thread down. Re-entrant calls for the same context are ignored.
        /// </summary>
        private void Callback(object state)
        {
            var browser = (WebBrowser)state;
#if DEBUG
            App.LogInfo("Callback {0}", browser.Url);
#endif
            var arg = (STAContext)browser.ObjectForScripting;
            if (!Monitor.TryEnter(arg))
            {
                return;
            }
            try
            {
#warning HACK
                if (this.Snapshot.HasValue)
                {
                    Thread.Sleep(4000);
                }
                browser.Invoke((Action)(() =>
                {
                    if (this.Snapshot.HasValue)
                    {
                        // browser.Url is the response URL, so a random id is used instead of a URL hash
                        Guid fileID = Guid.NewGuid();
                        var js = new StringBuilder();
                        js.AppendFormat("document.body.setAttribute('{0}', '{1}');", Callback_Snapshot, fileID);
                        js.Append(@"    window.addEventListener('load', function () {
        window.scrollTo(0, document.documentElement.offsetHeight);
    });
");
                        browser.Document.InvokeScript("eval", new object[] { js.ToString() });
                        string savePath = Path.Combine(this.SaveFileDirectory, string.Format("{0}.png", fileID));
                        try
                        {
                            var shotSize = this.Snapshot.Value == Size.Empty ? browser.Document.Body.ScrollRectangle.Size : this.Snapshot.Value;
                            browser.Size = shotSize;
                            using (var img = new Bitmap(browser.Width, browser.Height))
                            {
                                NativeMethods.DrawTo(browser.ActiveXInstance, img, Color.White);
                                img.Save(savePath, System.Drawing.Imaging.ImageFormat.Png);
                                App.LogInfo("xSnapshot {0} {1}", browser.Url, savePath);
                            }
                        }
                        catch (Exception ex)
                        {
                            App.LogError(ex, "xSnapshot {0} {1}", browser.Url, savePath);
                        }
                    }
                    InvokeHtml(browser);
                }));
            }
            catch (Exception ex)
            {
                App.LogError(ex, "HttpBrowser Callback {0}", arg.RequestUrl);
                HandleException(ex);
            }
            finally
            {
                Monitor.Exit(arg);
                STA_Exit(browser);
            }
        }

        /// <summary>
        /// Important: exits the STA UI thread. Restores the proxy, releases the caller
        /// blocked in GetHtml, detaches ajax handlers and disposes the browser.
        /// </summary>
        private void STA_Exit(WebBrowser browser)
        {
#if DEBUG
            App.LogInfo("STA_Exit {0}", browser.Url);
#endif
            RestoreSystemProxy();
            var arg = (STAContext)browser.ObjectForScripting;
            if (arg.WaitHandle != null)
            {
                arg.WaitHandle.Set();
            }
            try
            {
                browser.Stop();
                arg.AjaxUnmarks();
                browser.Invoke((Action)(() => Application.ExitThread()));
                browser.Dispose();
            }
            catch (SystemException ex)
            {
                // e.g. AccessViolationException, InvalidComObjectException
                App.LogError(ex, "HttpBrowser STA_Exit {0}", arg.RequestUrl);
            }
        }
        #endregion
    }
}
HttpBrowser
#region Spawned Process
/// <summary>
/// When true, page loads are delegated to a child process started from the current
/// executable instead of running in this process.
/// </summary>
public bool IsSpawned { get; set; }

/// <summary>
/// Starts the current executable with the serialized request as an <c>x#&lt;base64&gt;</c>
/// argument and returns whatever the child writes to standard output.
/// </summary>
/// <param name="proxy">Proxy for the child to use; may be null.</param>
/// <param name="requestUrl">Page to load.</param>
/// <param name="content">Optional request headers/form; may be null.</param>
/// <returns>The child's standard output (the page HTML followed by a line break).</returns>
internal string SpawnedStart(EndPoint proxy, Uri requestUrl, HttpRequestContent content)
{
#if DEBUG
    App.LogInfo("SpawnedStart: Proxy={0}\tUrl={1}", proxy, requestUrl);
#endif
    const int exitTimeout = 120 * 1000;

    bool hasValue = content != null;
    var stream = Serializer.Serialize(Tuple.Create(proxy, requestUrl,
        hasValue ? content.Headers : null,
        hasValue ? content.Form : null));
    RestoreSystemProxy();

    string[] args = Environment.GetCommandLineArgs();
    string arg = string.Format("x#{0}", Convert.ToBase64String(stream.ToArray()));
    var startInfo = new ProcessStartInfo(args[0], arg)
    {
        RedirectStandardOutput = true,
        // Must match the encoding SpawnedMain writes with, otherwise non-ASCII HTML is mangled.
        StandardOutputEncoding = System.Text.Encoding.UTF8,
        UseShellExecute = false,
    };
    using (var proc = Process.Start(startInfo))
    {
        // Read asynchronously: a synchronous ReadToEnd() only returns once the child closes
        // stdout, so a hung child would block forever and the timeout below would never run.
        var readTask = proc.StandardOutput.ReadToEndAsync();
        if (!proc.WaitForExit(exitTimeout))
        {
            try
            {
                proc.Kill();
            }
            catch (InvalidOperationException)
            {
                // The child exited between the timeout and Kill(); nothing left to do.
            }
            catch (System.ComponentModel.Win32Exception ex)
            {
                App.LogError(ex, "SpawnedStart Kill {0}", requestUrl);
            }
        }
        // Completes as soon as the child's end of the pipe is closed (normal exit or kill).
        return readTask.GetAwaiter().GetResult();
    }
}

/// <summary>
/// Child-process entry point: when the first command-line argument is an
/// <c>x#&lt;base64&gt;</c> request, loads the page and writes its HTML to standard output.
/// </summary>
/// <returns>True when a spawned request was handled; false when the process was started normally.</returns>
public static bool SpawnedMain()
{
    string[] args = Environment.GetCommandLineArgs();
    if (args.Length <= 1 || !args[1].StartsWith("x#", StringComparison.Ordinal))
    {
        return false;
    }

    Tuple<EndPoint, Uri, WebHeaderCollection, NameValueCollection> arg;
    using (var stream = new MemoryStream(Convert.FromBase64String(args[1].Substring(2))))
    {
        arg = (Tuple<EndPoint, Uri, WebHeaderCollection, NameValueCollection>)Serializer.Deserialize(stream);
    }

    var client = (IHttpClient)new HttpBrowser();
    if (arg.Item1 != null)
    {
        client.SetProxy(arg.Item1);
    }

    // The parent sends null headers and form when it had no content at all. Rebuild the
    // content only from what was actually sent, so a null content stays null here and
    // null is never assigned to Headers/Form.
    HttpRequestContent content = null;
    if (arg.Item3 != null || arg.Item4 != null)
    {
        content = new HttpRequestContent();
        if (arg.Item3 != null)
        {
            content.Headers = arg.Item3;
        }
        if (arg.Item4 != null)
        {
            content.Form = arg.Item4;
        }
    }
    string html = client.GetHtml(arg.Item2, content);

    // Write UTF-8 without a BOM straight to the stdout stream; the parent decodes as UTF-8.
    var writer = new StreamWriter(Console.OpenStandardOutput(), new System.Text.UTF8Encoding(false));
    writer.WriteLine(html);
    writer.Flush();
    return true;
}
#endregion
C# HttpBrowser 跨进程访问,解决内存泄露问题
时间: 2024-12-19 08:39:27