You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

164 lines
4.8 KiB

2 years ago
  1. using System;
  2. using System.IO;
  3. using System.Linq;
  4. using System.Net;
  5. using System.Web;
  /// <summary>
  /// Request handler that runs a <see cref="Crawler"/> over every URL posted in
  /// the "source[]" form field and hands the per-URL results to WriteJson.
  /// </summary>
  public class CrawlerHandler : Handler
  {
      private string[] Sources;
      private Crawler[] Crawlers;

      public CrawlerHandler(HttpContext context)
          : base(context)
      {
      }

      /// <summary>
      /// Reads the posted source URLs, fetches each one in order, and writes a
      /// result object containing one entry (state, source, url) per URL.
      /// When no source was posted, only an error state is written.
      /// </summary>
      public override void Process()
      {
          Sources = Request.Form.GetValues("source[]");

          bool hasSources = Sources != null && Sources.Length != 0;
          if (!hasSources)
          {
              var failure = new
              {
                  state = "参数错误:没有指定抓取源"
              };
              WriteJson(failure);
              return;
          }

          // Fetch eagerly, one URL after another, before anything is written out.
          var fetched = new Crawler[Sources.Length];
          for (int index = 0; index < fetched.Length; index++)
          {
              fetched[index] = new Crawler(Sources[index], Server).Fetch();
          }
          Crawlers = fetched;

          var entries = from crawler in Crawlers
                        select new
                        {
                            state = crawler.State,
                            source = crawler.SourceUrl,
                            url = crawler.ServerUrl
                        };

          WriteJson(new
          {
              state = "SUCCESS",
              list = entries
          });
      }
  }
  /// <summary>
  /// Downloads one remote image and stores it under the path produced by
  /// PathFormatter for the "catcherPathFormat" setting. URLs whose host is, or
  /// resolves to, a loopback / private / link-local address are refused so the
  /// server cannot be used to reach internal services.
  /// </summary>
  public class Crawler
  {
      /// <summary>The remote URL this crawler was asked to fetch.</summary>
      public string SourceUrl { get; set; }

      /// <summary>
      /// Server-relative path chosen for the saved file. Assigned once the
      /// response has been accepted as an image; null before that.
      /// </summary>
      public string ServerUrl { get; set; }

      /// <summary>"SUCCESS", "INVALID_URL", or a human-readable failure description.</summary>
      public string State { get; set; }

      private HttpServerUtility Server { get; set; }

      /// <param name="sourceUrl">Remote URL to fetch.</param>
      /// <param name="server">Used to map the server-relative save path to a physical path.</param>
      public Crawler(string sourceUrl, HttpServerUtility server)
      {
          this.SourceUrl = sourceUrl;
          this.Server = server;
      }

      /// <summary>
      /// Validates <see cref="SourceUrl"/>, downloads it, and writes it to disk.
      /// Never throws: every failure (bad URL, DNS error, HTTP error status,
      /// I/O error) is reported through <see cref="State"/> so that one bad URL
      /// does not abort a batch of fetches.
      /// </summary>
      /// <returns>This instance, with State (and ServerUrl on acceptance) populated.</returns>
      public Crawler Fetch()
      {
          try
          {
              if (!IsExternalIPAddress(this.SourceUrl))
              {
                  State = "INVALID_URL";
                  return this;
              }

              // NOTE(review): HttpWebRequest follows redirects automatically by default
              // (AllowAutoRedirect), and the redirect target is not re-validated here.
              // Consider disabling auto-redirect or validating each hop.
              // The cast is safe: IsExternalIPAddress only accepts http/https URLs.
              var request = (HttpWebRequest)WebRequest.Create(this.SourceUrl);

              // GetResponse throws WebException for error statuses and network
              // failures; the outer catch converts that into a State message.
              using (var response = (HttpWebResponse)request.GetResponse())
              {
                  if (response.StatusCode != HttpStatusCode.OK)
                  {
                      State = "Url returns " + response.StatusCode + ", " + response.StatusDescription;
                      return this;
                  }

                  // Media types are case-insensitive, so compare ordinally ignoring case.
                  var contentType = response.ContentType ?? string.Empty;
                  if (contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) == -1)
                  {
                      State = "Url is not an image";
                      return this;
                  }

                  ServerUrl = PathFormatter.Format(Path.GetFileName(this.SourceUrl), Config.GetString("catcherPathFormat"));
                  var savePath = Server.MapPath(ServerUrl);
                  var saveDirectory = Path.GetDirectoryName(savePath);
                  if (!Directory.Exists(saveDirectory))
                  {
                      Directory.CreateDirectory(saveDirectory);
                  }

                  // Buffer the whole body first so a failed download never leaves a
                  // partially written file behind.
                  byte[] bytes;
                  using (var stream = response.GetResponseStream())
                  using (var ms = new MemoryStream())
                  {
                      var buffer = new byte[4096];
                      int count;
                      while ((count = stream.Read(buffer, 0, buffer.Length)) > 0)
                      {
                          ms.Write(buffer, 0, count);
                      }
                      bytes = ms.ToArray();
                  }

                  File.WriteAllBytes(savePath, bytes);
                  State = "SUCCESS";
              }
          }
          catch (Exception e)
          {
              State = "抓取错误:" + e.Message;
          }
          return this;
      }

      /// <summary>
      /// Decides whether <paramref name="url"/> may be fetched: it must be an
      /// absolute http/https URL whose host is a public IPv4 literal, or a DNS
      /// name that resolves to at least one public IPv4 address and to no
      /// private address of any family.
      /// </summary>
      /// <remarks>
      /// Malformed or null URLs return false instead of throwing. A DNS lookup
      /// failure still throws (SocketException); Fetch reports it via State.
      /// </remarks>
      private bool IsExternalIPAddress(string url)
      {
          Uri uri;
          if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
          {
              return false;
          }

          // Anything other than http/https cannot be served by HttpWebRequest.
          if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
          {
              return false;
          }

          switch (uri.HostNameType)
          {
              case UriHostNameType.Dns:
              {
                  var sawPublicIPv4 = false;
                  foreach (IPAddress ipAddress in Dns.GetHostEntry(uri.DnsSafeHost).AddressList)
                  {
                      // A single internal address is enough to reject the host: the
                      // HTTP client may connect to any of the resolved addresses.
                      if (IsPrivateIP(ipAddress))
                      {
                          return false;
                      }
                      if (ipAddress.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
                      {
                          sawPublicIPv4 = true;
                      }
                  }
                  // Hosts with no IPv4 address at all remain rejected.
                  return sawPublicIPv4;
              }
              case UriHostNameType.IPv4:
                  return !IsPrivateIP(IPAddress.Parse(uri.DnsSafeHost));
              default:
                  // IPv6 literals and unknown host types are not accepted.
                  return false;
          }
      }

      /// <summary>
      /// Returns true when the address is loopback, unspecified, private,
      /// or link-local (IPv4 or IPv6, including IPv4-mapped IPv6 addresses).
      /// </summary>
      private bool IsPrivateIP(IPAddress myIPAddress)
      {
          if (IPAddress.IsLoopback(myIPAddress))
          {
              return true;
          }

          var ipBytes = myIPAddress.GetAddressBytes();

          if (myIPAddress.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
          {
              return IsPrivateIPv4(ipBytes[0], ipBytes[1]);
          }

          if (myIPAddress.AddressFamily == System.Net.Sockets.AddressFamily.InterNetworkV6)
          {
              // "::" (unspecified), fe80::/10 (link-local), fec0::/10 (site-local)
              if (myIPAddress.Equals(IPAddress.IPv6Any) || myIPAddress.IsIPv6LinkLocal || myIPAddress.IsIPv6SiteLocal)
              {
                  return true;
              }
              // fc00::/7 (unique local addresses)
              if ((ipBytes[0] & 0xFE) == 0xFC)
              {
                  return true;
              }
              // ::ffff:a.b.c.d (IPv4-mapped): classify by the embedded IPv4 address.
              if (IsIPv4Mapped(ipBytes))
              {
                  return IsPrivateIPv4(ipBytes[12], ipBytes[13]);
              }
          }

          return false;
      }

      /// <summary>Classifies an IPv4 address by its first two octets.</summary>
      private static bool IsPrivateIPv4(byte first, byte second)
      {
          return first == 0                                       // 0.0.0.0/8 ("this" network)
              || first == 10                                      // 10.0.0.0/8
              || first == 127                                     // 127.0.0.0/8 (loopback)
              || (first == 172 && second >= 16 && second <= 31)   // 172.16.0.0/12
              || (first == 192 && second == 168)                  // 192.168.0.0/16
              || (first == 169 && second == 254);                 // 169.254.0.0/16 (link-local)
      }

      /// <summary>True when the 16-byte IPv6 address has the ::ffff:0:0/96 prefix.</summary>
      private static bool IsIPv4Mapped(byte[] ipv6Bytes)
      {
          if (ipv6Bytes.Length != 16)
          {
              return false;
          }
          for (int i = 0; i < 10; i++)
          {
              if (ipv6Bytes[i] != 0)
              {
                  return false;
              }
          }
          return ipv6Bytes[10] == 0xFF && ipv6Bytes[11] == 0xFF;
      }
  }