Python 爬虫爬取彼岸图网图片

超、凢脫俗 2021-07-26 16:43 734阅读 0赞

Python 爬虫爬取彼岸图网图片

话不多说，直接上代码！

  1. import requests
  2. from PIL import Image
  3. from io import BytesIO
  4. import re
  5. from requests.exceptions import HTTPError
  6. root = "http://pic.netbian.com/index_%d.html"
  7. uni = "http://pic.netbian.com"
  8. AllPage = []
  9. AllImgHTML = []
  10. AllImgURL = []
  11. def GetPageURL(root, Start, counts):
  12. if Start == 1:
  13. AllPage.append("http://pic.netbian.com/index.html")
  14. for i in range(Start + 1, Start + counts):
  15. newURL = root.replace("%d", str(i))
  16. AllPage.append(newURL)
  17. else:
  18. for i in range(Start, Start + counts):
  19. newURL = root.replace("%d", str(i))
  20. AllPage.append(newURL)
  21. def GetImgHTML(AllPage):
  22. for PageURL in AllPage:
  23. try:
  24. res = requests.get(PageURL)
  25. res.raise_for_status()
  26. except HTTPError:
  27. print("HTTP Error!")
  28. except ConnectionError:
  29. print("Failed to connect!")
  30. with open("C:/Users/86135/Desktop/PageFile.txt", "w", encoding="ISO-8859-1") as PageFile:
  31. PageFile.write(res.text)
  32. PageFile.close()
  33. with open("C:/Users/86135/Desktop/PageFile.txt", "r", encoding="gbk") as ReadFile:
  34. str = ReadFile.read()
  35. mid = re.split("\"", str)
  36. for i in mid:
  37. ImgHTML = re.findall("^/tupian/.*.html$", i)
  38. if len(ImgHTML) != 0:
  39. AllImgHTML.append(ImgHTML[0])
  40. def GetImgURL():
  41. UsefulImgHTML = [None for i in range(len(AllImgHTML))]
  42. for i in range(len(AllImgHTML)):
  43. UsefulImgHTML[i] = uni + AllImgHTML[i]
  44. for html in UsefulImgHTML:
  45. try:
  46. htmlres = requests.get(html)
  47. htmlres.raise_for_status()
  48. except HTTPError:
  49. print("HTTP Error!")
  50. except ConnectionError:
  51. print("Failed to connect!")
  52. with open("C:/Users/86135/Desktop/ImgHTML.txt", "w", encoding="ISO-8859-1") as ImgHTML:
  53. ImgHTML.write(htmlres.text)
  54. ImgHTML.close()
  55. with open("C:/Users/86135/Desktop/ImgHTML.txt", "r", encoding="gbk") as ReadHTML:
  56. str = ReadHTML.read()
  57. mid = re.split("\"", str)
  58. for i in mid:
  59. ImgURL = re.search("^/uploads/allimg/.*.jpg$", i)
  60. if ImgURL is not None:
  61. AllImgURL.append(ImgURL[0])
  62. break
  63. UsefulImgURL = [None for i in range(len(AllImgURL))]
  64. for i in range(len(AllImgURL)):
  65. UsefulImgURL[i] = uni + AllImgURL[i]
  66. return UsefulImgURL
  67. def DownloadWallpaper(url, path):
  68. try:
  69. res = requests.get(url)
  70. res.raise_for_status()
  71. MyImage = Image.open(BytesIO(res.content))
  72. MyImage.save(path)
  73. print("Done...")
  74. except HTTPError:
  75. print("HTTP Error!")
  76. except ConnectionError:
  77. print("Failed to connect!")
  78. if __name__ == "__main__":
  79. GetPageURL(root, 2, 2)
  80. GetImgHTML(AllPage)
  81. UsefulImgURL = GetImgURL()
  82. num = []
  83. for i in range(len(UsefulImgURL)):
  84. num.append(i)
  85. UsefulSavePath = [None for i in range(len(UsefulImgURL))]
  86. for i in range(len(UsefulSavePath)):
  87. UsefulSavePath[i] = "C:/Users/86135/Desktop/" + str(num[i]) + ".jpg"
  88. for i in range(len(UsefulImgURL)):
  89. print(i, end=" ")
  90. DownloadWallpaper(UsefulImgURL[i], UsefulSavePath[i])
  91. print("Task completed!")

运行结果如下:
在这里插入图片描述

发表评论

表情:
评论列表 (有 0 条评论,734人围观)

还没有评论,来说两句吧...

相关阅读