《阿七美图馆》爬虫
- 看到群友分享了一个美图站,于是写了个爬虫把图片爬了下来
- 运行环境:python3
#!/usr/bin/env python3
import requests
from lxml import etree
import os
def _safe_component(text):
    """Return *text* with path separators replaced by underscores.

    The value comes from scraped HTML, so it must not be able to introduce
    extra directory levels when it is joined into a file path.
    """
    for sep in (os.sep, os.altsep, '/', '\\'):
        if sep:
            text = text.replace(sep, '_')
    return text


def get_requests(url):
    """Download the images referenced on one archive page.

    The page at *url* is fetched and parsed; for every ``<img>`` under
    ``div.post.row`` the ``data-original`` attribute is used as the image URL
    and the ``title`` attribute as the file name. Files are written to
    ``<cwd>/阿七美图馆/<category>/<title>.jpg``, where the category is the text
    of the third ``<span>`` in the first ``<div>`` of ``div.post-info``.

    Args:
        url: Address of the archive page to scrape.

    Returns:
        None. Images are written to disk as a side effect.

    Raises:
        requests.RequestException: If the archive page itself cannot be
            fetched. Failures of individual images are reported and skipped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
    }
    # Without a timeout a stalled connection would block the crawler forever.
    timeout = 30

    html = requests.get(url=url, headers=headers, timeout=timeout).content.decode()
    result = etree.HTML(html)
    img_list = result.xpath("//div[@class='post row']/div/img/@data-original")
    name_list = result.xpath("//div[@class='post row']/div/img/@title")
    category = ''.join(
        result.xpath("//div[@class='post-info']/div[1]/span[3]/text()")
    )

    # The target directory is the same for every image on the page.
    path = os.path.join(os.getcwd(), '阿七美图馆', _safe_component(category))

    for img, name in zip(img_list, name_list):
        # img = 'https:' + img
        try:
            response = requests.get(url=img, headers=headers, timeout=timeout)
            # Do not save an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One broken image should not abort the rest of the page.
            print("下载失败: %s 路径: %s (%s)" % (name, img, exc))
            continue
        print("下载的图像: %s 路径: %s" % (name, img))
        # Created lazily so pages without images leave no empty directory.
        os.makedirs(path, exist_ok=True)
        file_path = os.path.join(path, _safe_component(name) + '.jpg')
        with open(file_path, 'wb') as file:
            file.write(response.content)
if __name__ == '__main__':
    # Probe archive ids 3..999 one after another; ids that fail are reported
    # and skipped so a single bad page does not stop the crawl.
    for item in range(3, 1000):
        url = "http://a7a7.net/index.php/archives/{}/".format(item)
        try:
            get_requests(url)
        except Exception as exc:
            # `except Exception` (not a bare `except`) lets Ctrl-C and
            # SystemExit terminate the loop.
            print("跳过 %s: %s" % (url, exc))
> 此篇文章【《阿七美图馆》爬虫】内容来自于网络,贫小子仅作分享。