MENU

《阿七美图馆》爬虫

  • 看到群友分享了一个美图站,于是写了个爬虫把图片爬了下来
  • 运行环境:python3(依赖第三方库 requests、lxml)
#!/usr/bin/env python3
import requests
from lxml import etree
import os

def _safe_filename(text):
    """Return *text* with characters that are unsafe in a path component replaced by ``_``."""
    cleaned = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in text)
    # '.' and '..' are directory references and must never be used as a name.
    return '_' if cleaned in ('.', '..') else cleaned


def get_requests(url, timeout=10):
    """Download every post image found on the archive page at *url*.

    The page is fetched and parsed, and each ``<img>`` under
    ``div.post.row`` that carries a ``data-original`` attribute is saved as
    ``<cwd>/阿七美图馆/<category>/<title>.jpg``. ``<category>`` is the text of
    the third ``<span>`` in the first child ``<div>`` of ``div.post-info``.

    Args:
        url: Address of the archive page to scrape.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Files are written to disk as a side effect.

    Raises:
        requests.RequestException: If the archive page itself cannot be
            fetched. Failures on individual images are reported and skipped.
    """
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
    }

    # A timeout keeps one stalled connection from hanging the whole crawl.
    html = requests.get(url=url, headers=headers, timeout=timeout).content.decode()

    result = etree.HTML(html)
    if result is None:
        # NOTE(review): lxml appears to return None for an empty document in
        # some versions -- treat that as "nothing to download".
        return

    # Read both attributes from the same <img> element so that a missing
    # attribute on one image cannot shift the URL/title pairing of the rest.
    images = []
    for node in result.xpath("//div[@class='post row']/div/img"):
        src = node.get('data-original')
        if not src:
            continue
        # urljoin leaves absolute URLs untouched and resolves relative or
        # protocol-relative ("//host/...") ones against the page address.
        images.append((urljoin(url, src), node.get('title') or 'untitled'))
    if not images:
        return

    category = ''.join(result.xpath("//div[@class='post-info']/div[1]/span[3]/text()"))
    target_dir = os.path.join(os.getcwd(), '阿七美图馆', _safe_filename(category))
    os.makedirs(target_dir, exist_ok=True)

    used_names = set()
    for img_url, title in images:
        try:
            response = requests.get(url=img_url, headers=headers, timeout=timeout)
            # Without this an HTTP error body would be saved as a ".jpg".
            response.raise_for_status()
        except requests.RequestException as exc:
            print("下载失败,已跳过: %s (%s)" % (img_url, exc))
            continue
        print("下载的图像: %s   路径: %s" % (title, img_url))

        # Images sharing one title would otherwise overwrite each other; the
        # first keeps the plain name and later ones get a numeric suffix.
        stem = _safe_filename(title)
        candidate = stem
        counter = 1
        while candidate in used_names:
            counter += 1
            candidate = '%s_%d' % (stem, counter)
        used_names.add(candidate)

        with open(os.path.join(target_dir, candidate + '.jpg'), 'wb') as file:
            file.write(response.content)

if __name__ == '__main__':
    # Probe archive IDs 3..999 in order. A failure on one page is reported and
    # skipped so the crawl continues with the next ID.
    for item in range(3, 1000):
        url = "http://a7a7.net/index.php/archives/{}/".format(item)
        try:
            get_requests(url)
        except Exception as exc:
            # Catch Exception rather than using a bare ``except:`` so that
            # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the script.
            print("跳过 %s: %s" % (url, exc))

242324471.png

0:00