python爬取wallhaven壁纸实例
本文最后更新于:2024年12月14日 下午
python爬取wallhaven壁纸实例
import os
import requests
from lxml import etree
# Starting search-results URL. The number after the final '=' is the page
# counter that the loop below advances.
url = 'https://wallhaven.cc/search?categories=111&purity=100&resolutions=3840x2160&topRange=1w&sorting=toplist&order=desc&ai_art_filter=0&page=2'

# Folder that receives the downloaded images. It is created once, up front;
# exist_ok=True makes this a no-op when the folder already exists.
directory = 'zhenbang'
os.makedirs(directory, exist_ok=True)

# Seconds to wait on a single HTTP request. requests applies no timeout by
# default, so without this one stalled connection would hang the whole run.
REQUEST_TIMEOUT = 30

# Process five result pages. The page number is incremented BEFORE the fetch,
# so with the starting URL above (page=2) the pages fetched are 3 through 7.
for _ in range(5):
    # Take the text after the last '=' as the page number and add 1.
    num_str = url.split('=')[-1]
    num = int(num_str) + 1
    new_url = url[:-len(num_str)] + str(num)
    # Advance the URL immediately so that a page which fails to load is
    # skipped on the next iteration instead of being requested again.
    url = new_url

    # Fetch the search-results page; a network error or HTTP error status
    # skips this page rather than aborting the remaining ones.
    try:
        response = requests.get(new_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print('获取页面时出错:{}'.format(e))
        continue

    # Parse the HTML and collect the link to every wallpaper detail page.
    selector = etree.HTML(response.content)
    links = selector.xpath('/html/body/main/div[1]/section[1]/ul/li/figure/a[1]/@href')

    # Visit each detail page and download the full-size image it shows.
    for link in links:
        try:
            response = requests.get(link, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            print('获取页面时出错:{}'.format(e))
            continue

        # Extract the image URL. The XPath can match nothing (for example
        # when the page layout differs), so check before indexing.
        selector = etree.HTML(response.content)
        img_urls = selector.xpath('/html/body/main/section/div[1]/img/@src')
        if not img_urls:
            print('未找到图片链接:{}'.format(link))
            continue
        img_url = img_urls[0]

        # The local file is named after the last path segment of the URL.
        file_path = os.path.join(directory, img_url.split('/')[-1])
        try:
            # Download first and open the file only afterwards, so a failed
            # download does not leave an empty file behind.
            image = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            image.raise_for_status()
            with open(file_path, 'wb') as f:
                f.write(image.content)
            print('文件已保存到本地:{}'.format(file_path))
        except (requests.RequestException, OSError) as e:
            print('保存文件时出错:{}'.format(e))
微信支付
支付宝支付
“请博主喝咖啡☕️”