注意：我们平时是用浏览器访问网站的，但现在我们是用 Python 编写代码去访问。为了让网站认为我们的访问属于正常用户的行为和范围，为了“打入内部”，我们只能伪装自己。现在去伪装：
]/p
获取 title 信息：
//h1/text()
到此基本搞定，尝试打印结果。
print(info)
print(title)
这时我们发现打印出一堆内容，因为没有显示文本内容，需要在 XPath 末尾加上 /text()：]/p/text()
之后保存文件，即可实现运行。完整代码如下：
# How to send the request
# pip install requests
import requests
# pip install lxml
from lxml import etree
# Crawl a novel chapter by chapter: fetch a chapter page, extract its title
# and paragraphs, append them to one text file, then follow the "next" link
# until it points back at the book's index page.

# Chapter page the crawl starts from.
# NOTE(review): this start URL is on www.93xscc.com while follow-up URLs are
# built on www.85xs.cc below -- kept exactly as in the original; confirm that
# both hosts serve the same book.
url = 'https://www.93xscc.com/9034/2126907.html'

# Disguise the script as a regular desktop browser. The header never changes,
# so it is built once instead of on every loop iteration.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.52'
}

# href of the book's index page. The original exit hint compared the next
# link against this path, i.e. the crawl is finished once "next" leads here.
index_href = '/book/douluodalu1/'

# Open the output file once for the whole crawl. Re-opening it with mode 'w'
# for every chapter truncated the file each time, so only the last chapter
# survived.
# NOTE(review): the file name looks mis-encoded (mojibake); it is kept
# byte-for-byte -- confirm the intended name.
with open('鏂楃綏澶ч檰.txt', 'w', encoding='utf-8') as f:
    while True:
        # Send the request; the timeout keeps a stalled server from hanging
        # the crawl forever.
        resp = requests.get(url, headers=headers, timeout=10)
        # Force the response encoding before reading resp.text.
        resp.encoding = 'utf-8'
        e = etree.HTML(resp.text)

        # NOTE(review): the attribute test inside `//div[@]` appears to have
        # been lost (the expression is not valid XPath as written). Restore
        # the real selector of the content <div> before running.
        info = '\n'.join(e.xpath('//div[@]/p/text()'))

        titles = e.xpath('//h1/text()')
        if not titles:
            # Not a chapter page (no <h1>): stop instead of raising IndexError.
            print(f'No chapter title found at {url}; stopping.')
            break
        title = titles[0]

        # Save this chapter before deciding whether to continue.
        f.write(title + '\n\n' + info + '\n\n')

        # Follow the "next" link; stop when it is missing or leads back to
        # the book index, otherwise the loop would never end.
        next_hrefs = e.xpath("//tr/td[2]/a/@href")
        if not next_hrefs or next_hrefs[0] == index_href:
            break
        url = f'https://www.85xs.cc{next_hrefs[0]}'