#
"""Scrape the Douban Books Top 250 list and save it to ``./douban.csv``.

Each listing page is fetched with ``requests``, parsed with ``lxml``, and
every ``<tr class="item">`` row is written as one CSV record with the columns
name, url, author, publisher, date, price, rate, comment.
"""
import csv

import requests
from lxml import etree

OUTPUT_PATH = './douban.csv'
CSV_HEADER = ('name', 'url', 'author', 'publisher', 'date', 'price', 'rate', 'comment')

# The list is paginated 25 books per page via the ``start`` query parameter.
PAGE_URLS = [
    'https://book.douban.com/top250?start={}'.format(num)
    for num in range(0, 250, 25)
]
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'
    ),
}
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def _parse_rows(page):
    """Yield one CSV row tuple per book found in the HTML text *page*.

    Args:
        page: HTML source of one listing page.

    Yields:
        ``(name, url, author, publisher, date, price, rate, comment)`` tuples.

    Raises:
        IndexError: if a row lacks one of the expected nodes, or its info
            line has fewer than three ``/``-separated fields.
    """
    tree = etree.HTML(page)
    for item in tree.xpath('//tr[@class="item"]'):
        name = item.xpath('td/div/a/@title')[0]
        book_url = item.xpath('td/div/a/@href')[0]
        # Relative path on purpose: a leading '//' searches the whole document
        # and would hand back the first book's info line for every row.
        fields = item.xpath('td/p/text()')[0].split('/')
        # The author is the first field; publisher, date and price are taken
        # from the end because the number of leading fields varies.
        author, publisher, date, price = fields[0], fields[-3], fields[-2], fields[-1]
        rate = item.xpath('td/div/span[2]/text()')[0]
        comments = item.xpath('td/p/span/text()')
        # Fall back to a placeholder when the row has no comment span.
        comment = comments[0] if comments else '空'
        yield (name, book_url, author, publisher, date, price, rate, comment)


def main():
    """Fetch every listing page and write all book rows to ``OUTPUT_PATH``.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            HTTP response.
    """
    # The context manager closes the file even if a request or parse fails.
    with open(OUTPUT_PATH, 'w+', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(CSV_HEADER)  # write the header row
        for page_url in PAGE_URLS:
            # ``headers`` must be passed by keyword: the second positional
            # argument of ``requests.get`` is ``params`` (the query string).
            response = requests.get(page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            # Fail loudly rather than parse an error page into zero rows.
            response.raise_for_status()
            writer.writerows(_parse_rows(response.text))


if __name__ == '__main__':
    main()