
# Spider code

```python
import scrapy
from scrapy import Selector

from ..items import GetItem  # import the item class


class BiliSpider(scrapy.Spider):
    name = 'bili'
    allowed_domains = ['bilibili.com']
    start_urls = ['https://www.bilibili.com/']

    # parse the crawled page
    def parse(self, response):
        sel = Selector(response)
        list_items = sel.xpath('/html/body/div[2]/div[2]/main/div[2]/div/div[1]/div')
        for list_item in list_items:
            spider_item = GetItem()
            spider_item['title'] = list_item.css('h3::attr(title)').extract()  # title
            spider_item['author'] = list_item.css('span.bili-video-card__info--author::text').extract()  # author
            spider_item['time'] = list_item.css('span.bili-video-card__info--date::text').extract()  # date
            spider_item['link'] = list_item.css('h3 > a::attr(href)').extract()  # link
            yield spider_item
```

# Items file

```python
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy


class GetItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    title = scrapy.Field()
    author = scrapy.Field()
    time = scrapy.Field()
    link = scrapy.Field()
```

# Settings

In the settings file, enable cookies and add request headers, as sketched below.
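
A minimal sketch of what those `settings.py` changes might look like; the original post does not show its exact values, so the User-Agent string below is a placeholder you would replace with one copied from your own browser:

```python
# settings.py (excerpt) -- a sketch of the changes described above,
# not the exact configuration from the original post.

# send cookies with requests so the crawl behaves like a browser session
COOKIES_ENABLED = True

# attach browser-like headers to every request
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    # placeholder User-Agent; copy the real string from your own browser
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}
```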

# Run from the command line and save as a CSV file

```
scrapy crawl bili -o bili.csv
```
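
Equivalently, the CSV export can be configured once in `settings.py` through Scrapy's `FEEDS` setting (available since Scrapy 2.1), so the spider can then be started with a bare `scrapy crawl bili`. This is a sketch of that alternative, not part of the original setup:

```python
# settings.py (optional) -- configure the CSV feed here instead of passing -o
FEEDS = {
    'bili.csv': {
        'format': 'csv',
        'encoding': 'utf-8-sig',  # BOM so Excel displays Chinese titles correctly
        'overwrite': True,
    },
}
```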
