import scrapy
import papernews_spider
from papernews_spider.myfirstPj.spiders import Tools

# Body-text crawler (for testing)
# import PapernewsSpider


class NewscontentSpider(scrapy.Spider):
    """Spider that extracts article body text from www.paper.com.cn pages
    and appends it, via the project's Tools helper, to News.txt.

    Fixes vs. original: the class body used to execute
    ``urllist = open('url.txt', 'a', ...)`` at import time, leaking a file
    handle that was never used (``parse`` shadowed it with its own local)
    and never closed — that dead attribute is removed, and the handle
    opened inside ``parse`` is now managed with ``with`` so it is closed.
    """

    name = 'NewsContent'
    allowed_domains = ['www.paper.com.cn']
    start_urls = ['http://www.paper.com.cn/']

    def parse(self, response):
        """Parse a news detail page.

        Extracts the text of all <b> and <p> nodes, then hands the open
        url.txt handle to Tools.write_txt, which appends output to News.txt.

        :param response: the Scrapy Response for a crawled page
        """
        # News detail page: collect bold and paragraph text nodes.
        lists = response.xpath('//b/text() | //p/text()').extract()
        # lists = response.xpath("//a[contains(@target, '_blank')]/text()").extract()
        # Open the file of saved URL links for appending; ``with`` guarantees
        # the handle is closed (the original leaked it).
        with open("url.txt", 'a', encoding='utf-8') as urllist:
            # Set output encoding / output file name; data is appended.
            # NOTE(review): ``Tools.__init__(self)`` returns None, so
            # ``write_txt`` receives None as its first argument — this looks
            # like it should be called on a real Tools instance instead;
            # verify against the Tools class definition before changing.
            Tools.write_txt(Tools.__init__(self), "News.txt", urllist)