You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

25 lines
807 B

import scrapy
import papernews_spider
from papernews_spider.myfirstPj.spiders import Tools
# Article-body spider (for testing only)
# import PapernewsSpider
class NewscontentSpider(scrapy.Spider):
    """Test spider that requests the paper.com.cn front page.

    For each response, ``parse`` extracts the text of the ``<b>`` and ``<p>``
    elements, opens ``url.txt`` in append mode, and hands that file object to
    ``Tools.write_txt`` together with the output file name ``"News.txt"``.
    """

    name = 'NewsContent'
    allowed_domains = ['www.paper.com.cn']
    # NOTE(review): this handle is opened when the class body executes and is
    # never closed; parse() opens its own handle and does not use this one.
    # It is kept so the class attribute stays available to any outside code.
    urllist = open('url.txt', 'a', encoding='utf-8')
    start_urls = ['http://www.paper.com.cn/']

    def parse(self, response):
        """Handle one downloaded page and pass the URL file to ``Tools.write_txt``.

        Args:
            response: The response object Scrapy passes to the callback; it
                must support ``.xpath(...)``.

        Returns:
            None. Nothing is yielded, so no items or follow-up requests are
            produced by this callback.
        """
        # News detail page: text nodes of every <b> and <p> element.
        # NOTE(review): the extracted list is not used below -- presumably it
        # was meant to be written out; confirm the intended behaviour.
        lists = response.xpath('//b/text() | //p/text()').extract()

        # Open the saved URL file. The context manager closes the handle once
        # write_txt returns; previously a new handle leaked on every call.
        # NOTE(review): mode 'a' is append-only, so the file cannot be read
        # through this handle -- confirm what write_txt does with it.
        with open("url.txt", 'a', encoding='utf-8') as url_file:
            # Output file name is "News.txt"; the original comment says the
            # information is appended to it.
            # NOTE(review): Tools.__init__(self) is evaluated first and its
            # return value (normally None for an __init__) is passed as the
            # first argument -- confirm this is what write_txt expects.
            Tools.write_txt(Tools.__init__(self), "News.txt", url_file)