You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
25 lines
807 B
25 lines
807 B
import scrapy
|
|
|
|
import papernews_spider
|
|
from papernews_spider.myfirstPj.spiders import Tools
|
|
|
|
|
|
# Article-body spider (for testing)
|
|
# import PapernewsSpider
|
|
|
|
|
|
class NewscontentSpider(scrapy.Spider):
    """Test spider that requests the paper.com.cn front page.

    For each response, ``parse`` extracts the text of ``<b>`` and ``<p>``
    elements and then calls ``Tools.write_txt`` with the output file name
    ``"News.txt"`` and a handle to ``url.txt`` opened in append mode.
    """

    name = 'NewsContent'
    allowed_domains = ['www.paper.com.cn']
    # NOTE(review): this opens (and creates, if missing) url.txt as soon as
    # the class body is executed, and the handle is never closed. It is kept
    # because it is a class attribute that other code may read; consider
    # removing it once that has been confirmed.
    urllist = open('url.txt', 'a', encoding='utf-8')
    start_urls = ['http://www.paper.com.cn/']

    def parse(self, response):
        """Handle a downloaded page and hand the url.txt handle to Tools.

        Args:
            response: The Scrapy response for a URL in ``start_urls``.
        """
        # News detail page: text of every <b> and <p> element.
        # NOTE(review): the extracted list is not used below — presumably it
        # was meant to be the content written to News.txt; confirm intent.
        lists = response.xpath('//b/text() | //p/text()').extract()
        # Alternative selector kept from the original:
        # lists = response.xpath("//a[contains(@target, '_blank')]/text()").extract()

        # Open the saved URL list. A context manager guarantees the handle is
        # closed after the write, instead of leaking one handle per response.
        with open("url.txt", 'a', encoding='utf-8') as urllist:
            # Output file name is News.txt; per the original comment the
            # information is appended with an explicit encoding.
            # NOTE(review): if Tools is a class, Tools.__init__(self) returns
            # None, so write_txt receives None as its first argument — verify
            # against the Tools implementation.
            Tools.write_txt(Tools.__init__(self), "News.txt", urllist)