You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
96 lines
5.4 KiB
96 lines
5.4 KiB
# Scrapy settings for myfirstPj project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# Project identity: the bot name, the packages searched for spiders, and the
# package in which newly generated spiders are created.
BOT_NAME = 'myfirstPj'

SPIDER_MODULES = ['myfirstPj.spiders']
NEWSPIDER_MODULE = 'myfirstPj.spiders'

# User-Agent string sent with requests.
# NOTE(review): this is a desktop Chrome browser string, not an identifier for
# this crawler or its operator. The same value is repeated under 'User-Agent'
# in DEFAULT_REQUEST_HEADERS in this file; keep the two in sync.
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1935.94 Safari/537.36'

# robots.txt handling: False means robots.txt rules are NOT obeyed.
# NOTE(review): confirm that ignoring robots.txt is intended and permitted for
# the target site before running this crawler.
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default).
# NOTE(review): DEFAULT_REQUEST_HEADERS in this file carries a hand-written
# 'Cookie' header; presumably cookie handling is switched off so that header is
# sent as-is instead of being managed by Scrapy's cookie jar -- confirm.
COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers.
#
# NOTE(security): the 'Cookie' entry below is a hard-coded, account-bound
# session credential committed to source control. It should be rotated and
# supplied from outside the repository (environment variable, secrets store).
#
# The cookie value is written as one "name=value; name=value" string with no
# surrounding quote characters: the previous literal wrapped the whole value in
# apostrophes, so the header sent on the wire began with "'JSESSIONID" and
# ended with "'", corrupting the first cookie's name and the last one's value.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    # Same value as the USER_AGENT setting in this file; keep them in sync.
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1935.94 Safari/537.36',
    # Adjacent string literals are concatenated into a single header value,
    # one cookie per source line.
    'Cookie': (
        'JSESSIONID=2E161C24EE80617B11702B4E76A42FF0; '
        '_qquc=6d2af3823bb4829a95200dba06ccb0265939e590873e5cef7edcafe1c96a755abf5e2403097381fa2d9a6c4604d69e51fc7914acc029fb5033a0475182f48025f054a58ca304d4f0925129c937cb693595284cd9e2fd46f4ee81e2dc73caad231f424d32347fd2822284431a74dec2c4b83f1b9d8786ebd0d7365bf0935405158f9d16bb79a0bd1f25565e5fdaa4fefb8026fd46b9c3cb6230301904f42b70dac9ff77c32d9228079178f395dd41708e28f30f4223948f25c30255de9bdf9210c2c95a92b6f80aff8ab6cd29261c19c181d4ecb97a9c5e4841b4668dbfd6df042c4e113b1f49a2735a11313cd1738fec5bcb7e4f0ae6ab92005e478fba8f38efe561ea8de8abb858e98715ccff56e8a9b336d1c71d4222cfd4067afd041d5b6002351d15ba1cc3738e7b731a999e72d4eac3bf864dc7c4e26062333f8df1e661d4067afd041d5b6002351d15ba1cc3738e7b731a999e72d4eac3bf864dc7c4e2ed41a28a5d5b372fd4e3cd2cdd4ab3e976652525878b2ab6c6cf81d18a62f4ef6a0d294378ea2f7c792744feca70155430d053014964fed2194a1d90a161c73c6f7567f12bdd69a2d9b81824ced2ccba099beefbb90aee3433a598ff7d0d05e8; '
        'Hm_lvt_e11e5fa7b1c17369dacfb3f063d64cae=1649303831,1649383836,1650452961; '
        'Hm_lpvt_e11e5fa7b1c17369dacfb3f063d64cae=1650453289'
    ),
}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'myfirstPj.middlewares.MyfirstpjSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'myfirstPj.middlewares.MyfirstpjDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines: maps each pipeline class path to its order value.
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'myfirstPj.pipelines.MyfirstpjPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Encoding used for exported feeds.
FEED_EXPORT_ENCODING = 'utf-8'