# Scrapy settings for myfirstPj project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'myfirstPj'

SPIDER_MODULES = ['myfirstPj.spiders']
NEWSPIDER_MODULE = 'myfirstPj.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# NOTE: this project sends a desktop Chrome user-agent string instead of a
# bot-specific one.
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1935.94 Safari/537.36'

# Obey robots.txt rules
# NOTE: robots.txt is deliberately NOT obeyed in this project.
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# NOTE(review): presumably disabled so that the hand-written 'Cookie' header in
# DEFAULT_REQUEST_HEADERS below is sent as-is instead of being managed by
# Scrapy's cookie middleware -- confirm before re-enabling.
COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# - 'User-Agent' reuses USER_AGENT so the two values cannot drift apart.
# - 'Cookie' is a raw "name=value; name=value" header string. It must not be
#   wrapped in extra quote characters, otherwise the first cookie name and the
#   last cookie value are corrupted on the wire.
# NOTE(review): the cookie below is a captured login session. It is a
# credential, it expires, and it should ideally not live in version control.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    'User-Agent': USER_AGENT,
    'Cookie': "JSESSIONID=2E161C24EE80617B11702B4E76A42FF0; _qquc=6d2af3823bb4829a95200dba06ccb0265939e590873e5cef7edcafe1c96a755abf5e2403097381fa2d9a6c4604d69e51fc7914acc029fb5033a0475182f48025f054a58ca304d4f0925129c937cb693595284cd9e2fd46f4ee81e2dc73caad231f424d32347fd2822284431a74dec2c4b83f1b9d8786ebd0d7365bf0935405158f9d16bb79a0bd1f25565e5fdaa4fefb8026fd46b9c3cb6230301904f42b70dac9ff77c32d9228079178f395dd41708e28f30f4223948f25c30255de9bdf9210c2c95a92b6f80aff8ab6cd29261c19c181d4ecb97a9c5e4841b4668dbfd6df042c4e113b1f49a2735a11313cd1738fec5bcb7e4f0ae6ab92005e478fba8f38efe561ea8de8abb858e98715ccff56e8a9b336d1c71d4222cfd4067afd041d5b6002351d15ba1cc3738e7b731a999e72d4eac3bf864dc7c4e26062333f8df1e661d4067afd041d5b6002351d15ba1cc3738e7b731a999e72d4eac3bf864dc7c4e2ed41a28a5d5b372fd4e3cd2cdd4ab3e976652525878b2ab6c6cf81d18a62f4ef6a0d294378ea2f7c792744feca70155430d053014964fed2194a1d90a161c73c6f7567f12bdd69a2d9b81824ced2ccba099beefbb90aee3433a598ff7d0d05e8; Hm_lvt_e11e5fa7b1c17369dacfb3f063d64cae=1649303831,1649383836,1650452961; Hm_lpvt_e11e5fa7b1c17369dacfb3f063d64cae=1650453289",
}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'myfirstPj.middlewares.MyfirstpjSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'myfirstPj.middlewares.MyfirstpjDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'myfirstPj.pipelines.MyfirstpjPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Encoding used when exporting scraped items to feeds.
FEED_EXPORT_ENCODING = 'utf-8'