# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import os

import oss2

from papernews_spider.Module.generateID import IdWorker


class Tools:
    """Utility helpers used by the spiders: Aliyun OSS upload, text-file
    writing with noise filtering, URL de-duplication, and string cleaning."""

    # Whitespace-only scraps produced by xpath extraction that should be
    # dropped when writing results (same literals as the original filter;
    # duplicates collapse naturally in a set).
    _NOISE = frozenset({
        '\r\n ',
        '\r\t',
        '\r\r\n \r\n \r\n ',
        '\r\r\n \r\n ',
        '\r\n \r\n ',
    })

    def put(self, directory, datalist):
        """Join *datalist* into one string and upload it to Aliyun OSS.

        directory: object key prefix inside the bucket (OSS "folder"/name prefix).
        datalist:  iterable of string fragments forming the object body.
        """
        # SECURITY(review): the AccessKey pair was hard-coded in source.
        # Prefer environment variables; the original values remain as a
        # backward-compatible fallback. These keys should be rotated and
        # replaced with a least-privilege RAM user credential.
        access_key_id = os.getenv('OSS_ACCESS_KEY_ID',
                                  'LTAI5tRJKZvY8Switqrb3366')
        access_key_secret = os.getenv('OSS_ACCESS_KEY_SECRET',
                                      'qAi7Hdrvvc7WLuvOr9n2g5PuBs3Vhn')
        auth = oss2.Auth(access_key_id, access_key_secret)
        # Endpoint for the bucket's region (South China 1, Shenzhen).
        endpoint = 'oss-cn-shenzhen.aliyuncs.com'  # public network
        # endpoint = 'oss-cn-shenzhen-internal.aliyuncs.com'  # internal network
        bucket = oss2.Bucket(auth, endpoint, 'qn-data-lake')
        # Generate a unique object name (snowflake-style id + ".txt").
        # Renamed from `id` to avoid shadowing the builtin.
        worker = IdWorker(1, 2, 0)
        new_id = str(worker.get_id()) + ".txt"
        # ''.join is linear; the original `data = data + item` loop was
        # potentially quadratic.
        data = ''.join(datalist)
        bucket.put_object(directory + new_id, data)

    def write_txt(self, filename, listname):
        """Append each item of *listname* to *filename*, one per line,
        skipping whitespace-only noise items.

        filename: path of the file to append to (created if missing).
        listname: iterable of strings, typically an xpath result list.
        """
        # `with` guarantees the file is closed even if a write raises.
        with open(filename, 'a', encoding='UTF-8') as file:
            for item in listname:
                if item not in self._NOISE:  # data cleaning
                    file.write(item)
                    file.write("\n")

    def url_manage(self, newlist, oldurl):
        """URL manager: compare newly scraped URLs against the history file.

        newlist: path of the file holding freshly scraped URLs.
        oldurl:  path of the file holding previously recorded URLs.
        return:  set of lines present in *newlist* but not in *oldurl*
                 (i.e. links not yet crawled).
        """
        with open(newlist, 'r', encoding='utf-8') as newfile:
            new = newfile.readlines()
        with open(oldurl, 'r', encoding='utf-8') as oldfile:
            old = oldfile.readlines()
        # Set difference: URLs we have not seen before.
        return set(new).difference(old)

    def cleantxt(self, cleancontent, replacecontent, lists=()):
        """Return a new list where every occurrence of *cleancontent* in each
        item of *lists* is replaced by *replacecontent*.

        Fixes two defects in the original: it raised NameError when *lists*
        was empty (returned an unbound loop variable), and its default
        argument was the `list` *type*, which is not iterable. It also
        rebuilt the full result once per input element (O(n^2)); a single
        comprehension produces the identical result for non-empty input.
        """
        return [item.replace(cleancontent, replacecontent) for item in lists]