• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python utils.html_unescape函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中module.utils.html_unescape函数的典型用法代码示例。如果您正苦于以下问题:Python html_unescape函数的具体用法?Python html_unescape怎么用?Python html_unescape使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了html_unescape函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: handleWebLinks

    def handleWebLinks(self):
        self.logDebug("Search for Web links ")

        package_links = []
        pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="([^"]*?)"[^>]*?name="file"'
        ids = re.findall(pattern, self.html, re.I | re.S)

        self.logDebug("Decrypting %d Web links" % len(ids))

        for idx, weblink_id in enumerate(ids):
            try:
                self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))

                res = self.load("http://linkcrypt.ws/out.html", post = {'file':weblink_id})

                indexs = res.find("window.location =") + 19
                indexe = res.find('"', indexs)

                link2 = res[indexs:indexe]

                self.logDebug(link2)

                link2 = html_unescape(link2)
                package_links.append(link2)

            except Exception, detail:
                self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))
开发者ID:kurtiss,项目名称:htpc,代码行数:27,代码来源:LinkCryptWs.py


示例2: downloadFile

    def downloadFile(self, pyfile):
        url = pyfile.url

        for i in range(5):
            header = self.load(url, just_header=True)

            # self.load does not raise a BadHeader on 404 responses, do it here
            if 'code' in header and header['code'] == 404:
                raise BadHeader(404)

            if 'location' in header:
                self.logDebug("Location: " + header['location'])
                url = unquote(header['location'])
            else:
                break

        name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

        if 'content-disposition' in header:
            self.logDebug("Content-Disposition: " + header['content-disposition'])
            m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header['content-disposition'])
            if m:
                disp = m.groupdict()
                self.logDebug(disp)
                if not disp['enc']:
                    disp['enc'] = 'utf-8'
                name = remove_chars(disp['name'], "\"';").strip()
                name = unicode(unquote(name), disp['enc'])

        if not name:
            name = url
        pyfile.name = name
        self.logDebug("Filename: %s" % pyfile.name)
        self.download(url, disposition=True)
开发者ID:DasLampe,项目名称:pyload,代码行数:34,代码来源:BasePlugin.py


示例3: handleCaptcha

 def handleCaptcha(self, inputs):
     found = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
     if found:
         recaptcha_key = unquote(found.group(1))
         self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
         recaptcha = ReCaptcha(self)
         inputs["recaptcha_challenge_field"], inputs["recaptcha_response_field"] = recaptcha.challenge(recaptcha_key)
         return 1
     else:
         found = re.search(self.CAPTCHA_URL_PATTERN, self.html)
         if found:
             captcha_url = found.group(1)
             inputs["code"] = self.decryptCaptcha(captcha_url)
             return 2
         else:
             found = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
             if found:
                 captcha_div = found.group(1)
                 self.logDebug(captcha_div)
                 numerals = re.findall(
                     r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>", html_unescape(captcha_div)
                 )
                 inputs["code"] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))])
                 self.logDebug("CAPTCHA", inputs["code"], numerals)
                 return 3
             else:
                 found = re.search(self.SOLVEMEDIA_PATTERN, self.html)
                 if found:
                     captcha_key = found.group(1)
                     captcha = SolveMedia(self)
                     inputs["adcopy_challenge"], inputs["adcopy_response"] = captcha.challenge(captcha_key)
                     return 4
     return 0
开发者ID:japhigu,项目名称:pyload,代码行数:33,代码来源:XFileSharingPro.py


示例4: process

    def process(self, pyfile):
        self.prepare()

        if not re.match(self.__pattern__, self.pyfile.url):
            if self.premium:
                self.handleOverriden()
            else:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        else:
            try:
                self.html = self.load(pyfile.url, cookies = False, decode = True)
                self.file_info = self.getFileInfo()
            except PluginParseError:
                self.file_info = None

            self.location = self.getDirectDownloadLink()

            if not self.file_info:
                pyfile.name = html_unescape(unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1]))

            if self.location:
                self.startDownload(self.location)
            elif self.premium:
                self.handlePremium()
            else:
                self.handleFree()
开发者ID:4Christopher,项目名称:pyload,代码行数:26,代码来源:XFileSharingPro.py


示例5: handlePremium

 def handlePremium(self):
     found = re.search(self.PREMIUM_URL_PATTERN, self.html)
     if not found: self.parseError("Premium URL")
     url = html_unescape(found.group(1))
     self.logDebug("Premium URL: " + url)        
     if not url.startswith("http://"): self.resetAccount()
     self.download(url)
开发者ID:beefone,项目名称:pyload,代码行数:7,代码来源:CoolshareCz.py


示例6: process

    def process(self, pyfile):
        self.prepare()

        if not re.match(self.__pattern__, self.pyfile.url):
            if self.premium:
                self.handleOverriden()
            else:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        else:
            try:
                # Due to a 0.4.9 core bug self.load would use cookies even if
                # cookies=False. Workaround using getURL to avoid cookies.
                # Can be reverted in 0.5 as the cookies bug has been fixed.
                self.html = getURL(pyfile.url, decode=True)
                self.file_info = self.getFileInfo()
            except PluginParseError:
                self.file_info = None

            self.location = self.getDirectDownloadLink()

            if not self.file_info:
                pyfile.name = html_unescape(
                    unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1])
                )

            if self.location:
                self.startDownload(self.location)
            elif self.premium:
                self.handlePremium()
            else:
                self.handleFree()
开发者ID:wangjun,项目名称:pyload,代码行数:31,代码来源:XFileSharingPro.py


示例7: getFileInfo

    def getFileInfo(self):
        self.logDebug("URL: %s" % self.pyfile.url)
        if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html):
            self.tempOffline()

        name, size, status = parseFileInfo(self)[:3]

        if status == 1:
            self.offline()
        elif status != 2:
            self.logDebug(self.file_info)
            self.parseError('File info')

        if name:
            self.pyfile.name = name
        else:
            self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])

        if size:
            self.pyfile.size = size
        else:
            self.logError("File size not parsed")

        self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
        return self.file_info
开发者ID:Dmanugm,项目名称:pyload,代码行数:25,代码来源:SimpleHoster.py


示例8: handleCaptcha

 def handleCaptcha(self, inputs):
     captcha_div = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S).group(1)
     self.logDebug(captcha_div)
     numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))
     inputs['code'] = "".join([a[1] for a in sorted(numerals, key = lambda num: int(num[0]))])
     self.logDebug("CAPTCHA", inputs['code'], numerals)
     return 3
开发者ID:4Christopher,项目名称:pyload,代码行数:7,代码来源:RarefileNet.py


示例9: proceed

 def proceed(self, url, location):
     url = self.parent.url
     self.html = self.load(url)
     link_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
     for id in link_ids:
         new_link = html_unescape(
             re.search("width=\"100%\" src=\"(.*)\"></iframe>", self.load("http://1kh.de/l/" + id)).group(1))
         self.urls.append(new_link)
开发者ID:earthGavinLee,项目名称:pyload,代码行数:8,代码来源:OneKhDe.py


示例10: get_file_name

    def get_file_name(self):
        try:
            name =  self.api["name"]
        except KeyError:
            file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
            name = re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]

        return html_unescape(name)
开发者ID:beefone,项目名称:pyload,代码行数:8,代码来源:MegauploadCom.py


示例11: load

    def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, multipart=False, req=None):
        """
        Load content at url and returns it

        :param url:
        :param get:
        :param post:
        :param ref:
        :param cookies:
        :param just_header: If True only the header will be retrieved and returned as dict
        :param decode: Wether to decode the output according to http header, should be True in most cases
        :return: Loaded content
        """
        if hasattr(self, 'pyfile') and self.pyfile.abort:
            self.abort()

        url = fixurl(url)

        if not url or not isinstance(url, basestring):
            self.fail(_("No url given"))

        if self.pyload.debug:
            self.log_debug("LOAD URL " + url,
                           *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])

        if req is None:
            req = self.req or self.pyload.requestFactory.getRequest(self.__name__)

        #@TODO: Move to network in 0.4.10
        if hasattr(self, 'COOKIES') and isinstance(self.COOKIES, list):
            set_cookies(req.cj, self.COOKIES)

        res = req.load(url, get, post, ref, bool(cookies), just_header, multipart, decode is True)  #@TODO: Fix network multipart in 0.4.10

        #@TODO: Move to network in 0.4.10
        if decode:
            res = html_unescape(res)

        #@TODO: Move to network in 0.4.10
        if isinstance(decode, basestring):
            res = decode(res, decode)

        if self.pyload.debug:
            frame = inspect.currentframe()
            framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno))
            try:
                if not exists(os.path.join("tmp", self.__name__)):
                    os.makedirs(os.path.join("tmp", self.__name__))

                with open(framefile, "wb") as f:
                    del frame  #: Delete the frame or it wont be cleaned
                    f.write(encode(res))

            except IOError, e:
                self.log_error(e)
开发者ID:earthGavinLee,项目名称:pyload,代码行数:55,代码来源:Plugin.py


示例12: getPackageNameAndFolder

    def getPackageNameAndFolder(self):
        if hasattr(self, 'TITLE_PATTERN'):
            m = re.search(self.TITLE_PATTERN, self.html)
            if m:
                name = folder = html_unescape(m.group('title').strip())
                self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder))
                return name, folder

        name = self.pyfile.package().name
        folder = self.pyfile.package().folder
        self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
        return name, folder
开发者ID:3DMeny,项目名称:pyload,代码行数:12,代码来源:SimpleCrypter.py


示例13: handle_free

    def handle_free(self, pyfile):
        fileid = re.search(self.FILEID_PATTERN, self.html).group(1)
        self.log_debug("FileID: " + fileid)

        token = re.search(self.TOKEN_PATTERN, self.html).group(1)
        self.log_debug("Token: " + token)

        self.html = self.load("http://lolabits.es/action/License/Download",
                              post={'fileId'                     : fileid,
                                    '__RequestVerificationToken' : token},
                              decode="unicode-escape")

        self.link = html_unescape(re.search(self.LINK_PATTERN, self.html).group(1))
开发者ID:earthGavinLee,项目名称:pyload,代码行数:13,代码来源:LolabitsEs.py


示例14: _translateAPIFileInfo

def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
    
    # Translate
    fileInfo = {}
    try:
        fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
        fileInfo['name'] = html_unescape(apiFileDataMap['n'])
        fileInfo['size'] = int(apiFileDataMap['s'])
        fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]        
    except:
        pass

    return fileInfo
开发者ID:beefone,项目名称:pyload,代码行数:13,代码来源:MegauploadCom.py


示例15: getInfo

def getInfo(urls):
    for chunk in chunks(urls, 80):
        result = []

        api = getAPIData(chunk)

        for data in api.itervalues():
            if data[0] == "online":
                result.append((html_unescape(data[2]), data[1], 2, data[4]))

            elif data[0] == "offline":
                result.append((data[4], 0, 1, data[4]))

        yield result
开发者ID:masterwaster,项目名称:pyload,代码行数:14,代码来源:UploadedTo.py


示例16: parseFileInfo

def parseFileInfo(self, url = '', html = ''):
    if not html and hasattr(self, "html"): html = self.html
    name, size, status, found, fileid = url, 0, 3, None, None

    if re.search(self.FILE_OFFLINE_PATTERN, html):
        # File offline
        status = 1
    else:
        found = re.search(self.FILE_INFO_PATTERN, html)
        if found:
            name, fileid = html_unescape(found.group('N')), found.group('ID')
            size = parseFileSize(found.group('S'))
            status = 2

    return name, size, status, fileid
开发者ID:masterwaster,项目名称:pyload,代码行数:15,代码来源:UploadedTo.py


示例17: process

    def process(self, pyfile):
        if not hasattr(self, "HOSTER_NAME"):
            self.HOSTER_NAME = re.search(self.__pattern__, self.pyfile.url).group(1)
        if not hasattr(self, "DIRECT_LINK_PATTERN"):
            self.DIRECT_LINK_PATTERN = (
                r'(http://(\w+\.%s|\d+\.\d+\.\d+\.\d+)(:\d+/d/|/files/\d+/\w+/)[^"\'<]+)' % self.HOSTER_NAME
            )

        self.captcha = self.errmsg = None
        self.passwords = self.getPassword().splitlines()

        if not re.match(self.__pattern__, self.pyfile.url):
            if self.premium:
                self.handleOverriden()
            else:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        else:
            try:
                self.html = self.load(pyfile.url, cookies=False, decode=True)
                self.file_info = self.getFileInfo()
            except PluginParseError:
                self.file_info = None

            self.req.http.lastURL = self.pyfile.url

            self.req.http.c.setopt(FOLLOWLOCATION, 0)
            self.html = self.load(self.pyfile.url, cookies=True, decode=True)
            self.header = self.req.http.header
            self.req.http.c.setopt(FOLLOWLOCATION, 1)

            self.location = None
            found = re.search("Location\s*:\s*(.*)", self.header, re.I)
            if found and re.match(self.DIRECT_LINK_PATTERN, found.group(1)):
                self.location = found.group(1).strip()

            if not self.file_info:
                pyfile.name = html_unescape(
                    unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1])
                )

            if self.location:
                self.startDownload(self.location)
            elif self.premium:
                self.handlePremium()
            else:
                self.handleFree()
开发者ID:tetratec,项目名称:shareacc,代码行数:46,代码来源:XFileSharingPro.py


示例18: handleShow

    def handleShow(self, url):
        src = self.getSJSrc(url)
        soup = BeautifulSoup(src)
        packageName = self.pyfile.package().name
        if self.config.get("changeNameSJ") == "Show":
            found = html_unescape(soup.find("h2").find("a").string.split(" &#8211;")[0])
            if found:
                packageName = found

        nav = soup.find("div", attrs={"id": "scb"})

        package_links = []
        for a in nav.findAll("a"):
            if self.config.get("changeNameSJ") == "Show":
                package_links.append(a["href"])
            else:
                package_links.append(a["href"] + "#hasName")
        if self.config.get("changeNameSJ") == "Show":
            self.packages.append((packageName, package_links, packageName))
        else:
            self.packages.append((self.pyfile.package().name, package_links, self.pyfile.package().name))
开发者ID:J-Ha,项目名称:pyload-stuff,代码行数:21,代码来源:SerienjunkiesOrg.py


示例19: process

    def process(self, pyfile):
        self.req.cj.setCookie("uploaded.net", "lang", "en") # doesn't work anymore
        self.load("http://uploaded.net/language/en")

        api = getAPIData([pyfile.url])

        # TODO: fallback to parse from site, because api sometimes delivers wrong status codes

        if not api:
            self.logWarning("No response for API call")

            self.html = unicode(self.load(pyfile.url, decode = False), 'iso-8859-1')
            name, size, status, self.fileID = parseFileInfo(self)
            self.logDebug(name, size, status, self.fileID)
            if status == 1:
                self.offline()
            elif status == 2:
                pyfile.name, pyfile.size = name, size
            else:
                self.fail('Parse error - file info')
        elif api == 'Access denied':
            self.fail(_("API key invalid"))

        else:
            if self.fileID not in api:
                self.offline()

            self.data = api[self.fileID]
            if self.data[0] != "online":
                self.offline()

            pyfile.name = html_unescape(self.data[2])

        # self.pyfile.name = self.get_file_name()

        if self.premium:
            self.handlePremium()
        else:
            self.handleFree()
开发者ID:beefone,项目名称:pyload,代码行数:39,代码来源:UploadedTo.py


示例20: process

    def process(self, pyfile):
        html = self.load(pyfile.url, decode=True)

        if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        #get config
        use3d = self.getConfig("3d")
        if use3d:
            quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                       "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
        else:
            quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                       "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}
        desired_fmt = self.getConfig("fmt")
        if desired_fmt and desired_fmt not in self.formats:
            self.logWarning("FMT %d unknown - using default." % desired_fmt)
            desired_fmt = 0
        if not desired_fmt:
            desired_fmt = quality.get(self.getConfig("quality"), 18)

        #parse available streams
        streams = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
        streams = [x.split('\u0026') for x in streams.split(',')]
        streams = [dict((y.split('=', 1)) for y in x) for x in streams]
        streams = [(int(x['itag']), unquote(x['url'])) for x in streams]
        #self.logDebug("Found links: %s" % streams)
        self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

        #build dictionary of supported itags (3D/2D)
        allowed = lambda x: self.getConfig(self.formats[x][0])
        streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]
        if not streams:
            self.fail("No available stream meets your preferences")
        fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

        self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                      (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                       "" if desired_fmt in fmt_dict else "NOT ", "" if allowed(desired_fmt) else "NOT "))

        #return fmt nearest to quality index
        if desired_fmt in fmt_dict and allowed(desired_fmt):
            fmt = desired_fmt
        else:
            sel = lambda x: self.formats[x][3]  # select quality index
            comp = lambda x, y: abs(sel(x) - sel(y))

            self.logDebug("Choosing nearest fmt: %s" % [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])
            fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                         sel(x) > sel(y) else y, fmt_dict.keys())

        self.logDebug("Chosen fmt: %s" % fmt)
        url = fmt_dict[fmt]
        self.logDebug("URL: %s" % url)

        #set file name
        file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
        file_name_pattern = '<meta name="title" content="(.+?)">'
        name = re.search(file_name_pattern, html).group(1).replace("/", "")

        # Cleaning invalid characters from the file name
        name = name.encode('ascii', 'replace')

        pyfile.name = html_unescape(name)

        time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
        ffmpeg = which("ffmpeg")
        if ffmpeg and time:
            m, s = time.groups()[1:]
            if not m:
                m = "0"

            pyfile.name += " (starting at %s:%s)" % (m, s)
        pyfile.name += file_suffix

        filename = self.download(url)

        if ffmpeg and time:
            inputfile = filename + "_"
            os.rename(filename, inputfile)

            subprocess.call([
                ffmpeg,
                "-ss", "00:%s:%s" % (m, s),
                "-i", inputfile,
                "-vcodec", "copy",
                "-acodec", "copy",
                filename])
            os.remove(inputfile)
开发者ID:ASCIIteapot,项目名称:pyload,代码行数:92,代码来源:YoutubeCom.py



注:本文中的module.utils.html_unescape函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python utils.parseFileSize函数代码示例发布时间:2022-05-27
下一篇:
Python utils.fs_encode函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap