• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python scraper_utils.urljoin函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中salts_lib.scraper_utils.urljoin函数的典型用法代码示例。如果您正苦于以下问题:Python urljoin函数的具体用法?Python urljoin怎么用?Python urljoin使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了urljoin函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_sources

 def get_sources(self, video):
     """Return a list of direct hoster dicts for *video*.

     Fetches the video's page, locates the player iframe inside the
     'film-container' div, and converts each entry of the iframe's
     source list into a hoster dict.  Returns [] when no URL matches.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
     if fragment:
         iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
         if iframe_url:
             iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
             headers = {'Referer': page_url}
             html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
             sources = scraper_utils.parse_sources_list(self, html)
             for source in sources:
                 quality = sources[source]['quality']
                 host = scraper_utils.get_direct_hostname(self, source)
                 stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 # FIX: raw string — '\d' in a plain literal is an invalid
                 # escape sequence (DeprecationWarning since Python 3.6).
                 match = re.search(r'(\d+[a-z]bps)', source)
                 if match:
                     hoster['extra'] = match.group(1)
                 hosters.append(hoster)

     # Highest bitrate tag first; sources without an 'extra' tag sort last.
     hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
     return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:26,代码来源:snagfilms_scraper.py


示例2: get_sources

    def get_sources(self, video):
        """Return a list of direct hoster dicts for *video*.

        Extracts the page's ``video_id``, performs the site's 'av' POST
        (response unused), then queries VIDEO_URL for a JSON map whose
        values embed the real stream URLs.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not match: return hosters
        
        video_id = match.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
        # Fire-and-forget POST to 'av'; presumably primes server-side
        # state/cookies required by the next request — TODO confirm.
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
        
        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
        # Python 2 dict iteration; each value looks like '...url=<encoded-url>'.
        for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
            match = re.search('url=(.*)', value)
            if not match: continue
            stream_url = urllib.unquote(match.group(1))

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                # Non-gvideo: the JSON key is fed to height_get_quality,
                # so it is presumably a pixel height — TODO confirm.
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
        return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:30,代码来源:xmovies8_scraper.py


示例3: get_sources

    def get_sources(self, video):
        """Return hoster dicts for *video* (direct and hosted links).

        Combines GK-player links extracted from the page with the
        scraper's own __get_ht_links results; episode pages are first
        narrowed to anchors matching the episode number or 'Server N'.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            # Keep only the anchors relevant to this episode.
            gk_html = ''.join(match.group(0) for match in re.finditer('<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I))
        else:
            gk_html = html
        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
        sources.update(self.__get_ht_links(html, page_url))
        
        # Python 2 dict iteration over {stream_url: quality}.
        for stream_url, quality in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
            else:
                host = urlparse.urlparse(stream_url).hostname
                direct = False
            
            if host is None: continue
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:30,代码来源:moviesub_scraper.py


示例4: search

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site for *title* and return result dicts.

        Each result has 'url' (pathified), 'title' (cleansed) and
        'year'.  Results are filtered by *year* when both the query
        year and the scraped year are available.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/results')
        params = {'q': title}
        referer = search_url + '?' + urllib.urlencode(params)
        headers = {'Referer': referer}
        headers.update(XHR)
        # Fire-and-forget POST to 'av'; response unused — presumably an
        # anti-bot handshake required before searching. TODO confirm.
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        cookies = {'begin_referer': referer, 'prounder': 1}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        # NOTE(review): a page referencing jquery.js looks like it is
        # treated as a stale/interstitial response; refetch uncached.
        if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
            html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)
            
        for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
            title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
            year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
            if not title_frag: continue
            match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
            if not match: continue
            match_url = match[0].attrs['href']
            match_title = match[0].content
            # year_frag may be empty or the regex may miss: fall back to ''.
            try:
                match = re.search('\s+(\d{4})\s+', year_frag[0].content)
                match_year = match.group(1)
            except:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:32,代码来源:xmovies8_scraper.py


示例5: __get_source_page

 def __get_source_page(self, video_type, page_url):
     """Resolve the watching page for *page_url*.

     Returns (movie_id, watching_url, server-list html); returns
     ('', '', '') when the URL doesn't match or no watching link is
     found.
     """
     match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
     if not match: return '', '', ''
     slug, movie_id = match.groups()
     
     vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
     qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
     qp_url = scraper_utils.urljoin(self.base_url, qp_url)
     headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
     headers.update(XHR)
     html = self._http_get(qp_url, headers=headers, cache_limit=8)
     watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
     if not watching_url: return '', '', ''
     
     watching_url = watching_url[0].attrs['href']
     page_html = self._http_get(watching_url, headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)}, cache_limit=8)
     # Fetch each hidden <img>; responses are discarded — presumably a
     # tracking/anti-bot requirement of the site. TODO confirm.
     for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
         _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)
     
     sl_url = SL_URL.format(movie_id=movie_id)
     sl_url = scraper_utils.urljoin(self.base_url, sl_url)
     html = self._http_get(sl_url, headers=headers, cache_limit=8)
     js_data = scraper_utils.parse_json(html, sl_url)
     # Server list JSON wraps the markup in an 'html' key.
     try: html = js_data['html']
     except: html = ''
     return movie_id, watching_url, html
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:26,代码来源:yesmovies_scraper.py


示例6: get_sources

 def get_sources(self, video):
     """Return hosted (non-direct) hoster dicts for *video*.

     Only runs for episodes whose airdate is within ``self.filter``
     days; tries the daily release page for the day after the airdate,
     then the airdate itself, finally falling back to the index page.
     """
     hosters = []
     sources = {}
     today = datetime.datetime.today().date()
     max_age = today - datetime.timedelta(days=self.filter)
     if video.ep_airdate and max_age < video.ep_airdate:
         day_after = video.ep_airdate + datetime.timedelta(days=1)
         for day in [day_after, video.ep_airdate]:
             # NOTE(review): strict '<' skips pages dated today —
             # looks intentional (page may not exist yet), TODO confirm.
             if day < today:
                 page_url = EP_PAGE % (day.strftime('%Y.%m.%d'))
                 page_url = scraper_utils.urljoin(self.base_url, page_url)
                 html = self._http_get(page_url, require_debrid=True, cache_limit=30 * 24)
                 sources.update(self.__get_sources(video, html))
             if sources: break
             
         if not sources and kodi.get_setting('scraper_url'):
             page_url = scraper_utils.urljoin(self.base_url, '/index.html')
             html = self._http_get(page_url, require_debrid=True, cache_limit=2)
             sources.update(self.__get_sources(video, html))
         
     # sources maps url -> quality.
     for source in sources:
         host = urlparse.urlparse(source).hostname
         hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
         hosters.append(hoster)
     return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:25,代码来源:tvhd_scraper.py


示例7: get_sources

 def get_sources(self, video):
     """Return hoster dicts for *video* from the main and alternate pages.

     Scrapes the video's page, then every alternate page reported by
     __get_alt_pages, merging their sources before converting them to
     hoster dicts ('subs' is set for Turkish-subtitled entries).
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     pages = self.__get_alt_pages(html, page_url)
     sources = self.__get_sources(html, page_url, pages.get(page_url, True))
     for page in pages:
         if page == page_url: continue
         # BUG FIX: the original assigned back to page_url here, so after
         # the first alternate page the `page == page_url` skip-check
         # compared against the last-fetched alternate URL instead of the
         # main page URL; use a separate local instead.
         alt_url = scraper_utils.urljoin(self.base_url, page, pages[page])
         html = self._http_get(alt_url, cache_limit=1)
         sources.update(self.__get_sources(html, page, pages[page]))
         
     # Python 2 dict iteration over {stream_url: values}.
     for stream_url, values in sources.iteritems():
         host = scraper_utils.get_direct_hostname(self, stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
             direct = True
         elif values['direct']:
             quality = values['quality']
             direct = True
         else:
             quality = values['quality']
             direct = False
             host = urlparse.urlparse(stream_url).hostname
         
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
         if values['subs']: hoster['subs'] = 'Turkish Subtitles'
         hosters.append(hoster)
             
     return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:32,代码来源:dizist_scraper.py


示例8: get_sources

    def get_sources(self, video):
        """Return hoster dicts for *video*.

        For movies, follows the poster link to the episode list and
        picks the best episode page first.  Streams come either from
        <iframe> embeds (hosted, 480p assumed) or <source> tags
        (direct, with optional data-res height).
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        headers = {'Referer': page_url}
        html = self._http_get(page_url, headers=headers, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
            if fragment:
                movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                if movie_url:
                    page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                    html = self._http_get(page_url, cache_limit=.5)
                    episodes = self.__get_episodes(html)
                    page_url = self.__get_best_page(episodes)
                    if not page_url:
                        return hosters
                    else:
                        page_url = scraper_utils.urljoin(self.base_url, page_url)
                        html = self._http_get(page_url, cache_limit=.5)
        
        streams = dom_parser2.parse_dom(html, 'iframe', req='src')
        if streams:
            # iframe embeds: hosted links, height unknown -> assume 480.
            streams = [(attrs['src'], 480) for attrs, _content in streams]
            direct = False
        else:
            # <source> tags: direct links, height from data-res if present.
            streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
            direct = True
            
        headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
        for stream_url, height in streams:
            # video.php/moviexk.php are redirectors: resolve via a HEAD
            # request (allow_redirect=False returns the Location value).
            if 'video.php' in stream_url or 'moviexk.php' in stream_url:
                if 'title=' in stream_url:
                    title = stream_url.split('title=')[-1]
                    stream_url = stream_url.replace(title, urllib.quote(title))
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
                if redir_url.startswith('http'):
                    redir_url = redir_url.replace(' ', '').split(';codec')[0]
                    stream_url = redir_url
                else:
                    continue
            
            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += scraper_utils.append_headers(headers)
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(height)
            
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
            hosters.append(source)

        return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:58,代码来源:moviexk_scraper.py


示例9: __login

 def __login(self):
     """Authenticate against the site's JSON login endpoint.

     POSTs the stored credentials to /apis/v2/user/login.json and
     returns True iff the parsed response has status == 'success'.
     """
     login_url = scraper_utils.urljoin(self.base_url, '/apis/v2/user/login.json')
     referer_url = scraper_utils.urljoin(self.base_url, '/login')
     payload = json.dumps({'email': self.username, 'password': self.password, 'rememberMe': True})
     request_headers = {'Content-Type': 'application/json', 'Referer': referer_url}
     request_headers.update(XHR)
     response = super(self.__class__, self)._http_get(login_url, data=payload, headers=request_headers, cache_limit=0)
     result = scraper_utils.parse_json(response, login_url)
     return result.get('status') == 'success'
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:9,代码来源:snagfilms_scraper.py


示例10: _get_episode_url

 def _get_episode_url(self, show_url, video):
     """Return the relative URL of *video*'s episode under *show_url*.

     Fetches the show's season page and delegates matching to
     _default_get_episode_url using URL- and title-based patterns.
     """
     episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
     headers = {'Referer': scraper_utils.urljoin(self.base_url, show_url)}
     # Build /<show>/season/<n> relative to the show, then absolutize.
     season_url = scraper_utils.urljoin(show_url, '/season/%s' % (video.season))
     season_url = scraper_utils.urljoin(self.base_url, season_url)
     html = self._http_get(season_url, headers=headers, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})
     return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:9,代码来源:flixanity_scraper.py


示例11: _get_episode_url

 def _get_episode_url(self, show_url, video):
     """Return the relative URL of *video*'s episode under *show_url*.

     Reads the show id from the page's 'icerikid' div, POSTs it to the
     season endpoint, and matches the episode in the returned HTML.
     Implicitly returns None when the show id can't be found.
     """
     url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=24)
     show_id = dom_parser2.parse_dom(html, 'div', {'id': 'icerikid'}, req='value')
     if show_id:
         # Turkish site: '-<season>-sezon-<episode>-bolum' URL scheme.
         episode_pattern = 'href="([^"]*-%s-sezon-%s-bolum[^"]*)"' % (video.season, video.episode)
         title_pattern = 'href="(?P<url>[^"]+)[^>]*class="realcuf".*?class="realcuf">(?P<title>[^<]*)'
         season_url = scraper_utils.urljoin(self.base_url, SEASON_URL)
         data = {'sezon_id': video.season, 'dizi_id': show_id[0].attrs['value'], 'tip': 'dizi', 'bolumid': ''}
         html = self._http_get(season_url, data=data, headers=XHR, cache_limit=2)
         return self._default_get_episode_url(html, video, episode_pattern, title_pattern)


示例12: _get_episode_url

 def _get_episode_url(self, show_url, video):
     """Return the relative URL of *video*'s episode under *show_url*.

     Follows the poster link to the real show page, then matches the
     episode in the 'servers' div (falling back to the whole page).
     Implicitly returns None when the poster or link is missing.
     """
     url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=24)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
     if not fragment: return
     show_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
     if not show_url: return
     show_url = scraper_utils.urljoin(self.base_url, show_url[0].attrs['href'])
     html = self._http_get(show_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'servers'})
     # Matches 'Ep <e>' or 'S<season>-Ep<e>' style labels, guarding
     # against partial numeric matches with (?!\d).
     episode_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % (video.season, video.episode)
     return self._default_get_episode_url(fragment or html, video, episode_pattern)
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:12,代码来源:moviexk_scraper.py


示例13: _http_get

 def _http_get(self, url, params=None, data=None, multipart_data=None, headers=None, cookies=None, allow_redirect=True, method=None, require_debrid=False, read_error=False, cache_limit=8):
     """Fetch *url* relative to base_url, retrying once if the base moves.

     Delegates to the parent class's _http_get; if the response shows
     the site has moved (per __update_base_url), rebuilds the URL
     against the new base_url and fetches again.
     NOTE(review): super(self.__class__, self) recurses infinitely if
     this class is subclassed further — TODO confirm no subclasses.
     """
     real_url = scraper_utils.urljoin(self.base_url, url)
     html = super(self.__class__, self)._http_get(real_url, params=params, data=data, multipart_data=multipart_data, headers=headers, cookies=cookies,
                                                  allow_redirect=allow_redirect, method=method, require_debrid=require_debrid, read_error=read_error,
                                                  cache_limit=cache_limit)
     if self.__update_base_url(html):
         real_url = scraper_utils.urljoin(self.base_url, url)
         html = super(self.__class__, self)._http_get(real_url, params=params, data=data, multipart_data=multipart_data, headers=headers,
                                                      cookies=cookies, allow_redirect=allow_redirect, method=method, require_debrid=require_debrid,
                                                      read_error=read_error, cache_limit=cache_limit)
     
     return html
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:12,代码来源:2ddl_scraper.py


示例14: _get_episode_url

    def _get_episode_url(self, show_url, video):
        """Return the relative URL of *video*'s episode under *show_url*.

        Finds the season link on the show page, fetches it, and matches
        the episode inside the 'episodeDetail' divs.  Implicitly
        returns None when the season link is absent.
        """
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, headers={'Referer': self.base_url}, cache_limit=24 * 7)
        # (?!\d) prevents season 1 matching season 10, etc.
        match = re.search('href="([^"]*season=0*%s(?!\d))[^"]*' % (video.season), html)
        if not match: return

        episode_pattern = 'href="([^"]*/0*%s-0*%s/[^"]*)' % (video.season, video.episode)
        title_pattern = 'href="(?P<url>[^"]+)[^>]*>\s*(?P<title>.*?)\s*</a>'
        season_url = scraper_utils.urljoin(show_url, match.group(1))
        html = self._http_get(season_url, headers={'Referer': show_url}, cache_limit=2)
        episodes = dom_parser2.parse_dom(html, 'div', {'class': 'episodeDetail'})
        fragment = '\n'.join(ep.content for ep in episodes)
        return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:13,代码来源:putlocker_scraper.py


示例15: __add_torrent

 def __add_torrent(self, hash_id):
     """Ensure the torrent for *hash_id* is queued with the service.

     Returns True if the hash is already in the transfer list, or if
     submitting its magnet link is acknowledged with status 'success';
     False otherwise.
     """
     transfers_url = scraper_utils.urljoin(self.base_url, LIST_URL)
     transfers = self._json_get(transfers_url, cache_limit=0)
     for transfer in transfers.get('transfers', []):
         if transfer['hash'].lower() == hash_id:
             return True

     submit_url = scraper_utils.urljoin(self.base_url, ADD_URL)
     payload = {'src': MAGNET_LINK % hash_id}
     response = self._json_get(submit_url, data=payload, cache_limit=0)
     return response.get('status') == 'success'
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:14,代码来源:premiumize_scraper.py


示例16: get_sources

    def get_sources(self, video):
        """Return source dicts for *video*.

        For every film image on the page, queries the server list, then
        each episode's link endpoint; the returned JSON yields embed
        iframes or parallel lists of links ('l') and heights ('q').
        """
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
            film_id, data_name = attrs['data-id'], attrs['data-name']
            data = {'id': film_id, 'n': data_name}
            server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
            server_url = server_url % (film_id)
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
            for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
                data = {'epid': attrs['data-id']}
                ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
                ep_url = ep_url % (attrs['data-id'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, ep_url)
                # Prefer iframe srcs from the embed markup; fall back to
                # the plain link list, then to nothing.
                try:
                    links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
                except:
                    try: links = js_data['link']['l']
                    except: links = []
                try: heights = js_data['link']['q']
                except: heights = []
                # Python 2: map(None, a, b) zips with None-padding
                # (like itertools.izip_longest).
                for stream_url, height in map(None, links, heights):
                    match = re.search('movie_url=(.*)', stream_url)
                    if match:
                        stream_url = match.group(1)
                        
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                        direct = True
                    else:
                        host = urlparse.urlparse(stream_url).hostname
                        if height:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HD720
                        direct = False
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    sources.append(source)

        return sources
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:50,代码来源:hdmoviefree_scraper.py


示例17: resolve_link

 def resolve_link(self, link):
     """Resolve *link* to the final stream URL.

     Extracts the player's "file" value from the page, then issues a
     cookie-carrying request for it and returns the URL the server
     ultimately redirects to.  Implicitly returns None on no match.
     """
     url = scraper_utils.urljoin(self.base_url, link)
     html = self._http_get(url, cache_limit=.5)
     match = re.search('"file"\s*:\s*"([^"]+)', html)
     if match:
         file_link = match.group(1)
         stream_url = scraper_utils.urljoin(self.base_url, file_link)
         # Hand-built urllib2 request so the site's cookies and
         # unredirected Host/Referer headers are sent exactly once.
         cj = self._set_cookies(self.base_url, {})
         request = urllib2.Request(stream_url)
         request.add_header('User-Agent', scraper_utils.get_ua())
         request.add_unredirected_header('Host', request.get_host())
         request.add_unredirected_header('Referer', url)
         cj.add_cookie_header(request)
         response = urllib2.urlopen(request)
         # geturl() reflects any redirects followed by urlopen.
         return response.geturl()
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:15,代码来源:noobroom_scraper.py


示例18: get_sources

    def get_sources(self, video):
        """Return hoster dicts from the main page and all part pages.

        Scrapes the video's own page, then follows every link inside
        the 'parts-middle' div (multi-part listings) and scrapes each
        of those pages too.
        """
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return []

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        main_html = self._http_get(page_url, cache_limit=8)
        hosters = [] + self.__get_sources(main_html, page_url)

        parts = dom_parser2.parse_dom(main_html, 'div', {'class': 'parts-middle'})
        if parts:
            for link_attrs, _ in dom_parser2.parse_dom(parts[0].content, 'a', req='href'):
                part_url = scraper_utils.urljoin(self.base_url, link_attrs['href'])
                part_html = self._http_get(part_url, cache_limit=8)
                hosters += self.__get_sources(part_html, part_url)

        return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:15,代码来源:veocube_scraper.py


示例19: _get_episode_url

 def _get_episode_url(self, show_url, video):
     """Return the relative URL of *video*'s episode under *show_url*.

     Season 1 episodes are matched on the show page itself; later
     seasons first look up the season link in the 'page-numbers' div.
     """
     season_url = show_url
     if video.season != 1:
         show_url = scraper_utils.urljoin(self.base_url, show_url)
         html = self._http_get(show_url, cache_limit=24)
         fragment = dom_parser2.parse_dom(html, 'div', {'class': 'page-numbers'})
         if fragment:
             # Turkish site: '-<season>-sezon' URL scheme.
             match = re.search('href="([^"]+-%s-sezon[^"]*)' % (video.season), fragment[0].content)
             if match:
                 season_url = match.group(1)
         
     episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
     season_url = scraper_utils.urljoin(self.base_url, season_url)
     html = self._http_get(season_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'posts-list'})
     return self._default_get_episode_url(fragment or html, video, episode_pattern)
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:16,代码来源:onlinedizi_scraper.py


示例20: get_sources

 def get_sources(self, video):
     """Return hoster dicts for *video*.

     The site keeps all play() links in a single /javascript/movies.js
     file; the entry for this video is located by its element id
     (source_url), and Google Drive links are expanded via
     parse_google.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
     html = self._http_get(js_url, cache_limit=48)
     # The JS uses the bare id, so strip a leading slash from source_url.
     if source_url.startswith('/'):
         source_url = source_url[1:]
     pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
     match = re.search(pattern, html, re.I)
     if match:
         stream_url = match.group(1)
         if 'drive.google' in stream_url or 'docs.google' in stream_url:
             # Google Drive/Docs links expand into multiple qualities.
             sources = scraper_utils.parse_google(self, stream_url)
         else:
             sources = [stream_url]
         
         for source in sources:
             stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             host = scraper_utils.get_direct_hostname(self, source)
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 direct = True
             elif 'youtube' in stream_url:
                 quality = QUALITIES.HD720
                 direct = False
                 host = 'youtube.com'
             else:
                 quality = QUALITIES.HIGH
                 direct = True
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             hosters.append(hoster)
     return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:33,代码来源:piratejunkies_scraper.py



注:本文中的salts_lib.scraper_utils.urljoin函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python trans_utils.i18n函数代码示例发布时间:2022-05-27
下一篇:
Python scraper_utils.pathify_url函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap