本文整理汇总了Python中salts_lib.scraper_utils.get_ua函数的典型用法代码示例。如果您正苦于以下问题：Python get_ua函数的具体用法？Python get_ua怎么用？Python get_ua使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_ua函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    Each link in the page's ``div.videos`` fragment whose lower-cased label
    is a key of ``ALLOWED_LABELS`` is expanded with ``self.__get_sources``;
    every source returned becomes one hoster entry.

    Returns an empty list when there is no usable URL or no fragment.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'videos'})
    if not fragment:
        return hosters

    for link in re.finditer('href="([^"]+)[^>]*>([^<]+)', fragment[0]):
        link_url, label = link.groups()
        label = label.lower()
        if label not in ALLOWED_LABELS:
            continue

        sources = self.__get_sources(link_url, ALLOWED_LABELS[label])
        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            elif sources[source]['direct']:
                quality = sources[source]['quality']
                direct = True
            else:
                quality = sources[source]['quality']
                direct = False
                host = urlparse.urlparse(source).hostname

            # Only direct streams carry the User-Agent header suffix.
            if direct:
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            else:
                stream_url = source

            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            if sources[source]['subs']:
                hoster['subs'] = 'Turkish Subtitles'
            hosters.append(hoster)
    return hosters
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:35,代码来源:maksi_scraper.py
示例2: resolve_link
def resolve_link(self, link):
    """Resolve a hoster link to a playable stream URL.

    ``link`` may carry ``|Name=value&Name=value`` headers after the URL.
    They are split off, unquoted and sent with the first request. The
    player iframe is then fetched (answering a reCAPTCHA if one is
    present), obfuscated markup is decoded and ``<source>`` tags are
    collected.

    Returns the chosen stream URL with a ``|User-Agent=...`` suffix, or
    ``None`` when nothing could be resolved or the user cancelled the
    selection dialog.
    """
    try:
        # maxsplit=1 keeps header values that themselves contain '='
        # (e.g. cookies) intact instead of failing the dict() build.
        headers = dict([item.split('=', 1) for item in (link.split('|')[1]).split('&')])
        for key in headers:
            headers[key] = urllib.unquote(headers[key])
        link = link.split('|')[0]
    except (IndexError, ValueError):
        # IndexError: no '|' section; ValueError: an item without '='.
        headers = {}

    if not link.startswith('http'):
        link = urlparse.urljoin(self.base_url, link)
    html = self._http_get(link, headers=headers, cache_limit=0)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
    if not fragment:
        return None

    iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
    if not iframe_url:
        return None

    iframe_url = iframe_url[0]
    headers = {'Referer': link}
    html = self._http_get(iframe_url, headers=headers, cache_limit=0)
    sitekey = dom_parser.parse_dom(html, 'div', {'class': 'g-recaptcha'}, ret='data-sitekey')
    if sitekey:
        token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(sitekey[0], lang='en')
        if token:
            data = {'g-recaptcha-response': token}
            html = self._http_get(iframe_url, data=data, cache_limit=0)
            log_utils.log(html)

    # Apply the page's own .replace('a', 'b') call to the markup.
    match = re.search("\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
    if match:
        html = html.replace(match.group(1), match.group(2))

    # The payload is base64-encoded once per window.atob occurrence.
    match = re.search("window\.atob[\([]+'([^']+)", html)
    if match:
        func_count = len(re.findall('window\.atob', html))
        html = match.group(1)
        for _i in xrange(func_count):
            html = base64.decodestring(html)

    streams = []
    for match in re.finditer('''<source[^>]+src=["']([^'"]+)[^>]+label=['"]([^'"]+)''', html):
        streams.append(match.groups())

    if len(streams) > 1:
        if not self.auto_pick:
            result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
            if result > -1:
                return streams[result][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
        else:
            # Pick the stream whose label ranks highest in Q_ORDER.
            best_stream = ''
            best_q = 0
            for stream_url, label in streams:
                rank = Q_ORDER[scraper_utils.height_get_quality(label)]
                if rank > best_q:
                    best_q = rank
                    best_stream = stream_url
            if best_stream:
                return best_stream + '|User-Agent=%s' % (scraper_utils.get_ua())
    elif streams:
        return streams[0][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
    return None
开发者ID:dsjh,项目名称:tknorris-beta-repo,代码行数:60,代码来源:watchhd_scraper.py
示例3: __get_king_links
def __get_king_links(self, iframe_url):
    """Fetch direct stream hosters for the video referenced by ``iframe_url``.

    The id after ``v=`` is POSTed to the iframe's ``?p=GetVideoSources``
    endpoint and every entry of the JSON ``VideoSources`` list becomes one
    hoster dict.

    Returns a (possibly empty) list of hoster dicts.
    """
    hosters = []
    match = re.search('v=(.*)', iframe_url)
    if not match:
        return hosters

    data = {'ID': match.group(1)}
    headers = {'Referer': iframe_url}
    headers.update(XHR)
    xhr_url = iframe_url.split('?')[0] + '?p=GetVideoSources'
    html = self._http_get(xhr_url, data=data, headers=headers, cache_limit=.5)
    js_data = scraper_utils.parse_json(html, xhr_url)
    try:
        for source in js_data['VideoSources']:
            stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
            host = self._get_direct_hostname(source['file'])
            label = source.get('label', '')
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source['file'])
            elif label.isdigit():
                quality = scraper_utils.height_get_quality(label)
            else:
                quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
            hosters.append(hoster)
    except (KeyError, TypeError, AttributeError, ValueError):
        # Best effort: a malformed response keeps whatever was collected so
        # far. Unlike a bare except, KeyboardInterrupt/SystemExit propagate.
        pass
    return hosters
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:27,代码来源:dizibox_scraper.py
示例4: get_sources
def get_sources(self, video):
    """Build the list of direct-stream hoster dicts for ``video``.

    The page's ``video_id`` is POSTed to ``VIDEO_URL``; the JSON reply is
    iterated and every value containing ``url=...`` yields one hoster.

    Returns an empty list when there is no usable URL or no video id.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search('var\s*video_id="([^"]+)', html)
        if match:
            video_id = match.group(1)
            url = urlparse.urljoin(self.base_url, VIDEO_URL)
            data = {'v': video_id}
            # Copy XHR: assigning into it directly would leave this page's
            # Referer in the shared constant for every later request.
            headers = dict(XHR)
            headers['Referer'] = page_url
            html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_json(html, url)
            for source in sources:
                match = re.search('url=(.*)', sources[source])
                if match:
                    stream_url = urllib.unquote(match.group(1))
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        # The JSON key is handed to height_get_quality here.
                        quality = scraper_utils.height_get_quality(source)
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    hosters.append(hoster)
    return hosters
开发者ID:assli100,项目名称:kodi-openelec,代码行数:28,代码来源:xmovies8_scraper.py
示例5: get_sources
def get_sources(self, video):
    """Build the list of direct-stream source dicts for ``video``.

    The page-wide quality comes from the ``poster-qulabel`` div (mapped via
    ``Q_MAP``, defaulting to ``QUALITIES.HIGH``). Each ``tab_box`` div
    supplies a stream URL, either from an inline ``file: "..."`` entry or
    from ``self.__get_ajax_sources``.
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)

    page_quality = QUALITIES.HIGH
    q_str = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        q_str = q_str[0].replace(' ', '').upper()
        page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)

    for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
        inline = re.search('file\s*:\s*"([^"]+)', fragment)
        if inline:
            stream_url = inline.group(1)
        else:
            stream_url = self.__get_ajax_sources(fragment, page_url)
        if not stream_url:
            continue

        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = page_quality
        stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
        sources.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True})
    return sources
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:32,代码来源:moviego_scraper.py
示例6: get_sources
def get_sources(self, video):
    """Build the list of direct-stream hoster dicts for ``video``.

    The view count is read from the page, then the markup returned by
    ``self.__get_watch_now`` is scanned per server (``span.svname``) for
    ``ep_N`` links. For movies the number in the link text is used as the
    stream height; for episodes it must equal ``video.episode``.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        match = re.search('<b>Views:.*?([\d,]+)', html)
        if match:
            views = int(match.group(1).replace(',', ''))
        else:
            views = None

        html = self.__get_watch_now(html)
        for server in re.finditer('<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html):
            title, fragment = server.groups()
            for link in re.finditer('<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment):
                stream_url, name = link.groups()
                number = re.search('(\d+)', name)
                if video.video_type == VIDEO_TYPES.MOVIE:
                    if number:
                        quality = scraper_utils.height_get_quality(number.group(1))
                    else:
                        quality = QUALITIES.HIGH
                else:
                    # Compare the matched digits, not the whole link text:
                    # int(name) raised ValueError on any non-numeric label.
                    if not number or int(number.group(1)) != int(video.episode):
                        continue
                    quality = QUALITIES.HIGH

                stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
                hoster['title'] = title
                hosters.append(hoster)
    return hosters
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:33,代码来源:watchhd_scraper.py
示例7: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    Every link in the ``ul.css_server_new`` list is read as (url, host
    name). Hosts listed in ``GVIDEO_NAMES`` are expanded to direct links
    through ``self.__get_links``; all others are passed on as-is.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>(.*?)(?:-\d+)?</a>', fragment[0]):
                link_url, host = match.groups()
                host = host.lower()
                host = re.sub('<img.*?/>', '', host)
                host = HOSTS.get(host, host)
                log_utils.log('%s - %s' % (link_url, host))
                if host in GVIDEO_NAMES:
                    sources = self.__get_links(urlparse.urljoin(self.base_url, link_url))
                    direct = True
                else:
                    sources = {link_url: host}
                    direct = False

                for source in sources:
                    # Keep `source` untouched: it is the key into `sources`.
                    # Appending the header suffix to it made the lookup
                    # below raise KeyError for every gvideo link.
                    if self._get_direct_hostname(source) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    else:
                        # NOTE(review): the URL, not the host name, is passed
                        # as the second argument here - confirm intended.
                        quality = scraper_utils.get_quality(video, source, QUALITIES.HIGH)
                        stream_url = source

                    hoster = {'multi-part': False, 'host': sources[source], 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                    hosters.append(hoster)
    return hosters
开发者ID:henry73,项目名称:salts,代码行数:31,代码来源:view47_scraper.py
示例8: get_sources
def get_sources(self, video):
    """Build the list of direct-file hoster dicts for ``video``.

    Non-directory entries returned by ``self._get_files`` are matched
    against the video: by normalized title for movies, by season and
    episode number otherwise. Entries whose parsed ``extra`` text contains
    "dubbed" are skipped.
    """
    hosters = []
    source_url = self.get_url(video)
    norm_title = scraper_utils.normalize_title(video.title)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    source_url = urlparse.urljoin(self.base_url2, source_url)
    for entry in self._get_files(source_url, cache_limit=24):
        if entry['directory']:
            continue

        if video.video_type == VIDEO_TYPES.MOVIE:
            match_title, _match_year, height, extra = scraper_utils.parse_movie_link(entry['link'])
            matched = norm_title in scraper_utils.normalize_title(match_title)
        else:
            _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(entry['link'])
            matched = int(video.season) == int(season) and int(video.episode) == int(episode)

        if 'dubbed' in extra.lower() or not matched:
            continue

        stream_url = entry['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
        hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        if 'x265' in extra:
            hoster['format'] = 'x265'
        if 'size' in entry:
            hoster['size'] = scraper_utils.format_size(int(entry['size']))
        hosters.append(hoster)
    return hosters
开发者ID:monicarero,项目名称:repository.xvbmc,代码行数:27,代码来源:farda_scraper.py
示例9: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    Links come from ``self.__get_gk_links`` (falling back to
    ``self.__get_gk_links2`` when it returns nothing), merged with
    ``self.__get_iframe_links``. gvideo links are marked direct and get a
    User-Agent suffix; everything else is passed through as a non-direct
    link.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    sources = self.__get_gk_links(html, url)
    if not sources:
        sources = self.__get_gk_links2(html)
    sources.update(self.__get_iframe_links(html))

    for source in sources:
        host = self._get_direct_hostname(source)
        direct = host == 'gvideo'
        if direct:
            quality = sources[source]
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        else:
            stream_url = source
            if self.base_url in source:
                # For on-site links the mapped value is used as the host name.
                host = sources[source]
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            else:
                host = urlparse.urlparse(source).hostname
                quality = sources[source]

        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct})
    return hosters
开发者ID:ScriptUp,项目名称:salts,代码行数:32,代码来源:tunemovie_scraper.py
示例10: __get_cloud_links
def __get_cloud_links(self, html, page_url, sub):
    """Collect direct stream hosters from the page's cloud player scripts.

    The episode id is read from the ``dizi_kapak_getir('...')`` call. Each
    ``data-cfasync="false"`` script is fetched, and when it defines both
    ``kapak_url`` and ``aCtkp`` the resulting link is requested and its
    JSON ``variants`` are turned into hoster dicts tagged with ``sub``.

    Returns a (possibly empty) list of hoster dicts.
    """
    hosters = []
    html = html.replace('\\"', '"').replace('\\/', '/')
    match = re.search("dizi_kapak_getir\('([^']+)", html)
    if not match:
        return hosters

    ep_id = match.group(1)
    for script_url in dom_parser.parse_dom(html, 'script', {'data-cfasync': 'false'}, ret='src'):
        html = self._http_get(script_url, cache_limit=24)
        match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
        match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
        if not (match1 and match2):
            continue

        link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
        headers = {'Referer': page_url}
        html = self._http_get(link_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, link_url)
        for variant in js_data.get('variants', {}):
            # random.choice raises IndexError on an empty sequence, so a
            # variant without hosts is skipped rather than aborting.
            hosts = variant.get('hosts', [])
            if not hosts:
                continue
            stream_host = random.choice(hosts)
            if not stream_host:
                continue

            stream_url = STREAM_URL % (stream_host, variant['path'], scraper_utils.get_ua(), urllib.quote(page_url))
            if not stream_url.startswith('http'):
                stream_url = 'http://' + stream_url
            host = self._get_direct_hostname(stream_url)
            if 'width' in variant:
                quality = scraper_utils.width_get_quality(variant['width'])
            elif 'height' in variant:
                quality = scraper_utils.height_get_quality(variant['height'])
            else:
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hoster['subs'] = sub
            hosters.append(hoster)
    return hosters
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:32,代码来源:dizilab_scraper.py
示例11: get_sources
def get_sources(self, video):
    """Build the list of direct-file hoster dicts for ``video``.

    Non-directory entries from ``self._get_files`` are matched by
    normalized title (movies) or by ``self.__episode_match`` (episodes).
    Matches whose parsed metadata is flagged ``dubbed`` are skipped.
    """
    hosters = []
    source_url = self.get_url(video)
    norm_title = scraper_utils.normalize_title(video.title)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    source_url = urlparse.urljoin(self.base_url, source_url)
    for entry in self._get_files(source_url, cache_limit=24):
        if entry['directory']:
            continue

        matched = False
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(entry['link'])
            matched = norm_title in scraper_utils.normalize_title(meta['title'])
        elif self.__episode_match(entry, video):
            matched = True
            meta = scraper_utils.parse_episode_link(entry['link'])

        if not matched or meta['dubbed']:
            continue

        stream_url = entry['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
        # The base URL is removed from the final stream URL.
        stream_url = stream_url.replace(self.base_url, '')
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        if 'format' in meta:
            hoster['format'] = meta['format']
        if 'size' in entry:
            hoster['size'] = scraper_utils.format_size(int(entry['size']))
        hosters.append(hoster)
    return hosters
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:28,代码来源:farda_scraper.py
示例12: __get_links
def __get_links(self, url):
    """Extract direct stream hosters from an embed snippet.

    ``url`` is searched for a ``src="..."`` attribute. That page is
    fetched, the script URL assembled from its pieces (with a random
    suffix) is fetched in turn, and each ``file``/``label`` pair found
    there becomes one hoster dict.

    Returns a (possibly empty) list of hoster dicts.
    """
    sources = []
    src = re.search('src="([^"]+)', url)
    if not src:
        return sources

    url = src.group(1).replace('\\/', '/')
    html = self._http_get(url, cache_limit=0)
    script = re.search('<script\s+src="([^\']+)\'\+(\d+)\+\'([^\']+)', html)
    if not script:
        return sources

    page_url = ''.join(script.groups()) + str(random.random())
    html = self._http_get(page_url, cache_limit=0)
    for entry in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"', html):
        stream_url, height = entry.groups()
        stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
        if 'v.asp' in stream_url and 'ok.ru' not in url:
            # Request without following redirects and use the result if any.
            stream_redirect = self._http_get(stream_url, allow_redirect=False, cache_limit=0)
            if stream_redirect:
                stream_url = stream_redirect

        if self._get_direct_hostname(stream_url) == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)

        host = self._get_direct_hostname(stream_url)
        stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(url))
        sources.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return sources
开发者ID:assli100,项目名称:kodi-openelec,代码行数:29,代码来源:sezonlukdizi_scraper.py
示例13: get_sources
def get_sources(self, video):
    """Build the list of direct-stream hoster dicts for ``video``.

    The view count is read from the page, the ``btn_watch_detail`` link is
    followed, and every ``ep_N`` link under each server
    (``span.svname``) becomes one hoster. A number in the link text is
    used as the stream height.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    counter = re.search('<b>Views:.*?([\d,]+)', html)
    views = int(counter.group(1).replace(',', '')) if counter else None

    button = dom_parser.parse_dom(html, 'a', {'class': '[^"]*btn_watch_detail[^"]*'}, ret='href')
    if not button:
        return hosters

    html = self._http_get(button[0], cache_limit=.5)
    for server in re.finditer('<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html):
        title, fragment = server.groups()
        for link in re.finditer('<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment):
            stream_url, name = link.groups()
            number = re.search('(\d+)', name)
            if number:
                quality = scraper_utils.height_get_quality(number.group(1))
            else:
                quality = QUALITIES.HIGH

            stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
            hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
            hoster['title'] = title
            hosters.append(hoster)
    return hosters
开发者ID:assli100,项目名称:kodi-openelec,代码行数:28,代码来源:watchhd_scraper.py
示例14: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    Follows the dropdown option whose text matches ``Altyaz...s...z``,
    then the player iframe, then the inner ``ifr`` iframe. The last
    request is made without following redirects; when its result starts
    with ``http`` it is used as the stream URL.

    Returns a list with at most one hoster dict.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    menu = dom_parser.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
    if not menu:
        return hosters

    option = re.search('''href=['"]([^'"]+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', menu[0])
    if not option:
        return hosters

    option_url = urlparse.urljoin(self.base_url, option.group(1))
    html = self._http_get(option_url, cache_limit=2)
    player = dom_parser.parse_dom(html, 'div', {'class': 'video-player'})
    if not player:
        return hosters

    iframe_url = dom_parser.parse_dom(player[0], 'iframe', ret='src')
    if not iframe_url:
        return hosters

    html = self._http_get(iframe_url[0], cache_limit=.25)
    iframe_url = dom_parser.parse_dom(html, 'iframe', {'id': 'ifr'}, ret='src')
    if not iframe_url:
        return hosters

    html = self._http_get(iframe_url[0], allow_redirect=False, cache_limit=.25)
    if html.startswith('http'):
        stream_url = html
        host = urlparse.urlparse(stream_url).hostname
        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
        quality = QUALITIES.HIGH
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False})
    return hosters
开发者ID:Stevie-Bs,项目名称:repository.xvbmc,代码行数:29,代码来源:onlinedizi_scraper.py
示例15: get_sources
def get_sources(self, video):
    """Build the list of direct-stream hoster dicts for ``video``.

    The view count is read next to the ``view_icon.png`` image. The
    ``-full-movie-`` link is followed when present, sources are gathered
    with ``self.__get_sources``, and the link preceding a ``<button`` is
    followed for additional sources.
    """
    source_url = self.get_url(video)
    hosters = []
    # A dict, not a list: it is extended with .update() and indexed by
    # source URL below.
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        views = None
        fragment = dom_parser.parse_dom(html, 'img', {'src': '[^"]*view_icon.png'})
        if fragment:
            match = re.search('(\d+)', fragment[0])
            if match:
                views = match.group(1)

        match = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if match:
            url = match.group(1)
            html = self._http_get(url, cache_limit=.5)

        sources = self.__get_sources(html, url)

        match = re.search('href="([^"]+)[^>]*>\s*<button', html)
        if match:
            html = self._http_get(match.group(1), cache_limit=.5)
            sources.update(self.__get_sources(html, url))

        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            quality = sources[source]['quality']
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
    return hosters
开发者ID:kevintone,项目名称:tdbaddon,代码行数:35,代码来源:m4ufree_scraper.py
示例16: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    Server names (``p.server_servername``) are paired with their play
    links (``p.server_play``). A server named "google" is expanded with
    ``self.__get_gvideo_links``; every other server yields a single
    non-direct link.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
        links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser.parse_dom(link_frag, 'a', ret='href')
            if not stream_url:
                continue

            stream_url = stream_url[0]
            # flags must be passed by keyword: the fourth positional
            # argument of re.sub is `count`, so re.I was being used as a
            # replacement limit and the match stayed case-sensitive.
            host = re.sub('^Server\s*', '', host, flags=re.I)
            host = re.sub('\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]

            for source in sources:
                stream_url = source['link']
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    direct = True
                else:
                    host = HOST_SUB.get(source['host'].lower(), source['host'])
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                    direct = False
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:34,代码来源:moviehubs_scraper.py
示例17: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    The site's ``/javascript/movies.js`` is searched for the ``play('...')``
    call tied to this video's element id (the source URL without its
    leading slash). Google Drive/Docs links are expanded with
    ``scraper_utils.parse_google``; anything else is used directly.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    if source_url.startswith('/'):
        source_url = source_url[1:]

    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    found = re.search(pattern, html, re.I)
    if not found:
        return hosters

    stream_url = found.group(1)
    if 'drive.google' in stream_url or 'docs.google' in stream_url:
        sources = scraper_utils.parse_google(self, stream_url)
    else:
        sources = [stream_url]

    for source in sources:
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        host = scraper_utils.get_direct_hostname(self, source)
        # Defaults cover the plain direct-link case.
        quality, direct = QUALITIES.HIGH, True
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        elif 'youtube' in stream_url:
            quality, direct, host = QUALITIES.HD720, False, 'youtube.com'

        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct})
    return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:33,代码来源:piratejunkies_scraper.py
示例18: get_sources
def get_sources(self, video):
    """Build the list of hoster dicts for ``video``.

    The view count is taken from the ``Views:`` list item (digits only).
    Server names are paired with their play links; servers whose name
    contains "google" are expanded with ``self.__get_google_links`` into
    direct streams, all others yield one non-direct link.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    views = None
    counter = re.search('<li>\s*Views\s*:\s*(.*?)</li>', html)
    if counter:
        views = re.sub('[^0-9]', '', counter.group(1))

    hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
    links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
    for host, link_text in zip(hosts, links):
        host = host.lower().replace('server', '').strip()
        href = re.search('href="([^"]+)', link_text)
        if not href:
            continue

        link = href.group(1)
        if 'google' not in host:
            hosters.append({'multi-part': False, 'url': link, 'class': self, 'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'host': host, 'rating': None, 'views': views, 'direct': False})
            continue

        for source in self.__get_google_links(link):
            # Quality and host are derived from the URL after the header
            # suffix has been appended.
            source += '|User-Agent=%s' % (scraper_utils.get_ua())
            hosters.append({'multi-part': False, 'url': source, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'host': self._get_direct_hostname(source), 'rating': None, 'views': views, 'direct': True})
    return hosters
开发者ID:henry73,项目名称:salts,代码行数:31,代码来源:tunemovie_scraper.py
示例19: get_sources
def get_sources(self, video):
    """Build the list of direct-stream hoster dicts for ``video``.

    The iframe inside ``div.meta-media`` is fetched and every
    ``window.location.href = "..."`` target in it is recorded with its
    quality (derived from the URL for gvideo links, ``QUALITIES.HIGH``
    otherwise). Each recorded URL becomes one hoster.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'meta-media'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                html = self._http_get(iframe_url, cache_limit=.5)
                for match in re.finditer('window.location.href\s*=\s*"([^"]+)', html):
                    stream_url = match.group(1)
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
                    else:
                        # Key on the stream found here; the original keyed
                        # on the page's own source_url, so every non-gvideo
                        # link collapsed into a single page-URL entry.
                        sources[stream_url] = QUALITIES.HIGH

    for source in sources:
        # Work from the loop variable. The original used the leftover
        # stream_url from the scan above for the host and built the
        # User-Agent suffix without ever putting it in the hoster.
        host = self._get_direct_hostname(source)
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:27,代码来源:firemovies_scraper.py
示例20: get_sources
def get_sources(self, video):
    """Build the list of direct-stream hoster dicts for ``video``.

    Pages matching the trailer marker text yield no sources. Otherwise the
    AJAX ``url``/``data`` pair embedded in the page is POSTed and every
    ``videolink``/``videokalite`` pair in the reply becomes one hoster.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    # exit early if trailer
    if re.search('Şu an fragman*', html, re.I):
        return hosters

    ajax = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:\s*["'](id=\d+)''', html)
    if not ajax:
        return hosters

    url, data = ajax.groups()
    url = urlparse.urljoin(self.base_url, url)
    result = self._http_get(url, data=data, headers=XHR, cache_limit=.5)
    for entry in re.finditer('"videolink\d*"\s*:\s*"([^"]+)","videokalite\d*"\s*:\s*"?(\d+)p?', result):
        stream_url, height = entry.groups()
        stream_url = stream_url.replace('\\/', '/')
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)
        stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
开发者ID:azumimuo,项目名称:family-xbmc-addon,代码行数:29,代码来源:dizimag_scraper.py
注：本文中的salts_lib.scraper_utils.get_ua函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论