本文整理汇总了Python中salts_lib.dom_parser.parse_dom函数的典型用法代码示例。如果您正苦于以下问题:Python parse_dom函数的具体用法?Python parse_dom怎么用?Python parse_dom使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_dom函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: search
def search(self, video_type, title, year):
    """Search the site for *title* and return a list of result dicts.

    Each result has 'url' (path-only), 'title' and 'year'. Episode links
    are skipped; results are year-filtered when both years are known.
    """
    results = []
    search_url = urlparse.urljoin(self.__get_base_url(video_type), '/?s=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    for movie in dom_parser.parse_dom(html, 'div', {'class': 'movie'}):
        match = re.search('href="([^"]+)', movie)
        if match:
            match_url = match.group(1)
            # episode pages show up in search results; they are not wanted here
            if re.search('season-\d+-episode\d+', match_url): continue
            match_title_year = dom_parser.parse_dom(movie, 'img', ret='alt')
            if match_title_year:
                match_title_year = match_title_year[0]
                match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    # alt text carried no year; fall back to a dedicated year div
                    match_year = dom_parser.parse_dom(movie, 'div', {'class': 'year'})
                    try:
                        match_year = match_year[0]
                    except IndexError:  # narrowed from bare except: only an empty list is expected
                        match_year = ''
                if not year or not match_year or year == match_year:
                    result = {'url': self._pathify_url(match_url), 'title': match_title, 'year': match_year}
                    results.append(result)
    return results
开发者ID:edwtjo,项目名称:salts,代码行数:26,代码来源:iflix_scraper.py
示例2: get_sources
def get_sources(self, video):
    """Resolve direct hosters for *video*.

    Follows the no-subtitle <option> target, extracts the player iframe,
    then scrapes file/label pairs into hoster dicts (deduplicated by URL).
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        # pick the option whose label matches "Altyaz_s_z" (no-subtitle version)
        match = re.search('''<option[^>]+value\s*=\s*["']([^"']+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', html)
        if match:
            option_url = urlparse.urljoin(self.base_url, match.group(1))
            html = self._http_get(option_url, cache_limit=.25)
            fragment = dom_parser.parse_dom(html, 'span', {'class': 'object-wrapper'})
            if fragment:
                iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
                if iframe_url:
                    html = self._http_get(iframe_url[0], cache_limit=.25)
                    seen_urls = {}
                    for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"', html):
                        stream_url, height = match.groups()
                        if stream_url not in seen_urls:
                            seen_urls[stream_url] = True
                            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                            host = self._get_direct_hostname(stream_url)
                            if host == 'gvideo':
                                quality = scraper_utils.gv_get_quality(stream_url)
                            else:
                                quality = scraper_utils.height_get_quality(height)
                            # FIX: reuse the already-computed host instead of calling
                            # _get_direct_hostname() a second time for the same URL
                            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                            hosters.append(hoster)
    return hosters
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:31,代码来源:dizibox_scraper.py
示例3: get_sources
def get_sources(self, video):
    """Collect hosters for *video*: one direct <source> stream per language
    link, plus any alternate servers listed on the same player page."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie_langs_list[^"]*'})
        if fragment:
            for match in re.finditer('href="([^"]+)', fragment[0]):
                match = re.search('movie-player/(.*)', match.group(1))
                if match:
                    player_url = urlparse.urljoin(self.base_url, PLAYER_URL % (match.group(1)))
                    html = self._http_get(player_url, cache_limit=.5)
                    match = re.search('<source\s+src="([^"]+)', html)
                    if match:
                        stream_url = match.group(1)
                        hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': self._gv_get_quality(stream_url), 'host': self._get_direct_hostname(stream_url), 'rating': None, 'views': None, 'direct': True}
                        hosters.append(hoster)
                    # Alternate servers on the player page; a server pointing back at
                    # the page we just scraped is skipped.
                    # NOTE(review): indentation reconstructed from flattened source —
                    # fragment2 reads player_url/html so it must live in this branch; confirm.
                    fragment2 = dom_parser.parse_dom(html, 'ul', {'class': 'servers'})
                    if fragment2:
                        for match in re.finditer('href="([^"]+).*?<span>(.*?)</span>', fragment2[0]):
                            other_url, quality = match.groups()
                            match = re.search('movie-player/(.*)', other_url)
                            if match:
                                other_url = urlparse.urljoin(self.base_url, PLAYER_URL % (match.group(1)))
                                if other_url == player_url: continue
                                hoster = {'multi-part': False, 'url': other_url, 'class': self, 'quality': QUALITY_MAP.get(quality, QUALITIES.HD720), 'host': self._get_direct_hostname(other_url), 'rating': None, 'views': None, 'direct': True}
                                hosters.append(hoster)
    return hosters
开发者ID:beljim,项目名称:tknorris-beta-repo,代码行数:32,代码来源:beinmovie_scraper.py
示例4: search
def search(self, video_type, title, year):
    """Query the site search and return [{'title', 'year', 'url'}, ...],
    keeping only entries whose year matches when both years are known."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?query=') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'movie-info'}):
        title_frag = dom_parser.parse_dom(item, 'span', {'class': 'movie-title'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'movie-year'})
        if not title_frag:
            continue
        match_title = self.__strip_link(title_frag[0])
        match_year = self.__strip_link(year_frag[0]) if year_frag else ''
        link = re.search('href="([^"]+)', item)
        if not link:
            continue
        url = link.group(1)
        if not year or not match_year or year == match_year:
            results.append({'title': match_title, 'year': match_year, 'url': url.replace(self.base_url, '')})
    return results
开发者ID:rickardrocks,项目名称:tknorris-beta-repo,代码行数:27,代码来源:pctf_scraper.py
示例5: search
def search(self, video_type, title, year):
search_url = urlparse.urljoin(self.base_url, '/arsiv?limit=&tur=&orderby=&ulke=&order=&yil=&dizi_adi=')
search_url += urllib.quote_plus(title)
html = self._http_get(search_url, cache_limit=8)
results = []
for item in dom_parser.parse_dom(html, 'div', {'class': 'tv-series-single'}):
try:
url = re.search('href="([^"]+)', item).group(1)
except:
url = ''
try:
match_year = re.search('<span>\s*(\d{4})\s*</span>', item).group(1)
except:
match_year = ''
try:
match_title = dom_parser.parse_dom(item, 'a', {'class': 'title'})
match_title = re.search('([^>]+)$', match_title[0]).group(1)
match_title = match_title.strip()
except:
match_title = ''
if url and match_title and (not year or not match_year or year == match_year):
result = {'url': self._pathify_url(url), 'title': match_title, 'year': ''}
results.append(result)
return results
开发者ID:sfennell,项目名称:salts,代码行数:28,代码来源:dizilab_scraper.py
示例6: __movie_search
def __movie_search(self, title, year):
    """Search for a movie by title; returns [{'url', 'title', 'year'}, ...].

    The results page exposes no year, so 'year' is always '' and the
    year filter effectively passes everything.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        # FIX: removed the original's dead `if match_year:` branch — match_year
        # was unconditionally set to '' immediately before, so the branch could
        # never execute; behavior is unchanged.
        match_year = ''
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:24,代码来源:moviewatcher_scraper.py
示例7: get_sources
def get_sources(self, video):
    """Scrape hoster links for *video* from the episode page.

    Each ldr-item div contributes one (indirect) hoster; view count and a
    0-100 rating are extracted when present.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*ldr-item[^"]*'}):
            stream_url = dom_parser.parse_dom(link, 'a', ret='data-actuallink')
            views = None
            watched = dom_parser.parse_dom(link, 'div', {'class': 'click-count'})
            if watched:
                match = re.search(' (\d+) ', watched[0])
                if match:
                    views = match.group(1)
            # score starts as a (possibly empty) list of strings; only convert
            # when a point div was found
            score = dom_parser.parse_dom(link, 'div', {'class': '\s*point\s*'})
            if score:
                score = int(score[0])
            # both an empty list and a 0 score are falsy -> rating None
            rating = score * 10 if score else None
            if stream_url:
                stream_url = stream_url[0].strip()
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': rating, 'url': stream_url, 'direct': False}
                hosters.append(hoster)
    return hosters
开发者ID:monicarero,项目名称:repository.xvbmc,代码行数:29,代码来源:watchepisodes_scraper.py
示例8: search
def search(self, video_type, title, year, season=''):
    """Search the site; parse 'Title (YYYY)' headers into title/year pairs
    and filter by year when both sides are known."""
    results = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL) + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*items[^"]*'})
    if not fragment:
        return results
    for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'item'}):
        urls = dom_parser.parse_dom(item, 'a', {'class': 'header'}, ret='href')
        labels = dom_parser.parse_dom(item, 'a', {'class': 'header'})
        if not (urls and labels):
            continue
        match_url = urls[0]
        label = labels[0]
        parsed = re.search('(.*?)\s+\((\d{4})\)', label)
        if parsed:
            match_title, match_year = parsed.groups()
        else:
            match_title, match_year = label, ''
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(match_url), 'title': match_title, 'year': match_year})
    return results
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:25,代码来源:sezonlukdizi_scraper.py
示例9: search
def search(self, video_type, title, year, season=''):
    """Search for movies or seasons; season entries are detected by a
    ' - SNN' suffix in the result title."""
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html' % urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'list-movie'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'movie'}):
            match = re.search('class="movie-name".*?href="([^"]+)[^>]+>([^<]+)', item)
            if match:
                url, match_title = match.groups()
                is_season = re.search('\s+-\s+[Ss](\d+)$', match_title)
                # BUG FIX: the original tested `is_season and VIDEO_TYPES.SEASON`,
                # which is always truthy whenever a season suffix exists — season
                # results leaked into every video_type. Compare video_type instead.
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    match_year = ''
                    if video_type == VIDEO_TYPES.MOVIE:
                        # prefer a year from the info paragraphs...
                        for info_frag in dom_parser.parse_dom(item, 'p', {'class': 'info'}):
                            match = re.search('(\d{4})', info_frag)
                            if match:
                                match_year = match.group(1)
                                break
                        # ...falling back to a trailing year in the URL
                        if not match_year:
                            match = re.search('(\d{4})$', url)
                            if match:
                                match_year = match.group(1)
                    else:
                        if season and int(is_season.group(1)) != int(season):
                            continue
                    if (not year or not match_year or year == match_year):
                        result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                        results.append(result)
    return results
开发者ID:kevintone,项目名称:tdbaddon,代码行数:33,代码来源:putmv_scraper.py
示例10: search
def search(self, video_type, title, year, season=''):
    """Search the site; ml-item blocks with an episode badge are treated as
    seasons, the rest as movies."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/?q=') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'ml-item'}):
        title_frag = dom_parser.parse_dom(item, 'span', {'class': 'mli-info'})
        url_match = re.search('href="([^"]+)', item, re.DOTALL)
        alt_frag = dom_parser.parse_dom(item, 'img', ret='alt')
        episodes_frag = dom_parser.parse_dom(item, 'span', {'class': 'mli-eps'})
        type_ok = (video_type == VIDEO_TYPES.MOVIE and not episodes_frag) or (video_type == VIDEO_TYPES.SEASON and episodes_frag)
        if not type_ok or not (title_frag and url_match):
            continue
        match_url = url_match.group(1)
        match_title = re.sub('</?h2>', '', title_frag[0])
        match_title = re.sub('\s+\d{4}$', '', match_title)
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+%s$' % (season), match_title):
                continue
            if not match_url.endswith('/'):
                match_url += '/'
            match_url = urlparse.urljoin(match_url, 'watch/')
        match_year = ''
        if video_type == VIDEO_TYPES.MOVIE and alt_frag:
            year_match = re.search('\s*-\s*(\d{4})$', alt_frag[0])
            if year_match:
                match_year = year_match.group(1)
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)})
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:33,代码来源:watch5s_scraper.py
示例11: search
def search(self, video_type, title, year, season=''):
    """Search the site; strips markup and 'Watch Movie' noise from titles,
    parses 'Title (YYYY...)' and falls back to a year span when needed."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}):
        name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'})
        if name:
            match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0])
            if match:
                match_url, match_title_year = match.groups()
                if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE: continue
                match_title_year = re.sub('</?[^>]*>', '', match_title_year)
                match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
                # FIX: the published snippet contained the mojibake 'â€™'
                # (UTF-8 right single quote decoded as cp1252); restore the
                # intended curly-quote -> apostrophe normalization.
                match_title_year = match_title_year.replace('’', "'")
                match = re.search('(.*?)\s+\((\d{4})[^)]*\)$', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if not match_year:
                    # no year in the title; look for a dedicated year span
                    year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'})
                    if year_span:
                        year_text = dom_parser.parse_dom(year_span[0], 'a')
                        if year_text:
                            match_year = year_text[0].strip()
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                    results.append(result)
    return results
开发者ID:AMOboxTV,项目名称:AMOBox.LegoBuild,代码行数:35,代码来源:moviexk_scraper.py
示例12: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated post list looking for *video*'s episode.

    Matches by SxxEyy pattern or airdate pattern unless force_title is set,
    in which case the episode title is matched (when the fallback setting is
    on). Returns a pathified URL, or None implicitly when nothing matches or
    the posts get older than the cutoff.
    """
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    # ep_airdate may be missing/None -> no airdate matching
    try: airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except: airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    # follow "next posts" links until a match, the end, or posts too old
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:32,代码来源:2ddl_scraper.py
示例13: search
def search(self, video_type, title, year, season=''):
    """Search the site; the first <li> is a header and is skipped.

    Season entries are detected by a trailing 'S<n>'/'Season <n>' suffix.
    """
    # FIX: the published snippet had '&limit=20×tamp=%s' — '&times' was
    # HTML-entity-decoded into '×'; restore the intended '&timestamp=' param.
    search_url = urlparse.urljoin(self.base_url, '/search.php?q=%s&limit=20&timestamp=%s' % (urllib.quote_plus(title), int(time.time())))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    items = dom_parser.parse_dom(html, 'li')
    if len(items) >= 2:
        items = items[1:]  # drop the header row
    for item in items:
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'strong')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title_year = re.sub('</?strong>', '', match_title_year[0])
            is_season = re.search('S(?:eason\s+)?(\d+)$', match_title_year, re.I)
            # BUG FIX: original tested `is_season and VIDEO_TYPES.SEASON`
            # (always truthy), so seasons matched every video type.
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                if video_type == VIDEO_TYPES.MOVIE:
                    match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                        match_year = ''
                else:
                    log_utils.log(is_season.group(1))
                    if season and int(is_season.group(1)) != int(season):
                        continue
                    match_title = match_title_year
                    match_year = ''
                result = {'title': match_title, 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
开发者ID:henry73,项目名称:salts,代码行数:32,代码来源:view47_scraper.py
示例14: search
def search(self, video_type, title, year):
    """Search via the site's XHR endpoint; returns result dicts with the raw
    (non-pathified) URL, filtered by year when both sides are known."""
    results = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, headers=XHR, cache_limit=1)
    for film in dom_parser.parse_dom(html, "li", {"class": "films-item"}):
        urls = dom_parser.parse_dom(film, "a", ret="href")
        titles = dom_parser.parse_dom(film, "div", {"class": "films-item-title"})
        years = dom_parser.parse_dom(film, "div", {"class": "films-item-year"})
        if not (urls and titles):
            continue
        match_title = re.sub("</?span>", "", titles[0])
        match_year = ""
        if years:
            year_match = re.search("(\d+)", years[0])
            if year_match:
                match_year = year_match.group(1)
        if not year or not match_year or year == match_year:
            results.append({"title": match_title, "year": match_year, "url": urls[0]})
    return results
开发者ID:SQL-MisterMagoo,项目名称:salts,代码行数:27,代码来源:torbase_scraper.py
示例15: get_sources
def get_sources(self, video):
    """Scrape iframe hosters from the video page.

    A 'calidad' badge supplies the quality (or aborts on 'COMING SOON');
    otherwise quality defaults to HIGH.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = dom_parser.parse_dom(html, 'span', {'class': 'calidad\d*'})
        if q_str:
            if q_str[0].upper() == 'COMING SOON':
                return hosters  # unreleased title: no sources to offer
            try:
                quality = self._height_get_quality(q_str[0])
            except Exception:  # narrowed from bare except; keep best-effort fallback
                quality = QUALITIES.HIGH
        else:
            quality = QUALITIES.HIGH
        fragment = dom_parser.parse_dom(html, 'div', {'id': 'player\d+'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False}
                hosters.append(hoster)
    return hosters
开发者ID:beljim,项目名称:tknorris-beta-repo,代码行数:25,代码来源:filmstreaming_scraper.py
示例16: search
def search(self, video_type, title, year, season=''):
    """TV shows come from the full series index; movies from the search form
    (with one retry after the site's rate-limit wait). Matches by normalized
    title containment; year is never populated."""
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        throttled = re.search('you can search again in (\d+) seconds', html, re.I)
        if throttled:
            # honor the rate limit, capped at our own timeout, then retry uncached
            time.sleep(min(int(throttled.group(1)), self.timeout))
            html = self._http_get(url, data=data, cache_limit=0)
        items = re.findall('class="movie_box.*?href="([^"]+).*?<h1>([^<]+)', html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for match_url, match_title in items:
        if norm_title in scraper_utils.normalize_title(match_title):
            results.append({'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''})
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:31,代码来源:moviestorm_scraper.py
示例17: search
def search(self, video_type, title, year, season=''):
    """Search the site; drops excluded-quality items and 'SEZON' (season)
    entries, and strips configured noise words from titles."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=8)
    strip_words = [word.decode('utf-8') for word in TITLE_STRIP]
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        url_match = re.search('href="([^"]+)', item)
        title_frag = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        if not (url_match and title_frag):
            continue
        quality_frag = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
        if quality_frag and quality_frag[0] in SEARCH_EXCLUDE:
            continue
        match_url = url_match.group(1)
        match_title = title_frag[0]
        if 'SEZON' in match_title.upper():
            continue
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        match_year = year_frag[0] if year_frag else ''
        match_title = ' '.join(word for word in match_title.split() if word.upper() not in strip_words)
        if (not year or not match_year or year == match_year):
            results.append({'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year})
    return results
开发者ID:kevintone,项目名称:tdbaddon,代码行数:28,代码来源:dizifilmhd_scraper.py
示例18: search
def search(self, video_type, title, year):
    """Search the site; results live in a movies/series section chosen by
    video_type. Titles are stripped of 'Watch ' / ' Online' wrappers."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/index.php?menu=search&query=') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
    fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
    if not fragment:
        return results
    for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
        link = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
        if not link:
            continue
        match_title_year, url = link.groups()
        parsed = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if parsed:
            match_title, match_year = parsed.groups()
        else:
            match_title, match_year = match_title_year, ''
        if match_title.startswith('Watch '):
            match_title = match_title.replace('Watch ', '')
        if match_title.endswith(' Online'):
            match_title = match_title.replace(' Online', '')
        if not year or not match_year or year == match_year:
            results.append({'title': match_title, 'url': scraper_utils.pathify_url(url), 'year': match_year})
    return results
开发者ID:assli100,项目名称:kodi-openelec,代码行数:26,代码来源:uflix_scraper.py
示例19: get_sources
def get_sources(self, video):
    """Scrape the stream table: one hoster per row, with optional view count
    and size; '/redirect/' links are base64-unwrapped to the real URL."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for row in dom_parser.parse_dom(html, 'div', {'class': 'stream-table__row'}):
        links = dom_parser.parse_dom(row, 'a', ret='href')
        views_match = re.search('Views:\s*(?:</span>)?\s*(\d+)', row, re.I)
        views = views_match.group(1) if views_match else None
        size_match = re.search('Size:\s*(?:</span>)?\s*(\d+)', row, re.I)
        size = int(size_match.group(1)) * 1024 * 1024 if size_match else None
        if not links:
            continue
        stream_url = links[0]
        redirect = re.search('/redirect/(.*)', stream_url)
        if redirect:
            stream_url = base64.decodestring(urllib.unquote(redirect.group(1)))
        host = urlparse.urlparse(stream_url).hostname
        if host:
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': False}
            if size is not None:
                hoster['size'] = scraper_utils.format_size(size, 'B')
            hosters.append(hoster)
    return hosters
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:34,代码来源:moviewatcher_scraper.py
示例20: search
def search(self, video_type, title, year, season=''):
    """Search the site; a trailing 'Season <n>' marks season entries, which
    must match the requested season number when one is given."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'movie'})
    if not fragment:
        return results
    for item in dom_parser.parse_dom(fragment[0], 'li'):
        urls = dom_parser.parse_dom(item, 'a', ret='href')
        titles = dom_parser.parse_dom(item, 'span', {'class': 'text'})
        years = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if not (urls and titles):
            continue
        match_url = urls[0]
        match_title = re.sub('</?strong>', '', titles[0])
        season_match = re.search('Season\s+(\d+)$', match_title, re.I)
        type_ok = (not season_match and video_type == VIDEO_TYPES.MOVIE) or (season_match and video_type == VIDEO_TYPES.SEASON)
        if not type_ok:
            continue
        if video_type == VIDEO_TYPES.MOVIE:
            match_year = years[0] if years else ''
        else:
            if season and int(season_match.group(1)) != int(season):
                continue
            match_year = ''
        results.append({'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)})
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:29,代码来源:vivoto_scraper.py
注:本文中的salts_lib.dom_parser.parse_dom函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论