本文整理汇总了Python中salts_lib.scraper_utils.normalize_title函数的典型用法代码示例。如果您正苦于以下问题:Python normalize_title函数的具体用法?Python normalize_title怎么用?Python normalize_title使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了normalize_title函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: search
def search(self, video_type, title, year, season=''): # @UnusedVariable
    """Search the site and return candidate matches for *title*.

    Returns a list of dicts: {'url': path, 'title': cleansed title, 'year': str}.
    TV shows are pulled from the TAGS anchor of each post; movies from the
    <h2> heading of each post.
    NOTE(review): the movie branch assumes headings and posts line up
    one-to-one (zip) -- confirm against the site's markup.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}  # de-duplicate shows that appear in multiple posts
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post: continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            # Skip wrong-category posts and posts older than the configured cutoff.
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # Substring match in either direction; year must agree when both are known.
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:32,代码来源:2ddl_scraper.py
示例2: __movie_search
def __movie_search(self, title, year):
    """Search the site's /search endpoint for a movie called *title*.

    Returns a list of dicts: {'url': path, 'title': cleansed title, 'year': str}.
    FIX: the original initialized match_year to '' and then tested
    ``if match_year:`` -- a dead branch that could never fire because the
    site's search results expose no year.  The dead code is removed; the
    year filter is kept for consistency with the sibling search methods
    (with an empty match_year it never rejects).
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            # Search results never carry a year, so this stays empty.
            match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:24,代码来源:moviewatcher_scraper.py
示例3: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Walk the show's post pages looking for the requested episode.

    Matches by SxxExx token or airdate token in the post heading; when
    title-matching is forced (or as a fallback), compares the normalized
    episode title against the <strong> text of the post.  Returns a
    pathified url on a hit, or None implicitly when pagination runs out
    or a too-old post is reached.
    """
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    # ep_airdate may be missing/None; fall back to no airdate matching.
    try: ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except: ep_airdate = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        # NOTE(review): assumes headings and post divs line up one-to-one (zip).
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True  # posts are newest-first; stop paging entirely
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
开发者ID:monicarero,项目名称:repository.xvbmc,代码行数:31,代码来源:ddlvalley_scraper.py
示例4: __tv_search
def __tv_search(self, title, year):
    """Search the alphabetical series index for *title*.

    Returns a list of dicts: {'url': path, 'title': cleansed title, 'year': str}.
    """
    matches = []
    if title:
        wanted = scraper_utils.normalize_title(title)
        letter_url = urlparse.urljoin(self.base_url, '/series/letra/%s/' % (title[0]))
        html = self._http_get(letter_url, cache_limit=48)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*bpM12[^"]*'}):
            title_frag = dom_parser.parse_dom(item, 'h2')
            year_frag = dom_parser.parse_dom(item, 'div', {'class': '[^"]*sectionDetail[^"]*'})
            links = dom_parser.parse_dom(item, 'a', ret='href')
            if not (title_frag and links):
                continue
            link = links[0]
            # The h2 holds "Title<br>..." -- keep only the part before <br>.
            title_match = re.search('(.*?)<br>', title_frag[0])
            found_title = title_match.group(1) if title_match else title_frag[0]
            found_year = ''
            if year_frag:
                year_match = re.search('(\d{4})', year_frag[0])
                if year_match:
                    found_year = year_match.group(1)
            if wanted in scraper_utils.normalize_title(found_title) and (not year or not found_year or year == found_year):
                matches.append({'url': scraper_utils.pathify_url(link), 'title': scraper_utils.cleanse_title(found_title), 'year': found_year})
    return matches
开发者ID:kevintone,项目名称:tdbaddon,代码行数:30,代码来源:pelispedia_scraper.py
示例5: __search
def __search(self, video_type, title, year, season=''):
    """Query the site's JSON search API and filter results by type/title/year.

    SEARCH_URL is a base64-encoded format string with a %s slot for the
    quoted title.  Only '/watch/' entries are considered; a 'Season N'
    marker in the result title distinguishes seasons from movies.
    """
    results = []
    search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower(): continue
        is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
        # Keep only entries whose kind matches the requested video_type.
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE:
                # Split "Title (2015)" into title + year when present.
                match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
            else:
                # Season search: honor a specific requested season number.
                if season and int(is_season.group(1)) != int(season):
                    continue
                match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = match_title_year
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:34,代码来源:hdmovie14_scraper.py
示例6: search
def search(self, video_type, title, year, season=''):
    """Search the account's transfer list for *title*.

    Returns a list of dicts: {'title': ..., 'year': ..., 'url': 'hash=...'}.
    FIX: the original gate read ``is_season and VIDEO_TYPES.SEASON`` --
    testing the truthy constant instead of comparing it to video_type --
    so any "season" name passed the filter even during movie searches.
    The corrected comparison matches the form used by the other scrapers.
    """
    url = urlparse.urljoin(self.base_url, LIST_URL)
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(title)
    results = []
    if 'transfers' in js_data:
        for item in js_data['transfers']:
            is_season = re.search('(.*?[._ ]season[._ ]+(\d+))[._ ](.*)', item['name'], re.I)
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                if re.search('[._ ]S\d+E\d+[._ ]', item['name']): continue  # skip episodes
                if video_type == VIDEO_TYPES.SEASON:
                    match_title, match_season, extra = is_season.groups()
                    # Honor a specific requested season number.
                    if season and int(match_season) != int(season):
                        continue
                    match_year = ''
                    match_title = re.sub('[._]', ' ', match_title)
                else:
                    # Split "Title (2015) extra" into its parts when possible.
                    match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                    if match:
                        match_title, match_year, extra = match.groups()
                    else:
                        match_title, match_year, extra = item['name'], '', ''
                match_title = match_title.strip()
                extra = extra.strip()
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result_title = match_title
                    if extra: result_title += ' [%s]' % (extra)
                    result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
                    results.append(result)
    return results
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:32,代码来源:premiumizev2_scraper.py
示例7: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Walk the show's post pages looking for the requested episode.

    Matches by SxxExx or airdate regex patterns in the post's anchor text;
    when title-matching is forced (or as a fallback), compares the
    normalized episode title against the text after </strong>.  Returns a
    pathified url on a hit, or None implicitly otherwise.
    """
    # Separator between tokens may be '.', '_' or ' '.
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    # ep_airdate may be missing/None; fall back to no airdate matching.
    try: airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except: airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True  # posts are newest-first; stop paging entirely
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:32,代码来源:2ddl_scraper.py
示例8: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Walk the show's post pages looking for the requested episode.

    Delegates release-name matching to scraper_utils.release_check; when
    title-matching is forced (or as a fallback), compares the normalized
    episode title against the text after </strong>.  Returns a pathified
    url on a hit, or None implicitly otherwise.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True  # posts are newest-first; stop paging entirely
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:28,代码来源:2ddl_scraper.py
示例9: search
def search(self, video_type, title, year, season=''):
    """Scan the site's front-page series list for *title*.

    Returns a list of dicts: {'url': path, 'title': cleansed title, 'year': str}.
    Note: only the title is filtered; the scraped year is reported as-is.
    """
    found = []
    html = self._http_get(self.base_url, cache_limit=48)
    wanted = scraper_utils.normalize_title(title)
    for series in dom_parser.parse_dom(html, 'div', {'class': 'series-item'}):
        urls = dom_parser.parse_dom(series, 'a', ret='href')
        titles = dom_parser.parse_dom(series, 'h3')
        year_frags = dom_parser.parse_dom(series, 'p')
        if not (urls and titles):
            continue
        series_url = urls[0]
        series_title = titles[0]
        series_year = ''
        if year_frags:
            year_match = re.search('\s*(\d{4})\s+', year_frags[0])
            if year_match:
                series_year = year_match.group(1)
        if wanted in scraper_utils.normalize_title(series_title):
            found.append({'url': scraper_utils.pathify_url(series_url), 'title': scraper_utils.cleanse_title(series_title), 'year': series_year})
    return found
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:25,代码来源:yabanci_scraper.py
示例10: search
def search(self, video_type, title, year, season=''):
    """Scan the site's alphabetical index page for *title*.

    Returns a list of dicts: {'url': path, 'title': cleansed title, 'year': str}.
    """
    results = []
    if not title:
        return results
    # Digit-leading titles live under the shared '0-9' bucket.
    letter = title[:1].lower()
    if letter.isdigit():
        letter = '0-9'
    listing_url = urlparse.urljoin(self.base_url, '/alphabet/%s/' % (letter))
    html = self._http_get(listing_url, cache_limit=24)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
    if not fragment:
        return results
    wanted = scraper_utils.normalize_title(title)
    for link in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
        link_url, title_year = link.groups()
        # Split "Title (2015)" into title + year when present.
        parsed = re.search('(.*?)\s+\((\d{4})\)', title_year)
        if parsed:
            link_title, link_year = parsed.groups()
        else:
            link_title = title_year
            link_year = ''
        if wanted in scraper_utils.normalize_title(link_title) and (not year or not link_year or year == link_year):
            results.append({'url': scraper_utils.pathify_url(link_url), 'title': scraper_utils.cleanse_title(link_title), 'year': link_year})
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:25,代码来源:tvonline_scraper.py
示例11: search
def search(self, video_type, title, year, season=''):
    """Query the site's Ajax Search Pro endpoint for *title*.

    Posts the plugin's form fields, then filters anchor results by
    requested type ('Season N' marker distinguishes seasons), title
    (substring match in either direction) and year.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1',
            'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
        match_url = attrs['href']
        # Strip any markup embedded in the anchor text.
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)
        is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                # Honor a specific requested season number.
                if season and int(is_season.group(1)) != int(season):
                    continue
                match_title = match_title_year
                match_title = re.sub('\s*\d{4}', '', match_title)  # drop stray year tokens
            else:
                match_title, match_year = scraper_utils.extra_year(match_title_year)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # Substring match in either direction.
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:28,代码来源:pubfilm_scraper.py
示例12: get_sources
def get_sources(self, video):
    """Build hoster dicts for *video* from the site's direct file listing.

    Movies are matched by normalized-title substring; episodes via
    self.__episode_match.  Dubbed releases are skipped.  Each hoster is a
    direct stream with a User-Agent pinned in the url.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(meta['title']):
                        match = line
                elif self.__episode_match(line, video):
                    match = line
                    meta = scraper_utils.parse_episode_link(line['link'])
                if match:
                    if meta['dubbed']: continue  # skip dubbed releases
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    # Store only the path portion; base_url is re-applied at play time.
                    stream_url = stream_url.replace(self.base_url, '')
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'format' in meta: hoster['format'] = meta['format']
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:28,代码来源:farda_scraper.py
示例13: search
def search(self, video_type, title, year, season=''):
    """Page through the site's by-letter list looking for *title*.

    Only alphabetic first letters are supported by the site index.
    Pagination stops as soon as any results are collected.
    """
    results = []
    if title and title[0].isalpha():
        page_url = ['/list/?char=%s' % (title[0])]
        while page_url:
            page_url = urlparse.urljoin(self.base_url, page_url[0])
            html = self._http_get(page_url, cache_limit=48)
            fragment = dom_parser.parse_dom(html, 'ul', {'class': 'list-film-char'})
            if fragment:
                norm_title = scraper_utils.normalize_title(title)
                for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]):
                    match_url, match_title = match.groups()
                    match_title = re.sub('</?strong>', '', match_title)
                    # A 'Season N' marker lets us honor a requested season.
                    match = re.search('Season\s+(\d+)', match_title, re.I)
                    if match:
                        if season and int(season) != int(match.group(1)):
                            continue
                    if norm_title in scraper_utils.normalize_title(match_title):
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
            # Stop paging once anything matched on this page.
            if results:
                break
            page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:28,代码来源:rlseries_scraper.py
示例14: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Walk the show's post pages looking for the requested episode.

    Uses scraper_utils.release_check on the first anchor of each matching
    post; when title-matching is forced (or as a fallback), compares the
    normalized episode title against the text after </strong>.  Returns a
    pathified url on a hit, or None implicitly otherwise.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True  # posts are newest-first; stop paging entirely
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:27,代码来源:2ddl_scraper.py
示例15: search
def search(self, video_type, title, year):
    """Search the site's /Film/ (by-year) or /Serial/ directory listings.

    For movies the year directory is required (no year => no results);
    for everything else the /Serial/ directory tree is matched by
    normalized-title substring.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                # Movie directories are named by year.
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
开发者ID:azumimuo,项目名称:family-xbmc-addon,代码行数:25,代码来源:farda_scraper.py
示例16: search
def search(self, video_type, title, year, season=''):
    """Search for movies via the site's query page, or scan the category
    list for everything else; both paths feed a common (url, title+year)
    filter loop.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        # TV: the sidebar category list holds one anchor per show.
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)
    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item
        # Split "Title (2015)" into title + year when present.
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'title': match_title, 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:28,代码来源:rainierland_scraper.py
示例17: get_sources
def get_sources(self, video):
    """Build hoster dicts for *video* from the mirror's direct file listing.

    Movies are matched by normalized-title substring; episodes by exact
    season/episode numbers from the parsed link.  Dubbed releases are
    skipped.  Each hoster is a direct stream with a User-Agent pinned in
    the url.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        # NOTE: uses the secondary mirror (base_url2) for file listings.
        source_url = urlparse.urljoin(self.base_url2, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                if 'dubbed' in extra.lower(): continue  # skip dubbed releases
                if match:
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
开发者ID:monicarero,项目名称:repository.xvbmc,代码行数:27,代码来源:farda_scraper.py
示例18: _get_episode_url
def _get_episode_url(self, show_url, video):
    """Locate the requested episode inside the show page's season dropdown.

    Collects three candidate episode ids -- by episode number, airdate and
    title -- then picks the best one according to the force-title and
    fallback settings.  Returns EP_URL % best_id, or None implicitly.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    # Grab the dropdown fragment for the requested season only.
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if match:
        fragment = match.group(1)
        # NOTE(review): assumes episodes, airdates and ids line up one-to-one (zip).
        ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class')
        episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'})
        airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate, ep_id in zip(episodes, airdates, ep_ids):
            if ep_airdate and ep_airdate == airdate: airdate_id = ep_id
            # Anchor text looks like "<span ...>3. Episode Title".
            match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode): num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id
        best_id = ''
        if not scraper_utils.force_title(video):
            # Preference order: number, then airdate, then title (later wins).
            if num_id: best_id = num_id
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
        else:
            if title_id: best_id = title_id
        if best_id:
            return EP_URL % (best_id)
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:31,代码来源:filmovizjia_scraper.py
示例19: _default_get_episode_url
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Generic episode-url resolver shared by the scrapers.

    Tries, in order: *episode_pattern* (SxxExx-style regex), then
    *airdate_pattern* (with {year}/{month}/... placeholders expanded from
    video.ep_airdate), then *title_pattern* (named groups 'title' and
    'url') when title matching is forced or enabled as a fallback.
    Returns a pathified url from the first match, or None implicitly.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return
    # Accept either raw html or a dom_parser2 result list.
    try: html = html[0].content
    except AttributeError: pass
    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            # Expand the airdate placeholder tokens into a concrete regex.
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:35,代码来源:scraper.py
示例20: search
def search(self, video_type, title, year, season=''):
    """Page through the site's /tvseries/ index collecting title matches.

    Each page's cells are matched by normalized-title substring against
    the img alt text; the '>>' anchor drives pagination until exhausted.
    """
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        html = re.sub('<!--.*?-->', '', html)  # strip comments that break matching
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                # Links are relative to the /tvseries/ section.
                if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        # Follow the '>>' next-page anchor until it disappears.
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:25,代码来源:dayt_scraper.py
注:本文中的salts_lib.scraper_utils.normalize_title函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论