本文整理汇总了Python中salts_lib.scraper_utils.cleanse_title函数的典型用法代码示例。如果您正苦于以下问题:Python cleanse_title函数的具体用法?Python cleanse_title怎么用?Python cleanse_title使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cleanse_title函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: search
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search a debrid-link site for shows or movies.

    video_type -- a VIDEO_TYPES constant; TVSHOW and MOVIE take different paths
    title, year -- the query; year '' disables year filtering
    season -- unused by this scraper (hence @UnusedVariable)
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}  # de-duplicate: the same show URL can be tagged on many posts
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post: continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        # NOTE(review): assumes headings and post divs appear in lockstep on the page
        # so zip() pairs them correctly — confirm against the live page layout.
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # substring match in either direction; a missing year on either side matches anything
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:32,代码来源:2ddl_scraper.py
示例2: search
def search(self, video_type, title, year, season=''):
    """Search filmikz.

    The site either renders a result listing or, for a single hit, emits a
    JavaScript redirect (`window.location = ...`); both shapes are handled.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    search_url = urlparse.urljoin(self.base_url, '/index.php?search=%s&image.x=0&image.y=0')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    # Are we on a results page?
    if not re.search('window\.location', html):
        pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            match_title_year, match_url = match.groups('')
            # skip porn
            if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year: continue
            match_title_year = re.sub('</?.*?>', '', match_title_year)  # strip residual tags
            # split trailing "(YYYY)" / "YYYY" off the title if present
            match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    else:
        # single match: follow the JS redirect target instead of parsing a listing
        match = re.search('window\.location\s+=\s+"([^"]+)', html)
        if match:
            url = match.group(1)
            if url != 'movies.php':  # 'movies.php' means "no results"
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': year}
                results.append(result)
    return results
开发者ID:AMOboxTV,项目名称:AMOBox.LegoBuild,代码行数:33,代码来源:filmikz_scraper.py
示例3: search
def search(self, video_type, title, year, season=''):
    """Search filmikz for *title*.

    Handles both response shapes: a normal result listing, or a JavaScript
    redirect page (`window.location = ...`) that the site emits for a single
    hit. Returns a list of {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/index.php')
    params = {'search': title, 'image.x': 0, 'image.y': 0}
    html = self._http_get(search_url, params=params, cache_limit=1)

    if re.search('window\.location', html):
        # Single hit: take the JS redirect target as the one result.
        redirect = re.search('window\.location\s+=\s+"([^"]+)', html)
        if redirect:
            target = redirect.group(1)
            if target != 'movies.php':  # 'movies.php' is the no-result redirect
                results.append({'url': scraper_utils.pathify_url(target), 'title': scraper_utils.cleanse_title(title), 'year': year})
        return results

    # Normal listing: one row per movie cell.
    pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
    for hit in re.finditer(pattern, html, re.DOTALL):
        match_title_year, match_url = hit.groups('')
        # skip porn
        if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year:
            continue
        match_title_year = re.sub('</?.*?>', '', match_title_year)
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            results.append({'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year})
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:26,代码来源:filmikz_scraper.py
示例4: __get_ok
def __get_ok(self, embed, flashvars):
    """Build hoster entries for an ok.ru (Odnoklassniki) embed.

    embed, flashvars -- dom_parser2 match lists for the embed <object> and its
    flashvars <input>; only the first element of each is used.
    Returns a list containing at most one hoster dict.
    """
    hosters = []
    link = flashvars[0].attrs['value']
    match = re.search('metadataUrl=([^"]+)', link)
    if match:
        # cleanse_title is applied to unquoted URLs here, i.e. used as a generic
        # string cleanup rather than on a title — intentional per this scraper.
        referer = scraper_utils.cleanse_title(urllib.unquote(embed[0].attrs['data']))
        ok_url = scraper_utils.cleanse_title(urllib.unquote(match.group(1)))
        html = self._http_get(ok_url, data='ok', headers={'Referer': referer}, cache_limit=.25)
        js_data = scraper_utils.parse_json(html, ok_url)
        stream_url = js_data.get('movie', {}).get('url')
        if stream_url is not None:
            host = urlparse.urlparse(stream_url).hostname
            # quality is assumed HD720 for ok.ru streams; subs are hard-coded Turkish
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'url': stream_url, 'direct': False, 'subs': 'Turkish Subtitles'}
            hosters.append(hoster)
    return hosters
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:15,代码来源:dizibox_scraper.py
示例5: search
def search(self, video_type, title, year, season=''):
    """Search watchhd for movies or show seasons.

    Fix: the original type filter was
    `not is_season and video_type == VIDEO_TYPES.MOVIE or is_season and VIDEO_TYPES.SEASON`
    — the last operand is an enum constant, always truthy, so every
    "Season N" result passed the filter even on movie searches. The second
    clause now compares video_type against VIDEO_TYPES.SEASON.

    Returns a list of {'url', 'title', 'year'} dicts.
    """
    search_url = urlparse.urljoin(self.base_url, '/search/%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'name_top'}):
        match = re.search('href="([^"]+)[^>]+>([^<]+)', item)
        if match:
            url, match_title_year = match.groups()
            is_season = re.search('Season\s+(\d+)', match_title_year, re.IGNORECASE)
            # keep only items whose kind matches the requested video_type
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.MOVIE:
                    # split a trailing "(YYYY)" / "YYYY" off the title if present
                    match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                else:
                    match_title = match_title_year
                    # is_season is guaranteed non-None in this branch
                    if season and int(is_season.group(1)) != int(season):
                        continue
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:27,代码来源:watchhd_scraper.py
示例6: search
def search(self, video_type, title, year, season=''):
    """Search moviestorm.

    TV shows are matched against the full "all series" index page; movies go
    through the site's search form, honoring its rate-limit message.
    Returns a list of {'url', 'title', 'year'} dicts (year is always '').
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        # 'underilne' is the site's own (misspelled) CSS class — do not "fix" it
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        # the site rate-limits searches; wait out the advertised delay (capped
        # at our own timeout) and retry once, bypassing the cache
        match = re.search('you can search again in (\d+) seconds', html, re.I)
        if match:
            wait = int(match.group(1))
            if wait > self.timeout: wait = self.timeout
            time.sleep(wait)
            html = self._http_get(url, data=data, cache_limit=0)
        pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
        items = re.findall(pattern, html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for item in items:
        url, match_title = item
        # substring match on normalized titles; no year info is available here
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:31,代码来源:moviestorm_scraper.py
示例7: search
def search(self, video_type, title, year, season=''):
    """Search movie4k.

    Rows carry a '(TVshow)' marker; it is used to keep only rows matching the
    requested video_type, then stripped from the title. Returns a list of
    {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/movies.php?list=search&search=')
    search_url += urllib.quote_plus(title)
    cookies = {'onlylanguage': 'en', 'lang': 'en'}
    html = self._http_get(search_url, cookies=cookies, cache_limit=.25)
    pattern = 'id="tdmovies">\s*<a\s+href="([^"]+)">([^<]+).*?id="f7">(.*?)</TD>'
    for row in re.finditer(pattern, html, re.DOTALL):
        match_url, match_title, extra = row.groups('')
        # keep only rows whose kind agrees with the requested video_type
        is_show = '(TVshow)' in match_title
        if (video_type == VIDEO_TYPES.MOVIE and is_show) or (video_type == VIDEO_TYPES.TVSHOW and not is_show):
            continue
        match_title = match_title.replace('(TVshow)', '').strip()
        year_match = re.search('>(\d{4})<', extra)
        match_year = year_match.group(1) if year_match else ''
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year})
    return results
开发者ID:ScriptUp,项目名称:salts,代码行数:25,代码来源:movie4k_scraper.py
示例8: search
def search(self, video_type, title, year, season=''):
    """Search moviesub (dom_parser2 variant).

    Result <li> items inside the 'cfv' list carry a 'status' badge only for
    show seasons; that badge distinguishes seasons from movies.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment: return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        # presence of a 'status' div marks the item as a season listing
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue
            match_title = match[0].attrs['title']
            match_url = match[0].attrs['href']
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                # require the exact requested season number at the end of the title
                if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                    continue
            else:
                # movie year is embedded in the URL slug, e.g. "...-2016-..."
                match = re.search('-(\d{4})[-.]', match_url)
                if match:
                    match_year = match.group(1)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # substring match in either direction on normalized titles
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:32,代码来源:moviesub_scraper.py
示例9: search
def search(self, video_type, title, year, season=''):
    """Search moviexk.

    Fix: the typographic-apostrophe replacement had been mojibake-corrupted by
    a double encoding ('\xc3\xa2\xe2\x82\xac\xe2\x84\xa2' rendered as garbage);
    restored as the explicit UTF-8 byte sequence for U+2019 RIGHT SINGLE
    QUOTATION MARK, which is what the page emits.

    Returns a list of {'title', 'url', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}):
        name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'})
        if name:
            match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0])
            if match:
                match_url, match_title_year = match.groups()
                if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE: continue
                match_title_year = re.sub('</?[^>]*>', '', match_title_year)
                match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
                # normalize the UTF-8 curly apostrophe (U+2019) to a plain quote
                match_title_year = match_title_year.replace('\xe2\x80\x99', "'")
                # split a trailing "(YYYY...)" off the title if present
                match = re.search('(.*?)\s+\((\d{4})[^)]*\)$', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if not match_year:
                    # fall back to the year badge shown on the result card
                    year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'})
                    if year_span:
                        year_text = dom_parser.parse_dom(year_span[0], 'a')
                        if year_text:
                            match_year = year_text[0].strip()
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                    results.append(result)
    return results
开发者ID:AMOboxTV,项目名称:AMOBox.LegoBuild,代码行数:35,代码来源:moviexk_scraper.py
示例10: __alt_search
def __alt_search(self, video_type, title, year, season=''):
    """Alternate search path for hdmovie14 using the '/search' endpoint.

    The query key combines title, optional year, and optional "Season N".
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    params = title.lower()
    if year: params += ' %s' % (year)
    if video_type == VIDEO_TYPES.SEASON and season:
        params += ' Season %s' % (season)
    params = {'key': params}
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params=params, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
        match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
        if match:
            match_url, match_title = match.groups()
            # season pages have "-season-N" in their URL slug
            is_season = re.search('-season-\d+', match_url)
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if video_type == VIDEO_TYPES.SEASON:
                    # require the requested season number (allowing zero padding)
                    if season and not re.search('season-0*%s$' % (season), match_url): continue
                match_title = re.sub('</?[^>]*>', '', match_title)
                match_title = re.sub('\s+Full\s+Movie', '', match_title)
                # movie year is embedded in the URL slug, e.g. "...-2016"
                match = re.search('-(\d{4})(?:$|-)', match_url)
                if match:
                    match_year = match.group(1)
                else:
                    match_year = ''
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:32,代码来源:hdmovie14_scraper.py
示例11: search
def search(self, video_type, title, year, season=''):
    """Search merdb through its advanced-search endpoint.

    TV shows and episodes use the '/tvshow' prefix; the year is passed to the
    site as a query parameter and also re-checked locally. Returns a list of
    {'url', 'title', 'year'} dicts.
    """
    url_parts = [self.base_url]
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        url_parts.append('/tvshow')
    url_parts.append('/advanced-search.php?search=')
    url_parts.append(urllib.quote_plus(title))
    url_parts.append('&year=' + urllib.quote_plus(str(year)))
    url_parts.append('&advanced_search=Search')
    search_url = ''.join(url_parts)

    html = self._http_get(search_url, cache_limit=.25)
    results = []
    for element in dom_parser.parse_dom(html, 'div', {'class': 'list_box_title'}):
        link = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)', element)
        if not link:
            continue
        url, match_title_year = link.groups()
        # split a trailing "(YYYY)" / "YYYY" off the title if present
        year_match = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)', match_title_year)
        if year_match:
            match_title, match_year = year_match.groups()
        else:
            match_title, match_year = match_title_year, ''
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year})
    return results
开发者ID:AMOboxTV,项目名称:AMOBox.LegoBuild,代码行数:27,代码来源:merdb_scraper.py
示例12: search
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search movie4k (dom_parser2 variant).

    Rows are <TR id="coverPreviewNNN"> elements; a '(tvshow)' marker in the
    link text distinguishes shows from movies. Returns a list of
    {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies.php')
    cookies = {'onlylanguage': 'en', 'lang': 'en'}
    params = {'list': 'search', 'search': title}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
    for _attrs, content in dom_parser2.parse_dom(html, 'TR', {'id': re.compile('coverPreview\d+')}):
        match = dom_parser2.parse_dom(content, 'a', req='href')
        if not match: continue
        match_url, match_title = match[0].attrs['href'], match[0].content
        # keep only rows whose kind agrees with the requested video_type
        is_show = re.search('\(tvshow\)', match_title, re.I)
        if (video_type == VIDEO_TYPES.MOVIE and is_show) or (video_type == VIDEO_TYPES.TVSHOW and not is_show):
            continue
        match_title = match_title.replace('(TVshow)', '')
        match_title = match_title.strip()
        match_year = ''
        # scan the row's divs for a 4-digit year; the last one found wins
        for _attrs, div in dom_parser2.parse_dom(content, 'div'):
            match = re.match('\s*(\d{4})\s*', div)
            if match:
                match_year = match.group(1)
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:28,代码来源:movie4k_scraper.py
示例13: search
def search(self, video_type, title, year, season=''):
    """Search tunemovie for movies or show seasons.

    Fixes vs. the original:
    - The type filter tested `is_season and VIDEO_TYPES.SEASON`; the second
      operand is an enum constant and always truthy, so "Season N" results
      leaked into movie searches. It now compares against video_type.
    - When no year badge is found for a movie, match_year is kept as '' rather
      than leaking an empty list into the result dict's 'year' field.

    Returns a list of {'url', 'title', 'year'} dicts.
    """
    search_url = urlparse.urljoin(self.base_url, '/search-movies/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=0)
    results = []
    for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}):
        match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title')
        url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href')
        if match_title and url:
            match_title, url = match_title[0], url[0]
            is_season = re.search('Season\s+(\d+)$', match_title, re.I)
            # keep only items whose kind matches the requested video_type
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.MOVIE:
                    # year appears in a status badge on the thumbnail
                    year_frag = dom_parser.parse_dom(thumb, 'div', {'class': '[^"]*status-year[^"]*'})
                    if year_frag:
                        match_year = year_frag[0]
                else:
                    # is_season is guaranteed non-None in this branch
                    if season and int(is_season.group(1)) != int(season):
                        continue
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
开发者ID:ScriptUp,项目名称:salts,代码行数:25,代码来源:tunemovie_scraper.py
示例14: search
def search(self, video_type, title, year, season=''):
    """Search hevcbluray via the site's standard WordPress-style ?s= query.

    Episode entries (titles like "3x07") are excluded; quality tags from the
    'calidad2' badge are appended to the title in brackets.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title): continue  # exclude episodes
            match_title, match_year = scraper_utils.extra_year(match_title)
            # fall back to the explicit year badge when the title held no year
            if not match_year and year_frag:
                match_year = year_frag[0]
            # drop a trailing resolution marker like "1080p" from the title
            match = re.search('(.*?)\s+\d{3,}p', match_title)
            if match:
                match_title = match.group(1)
            extra = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
            if extra:
                match_title += ' [%s]' % (extra[0])
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:28,代码来源:hevcbluray_scraper.py
示例15: search
def search(self, video_type, title, year, season=''):
    """Search 123movies for movies or show seasons.

    The title is pre-stripped to alphanumerics/spaces because the site's
    search chokes on punctuation. An 'mli-eps' badge marks episodic entries.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    search_url = urlparse.urljoin(self.base_url, '/movie/search/')
    title = re.sub('[^A-Za-z0-9 ]', '', title)  # site search rejects punctuation
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'ml-item'}):
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'mli-info'})
        match_url = re.search('href="([^"]+)', item, re.DOTALL)
        match_year = re.search('class="jt-info">(\d{4})<', item)
        # the episode-count badge only appears on series entries
        is_episodes = dom_parser.parse_dom(item, 'span', {'class': 'mli-eps'})
        if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
            if match_title and match_url:
                match_title = match_title[0]
                match_title = re.sub('</?h2>', '', match_title)
                match_title = re.sub('\s+\d{4}$', '', match_title)  # drop trailing year from title
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+%s$' % (season), match_title): continue
                # the playable page lives at <match-url>/watching.html
                url = urlparse.urljoin(match_url.group(1), 'watching.html')
                match_year = match_year.group(1) if match_year else ''
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:28,代码来源:123movies_scraper.py
示例16: search
def search(self, video_type, title, year, season=''):
    """Search moviesub (dom_parser variant).

    Items inside the 'cfv' list carry a 'status' badge only when they are show
    seasons; that badge separates seasons from movies. Returns a list of
    {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment:
        return results
    for item in dom_parser.parse_dom(fragment[0], 'li'):
        # a 'status' div marks the item as a season listing
        is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
        type_ok = (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON)
        if not type_ok:
            continue
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'a', ret='title')
        if not (match_url and match_title):
            continue
        match_url, match_title = match_url[0], match_title[0]
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            # require the exact requested season number at the end of the title
            if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                continue
        else:
            # movie year is embedded in the URL slug, e.g. "...-2016.html"
            year_match = re.search('-(\d{4})\.html', match_url)
            if year_match:
                match_year = year_match.group(1)
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)})
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:28,代码来源:moviesub_scraper.py
示例17: search
def search(self, video_type, title, year, season=""):
    """Search moviewatcher.

    A 'movie-series' badge classifies each card: absent -> movie, text
    "TV SERIE" -> TV show. Note that any other badge text leaves media_type as
    the raw list, which can never equal video_type, so such cards are skipped.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, "/search?query=%s")
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, "div", {"class": "one_movie-item"}):
        match_url = dom_parser.parse_dom(item, "a", ret="href")
        match_title = dom_parser.parse_dom(item, "img", ret="alt")
        media_type = dom_parser.parse_dom(item, "div", {"class": "movie-series"})
        if not media_type:
            media_type = VIDEO_TYPES.MOVIE
        elif media_type[0] == "TV SERIE":
            media_type = VIDEO_TYPES.TVSHOW
        if match_url and match_title and video_type == media_type:
            match_url = match_url[0]
            match_title = match_title[0]
            # year is embedded in the URL slug, e.g. "...-2016-..."
            match_year = re.search("-(\d{4})-", match_url)
            if match_year:
                match_year = match_year.group(1)
            else:
                match_year = ""
            if not year or not match_year or year == match_year:
                result = {
                    "url": scraper_utils.pathify_url(match_url),
                    "title": scraper_utils.cleanse_title(match_title),
                    "year": match_year,
                }
                results.append(result)
    return results
开发者ID:EPiC-APOC,项目名称:repository.xvbmc,代码行数:33,代码来源:moviewatcher_scraper.py
示例18: search
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search scenedown via an obfuscated (base64-encoded) search API URL.

    Results come back as Google-CSE-style JSON; items older than the scraper's
    cutoff are dropped, and release-name metadata is matched against the query.
    Falls back to scraping the site directly when this search yields nothing.
    """
    results = []
    # NOTE(review): base64.decodestring is the deprecated alias of b64decode;
    # kept as-is since this is Python 2 code.
    search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=2)
    if html:
        js_data = scraper_utils.parse_json(html)
        search_meta = scraper_utils.parse_episode_link(title)
        for item in js_data.get('results', []):
            metatags = item.get('richSnippet', {}).get('metatags', {})
            post_date = metatags.get('articlePublishedTime')
            if post_date:
                # strip the timezone offset so strptime can parse the timestamp
                post_date = re.sub('[+-]\d+:\d+$', '', post_date)
                post_date = scraper_utils.to_datetime(post_date, '%Y-%m-%dT%H:%M:%S').date()
                if self.__too_old(post_date): continue
            match_title = metatags.get('ogTitle', '')
            if not match_title:
                match_title = item['titleNoFormatting']
            # drop the site's "- Scene Down" title suffix
            match_title = re.sub(re.compile('\s*-\s*Scene\s*Down$', re.I), '', match_title)
            match_url = item['url']
            match_year = ''
            item_meta = scraper_utils.parse_episode_link(match_title)
            if scraper_utils.meta_release_check(video_type, search_meta, item_meta):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    if not results:
        # fall back to scraping the site's own search when the API found nothing
        results = self.__site_search(video_type, title, year)
    return results
开发者ID:CYBERxNUKE,项目名称:xbmc-addon,代码行数:30,代码来源:scenedown_scraper.py
示例19: search
def search(self, video_type, title, year, season=''):
    """Search vivoto for movies or show seasons.

    Note: unlike sibling scrapers, this one performs no year filtering — the
    year is collected for movie results but every match is kept.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'movie'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'li'):
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            match_title = dom_parser.parse_dom(item, 'span', {'class': 'text'})
            match_year = dom_parser.parse_dom(item, 'span', {'class': 'year'})
            if match_url and match_title:
                match_url = match_url[0]
                match_title = re.sub('</?strong>', '', match_title[0])
                is_season = re.search('Season\s+(\d+)$', match_title, re.I)
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    if video_type == VIDEO_TYPES.MOVIE:
                        if match_year:
                            match_year = match_year[0]
                        else:
                            match_year = ''
                    else:
                        # season branch: is_season is non-None here
                        if season and int(is_season.group(1)) != int(season):
                            continue
                        match_year = ''
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:29,代码来源:vivoto_scraper.py
示例20: __movie_search
def __movie_search(self, title, year):
    """Search moviewatcher for movies by normalized-title substring match.

    Fix: the original set `match_year = ''` and then tested `if match_year:`
    before re-reading `match_year[0]` — a branch that could never execute.
    The dead branch is removed; behavior is unchanged. The search results on
    this site expose no year, so the year check always passes and every
    result carries 'year': ''.

    Returns a list of {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        match_year = ''  # no year information is available in these results
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
开发者ID:freeworldxbmc,项目名称:KAOSbox-Repo,代码行数:24,代码来源:moviewatcher_scraper.py
注:本文中的salts_lib.scraper_utils.cleanse_title函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论