本文整理汇总了Python中setuptools.compat.urljoin函数的典型用法代码示例。如果您正苦于以下问题:Python urljoin函数的具体用法?Python urljoin怎么用?Python urljoin使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了urljoin函数的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: process_url
def process_url(self, url, retrieve=False):
"""Evaluate a URL as a possible download, and maybe retrieve it"""
if os.getenv("CONDA_BUILD"):
raise RuntimeError("Setuptools downloading is disabled in conda build. "
"Be sure to add all dependencies in the meta.yaml url=%sr" % url)
if url in self.scanned_urls and not retrieve:
return
self.scanned_urls[url] = True
if not URL_SCHEME(url):
self.process_filename(url)
return
else:
dists = list(distros_for_url(url))
if dists:
if not self.url_ok(url):
return
self.debug("Found link: %s", url)
if dists or not retrieve or url in self.fetched_urls:
list(map(self.add, dists))
return # don't need the actual page
if not self.url_ok(url):
self.fetched_urls[url] = True
return
self.info("Reading %s", url)
self.fetched_urls[url] = True # prevent multiple fetch attempts
f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
if f is None: return
self.fetched_urls[f.url] = True
if 'html' not in f.headers.get('content-type', '').lower():
f.close() # not html, we can't process it
return
base = f.url # handle redirects
page = f.read()
if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
if isinstance(f, HTTPError):
# Errors have no charset, assume latin1:
charset = 'latin-1'
else:
charset = f.headers.get_param('charset') or 'latin-1'
page = page.decode(charset, "ignore")
f.close()
for match in HREF.finditer(page):
link = urljoin(base, htmldecode(match.group(1)))
self.process_url(link)
if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
page = self.process_index(url, page)
开发者ID:zyh199552,项目名称:sysu-spider,代码行数:51,代码来源:package_index.py
示例2: process_index
def process_index(self, url, page):
"""Process the contents of a PyPI page"""
def scan(link):
# Process a URL to see if it's for a package page
if link.startswith(self.index_url):
parts = list(map(
unquote, link[len(self.index_url):].split('/')
))
if len(parts) == 2 and '#' not in parts[1]:
# it's a package page, sanitize and index it
pkg = safe_name(parts[0])
ver = safe_version(parts[1])
self.package_pages.setdefault(pkg.lower(), {})[link] = True
return to_filename(pkg), to_filename(ver)
return None, None
# process an index page into the package-page index
for match in HREF.finditer(page):
try:
scan(urljoin(url, htmldecode(match.group(1))))
except ValueError:
pass
pkg, ver = scan(url) # ensure this page is in the page index
if pkg:
# process individual package page
for new_url in find_external_links(url, page):
# Process the found URL
base, frag = egg_info_for_url(new_url)
if base.endswith('.py') and not frag:
if ver:
new_url += '#egg=%s-%s' % (pkg, ver)
else:
self.need_version_info(url)
self.scan_url(new_url)
return PYPI_MD5.sub(
lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
)
else:
return "" # no sense double-scanning non-package pages
开发者ID:carlosaherrera,项目名称:colorkeep_repository,代码行数:42,代码来源:package_index.py
示例3: find_external_links
return wrapper
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting
@unique_values
def find_external_links(url, page):
"""Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
for match in REL.finditer(page):
tag, rel = match.groups()
rels = set(map(str.strip, rel.lower().split(',')))
if 'homepage' in rels or 'download' in rels:
for match in HREF.finditer(tag):
<<<<<<< HEAD
yield urljoin(url, htmldecode(match.group(1)))
=======
yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
>>>>>>> 54eef0be98b1b67c8507db91f4cfa90b64991027
for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag)
if pos!=-1:
match = HREF.search(page,pos)
if match:
<<<<<<< HEAD
yield urljoin(url, htmldecode(match.group(1)))
=======
yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
>>>>>>> 54eef0be98b1b67c8507db91f4cfa90b64991027
开发者ID:Yankur,项目名称:Web-application,代码行数:30,代码来源:package_index.py
注:本文中的setuptools.compat.urljoin函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论