Python utils.get_domain Function Code Examples


This article collects typical usage examples of the Python utils.get_domain function. If you have been wondering what get_domain does, how to call it, and what real-world uses look like, the curated examples below should help.

Twenty code examples of get_domain are shown below, ordered by popularity.
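None of the examples show the implementation of utils.get_domain itself, and the projects clearly ship different variants: most pass a URL and compare the returned domains (examples 1, 8, 10, 16), while a few call it with no argument or a port number to build absolute links (examples 2, 3, 5). As a rough mental model only, here is a minimal URL-based sketch; the fallback logic below is an assumption for illustration, not the actual code from any of these projects.

# Minimal sketch of a URL-based get_domain, for orientation only.
# Assumption: it returns the registered domain of a URL, e.g. 'example.com';
# the real utils.get_domain in each project below may behave differently.
try:
    from urllib.parse import urlparse   # Python 3
except ImportError:
    from urlparse import urlparse       # Python 2, which most examples below target

def get_domain(url):
    netloc = urlparse(url).netloc.split(':')[0]   # drop any port
    parts = [p for p in netloc.split('.') if p]
    # Naive last-two-labels rule; real code would need a public-suffix
    # list to handle domains like 'example.co.uk'.
    return '.'.join(parts[-2:]) if len(parts) >= 2 else netloc

Under that sketch, get_domain('http://www.example.com/a') returns 'example.com', which is consistent with how examples 1, 10, and 16 compare a link's domain against the current page's.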

Example 1: get_source_node

	def get_source_node(self):
		# Prefer an explicitly configured CSS selector for the source node.
		if self.options.get('source_selector'):
			nodes = self.doc.cssselect(self.options['source_selector'])
			if len(nodes) == 1:
				return nodes[0]
			for node in nodes:
				res = self.has_source(node)
				if res is not None:
					return res

		body = self.doc.find('body')
		if body is None:
			return None

		for node in body.iter():
			res = self.has_source(node)
			if res is not None:
				return res

		# Otherwise fall back to an external link whose anchor text ends in
		# u'报' (common in Chinese news-outlet names) but not u'举报' ("report").
		domain = get_domain(self.url)
		for a in self.doc.iter('a'):
			link = a.get('href')
			if link and link.startswith('http') \
					and get_domain(link) != domain:
				text = self.get_block_text(a)
				if len(text) > 2 \
						and text.endswith(u'报') \
						and not text.endswith(u'举报'):
					return a
Developer: dotajin, project: haoku-open, lines: 30, source: article.py


Example 2: data_needs

def data_needs(request, template="needs.html"):
    themes = Theme.objects.all().order_by("display_name")
    ordered_themes, theme_dict = add_ordered_needs_lists(themes)
    context = {
        "themes": themes,
        "theme_dict": theme_dict,
        "ordered_themes": ordered_themes,
        "domain": get_domain(8000),
        "domain8010": get_domain(),
    }
    return render_to_response(template, RequestContext(request, context))
Developer: WCGA, project: marine-planner-wcodp, lines: 11, source: views.py


Example 3: csw_listing

def csw_listing(request, template='pycsw_catalog_view.html'):
  if logger:
    logger.info("Start csw_listing")
  csw_recs = pycsw_records.objects.using('pycsw_test').all().order_by('organization')
  html_id = 0
  for rec in csw_recs:
    rec.html_id = html_id
    html_id += 1
  context = {'records': csw_recs, 'domain': get_domain(8000), 'domain8010': get_domain()}
  if logger:
    logger.info("End csw_listing")
  return render_to_response(template, RequestContext(request, context))
Developer: DanRamage, project: secoora-portal, lines: 12, source: views.py
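
Examples 2 and 3 call get_domain(8000) and get_domain() to put absolute base URLs into the template context, so in these two portal projects the helper evidently returns the site's own domain for a given port rather than parsing a URL. A hypothetical sketch, assuming a Django project and a made-up SITE_HOST setting (the real helper and its default port are not shown here):

# Hypothetical settings-based variant, inferred from get_domain(8000) and
# get_domain() producing the 'domain' and 'domain8010' context keys above.
from django.conf import settings

def get_domain(port=8010):
    # SITE_HOST is an assumed setting name, not taken from the real projects.
    host = getattr(settings, 'SITE_HOST', 'localhost')
    return 'http://%s:%s' % (host, port)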


Example 4: add

	def add(self, cate):
		url = cate['url']

		domain = get_domain(url)
		subdomains = get_subdomains(url)
		paths = get_path(url).split('/')
		query = urlparse.urlparse(url).query

		if domain not in self.root:
			self.root[domain] = {'sub':{}, 'path':{}}

		node = self.root[domain]
		# Descend into subdomain nodes, unless the only subdomain is 'www'.
		if len(subdomains) > 1 or (len(subdomains) == 1 and subdomains[0] != 'www'):
			for sub in subdomains:
				if sub not in node['sub']:
					node['sub'][sub] = {'sub':{}, 'path':{}}
				node = node['sub'][sub]

		for path in paths:
			if path not in node['path']:
				node['path'][path] = {'path':{}}
			node = node['path'][path]

		if query:
			node['path']['query___' + query] = {'path':{}}
			node = node['path']['query___' + query]

		node['cate'] = cate
Developer: dotajin, project: haoku-open, lines: 28, source: best2spider.py


Example 5: bookmark_link

 def bookmark_link(self):
     if not self.bookmark and self.is_sublayer and self.parent.bookmark:
         return self.parent.bookmark.replace('<layer_id>', str(self.id))
     if not self.bookmark:
         domain = get_domain(8000)
         return '%s/planner/#%s' %(domain, self.slug)
     return self.bookmark
Developer: Ecotrust, project: marco-portal, lines: 7, source: models.py


Example 6: __init__

 def __init__(self, link, base_url):
     self.text = self.get_text(link)
     self.class_ = self.get_class(link)
     self.href = self.get_href(link, base_url)
     self.domain = get_domain(self.href)
     self.parent = link.parent
     self.base_url = base_url
Developer: nlpwhu, project: info-source, lines: 7, source: website.py


Example 7: introspect

def introspect(domain):
    filter_func = lambda x: get_domain(loads(x[1])).lower() in domain.lower()
    pages, requested_page = get_effective_page(request.args.get("page", 0),
            filter_func)
    items = get_items(filter_func, g.db_file, requested_page)

    return render_template("index.html", items=items, pages=pages,
            requested_page=requested_page, current_page=request.args.get('page', 0))
Developer: makoConstruct, project: merveilles_io, lines: 8, source: routes.py


Example 8: is_image_link

def is_image_link(url):
	if url.split('.')[-1] in img_extensions:
		return True 
	domain = get_domain(url).split('.')
	for sharer in img_sharers:
		if sharer in domain: 
			return True 
	return False 
Developer: morganecf, project: topic-modeling, lines: 8, source: subreddit_stats.py


Example 9: add_learn_links

def add_learn_links(themes):
    context = []
    domain = get_domain()
    for theme in themes:
        link = '%s/portal/learn/%s' %(domain, linkify(theme.name))
        #print link
        context.append({'theme': theme, 'learn_link': link})
    return context
Developer: atrawog, project: marco-portal, lines: 8, source: views.py


Example 10: get_allowed_from

 def get_allowed_from(self, child_urls):
     """
     :param child_urls: List of child urls to check robots.txt on
     :return: A list of allowed child urls to crawl
     """
     allowed = []
     domains = list(set('{0}'.format(get_domain(url)) for url in child_urls))
     domain_to_children = {domain: filter(lambda u: get_domain(u) == domain, child_urls) for domain in domains}
     for domain in domain_to_children:
         try:
             rules = self.robots.fetch(domain)
             for url in domain_to_children[domain]:
                 if rules.allowed(url, self._agent):
                     allowed.append(url)
         except:
             allowed.extend(domain_to_children[domain])
     return allowed
Developer: netarachelhershko, project: crawler, lines: 17, source: robots_validator.py


Example 11: top_things

def top_things(db_file):
    urls = {}
    people = {}
    graph = {}

    db = DB()

    if not db.open("{0}".format(db_file), DB.OREADER | DB.OCREATE):
        print "Could not open database. (Top things)"

    cur = db.cursor()
    cur.jump_back()
    while True:
        rec = cur.get(False)
        if not rec:
            break

        loaded_rec = loads(rec[1])
        split = get_domain(loaded_rec)

        urls[split] = urls.get(split, 0) + 1

        person = loaded_rec['person']
        people[person] = people.get(person, 0) + 1

        if split is not None and split != "" and \
                person is not None and person != "":
            # Build a crazy relational graph out of my nosql data
            if split not in graph:
                graph[split] = {"is_person": False, "data": [person], "linked_to_count": 1}
            elif person not in graph[split]["data"]:
                graph[split]["data"].append(person)
                graph[split]["linked_to_count"] += 1

            if person not in graph:
                graph[person] = {"is_person": True, "data": [split]}
            elif split not in graph[person]["data"]:
                graph[person]["data"].append(split)

        cur.step_back()
    cur.disable()
    db.close()

    def get_one(x):
        return x[1]

    return (sorted(urls.items(), key=get_one, reverse=True),
            sorted(people.items(), key=get_one, reverse=True),
            graph)
Developer: lykkin, project: merveilles_io, lines: 55, source: database.py


Example 12: __init__

	def __init__(self, input, **options):
		self.input = input
		self.url = options.get('url', '')
		self.debug = options.get('debug', False)
		self.title = options.get('title', '^^')
		self.pages = options.get('pages', None)
		self.texts = options.get('texts', None)
		self.domain = get_domain(self.url)
		self.options = options
		self.doc = clean_html(input, return_doc=True)
		self.text = self.doc.text_content()
		self.len = word_count(self.text) if self.text else 0
Developer: dotajin, project: haoku-open, lines: 12, source: clean (copy).py


Example 13: fetch_from

 def fetch_from(self, urls):
     """
     :param urls: A list of urls to fetch sitemaps of
     :return: A list of urls that was found within each sitemap of given urls
     """
     unique_domains = list(set(get_domain(u) for u in urls))
     sitemaps = self._try_fetch_sitemaps(unique_domains)
     results = []
     for url in sitemaps:
         sitemaps_content = self.requests_getter.get_content_from(sitemaps[url])
         for content in sitemaps_content:
             locations = self.sitemap_url_extractor.extract_from(content)
             locations = filter(lambda u: not u.endswith('.xml'), locations)
             results.extend(locations)
     return results
Developer: netarachelhershko, project: crawler, lines: 15, source: sitemap_fetcher.py


Example 14: fetch_stories

 def fetch_stories(self, correlation_id=-1):
     """Fetches new stories from the datasource. Uses the last story external id to 
     fetch only new stories."""
     try:
         url = "http://%s/twitter_sensor/?user=%s&password=%s" % (get_domain(), self.user.user_name, self.user.user_password)
         tweets = urllib.urlopen(url).read()
         tweets = json.loads(tweets)
         print tweets
         for key in tweets:
             try :
                 authors = []
                 authors.append(tweets[key])
                 self.add_read_story(key, authors)
                 self.add_user(tweets[key])
             except:
                 log_event("fetch_stories_failed", "AgentCell", self.id, "Adding fetched story %s failed, for %s" % (key, self.user), correlation_id)
     except:
         log_event("fetch_stories_failed", "AgentCell", self.id, "Failed to fetch stories for %s" % self.user, correlation_id)
Developer: ofri, project: EventHorizon, lines: 18, source: models.py


Example 15: article

def article():
	url = request.args.get('url')

	article = mongo.article.find_one({'_id':url})

	if not article:
		try:
			html = get_or_cache(url)
			article = html2article(html, url, selector=True, merge=True)
			if article and not article['src_name']:
				article['src_name'] = get_domain(url)

			tpl = url2tpl(url)
			urls = html2urls(html, url)
			texts = dict(map(lambda x: (x[0], max(x[1], key=lambda y:len(y))), urls.iteritems()))
			tmp = dict(map(lambda x: (x, url2tpl(x)), texts.iterkeys()))

			urls = {}
			for u, t in tmp.iteritems():
				if u != url and t == tpl:
					urls[u] = texts[u]
					if len(urls) >= 10:
						break

			if article:
				article['urls'] = urls
				article['_id'] = url
				article['view'] = 1
				article['last'] = time.time()

				copy = article.copy()
				copy['urls'] = json.dumps(copy['urls'])
				mongo.article.save(copy)
		except:
			pass
	else:
		article['urls'] = json.loads(article['urls'])
		mongo.article.update({'_id':url}, {'$set':{'view':article['view'] + 1}})

	if article:
		article['pubtime'] = article['pubtime'][:10]

	return render_template('extract/article.html', article=article, url=url)
Developer: dotajin, project: haoku-open, lines: 43, source: views.py


Example 16: crawl

    def crawl(self, url, max_page_depth=5, max_external_sites_page_depth=4, request_rate_limit=4):
        """
        Will crawl a given url up to max_page_depth and max_external_sites_page_depth on a max rate of
        request_rate_limit.
        :param url: The to-be crawled url
        :param max_page_depth: Max internal (same-domain) depth
        :param max_external_sites_page_depth: Max external (different-domain) depth
        :param request_rate_limit: Up to n requests at once
        :return: List of Url objects (See schemas/url.py)
        """
        self._url_scanner.set_request_limit(request_rate_limit)
        self._max_page_depth = max_page_depth
        self._max_external_sites_page_depth = max_external_sites_page_depth
        self._domain = get_domain(url)

        self._internal_urls_to_scan.append(url)
        self._crawl_internal_urls()
        self._crawl_external_urls()
        return self._get_crawled_urls()
Developer: netarachelhershko, project: crawler, lines: 19, source: crawler.py


Example 17: extract_essence

 def extract_essence(self, correlation_id):
     """
     Analyze the story text, to extract the essence from it. For the essence, look for a matching StoryEssence cell.
     If found, link the story cell to the StoryEssence cell. Else create a new StoryEssence cell & link the story to it.
     """
     try:
         print "extract_essence called for story '%s'" % self.core
         client = Client()
         response = client.get('http://%s/text_analyzer/extract_essence/' % get_domain(), {'text': self.core}).content
         print "got extract essence response: ", response
         if response != "":
             try :
                 self.add_essence(response)
             except:
                 print sys.exc_info()
                 print "essence=", response
                 log_event("extract_essence_failed", STORY_CELL, self.id, "Adding essence '%s' extracted from story '%s' failed" % (response, self.core), correlation_id)
             # all went all, update the flag
             self.is_essence_extracted = True
             self.save()
     except:
         print "Failed to extract essence", sys.exc_info()
         log_event("extract_essence_failed", STORY_CELL, self.id, "Failed to extract essence from story '%s'" % self.core, correlation_id)
Developer: dibaunaumh, project: EventHorizon, lines: 23, source: models.py


Example 18: fetch_stories

 def fetch_stories(self, correlation_id=-1):
     """Fetches new stories from the datasource. Uses the last story external id to 
     fetch only new stories."""
     try:
         #url = "http://%s/twitter_sensor/?user=%s&password=%s" % (get_domain(), self.user.user_name, self.user.user_password)
         #tweets = urllib.urlopen(url).read()
         client = Client()
         tweets = client.get('http://%s/twitter_sensor/' % get_domain(), {'user': self.user.user_name, 'password': self.user.user_password}).content
         tweets = json.loads(tweets)
         print tweets
         for key in tweets:
             try :
                 authors = []
                 for story in StoryCell.objects.all():
                     if story.core == key:
                         return
                 authors.append(tweets[key][0])
                 self.add_read_story(key, authors)
                 self.add_user(tweets[key][0])
             except:
                 log_event("fetch_stories_failed", AGENT_CELL, self.id, "Adding fetched story %s failed, for %s" % (key, self.user), correlation_id)
     except:
         print "Failed to fetch stories", sys.exc_info()
         log_event("fetch_stories_failed", AGENT_CELL, self.id, "Failed to fetch stories for %s" % self.user, correlation_id)
Developer: dibaunaumh, project: EventHorizon, lines: 24, source: models.py


Example 19: get_absolute_url

 def get_absolute_url(self):
     return "http://%s/cells/view/story/%d" % (get_domain(), self.id)
Developer: dibaunaumh, project: EventHorizon, lines: 2, source: models.py


Example 20: filter_func

 def filter_func(x):
     return get_domain(loads(x[1])).lower() in domain.lower()
Developer: lykkin, project: merveilles_io, lines: 2, source: routes.py



Note: The utils.get_domain examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects and copyright remains with their original authors; consult each project's license before redistributing or reusing the code. Do not reproduce this article without permission.

