
Python wikipedia.page Function Code Examples


This article collects typical usage examples of the wikipedia.page function in Python. If you have been wondering exactly what page does, how to call it, or what real-world uses look like, the curated examples below should help.



Twenty code examples of the page function are shown below, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code samples.
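As a quick orientation before the collected examples, here is a minimal sketch of the pattern most of them share: fetch a page by title and fall back gracefully on the two common failures, DisambiguationError and PageError. It assumes the third-party wikipedia package (pip install wikipedia); the fetch_page helper name is illustrative, not part of the library.

import wikipedia

def fetch_page(title):
    """Fetch a Wikipedia page by title, handling the two usual failure modes."""
    try:
        return wikipedia.page(title)
    except wikipedia.exceptions.DisambiguationError as e:
        # The title is ambiguous; e.options lists candidate titles.
        return wikipedia.page(e.options[0])
    except wikipedia.exceptions.PageError:
        return None  # no article matches the title

page = fetch_page("Python (programming language)")
if page is not None:
    print(page.title, page.url)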

Example 1: findRelevantArticles

def findRelevantArticles(term, data_path='.'):
    articleList = []
    articles = wikipedia.search(term)  # suggestion=False (the default); no clear use for it now

    for article in articles:
        try:
            article = wikipedia.page(article)
            category_keywords = set(itertools.chain.from_iterable(
                category.lower().split() for category in article.categories))
            if len(category_keywords & relevant_categories) > 0:  # relevant_categories: module-level set
                articlefilename = "content_" + article.title.lower() + ".txt"
                if os.path.isfile(articlefilename):
                    articlefilename = "content_" + article.title.lower() + '%s.txt' % (term + time.strftime("%Y%m%d-%H%M%S"))
                with codecs.open(os.path.join(data_path, articlefilename), 'w', 'utf-8') as outfile:
                    print(article.content, file=outfile)  # reuse the page already fetched
                articleList.append(article.title)
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError as e:
            # The term was ambiguous; try each suggested option instead
            for article in e.options:
                try:
                    article = wikipedia.page(article)
                    category_keywords = set(itertools.chain.from_iterable(
                        category.lower().split() for category in article.categories))
                    if len(category_keywords & relevant_categories) > 0:
                        articlefilename = "content_" + article.title.lower() + ".txt"
                        if os.path.isfile(articlefilename):
                            articlefilename = "content_" + article.title.lower() + '%s.txt' % (term + time.strftime("%Y%m%d-%H%M%S"))
                        with codecs.open(os.path.join(data_path, articlefilename), 'w', 'utf-8') as outfile:
                            print(article.content, file=outfile)
                        articleList.append(article.title)
                except wikipedia.exceptions.DisambiguationError:
                    pass
Author: mac389 | Project: computational-medical-knowledge | Lines: 32 | Source file: wikipedia_fetch.py


Example 2: searchWiki

def searchWiki(page):
    wikipedia.set_lang("fr")
    link = ''
    try:
#        p = wikipedia.page(page)
#        link = p.url
        propos = wikipedia.search(page, results=5, suggestion=False)
        for choice in propos:
            if choice.encode('utf-8') == page.encode('utf-8'):
                p = wikipedia.page(page)
                link = p.url
                break
            elif page in choice:
                # TODO
                print('There is a proposition containing the keyword')
                print(choice)
            else:
                try:
                    wikipedia.page(page, redirect=False, auto_suggest=False)
                except wikipedia.exceptions.RedirectError:
                    p = wikipedia.page(page)
                    link = p.url
                    break
                except:
                    link = ''
    except:
        link = ""
    return link  # .encode('utf-8')
Author: Droxef | Project: DigitalHumanities | Lines: 28 | Source file: wikipediaSearch.py


Example 3: info

def info(topic):
	response = {}
	response["type"] = "wiki"
	try:
		page = wikipedia.page(topic)
		response['title'] = page.title
		response['url'] = page.url
		response['content'] = wikipedia.summary(page.title,sentences = 5)
		if len(response['content']) < 200:
			response['content'] = wikipedia.summary(page.title,sentences = 10)
	except Exception as error:
		ename = type(error).__name__
		if ename == 'DisambiguationError':
			page = wikipedia.page(error.options[0])
			response['title'] = page.title
			response['url'] = page.url
			response['content'] = wikipedia.summary(page.title,sentences = 2)
			if len(response['content']) < 200:
				response['content'] = wikipedia.summary(page.title,sentences = 10)
		elif ename == 'HTTPTimeoutError':
			response['type'] = "error"
			response['error'] = "I couldn't reach wikipedia"
		elif ename == 'PageError':
			response['type'] = "error"
			response['error'] = "I couldn't find anything on wikipedia"
		else:
			response['type'] = "error"
			response['error'] = "Unknown error occurred while reaching wikipedia"

	return response
Author: diptanshuagrawal | Project: Darsy | Lines: 30 | Source file: wiki.py


Example 4: wikify

def wikify():
    """Returns the sentences with wikipedia links"""
    tag_dict = look_entity()
    link_dict = {}
    combined = combine()
    for item in tag_dict.keys():
        if item in combined.keys():
            try:
                wiki_page = wikipedia.page(combined[item])  # fetch once, reuse below
                if combined[item] in wiki_page.content:
                    link_dict[item] = wiki_page.url
            except wikipedia.exceptions.DisambiguationError as disamb:
                try:
                    link_dict[item] = wikipedia.page(disamb.options[0]).url
                except:
                    pass
            except wikipedia.exceptions.PageError:
                pass
        else:
            try:
                link_dict[item] = wikipedia.page(item).url
            except wikipedia.exceptions.DisambiguationError as disamb:
                try:
                    link_dict[item] = wikipedia.page(disamb.options[0]).url
                except:
                    pass
            except wikipedia.exceptions.PageError:
                pass
    return link_dict
Author: willempd | Project: wikify | Lines: 28 | Source file: wikify.py


Example 5: article

    def article(self, pageid=None, title=None):
        """ 
            Returns a specific article from Wikipedia, 
            given its pageid or its title.
            Downloads it if necessary
        """
        if pageid is None and title is None:
            raise Exception('Pageid and title can\'t be None at the same time')

        if pageid is None:
            d = self.db.articles.find_one({'title': title})

            if d is not None:
                return d # found it
        else:
            d = self.db.articles.find_one({'_id': pageid})

            if d is not None:
                return d # found it
            
        try:
            if pageid is not None:
                page = wikipedia.page(pageid=pageid)
            else:
                page = wikipedia.page(title=title)

        except (
            wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError,
            wikipedia.exceptions.WikipediaException,
            requests.exceptions.RequestException,
            ValueError # error decoding JSON response
        ):
            return

        time.sleep(0.5)  # brief pause between requests to be polite to the API

        # Even if we didn't find pageid or title, it still could be in the DB
        # since the title could have changed
        try:
            d = {
                '_id': int(page.pageid),
                'title': page.title,
                'content': page.content
            }
        except KeyboardInterrupt: # filter KeyboardInterrupt from here
            raise
        except Exception:
            return # can't add this entry

        self.db.articles.update_one(
            {'_id': d['_id']},
            {'$set': d},
            upsert=True
        )

        return d
Author: aparafita | Project: news-similarity | Lines: 60 | Source file: wikipedia.py


Example 6: wiki

def wiki(event, bot):
	""" wiki \x02searchterm\x02. Will search Wikipedia for \x02searchterm\x02. """
	if not event.argument: return bot.say(functionHelp(wiki))
	result = search(event.argument, results=1, suggestion=True)
	if not result[0]: 
		if result[1]: return bot.say("No results found. Did you mean \x02%s\x02?" % result[1])
		else: return bot.say("No results found.")
	
	errors = []
	attempt = 0
	p = None
	try:
		p = page(result[0])  # use preload=True when it's fixed: https://github.com/goldsmith/Wikipedia/issues/78
	except DisambiguationError as e:
		errors.append("Random disambig page: ")
		while attempt < 3:
			try:
				p = page(choice(e.options))
				break  # got a page, stop retrying
			except DisambiguationError:
				pass
			attempt += 1
	if not p: return bot.say("Gave up looking for an unambiguous entry from the disambiguation page.")
	
	if result[1]:
		errors.append("(SP: %s?) " % result[1])
	content = p.content[:800].replace("\n", " ").replace("====", "").replace("===", "").replace("==", "")
	
	bot.say(RESULT_RPL % ("".join(errors), p.url), strins=[p.title, content], fcfs=True)
Author: Clam- | Project: pyBurlyBot | Lines: 26 | Source file: pbm_wikipedia.py


Example 7: getContentFromLink

def getContentFromLink(link):
	try:
		linkText = wk.page(link, auto_suggest=False).content.lower()
	except wk.exceptions.DisambiguationError as e:
		# filter() returns an iterator in Python 3, so materialize a list first
		options = [x for x in e.options if "(disambiguation)" not in x]
		linkText = wk.page(options[0], auto_suggest=False).content.lower()
	return linkText
Author: melissa0831 | Project: OptimizedEntityLinking | Lines: 7 | Source file: cache.py


Example 8: disambiguationWikipedia

def disambiguationWikipedia(noun):
    """
    Disambiguation for Wikipedia errors
    """
    # Try to get wikipedia content
    try:
        wiki = wikipedia.page(noun)

    except wikipedia.exceptions.DisambiguationError as e:
        # Ambiguous title: fall back to the first suggested option
        new = e.options[0]
        try:
            wiki = wikipedia.page(new)
        except:
            return 'Null'

    except wikipedia.exceptions.PageError:
        # No exact page: fall back to the top search result
        new = wikipedia.search(noun)
        try:
            wiki = wikipedia.page(new[0])
        except:
            return 'Null'

    except:
        return 'Null'

    return wiki
Author: LaurenceArnold | Project: Vanilla-Android | Lines: 33 | Source file: eindopdracht.py


Example 9: search_wikipedia

def search_wikipedia(word):
    searchArr = wikipedia.search(word)

    wiki_results = []
    try: 
        try:
            for result in searchArr:
                #print("result: " + result)
                wiki_results.append(wikipedia.page(result, preload=False))
        except wikipedia.DisambiguationError as e:
            #print("disambiguation error on " + result)
            #print(e.with_traceback)
            try:
                for item in e.options:
                    #print("disambiguation error on " + item)
                    wiki_results.append(wikipedia.page(item, preload=False))
            except wikipedia.DisambiguationError as i:
                try:
                    for item in i.options:
                        #print("disambiguation error on " + item)
                        wiki_results.append(wikipedia.page(item, preload=False))
                except wikipedia.DisambiguationError:
                    pass
    except: 
        print("Something went wrong getting wikipedia results")
        pass

    return wiki_results
Author: kottofy | Project: PythonDjango-SearchApp | Lines: 28 | Source file: Search_Wikipedia.py


Example 10: page

    def page(title=None, pageid=None, auto_suggest=True, redirect=True):
        """
        The search term from the user may not correspond to a Wikipedia page,
        due to vagueness. There are two fallbacks: "redirect" and "disambiguation".
        :param auto_suggest: let Wikipedia find a valid page title for the query
        :return:
        """
        if pageid is not None:
            pageid = int(pageid)
            page = WikipediaArticle.objects(pageid=pageid)
        else:
            page = WikipediaArticle.objects(title=title)
            if not page:
                results, suggestion = WikipediaWrapper.search(
                    title,
                    results=1,
                    suggestion=True)
                suggested_term = suggestion or results[0]
                page = WikipediaArticle.objects(title=suggested_term)
        if page:
            page = page[0]
        else:
            try:
                page = wikipedia.page(title=title,
                                      pageid=pageid,
                                      auto_suggest=auto_suggest,
                                      redirect=redirect)
            except UnicodeDecodeError:
                page = wikipedia.page(title=str_util.normal_str(title),
                                      pageid=pageid,
                                      auto_suggest=auto_suggest,
                                      redirect=redirect)
        if type(page) is wikipedia.WikipediaPage:
            page = WikipediaWrapper.save_page(page)
        return page
Author: imoonkey | Project: eduwiki | Lines: 35 | Source file: wikipedia_util.py


Example 11: climb_tree

    def climb_tree(self):
        """Climb the tree"""

        branch_found = True
        cur_branch = self.seed
        prev_node = None
        while cur_branch is not None:
            self.logger.debug('Current branch is %s'%cur_branch)
            #Get wikipedia page
            try:
                cur_page = wikipedia.page(cur_branch)
            except wikipedia.PageError:
                self.logger.exception('Cannot find page for %s. Ending search.'%cur_branch)
                self.tree.node(cur_branch)
                self.tree.edge(cur_branch,prev_node)
                cur_branch = None
                continue
            except wikipedia.DisambiguationError:
                self.logger.exception('Multiple pages found for query %s. Adding "(physicist)" and searching again.', cur_branch)
                cur_page = wikipedia.page(cur_branch + ' (physicist)')

            #parse the table
            html_source = BeautifulSoup(cur_page.html(),'html.parser')
            advisor = self._search_info_table(html_source,['Doctoral advisor','Doctoral advisors','Academic advisors','Academic advisor'])
            alma_mater = self._search_info_table(html_source,'Alma mater')
            students = self._search_info_table(html_source,'Doctoral students')
            #add to graph
            self.tree.node(cur_branch,cur_branch+'\n'+self._none_filter(alma_mater))
            if prev_node is not None:
                self.tree.edge(cur_branch,prev_node)
            #update
            prev_node = cur_branch
            cur_branch = self._res_filter(advisor)
Author: wtbarnes | Project: arbordemia | Lines: 33 | Source file: harvest.py


Example 12: link_checker

def link_checker(ngram):
	''' Checks if the word gives a valid wikipedia link '''
	try:
		page = wikipedia.page(ngram)
		link = page.url
		return link
	except wikipedia.exceptions.DisambiguationError:
		#link = ngram.split(" ")
		#newlink = "_".join(ngram)
		link = 'http://en.wikipedia.org/wiki/' + ngram + '_(disambiguation)'
		return link
	except wikipedia.exceptions.PageError:
		# Drop title words such as "prime", "minister", "president" and retry
		wordlist = [word for word in ngram.split() if word.lower() not in ["prime", "minister", "president"]]
		ngram = " ".join(wordlist)
		try:
			page = wikipedia.page(ngram)
			link = page.url
			return link
		except wikipedia.exceptions.PageError:
			return -1
		except wikipedia.exceptions.DisambiguationError:
			return -1
Author: svansuylekom | Project: Project-Tekstanalyse | Lines: 28 | Source file: PTA.py


Example 13: collectFrom

def collectFrom(lang, start, hangang):
    wikipedia.set_lang(lang)
    start_page = wikipedia.page(start)  # fetch the seed page once instead of three times
    lookpa = start_page.links
    lookna = [start]  # titles already visited (keep titles, not page objects)
    corpus = str(start_page.content)
    while len(corpus) < hangang:
        random.shuffle(lookpa)
        item = lookpa[0]
        try:
            corpus += str(wikipedia.page(item).content)
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError:
            pass
        except KeyError:
            pass
        lookna.append(item)
        lookpa.remove(item)
        try:
            for page in wikipedia.page(item).links:
                if page not in lookpa:
                    if page not in lookna:
                        lookpa.append(page)
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError:
            pass
        except KeyError:
            pass
        print('Corpus = ' + str(len(corpus)) + '   Searched = ' + str(len(lookna)) + '  Still = ' + str(len(lookpa)))

    with open(lang + 'FromWikiCorp.txt', 'w') as f:
        f.write(corpus)
Author: mcooper | Project: Philippines-Languages | Lines: 34 | Source file: wikipediasearch.py


Example 14: context_data

    def context_data(self):
        """
        Gather data from Wikipedia based on user-inputed SUBJECT. 
        """
        text_list, visited, visitedSeeAlso, queue = [], set(), set(), list() 
        queue.append((self.subject, self.depth))

        while len(queue) > 0:
            next = queue.pop(0)
            try:
                if next[0] not in visited and next[1] >= 0:
                    visited.add(next[0])
                    results = wikipedia.search(next[0], self.max_searches, False)
                    for pagename in results:
                        queue.append((pagename, next[1]-1))
                    text_list.extend(wikipedia.page(next[0]).content.split())
            except:
                pass

        queue.append((self.subject, self.depth))
        while len(queue) > 0: 
            next = queue.pop(0)
            try:
                if next[0] not in visitedSeeAlso and next[1] >= 0:
                    visitedSeeAlso.add(next[0])
                    page = wikipedia.page(next[0])
                    for reference in page.section("See also").splitlines():
                        queue.append((reference, next[1] - 1))
                    text_list.extend(page.content.split())  # reuse the page fetched above
            except:
                pass
        return text_list
Author: simster7 | Project: WordBlock | Lines: 34 | Source file: SubjectContext.py


Example 15: getWikiPage

def getWikiPage(title):
  try:
    page = wikipedia.page(title)
  except wikipedia.exceptions.DisambiguationError as e:
    print(e.options)
    title = random.choice(e.options)
    page = wikipedia.page(title)
  return page
Author: sarahgarcin | Project: SpatialLab | Lines: 8 | Source file: import-wiki-article.py


Example 16: test_tax_holiday

    def test_tax_holiday(self):
        print("------ tax holiday test ------------")
        web_bot = wikipedia.page("Web Bot").title
        tax_holiday = wikipedia.page("Tax holiday").title

        article1 = Article(web_bot, repo=DataDict())
        article2 = Article(tax_holiday, op=op_backlinks, repo=DataDict())
        run_test(article1, article2)
        print('=========================================')
Author: billtsay | Project: cursornet | Lines: 9 | Source file: test.py


Example 17: test_impeachment

    def test_impeachment(self):
        print("------ Impeachment test ------------")
        impeachment = wikipedia.page("Impeachment").title
        tower = wikipedia.page("Trump Tower").title

        article1 = Article(impeachment, repo=DataDict())
        article2 = Article(tower, op=op_backlinks, repo=DataDict())
        run_test(article1, article2)
        print('=========================================')
Author: billtsay | Project: cursornet | Lines: 9 | Source file: test.py


Example 18: getText

def getText():
    commonHeaders = []
    popularity = []
    yourArticle = wikipedia.page("Obama")
    articles = []
    # extend, not append: wikipedia.search returns a list of titles, and
    # wikipedia.page below expects a single title string, not a list
    articles.extend(wikipedia.search("American politicans"))
    articles.extend(wikipedia.search("American presidents"))
    articles.extend(wikipedia.search("Hillary Clinton"))
    articles.extend(wikipedia.search("Bill Clinton"))
    articles.extend(wikipedia.search("George Washington"))
    articles.extend(wikipedia.search("John Kerry"))
    #articles.extend(wikipedia.search("John F. Kennedy"))
##    yourArticle = wikipedia.page("New York")
##    articles = wikipedia.search("New York")
##    articles.append(wikipedia.search("American cities"))
##    articles.append(wikipedia.search("Boston"))
##    articles.append(wikipedia.search("Paris"))
##    articles.append(wikipedia.search("San Francisco"))
##    articles.append(wikipedia.search("Sacramento"))
##    articles.append(wikipedia.search("Seattle"))
##    articles.append(wikipedia.search("Chicago"))
##    articles.append(wikipedia.search("St. Louis"))
##    articles.append(wikipedia.search("Las Vegas"))
##    articles.append(wikipedia.search("Hartford"))
##    articles.append(wikipedia.search("Trenton, NJ"))
##    articles.append(wikipedia.search("Washington D.C."))
##    articles.append(wikipedia.search("Boise"))
##    articles.append(wikipedia.search("Detroit"))
##    articles.append(wikipedia.search("Now Orleans"))
##    articles.append(wikipedia.search("Salt Lake City"))
    for i in articles:
        article = wikipedia.page(i).content
        headers = getSectionHeaders(article)
        for x in headers:
            if x not in commonHeaders:
                commonHeaders.append(x)
                popularity.append(1)
            else:
                assert(len(popularity) > 1)
                popularity[commonHeaders.index(x)] += 1
                print(x)
    print(commonHeaders)
    x = 0
    while (x < len(commonHeaders)):
        if (popularity[x] > 1):
            print(commonHeaders[x])
            print(popularity[x])
        x = x + 1

    # Figure out what kind of article this is
    # We can use the categories tag, if you've created it
    yourCategories = yourArticle.categories
    for category in yourCategories:
        #print(category)
        break
    return
Author: therandomcode | Project: WikiWriter | Lines: 56 | Source file: main-python.py


Example 19: wikipediaSearch

    def wikipediaSearch(self):
        LOGGER.info('Querying Wikipedia')
        neighborhood = False
        if self.location['neighborhood'] != '':
            neighborhood = True
            searchTerm = self.location['neighborhood'] + ' ' + self.location['city']
        elif self.location['neighborhood'] == '' and self.location['city'] != '' and self.location['region'] != '':
            searchTerm = self.location['city'] + ' ' + self.location['region']
        elif self.location['place_name'] != '':
            searchTerm = self.location['place_name']
        # NOTE: searchTerm stays unbound if none of the location fields are set

        LOGGER.info('WIKI SEARCH TERM: ' + searchTerm)
        wikiPages = list()
        try:
            LOGGER.info('trying first wiki query')
            results = wikipedia.search(searchTerm)
            if len(results) != 0:
                if len(results) >= 3:
                    results = results[:3]
                for result in results:
                    try:
                        page = wikipedia.page(result)
                        wikiPages.append(page.content)
                    except wikipedia.exceptions.DisambiguationError:
                        pass
                    except wikipedia.exceptions.PageError:
                        pass
        except wikipedia.exceptions.DisambiguationError as e:
            # The query itself was ambiguous; try up to three suggested options
            if len(e.options) != 0:
                if len(e.options) >= 3:
                    e.options = e.options[:3]
                for opt in e.options:
                    try:
                        page = wikipedia.page(opt)
                        wikiPages.append(page.content)
                    except wikipedia.exceptions.DisambiguationError:
                        pass
                    except wikipedia.exceptions.PageError:
                        pass

        allText = ''
        if len(wikiPages) != 0:
            for page in wikiPages:
                allText += page

        self.results['wikipedia'] = allText
Author: jcharry | Project: tale_of_user_defined_location | Lines: 56 | Source file: api_requests.py


Example 20: new_article

def new_article(new_title, old_title):
    # Takes two arguments (current page title and old page title). The old title
    # is kept in case of disambiguation, when the user needs to choose another link.
    try:
        new_page = wikipedia.page(new_title)
        show_links(new_page)

    # Deals with wiki disambiguation error
    except wikipedia.exceptions.DisambiguationError:
        print("An error occurred (due to disambiguation), please choose another link.")
        show_links(wikipedia.page(old_title))
Author: pla7a | Project: GettingToPhilosophy | Lines: 10 | Source file: g2p.py



Note: The wikipedia.page examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Do not republish without permission.

