Python mechanize.Browser Class Code Examples


This article collects typical code examples showing how mechanize.Browser is used in Python. If you are wondering what the Browser class is for, how to call it, or what real-world Browser code looks like, the curated class examples below should help.



Twenty code examples of the Browser class are shown below, sorted by popularity by default. You can upvote the ones you like or find useful; your feedback helps the system recommend better Python code examples.
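
Before the project-specific examples, here is a minimal, self-contained sketch of the basic mechanize.Browser workflow they all build on: open a page, select a form, fill it in, and submit. The URL and field names are illustrative placeholders, not taken from any of the projects below.

# Minimal mechanize.Browser sketch (illustrative only; URL and field names are placeholders).
import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)                 # ignore robots.txt, as most examples below do
br.addheaders = [('User-agent', 'Mozilla/5.0 (compatible)')]

br.open("http://example.com/login")         # placeholder URL
br.select_form(nr=0)                        # select the first form on the page
br["username"] = "user"                     # placeholder field names
br["password"] = "secret"
response = br.submit()                      # submit the form and get the response
print(response.read()[:200])                # peek at the response body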

Example 1: lookup_offers_isbn

def lookup_offers_isbn(item_id):
    offers = []
    br = Browser()
    res = br.open("http://books.half.ebay.com/ws/web/HalfISBNSearch?isbn=%s" % item_id)
    soup = BeautifulSoup(res.read())
    ratings = soup.findAll('span',{'class': 'Header'})
    for r in ratings:
        rating = r.text
        prices= r.parent.parent.parent.findNextSibling('table').findAll('tr')[1:]
        linktext  = r.parent.parent.parent.findNextSiblings('table')[1].find(text=re.compile('View all.*'))
        if linktext:
            all = linktext.parent['href']
            # get link
            res2 = br.open(all)
            soup = BeautifulSoup(res2.read())
            rating2 = soup.findAll('span',{'class': 'Header'})
            prices = rating2[0].parent.parent.parent.parent.findAll('table')[3].findAll('tr')[1:]
        for row in prices:
            m = re.search("itemid=(\d+)",row.find('a',href=re.compile("itemid=\d+"))['href'])
            itemid=m.group(1)
            seller = row.find('a',{'class':'SellerDisplayLink'}).text
            price = row.find('span',{'class':'ItemPrice'}).text
            price = string.replace(price,",","")
            if price.startswith("$"):
                price = price[1:]
            offers.append({ 'rating' : rating, 'seller' : seller, 'listing_id' : itemid, 'price' : str(price) })
            print rating,seller,itemid,price
    return offers
Developer: clarsen, Project: booksell, Lines: 28, Source: halfcom.py


Example 2: down_image

 def down_image(self, img):
     print "down image from " + img
     down_br = Browser()
     down_cj = CookieJar()
     down_br.set_cookiejar(down_cj)
     fn = tempfile.mktemp(suffix='.png')
     return down_br.retrieve(img, filename = fn)[0]
Developer: lite, Project: yebob_utils, Lines: 7, Source: Yebob.py


Example 3: on_task_start

    def on_task_start(self, task, config):
        try:
            from mechanize import Browser
        except ImportError:
            raise PluginError('mechanize required (python module), please install it.', log)

        userfield = config.get('userfield', 'username')
        passfield = config.get('passfield', 'password')

        url = config['url']
        username = config['username']
        password = config['password']

        br = Browser()
        br.set_handle_robots(False)
        try:
            br.open(url)
        except Exception as e:
            # TODO: improve error handling
            raise PluginError('Unable to post login form', log)

        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)
        #br.set_debug_http(True)

        for form in br.forms():
            loginform = form

            try:
                loginform[userfield] = username
                loginform[passfield] = password
                break
            except Exception as e:
                pass
        else:
            received = os.path.join(task.manager.config_base, 'received')
            if not os.path.isdir(received):
                os.mkdir(received)
            filename = os.path.join(received, '%s.formlogin.html' % task.name)
            with open(filename, 'w') as f:
                f.write(br.response().get_data())
            log.critical('I have saved the login page content to %s for you to view' % filename)
            raise PluginError('Unable to find login fields', log)

        br.form = loginform

        br.submit()

        cookiejar = br._ua_handlers["_cookies"].cookiejar

        # Add cookiejar to our requests session
        task.requests.add_cookiejar(cookiejar)
        # Add handler to urllib2 default opener for backwards compatibility
        handler = urllib2.HTTPCookieProcessor(cookiejar)
        if urllib2._opener:
            log.debug('Adding HTTPCookieProcessor to default opener')
            urllib2._opener.add_handler(handler)
        else:
            log.debug('Creating new opener and installing it')
            urllib2.install_opener(urllib2.build_opener(handler))
Developer: Anaerin, Project: Flexget, Lines: 60, Source: plugin_formlogin.py


Example 4: login_to_kaggle

    def login_to_kaggle(self):  
        """ Login to Kaggle website
        Parameters:
        -----------
        None
        
        Returns:
        browser: Browser
            a mechanizer Browser object to be used for further access to site
        """          
        
        if self.verbose:
            print("Logging in to Kaggle..."),

        br = Browser()
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)
        
        br.open(self.kag_login_url)
        
        br.select_form(nr=0)
        br['UserName'] = self.kag_username
        br['Password'] = self.kag_password
        br.submit(nr=0)
        
        if br.title() == "Login | Kaggle":
            raise KaggleError("Unable to login Kaggle with username %s (response title: %s)" % (self.kag_username,br.title()))
        
        if self.verbose:
            print("done!")
        
        return br
Developer: joostgp, Project: ml_toolbox, Lines: 32, Source: kaggle.py


Example 5: GetXboxLiveFriends

  def GetXboxLiveFriends(self):
    """Return a list of tuples (gamer_tag, gamer_presence)."""
    br = Browser()
    br.open('http://live.xbox.com/en-US/profile/Friends.aspx')
    br.select_form(name='f1')
    br['login'] = self.login
    br['passwd'] = self.passwd
    br.submit()  # Submit login form.
    br.select_form(name='fmHF')
    response = br.submit()  # Submit redirect form.
    friend_list = response.read()
    response.close()

    soup = BeautifulSoup(friend_list)
    friend_table = soup.find('table', {'class': FRIEND_TABLE_CLASS})
    if friend_table is None:
      raise XboxLiveError('Parsing failure.')

    friends = []
    for row in friend_table.contents[1:]:  # Skip header row.
      gamer_tag = row.find('td', {'class': GAMER_TAG_CLASS})
      gamer_tag = str(gamer_tag.find('a').contents[0])
      gamer_presence = row.find('td', {'class': GAMER_PRESENCE_CLASS})
      gamer_presence = str(gamer_presence.find('h4').contents[0])
      friends.append((gamer_tag, gamer_presence))
    return friends
Developer: damonkohler, Project: pypert, Lines: 26, Source: xbox_live.py


Example 6: newBrowser

 def newBrowser(self):
   # Create new browsers all the time because its data structures grow
   # unboundedly (texas#135)
   br = Browser()
   br.add_password(self.hostname, self.username, self.password)
   br.set_handle_robots(None)
   return br
Developer: abubeck, Project: cob_bringup_overlays, Lines: 7, Source: ddwrt.py


Example 7: fetch_laws_page_from_year

def fetch_laws_page_from_year(year, temporaryDirectory):  
    lawsDirectory = os.path.join(temporaryDirectory, 'all_laws');
    if not os.path.exists(lawsDirectory):
        os.makedirs(lawsDirectory)
        print('The laws directory did not exist so I created it')
        print(lawsDirectory)

    fileToWriteLawsListIn = os.path.join(lawsDirectory, year + '.html')
    print('File to write in is ' + fileToWriteLawsListIn)
    lawWasNotDownloaded = not os.path.isfile(fileToWriteLawsListIn)
    if lawWasNotDownloaded:
        startDownload = int(round(time.time() * 1000))
        
        print('Getting laws from year ' + year)
        url = get_ugly_url_for_laws(year)
        browser = Browser()
        browser.open(url)
        html = browser.response().get_data()

        with open(fileToWriteLawsListIn, 'a') as f: 
            f.write (html)

        endDownload = int(round(time.time() * 1000))
        print('Finished downloading laws for year ' + year + '. It took only ' 
              + str(endDownload - startDownload) + ' milliseconds')
    else:
        print('This year was already fetched ' + year 
              + '. Skipping to the next year')
Developer: andreicristianpetcu, Project: pubdocs, Lines: 28, Source: 1_clrro_fetch_laws_per_year.py


Example 8: __init__

 def __init__(self):
     Browser.__init__(self)
     self.set_handle_robots(False)
     self.addheaders = [(
         'Accept',
         'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
     )]
Developer: vinceau, Project: browserplus, Lines: 7, Source: browserplus.py


Example 9: mrsc

def mrsc(gid):
	mech = Browser()
	url = "http://espn.go.com/ncf/playbyplay?gameId="+gid+"&period=0"
	#print url
	page = mech.open(url)
	html = page.read()
	print url
	if html.count('Play-by-play not currently available.') == 0:
		soup = BeautifulSoup(html)
		table = soup.findAll("table")[-1]
		rows = table.findAll('tr')[::-1]
		c=0
		toret=''
		keepgoing=True
		cup=html[::-1][:html[::-1].find(' left; font: 700 14px/25px Helvetica,Arial,sans-serif;" colspan="3"><div style="margin-right: 6px;"'[::-1])][::-1]
		cup=cup[cup.find('a name="')+len('a name="'):]
		cup=cup[:cup.find('"')]
		while c < 7 and keepgoing and c < len(rows):
			cols = rows[c].findAll('td')
			#print rows[c]
			if len(cols) > 2:
				#if str(cols[2]) != '<td>&nbsp;</td>' and str(cols[3]) != '<td>&nbsp;</td>':
				toret=str(' '.join(cols[0].findAll(text=True)))+'. '+str(' '.join(cols[1].findAll(text=True)))
				keepgoing=False
			c=c+1
		toret=toret.replace('  ',' ').strip()
		if toret != '': toret=toret+' '
		poss=''
		if cup != '' and len(cup) < 30: poss=cup
	else:
		toret=''
		poss=''
	return [toret,poss]
Developer: epmatsw, Project: FootballBot, Lines: 33, Source: following.py


Example 10: getLastEntries

    def getLastEntries(self, url, lastDate):
        """ get all entries from an HTML table list if it is newer 
        than prevEntry. Format is from graz FF site """

        mech = Browser()
        mech.set_handle_robots(False)
        try:
            page = mech.open(url)
        except urllib2.HTTPError:
            if url == None:
                url = "(empty url)"
            self.logger.error("Could not read url "+url)
            return []
        html = page.read()
        soup = BeautifulSoup(html)
        link = soup.findAll('a')
        if len(link) == 0:
            logger.error('No links in the page: %s', url)
            return []
        returnLinks = []

        for l in link:
            try:
                date = datetime.strptime(l.string, "topo-%Y-%m-%d-%H:%M.tsv.gz")
            except ValueError:
                continue
            if date > lastDate:
                returnLinks.append(url+l.string)
            else:
                break

        return returnLinks
Developer: leonardomaccari, Project: communityNetworkMonitor, Lines: 32, Source: FFWien.py


Example 11: main

def main():

    parser = argparse.ArgumentParser()

    parser.add_argument('--total-jobs', metavar='<total-jobs>', help='total number of jobs downloading documents', type=int)
    parser.add_argument('--job', metavar='<job>', help='job number between 1 and <total-jobs>', type=int)

    args = parser.parse_args()
    check_args(parser, args)

    br = Browser()
    br.set_handle_robots(False)
#    br.set_debug_responses(True)

    data = urlencode({'user': USERNAME, 'pass': getpass()})

    document_urls = [LOGIN_PREFIX + url.strip() + '&view=etext' for url in file(DOCUMENT_URLS_FILE)]

    start = args.job - 1
    step = args.total_jobs

    for url in iterview(document_urls[start::step]):
        try:
            get_document_pages(br, url, data)
        except Exception as e:
            print >> sys.stderr, '\n', (url, e)
Developer: hannawallach, Project: declassified-documents, Lines: 26, Source: get_document_pages.py


Example 12: gen_path

def gen_path(request):
    x = json.loads(request.POST['data'])    #fetches data
    print x
    adj_mat = []    #creates empty adjacency matrix
    i1 = j1 = 0
    num_cities = len(x)
    for i in x:
        tmp_mat = []
        for j in x:
            if i!=j:
                API_KEY = "AIzaSyDBOSr6_XxvISPGX54P9bPnooE3RUpRTp0"
                orig_coord = x[i]
                dest_coord = x[j]
                br = Browser()  #creates mechanize instance
                br.set_handle_robots(False)
                # print "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)
                result = br.open("https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)).read()    #makes a call to GoogleMapsAPI
                json_result = json.loads(result)
                tmp_mat.append(int(json_result['rows'][0]['elements'][0]['distance']['value']))
            else:
                tmp_mat.append(0)
        adj_mat.append(tmp_mat)



    obj = ArpanDaErCode()
    ans = ""
    ans = ArpanDaErCode.solve(obj, adj_mat, num_cities) #gets sequence from model
    print ans
    ret = {'data': [str(ii) for ii in ans]}

    return HttpResponse(str(json.dumps(ret)))   #returns the sequens in JSON format for the JS to handle
Developer: RijuSen1996, Project: bppimt_hackon, Lines: 32, Source: views.py


Example 13: scrap_query

def scrap_query(query, bang=None):

    r = ddg_query('imbd ' + query, bang=bang)
    if 'redirect' in dir(r) and 'primary' in dir(r.redirect):
        url = r.redirect.primary
    else:
        logger.info('Could not find imdb searchpage from DuckDuckGo bang')
        return None

    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2;\
                        WOW64) AppleWebKit/537.11 (KHTML, like Gecko)\
                        Chrome/23.0.1271.97 Safari/537.11')]

    r = br.open(url)
    soup = BeautifulSoup(r)


    for link in soup.find_all('a'):
        href = link.get('href','')
        match = re.search(r"imdb\.com/.*tt(?P<number>[^/]*)", href)
        if match:
            imdb_id = check_imdb(match.group('number'))
            return imdb_id

    return None
Developer: getzze, Project: imdbfetcher, Lines: 27, Source: searchengine_api.py


Example 14: __init__

    def __init__(self,options):
        (self.configfile,self.config,self.moduleconfig) = self.initialize_config(options)
        # If we have a particular log level for this module, use that,
        # otherwise use the global log level. If that isn't defined
        # either, use the INFO loglevel.
        if 'log' in self.moduleconfig:
            loglevel = self.moduleconfig['log']
        else:
            loglevel = self.config.get('log','INFO')
        self.log = self.setup_logger(self.module_dir,loglevel)

        self.base_dir = self.config['datadir']

        if self.browser_use_robustfactory:
            self.browser = Browser(factory=RobustFactory())
        else:
            self.browser = Browser()
        self.browser.addheaders = [('User-agent', 'lagen.nu-bot ([email protected])')]

        # logger = logging.getLogger("mechanize")
        # logger.addHandler(logging.StreamHandler(sys.stdout))
        # logger.setLevel(logging.DEBUG)
        # self.browser.set_debug_http(True)
        # self.browser.set_debug_responses(True)
        # self.browser.set_debug_redirects(True)


        self.ns = {'rinfo':  Namespace(Util.ns['rinfo']),
                   'rinfoex':Namespace(Util.ns['rinfoex']),
                   'dct':    Namespace(Util.ns['dct'])}
Developer: staffanm, Project: legacy.lagen.nu, Lines: 30, Source: DocumentRepository.py


Example 15: num_itens

    def num_itens(self,busca, data_inicial, data_final):
        br = Browser()
        response1 = \
            br.open("http://portal.in.gov.br/in/imprensa1/pesquisa_avancada")
        br.select_form(name="formBusca")
        br["texto_todas"] = busca
        br["dataPublicacaoInicial"] = data_inicial[:5]
        br["dataPublicacaoFinal"] = data_final[:5]
        br["ano"] = [data_final[-4:]]
        br["idJornal"] = ["1", "2", "3", "4"]
#        print(br.form)
        br.form.action = \
            "http://www.in.gov.br/imprensa/pesquisa/pesquisaresultado.jsp"
        res = br.submit()
        texto = res.read()
        x1, x2, x3 = texto.partition("ite")
        x1, x2, x3 = x1.rpartition(">")
        
        try:
            arq = open(self.retornar_html(),"w")
            arq.write(texto)
            arq.close()
        except:
            print("Erro ao tentar salvar página de buscas!")
        
        x3 = x3.replace(",","")
        x3 = x3.strip()
        #Retorna o número de itens achados
        if x3 == "Um":
            return 1
        
        if len(x3) > 0:
            return int(x3)
        else:
            return 0
Developer: andresmrm, Project: trazdia, Lines: 35, Source: proc.py


Example 16: name

def name(request, string):

    movie = string.replace("_", "+")
    br = Browser()
    br.open("http://www.imdb.com/find?s=tt&q="+movie)
    link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
    data = br.follow_link(link)
    soup = BeautifulSoup(data.read())

    title = soup.find('h1').contents[0].strip()
    name = title.replace("&nbsp;", "")
    rating = soup.find('span', itemprop='ratingValue').contents[0]
    duration = soup.find('time', itemprop='duration').contents[0].strip()
    releaseDate = soup.find('a', title='See more release dates').contents[0]
    director = soup.find('span', itemprop='director').getText()
    actor_all = []
    actors = soup.findAll('span', itemprop='actors')
    for i in range(len(actors)):
        actor_all.append((actors[i].contents[1]).getText())
    genres_all = []
    genres = soup.findAll('span', itemprop='genre')
    for i in range(len(genres)):
        genres_all.append(genres[i].getText())

    jsonObject = {}
    jsonObject['Name:'] = name
    jsonObject['IMDB Rating:'] = rating
    jsonObject['Duration'] = duration
    jsonObject["Actors: "] = actor_all
    jsonObject['Director:'] = director
    jsonObject['Genres'] = genres_all
    jsonObject['Release Date'] = releaseDate
    movie_details = json.dumps(jsonObject)
    return HttpResponse(movie_details)
Developer: manimanjari, Project: imdbMovieFinder, Lines: 34, Source: views.py


Example 17: __init__

    def __init__(self, request, username,password,context=''):
        """
        When the class is instantiated, it logs in using the supplied username and password.
        If browser_login is non-empty, it holds the cookies saved from the last logged-in session;
        we reload those cookies into a new Browser, which saves the few seconds a fresh login would take.
        """
        self.context=context
        self.request=request
        registry = self.request.registry
        self.epitool=registry.getUtility(IEPIUtility)

        self.username = username
        self.password = password
        self.browser_login, elk = self.epitool.recoverBrowserSession(self.request, self.username,'presencia')
        if self.browser_login:
          self.br=Browser()
          self.br.set_handle_robots(False)
          cj = LWPCookieJar()
          self.br.set_cookiejar(cj)
          for co in self.browser_login:
              ck = Cookie(version=co['version'], name=co['name'], value=co['value'], port=co['port'], port_specified=co['port_specified'], domain=co['domain'], domain_specified=co['domain_specified'], domain_initial_dot=co['domain_initial_dot'], path=co['path'], path_specified=co['path_specified'], secure=co['secure'], expires=co['expires'], discard=co['discard'], comment=co['comment'], comment_url=co['comment_url'], rest=co['rest'])
              cj.set_cookie(ck)
          print "Logging-in into presencia via browser"
        else:
          self.br = Browser()
          self.br.set_handle_equiv(False)
          self.login(message=u"Logging-in into presencia via regular login")
          return
Developer: sneridagh, Project: epi, Lines: 28, Source: presencia.py
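
The example above reuses cookies saved from an earlier session so the login step can be skipped. As a hedged illustration of that idea (my own sketch, not code from the epi project; the file name, URL, and field names are placeholders), a mechanize session can be persisted and restored with an LWPCookieJar:

# Illustrative sketch only: persist a mechanize session so a later run can reuse
# the cookies instead of logging in again. Names and URLs are placeholders.
import mechanize
from cookielib import LWPCookieJar          # http.cookiejar.LWPCookieJar on Python 3

cj = LWPCookieJar("session_cookies.txt")
br = mechanize.Browser()
br.set_cookiejar(cj)
br.set_handle_robots(False)

br.open("http://example.com/login")         # placeholder login URL
br.select_form(nr=0)
br["username"] = "user"                     # placeholder field names
br["password"] = "secret"
br.submit()
cj.save(ignore_discard=True, ignore_expires=True)   # write session cookies to disk

# A later run can restore the session without logging in:
cj.load("session_cookies.txt", ignore_discard=True, ignore_expires=True)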


Example 18: parseFeeds

 def parseFeeds(self):
     mech = Browser()
     mech.addheaders = [ ('User-agent', 'Mozilla/5.0 (compatible)') ]
     mech.set_handle_robots(False)
     for url in self.feedUrls:
     #url = "http://feeds.feedburner.com/PurdueEngNews?format=xml"
         page = mech.open(url)
         html = page.read()
         soup = BeautifulStoneSoup(html)
         headlines = []
         descriptions = []
         i=0
         self.newsList = []
         for item in soup.findAll('item'):
             if (i > 20):
                 break
             date = item.find('pubdate')
             title = item.find('title')
             link = item.find('link')
             desc = item.find('description')
             if (len(title.contents) > 0):
                 title2 = title.contents[0]
             else:
                 title2 = 'None'
             self.newsList.append(NewsStory(date.contents[0], title2, link.contents[0], \
                 desc.contents[0]))
             i+=1
         for story in self.newsList:
             headlines.append(story.title)
             descriptions.append(story.link)
             #story.display()
         self.headlineList.append(headlines)
         self.descList.append(descriptions)
     self.populateTopicList()
Developer: jevinskie, Project: purdue-kiosk, Lines: 34, Source: newshelper.py


Example 19: google

def google(query):
    print("\n\t[!] Searching on Google...\n")
    print("[QUERY] >> " + query)

    try:
        query = query.replace(" ", "+")
        req = "https://www.google.com.br/search?q=%s&num=50&start=0" % query
        br = Browser()
        br.set_handle_robots(False)
        br.addheaders = [("User-agent", "chrome")]
        html = br.open(req).read()
        soup = BeautifulSoup(html, "html5lib")

        with open("./output/google-%s.txt" % query[8:], "w") as log:
            for results in soup.findAll(attrs={"class": "g"}):
                for title in results.findAll("h3", attrs={"class": "r"}):
                    t = title.text
                    t = t.title()
                    for link in results.findAll(attrs={"class": "s"}):
                        l = link.cite.text
                        print(t)
                        print(l + '\n')
                        log.write(str(l) + '\n')

    except Exception as e:
        print(e)
Developer: Pedro-Souza, Project: My-Scripts, Lines: 26, Source: GoogleSearch.py


Example 20: __init__

class Du8Doc:

    def __init__(self):
        self.br = Browser()
        
    def from_html(self, html):
        text = re.sub("<.+>\n", "", html)
        text = re.sub("</.+>\n", "", text)
        text = re.sub('(<br/?>\s*)+', '\n', text)
        text = re.sub('&nbsp;', ' ', text)
        return text

    def get_links(self, url):
        res = self.br.open(url)
        data = res.get_data() 
        soup = BeautifulSoup(data, "html5lib")
        div_content = soup.find('table')
        urls = div_content.find_all("a")
        return [url.get('href') for url in urls ]        
        
    def get_content(self, link):
        res = self.br.open(link)
        data = res.get_data() 
        soup = BeautifulSoup(data, "html5lib")
        title, chapter = soup.html.title.string.split("-")[0:2]
        div_content = soup.find(id="content").prettify()
        content = self.from_html(div_content)
        return title, chapter, content
Developer: lite, Project: du8_utils, Lines: 28, Source: du8.py



Note: The mechanize.Browser class examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Please do not reproduce without permission.

