
Python req_proxy.main Function Code Examples


This article collects typical usage examples of the Python req_proxy.main function. If you are wondering how main works, how to call it, or what real-world usage looks like, the curated examples below should help.

The following shows 20 code examples of the main function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
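All of the examples share one contract: req_proxy.main(url) fetches a page, typically through a proxy, and returns its HTML as a string; on failure it returns a falsy value, which callers handle by retrying or skipping. The req_proxy module itself is not reproduced on this page, so the following is only a minimal sketch of that assumed interface, written with the requests library and a hypothetical PROXIES pool:

# Minimal sketch of the interface the examples rely on -- not the actual
# req_proxy module. PROXIES is a hypothetical placeholder pool.
import random

import requests

PROXIES = ["http://127.0.0.1:8080"]  # replace with real proxy addresses

def main(url):
    """Fetch url through a randomly chosen proxy.

    Returns the page HTML as a string, or None on failure; the examples
    below treat a falsy result as "retry" or "skip".
    """
    proxy = random.choice(PROXIES)
    try:
        resp = requests.get(url, proxies={"http": proxy, "https": proxy},
                            timeout=30)
        resp.raise_for_status()
        return resp.text
    except requests.RequestException:
        return None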

Example 1: main

def main():
    dte = "dir%s" % (time.strftime("%d%m%Y"))

    try:
        os.makedirs(dte)
    except OSError:
        pass  # directory already exists

    link = "http://www.myntra.com/"
    page = req_proxy.main(link)
    #page = phan_proxy.main(link)

    if not page:
        return main()  # fetch failed; retry

    soup = BeautifulSoup(page)

    tag_mklevel2 = soup.find_all("a", attrs={"class":"mk-level2 "})

    #print len(filter(None, map(menucollection, tag_mklevel2)))
    f = open("to_extract.txt", "w+")
    f2 = open("extracted.txt", "a+")

    print >>f, dte
    print >>f2, dte

    f.close()
    f2.close()

    return filter(None, map(menucollection, tag_mklevel2))
Developer: Jai-Prakash-Singh, Project: parse_myntra, Lines: 32, Source: page1_myntra.py


Example 2: main2

def main2(line, catlink, filename2):
    menu = line[0].strip()
    submnlink = line[1].strip()
    submntitle = line[2].strip()
    catlink = line[-2].strip()
    cattitle = line[-1].strip()

    page = req_proxy.main(catlink)

    soup = BeautifulSoup(page)

    tag_brand = soup.find("ul", attrs={"id": "brand"})

    tag_brand_a = []

    if tag_brand is not None:
        tag_brand_a = tag_brand.find_all("a")

    f = open(filename2, "a+")

    for al in tag_brand_a:
        brandlink = "%s%s" % ("http://www.flipkart.com", str(al.get("href")).strip())
        brandtitle = str(al.get_text()).replace("\n", " ").replace("\t", " ").replace("\r", " ").strip()
        print >> f, [menu, submnlink, submntitle, catlink, cattitle, brandlink, brandtitle]
        logging.debug([menu, submnlink, submntitle, catlink, cattitle, brandlink, brandtitle])

    f.close()
Developer: Jai-Prakash-Singh-213, Project: flipkar_parse, Lines: 27, Source: page2_first_brnd_collection_flipkart.py


Example 3: main

def main():

    directory = "diramazon%s" %(time.strftime("%d%m%y"))
    
    try:
        os.makedirs(directory)

    except OSError:
        pass  # directory already exists

    f = open("to_extract_amazon.txt", "w+")
    f.write(directory)
    f.close()

    f = open("extracted_amazon.txt", "a+")
    f.write(directory)
    f.close()

    filename = "%s/%s" %(directory, "f_mn_ctl_cat.txt")

    f = open(filename, "a+")

    link = "http://www.amazon.in/gp/site-directory/ref=sa_menu_top_fullstore"
    page = req_proxy.main(link)

    soup2 = BeautifulSoup(page)

    catlist = ["Beauty & Health", "Watches & Fashion Jewellery", "Handbags & Luggage"]

    map(functools.partial(main2, soup=soup2, f=f), catlist)

    f.close()
Developer: Jai-Prakash-Singh-213, Project: amazon_parse, Lines: 32, Source: page1_first_amazon.py


Example 4: get_product

    def get_product(self, line):

        """Given a ct, sct, scl, subsubcate, subcatelink, br and brl, find products."""
        line = ast.literal_eval(line)
        link = "http://www.junglee.com%s" % (line[-1])

        line[-1] = link

        page = req_proxy.main(link)
        soup = BeautifulSoup(page, "html.parser")

        pro_div_list = soup.find_all("div", attrs={"id":re.compile(r"^result_[\d]{1,}$")})

        filobj_write = self.pr_pl_filobj.write
        my_strip = self.my_strip

        # write one line per product link found on the result page
        for pro_div in pro_div_list:
            product_link = pro_div.find("img", attrs={"alt":"Product Details"}).find_parent("a").get("href")
            filobj_write(str(map(my_strip, line + [product_link])) + "\n")
Developer: Jai-Prakash-Singh, Project: snap_deal_parse, Lines: 27, Source: code1_first_junglee.py


Example 5: main

def main(line):
    line = ast.literal_eval(line)

    page = req_proxy.main(line[8])

    f = open("myfile.txt", "w+")
    print >>f, page
    f.close()

    #driver = phan_proxy.main(line[8])
    #page = driver.page_source

    #driver.delete_all_cookies()
    #driver.quit()
    
    #tree = html.fromstring(page)
    #meta_disc = tree.xpath("/html/head/meta[3]/text()")

    soup = BeautifulSoup(page, "html.parser")
    meta_disc = soup.find("meta", attrs={"name":"description"}).get("content")

    title = soup.find("title").get_text()
    desc = soup.find("section", attrs={"id":"product-detail"})
    dte = time.strftime("%d:%m:%Y")
    status = " "
    spec = " "
    vender = "zovi.com"
    brand = "zovi"

    print map(my_strip,  [line[7], line[11], line[0], line[5], line[2], 
                         line[3], brand, line[9], line[5], line[4], 
                         line[1], line[8], vender, title, meta_disc, line[10], 
                         desc, spec, dte, status])
Developer: Jai-Prakash-Singh-213, Project: zovi_parse, Lines: 33, Source: page2_first_zoviold1.py


Example 6: main

def main(line, directory):
    direc = "%s/%s/%s/%s" %(directory, line[2], line[3], line[5])

    try:
        os.makedirs(direc)
    except OSError:
        pass  # directory already exists

    filename = "%s/%s.csv" %(direc, line[5])
    f = open(filename, "a+")
    
    page = req_proxy.main(line[-6])
    soup = BeautifulSoup(page, "html.parser")

    title = soup.find("title").get_text()
    meta_disc = soup.find("meta", attrs={"name":"description"}).get("content")
    seller = "yebhi.com"
    item_desc = soup.find("div", attrs={"itemprop":"description"})
    dte = time.strftime("%d:%m:%Y")
    status = " "

    f.write(",".join(map(my_strip, [line[9], line[7], line[0], line[12], line[2],
                                    line[3], line[5], line[10],  line[11], '',  
                                    line[1], line[8], seller, title, 
                                    meta_disc, line[13], item_desc, '', dte, status] )) + "\n")
    f.close()

    logging.debug("inserted  ............")
Developer: Jai-Prakash-Singh-213, Project: yebhi_parse, Lines: 28, Source: page3_first_all_item_info.py


Example 7: main

def main(line, directory):
    filename = "%s/f_cl_tr_ct_st_scl_bt_bl.txt" %(directory)
    
    f = open(filename, "a+")
   
    page = req_proxy.main(line[-1])

    soup = BeautifulSoup(page, "html.parser")
    brand_tag_list = soup.find_all("span", attrs={"class":"forspan"})

    
    for brand_tag in brand_tag_list:
        if str(brand_tag.get_text()).strip() == "Brands":
            brand_box = brand_tag.find_parent("div", attrs={"class":"divli"})

    brand_list = brand_box.find_all("a", attrs={"class":"RefineByLink"})

    for brand_tag in brand_list:
        brand = str(brand_tag.get("relmselect"))
        brand_link = "http://www.yebhi.com%s" % (str(brand_tag.get("href")))
        #f.write(str([catelink, target, cate, sub_cat, sub_cat_link]) + "\n")
        f.write(str([line[0], line[1], line[2], line[3], line[4], brand, brand_link]) + "\n")

        logging.debug([line[0], line[1], line[2], line[3], line[4], brand, brand_link])

    f.close()
Developer: Jai-Prakash-Singh-213, Project: yebhi_parse, Lines: 26, Source: page1_second_brand_ext_yebhi.py


Example 8: main2

def main2(line, all_brand_link, f3):
    menu = line[0]
    ctlink = line[1]
    cttitle = line[2]
    sctlink = line[3]
    scttitle = line[4]

    page = req_proxy.main(all_brand_link)
    soup = BeautifulSoup(page)

    tag_brand_uls = soup.find_all("ul", attrs={"class":"column"})

    for ul_brand_link in tag_brand_uls:
        all_brand_link = ul_brand_link.find_all("a")
        for bl_bt in all_brand_link:
            bl = "%s%s" % ("http://www.amazon.in", bl_bt.get("href"))
            bt = str(bl_bt.span.get_text()).strip()

            filedata = [menu, ctlink, cttitle, sctlink, scttitle, bl, bt]
            filedata2 = map(mystrip, filedata)
            logging.debug(filedata2)
            #print >>f3, filedata2
            f4 = open(f3, "a+")
            print >>f4, filedata2
            f4.close()
Developer: Jai-Prakash-Singh-213, Project: amazon_parse, Lines: 25, Source: page3_first_amazonold.py


Example 9: main2

def main2(ml_mt_sub, filename):
    f = open(filename, "a+")

    menulink = ml_mt_sub[0]
    menutitle = ml_mt_sub[1]
    
    page = req_proxy.main(menulink)

    soup = BeautifulSoup(page, "html.parser")

    tag_box = soup.find_all("div", attrs={"class":"head clearfix"})
    
    for al in tag_box:
        cato = al.find("div")

        catolink = "%s%s" % ("http://www.homeshop18.com", str(cato.a.get("href")).strip())
        catotitle = cato.a.get_text()

        sub_cato = al.find_next_sibling("div")

        if sub_cato:
            sub_cato2 = sub_cato.find_all("a")

            for al in sub_cato2:
                sub_catolink = "%s%s" % ("http://www.homeshop18.com", str(al.get("href")).strip())
                sub_catotext = al.get("title")

                print >>f, ','.join([menulink, menutitle, catolink, catotitle, sub_catolink, sub_catotext])

        else:
            print >>f, ','.join([menulink, menutitle, catolink, catotitle, catolink, catotitle])

    f.close()
Developer: Jai-Prakash-Singh-213, Project: homesho_parse, Lines: 33, Source: page1_first_home18.py


Example 10: main2

def main2(mn_sbml_sbml, filename):
    menu = mn_sbml_sbml[0]
    submenulink = mn_sbml_sbml[1]
    submenutitle =  mn_sbml_sbml[2]
 
    page = req_proxy.main(submenulink)
    
    soup = BeautifulSoup(page)
    
    tag_cat = soup.find("div", attrs={"class":"nav-section first-section"})

    tag_cat_link = []

    if tag_cat is not None:
        tag_cat_link = tag_cat.find_all("a")

    f = open(filename, "a+")

    for l in tag_cat_link:
        cattitle = str(l.get("title")).strip()
        catlink = "%s%s" % ("http://www.flipkart.com", str(l.get("href")).strip())
        print >>f, [menu, submenulink, submenutitle, catlink, cattitle]
        logging.debug([menu, submenulink, submenutitle, catlink, cattitle])

    f.close()
Developer: Jai-Prakash-Singh-213, Project: flipkar_parse, Lines: 25, Source: page1_first_flipkart.py


Example 11: main3

def main3(i, q):
    for link in iter(q.get, None):
        page = req_proxy.main(link)
        soup = BeautifulSoup(page)

        tag_cat = soup.find_all("div", attrs={"class":"search-by-cat mt10 mb10 pl14 "})
    
        if tag_cat:
            cat_tag_a = tag_cat[0].find_all("a")

        else:
            cat_tag_a = []

        for cl in cat_tag_a:
            try:
                link_cl_ctxt_ccount.append([link, str(cl.get("href")).strip(), str(cl.get_text()).strip()])
                logging.debug((link, str(cl.get("href")).strip(), str(cl.get_text()).strip()))

            except Exception:
                pass

        time.sleep(i + 2)
        q.task_done()

    q.task_done()
Developer: Jai-Prakash-Singh, Project: parse_jobong, Lines: 25, Source: page1_first_jobon.py


Example 12: lnk_tl_vl_vl_prl_fun2

    def lnk_tl_vl_vl_prl_fun2(self, line, counter, cl = None):
        if cl is None:
            cl = line[-2]

        page = req_proxy.main(cl)
        soup = BeautifulSoup(page)

        pro_div = soup.find("ul", attrs={"id":"listing-product-list"})
        pro_link = pro_div.find_all("li", attrs={"class":"listing-unit"})

        for pl in pro_link:
            pl = pl.find("a", attrs={"class":"OneLinkNoTx"})
            pl = pl.get("href")
            print line + [pl]
            self.f2.write(str(line + [pl]) + "\n")

        counter += len(pro_link)

        if counter > 1000:
            return 0

        next_url = pro_div.find("li", attrs={"class":"next"})

        try:
            next_url = "http://download.cnet.com%s" % (next_url.a.get("href"))
            logging.debug(next_url)
            self.lnk_tl_vl_vl_prl_fun2(line, counter, next_url)
        except AttributeError:
            pass  # no "next" link on the last page
Developer: Jai-Prakash-Singh-213, Project: downloaddotcom_parse, Lines: 32, Source: page1_first_downloadsdotcom.py


Example 13: main5

def main5(i, q):
    for line, f in iter(q.get, None):
    
        link = line[-2].strip()

        page = req_proxy.main(link)
        soup = BeautifulSoup(page)

        tag_brand  =  soup.find("div", attrs={"id":"facet_brand"})
        
        try:
            tag_a = tag_brand.find_all("a")

        except AttributeError:
            tag_a = []  # no brand facet on this page

        for l in tag_a:
            try:
                brandlink = str(l.get("href")).strip()
                brandname = str(l.get_text()).strip()
                print >>f, "%s,%s,%s" % (','.join(line), brandlink, brandname)

            except Exception:
                pass

        time.sleep(i + 2)
        q.task_done()

    q.task_done()
Developer: Jai-Prakash-Singh, Project: parse_jobong, Lines: 29, Source: page1_first_jobon.py


Example 14: main

def main(link):
    page = req_proxy.main(link)
    soup = BeautifulSoup(page, "html.parser")
    
    all_page_list = soup.find_all("li", attrs={"class":"MyAccountPag"})

    threads = []

    t = threading.Thread(target=main2, args=(link,))
    threads.append(t)
    t.start()

    for page_link_tag in all_page_list:
        page_link = "http://www.yebhi.com%s" % (str(page_link_tag.a.get("href")))
        t = threading.Thread(target=main2, args=(page_link,))
        threads.append(t)
        t.start()

    main_thread = threading.currentThread()

    for t in threading.enumerate():
        if t is main_thread:
            continue
        logging.debug('joining %s', t.getName())
        t.join()
Developer: Jai-Prakash-Singh-213, Project: yebhi_parse, Lines: 25, Source: page2_first_item_clctold2.py


Example 15: main

def main(directory, mainlink):
    filename = "%s/complete_link_collection.txt" %(directory)

    f = open(filename, "a+")

    page = req_proxy.main(mainlink)

    soup = BeautifulSoup(page, "html.parser")
    cat_collection_box = soup.find("div", attrs={"class":"brw_bdr"})
    
    link_list = cat_collection_box.find_all("a")

    for link in link_list:
        link  = link.get("href")

        parsed = urlparse(link)

        if len(parsed.netloc) == 0:
            link = "http://www.snapdeal.com%s" %(link)
            
        f.write(str(link) + "\n")
        #print link

    f.close()
    
    return filename
Developer: Jai-Prakash-Singh, Project: snap_deal_parse, Lines: 26, Source: code1_second_snap_deal.py


Example 16: part_second

def part_second(line, filename2):

    f = open(filename2, "a+")

    line2 = line.strip().split(",")
    menulink  = line2[0].strip()
    menutitle = line2[1].strip()
    catlink = line2[2].strip()
    cattitle = line2[3].replace("\n","").strip()
    subcatlink = line2[-2].strip()
    subcattitle = line2[-1].strip()

    page = req_proxy.main(subcatlink)
    soup = BeautifulSoup(page, "html.parser")

    tag_brand = soup.find("div", text=re.compile("Brand"))
    blbtbc_list = tag_brand.parent.parent.find_all("li", attrs={"class":"srch_ctgry_item"})

    for blbtbc in blbtbc_list:
        brandlink = "%s%s" %("http://www.homeshop18.com", str(blbtbc.a.get("href")).strip())
        brandtitle = str(blbtbc.a.get( "title")).replace("\n", "").strip()

        print >>f, ','.join([menulink, menutitle, catlink, cattitle, subcatlink, subcattitle, brandlink, brandtitle])
        print menulink, menutitle, catlink, cattitle, subcatlink, subcattitle, brandlink, brandtitle
        print "*" * 145

    f.close()
Developer: Jai-Prakash-Singh-213, Project: homesho_parse, Lines: 27, Source: page1_first_home18.py


Example 17: main

def main():
    link = "http://www.homeshop18.com/all-stores.html"
    page = req_proxy.main(link)

    soup = BeautifulSoup(page)

    tag_border = soup.find_all("div", attrs={"class":"border-box2 mar-top"})

    patter_firstpage = []

    for brend in tag_border:
        brend_title = brend.find("div", attrs={"class":"brend-title clearfix"}).get_text()
        cat = str(brend_title).strip()

        if (cat == "Books") or (cat == "Jewellery"):
            pass

        else:
            tag_img_holder = brend.find_all("div", attrs={"class":"img-holder"})

            for sub_cat in tag_img_holder:
                sub_cat_link = str(sub_cat.find("a").get("href")).strip()

                parsed = urlparse(sub_cat_link)
                sub_cat_title = filter(None, str(parsed.path).split("/"))

                patter_firstpage.append([cat, sub_cat_link, sub_cat_title[0].strip()])

    return patter_firstpage
Developer: Jai-Prakash-Singh-213, Project: homesho_parse, Lines: 29, Source: page1_first.py


Example 18: main

def main(directory, link):
    page = req_proxy.main(link)
    #driver = phan_proxy.main(link)

    #try:
    #     driver.find_element_by_xpath("/html/body/div[2]/div/div/div/div/a/img").click()
    #     logging.debug("clicked..................................................................")

    #except:
    #    pass

    #driver = phan_scroller.main(driver)
    #page = driver.page_source

    soup = BeautifulSoup(page, "html.parser")

    target_cat_list = soup.find("div", attrs={"id":"breadCrumbWrapper"}).find_all("span", attrs={"itemprop":"title"})
    filename = "%s/%s.doc" %(directory, "_".join(str(target_cat_list[-1].get_text()).split()))

    f = open(filename, "a+")
    
    item_big_box_list = soup.find("div", attrs={"id":re.compile("products-main")})

    item_box_list = item_big_box_list.find_all("div", attrs={"class":"product_grid_row"})

    for item_box in item_box_list:
        item_sub_box_list  = item_box.find_all("div", attrs={"class":"product_grid_cont gridLayout3"})

        if len(item_sub_box_list) == 0:
            item_sub_box_list  = item_box.find_all("div", attrs={"class":"product_grid_cont gridLayout4"})
        
        for item_sub_box in item_sub_box_list:
            item_link = item_sub_box.find("a", attrs={"class":"hit-ss-logger somn-track prodLink"}).get("href")
            parsed = urlparse(item_link)

            if len(parsed.netloc) == 0:
                item_link = "http://www.snapdeal.com%s" %(item_link)

            size = []
            size_box_list = item_sub_box.find("div", attrs={"class":"productAttr"})
            
            if size_box_list is not None:
                size_option_box = size_box_list.find_all("option")

                for size_option in size_option_box:
                    size.append(size_option.get("value"))

            if len(size) != 0:
                size = filter(None, map(my_strip, size))
    
            info = [link, item_link, size]
            info2 = map(my_strip, info)

            f.write(str(info2) + "\n")
            logging.debug("inserted........................................................................")


    f.close()
Developer: Jai-Prakash-Singh, Project: snap_deal_parse, Lines: 59, Source: code2_first_snapdael.py


Example 19: main2

def main2(line, filename):
    catlink = line[0]
    cattitle = line[1]

    f = open(filename, "a+")

    page = req_proxy.main(catlink)

    soup = BeautifulSoup(page)

    tag_page = soup.find("div", attrs={"id":"wp_page_numbers"})

    if tag_page:
        tag_page_a = tag_page.find_all("a")[:-1]

        for cat_page in tag_page_a:
            sub_page_link = str(cat_page.get("href")).strip()

            sub_page = req_proxy.main(sub_page_link)

            sub_soup = BeautifulSoup(sub_page)

            # search the fetched sub-page (the original scanned `soup` here,
            # which would re-read the first page)
            tag_content_a = sub_soup.find_all("h2", attrs={"class":"title"})

            for subcatlinktitle in tag_content_a:
                subcatlink = str(subcatlinktitle.a.get("href")).strip()
                subcattitle = subcatlinktitle.get_text().encode("ascii", "ignore")
                subcattitle = str(subcattitle).strip().replace("\n", "").replace("\t", "").replace(",", "")
                print >>f, ",".join([catlink, cattitle, subcatlink, subcattitle])

    else:
        tag_content_a = soup.find_all("h2", attrs={"class":"title"})

        for subcatlinktitle in tag_content_a:
            subcatlink = str(subcatlinktitle.a.get("href")).strip()
            subcattitle = subcatlinktitle.get_text().encode("ascii", "ignore")
            subcattitle = str(subcattitle).strip().replace("\n", "").replace("\t", "").replace(",", "")
            print >>f, ",".join([catlink, cattitle, subcatlink, subcattitle])

    f.close()
Developer: Jai-Prakash-Singh-213, Project: filmlink_parse, Lines: 42, Source: page1_category_filmlink.py


Example 20: page1_cat_link_collect

    def page1_cat_link_collect(self):
        page = req_proxy.main(self.link)

        tree = html.fromstring(page)
        cat_box = tree.xpath("/html/body/div/div/div[5]/div[2]/div[2]/div/ul/li[2]/ul/li")

        all_cat_a = []
        for cat_link in cat_box:
            all_cat_a.append(cat_link.xpath("a/@href")[0])
    
        self.all_cat_a = all_cat_a
Developer: Jai-Prakash-Singh-213, Project: filmlinkforyou_parse, Lines: 11, Source: page1_first_filmlink.py



Note: The req_proxy.main examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright in the source code belongs to the original authors, and distribution and use should follow each project's license. Please do not reproduce without permission.

