本文整理汇总了Python中req_proxy.main函数的典型用法代码示例。如果您正苦于以下问题:Python main函数的具体用法?Python main怎么用?Python main使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了main函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Collect level-2 menu links from the Myntra home page.

    Creates a dated working directory, records its name in the two
    bookkeeping files, and returns the non-empty results of
    menucollection() applied to every level-2 menu anchor.
    """
    dte = "dir%s" % (time.strftime("%d%m%Y"))
    try:
        os.makedirs(dte)
    except OSError:
        # Directory already exists from an earlier run; nothing to do.
        pass
    link = "http://www.myntra.com/"
    page = req_proxy.main(link)
    if not page:
        # BUG FIX: the original called main() here without returning, then
        # fell through and parsed a falsy page with BeautifulSoup(None).
        # Propagate the retry's result instead.
        return main()
    soup = BeautifulSoup(page)
    tag_mklevel2 = soup.find_all("a", attrs={"class":"mk-level2 "})
    f = open("to_extract.txt", "w+")
    f2 = open("extracted.txt", "a+")
    print >>f, dte
    print >>f2, dte
    f.close()
    f2.close()
    return filter(None, map(menucollection, tag_mklevel2))
开发者ID:Jai-Prakash-Singh,项目名称:parse_myntra,代码行数:32,代码来源:page1_myntra.py
示例2: main2
def main2(line, catlink, filename2):
    """Append one [menu, submnlink, submntitle, catlink, cattitle,
    brandlink, brandtitle] row to filename2 for every brand anchor
    found on the Flipkart category page taken from line[-2]."""
    menu = line[0].strip()
    submnlink = line[1].strip()
    submntitle = line[2].strip()
    catlink = line[-2].strip()
    cattitle = line[-1].strip()
    soup = BeautifulSoup(req_proxy.main(catlink))
    brand_box = soup.find("ul", attrs={"id": "brand"})
    anchors = brand_box.find_all("a") if brand_box is not None else []
    out = open(filename2, "a+")
    for anchor in anchors:
        brandlink = "%s%s" % ("http://www.flipkart.com", str(anchor.get("href")).strip())
        brandtitle = str(anchor.get_text()).replace("\n", " ").replace("\t", " ").replace("\r", " ").strip()
        row = [menu, submnlink, submntitle, catlink, cattitle, brandlink, brandtitle]
        print >> out, row
        logging.debug(row)
    out.close()
开发者ID:Jai-Prakash-Singh-213,项目名称:flipkar_parse,代码行数:27,代码来源:page2_first_brnd_collection_flipkart.py
示例3: main
def main():
    """Prepare the dated Amazon working directory, record it in the
    bookkeeping files, and harvest categories for the selected
    departments via main2()."""
    directory = "diramazon%s" % (time.strftime("%d%m%y"))
    try:
        os.makedirs(directory)
    except:
        pass
    # Record the working directory name in both bookkeeping files.
    for bookkeeping, mode in (("to_extract_amazon.txt", "w+"),
                              ("extracted_amazon.txt", "a+")):
        handle = open(bookkeeping, mode)
        handle.write(directory)
        handle.close()
    out = open("%s/%s" % (directory, "f_mn_ctl_cat.txt"), "a+")
    link = "http://www.amazon.in/gp/site-directory/ref=sa_menu_top_fullstore"
    soup2 = BeautifulSoup(req_proxy.main(link))
    catlist = ["Beauty & Health", "Watches & Fashion Jewellery", "Handbags & Luggage"]
    map(functools.partial(main2, soup=soup2, f=out), catlist)
    out.close()
开发者ID:Jai-Prakash-Singh-213,项目名称:amazon_parse,代码行数:32,代码来源:page1_first_amazon.py
示例4: get_product
def get_product(self, line):
    """ given a ct sct scl subsubcate subcatelink br and brl find product"""
    record = ast.literal_eval(line)
    url = "http://www.junglee.com%s" % (record[-1])
    record[-1] = url
    soup = BeautifulSoup(req_proxy.main(url), "html.parser")
    # Junglee search results live in <div id="result_NNN"> containers.
    result_divs = soup.find_all("div", attrs={"id": re.compile(r"^result_[\d]{1,}$")})
    for result in result_divs:
        product_href = (result.find("img", attrs={"alt": "Product Details"})
                              .find_parent("a")
                              .get("href"))
        self.pr_pl_filobj.write(str(map(self.my_strip, record + [product_href])) + "\n")
开发者ID:Jai-Prakash-Singh,项目名称:snap_deal_parse,代码行数:27,代码来源:code1_first_junglee.py
示例5: main
def main(line):
line = ast.literal_eval(line)
page = req_proxy.main(line[8])
f = open("myfile.txt", "w+")
print >>f, page
f.close()
#driver = phan_proxy.main(line[8])
#page = driver.page_source
#driver.delete_all_cookies()
#driver.quit()
#tree = html.fromstring(page)
#meta_disc = tree.xpath("/html/head/meta[3]/text()")
soup = BeautifulSoup(page, "html.parser")
meta_disc = soup.find("meta", attrs={"name":"description"}).get("content")
title = soup.find("title").get_text()
desc = soup.find("section", attrs={"id":"product-detail"})
dte = time.strftime("%d:%m:%Y")
status = " "
spec = " "
vender = "zovi.com"
brand = "zovi"
print map(my_strip, [line[7], line[11], line[0], line[5], line[2],
line[3], brand, line[9], line[5], line[4],
line[1], line[8], vender, title, meta_disc, line[10],
desc, spec, dte, status])
开发者ID:Jai-Prakash-Singh-213,项目名称:zovi_parse,代码行数:33,代码来源:page2_first_zoviold1.py
示例6: main
def main(line, directory):
    """Write one CSV row describing a Yebhi product into its
    category/sub-category/product directory under *directory*."""
    target_dir = "%s/%s/%s/%s" % (directory, line[2], line[3], line[5])
    try:
        os.makedirs(target_dir)
    except:
        pass
    out = open("%s/%s.csv" % (target_dir, line[5]), "a+")
    soup = BeautifulSoup(req_proxy.main(line[-6]), "html.parser")
    title = soup.find("title").get_text()
    meta_disc = soup.find("meta", attrs={"name":"description"}).get("content")
    seller = "yebhi.com"
    item_desc = soup.find("div", attrs={"itemprop":"description"})
    dte = time.strftime("%d:%m:%Y")
    status = " "
    row = [line[9], line[7], line[0], line[12], line[2],
           line[3], line[5], line[10], line[11], '',
           line[1], line[8], seller, title,
           meta_disc, line[13], item_desc, '', dte, status]
    out.write(",".join(map(my_strip, row)) + "\n")
    out.close()
    logging.debug("inserted ............")
示例7: main
def main(line, directory):
    """Collect brand name/link pairs from the 'Brands' facet of a Yebhi
    listing page (line[-1]) and append one row per brand to
    f_cl_tr_ct_st_scl_bt_bl.txt under *directory*."""
    out = open("%s/f_cl_tr_ct_st_scl_bt_bl.txt" % (directory), "a+")
    soup = BeautifulSoup(req_proxy.main(line[-1]), "html.parser")
    for heading in soup.find_all("span", attrs={"class":"forspan"}):
        # Only the facet box whose heading reads exactly "Brands".
        if str(heading.get_text()).strip() != "Brands":
            continue
        brand_box = heading.find_parent("div", attrs={"class":"divli"})
        for anchor in brand_box.find_all("a", attrs={"class":"RefineByLink"}):
            brand = str(anchor.get("relmselect"))
            brand_link = "http://www.yebhi.com%s" % (str(anchor.get("href")))
            row = [line[0], line[1], line[2], line[3], line[4], brand, brand_link]
            out.write(str(row) + "\n")
            logging.debug(row)
    out.close()
示例8: main2
def main2(line, all_brand_link, f3):
    """Append [menu, ctlink, cttitle, sctlink, scttitle, brandlink,
    brandtitle] rows for every brand on an Amazon all-brands page.

    line -- [menu, ctlink, cttitle, sctlink, scttitle]
    all_brand_link -- URL of the brand-listing page to scrape
    f3 -- path of the output file (opened in append mode)
    """
    menu = line[0]
    ctlink = line[1]
    cttitle = line[2]
    sctlink = line[3]
    scttitle = line[4]
    page = req_proxy.main(all_brand_link)
    soup = BeautifulSoup(page)
    tag_brand_uls = soup.find_all("ul", attrs={"class":"column"})
    # FIX: open the output file once instead of re-opening and closing it
    # for every anchor inside the innermost loop.
    f4 = open(f3, "a+")
    for brand_ul in tag_brand_uls:
        for bl_bt in brand_ul.find_all("a"):
            bl = "%s%s" % ("http://www.amazon.in", bl_bt.get("href"))
            bt = str(bl_bt.span.get_text()).strip()
            filedata2 = map(mystrip, [menu, ctlink, cttitle, sctlink, scttitle, bl, bt])
            logging.debug(filedata2)
            print >>f4, filedata2
    f4.close()
示例9: main2
def main2(ml_mt_sub, filename):
    """Expand one HomeShop18 menu entry into category / sub-category CSV
    rows appended to *filename*; categories without sub-categories are
    written with the category repeated in the sub-category columns."""
    menulink = ml_mt_sub[0]
    menutitle = ml_mt_sub[1]
    out = open(filename, "a+")
    soup = BeautifulSoup(req_proxy.main(menulink), "html.parser")
    for head in soup.find_all("div", attrs={"class":"head clearfix"}):
        cato = head.find("div")
        catolink = "%s%s" % ("http://www.homeshop18.com", str(cato.a.get("href")).strip())
        catotitle = cato.a.get_text()
        sibling = head.find_next_sibling("div")
        if sibling:
            for anchor in sibling.find_all("a"):
                sub_catolink = "%s%s" % ("http://www.homeshop18.com", str(anchor.get("href")).strip())
                sub_catotext = anchor.get("title")
                print >>out, ','.join([menulink, menutitle, catolink, catotitle, sub_catolink, sub_catotext])
        else:
            print >>out, ','.join([menulink, menutitle, catolink, catotitle, catolink, catotitle])
    out.close()
示例10: main2
def main2(mn_sbml_sbml, filename):
    """Record Flipkart category title/link pairs found in the first nav
    section of a sub-menu page, appended to *filename*."""
    menu = mn_sbml_sbml[0]
    submenulink = mn_sbml_sbml[1]
    submenutitle = mn_sbml_sbml[2]
    soup = BeautifulSoup(req_proxy.main(submenulink))
    nav = soup.find("div", attrs={"class":"nav-section first-section"})
    anchors = [] if nav is None else nav.find_all("a")
    out = open(filename, "a+")
    for anchor in anchors:
        cattitle = str(anchor.get("title")).strip()
        catlink = "%s%s" % ("http://www.flipkart.com", str(anchor.get("href")).strip())
        row = [menu, submenulink, submenutitle, catlink, cattitle]
        print >>out, row
        logging.debug(row)
    out.close()
示例11: main3
def main3(i, q):
    """Queue worker: consume links from *q* until the None sentinel and
    record each link's category anchors in link_cl_ctxt_ccount, sleeping
    i + 2 seconds between links."""
    for link in iter(q.get, None):
        soup = BeautifulSoup(req_proxy.main(link))
        cat_boxes = soup.find_all("div", attrs={"class":"search-by-cat mt10 mb10 pl14 "})
        anchors = cat_boxes[0].find_all("a") if cat_boxes else []
        for anchor in anchors:
            try:
                entry = [link, str(anchor.get("href")).strip(), str(anchor.get_text()).strip()]
                link_cl_ctxt_ccount.append(entry)
                logging.debug(tuple(entry))
            except:
                pass
        time.sleep(i + 2)
        q.task_done()
    # Acknowledge the None sentinel itself so q.join() can return.
    q.task_done()
示例12: lnk_tl_vl_vl_prl_fun2
def lnk_tl_vl_vl_prl_fun2(self, line, counter, cl = None):
    # Crawl one download.cnet.com listing page and append one
    # "line + [product href]" row to self.f2 per product; follows the
    # "next" pagination link recursively until more than 1000 products
    # have been seen (counter carries the running total).
    # cl defaults to the listing URL stored at line[-2] on the first call.
    if cl is None:
        cl = line[-2]
    else:
        pass
    page = req_proxy.main(cl)
    soup = BeautifulSoup(page)
    pro_div = soup.find("ul", attrs={"id":"listing-product-list"})
    pro_link = pro_div.find_all("li", attrs={"class":"listing-unit"})
    for pl in pro_link:
        pl = pl.find("a", attrs={"class":"OneLinkNoTx"})
        pl = pl.get("href")
        print line + [pl]
        self.f2.write(str(line + [pl]) + "\n")
    counter += len(pro_link)
    if counter > 1000:
        # Collected enough product links for this listing; stop paginating.
        return 0
    next_url = pro_div.find("li", attrs={"class":"next"})
    try:
        next_url = "http://download.cnet.com%s" %(next_url.a.get("href"))
        logging.debug(next_url)
        self.lnk_tl_vl_vl_prl_fun2(line, counter, next_url)
    except:
        # No "next" anchor on the last page (next_url is None) — stop.
        pass
示例13: main5
def main5(i, q):
    """Queue worker: for each (line, f) item taken from *q* until the
    None sentinel, append brand link/name columns for every anchor in
    the page's brand facet, sleeping i + 2 seconds between items."""
    for line, f in iter(q.get, None):
        soup = BeautifulSoup(req_proxy.main(line[-2].strip()))
        brand_div = soup.find("div", attrs={"id":"facet_brand"})
        try:
            anchors = brand_div.find_all("a")
        except:
            # No brand facet on this page (brand_div is None).
            anchors = []
        for anchor in anchors:
            try:
                brandlink = str(anchor.get("href")).strip()
                bramdname = str(anchor.get_text()).strip()
                print >>f, "%s,%s,%s" % (','.join(line), brandlink, bramdname)
            except:
                pass
        time.sleep(i + 2)
        q.task_done()
    # Acknowledge the None sentinel itself so q.join() can return.
    q.task_done()
示例14: main
def main(link):
    # Spawn one main2() worker thread for the listing page itself and one
    # per numbered pagination link found on it, then wait for all of them.
    page = req_proxy.main(link)
    soup = BeautifulSoup(page, "html.parser")
    all_page_list = soup.find_all("li", attrs={"class":"MyAccountPag"})
    threads = []
    t = threading.Thread(target=main2, args=(link,))
    threads.append(t)
    t.start()
    for page_link_tag in all_page_list:
        page_link = "http://www.yebhi.com%s" %(str(page_link_tag.a.get("href")))
        t = threading.Thread(target=main2, args=(page_link,))
        threads.append(t)
        t.start()
    # Join every live thread except the main one; this covers all of the
    # workers started above (and any other non-main threads still alive).
    main_thread = threading.currentThread()
    for t in threading.enumerate():
        if t is main_thread:
            continue
        logging.debug('joining %s', t.getName())
        t.join()
示例15: main
def main(directory, mainlink):
    """Write every category link found on the Snapdeal browse page to
    complete_link_collection.txt under *directory*, absolutising
    relative hrefs, and return that file's path."""
    filename = "%s/complete_link_collection.txt" % (directory)
    out = open(filename, "a+")
    soup = BeautifulSoup(req_proxy.main(mainlink), "html.parser")
    box = soup.find("div", attrs={"class":"brw_bdr"})
    for anchor in box.find_all("a"):
        href = anchor.get("href")
        # Relative links (no netloc) are rooted at snapdeal.com.
        if len(urlparse(href).netloc) == 0:
            href = "http://www.snapdeal.com%s" % (href)
        out.write(str(href) + "\n")
    out.close()
    return filename
示例16: part_second
def part_second(line, filename2):
f = open(filename2, "a+")
line2 = line.strip().split(",")
menulink = line2[0].strip()
menutitle = line2[1].strip()
catlink = line2[2].strip()
cattitle = line2[3].replace("\n","").strip()
subcatlink = line2[-2].strip()
subcattitle = line2[-1].strip()
page = req_proxy.main(subcatlink)
soup = BeautifulSoup(page, "html.parser")
tag_brand = soup.find("div", text=re.compile("Brand"))
blbtbc_list = tag_brand.parent.parent.find_all("li", attrs={"class":"srch_ctgry_item"})
for blbtbc in blbtbc_list:
brandlink = "%s%s" %("http://www.homeshop18.com", str(blbtbc.a.get("href")).strip())
brandtitle = str(blbtbc.a.get( "title")).replace("\n", "").strip()
print >>f, ','.join([menulink, menutitle, catlink, cattitle, subcatlink, subcattitle, brandlink, brandtitle])
print menulink, menutitle, catlink, cattitle, subcatlink, subcattitle, brandlink, brandtitle
print "*"*145
f.close()
开发者ID:Jai-Prakash-Singh-213,项目名称:homesho_parse,代码行数:27,代码来源:page1_first_home18.py
示例17: main
def main():
    """Scrape the HomeShop18 all-stores page and return
    [category, sub-category link, sub-category slug] triples, skipping
    the Books and Jewellery categories."""
    page = req_proxy.main("http://www.homeshop18.com/all-stores.html")
    soup = BeautifulSoup(page)
    patter_firstpage = []
    for brend in soup.find_all("div", attrs={"class":"border-box2 mar-top"}):
        brend_title = brend.find("div", attrs={"class":"brend-title clearfix"}).get_text()
        cat = str(brend_title).strip()
        if cat in ("Books", "Jewellery"):
            continue
        for sub_cat in brend.find_all("div", attrs={"class":"img-holder"}):
            sub_cat_link = str(sub_cat.find("a").get("href")).strip()
            # First non-empty URL path segment serves as the slug.
            path_parts = filter(None, str(urlparse(sub_cat_link).path).split("/"))
            patter_firstpage.append([cat, sub_cat_link, path_parts[0].strip()])
    return patter_firstpage
示例18: main
def main(directory, link):
    # Scrape one Snapdeal listing page: for every product tile write
    # [listing link, product link, sizes] into a .doc file named after the
    # last breadcrumb entry, under *directory*.
    page = req_proxy.main(link)
    # Earlier PhantomJS-based variant kept for reference:
    #driver = phan_proxy.main(link)
    #try:
    #    driver.find_element_by_xpath("/html/body/div[2]/div/div/div/div/a/img").click()
    #    logging.debug("clicked..................................................................")
    #except:
    #    pass
    #driver = phan_scroller.main(driver)
    #page = driver.page_source
    soup = BeautifulSoup(page, "html.parser")
    # Breadcrumb's last <span itemprop="title"> names the output file.
    target_cat_list = soup.find("div", attrs={"id":"breadCrumbWrapper"}).find_all("span", attrs={"itemprop":"title"})
    filename = "%s/%s.doc" %(directory, "_".join(str(target_cat_list[-1].get_text()).split()))
    f = open(filename, "a+")
    item_big_box_list = soup.find("div", attrs={"id":re.compile("products-main")})
    item_box_list = item_big_box_list.find_all("div", attrs={"class":"product_grid_row"})
    for item_box in item_box_list:
        # Tiles come in two grid layouts; fall back to gridLayout4 when the
        # row contains no gridLayout3 tiles.
        item_sub_box_list = item_box.find_all("div", attrs={"class":"product_grid_cont gridLayout3"})
        if len(item_sub_box_list) == 0:
            item_sub_box_list = item_box.find_all("div", attrs={"class":"product_grid_cont gridLayout4"})
        for item_sub_box in item_sub_box_list:
            item_link = item_sub_box.find("a", attrs={"class":"hit-ss-logger somn-track prodLink"}).get("href")
            parsed = urlparse(item_link)
            # Relative product links are rooted at snapdeal.com.
            if len(parsed.netloc) == 0:
                item_link = "http://www.snapdeal.com%s" %(item_link)
            size = []
            # Sizes, when present, are <option> values in the tile's
            # productAttr dropdown.
            size_box_list = item_sub_box.find("div", attrs={"class":"productAttr"})
            if size_box_list is not None:
                size_option_box = size_box_list.find_all("option")
                for size_option in size_option_box:
                    size.append(size_option.get("value"))
            if len(size) !=0:
                size = filter(None, map(my_strip, size))
            info = [link, item_link, size]
            info2 = map(my_strip, info)
            f.write(str(info2) + "\n")
            logging.debug("inserted........................................................................")
    f.close()
示例19: main2
def main2(line, filename):
    """Append category/sub-category rows for one filmlink category.

    Fetches the category page; when it is paginated (wp_page_numbers box
    present) fetches every numbered page and extracts the entry titles
    from each, otherwise extracts them from the category page itself.

    line -- [catlink, cattitle]
    filename -- output file, opened in append mode
    """
    catlink = line[0]
    cattitle = line[1]
    f = open(filename, "a+")
    page = req_proxy.main(catlink)
    soup = BeautifulSoup(page)
    tag_page = soup.find("div", attrs={"id":"wp_page_numbers"})
    if tag_page:
        # All pagination anchors except the trailing "last/next" link.
        for cat_page in tag_page.find_all("a")[:-1]:
            sub_page_link = str(cat_page.get("href")).strip()
            sub_soup = BeautifulSoup(req_proxy.main(sub_page_link))
            # BUG FIX: the original scanned `soup` (the first page) here,
            # so the first page's entries were written once per pagination
            # link; scan the page that was just fetched instead.
            _write_title_rows(f, catlink, cattitle, sub_soup)
    else:
        _write_title_rows(f, catlink, cattitle, soup)
    f.close()


def _write_title_rows(f, catlink, cattitle, soup):
    # Emit one CSV row per <h2 class="title"> entry found in soup.
    for subcatlinktitle in soup.find_all("h2", attrs={"class":"title"}):
        subcatlink = str(subcatlinktitle.a.get("href")).strip()
        subcattitle = subcatlinktitle.get_text().encode("ascii", "ignore")
        subcattitle = str(subcattitle).strip().replace("\n", "").replace("\t", "").replace(",", "")
        print >>f, ",".join([catlink, cattitle, subcatlink, subcattitle])
示例20: page1_cat_link_collect
def page1_cat_link_collect(self):
    """Populate self.all_cat_a with the category hrefs found on the page
    at self.link."""
    tree = html.fromstring(req_proxy.main(self.link))
    items = tree.xpath("/html/body/div/div/div[5]/div[2]/div[2]/div/ul/li[2]/ul/li")
    # One href per category <li>; first <a> child only.
    self.all_cat_a = [item.xpath("a/@href")[0] for item in items]
注:本文中的req_proxy.main函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论