本文整理汇总了Python中pyquery.pyquery.PyQuery类的典型用法代码示例。如果您正苦于以下问题：Python PyQuery类的具体用法？Python PyQuery怎么用？Python PyQuery使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PyQuery类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: parse
async def parse(self, input_text, *k, **kk):
    """Build the list payload for the page at ``input_text``.

    Returns ``[]`` when ``self._check_support`` rejects the URL. Otherwise
    returns a dict whose ``data`` entry is the result of
    ``self._get_list_info_api`` for the downloaded page text.
    """
    supported = await self._check_support(input_text)
    if not supported:
        return []

    html_text = await get_url_service.get_url_async(input_text)
    html = PyQuery(html_text)

    # Title sources, tried in order: the main heading, a breadcrumb link
    # whose href occurs inside the requested URL, then the <title> tag.
    title = html('h1.main_title > a').text()
    if not title:
        for node in html('div.crumb-item > a'):
            crumb = PyQuery(node)
            if crumb.attr('href') in input_text:
                title = crumb.text()
    if not title:
        try:
            title = match1(html_text, '<title>([^<]+)').split('-')[0]
        except AttributeError:
            # presumably match1 returned None (no <title> found); keep
            # the empty title in that case.
            pass

    episodes = await self._get_list_info_api(html_text)
    return {
        "data": episodes,
        "more": False,
        "title": title,
        "total": 0,
        "type": "list",
        "caption": "271视频全集"
    }
开发者ID:wwqgtxx,项目名称:wwqLyParse,代码行数:26,代码来源:iqiyilistparser.py
示例2: extract_data
def extract_data(text):
    """Append the text of ``p.data`` to the global ``total_data``.

    Returns the ``value`` attribute of the ``.nextState`` element of the
    parsed document.
    """
    global total_data
    document = PyQuery(text)
    chunk = document.find('p.data').text()
    total_data = total_data + chunk
    return document.find('.nextState').attr('value')
开发者ID:gleitz,项目名称:code-kata,代码行数:7,代码来源:solution.py
示例3: parse
def parse(self, input_text, *k, **kk):
    """Return the list payload scraped from the page at ``input_text``.

    Each entry of ``data`` carries the thumbnail's ``title`` attribute as
    name/no/subtitle and the anchor's ``href`` as url; ``total`` is the
    number of anchors matched.
    """
    page = PyQuery(get_url(input_text))
    anchors = page("div.gut > div.listTab > div.listPic > div.list > dl.w120 > dt > a")
    heading = page("div.gut > div.listTab > div.listPic > div.tab:first-child > p.p1 > i").text()

    data = {
        "data": [],
        "more": False,
        "title": heading,
        "total": len(anchors),
        "type": "list",
        "caption": "乐视视频全集"
    }
    for node in anchors:
        anchor = PyQuery(node)
        label = anchor("a > img").attr("title")
        data["data"].append({
            "name": label,
            "no": label,
            "subtitle": label,
            "url": anchor.attr("href")
        })
    return data
开发者ID:erics8,项目名称:wwqLyParse,代码行数:26,代码来源:lelistparser.py
示例4: detail_page
def detail_page(self, response):
    """Scrape the detail page, schedule the amenities crawl, return a row.

    Three label -> value dicts are collected from the page, bundled with
    ``response.save`` and handed to ``self.crawl`` for the ``amenities/``
    sub-page. The return value is
    ``[2, response.url, <json of the bundle>, <local timestamp>]``.
    """
    doc = PyQuery(response.text.replace(' ', ''))

    # Later duplicates of a label overwrite earlier ones, exactly as
    # dict() over a sequence of pairs would.
    sell_rent_info = {}
    for block in doc.find(".right-border div").items():
        sell_rent_info[block.find('.field-righttit').text()] = block.find('ul').text()

    basic_info = {}
    for label in doc.find('.fc-gray').items():
        key = label.text().replace(u':', "").strip()
        # The value is the parent's text with the label's own text removed.
        basic_info[key] = label.parent().text().replace(label.text(), "").strip()

    other_info = {}
    for term in doc.find('.xiaoqu-otherinfo dt').items():
        other_info[term.text().replace(u':', '')] = term.next().text()

    info_temp = {
        'base': response.save,
        'sell_rent_info': sell_rent_info,
        'basic_info': basic_info,
        'other_info': other_info
    }
    self.crawl(response.url + 'amenities/', callback=self.amenities_page,
               save=info_temp, retries=100)
    stamp = time.strftime('%Y-%m-%d %X', time.localtime())
    return [2, response.url, json.dumps(info_temp), stamp]
开发者ID:yangmingsong,项目名称:python,代码行数:32,代码来源:ganji_ershoufang.py
示例5: urlHandle
def urlHandle(self,input_text):
    """Return the ``href`` of the ``a`` children of the fetched page.

    The page is downloaded with ``common.getUrl`` and parsed with PyQuery.
    Returns ``None`` when no such link carries an ``href``.
    """
    html = PyQuery(common.getUrl(input_text))
    a = PyQuery(html.children('a'))
    url = a.attr("href")
    # %-formatting instead of "+": url is None when the page has no link,
    # and string concatenation would raise TypeError inside a debug print.
    print('urlHandle:"%s"-->"%s"' % (input_text, url))
    return url
开发者ID:v1-hermit,项目名称:wwqLyParse,代码行数:7,代码来源:jumpurlhandle.py
示例6: Parse_le
def Parse_le(self, input_text):
    """Collect le.com video links from the ``dt.d_tit`` entries of a page.

    Only anchors whose ``href`` matches ``http://www.le.com/....html`` are
    kept. Returns a "collection" dict whose ``total`` equals the number of
    kept entries.
    """
    html = PyQuery(get_url(input_text))
    data = {
        "data": [],
        "more": False,
        "title": "LETV",
        "total": 0,
        "type": "collection"
    }
    for item in html('dt.d_tit'):
        a = PyQuery(item).children('a')
        url = a.attr('href')
        if url is None:
            continue
        # Raw string: "\." in a normal literal is an invalid escape sequence.
        if not re.match(r'^http://www\.le\.com/.+\.html', url):
            continue
        text = a.text()
        data["data"].append({
            "name": text,
            "no": text,
            "subtitle": text,
            "url": url,
            "caption": "首页地址列表"
        })
    data["total"] = len(data["data"])
    return data
开发者ID:erics8,项目名称:wwqLyParse,代码行数:34,代码来源:indexparser.py
示例7: url_handle
async def url_handle(self, input_text):
    """Fetch ``input_text`` and return the ``href`` of the page's ``a`` children."""
    page = PyQuery(await get_url_service.get_url_async(input_text))
    link = PyQuery(page.children('a'))
    return link.attr("href")
开发者ID:wwqgtxx,项目名称:wwqLyParse,代码行数:7,代码来源:jumpurlhandle.py
示例8: onSuccess
def onSuccess(self, tid, context, response,headers):
    """Queue one task row per ``h3 a`` link found in ``response``.

    Each link's text is printed and inserted together with its absolute
    URL through ``Spider.executeSql``; afterwards the base-class
    ``Spider.onSuccess`` is invoked with the original arguments.
    """
    resp = PyQuery(response)
    for h3 in resp.find("h3 a"):
        href = h3.attrib.get('href')
        if href is None:
            # An anchor without a target has nothing to enqueue; indexing
            # attrib['href'] here would raise KeyError.
            continue
        url = "http://dev.open.taobao.com/bbs/" + href
        # Parenthesised form prints the same on Python 2 and parses on 3.
        print(h3.text)
        Spider.executeSql(self,"insert into task (task_type,url,status,http_code,task_context) values('topbbs文章',%s,0,-1,%s)",(url,h3.text))
    Spider.onSuccess(self,tid, context,response,headers)
开发者ID:liuyun96,项目名称:python,代码行数:7,代码来源:TopBBS.py
示例9: __getPageAllLink
def __getPageAllLink(self,p):
# Scan the listing page HTML ``p``, collect links of recent listings into
# ``links`` and call getContent() for each collected link.
# Returns True when the number of collected links equals 30 (kind "1"/"2")
# or 35 (any other kind), otherwise False.
# NOTE(review): the indentation of this copy was lost; the nesting implied by
# the comments below is inferred from statement order -- confirm upstream.
# if self.kind=="1":
# lis=PyQuery(p)("div.qiuzu li")
# elif self.kind=="2":
# lis=PyQuery(p)("div.qiuzu li")
# Row selector depends on the listing kind.
if self.kind=="1" or self.kind=="2":
lis=PyQuery(p)("div.house")
else:
lis=PyQuery(p)("div.qiuzu li")
links=[]
for li in lis:
# if self.kind=="3":
# tm=PyQuery(li)("p.time span").eq(1).text()
# link=self.baseurl+PyQuery(li)("p.housetitle a").attr("href")
# tm is the row's age text, link the absolute URL of the listing.
if self.kind=="2" or self.kind=="1":
tm=PyQuery(li)("p.time").text()
# Drops the "个人" (individual) marker; falls back to "" when tm is empty.
tm=tm and tm.replace("个人","") or ""
link=self.baseurl+PyQuery(li)("p.housetitle a").attr("href")
else:
tm=PyQuery(li)("span.li5").text()
link=self.baseurl+PyQuery(li)("span.li2 a").attr("href")
# For kind "4", rows labelled "合租 " (shared rental) are skipped.
if self.kind=="4":
if PyQuery(li)("span.li1").text()=="合租 ":
continue
# tm=PyQuery(li)("span.li5").text()
# link=self.baseurl+PyQuery(li)("span.li2 a").attr("href")
#link=self.baseurl+PyQuery(li)("span.li2 a").attr("href")
# print link
# Age given in days ("天"): keep the link when the number before "天" is
# below 8, otherwise stop scanning.
if u"天" in tm:
s=tm.find(u"天")
tm=tm[:s]
if int(tm)<8:
links.append(link)
else:
break
# Ages given in hours ("小时") or minutes ("分钟") are always kept.
elif u"小时" in tm:
links.append(link)
elif u"分钟" in tm:
links.append(link)
else:
continue
# Always-true guard; the checkPath() condition it replaced is commented out.
if 1:#not checkPath(homepath,self.folder,link):
LinkLog.info("%s|%s"%(self.kind,link))
# Errors from getContent() are printed, not propagated.
try:
getContent(link,self.citycode,self.kind)
except Exception,e:print "ganji getContent Exception %s"%e
# NOTE(review): presumably a per-link throttle inside the loop -- confirm nesting.
time.sleep(int(self.st))
# fetch_quere.put({"mod":"soufang","link":link,"citycode":self.citycode,"kind":self.kind})
# self.clinks.extend(links)
# A link count different from the expected page size yields False.
if self.kind=="1" or self.kind=="2":
if len(links)!=30:
return False
else:
return True
else:
if len(links)!=35:
return False
else:
return True
开发者ID:ptphp,项目名称:PyLib,代码行数:60,代码来源:soufun.py
示例10: url_handle
def url_handle(self, input_text):
    """Return the ``href`` of the ``a`` children of the fetched page.

    The page is downloaded with ``get_url`` and parsed with PyQuery.
    Returns ``None`` when no such link carries an ``href``.
    """
    html = PyQuery(get_url(input_text))
    a = PyQuery(html.children('a'))
    url = a.attr("href")
    # Lazy %-args instead of "+": url is None when the page has no link, and
    # concatenation would raise TypeError just to build a debug message.
    logging.debug('urlHandle:"%s"-->"%s"', input_text, url)
    return url
开发者ID:erics8,项目名称:wwqLyParse,代码行数:7,代码来源:jumpurlhandle.py
示例11: parse_html_page
def parse_html_page(self):
    """Populate torrent attributes from ``self.html_page``.

    Values are read from the two-column table ``#mainBody > table.coltable``:
    each field is located by the text of its first cell and taken from the
    cell that follows it. Sets ``info_hash``, ``title``, ``category``,
    ``subcategory``, ``language``, ``cover_url``, ``small_description``,
    ``description``, ``torrent_url`` and ``torrent_size``.

    Raises AttributeError when the 'Picture:' or 'Download' row is missing
    or the size string does not match, and ValueError when the 'Type' row
    lacks the ' - ' separator.
    """
    pq = PyQuery(self.html_page)
    main_table = pq('#mainBody > table.coltable')

    def find_row(text):
        # First sibling after the label cell whose text equals `text`.
        for c in main_table.find('td:first-child').items():
            if c.text() == text:
                # Builtin next() works on Python 2.6+ and 3; the default
                # keeps StopIteration from escaping when the label cell
                # has no following sibling.
                return next(c.nextAll().items(), None)
        return None

    def find_row_text(text, default=''):
        row = find_row(text)
        if row:
            return row.text()
        return default

    def find_row_html(text, default=''):
        row = find_row(text)
        if row:
            return row.html()
        return default

    self.info_hash = find_row_text('Info hash')
    self.title = pq.find('#mainBody > h1').text()
    self.category, self.subcategory = find_row_text('Type').split(' - ', 1)
    self.language = find_row_text('Language')
    self.cover_url = find_row('Picture:').find('img').attr('src')
    self.small_description = find_row_html('Small Description')
    self.description = find_row_html('Description')
    self.torrent_url = find_row('Download').find('a#dlNormal').attr('href')
    size_string = find_row_text('Size')
    # Raw string: "\(" and "\d" are invalid escapes in a normal literal.
    match = re.match(r'.* \((?P<size>\d+(,\d\d\d)*) bytes\)', size_string)
    self.torrent_size = int(match.group('size').replace(',', ''))
开发者ID:ChaosTherum,项目名称:WhatManager2,代码行数:32,代码来源:models.py
示例12: Parse_v
def Parse_v(self,input_text):
    """Follow the first album/library link in ``#datainfo-navlist``.

    Returns ``self.Parse(url)`` for the first ``a`` child whose ``href``
    matches the iqiyi album/lib pattern, or ``None`` when there is none.
    """
    print(input_text)
    html = PyQuery(common.getUrl(input_text))
    datainfo_navlist = PyQuery(html("#datainfo-navlist"))
    for a in datainfo_navlist.children('a'):
        url = PyQuery(a).attr("href")
        if url is None:
            # Anchor without href: re.search(pattern, None) raises TypeError.
            continue
        if re.search('www.iqiyi.com/(a_|lib/m)',url):
            return self.Parse(url)
    return None
开发者ID:v1-hermit,项目名称:wwqLyParse,代码行数:9,代码来源:listparser.py
示例13: parse
async def parse(self, input_text, *k, **kk):
    """Return the episode-list payload scraped from the page at ``input_text``.

    The title is the ``content`` of the first ``meta[itemprop="name"]``
    (empty string when absent). Every ``.mod_episode a`` link becomes one
    entry labelled from its ``span`` children.
    """
    page = PyQuery(await get_url_service.get_url_async(input_text))

    first_meta = next(iter(page('meta[itemprop="name"]')), None)
    title = "" if first_meta is None else PyQuery(first_meta).attr("content")

    episodes = []
    for node in page(".mod_episode a"):
        link = PyQuery(node)
        label = ""
        for span_node in PyQuery(link("span")):
            span = PyQuery(span_node)
            if span.attr("itemprop") == "episodeNumber":
                label = "第%s集" % span.text()
            elif span.has_class("mark_v"):
                # The badge image's alt text is appended to the label.
                label += span.children("img").attr("alt")
        episodes.append({
            "name": label,
            "no": label,
            "subtitle": label,
            "url": link.attr("href")
        })

    return {
        "data": episodes,
        "more": False,
        "title": title,
        "total": len(episodes),
        "type": "list",
        "caption": "QQ视频全集"
    }
开发者ID:wwqgtxx,项目名称:wwqLyParse,代码行数:35,代码来源:qqlistparser.py
示例14: parse
async def parse(self, input_text, *k, **kk):
    """Return every video of a Youku album list as a "collection" payload.

    The album id is taken from ``input_text``; the paged ``albumlist/items``
    endpoint is then read page by page until a page reports failure or has
    no ``html``. ``total`` is the number of collected entries.

    Raises AttributeError when ``input_text`` is not an album-list URL.
    """
    html = await get_url_service.get_url_async(input_text)
    html = PyQuery(html)
    title = html("div.pl-title").attr("title")
    # Raw string: "\d" and "\." are invalid escapes in a normal literal.
    list_id = re.search(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', input_text).group(1)
    ep = 'https://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=a'
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "collection",
        "caption": "优酷视频全集"
    }
    page_no = 1
    while True:
        # Subscription binds tighter than await, so the awaited text must be
        # parenthesised before slicing; without the parentheses the slice is
        # applied to the coroutine object and raises TypeError.
        response_text = await get_url_service.get_url_async(ep.format(list_id, page_no))
        # presumably strips the JSONP callback wrapper -- TODO confirm offsets
        info = json.loads(response_text[14:-2])
        if info.get("error", None) != 1 or info.get("message", None) != "success":
            break
        page_html = info.get("html", None)
        if not page_html:
            break
        for item in PyQuery(page_html)("a[target='video'][data-from='2-1']"):
            item = PyQuery(item)
            name = item.attr("title")
            data["data"].append({
                "name": name,
                "no": name,
                "subtitle": name,
                "url": "http:" + item.attr("href")
            })
        page_no += 1
    data["total"] = len(data["data"])
    return data
开发者ID:wwqgtxx,项目名称:wwqLyParse,代码行数:53,代码来源:youkulistparser.py
示例15: __initPageNum
def __initPageNum(self):
# Fetch the first listing page, process its links via __getPageAllLink()
# and store the remaining page numbers in self.pn.
# NOTE(review): the indentation of this copy was lost; whether the statements
# after the regex check are nested under it cannot be told from here.
initurl="%s/%s/&act=personal&options="%(self.baseUrl,self.urlpath)
req=urllib2.Request(initurl, None, self.header)
p=self.br.open(req).read()
# pg starts as the PyQuery selection and becomes the first run of digits in
# its text when there is one.
pg=PyQuery(p)("div#houses div.fl")
if re.search('''(\d+)''',pg.text()):
pg=re.search('''(\d+)''',pg.text()).group(1)
# A falsy result from __getPageAllLink() ends the method without setting self.pn.
r=self.__getPageAllLink(p)
if not r:
return
# range(int(pg)+1)[2:] -> page numbers 2..pg (0 and 1 are dropped).
self.pn= [i for i in range(int(pg)+1)][2:]
print ""
开发者ID:aviatorBeijing,项目名称:ptpy,代码行数:13,代码来源:soufang.py
示例16: __getAllNeedLinks
def __getAllNeedLinks(self):
# Walk the paginated listing (offset idx*32 per page) and call getContent()
# for every listing link that checkPath() reports as not yet present.
# NOTE(review): the indentation of this copy was lost; in particular the
# elif chain below could attach to either of the two preceding ifs -- confirm
# the nesting against the upstream file.
cond=True
idx=0
checkit="0"
while cond:
url=self.baseUrl+self.urlpath%("f"+str(idx*32))
#url="http://gz.ganji.com/fang2/u2f0/a1f768/"
# print url
try:
req=urllib2.Request(url, None, self.header)
p=self.br.open(req).read()
# NOTE(review): bare except + continue retries the same URL with idx
# unchanged; a request that keeps failing would loop forever.
except:
continue
else:
# Text of the current-page marker; stop when it is None or did not change
# since the previous page.
check=PyQuery(p)("ul.pageLink li a.c").text()
if check==None or check==checkit:
cond=False
break
else:
checkit=check
links=PyQuery(p)("div.list dl")
p=None
# print len(links)
for link in links:
lk=self.baseUrl+PyQuery(link)(" a.list_title").attr("href")
# print lk
if self.kind=="3" or self.kind=="4":
tm=PyQuery(link)("dd span.time").text()
# "MM-DD" style stamps get the current year prefixed and are compared as
# strings against self.endtime.
if re.match('''\d{2}-\d{2}''', tm):
Y=int(time.strftime('%Y', time.localtime()))
tm="%s-%s"%(Y,tm.strip())
if tm<self.endtime:
cond=False
break
# Ages given in minutes ("分钟") or hours ("小时") are accepted as-is.
elif "分钟" in tm:
pass
elif "小时" in tm:
pass
else:
cond=False
break
if not checkPath(homepath,self.folder,lk):
LinkLog.info("%s|%s"%(self.kind,lk))
# Errors from getContent() are printed, not propagated.
try:
getContent(lk,self.citycode,self.kind,self.upc)
except Exception,e:print "ganji getContent Exception %s"%e
# fetch_quere.put({"mod":"ganji","link":lk,"citycode":self.citycode,"kind":self.kind})
# if lk not in self.clinks:
# self.clinks.append(lk)
idx=idx+1
开发者ID:ptphp,项目名称:PyLib,代码行数:50,代码来源:ganji.py
示例17: _parse
def _parse(self, response):
    """Harvest paging links and proxy addresses from ``response``.

    Links matched by ``self.__css`` are added to ``self.__url_pool``
    (joined onto ``self.__url_base`` when the start URL's config entry has
    ``basejoin`` set). Addresses matching ``pattern_ip_address`` are put on
    ``proxy_port_queue`` together with the base URL.
    """
    d = PyQuery(response)

    # page_turning
    hrefs = [node.attr('href') for node in d.find(self.__css).items()]
    if config_dictionary.get(self.__url_start).get('basejoin'):
        hrefs = [urlparse.urljoin(self.__url_base, href) for href in hrefs]
    self.__url_pool = self.__url_pool.union(set(hrefs))

    # IP address extracting: spaces in the page text become ':' before the
    # pattern is applied.
    colon_joined = ':'.join(d.text().split(' '))
    proxy_list = re.findall(pattern_ip_address, colon_joined)
    proxy_port_queue.put((proxy_list, self.__url_base))
开发者ID:yangmingsong,项目名称:python,代码行数:15,代码来源:proxy_collection.py
示例18: serializeArray
def serializeArray(form):
    """Return ``(name, value)`` pairs for the controls of a form element.

    Returns ``[]`` when ``form`` is not a ``<form>``. Controls that are
    disabled, lack a ``name``, or are unchecked checkboxes are left out.
    """
    form = PyQuery(form)
    if not form.is_('form'):
        return []

    def _excluded(field):
        # Same two tests as the contract above, in the same order.
        if field.is_('[disabled]') or not field.is_('[name]'):
            return True
        return field.is_('[type=checkbox]') and not field.is_('[checked]')

    fields = (PyQuery(node) for node in form.find('input, select, textarea'))
    return [(field.attr('name'), field.val())
            for field in fields if not _excluded(field)]
开发者ID:ivanp,项目名称:emailsopener,代码行数:18,代码来源:utils.py
示例19: Parse
def Parse(self,input_text):
    """Collect plausible video links from an arbitrary page.

    Every ``a`` element with an ``href`` and a non-empty name (its ``title``
    attribute, else its text) is kept when the URL looks like a video page
    and neither the URL nor the name hits the exclusion patterns. Returns a
    "collection" dict whose ``total`` equals the number of kept entries.
    """
    html = PyQuery(self.getUrl(input_text))
    data = {
        "data": [],
        "more": False,
        "title": html('title').text(),
        "total": 0,
        "type": "collection"
    }
    for item in html('a'):
        a = PyQuery(item)
        name = a.attr('title')
        if name is None:
            name = a.text()
        url = a.attr('href')
        if url is None:
            continue
        if name is None or name == "":
            continue
        # Raw strings: "\." and "\d" are invalid escapes in normal literals.
        if not re.match(r'(^(http|https)://.+\.(shtml|html))|(^(http|https)://.+/video/)',url):
            continue
        if re.search('(list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com)',url):
            continue
        if re.search(r'(下载|播 放|播放|投诉|评论|(\d{1,2}:\d{1,2}))',name):
            continue
        data["data"].append({
            "name": name,
            "no": name,
            "subtitle": name,
            "url": url,
            "unsure": False
        })
    data["total"] = len(data["data"])
    return data
开发者ID:road0001,项目名称:wwqLyParse,代码行数:44,代码来源:anypageparser.py
示例20: __getAllNeedLinks
def __getAllNeedLinks(self):
# Walk the paginated listing (offset idx*32 per page) and append every
# listing link not already present to self.clinks.
# NOTE(review): the indentation of this copy was lost; the nesting of the
# elif chain and of the trailing idx/sleep/print lines cannot be told from
# here -- confirm against the upstream file.
cond=True
idx=0
checkit="0"
while cond:
url=self.baseUrl+self.urlpath%("f"+str(idx*32))
#url="http://gz.ganji.com/fang2/u2f0/a1f768/"
print url
try:
req=urllib2.Request(url, None, self.header)
p=self.br.open(req).read()
# NOTE(review): bare except silently ignores every request error.
except:
pass
else:
# Text of the current-page marker; stop when it did not change since the
# previous page.
check=PyQuery(p)("ul.pageLink li a.c").text()
if check==checkit:
break
else:
checkit=check
links=PyQuery(p)("div.list dl")
print len(links)
for link in links:
lk=self.baseUrl+PyQuery(link)(" a.list_title").attr("href")
if self.kind=="3" or self.kind=="4":
tm=PyQuery(link)("dd span.time").text()
# "MM-DD" style stamps get the current year prefixed and are compared as
# strings against self.endtime.
if re.match('''\d{2}-\d{2}''', tm):
Y=int(time.strftime('%Y', time.localtime()))
tm="%s-%s"%(Y,tm.strip())
if tm<self.endtime:
break
# Ages given in minutes ("分钟") or hours ("小时") are accepted as-is.
elif "分钟" in tm:
pass
elif "小时" in tm:
pass
else:
break
# Keep self.clinks free of duplicates.
if lk not in self.clinks:
self.clinks.append(lk)
idx=idx+1
time.sleep(self.st)
print len(self.clinks)
开发者ID:ptphp,项目名称:PyLib,代码行数:42,代码来源:ganji.py
注:本文中的pyquery.pyquery.PyQuery类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论