This article compiles typical usage examples of the pyquery.PyQuery class in Python. If you have been wondering what PyQuery is, what it is good for, and how to use it, the curated class examples below should help.
Twenty code examples of the PyQuery class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
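Before the examples, here is a minimal sketch of the basic PyQuery workflow, constructing a document from an HTML string and querying it with CSS selectors (the markup is invented for illustration):

from pyquery import PyQuery

doc = PyQuery('<div><p class="intro">Hello</p><a href="/next">next</a></div>')
print(doc('p.intro').text())  # 'Hello'
print(doc('a').attr('href'))  # '/next'

PyQuery can also be built from an lxml tree, a URL (PyQuery(url=...)), or a file (PyQuery(filename=...)); the examples below use several of these entry points.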
Example 1: test_render_attribute_to_document
def test_render_attribute_to_document():
    document = PyQuery('<a attribute="{ value }" data-riot-id="0"></a>')
    expression = {'expression': '{ value }', 'attribute': 'attribute', 'type': 'attribute', 'node': document}
    render_document([expression], {'value': 'value'})
    assert document.outer_html() == '<a attribute="value" data-riot-id="0" data-riot-dirty="true"></a>'
    render_document([expression], {'value': 1})
    assert document.outer_html() == '<a attribute="1" data-riot-id="0" data-riot-dirty="true"></a>'
Developer: pombredanne, Project: riotpy, Lines: 7, Source: test_expression.py
Example 2: get_saml_response
def get_saml_response(response):
    tree = PyQuery(response.content)
    inputtag = tree.find('input[name="SAMLResponse"]')
    assert len(inputtag) == 1
    encoded_response = inputtag[0].get('value')
    samlresponse = base64.b64decode(encoded_response)
    return samlresponse
Developer: miing, Project: mci_migo, Lines: 7, Source: tests.py
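The pattern in Example 2, locating a hidden form field and reading its value, is common when testing SSO flows. A self-contained hedged sketch (the markup is invented):

from pyquery import PyQuery

html = '<form><input name="SAMLResponse" value="UEsDBA=="/></form>'
tree = PyQuery(html)
inputtag = tree.find('input[name="SAMLResponse"]')
assert len(inputtag) == 1
print(inputtag[0].get('value'))  # lxml-level access, as in the example
print(inputtag.attr('value'))    # the PyQuery-level equivalent

Indexing a PyQuery selection ([0]) returns the underlying lxml element, so .get('value') is lxml API; .attr('value') is the PyQuery counterpart.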
Example 3: html_to_records
def html_to_records(html):
    pq = PyQuery(html)
    rows = pq.find('table tr')
    get_row = lambda r: map(lambda th: th.text, r)
    headers = get_row(rows[0])
    for row in rows[1:]:
        yield dict(zip(headers, get_row(row)))
Developer: texastribune, Project: aeis, Lines: 7, Source: files.py
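A hedged usage sketch for the generator above (markup invented; note the snippet is Python 2 code, where map() returns a list that can be reused across rows):

html = '''<div><table>
<tr><th>name</th><th>score</th></tr>
<tr><td>alice</td><td>10</td></tr>
<tr><td>bob</td><td>7</td></tr>
</table></div>'''
for record in html_to_records(html):
    print(record)  # {'name': 'alice', 'score': '10'}, then {'name': 'bob', 'score': '7'}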
Example 4: getResTb
def getResTb():
    html = fileworker.getHTML()
    pq = PyQuery(html)
    result = dict()
    blocks = list()
    for i in pq.items('.row.result'):
        blocks.append(i)  # each match arrives as a PyQuery wrapper
Developer: T10TheKinOfStars, Project: crawlerforfun, Lines: 7, Source: parser.py
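The .items() call is the detail worth copying from Example 4: iterating a PyQuery object directly yields raw lxml elements, while .items() yields each match already wrapped in PyQuery. A quick hedged comparison (markup invented):

from pyquery import PyQuery

pq = PyQuery('<div><p class="row result">a</p><p class="row result">b</p></div>')
for el in pq('.row.result'):
    print(type(el))       # a raw lxml element, no .text()/.attr() helpers
for item in pq.items('.row.result'):
    print(item.text())    # a PyQuery wrapper: prints 'a', then 'b'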
Example 5: parseProductPage
def parseProductPage(product, need_img_urls=False):
    """Visit the product detail page and scrape four new fields:
    delivery, reviews, star, total_sales
    """
    if product['product_url']:
        content = fetchContent(product['product_url'], False)
        doc = PyQuery(content)
        # product['delivery'] = doc("div.cost-entries-type > p > em.value").text()  # shipping fee is rendered by JS; cannot be scraped from the static page
        product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
        product['star'] = doc('p.star-level > i').attr("class")
        product['total_sales'] = doc('p.bargain-number > a > em.value').text()
        if need_img_urls:
            url_list = get_img_urls(content)
            product['img_urls'] = ', '.join(url_list)
        else:
            product['img_urls'] = ''
        product['color'], product['size'] = '', ''
        for index, td in enumerate(doc('div.obj-content > table > tbody > tr > td')):
            tdQ = PyQuery(td)
            if tdQ.attr('class') == 'de-feature' and tdQ.text().strip() == u'颜色':
                product['color'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
            if tdQ.attr('class') == 'de-feature' and tdQ.text().strip() == u'尺寸':
                product['size'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
        product['MOQ'] = extractNum(doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
        if not product['MOQ'] or product['MOQ'] == 0:
            product['MOQ'] = extractNum(PyQuery(doc('tr.amount').remove('td.amount-title').children('td').eq(0))('span.value').text())
        if product['MOQ'] == 1:
            # print product['product_url']
            product['sku_size'] = PyQuery(doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
            product['sku_color'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.name').text()
            product['sku_price'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.price').text()
            product['sku_amount'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.count > span > em.value').text()
            print product['sku_id'], '\t', product['sku_size'], "\t", product['sku_color'], "\t", product['sku_price'], "\t", product['sku_amount']
    return product
Developer: chenweiqiang2016, Project: cwq-crawler, Lines: 34, Source: products.py
Example 6: crawl_1688_category_tree
def crawl_1688_category_tree(wb):
    # fr = open("C:users/chenweiqiang/desktop/ye.html", "r")  # elements could not be extracted after parsing the saved file with PyQuery
    h = httplib2.Http()
    response, content = h.request("https://ye.1688.com/")
    # fw = open("C:users/chenweiqiang/desktop/ye2.html", "w")
    # fw.write(content)
    # fw.close()
    ws = wb.add_sheet("ye.1688品类树")
    ws.write(0, 0, "一级品类")
    ws.write(0, 1, "二级品类")
    ws.write(0, 2, "三级品类")
    row = 0
    doc = PyQuery(content)
    level1NodeList = doc("li.cat-box")
    for level1Node in level1NodeList:
        level1NodeQ = PyQuery(level1Node)
        level1_category = level1NodeQ('div.cat-main').text().replace(' ', '')
        level2NodeList = level1NodeQ('div.cat-sub-col > dl')  # the extra div[class="cat-sub "] > level can be skipped
        for level2Node in level2NodeList:
            level2NodeQ = PyQuery(level2Node)
            level2_category = level2NodeQ('dt > a').text()
            level3NodeList = level2NodeQ('dd.cat-list > ul > li > a')
            for level3Node in level3NodeList:
                level3NodeQ = PyQuery(level3Node)
                level3_category = level3NodeQ.text()
                row += 1
                ws.write(row, 0, level1_category)
                ws.write(row, 1, level2_category)
                ws.write(row, 2, level3_category)
Developer: chenweiqiang2016, Project: cwq-crawler, Lines: 29, Source: crawl_categoryTree.py
Example 7: list_page
def list_page(self, response):
    result_content = {}
    content_iter = re.finditer(r"STK && STK.pageletM && STK.pageletM.view\((?P<content>\{.*?\})\)", response.content)
    for iter in content_iter:
        ok, content = safe_loads(iter.groupdict()['content'])
        if ok and "pl_weibo_direct" == content.get("pid"):
            result_content = content
            break
    else:
        return {}
    pyquery_doc = PyQuery(result_content["html"])
    pyquery_doc.make_links_absolute(response.url)
    items = []
    for item in pyquery_doc("DIV.feed_lists>DIV.WB_cardwrap>DIV").items():
        weibo_href = item("DIV.content>DIV.feed_from>A").attr.href
        if weibo_href:
            weibo_pics = []
            for pic in item("DIV.feed_content DIV.media_box IMG").items():
                weibo_pics.append(pic.attr.src)
            data = {
                "content": item("DIV.feed_content P.comment_txt").text(),
                "nickname": item("DIV.feed_content A.W_texta").attr.title,
                "href": weibo_href,
                "quote_nickname": item("DIV.feed_content DIV.comment DIV.comment_info A.W_texta").attr.title,
                "quote_content": item("DIV.feed_content DIV.comment DIV.comment_info P.comment_txt").text(),
                "pics": ''.join(weibo_pics)
            }
            self.crawl("data:,%s" % weibo_href, callback=self.detail_page, data_fetch_content=data)
Developer: jttoday, Project: spider, Lines: 32, Source: weibo_weixin.py
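Worth noting in Example 7: make_links_absolute(response.url) rewrites every link in the parsed fragment against the page URL, so the later .attr.href reads already return absolute URLs. A hedged sketch of that behavior (URLs invented):

from pyquery import PyQuery

doc = PyQuery('<div><a href="/status/42">link</a></div>')
doc.make_links_absolute('http://weibo.com/search')
print(doc('a').attr.href)  # 'http://weibo.com/status/42'

.attr.href is attribute-style access, equivalent to .attr('href').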
Example 8: get_bounds
def get_bounds(scene_name):
    """Use Earth Explorer metadata to get bounds of a Scene"""
    url_code = get_metadata_code(scene_name)
    metadata = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
    )
    metadata = metadata.text()[
        metadata.text().find('G-Ring_Latitude:'):
        metadata.text().find('\n Keywords:')
    ]
    coords = (
        metadata.replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
    )
    coords = [float(coord) for coord in coords if coord != '']
    # create a list of lists with the coordinates
    coords = [coords[i:i + 2] for i in range(0, len(coords), 2)]
    # use reverse() to change [lat, lon] to [lon, lat]
    [coord.reverse() for coord in coords]
    # repeat the first coordinate on the end of the list
    if coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
Developer: ibamacsr, Project: imagery, Lines: 26, Source: utils.py
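As Example 8 shows, passing a URL to the PyQuery constructor makes it fetch and parse the page itself, and .text() flattens the whole selection to plain text. A minimal hedged sketch (requires network access):

from pyquery import PyQuery

doc = PyQuery(url='http://example.com/')  # fetch and parse in one step
print(doc('h1').text())

For anything beyond a one-off script, fetching with a proper HTTP client and passing the body to PyQuery keeps timeouts, retries, and headers out of the parsing code.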
Example 9: update_forums
def update_forums(client, group, session):
    logging.info("Updating forums list for {}".format(group))
    query = Forum.get_forum_page(client, group.gid)
    reg = regex.compile(r"^forum\.php\?mod=forumdisplay&fid=(\d+)$")
    for row in query.find("table.fl_tb>tr"):
        sub_query = PyQuery(row)
        href = sub_query.find("td").eq(1).find("a").attr("href")
        if not href:
            continue
        fid = int(reg.findall(href)[0])
        name = sub_query.find("td").eq(1).find("h2>a").clone().children().remove().end().text()
        last_update = sub_query.find("td").eq(3).find("div>cite").clone().children().remove().end().text()
        last_update = dateparse(last_update)
        existence = session.query(Forum).filter(Forum.fid == fid)
        if existence.count() == 0:
            logging.info("<Forum(fid={})> not found, creating one".format(fid))
            forum = Forum(fid=fid, name=name, updated_at=last_update, group=group, fresh=False)
            session.add(forum)
        else:
            forum = existence.one()
            if forum.updated_at != last_update:
                logging.info("{} found, stale: against {} ".format(forum, last_update))
                forum.updated_at = last_update
                forum.fresh = False
                session.add(forum)
            else:
                logging.info("{} found, fresh".format(forum))
Developer: doomsplayer, Project: dandao-spider, Lines: 31, Source: forum.py
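The clone().children().remove().end().text() chain in Example 9 is the usual PyQuery idiom for reading an element's own text while discarding its children: clone() protects the live document, remove() drops the children from the copy, and end() steps back from the children selection to the cloned element. A hedged illustration (markup invented):

from pyquery import PyQuery

d = PyQuery('<div><span>Forum name</span> 3 topics</div>')
print(d.text())                                    # 'Forum name 3 topics'
print(d.clone().children().remove().end().text())  # '3 topics'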
Example 10: _enhance_text
def _enhance_text(self):
    """
    Transforms a simplified text into a valid mail.template text.
    :return: mail.template text
    """
    self.ensure_one()
    # Parse and set back the keywords into raw template code
    html_text = PyQuery(self.simplified_text.replace('\n', ''))

    def sort_keywords(kw):
        # Replace first if/for-clauses, then var, then code
        index = kw.position
        if kw.type == 'if' or 'for' in kw.type:
            index += 2 * len(self.body_html) * kw.nested_position
            # Take if and for in the appearing order in the text
            index -= kw.position
        elif kw.type == 'var':
            index += len(self.body_html)
        return index

    keywords = self.keyword_ids.sorted(sort_keywords, reverse=True)
    # Replace automatically generated keywords
    for keyword in keywords:
        keyword_text = html_text('#' + keyword.html_id)
        keyword_text.replace_with(keyword.final_text)
    # Replace user-added keywords
    template_text = html_text.html()
    for keyword in keywords.filtered(lambda k: k.type == 'code'):
        to_replace = u"[{}]".format(keyword.short_code)
        template_text = template_text.replace(to_replace, keyword.raw_code)
    final_text = PyQuery(BeautifulSoup(template_text).prettify())
    return final_text('body').html()
Developer: maxime-beck, Project: compassion-modules, Lines: 33, Source: communication_revision.py
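Two PyQuery calls carry Example 10: selecting by id with ('#' + html_id) and swapping the node out with replace_with(). A reduced hedged sketch (ids and replacement text invented):

from pyquery import PyQuery

html_text = PyQuery('<div><span id="kw1">placeholder</span></div>')
html_text('#kw1').replace_with('% if partner.name %')
print(html_text.html())  # '% if partner.name %'

.html() returns the inner HTML of the first element, which is why the method re-parses the final string and returns final_text('body').html().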
Example 11: __extract
def __extract(self, html):
    pq = PyQuery(html).find("main#main #mainArea table")
    selector_ = "thead tr:eq(0) th"
    date_order = [PyQuery(v).text().split('\n')[0] for v in PyQuery(pq).find(selector_)][3:]
    result = {d: {} for d in date_order}
    index = 0
    total = len(PyQuery(pq).find("tbody tr"))
    while index < total:
        td = PyQuery(pq).find("tbody tr:eq(%d) td:eq(0)" % index)
        room_type = td.text().split()[0]
        rowspan = int(td.attr('rowspan'))
        for i in xrange(index, index + rowspan):
            row = PyQuery(pq).find("tbody tr:eq(%d)" % i)
            # smoking or not
            smoking = PyQuery(row).find("td.alC.alM > img").attr("alt")
            room = "%s (%s)" % (room_type, smoking)
            if row.hasClass('clubCardCell'):
                member_type = 'member'
            else:
                member_type = 'guest'
            for i, v in enumerate(self.__extract_price_remain(row)):
                if room not in result[date_order[i]]:
                    result[date_order[i]][room] = {}
                result[date_order[i]][room][member_type] = v
        index += rowspan
    return result
Developer: mkfsn, Project: ToyokoInn, Lines: 35, Source: ToyokoInn.py
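Example 11 leans on jQuery-style positional pseudo-selectors, which PyQuery supports: :eq(n) narrows a match to the n-th element. A hedged sketch (markup invented):

from pyquery import PyQuery

pq = PyQuery('<table><tbody><tr><td>r0</td></tr><tr><td>r1</td></tr></tbody></table>')
print(pq.find('tbody tr:eq(1)').text())  # 'r1'
print(pq.find('tbody tr').eq(1).text())  # the same row via the .eq() method

Rebuilding the selector string per index re-scans the document each time; calling .eq() on a cached selection is usually the cheaper equivalent.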
Example 12: get_meme_url
def get_meme_url(meme):
    gen = GENERATORS.get(meme)
    if gen:
        pq = PyQuery(url="http://memegenerator.net/%s" % gen[2])
        return pq.find('a img.large').attr('src')
    else:
        return None
Developer: ojii, Project: memepy, Lines: 7, Source: meme.py
Example 13: download
def download(threadUrl):
    """Fetch a thread page and queue all attachment downloads.
    """
    d = PyQuery(url=threadUrl, parser='soup')
    links = d('a[href^="job.php?action=download&aid="]')
    # pull the verify hash out of the inline script
    tmp = d('script:contains("var verifyhash =")').text()
    verify = re.search(r"var verifyhash = '(.*?)'", tmp).group(1)
    total = len(links)
    d.make_links_absolute()
    for i, e in enumerate(links.items(), start=1):
        filename = e.text()
        print('%s/%s %s' % (i, total, filename))
        if not os.path.exists(os.path.join(SAVE_PATH, filename)):
            params = urlencode(
                {'check': 1, 'verify': verify, 'nowtime': int(time.time() * 1000)})
            url = '%s?%s' % (e.attr['href'], params)
            print(' fetch: ' + url)
            downDoc = PyQuery(url, headers=headers)
            # index 0 is the China Telecom mirror, index 1 the China Mobile mirror
            downUrl = BASE_URL + downDoc('a[href^="remotedown.php"]').eq(1).attr('href')
            addToIDM(downUrl, SAVE_PATH, filename)
            time.sleep(1.5)
    wefiler_urls = checkWefiler(d)
    if wefiler_urls:
        print(wefiler_urls)
Developer: fishlee, Project: DownloadHelpers, Lines: 31, Source: weiphone.py
Example 14: _split
def _split(inputfile, outputdir):
    source = open(inputfile, 'r')
    html = source.read()
    source.close()
    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)
    idx_slide = 0
    idx_section = 0
    parsed = PyQuery(html)
    for section in parsed('section'):
        slide = PyQuery(section)
        if slide.has_class('stack'):
            idx_section += 1
            stack_path = os.path.join(outputdir, '%02d' % idx_section)
            os.mkdir(stack_path)
            for sub_slide in PyQuery(slide.html())('section'):
                idx_slide += 1
                _dump_slide(sub_slide, idx_slide, stack_path)
        else:
            if not slide.parent().has_class('stack'):
                idx_slide += 1
                _dump_slide(slide, idx_slide, outputdir)
Developer: tiry, Project: reveal-js-tools, Lines: 26, Source: split.py
Example 15: _parse_table
def _parse_table(self, table):
    # Initialize output rows
    parsed_rows = []
    # Wrap the raw <table> element
    qtable = PyQuery(table)
    # Get headers
    headers = self._get_headers(qtable)
    if not headers:
        return
    # Get rows
    rows = qtable.find("tr")
    # Loop over rows
    for row in rows:
        # Get columns
        qrow = PyQuery(row)
        cols = qrow.find("td").map(self._get_text)[:]
        # Apply each regex substitution in self._trans to the cell text
        for colidx in range(len(cols)):
            col = reduce(lambda x, y: re.sub(y[0], y[1], x), self._trans, cols[colidx])
            cols[colidx] = col
        # Append parsed columns
        if cols:
            parsed_rows.append(cols)
    return {"headers": headers, "data": parsed_rows}
Developer: jmcarp, Project: sciscrape, Lines: 33, Source: sciparse.py
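PyQuery's .map(callback) in Example 15 mirrors jQuery: the callback receives (index, element), and the results come back as a PyQuery list, hence the trailing [:] to copy it into a plain Python list. A hedged sketch (markup invented; the original's _get_text presumably takes the same (i, e) pair):

from pyquery import PyQuery

qrow = PyQuery('<table><tr><td> a </td><td>b</td></tr></table>')
cols = qrow.find('td').map(lambda i, e: PyQuery(e).text().strip())[:]
print(cols)  # ['a', 'b']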
Example 16: station_parse
def station_parse(content):
    '''Parse the bus stations on a route and the bus status at each station.
    '''
    OFF = '0-0'
    stations = []
    bus_status = []
    content = json.loads(content[3:].decode('utf-8'))
    status = content['status']
    info = content['info']
    if status == 1 and info != '':
        pqContent = PyQuery(info)('#upInfo li')
        for station in pqContent:
            pqStation = PyQuery(station)
            station_name = pqStation('.station').text()
            stations.append(station_name)
            buses = pqStation.find('.bus')
            if buses.size() > 0:
                left_count = 0
                on_station_count = 0
                for bus in buses:
                    if PyQuery(bus).attr('style'):
                        left_count += 1
                    else:
                        on_station_count += 1
                bus_status.append('{0}-{1}'.format(on_station_count, left_count))
            else:
                bus_status.append(OFF)
    if not stations:
        return None
    return (tuple(bus_status), tuple(stations))
Developer: ekeyme, Project: BusInTime, Lines: 31, Source: test_sample1.py
Example 17: get_forums
def get_forums():
    logging.info('Récupération des forums')
    if config.debug:
        progress = progressbar.NoProgressBar()
    else:
        progress = progressbar.ProgressBar(widgets=[progressbar.SimpleProgress('/'), ' ', progressbar.Bar("#", "[", "]"), progressbar.Percentage()])
    d = PyQuery(url=config.rooturl + '/a-f1/', opener=fa_opener)
    save.forums = []
    levels = {}
    n = 1
    for i in progress([i for i in d.find("select option") if i.get("value", "-1") != "-1"]):
        id = i.get("value", "-1")
        logging.debug('Récupération: forum %s', id)
        title = re.search('(((\||\xa0)(\xa0\xa0\xa0))*)\|--([^<]+)', i.text).group(5)
        level = len(re.findall('(\||\xa0)\xa0\xa0\xa0', i.text))
        if level <= 0:
            parent = 0
        else:
            parent = levels[level-1]
        levels[level] = n
        d = PyQuery(url=config.rooturl+'/admin/index.forum?part=general&sub=general&mode=edit&fid=' + id + '&extended_admin=1&' + tid, opener=fa_opener)
        try:
            description = d("textarea").text()
        except:
            description = ""
        save.forums.append({'id': int(id[1:]), 'newid': n, 'type': id[0], 'parent': parent, 'title': title, 'description': description, 'parsed': False})
        n += 1
Developer: BossBravo, Project: Lalf-Forumactif, Lines: 34, Source: forumactif-phpbb.py
Example 18: crawl_vvic_category_tree
def crawl_vvic_category_tree(wb):
    h = httplib2.Http()
    response, content = h.request("http://www.vvic.com/")
    # fw = open("C:users/chenweiqiang/desktop/vvic2.html", "w")
    # fw.write(content)
    # fw.close()
    ws = wb.add_sheet("vvic品类树")
    ws.write(0, 0, "一级品类")
    ws.write(0, 1, "二级品类")
    ws.write(0, 2, "三级品类")
    row = 0
    doc = PyQuery(content)
    level1NodeList = doc("div.dd-inner > div.item")
    anotherLevel1NodeList = [doc('div.sub-items')[0], doc('div.sub-items')[1], doc('div.sub-items')[2], doc('div.sub-items')[5]]
    for index, level1Node in enumerate(level1NodeList):
        level1_category = PyQuery(level1Node)('h3 > a').text()
        level2NodeList = PyQuery(anotherLevel1NodeList[index]).children('dl')
        for level2Node in level2NodeList:
            level2NodeQ = PyQuery(level2Node)
            level2_category = level2NodeQ.children('dt > a').text()
            level3NodeList = level2NodeQ.children('dd > a')
            for level3Node in level3NodeList:
                level3_category = PyQuery(level3Node).text()
                row += 1
                ws.write(row, 0, level1_category)
                ws.write(row, 1, level2_category)
                ws.write(row, 2, level3_category)
Developer: chenweiqiang2016, Project: cwq-crawler, Lines: 27, Source: crawl_categoryTree.py
Example 19: get_smileys
def get_smileys():
    global n
    logging.info('Récupération des émoticones')
    n = 0
    d = PyQuery(url=config.rooturl+'/admin/index.forum?part=themes&sub=avatars&mode=smilies&extended_admin=1&' + tid, opener=fa_opener)
    result = re.search('function do_pagination_start\(\)[^\}]*start = \(start > \d+\) \? (\d+) : start;[^\}]*start = \(start - 1\) \* (\d+);[^\}]*\}', d.text())
    try:
        pages = int(result.group(1))
        usersperpages = int(result.group(2))
    except:
        pages = 1
        usersperpages = 0
    if config.debug:
        progress = progressbar.NoProgressBar()
    else:
        progress = progressbar.ProgressBar(widgets=[BarVar(), ' ', progressbar.Bar("#", "[", "]"), progressbar.Percentage()], maxval=pages-1)
    progress.start()
    for page in range(0, pages):
        if page >= 1:
            d = PyQuery(url=config.rooturl + '/admin/index.forum?part=themes&sub=avatars&mode=smilies&extended_admin=1&start=' + str(page*usersperpages) + '&' + tid, opener=fa_opener)
        for i in d('table tr'):
            e = PyQuery(i)
            if e("td").eq(0).text() != None and e("td").eq(0).attr("colspan") == None:
                save.smileys[e("td").eq(0).text()] = e("td").eq(1).text()
                n += 1
        progress.update(page)
    progress.end()
Developer: BossBravo, Project: Lalf-Forumactif, Lines: 34, Source: forumactif-phpbb.py
Example 20: extract_sample_and_codes_using_css
def extract_sample_and_codes_using_css(doc, sample_css_class):
    sample_columns = {}
    code_column = None
    label_column = None
    table = doc('#dataTable > .variablesList')
    for i, th in enumerate(table('tr.fullHeader:first > th')):
        th = PyQuery(th)
        if code_column is None and th.hasClass('codesColumn'):
            code_column = i
        elif label_column is None and th.hasClass('labelColumn'):
            label_column = i
        elif th.hasClass(sample_css_class):
            sample_columns[i] = th.text()
    # Extract actual values for the variables.
    variable_info = []
    for row in table('tr.variables'):
        columns = PyQuery(row)('td')
        code = PyQuery(columns[code_column]).text().strip()
        label = PyQuery(columns[label_column]).text().strip()
        availability = [smpl.strip()
                        for i, smpl in sample_columns.items()
                        if PyQuery(columns[i]).text().strip() != '.']
        variable_info.append({'code': code,
                              'label': label,
                              'availability': availability})
    return variable_info, len(sample_columns)
Developer: americanist, Project: IPUMS_Codebooks, Lines: 31, Source: build_codebooks.py
Note: The pyquery.PyQuery class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by their authors, and copyright remains with the original authors. For distribution and use, refer to the corresponding project's License; please do not repost without permission.