
Python pyquery.PyQuery Class Code Examples


This article collects typical usage examples of the pyquery.PyQuery class in Python. If you have been wondering what the PyQuery class is for, how to use it, or what real-world PyQuery code looks like, the hand-picked examples below should help.



The following presents 20 code examples of the PyQuery class, sorted by popularity by default. You can upvote the examples you like or find useful; that feedback helps the system recommend better Python code samples.
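
Before the examples, here is a minimal usage sketch (not taken from any of the projects below): it shows how a PyQuery document is typically constructed from an HTML string, a URL, or a file, and then queried with CSS selectors. The HTML fragment and URL are made up for illustration.

# Minimal PyQuery usage sketch (illustrative only)
from pyquery import PyQuery

doc = PyQuery('<div><p class="title">Hello</p><a href="/next">more</a></div>')
print(doc('p.title').text())   # -> 'Hello'
print(doc('a').attr('href'))   # -> '/next'

# A document can also be loaded directly from a URL or a local file:
# doc = PyQuery(url='https://example.com/')
# doc = PyQuery(filename='page.html')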

Example 1: test_render_attribute_to_document

def test_render_attribute_to_document():
    document = PyQuery('<a attribute="{ value }" data-riot-id="0"></a>')
    expression = {'expression': '{ value }', 'attribute': 'attribute', 'type': 'attribute', 'node': document}
    render_document([expression], {'value': 'value'})
    assert document.outer_html() == '<a attribute="value" data-riot-id="0" data-riot-dirty="true"></a>'
    render_document([expression], {'value': 1})
    assert document.outer_html() == '<a attribute="1" data-riot-id="0" data-riot-dirty="true"></a>'
Author: pombredanne | Project: riotpy | Lines: 7 | Source: test_expression.py
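
The assertions in this example rely on the difference between .html(), which returns the inner HTML of the first matched element, and .outer_html(), which includes the element's own tag. A small hedged illustration with a made-up fragment:

from pyquery import PyQuery

a = PyQuery('<a attribute="x">link text</a>')
print(a.html())        # -> 'link text'
print(a.outer_html())  # -> '<a attribute="x">link text</a>'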


Example 2: get_saml_response

def get_saml_response(response):
    tree = PyQuery(response.content)
    inputtag = tree.find('input[name="SAMLResponse"]')
    assert len(inputtag) == 1
    encoded_response = inputtag[0].get('value')
    samlresponse = base64.b64decode(encoded_response)
    return samlresponse
Author: miing | Project: mci_migo | Lines: 7 | Source: tests.py
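
Note the mix of APIs here: indexing a PyQuery selection (inputtag[0]) yields a raw lxml element whose attributes are read with .get(), while staying in PyQuery the equivalent is .attr(). A hedged sketch with a made-up form:

from pyquery import PyQuery

form = PyQuery('<form><input name="SAMLResponse" value="Zm9v"/></form>')
field = form.find('input[name="SAMLResponse"]')
print(field[0].get('value'))   # lxml element API -> 'Zm9v'
print(field.attr('value'))     # PyQuery API      -> 'Zm9v'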


Example 3: html_to_records

def html_to_records(html):
    pq = PyQuery(html)
    rows = pq.find('table tr')
    get_row = lambda r: map(lambda th: th.text, r)
    headers = get_row(rows[0])
    for row in rows[1:]:
        yield dict(zip(headers, get_row(row)))
Author: texastribune | Project: aeis | Lines: 7 | Source: files.py
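
A hedged usage sketch for the html_to_records() generator above, with a tiny made-up table (it assumes that function plus "from pyquery import PyQuery" are in scope). One caveat under Python 3: map() returns a lazy iterator there, so with more than one data row the headers iterator would already be exhausted after the first row; materializing get_row's result with list() avoids that.

sample = """
<table>
  <tr><th>name</th><th>grade</th></tr>
  <tr><td>Alice</td><td>A</td></tr>
</table>
"""
for record in html_to_records(sample):
    print(record)   # -> {'name': 'Alice', 'grade': 'A'}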


Example 4: getResTb

def getResTb():
    html = fileworker.getHTML()
    pq = PyQuery(html)
    result = dict()
    blocks = list()
    for i in pq.items('.row.result'):
        blocks.append(i)
Author: T10TheKinOfStars | Project: crawlerforfun | Lines: 7 | Source: parser.py


Example 5: parseProductPage

def parseProductPage(product, need_img_urls=False):
    """Visit the product detail page and scrape four new fields:
       delivery, reviews, star, total_sales
    """
    if product['product_url']:
        content = fetchContent(product['product_url'], False)
        doc = PyQuery(content)
        # product['delivery'] = doc("div.cost-entries-type > p > em.value").text()  # shipping fee is rendered by JS, cannot be scraped here
        product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
        product['star'] = doc('p.star-level > i').attr("class")
        product['total_sales'] = doc('p.bargain-number > a > em.value').text()
        if need_img_urls:
            url_list = get_img_urls(content)
            product['img_urls'] = ', '.join(url_list)
        else:
            product['img_urls'] = ''
        product['color'], product['size'] = '', ''
        for index, td in enumerate(doc('div.obj-content > table > tbody > tr > td')):
            tdQ = PyQuery(td)
            if tdQ.attr('class') == 'de-feature' and tdQ.text().strip() == u'颜色':
                product['color'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
            if tdQ.attr('class') == 'de-feature' and tdQ.text().strip() == u'尺寸':
                product['size'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
        product['MOQ'] = extractNum(doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
        if not product['MOQ'] or product['MOQ'] == 0:
            product['MOQ'] = extractNum(PyQuery(doc('tr.amount').remove('td.amount-title').children('td').eq(0))('span.value').text())
        if product['MOQ'] == 1:
            # print product['product_url']
            product['sku_size'] = PyQuery(doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
            product['sku_color'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.name').text()
            product['sku_price'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.price').text()
            product['sku_amount'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.count > span > em.value').text()
            print product['sku_id'], '\t', product['sku_size'], "\t", product['sku_color'], "\t", product['sku_price'], "\t", product['sku_amount']
    return product
Author: chenweiqiang2016 | Project: cwq-crawler | Lines: 34 | Source: products.py
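
This example repeatedly wraps raw cells with PyQuery(td): iterating a PyQuery selection yields plain lxml elements, which have to be re-wrapped before .attr()/.text() can be used on them. The .items() method (used in some of the other examples) yields ready-made PyQuery objects instead. A hedged sketch:

from pyquery import PyQuery

doc = PyQuery('<table><tr><td class="de-feature">颜色</td><td>红色</td></tr></table>')
for td in doc('td'):            # raw lxml elements
    print(PyQuery(td).text())
for td in doc('td').items():    # PyQuery objects directly
    print(td.attr('class'), td.text())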


Example 6: crawl_1688_category_tree

def crawl_1688_category_tree(wb):
    # fr = open("C:users/chenweiqiang/desktop/ye.html", "r")  # elements could not be extracted after wrapping this file's HTML in PyQuery
    h = httplib2.Http()
    response, content = h.request("https://ye.1688.com/")
#     fw = open("C:users/chenweiqiang/desktop/ye2.html", "w")
#     fw.write(content)
#     fw.close()
    ws = wb.add_sheet("ye.1688品类树")
    ws.write(0,0,"一级品类")
    ws.write(0,1,"二级品类")
    ws.write(0,2,"三级品类")
    row = 0
    doc = PyQuery(content)
    level1NodeList = doc("li.cat-box")
    for level1Node in level1NodeList:
        level1NodeQ = PyQuery(level1Node)
        level1_category = level1NodeQ('div.cat-main').text().replace(' ', '')
        level2NodeList = level1NodeQ('div.cat-sub-col > dl')  # the intermediate div[class="cat-sub "] > level is redundant and omitted
        for level2Node in level2NodeList:
            level2NodeQ = PyQuery(level2Node)
            level2_category = level2NodeQ('dt > a').text()
            level3NodeList = level2NodeQ('dd.cat-list > ul > li > a')
            for level3Node in level3NodeList:
                level3NodeQ = PyQuery(level3Node)
                level3_category = level3NodeQ.text()
                row += 1
                ws.write(row, 0, level1_category)
                ws.write(row, 1, level2_category)
                ws.write(row, 2, level3_category)
Author: chenweiqiang2016 | Project: cwq-crawler | Lines: 29 | Source: crawl_categoryTree.py


Example 7: list_page

    def list_page(self, response):
        result_content = {}

        content_iter = re.finditer(r"STK && STK.pageletM && STK.pageletM.view\((?P<content>\{.*?\})\)", response.content)
        for iter in content_iter:
            ok, content = safe_loads(iter.groupdict()['content'])
            if ok and "pl_weibo_direct" == content.get("pid"):
                result_content = content
                break
        else:
            return {}

        pyquery_doc = PyQuery(result_content["html"])
        pyquery_doc.make_links_absolute(response.url)

        items = []
        for item in pyquery_doc("DIV.feed_lists>DIV.WB_cardwrap>DIV").items():
            weibo_href = item("DIV.content>DIV.feed_from>A").attr.href
            if weibo_href:
                weibo_pics = []
                for pic in item("DIV.feed_content DIV.media_box IMG").items():
                    weibo_pics.append(pic.attr.src)

                data = {
                    "content": item("DIV.feed_content P.comment_txt").text(),
                    "nickname": item("DIV.feed_content A.W_texta").attr.title,
                    "href": weibo_href,
                    "quote_nickname": item("DIV.feed_content DIV.comment DIV.comment_info A.W_texta").attr.title,
                    "quote_content": item("DIV.feed_content DIV.comment DIV.comment_info P.comment_txt").text(),
                    "pics": ''.join(weibo_pics)
                }
                self.crawl("data:,%s" % weibo_href, callback=self.detail_page, data_fetch_content=data)
Author: jttoday | Project: spider | Lines: 32 | Source: weibo_weixin.py
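
make_links_absolute() is what turns the relative hrefs in the scraped fragment into absolute URLs before they are followed. A hedged sketch with a made-up fragment and base URL:

from pyquery import PyQuery

d = PyQuery('<div><a href="/detail?id=1">item</a></div>')
d.make_links_absolute('http://weibo.com/')
print(d('a').attr('href'))   # -> 'http://weibo.com/detail?id=1'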


Example 8: get_bounds

def get_bounds(scene_name):
    """Use Earth Explorer metadata to get bounds of a Scene"""
    url_code = get_metadata_code(scene_name)

    metadata = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
        )
    metadata = metadata.text()[
        metadata.text().find('G-Ring_Latitude:'):
        metadata.text().find('\n  Keywords:')
        ]
    coords = (
        metadata.replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
        )
    coords = [float(coord) for coord in coords if coord != '']
    # create a list of lists with the coordinates
    coords = [coords[i:i + 2] for i in range(0, len(coords), 2)]
    # use reverse() to change [lat, lon] to [lon, lat]
    [coord.reverse() for coord in coords]
    # repeat the first coordinate on the end of the list
    if coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
Author: ibamacsr | Project: imagery | Lines: 26 | Source: utils.py


Example 9: update_forums

    def update_forums(client, group, session):
        logging.info("Updating forums list for {}".format(group))
        query = Forum.get_forum_page(client, group.gid)
        reg = regex.compile(r"^forum\.php\?mod=forumdisplay&fid=(\d+)$")

        for row in query.find("table.fl_tb>tr"):
            sub_query = PyQuery(row)
            href = sub_query.find("td").eq(1).find("a").attr("href")
            if not href:
                continue

            fid = int(reg.findall(href)[0])

            name = sub_query.find("td").eq(1).find("h2>a").clone().children().remove().end().text()
            last_update = sub_query.find("td").eq(3).find("div>cite").clone().children().remove().end().text()
            last_update = dateparse(last_update)

            existence = session.query(Forum).filter(Forum.fid == fid)
            if existence.count() == 0:
                logging.info("<Forum(fid={})> not found, creating one".format(fid))
                forum = Forum(fid=fid, name=name, updated_at=last_update, group=group, fresh=False)
                session.add(forum)
            else:
                forum = existence.one()
                if forum.updated_at != last_update:
                    logging.info("{} found, stale: against {} ".format(forum, last_update))
                    forum.updated_at = last_update
                    forum.fresh = False
                    session.add(forum)
                else:
                    logging.info("{} found, fresh".format(forum))
Author: doomsplayer | Project: dandao-spider | Lines: 31 | Source: forum.py
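
The .clone().children().remove().end().text() chain above extracts only an element's own text: it copies the node, strips its child elements, then returns to the copy and reads its text. A hedged sketch with a made-up forum cell:

from pyquery import PyQuery

cell = PyQuery('<h2><a>General talk <em>(42 topics)</em></a></h2>')
print(cell.find('a').text())                                    # -> 'General talk (42 topics)'
print(cell.find('a').clone().children().remove().end().text())  # -> 'General talk'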


Example 10: _enhance_text

    def _enhance_text(self):
        """
        Transforms a simplified text into a valid mail.template text.
        :return: mail.template text
        """
        self.ensure_one()
        # Parse and set back the keywords into raw template code
        html_text = PyQuery(self.simplified_text.replace('\n', ''))

        def sort_keywords(kw):
            # Replace first if/for-clauses, then var, then code
            index = kw.position
            if kw.type == 'if' or 'for' in kw.type:
                index += 2*len(self.body_html) * kw.nested_position
                # Take if and for in the appearing order in the text
                index -= kw.position
            elif kw.type == 'var':
                index += len(self.body_html)
            return index

        keywords = self.keyword_ids.sorted(sort_keywords, reverse=True)
        # Replace automatic-generated keywords
        for keyword in keywords:
            keyword_text = html_text('#' + keyword.html_id)
            keyword_text.replace_with(keyword.final_text)

        # Replace user added keywords
        template_text = html_text.html()
        for keyword in keywords.filtered(lambda k: k.type == 'code'):
            to_replace = u"[{}]".format(keyword.short_code)
            template_text = template_text.replace(to_replace, keyword.raw_code)
        final_text = PyQuery(BeautifulSoup(template_text).prettify())
        return final_text('body').html()
Author: maxime-beck | Project: compassion-modules | Lines: 33 | Source: communication_revision.py
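
replace_with() is what swaps each keyword placeholder node for its final text in the method above. A hedged sketch (the placeholder id and text are made up, and the output is shown approximately):

from pyquery import PyQuery

html_text = PyQuery('<p>Dear <span id="kw-1">{firstname}</span>,</p>')
html_text('#kw-1').replace_with('John')
print(html_text.html())   # -> roughly 'Dear John,'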


Example 11: __extract

    def __extract(self, html):
        pq = PyQuery(html).find("main#main #mainArea table")

        selector_ = "thead tr:eq(0) th"
        date_order = [PyQuery(v).text().split('\n')[0] for v in PyQuery(pq).find(selector_)][3:]
        result = {d: {} for d in date_order}

        index = 0
        total = len(PyQuery(pq).find("tbody tr"))
        while index < total:
            td = PyQuery(pq).find("tbody tr:eq(%d) td:eq(0)" % index)

            room_type = td.text().split()[0]
            rowspan = int(td.attr('rowspan'))

            for i in xrange(index, index + rowspan):
                row = PyQuery(pq).find("tbody tr:eq(%d)" % i)

                # smoking or not
                smoking = PyQuery(row).find("td.alC.alM > img").attr("alt")

                room = "%s (%s)" % (room_type, smoking)

                if row.hasClass('clubCardCell'):
                    member_type = 'member'
                else:
                    member_type = 'guest'

                for i, v in enumerate(self.__extract_price_remain(row)):
                    if room not in result[date_order[i]]:
                        result[date_order[i]][room] = {}
                    result[date_order[i]][room][member_type] = v

            index += rowspan
        return result
Author: mkfsn | Project: ToyokoInn | Lines: 35 | Source: ToyokoInn.py
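
The row and cell addressing above relies on the ":eq(n)" pseudo-selector, which picks a single element by index inside the selector string; the .eq(n) method does the same on an existing selection. A hedged sketch:

from pyquery import PyQuery

table = PyQuery('<table><tr><td>a</td></tr><tr><td>b</td></tr><tr><td>c</td></tr></table>')
print(table('tr:eq(1) td').text())   # -> 'b'
print(table('tr').eq(1).text())      # -> 'b' (method form)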


Example 12: get_meme_url

def get_meme_url(meme):
    gen = GENERATORS.get(meme)
    if gen:
        pq = PyQuery(url="http://memegenerator.net/%s" % gen[2])
        return pq.find('a img.large').attr('src')
    else:
        return None
Author: ojii | Project: memepy | Lines: 7 | Source: meme.py


Example 13: download

def download(threadUrl):
    """
    """
    d = PyQuery(url=threadUrl, parser='soup')
    links = d('a[href^="job.php?action=download&aid="]')

    # get the value of verify
    tmp = d('script:contains("var verifyhash =")').text()
    verify = re.search(r"var verifyhash = '(.*?)'", tmp).group(1)

    total = len(links)
    d.make_links_absolute()
    for i, e in enumerate(links.items(), start=1):
        filename = e.text()
        print('%s/%s %s' % (i, total, filename))

        if not os.path.exists(os.path.join(SAVE_PATH, filename)):
            params = urlencode(
                {'check': 1, 'verify': verify, 'nowtime': int(time.time() * 1000)})
            url = '%s?%s' % (e.attr['href'], params)

            print('  fetch: ' + url)
            downDoc = PyQuery(url, headers=headers)
            # index 0 is the China Telecom download mirror, index 1 is the China Mobile mirror
            downUrl = BASE_URL + downDoc('a[href^="remotedown.php"]').eq(1).attr('href')
            addToIDM(downUrl, SAVE_PATH, filename)
            time.sleep(1.5)

    wefiler_urls = checkWefiler(d)
    if wefiler_urls:
        print(wefiler_urls)
Author: fishlee | Project: DownloadHelpers | Lines: 31 | Source: weiphone.py
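
The verify value above is located with the ':contains("...")' pseudo-selector, which matches elements whose text contains the given substring. A hedged sketch:

from pyquery import PyQuery

d = PyQuery('<div><p>alpha</p><p>var verifyhash = "abc";</p></div>')
print(d('p:contains("verifyhash")').text())   # -> 'var verifyhash = "abc";'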


Example 14: _split

def _split(inputfile, outputdir):
    source = open(inputfile, 'r')
    html = source.read()
    source.close()

    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)

    idx_slide=0
    idx_section=0

    parsed = PyQuery(html)
    
    for section in parsed('section'):
        slide = PyQuery(section)        
        if slide.has_class('stack'):
            idx_section+=1
            stack_path = os.path.join(outputdir,'%02d' % idx_section )
            os.mkdir(stack_path)
            for sub_slide in PyQuery(slide.html())('section'):
                idx_slide+=1
                _dump_slide(sub_slide, idx_slide, stack_path)
        else: 
            if not slide.parent().has_class('stack'):
                idx_slide+=1
                _dump_slide(slide, idx_slide, outputdir)                    
Author: tiry | Project: reveal-js-tools | Lines: 26 | Source: split.py


Example 15: _parse_table

    def _parse_table(self, table):

        # Initialize table
        parsed_rows = []

        # Parse table
        qtable = PyQuery(table)

        # Get headers
        headers = self._get_headers(qtable)
        if not headers:
            return

        # Get rows
        rows = qtable.find("tr")

        # Loop over rows
        for row in rows:

            # Get columns
            qrow = PyQuery(row)
            cols = qrow.find("td").map(self._get_text)[:]

            # Parse column values
            for colidx in range(len(cols)):
                col = reduce(lambda x, y: re.sub(y[0], y[1], x), self._trans, cols[colidx])
                cols[colidx] = col

            # Append parsed columns
            if cols:
                parsed_rows.append(cols)

        return {"headers": headers, "data": parsed_rows}
Author: jmcarp | Project: sciscrape | Lines: 33 | Source: sciparse.py
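
The .map(self._get_text)[:] idiom above applies a callback to every matched cell and then uses the [:] slice to turn the result into a plain Python list. A hedged sketch (the callback here is illustrative; pyquery passes the index and the element to it):

from pyquery import PyQuery

row = PyQuery('<table><tr><td> 1 </td><td>2</td></tr></table>')
values = row('td').map(lambda i, e: PyQuery(e).text().strip())[:]
print(values)   # -> ['1', '2']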


Example 16: station_parse

def station_parse(content):
    '''Parsing bus station and check station.
    '''
    OFF = '0-0'
    stations = []
    bus_status = []
    content = json.loads(content[3:].decode('utf-8'))
    status = content['status']
    info = content['info']
    if status == 1 and info != '':
        pqContent = PyQuery(info)('#upInfo li')
        for station in pqContent:
            pqStation = PyQuery(station)
            station_name = pqStation('.station').text()
            stations.append(station_name)
            buses = pqStation.find('.bus')
            if buses.size() > 0:
                left_count = 0
                on_station_count = 0
                for bus in buses:
                    if PyQuery(bus).attr('style'):
                        left_count+=1
                    else:
                        on_station_count+=1
                bus_status.append('{0}-{1}'.format(on_station_count, left_count))
            else:
                bus_status.append(OFF)
    if not stations:
        return None

    return (tuple(bus_status), tuple(stations))
Author: ekeyme | Project: BusInTime | Lines: 31 | Source: test_sample1.py


Example 17: get_forums

def get_forums():
    logging.info('Récupération des forums')
    if config.debug:
        progress = progressbar.NoProgressBar()
    else:
        progress = progressbar.ProgressBar(widgets=[progressbar.SimpleProgress('/'), ' ', progressbar.Bar("#","[","]"), progressbar.Percentage()])

    d = PyQuery(url=config.rooturl + '/a-f1/', opener=fa_opener)
    
    save.forums = []
    levels = {}
    n = 1

    for i in progress([i for i in d.find("select option") if i.get("value", "-1") != "-1"]):
        id = i.get("value", "-1")
        logging.debug('Récupération: forum %s', id)
        title = re.search('(((\||\xa0)(\xa0\xa0\xa0))*)\|--([^<]+)', i.text).group(5)
        level = len(re.findall('(\||\xa0)\xa0\xa0\xa0', i.text))
        
        if level <= 0:
            parent = 0
        else:
            parent = levels[level-1]
        
        levels[level] = n
        
        d = PyQuery(url=config.rooturl+'/admin/index.forum?part=general&sub=general&mode=edit&fid=' + id + '&extended_admin=1&' + tid, opener=fa_opener)
        try:
            description = d("textarea").text()
        except:
            description = ""
        
        save.forums.append({'id': int(id[1:]), 'newid': n, 'type': id[0], 'parent': parent, 'title': title, 'description': description, 'parsed': False})
        n += 1
Author: BossBravo | Project: Lalf-Forumactif | Lines: 34 | Source: forumactif-phpbb.py


Example 18: crawl_vvic_category_tree

def crawl_vvic_category_tree(wb):
    h = httplib2.Http()
    response, content = h.request("http://www.vvic.com/")
#     fw = open("C:users/chenweiqiang/desktop/vvic2.html", "w")
#     fw.write(content)
#     fw.close()
    ws = wb.add_sheet("vvic品类树")
    ws.write(0,0,"一级品类")
    ws.write(0,1,"二级品类")
    ws.write(0,2,"三级品类")
    row = 0
    doc = PyQuery(content)
    level1NodeList = doc("div.dd-inner > div.item")
    anotherLevel1NodeList = [doc('div.sub-items')[0], doc('div.sub-items')[1], doc('div.sub-items')[2], doc('div.sub-items')[5]]
    for index, level1Node in enumerate(level1NodeList):
        level1_category = PyQuery(level1Node)('h3 > a').text()
        level2NodeList = PyQuery(anotherLevel1NodeList[index]).children('dl')
        for level2Node in level2NodeList:
            level2NodeQ = PyQuery(level2Node)
            level2_category = level2NodeQ.children('dt > a').text()
            level3NodeList = level2NodeQ.children('dd > a')
            for level3Node in level3NodeList:
                level3_category = PyQuery(level3Node).text()
                row += 1
                ws.write(row, 0, level1_category)
                ws.write(row, 1, level2_category)
                ws.write(row, 2, level3_category)
Author: chenweiqiang2016 | Project: cwq-crawler | Lines: 27 | Source: crawl_categoryTree.py


Example 19: get_smileys

def get_smileys():
    global n
    logging.info('Récupération des émoticones')

    n = 0

    d = PyQuery(url=config.rooturl+'/admin/index.forum?part=themes&sub=avatars&mode=smilies&extended_admin=1&' + tid, opener=fa_opener)
    result = re.search('function do_pagination_start\(\)[^\}]*start = \(start > \d+\) \? (\d+) : start;[^\}]*start = \(start - 1\) \* (\d+);[^\}]*\}', d.text())

    try:
        pages = int(result.group(1))
        usersperpages = int(result.group(2))
    except:
        pages = 1
        usersperpages = 0
        
    if config.debug:
        progress = progressbar.NoProgressBar()
    else:
        progress = progressbar.ProgressBar(widgets=[BarVar(), ' ', progressbar.Bar("#","[","]"), progressbar.Percentage()], maxval=pages-1)
    progress.start()

    for page in range(0,pages):
        if page >= 1:
            d = PyQuery(url=config.rooturl + '/admin/index.forum?part=themes&sub=avatars&mode=smilies&extended_admin=1&start=' + str(page*usersperpages) + '&' + tid, opener=fa_opener)
        
        for i in d('table tr'):
            e = PyQuery(i)
            if e("td").eq(0).text() != None and e("td").eq(0).attr("colspan") == None:
                save.smileys[e("td").eq(0).text()] = e("td").eq(1).text()
                n += 1
        progress.update(page)

    progress.end()
Author: BossBravo | Project: Lalf-Forumactif | Lines: 34 | Source: forumactif-phpbb.py


Example 20: extract_sample_and_codes_using_css

def extract_sample_and_codes_using_css(doc, sample_css_class):
    sample_columns = {}
    variable_column = None

    sample_columns = {}
    code_column = None
    label_column = None
    table = doc('#dataTable > .variablesList')
    for i, th in enumerate(table('tr.fullHeader:first > th')):
        th = PyQuery(th)
        if code_column is None and th.hasClass('codesColumn'):
            code_column = i
        elif label_column is None and th.hasClass('labelColumn'):
            label_column = i
        elif th.hasClass(sample_css_class):
            sample_columns[i] = th.text()
            
    # Extract actual values for the variables.
    variable_info = []
    for row in table('tr.variables'):
        columns = PyQuery(row)('td')
        code = PyQuery(columns[code_column]).text().strip()
        label = PyQuery(columns[label_column]).text().strip()
        availability = [smpl.strip()
                        for i, smpl in sample_columns.items() 
                        if PyQuery(columns[i]).text().strip() != '.']
        variable_info.append({'code': code,
                              'label': label,
                              'availability': availability})

    return variable_info, len(sample_columns)
Author: americanist | Project: IPUMS_Codebooks | Lines: 31 | Source: build_codebooks.py



Note: The pyquery.PyQuery class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors; consult the corresponding project's license before redistributing or reusing the code. Do not reproduce this article without permission.

