• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python spider.log_with_time函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中spider.log_with_time函数的典型用法代码示例。如果您正苦于以下问题:Python log_with_time函数的具体用法?Python log_with_time怎么用?Python log_with_time使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了log_with_time函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: stock1_parser

def stock1_parser(task, rule):
    """Parse a first-stage stock response and decide where the item goes next.

    The response text is decoded with ``demjson``.  When ``code`` is 3 and
    ``message`` is truthy, the first run of digits in ``message`` is passed
    to ``surl2`` as a SKU id to build a follow-up URL; otherwise the stock
    flag is derived from the truthiness of ``totalAmount``.

    Args:
        task: mapping providing ``text``, ``url``, ``gid`` and ``price``.
        rule: not used by this parser.

    Returns:
        ``[]`` when the response cannot be decoded or the follow-up URL
        cannot be built.  Otherwise a dict with the keys ``"spider"`` and
        ``"stock2"``; exactly one of them is populated.
    """
    try:
        j = demjson.decode(task['text'])
    except Exception:
        # ``except Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        log_with_time("bad response: %r" % task['url'])
        return []
    code = j['code']
    message = j['message']

    url = ""
    ret = {"spider": [], 'stock2': []}

    if code == 3 and message:
        try:
            skuid = re.search(r"\d+", message).group()
            url = surl2(task['gid'], skuid)
        except Exception:
            # No digits in the message (``search`` returned None) or the
            # URL could not be built: give up on this task.
            return []

    if url == "":
        stock = 1 if j.get('totalAmount') else 0
        ret['spider'] = format_price(
            [(itemurl + task['gid'], task['price'], stock)])
    else:
        ret['stock2'] = [(url, task['gid'], task['price'])]

    return ret
开发者ID:haidao-git19,项目名称:tlf,代码行数:26,代码来源:parser.py


示例2: list_parser

def list_parser(task, rule):
    """Collect item links and ids from a listing page.

    Args:
        task: mapping providing the page ``text`` and its ``url``.
        rule: mapping of xpath expressions under ``node``, ``link`` and
            ``gid``.

    Returns:
        ``None`` when the ``node`` xpath matches nothing; otherwise a dict
        with ``dps_log`` (gid -> timestamp), ``dp`` (list of
        ``(link, "")`` tuples) and ``price`` (list of gids).
    """
    tree = etree.HTML(task["text"])
    matched = tree.xpath(rule["node"])
    if not matched:
        log_with_time("node rule error: %s" % task["url"])
        return

    detail_pages, seen_at, gids = [], {}, []
    timestamp = int(time.time())
    for element in matched:
        links = element.xpath(rule["link"])
        ids = element.xpath(rule["gid"])
        if not (links and ids):
            log_with_time("rule error: %s" % task["url"])
            continue
        item_id = ids[0]
        detail_pages.append((links[0], ""))
        gids.append(item_id)
        seen_at[item_id] = timestamp

    return {"dps_log": seen_at, "dp": detail_pages, "price": gids}
开发者ID:haidao-git19,项目名称:tlf,代码行数:25,代码来源:parser.py


示例3: cats_parser

def cats_parser(url, res, rule):
    """Turn category links found on a page into a set of crawlable URLs.

    Links containing ``/c0-0/`` are skipped.  A link containing
    ``/ctg/s2/`` is mapped through ``ctgurl``; otherwise a link containing
    ``list.yhd.com`` is mapped through ``lsturl``.  In both cases the
    category id is the text after the marker, up to the first ``-``.

    Args:
        url: not used by this parser.
        res: mapping providing the page ``text``.
        rule: xpath expression selecting the link strings.

    Returns:
        A set of formatted URLs.
    """
    # (substring identifying the link kind, regex for the tail, URL template)
    # Raw strings avoid the invalid-escape warning for ``\.`` and ``\/``.
    routes = (
        ('/ctg/s2/', r"(?<=/ctg/s2/).+", ctgurl),
        ('list.yhd.com', r"(?<=yhd\.com\/).+", lsturl),
    )

    t = etree.HTML(res['text'])
    ret = set()
    for v in t.xpath(rule):
        if '/c0-0/' in v:
            continue
        for marker, pattern, template in routes:
            if marker not in v:
                continue
            cat = re.search(pattern, v)
            if cat:
                ret.add(template % cat.group().split('-')[0])
            else:
                log_with_time("bad regex: %r %r" % (pattern, v))
            # Only the first matching route applies (if/elif semantics).
            break
    return ret
开发者ID:haidao-git19,项目名称:tlf,代码行数:27,代码来源:parser.py


示例4: rt_parser

def rt_parser(items):
    """Fetch price and stock for a batch of items and format the result.

    Product ids are obtained from ``get_pids(items)``; one price request and
    one stock request are issued with ``simple_http.get``.

    Args:
        items: passed through to ``get_pids``.

    Returns:
        ``None`` when no product ids are found, when either request does not
        return status 200, or when a response cannot be parsed; otherwise
        the value of ``format_price`` over ``(pid, price, stock)`` tuples.
    """
    pids = get_pids(items)
    if not pids:
        # The original referenced an undefined name here (NameError).
        log_with_time("got nothing: %s" % items)
        return

    purl = price_url % (",".join(["J_" + i for i in pids]),
                        random.randint(1000000, 10000000),
                        int(time.time() * 1000))
    surl = stock_url % (async_http.quote(",".join(pids)),
                        random.randint(1000000, 10000000),
                        int(time.time() * 1000))

    price_res = simple_http.get(purl)
    stock_res = simple_http.get(surl)
    if price_res["status"] != 200 or stock_res["status"] != 200:
        # The original referenced an undefined name here (NameError);
        # report both statuses since either request may have failed.
        log_with_time("not200: price=%s stock=%s"
                      % (price_res["status"], stock_res["status"]))
        return

    try:
        price_json = jsonp_json(price_res["text"])
        stock_json = jsonp_json(stock_res["text"].decode("gbk"))
    except Exception:
        traceback.print_exc()
        return

    prices = {}
    for entry in price_json:
        # Ids look like "<prefix>_<pid>"; keep the part after the underscore.
        prices[entry["id"].split("_")[1]] = entry["p"]

    stocks = {}
    for pid, info in stock_json.items():
        state = info["StockStateName"]
        stocks[pid] = 1 if (u"有货" in state or u"现货" in state) else 0

    # NOTE(review): a pid present in the price response but absent from the
    # stock response raises KeyError here, as in the original - confirm
    # whether such items should be skipped instead.
    ret = [(str(pid), str(prices[pid]), stocks[pid]) for pid in prices]
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:35,代码来源:parser.py


示例5: dp_parser

def dp_parser(task, rule):
    """Extract the description URL embedded in a detail page.

    Args:
        task: mapping providing the page ``text`` and its ``url``.
        rule: not used by this parser.

    Returns:
        ``None`` when no description URL is found; otherwise a one-element
        list ``[(desc_url, crc, "")]`` where ``crc`` is the string form of
        ``urlcrc.get_urlcrc(3, task["url"])``.
    """
    found = re.search("desc: '(http.*?desc/[0-9]+)'", task["text"])
    if found is None:
        log_with_time("no desc: %s" % task["url"])
        return
    checksum = urlcrc.get_urlcrc(3, task["url"])
    return [(found.group(1), str(checksum), "")]
开发者ID:haidao-git19,项目名称:tlf,代码行数:7,代码来源:parser.py


示例6: list_parser

def list_parser(task, rule):
    """Split the items of a listing page into priced and unpriced groups.

    A node with a buy-info element is reported in ``prices`` with a stock
    flag; the flag is 0 only when there is no buy-cart element and the node
    is either marked sold out or has no comment element.  Nodes without a
    buy-info element are reported in ``items``.

    Args:
        task: mapping providing the page ``text`` and its ``url``.
        rule: mapping of xpath expressions under ``nodes``, ``buyinfo``,
            ``buycart``, ``sellout`` and ``comment``.

    Returns:
        A dict with ``prices`` (list of ``(gid, stock)``), ``items`` (list
        of gids) and ``dps`` (gid -> timestamp).
    """
    t = etree.HTML(task['text'])
    nodes = t.xpath(rule['nodes'])
    prices = []
    items = []
    dps = {}
    for node in nodes:
        # ``.get`` instead of indexing: a missing attribute must reach the
        # log-and-skip branch below rather than raise KeyError.
        gid = node.attrib.get('itemid')
        if not gid:
            log_with_time("bad response: %r" % task['url'])
            continue
        buyinfo = node.xpath(rule['buyinfo'])
        if buyinfo:
            buyinfo = buyinfo[0]
            stock = 1
            if not buyinfo.xpath(rule['buycart']):
                if buyinfo.xpath(rule['sellout']) or not node.xpath(rule['comment']):
                    stock = 0
            prices.append((gid, stock))
        else:
            items.append(gid)
        dps[gid] = int(time.time())
    return {"prices": prices, "items": items, "dps": dps}
开发者ID:haidao-git19,项目名称:tlf,代码行数:25,代码来源:parser.py


示例7: extract_book

def extract_book(url, tree, rule):
    """Extract book links, stock flags and comments from a listing tree.

    Args:
        url: listing URL; the first run of digits in its last
            ``-``-separated segment is used as the list id.
        tree: parsed document exposing ``xpath``.
        rule: mapping of xpath expressions under ``book_node``,
            ``book_title``, ``book_stock`` and ``book_comment``.

    Returns:
        A dict with ``book_price`` (list of ``(link, gid, lid, stock)``),
        ``dps_log`` (gid -> timestamp) and ``comment`` (gid -> first comment
        match, present only when the comment xpath matched).

    Raises:
        AttributeError: if the last segment of ``url`` contains no digits.
    """
    result = []
    dps_log = {}
    comments = {}
    now = int(time.time())
    nodes = tree.xpath(rule["book_node"])
    lid = re.search(r"\d+", url.split('-')[-1]).group()
    for node in nodes:
        link_node = node.xpath(rule["book_title"])
        stock = node.xpath(rule["book_stock"])
        comment = node.xpath(rule["book_comment"])
        if not link_node or not stock:
            log_with_time("rule error: %s" % url)
            continue
        link = link_node[0].attrib["href"]
        gid_match = re_gid.search(link)
        if not gid_match:
            # A link without a recognisable id cannot be keyed; skip it
            # instead of crashing on ``None.group()``.
            log_with_time("rule error: %s" % url)
            continue
        gid = gid_match.group()
        if comment:
            # The comment is optional; a missing one must not abort the page.
            comments[gid] = comment[0]
        s = 1 if u"有货" in stock[0] else 0
        dps_log[gid] = now
        result.append((link, gid, lid, s))
    return {
        "book_price": result,
        "dps_log": dps_log,
        "comment": comments,
    }
开发者ID:haidao-git19,项目名称:tlf,代码行数:33,代码来源:parser.py


示例8: fix_url

def fix_url(url):
    """Rewrite an item URL into a ``base``-prefixed, comma-separated id URL.

    The first ``/<digits-and-hyphens>.`` segment of ``url`` is taken and its
    hyphens are replaced with commas.

    Args:
        url: item URL to convert.

    Returns:
        ``None`` when ``url`` contains ``"tuan"`` (logged) or has no
        matching segment; otherwise ``base`` followed by the converted ids.
    """
    if "tuan" in url:
        log_with_time("skip url: %s" % url)
        return
    # Raw string: ``\-`` and ``\.`` are invalid escapes in a normal literal.
    x = re.findall(r"/([0-9\-]+)\.", url)
    if not x:
        return
    return base + x[0].replace("-", ",")
开发者ID:haidao-git19,项目名称:tlf,代码行数:8,代码来源:parser.py


示例9: book_price

def book_price(task, rule):
    """Read a book price from a JSON response and format it.

    Args:
        task: mapping providing ``text`` (JSON), ``link``, ``qid`` and
            ``stock``.
        rule: not used by this parser.

    Returns:
        ``None`` when the response is not valid JSON or lacks
        ``price[0].proPrice`` (logged); otherwise the value of
        ``format_price`` for ``[[qid, price, stock]]``.
    """
    try:
        j = json.loads(task['text'])
        price = j['price'][0]['proPrice']
    except (ValueError, KeyError, IndexError, TypeError):
        # Malformed JSON or an unexpected shape; a bare ``except`` would
        # also have hidden KeyboardInterrupt/SystemExit.
        log_with_time("bad response: %s" % task['link'])
        return
    return format_price([[str(task['qid']), str(price), task['stock']]])
开发者ID:haidao-git19,项目名称:tlf,代码行数:8,代码来源:parser.py


示例10: price_parser

def price_parser(task, rule):
    """Extract a decimal price that follows ``price:`` in the response text.

    Args:
        task: mapping providing ``text``, ``url``, ``gid`` and ``stock``.
        rule: not used by this parser.

    Returns:
        ``[]`` when no price is found (logged); otherwise the value of
        ``format_price`` for ``[(gid, price, stock)]``.
    """
    try:
        # Raw string: the original literal relied on invalid escapes.
        price = re.search(r"(?<=price\:)\d+\.\d+(?=\,)", task['text']).group()
    except (AttributeError, KeyError, TypeError):
        # AttributeError: no match (``search`` returned None).
        log_with_time("bad response: %r" % task['url'])
        return []
    return format_price([(task['gid'], price, task['stock'])])
开发者ID:haidao-git19,项目名称:tlf,代码行数:9,代码来源:parser.py


示例11: item_parser

def item_parser(task, rule):
    """Derive a stock flag from the first element matched by ``rule``.

    Args:
        task: mapping providing the page ``text``, its ``url`` and ``gid``.
        rule: xpath expression; the first match is inspected.

    Returns:
        ``None`` when the page cannot be parsed or nothing matches (logged);
        otherwise ``[(gid, stock)]`` where ``stock`` is 0 if the element has
        a truthy ``disabled`` attribute and 1 otherwise.
    """
    try:
        t = etree.HTML(task['text'])
        btn = t.xpath(rule)[0]
        stock = 0 if btn.attrib.get('disabled') else 1
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate.
        log_with_time("bad response: %s" % task['url'])
        return
    return [(task['gid'], stock)]
开发者ID:haidao-git19,项目名称:tlf,代码行数:9,代码来源:parser.py


示例12: price_parser

def price_parser(task, rule):
    """Build an id -> price mapping from a JSONP price response.

    Args:
        task: mapping providing the response ``text``.
        rule: not used by this parser.

    Returns:
        ``None`` when ``jsonp_json`` raises ``ValueError`` (logged);
        otherwise a one-element list holding a dict keyed by the part of
        each entry's ``id`` after the first underscore.
    """
    try:
        entries = jsonp_json(task["text"])
    except ValueError:
        log_with_time("price_parser: jsonp_json: %s" % task["text"])
        return None

    price_by_id = {}
    for entry in entries:
        amount = entry["p"]
        short_id = entry["id"].split("_")[1]
        price_by_id[short_id] = amount
    return [price_by_id]
开发者ID:haidao-git19,项目名称:tlf,代码行数:10,代码来源:parser.py


示例13: pager

def pager(task, rule):
    """Expand a paging response into one URL per page.

    Args:
        task: mapping providing ``text`` (JSON with ``gpagecount``) and
            ``url`` (which must contain ``code=<digits>&``).
        rule: not used by this parser.

    Returns:
        ``[]`` when the response is not valid JSON, lacks ``gpagecount``, or
        the URL carries no code (all logged); otherwise a list of ``gurl``
        formatted with the code and each page number from 1 to
        ``gpagecount`` inclusive.
    """
    try:
        j = json.loads(task['text'])
    except ValueError:
        log_with_time("bad response %r" % task['url'])
        return []
    if not isinstance(j, dict) or 'gpagecount' not in j:
        log_with_time("bad response %r" % task['url'])
        return []
    code_match = re.search(r"(?<=code=)\d+(?=&)", task['url'])
    if code_match is None:
        # Without a code the page URLs cannot be built.
        log_with_time("bad response %r" % task['url'])
        return []
    code = code_match.group()
    return [gurl % (code, i) for i in range(1, j['gpagecount'] + 1)]
开发者ID:haidao-git19,项目名称:tlf,代码行数:10,代码来源:parser.py


示例14: stock_parser

def stock_parser(task, rule):
    """Derive a stock flag from a JSON response and format the item.

    Args:
        task: mapping providing ``text`` (JSON with ``havestock``), ``url``
            and ``info`` (indexable; element 0 fills ``itemurl`` and
            element 1 is stringified as the second tuple field).
        rule: not used by this parser.

    Returns:
        ``None`` when the response is not valid JSON or lacks ``havestock``
        (logged); otherwise the value of ``format_price`` for a single
        tuple whose stock is 1 when ``havestock`` is ``"true"`` or
        ``"realstock"`` and 0 otherwise.
    """
    try:
        j = json.loads(task['text'])
        stock = 1 if j['havestock'] in ("true", "realstock") else 0
    except (ValueError, KeyError, TypeError):
        # Narrowed from a bare ``except`` so interrupts are not swallowed.
        log_with_time("bad response %s" % task['url'])
        return

    ret = [(itemurl % task['info'][0], str(task['info'][1]), stock)]
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:11,代码来源:parser.py


示例15: cats

def cats(url, res, rule):
    """Build category URLs by prefixing each xpath match with ``yougou``.

    Args:
        url: not used by this parser.
        res: mapping providing the page ``text``.
        rule: xpath expression selecting the link fragments.

    Returns:
        ``None`` when the page cannot be parsed (logged); otherwise a list
        of ``yougou + match`` strings.
    """
    content = res["text"]
    try:
        t = etree.HTML(content)
    except Exception:
        # Only byte strings have a meaningful ``decode``; text is logged
        # as-is so the handler itself cannot fail on it.
        shown = content
        if isinstance(shown, bytes):
            shown = shown.decode("utf-8", "replace")
        log_with_time("bad response %s" % shown)
        return
    return [yougou + i for i in t.xpath(rule)]
开发者ID:muchrooms,项目名称:tlf,代码行数:11,代码来源:parser.py


示例16: checkoffline

def checkoffline(task, rule):
    """Report items that the response marks as not found.

    Args:
        task: mapping providing ``text`` (JSON with an ``items`` mapping
            whose values carry ``is_found``) and ``url``.
        rule: not used by this parser.

    Returns:
        ``None`` when the response is not valid JSON or lacks ``items``
        (logged); otherwise the value of ``format_price`` over
        ``(key, "-1", -1)`` for every item whose ``is_found`` is falsy.
    """
    try:
        found_map = json.loads(task['text'])['items']
    except (ValueError, KeyError, TypeError):
        # Narrowed from a bare ``except`` so interrupts are not swallowed.
        log_with_time("bad response %s" % task['url'])
        return
    ret = [(str(k), "-1", -1)
           for k, v in found_map.items() if not v['is_found']]
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:13,代码来源:parser.py


示例17: meizhuang_cats_parser

def meizhuang_cats_parser(url, content, rule):
    """Extract ``(link, price, 1)`` tuples from a listing page.

    Args:
        url: page URL, used only in log messages.
        content: page markup passed to ``etree.HTML``.
        rule: sequence of three xpath expressions: item nodes, link within
            a node, price within a node.

    Returns:
        The value of ``format_price`` over the extracted tuples.  Nodes
        missing a link or a price are logged and skipped.
    """
    t = etree.HTML(content)
    ret = []
    for node in t.xpath(rule[0]):
        link = node.xpath(rule[1])
        price = node.xpath(rule[2])
        if not link or not price:
            log_with_time("rule error: %s" % url)
            # Skip the node: indexing an empty result below would raise
            # IndexError and abort the whole page.
            continue
        ret.append((link[0], price[0], 1))
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:13,代码来源:parser.py


示例18: list_parser

def list_parser(task):
    """Extract ``(link, price, 1)`` tuples from a product listing.

    Args:
        task: mapping providing ``recv`` (object with ``getvalue()``
            returning the markup), ``rule`` (mapping whose ``rule`` entry is
            the node xpath) and ``old_url`` (used in log messages).

    Returns:
        The value of ``format_price`` over the extracted tuples; nodes
        missing a link or a price are logged and skipped.
    """
    tree = etree.HTML(task["recv"].getvalue())
    rows = []
    for product in tree.xpath(task["rule"]["rule"]):
        hrefs = product.xpath("div/div[@class = 'proTit']/a/@href")
        raw_prices = product.xpath("div/div[@class = 'proPrice']/text()")
        if hrefs and raw_prices:
            rows.append((hrefs[0], fix_price(raw_prices[0]), 1))
        else:
            log_with_time("rule error: %s" % task["old_url"])
    return format_price(rows)
开发者ID:haidao-git19,项目名称:tlf,代码行数:14,代码来源:list.py


示例19: promo_filter

def promo_filter(item):
    """Build the promotion-lookup URL for an item.

    The first ``/<name>.h`` segment of the item URL is expected to have the
    form ``<pid>-<sid>``.

    Args:
        item: ``(url, sku)`` pair.

    Returns:
        ``None`` when the URL has no matching segment or the segment is not
        exactly two ``-``-separated parts (logged); otherwise a dict with
        ``url`` (the formatted ``promo_url``) and ``old`` (the input URL).
    """
    url, sku = item
    # Same character set as the original class, written unambiguously.
    parts = re.findall(r"/([A-Za-z0-9-]+)\.h", url)
    if not parts:
        log_with_time("url rule error: %s" % url)
        # Return here: falling through would raise IndexError on parts[0].
        return
    ids = parts[0].split("-")
    if len(ids) != 2:
        log_with_time("url rule error: %s" % url)
        return
    pid, sid = ids
    p = promo_url.format(time=int(time.time() * 1000), goodsNo=sku,
                         sid=sid, pid=pid)
    return {
        "url": p,
        "old": url,
    }
开发者ID:haidao-git19,项目名称:tlf,代码行数:15,代码来源:parser.py



注:本文中的spider.log_with_time函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python spider.Spider类代码示例发布时间:2022-05-27
下一篇:
Python spi.transfer函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap