本文整理汇总了Python中spider.log_with_time函数的典型用法代码示例。如果您正苦于以下问题:Python log_with_time函数的具体用法?Python log_with_time怎么用?Python log_with_time使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了log_with_time函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: stock1_parser
def stock1_parser(task, rule):
    """Turn a first-stage stock response into price rows or a follow-up task.

    ``task['text']`` is decoded with ``demjson``. When the decoded ``code``
    is 3 and ``message`` is non-empty, the first run of digits in the
    message is used as a SKU id to build a second-stage URL via ``surl2``.

    Args:
        task: mapping read for ``text``, ``url``, ``gid`` and ``price``.
        rule: unused by this parser.

    Returns:
        ``[]`` when the response cannot be decoded or the follow-up URL
        cannot be built. Otherwise a dict with keys ``"spider"`` and
        ``"stock2"``; exactly one of them is filled in.
    """
    try:
        j = demjson.decode(task['text'])
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still propagate.
        log_with_time("bad response: %r" % task['url'])
        return []
    code = j['code']
    message = j['message']
    url = ""
    ret = {"spider": [], 'stock2': []}
    if code == 3 and message:
        try:
            skuid = re.search(r"\d+", message).group()
            url = surl2(task['gid'], skuid)
        except Exception:
            # No digits in the message (or surl2 failed): nothing to follow.
            return []
    if url == "":
        # No second-stage lookup needed; stock is derived from totalAmount.
        stock = 1 if j.get('totalAmount') else 0
        ret['spider'] = format_price(
            [(itemurl + task['gid'], task['price'], stock)]
        )
    else:
        ret['stock2'] = [(url, task['gid'], task['price'])]
    return ret
开发者ID:haidao-git19,项目名称:tlf,代码行数:26,代码来源:parser.py
示例2: list_parser
def list_parser(task, rule):
    """Extract item links and ids from a listing page.

    Args:
        task: mapping read for ``text`` (page HTML) and ``url``.
        rule: mapping of XPath expressions under ``node``, ``link``, ``gid``.

    Returns:
        ``None`` when the ``node`` XPath matches nothing; otherwise a dict
        with ``dps_log`` (gid -> timestamp), ``dp`` (list of
        ``(link, "")`` tuples) and ``price`` (list of gids).
    """
    tree = etree.HTML(task["text"])
    item_nodes = tree.xpath(rule["node"])
    if not item_nodes:
        log_with_time("node rule error: %s" % task["url"])
        return None

    detail_pages = []
    seen_at = {}
    gids = []
    timestamp = int(time.time())
    for item in item_nodes:
        links = item.xpath(rule["link"])
        found_gids = item.xpath(rule["gid"])
        if not (links and found_gids):
            log_with_time("rule error: %s" % task["url"])
            continue
        first_gid = found_gids[0]
        detail_pages.append((links[0], ""))
        gids.append(first_gid)
        seen_at[first_gid] = timestamp

    return {
        "dps_log": seen_at,
        "dp": detail_pages,
        "price": gids,
    }
开发者ID:haidao-git19,项目名称:tlf,代码行数:25,代码来源:parser.py
示例3: cats_parser
def cats_parser(url, res, rule):
    """Collect category URLs from the values matched by ``rule``.

    Values containing ``/c0-0/`` are skipped. Values containing
    ``/ctg/s2/`` are formatted through ``ctgurl``; values containing
    ``list.yhd.com`` are formatted through ``lsturl``. In both cases the
    category id is the text after the marker, up to the first ``-``.

    Args:
        url: unused by this parser.
        res: mapping read for ``text`` (page HTML).
        rule: XPath expression evaluated against the parsed page.

    Returns:
        A set of formatted category URLs.
    """
    t = etree.HTML(res['text'])
    ret = set()
    for v in t.xpath(rule):
        if '/c0-0/' in v:
            continue
        if '/ctg/s2/' in v:
            pattern, template = r"(?<=/ctg/s2/).+", ctgurl
        elif 'list.yhd.com' in v:
            # e.g. http://list.yhd.com/.../
            pattern, template = r"(?<=yhd\.com\/).+", lsturl
        else:
            continue
        cat = re.search(pattern, v)
        if not cat:
            log_with_time("bad regex: %r %r" % (pattern, v))
            continue
        ret.add(template % cat.group().split('-')[0])
    return ret
开发者ID:haidao-git19,项目名称:tlf,代码行数:27,代码来源:parser.py
示例4: rt_parser
def rt_parser(items):
    """Fetch prices and stock state for ``items`` and format them.

    Product ids come from ``get_pids(items)``. One price request and one
    stock request are issued through ``simple_http.get``; both must return
    status 200.

    Args:
        items: passed to ``get_pids``.

    Returns:
        ``None`` when there are no pids, a request does not return 200, or
        a response cannot be parsed; otherwise the result of
        ``format_price`` over ``(pid, price, stock)`` tuples.

    Raises:
        KeyError: if a pid present in the price response is absent from
            the stock response.
    """
    pids = get_pids(items)
    if not pids:
        # Previously referenced an undefined name here; log the input.
        log_with_time("got nothing: %s" % (items,))
        return None
    purl = price_url % (
        ",".join(["J_" + i for i in pids]),
        random.randint(1000000, 10000000),
        int(time.time() * 1000),
    )
    surl = stock_url % (
        async_http.quote(",".join(pids)),
        random.randint(1000000, 10000000),
        int(time.time() * 1000),
    )
    price_res = simple_http.get(purl)
    stock_res = simple_http.get(surl)
    if price_res["status"] != 200 or stock_res["status"] != 200:
        # Previously referenced an undefined name here; report both statuses.
        log_with_time(
            "not200: price=%s stock=%s"
            % (price_res["status"], stock_res["status"])
        )
        return None
    try:
        price_json = jsonp_json(price_res["text"])
        # NOTE(review): .decode() presumes the response body is a byte
        # string -- confirm against simple_http.
        stock_json = jsonp_json(stock_res["text"].decode("gbk"))
    except Exception:
        traceback.print_exc()
        return None
    prices = {}
    for entry in price_json:
        prices[entry["id"].split("_")[1]] = entry["p"]
    stocks = {}
    for pid, info in stock_json.items():
        state = info["StockStateName"]
        stocks[pid] = 1 if (u"有货" in state or u"现货" in state) else 0
    ret = [(str(pid), str(prices[pid]), stocks[pid]) for pid in prices]
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:35,代码来源:parser.py
示例5: dp_parser
def dp_parser(task, rule):
    """Locate the description URL embedded in a detail page.

    Args:
        task: mapping read for ``text`` (page source) and ``url``.
        rule: unused by this parser.

    Returns:
        ``None`` when no description URL is found; otherwise a one-element
        list ``[(desc_url, crc_string, "")]`` where the CRC comes from
        ``urlcrc.get_urlcrc(3, task["url"])``.
    """
    matches = re.findall("desc: '(http.*?desc/[0-9]+)'", task["text"])
    if matches:
        checksum = urlcrc.get_urlcrc(3, task["url"])
        return [(matches[0], str(checksum), "")]
    log_with_time("no desc: %s" % task["url"])
    return None
开发者ID:haidao-git19,项目名称:tlf,代码行数:7,代码来源:parser.py
示例6: list_parser
def list_parser(task, rule):
    """Split listing nodes into items with known stock and items without.

    Args:
        task: mapping read for ``text`` (page HTML) and ``url``.
        rule: mapping of XPath expressions under ``nodes``, ``buyinfo``,
            ``buycart``, ``sellout`` and ``comment``.

    Returns:
        A dict with ``prices`` (list of ``(gid, stock)``), ``items`` (gids
        that had no buy-info node) and ``dps`` (gid -> timestamp).
    """
    t = etree.HTML(task['text'])
    nodes = t.xpath(rule['nodes'])
    prices = []
    items = []
    dps = {}
    for node in nodes:
        # .get() so a missing attribute reaches the guard below instead of
        # raising KeyError.
        gid = node.attrib.get('itemid')
        if not gid:
            log_with_time("bad response: %r" % task['url'])
            continue
        buyinfo = node.xpath(rule['buyinfo'])
        if buyinfo:
            buyinfo = buyinfo[0]
            stock = 1
            if not buyinfo.xpath(rule['buycart']):
                # No cart button: out of stock if marked sold out or if
                # the node has no comment element.
                if buyinfo.xpath(rule['sellout']) or not node.xpath(rule['comment']):
                    stock = 0
            prices.append((gid, stock))
        else:
            items.append(gid)
        # NOTE(review): recorded for every gid; confirm this is not meant
        # to apply only to the no-buyinfo branch.
        dps[gid] = int(time.time())
    return {"prices": prices, "items": items, "dps": dps}
开发者ID:haidao-git19,项目名称:tlf,代码行数:25,代码来源:parser.py
示例7: extract_book
def extract_book(url, tree, rule):
    """Extract book rows, comments and a timestamp log from a listing tree.

    The list id is the first run of digits in the last ``-``-separated
    piece of ``url``.

    Args:
        url: listing URL; also used in log messages.
        tree: parsed document exposing ``xpath``.
        rule: mapping of XPath expressions under ``book_node``,
            ``book_title``, ``book_stock`` and ``book_comment``.

    Returns:
        A dict with ``book_price`` (list of ``(link, gid, lid, stock)``),
        ``dps_log`` (gid -> timestamp) and ``comment`` (gid -> first
        comment match, only for nodes that have one).

    Raises:
        AttributeError: if ``url`` has no digits in its last piece, or a
            link does not match ``re_gid``.
    """
    result = []
    now = int(time.time())
    dps_log = {}
    comments = {}
    lid = re.search(r"\d+", url.split('-')[-1]).group()
    for node in tree.xpath(rule["book_node"]):
        link_node = node.xpath(rule["book_title"])
        stock = node.xpath(rule["book_stock"])
        comment = node.xpath(rule["book_comment"])
        if not link_node or not stock:
            log_with_time("rule error: %s" % url)
            continue
        link = link_node[0].attrib["href"]
        gid = re_gid.search(link).group()
        if comment:
            # A node without a comment match used to raise IndexError and
            # abort the whole page.
            comments[gid] = comment[0]
        s = 1 if u"有货" in stock[0] else 0
        dps_log[gid] = now
        result.append((link, gid, lid, s))
    return {
        "book_price": result,
        "dps_log": dps_log,
        "comment": comments,
    }
开发者ID:haidao-git19,项目名称:tlf,代码行数:33,代码来源:parser.py
示例8: fix_url
def fix_url(url):
    """Rewrite an item URL onto ``base`` using its numeric id segment.

    The first ``/<digits-and-hyphens>.`` segment of ``url`` is taken and
    its hyphens are replaced by commas before being appended to ``base``.

    Args:
        url: the URL to rewrite.

    Returns:
        ``None`` when ``url`` contains ``"tuan"`` (logged as skipped) or
        has no matching segment; otherwise ``base`` plus the
        comma-separated ids.
    """
    if "tuan" in url:
        log_with_time("skip url: %s" % url)
        return None
    ids = re.findall(r"/([0-9\-]+)\.", url)
    if not ids:
        return None
    return base + ids[0].replace("-", ",")
开发者ID:haidao-git19,项目名称:tlf,代码行数:8,代码来源:parser.py
示例9: book_price
def book_price(task, rule):
    """Read the first ``proPrice`` from a JSON price response.

    Args:
        task: mapping read for ``text``, ``link``, ``qid`` and ``stock``.
        rule: unused by this parser.

    Returns:
        ``None`` when the response cannot be parsed or lacks the expected
        fields; otherwise ``format_price`` applied to
        ``[[qid, price, stock]]``.
    """
    try:
        j = json.loads(task['text'])
        price = j['price'][0]['proPrice']
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still propagate.
        log_with_time("bad response: %s" % task['link'])
        return None
    return format_price([[str(task['qid']), str(price), task['stock']]])
开发者ID:haidao-git19,项目名称:tlf,代码行数:8,代码来源:parser.py
示例10: price_parser
def price_parser(task, rule):
    """Extract a decimal price that follows ``price:`` in the response.

    The price must have the form ``<digits>.<digits>`` and be immediately
    followed by a comma.

    Args:
        task: mapping read for ``text``, ``url``, ``gid`` and ``stock``.
        rule: unused by this parser.

    Returns:
        ``[]`` when no price is found; otherwise ``format_price`` applied
        to ``[(gid, price, stock)]``.
    """
    try:
        price = re.search(r"(?<=price\:)\d+\.\d+(?=\,)", task['text']).group()
    except Exception:
        # re.search returned None (no price) or the task lacks 'text'.
        log_with_time("bad response: %r" % task['url'])
        return []
    return format_price([(task['gid'], price, task['stock'])])
开发者ID:haidao-git19,项目名称:tlf,代码行数:9,代码来源:parser.py
示例11: item_parser
def item_parser(task, rule):
    """Derive stock state from the first node matched by ``rule``.

    Stock is 0 when that node has a truthy ``disabled`` attribute, else 1.

    Args:
        task: mapping read for ``text`` (page HTML), ``url`` and ``gid``.
        rule: XPath expression selecting the node to inspect.

    Returns:
        ``None`` when the page cannot be parsed or nothing matches;
        otherwise ``[(gid, stock)]``.
    """
    try:
        t = etree.HTML(task['text'])
        btn = t.xpath(rule)[0]
        stock = 0 if btn.attrib.get('disabled') else 1
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still propagate.
        log_with_time("bad response: %s" % task['url'])
        return None
    return [(task['gid'], stock)]
开发者ID:haidao-git19,项目名称:tlf,代码行数:9,代码来源:parser.py
示例12: price_parser
def price_parser(task, rule):
    """Map product ids to prices from a JSONP price response.

    Each entry's ``id`` is split on ``_`` and the second piece is used as
    the key; the entry's ``p`` field is the value.

    Args:
        task: mapping read for ``text`` (the JSONP payload).
        rule: unused by this parser.

    Returns:
        ``None`` when ``jsonp_json`` raises ``ValueError``; otherwise a
        one-element list holding the id -> price dict.
    """
    try:
        entries = jsonp_json(task["text"])
    except ValueError:
        log_with_time("price_parser: jsonp_json: %s" % task["text"])
        return None
    price_by_id = {
        entry["id"].split("_")[1]: entry["p"]
        for entry in entries
    }
    return [price_by_id]
开发者ID:haidao-git19,项目名称:tlf,代码行数:10,代码来源:parser.py
示例13: pager
def pager(task, rule):
    """Expand a paged JSON response into one URL per page.

    The page count is read from ``gpagecount`` in the response, and the
    code from the ``code=<digits>&`` query fragment of ``task['url']``.

    Args:
        task: mapping read for ``text`` (JSON payload) and ``url``.
        rule: unused by this parser.

    Returns:
        ``[]`` when the payload is not valid JSON, lacks ``gpagecount``,
        or the URL carries no code; otherwise ``gurl % (code, page)`` for
        pages ``1..gpagecount``.
    """
    try:
        j = json.loads(task['text'])
    except ValueError:
        # Malformed JSON is reported like any other bad response.
        log_with_time("bad response %r" % task['url'])
        return []
    if 'gpagecount' not in j:
        log_with_time("bad response %r" % task['url'])
        return []
    match = re.search(r"(?<=code=)\d+(?=&)", task['url'])
    if match is None:
        # Without a code there is nothing to paginate.
        log_with_time("bad response %r" % task['url'])
        return []
    code = match.group()
    return [gurl % (code, i) for i in range(1, j['gpagecount'] + 1)]
开发者ID:haidao-git19,项目名称:tlf,代码行数:10,代码来源:parser.py
示例14: stock_parser
def stock_parser(task, rule):
    """Read stock state from a JSON response and format a price row.

    Stock is 1 when ``havestock`` is ``"true"`` or ``"realstock"``, else 0.

    Args:
        task: mapping read for ``text``, ``url`` and ``info`` (a sequence
            whose first two elements are used for the URL and the price).
        rule: unused by this parser.

    Returns:
        ``None`` when the response cannot be parsed or lacks
        ``havestock``; otherwise the result of ``format_price``.
    """
    try:
        j = json.loads(task['text'])
        stock = 1 if j['havestock'] in ("true", "realstock") else 0
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still propagate.
        log_with_time("bad response %s" % task['url'])
        return None
    ret = [(itemurl % task['info'][0], str(task['info'][1]), stock)]
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:11,代码来源:parser.py
示例15: cats
def cats(url, res, rule):
    """Build category URLs by prefixing each ``rule`` match with ``yougou``.

    Args:
        url: unused by this parser.
        res: mapping read for ``text`` (page HTML).
        rule: XPath expression evaluated against the parsed page.

    Returns:
        ``None`` when the page cannot be parsed; otherwise a list of
        ``yougou + match`` strings.
    """
    content = res["text"]
    try:
        t = etree.HTML(content)
    except Exception:
        # NOTE(review): .decode() presumes content is a byte string --
        # confirm against the caller.
        log_with_time("bad response %s" % content.decode("utf-8", "replace"))
        return None
    return [yougou + i for i in t.xpath(rule)]
开发者ID:muchrooms,项目名称:tlf,代码行数:11,代码来源:parser.py
示例16: checkoffline
def checkoffline(task, rule):
    """Report items whose ``is_found`` flag is falsy in the response.

    Args:
        task: mapping read for ``text`` (JSON payload) and ``url``.
        rule: unused by this parser.

    Returns:
        ``None`` when the payload cannot be parsed or lacks ``items``;
        otherwise ``format_price`` applied to one ``(key, "-1", -1)`` row
        per item that was not found.
    """
    try:
        items = json.loads(task['text'])['items']
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still propagate.
        log_with_time("bad response %s" % task['url'])
        return None
    ret = [
        (str(k), str(-1), -1)
        for k, v in items.items()
        if not v['is_found']
    ]
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:13,代码来源:parser.py
示例17: meizhuang_cats_parser
def meizhuang_cats_parser(url, content, rule):
    """Extract ``(link, price, 1)`` rows from a category page.

    Args:
        url: page URL, used only in log messages.
        content: page HTML.
        rule: sequence of three XPath expressions: item nodes, link within
            a node, price within a node.

    Returns:
        The result of ``format_price`` over the extracted rows.
    """
    t = etree.HTML(content)
    ret = []
    for node in t.xpath(rule[0]):
        link = node.xpath(rule[1])
        price = node.xpath(rule[2])
        if not link or not price:
            log_with_time("rule error: %s" % url)
            # Skip the node; indexing an empty match would raise IndexError.
            continue
        ret.append((link[0], price[0], 1))
    return format_price(ret)
开发者ID:haidao-git19,项目名称:tlf,代码行数:13,代码来源:parser.py
示例18: list_parser
def list_parser(task):
    """Extract ``(link, price, 1)`` rows from a listing response.

    Args:
        task: mapping read for ``recv`` (an object with ``getvalue()``
            returning the page HTML), ``rule`` (a mapping whose ``rule``
            entry is the item XPath) and ``old_url`` (used in logs).

    Returns:
        The result of ``format_price`` over the extracted rows; each price
        is passed through ``fix_price`` first.
    """
    page = etree.HTML(task["recv"].getvalue())
    rows = []
    for item in page.xpath(task["rule"]["rule"]):
        links = item.xpath("div/div[@class = 'proTit']/a/@href")
        prices = item.xpath("div/div[@class = 'proPrice']/text()")
        if links and prices:
            fixed = fix_price(prices[0])
            rows.append((links[0], fixed, 1))
        else:
            log_with_time("rule error: %s" % task["old_url"])
    return format_price(rows)
开发者ID:haidao-git19,项目名称:tlf,代码行数:14,代码来源:list.py
示例19: promo_filter
def promo_filter(item):
    """Build a promotion-lookup request for one ``(url, sku)`` pair.

    The ``/<pid>-<sid>.h`` segment of the URL supplies ``pid`` and
    ``sid``; ``sku`` is passed through as ``goodsNo``.

    Args:
        item: a two-element sequence ``(url, sku)``.

    Returns:
        ``None`` when the URL has no matching segment (logged); otherwise
        a dict with ``url`` (the formatted ``promo_url``) and ``old`` (the
        original URL).

    Raises:
        ValueError: if the matched segment does not split into exactly
            two pieces on ``-``.
    """
    url, sku = item
    parts = re.findall(r"/([A-Za-z-0-9]+)\.h", url)
    if not parts:
        log_with_time("url rule error: %s" % url)
        # Bail out; indexing the empty match list would raise IndexError.
        return None
    pid, sid = parts[0].split("-")
    p = promo_url.format(
        time=int(time.time() * 1000), goodsNo=sku, sid=sid, pid=pid
    )
    return {
        "url": p,
        "old": url,
    }
开发者ID:haidao-git19,项目名称:tlf,代码行数:15,代码来源:parser.py
注:本文中的spider.log_with_time函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论