• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python db.handle_db函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中src.script.db.handle_db函数的典型用法代码示例。如果您正苦于以下问题:Python handle_db函数的具体用法?Python handle_db怎么用?Python handle_db使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了handle_db函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_apk

    def get_apk(self, response):
        """Store a downloaded package on disk and register it in the database.

        The response body is written to a freshly named ``.apk`` file inside
        the directory returned by ``utils.make_spiderdir``. A software-info
        row and a topic-url row are then inserted through ``handle_db`` and
        the file is handed to ``autocopy.copy``.

        :param response: download response; ``response.request.meta`` must
            contain ``referer_url``, which is also used as the topic URL.
        """
        topic_url = referer_url = response.request.meta['referer_url']
        is_crawled = '1'
        priority_rating = '0'
        filename = ''.join([str(random.randrange(1, 100000)), '.apk'])
        # Directory creation is delegated to the shared helper.
        down_dir = utils.make_spiderdir(self.platform, 'download')
        # Some names may be UTF-8 byte strings; decode them to unicode.
        try:
            filename = filename.decode('utf-8', 'ignore')
        except (AttributeError, UnicodeError):
            pass
        filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])
        # Close the handle deterministically instead of relying on the GC.
        with open(filename, 'wb') as apk_file:
            apk_file.write(response.body)
        # The database is only touched after the file has been written.
        hashurl = sql.hash_topic(topic_url)
        updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
        # Presumably doubles backslashes so Windows paths survive being
        # embedded in the SQL text -- TODO confirm against sql helpers.
        filename = filename.replace('\\', '\\\\')
        insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
        status = handle_db(insert_sql)
        log.msg(str(status), log.DEBUG)
        # Record the topic URL as well.
        insert_sql = sql.insert_topic_url(self.platform, topic_url, referer_url, updatedate, is_crawled, priority_rating)
        topic_status = handle_db(insert_sql)
        # Log the result of THIS statement (the original re-logged the first one).
        log.msg(str(topic_status), log.DEBUG)

        try:
            autocopy.copy(filename, self.platform)
            log.msg('copy job is successed', log.INFO)
        except Exception:
            # format_exc() returns the traceback text; print_exc() returns None.
            log.msg(traceback.format_exc(), log.ERROR)
            log.msg('copy job is failture', log.ERROR)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:35,代码来源:spider.py


示例2: reply_status

 def reply_status(self, response):
     """Check whether a forum reply succeeded and update the account counters.

     Returns early when the page contains the hourly posting-limit message.
     When the success message is present, the account row is loaded, both
     ``self.reply_num`` and ``self.point_num`` are incremented and written
     back through ``handle_db``.

     :param response: response of the reply request (GBK-encoded body).
     """
     success = u'非常感谢,你的回复已经发布'.encode('gbk', 'ignore')
     status = re.findall(success, response.body)
     username_sql = sql.select_accountbyusername(self.username, self.platform)

     # Hourly posting limit reached: nothing to record.
     failture = u'对不起,您所在的用户组每小时限制发帖 '.encode('gbk', 'ignore')
     failture_status = re.findall(failture, response.body, re.I)
     if failture_status:
         return
     if status:
         log.msg('reply success', log.INFO)
         reply_nums = handle_db(username_sql)['data'][0]
         # Column 5 is read as the points value and the second-to-last
         # column as the reply counter.
         self.point_num = reply_nums[5]
         self.reply_num = reply_nums[-2]
         self.reply_num += 1
         self.point_num += 1
         # Reply succeeded: persist reply_num + 1 and points + 1.
         try:
             update_replynum = sql.update_account_username(self.username, self.platform, self.reply_num, self.point_num)
             handle_db(update_replynum)
             # Report the points value; the original printed reply_num here.
             log.msg(('<username: %s \'s integral is : +1 ,now integral is %s>' % (self.username, self.point_num)), log.INFO)
         except Exception:
             # format_exc() returns the traceback text; print_exc() returns None.
             log.msg(traceback.format_exc(), log.ERROR)


示例3: get_apk

    def get_apk(self, response):
        """Write the downloaded package to a file and record it in the database.

        The target name is a random number plus the ``filename`` carried in
        the request meta, placed in the directory returned by
        ``utils.make_spiderdir``. Afterwards a software-info row is inserted,
        the topic URL is flagged through ``sql.topicurl_withcrawed`` and the
        file is passed to ``autocopy.copy``.

        :param response: download response; ``response.request.meta`` must
            contain ``filename`` and ``topic_url``.
        """
        filename = response.request.meta['filename']
        filename = ''.join([str(random.randrange(1, 100000)), '.', filename])
        # Directory creation is delegated to the shared helper.
        down_dir = utils.make_spiderdir(self.name, 'download')
        # Some names may be UTF-8 byte strings; decode them to unicode.
        try:
            filename = filename.decode('utf-8', 'ignore')
        except (AttributeError, UnicodeError):
            pass
        filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])
        # Close the handle deterministically instead of relying on the GC.
        with open(filename, 'wb') as package_file:
            package_file.write(response.body)
        # The database is only touched after the file has been written.
        topic_url = response.request.meta['topic_url']
        hashurl = sql.hash_topic(topic_url)
        updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
        # Presumably doubles backslashes so Windows paths survive being
        # embedded in the SQL text -- TODO confirm against sql helpers.
        filename = filename.replace('\\', '\\\\')
        insert_sql = sql.insert_softwareinfo(self.name, hashurl, topic_url, updatedate, filename)
        status = db.handle_db(insert_sql)
        log.msg(str(status), log.DEBUG)
        # Flag the topic URL in the topic table.
        update_topic_url = sql.topicurl_withcrawed(topic_url)
        status = db.handle_db(update_topic_url)
        log.msg(str(status), log.DEBUG)
        # Back the file up.
        try:
            autocopy.copy(filename, self.name)
            log.msg('copy job is successed', log.INFO)
        except Exception:
            # Keep the failure best-effort, but record why it failed.
            log.msg(traceback.format_exc(), log.ERROR)
            log.msg('copy job is failture', log.ERROR)


示例4: parse_item

 def parse_item(self, response):
     """Extract ``.apk`` links from a topic page and yield download requests.

     * Topic removed: the topic URL is flagged through
       ``sql.topicurl_withcrawed`` and nothing is yielded.
     * No link but a "reply to see hidden content" marker: the topic
       priority is set to ``'-1'`` and nothing is yielded.
     * No link at all: the topic is flagged and one follow-up request from
       ``self.get_down_topic`` is yielded.
     * Otherwise one request per distinct link is yielded with
       ``self.get_downloadpath`` as callback.

     :param response: topic page response (GBK-encoded body);
         ``response.request.meta`` must contain ``topic_url``.
     """
     log.msg(response.url, log.INFO)
     try:
         topic_url = response.request.meta['topic_url']
         if re.findall(u'指定的主题不存在或已被删除或正在被审核,请返回'.encode('gbk', 'ignore'), response.body, re.I):
             # The topic is gone: flag it and stop.
             log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             return
     except Exception:
         # format_exc() returns the traceback text; print_exc() returns None.
         log.msg(traceback.format_exc(), log.INFO)
     try:
         urls = re.findall(r'<a href="(.*?)".*\.apk</a>', response.body, re.I)
         if not urls:
             # Content that can only be downloaded after replying.
             reply_urls = re.findall(u'如果你要查看本帖隐藏内容请'.encode('gbk', 'ignore'), response.body, re.I)
             print('reply_urls')
             print(reply_urls)
             if reply_urls:
                 # Mark the topic with priority -1 so the combined
                 # reply-and-download module can pick it up.
                 update_topic_priority_sql = sql.update_topic_priority(self.platform, topic_url, '-1')
                 n = handle_db(update_topic_priority_sql)
                 log.msg(str(n), log.INFO)
                 return
             log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
             # Without an apk the topic link is useless: flag it as crawled.
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             # No download link after the reply; keep the session going.
             self.topic_reply_num -= 1
             log.msg(('reply success, will download software%s' % str(self.topic_reply_num)), log.DEBUG)
             log.msg(str(response), log.DEBUG)
             # NOTE(review): the original passed an unassigned ``url`` here and
             # always raised; topic_url is the only URL in scope -- confirm
             # this is what get_down_topic expects.
             request = self.get_down_topic(response, topic_url)
             yield request
         else:
             for url in set(urls):
                 url = url.replace('amp;', '')
                 print('url: %s' % url)
                 request = response.request.replace(url=url, callback=self.get_downloadpath)
                 request.meta['url'] = response.url
                 yield request

     except IndexError:
         log.msg(traceback.format_exc(), log.ERROR)
         # No downloadable apk: flag this URL as crawled.
         update_crawled_sql = sql.topicurl_withcrawed(response.url)
         # Was ``handler_db`` (undefined name) in the original.
         status = handle_db(update_crawled_sql)
         if status['errorNo'] == 1:
             log.msg('this url has no apk', log.INFO)
         return


示例5: parse_item

 def parse_item(self, response):
     """Extract ``.apk`` attachment links from a topic page and yield requests.

     * Topic removed: the topic URL is flagged through
       ``sql.topicurl_withcrawed`` and nothing is yielded.
     * No link but a "reply to see hidden content" marker: the topic
       priority is set to ``'-1'``.
     * No link at all: the topic is flagged as crawled.
     * Otherwise one request per distinct link is yielded with
       ``self.get_apk`` as callback.

     :param response: topic page response (GBK-encoded body);
         ``response.request.meta`` must contain ``topic_url``.
     """
     log.msg(response.url, log.INFO)
     try:
         # Read the topic URL first: the removed-topic branch below needs it
         # (the original assigned it only after that branch).
         topic_url = response.request.meta['topic_url']
         if re.findall(u'抱歉,指定的主题不存在或已被删除或正在被审核'.encode('gbk', 'ignore'), response.body, re.I):
             # The topic is gone: flag it and stop.
             log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             return
     except Exception:
         # format_exc() returns the traceback text; print_exc() returns None.
         log.msg(traceback.format_exc(), log.INFO)
     try:
         # NOTE(review): '.' and '?' in "forum.php?" are unescaped regex
         # metacharacters; the pattern is kept as-is to avoid changing which
         # links match.
         urls = re.findall(r'<a href="(forum.php?[^"]+)" target="_blank">.*?\.apk</a>', response.body, re.I)
         if not urls:
             # Downloads that are hidden until the user replies.
             noreply_regex = u'如果您要查看本帖隐藏内容请'.encode('gbk', 'ignore')
             noreply = re.findall(noreply_regex, response.body, re.I)
             for i in noreply:
                 print(i.decode('gbk', 'ignore'))
             if noreply:
                 # Set the topic priority to -1.
                 update_topic_priority_sql = sql.update_topic_priority(self.platform, topic_url, '-1')
                 n = handle_db(update_topic_priority_sql)
                 log.msg(''.join(['hide_apk_update topic_url priority=-1', str(n)]), log.INFO)
                 return
             log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
             # Without an apk the topic link is useless: flag it as crawled.
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             return
         for url in set(urls):
             url = 'http://bbs.mumayi.com/%s' % url
             request = response.request.replace(url=url, callback=self.get_apk)
             request.meta['url'] = response.url
             yield request

     except IndexError:
         log.msg(traceback.format_exc(), log.ERROR)
         # No downloadable apk: flag this URL as crawled.
         update_crawled_sql = sql.topicurl_withcrawed(response.url)
         # Was ``handler_db`` (undefined name) in the original.
         status = handle_db(update_crawled_sql)
         if status['errorNo'] == 1:
             log.msg('this url has no apk', log.INFO)
         return


示例6: get_proxy

def get_proxy(spider_name, enable):
    """Return the first value selected by ``sql.get_proxy_url``.

    When the lookup fails (for example because no row is returned), the
    statement built by ``sql.update_proxy_enable`` is executed once and the
    lookup is retried. The original recursed without using the recursive
    result, so it returned the update status instead of the proxy, and it
    could recurse without bound when the table stayed empty.

    :param spider_name: spider whose proxies are queried.
    :param enable: value forwarded to ``sql.get_proxy_url``.
    :returns: first column of the first result row, or ``None`` when the
        lookup still fails after the reset.
    """
    for attempt in (1, 2):
        try:
            proxy_sql = sql.get_proxy_url(spider_name, enable)
            n = handle_db(proxy_sql)['data'][0][0]
        except Exception:
            if attempt == 2:
                # Already reset once; give up instead of looping forever.
                return None
            # Nothing usable was returned: run the reset statement and retry.
            update_enable_sql = sql.update_proxy_enable(spider_name)
            print(handle_db(update_enable_sql))
        else:
            print(n)
            return n
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:15,代码来源:gfan.py


示例7: get_integral_page

 def get_integral_page(self, response):
     """Read the user's points from the page and decide whether to continue.

     The points value is extracted with a regular expression, stored through
     ``sql.update_account_point`` and compared with ``settings.INTEGERAL``.

     :param response: page response (GBK-encoded body).
     :returns: the request from ``self.get_topic`` when the points exceed
         the threshold, when the value is empty, or when any step fails;
         ``None`` when the points are at or below the threshold.
     """
     try:
         print('get_integral_page')
         integral = re.findall(u'金币: <a href=".*?">(.*?)</a>'.encode('gbk', 'ignore'), response.body, re.I)[0].replace(' ', '')
         print('integral %s' % integral)
         if integral:
             # Persist the value, then continue only above the threshold.
             update_user_integral_sql = sql.update_account_point(self.username, self.platform, integral)
             n = handle_db(update_user_integral_sql)
             log.msg(('update user(%s)\'s integral is: %s, %s' % (self.username, integral, n)), log.INFO)
             # The minimum points required for downloading is configurable.
             if int(integral) > settings.INTEGERAL:
                 return self.get_topic(response)
             print('return None')
             return None
         log.msg('cann\'t get user\'s integral', log.ERROR)
         return self.get_topic(response)
     except Exception:
         # format_exc() returns the traceback text; print_exc() returns None.
         log.msg(traceback.format_exc(), log.ERROR)
         request = self.get_topic(response)
         print('except')
         return request
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:27,代码来源:goapkdown.py


示例8: parse_item

 def parse_item(self, response):
     """Extract package links (apk/zip/rar) and yield attachment requests.

     When no link is found the topic URL is flagged through
     ``sql.topicurl_withcrawed``. Otherwise one request per distinct link is
     yielded with ``self.get_attachementpath`` as callback.

     :param response: topic page response; ``response.request.meta`` must
         contain ``topic_url``.
     """
     topic_url = response.request.meta['topic_url']
     try:
         # ``(?:apk|zip|rar)`` matches one of the three extensions; the
         # original ``[apk|zip|rar]`` was a character class matching a single
         # character such as 'a', 'p' or '|'.
         urls = re.findall(r'<a.*?href="(.*?)".*?>.*\.(?:apk|zip|rar).*?</a>', response.body, re.I)
         print(urls)
         if not urls:
             print('this url->%s has not apk file' % response.request.meta['topic_url'])
             # Without a package the topic link is useless: flag it as crawled.
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             status = handle_db(update_topic_url)
             print(status)
         for url in set(urls):
             url = 'http://bbs.gfan.com/%s' % url
             request = response.request.replace(url=url, callback=self.get_attachementpath)
             request.meta['url'] = response.url
             yield request
         return
     except IndexError:
         traceback.print_exc()
         # No downloadable package: flag this URL as crawled.
         update_crawled_sql = sql.topicurl_withcrawed(response.url)
         # Was ``handler_db`` (undefined name) in the original.
         status = handle_db(update_crawled_sql)
         if status['errorNo'] == 1:
             print('this url has no apk')
         return
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:26,代码来源:gfan.py


示例9: get_apk

    def get_apk(self, response):
        """Save the package, record it, charge one point and continue replying.

        The response body is written to a new ``.apk`` file, a software-info
        row is inserted, the topic URL is flagged, the account's points are
        reduced by one and the file is passed to ``autocopy.copy``.

        :param response: download response; ``response.request.meta`` must
            contain ``topic_url``.
        :returns: the request produced by ``self.repeat_reply``.
        """
        filename = ''.join([str(random.randrange(1, 100000)), '.apk'])
        # Directory creation is delegated to the shared helper.
        down_dir = utils.make_spiderdir(self.platform, 'download')
        # Some names may be UTF-8 byte strings; decode them to unicode.
        try:
            filename = filename.decode('utf-8', 'ignore')
        except (AttributeError, UnicodeError):
            pass
        filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])
        # Close the handle deterministically instead of relying on the GC.
        with open(filename, 'wb') as apk_file:
            apk_file.write(response.body)
        # The database is only touched after the file has been written.
        topic_url = response.request.meta['topic_url']
        hashurl = sql.hash_topic(topic_url)
        updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
        # Presumably doubles backslashes so Windows paths survive being
        # embedded in the SQL text -- TODO confirm against sql helpers.
        filename = filename.replace('\\', '\\\\')
        insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
        status = handle_db(insert_sql)
        log.msg(str(status), log.DEBUG)
        # Flag the topic URL in the topic table.
        update_topic_url = sql.topicurl_withcrawed(topic_url)
        status = handle_db(update_topic_url)
        log.msg(str(status), log.DEBUG)

        # Reaching this callback means the download happened: points - 1.
        account_sql = sql.select_accountbyusername(self.username, self.platform)
        point_num = handle_db(account_sql)['data'][0][5]
        point_num -= 1
        update_account_pointsql = sql.update_account_point(self.username, self.platform, point_num)
        n = handle_db(update_account_pointsql)
        if n['errorNo'] == 0:
            # Report the new points value; the original printed reply_num.
            log.msg(('<username: %s \'s integral is : -1 ,now integral is %s>' % (self.username, point_num)), log.INFO)
        try:
            autocopy.copy(filename, self.platform)
            log.msg('copy job is successed', log.INFO)
        except Exception:
            # format_exc() returns the traceback text; print_exc() returns None.
            log.msg(traceback.format_exc(), log.ERROR)
            log.msg('copy job is failture', log.ERROR)

        return self.repeat_reply(response)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:45,代码来源:mumayi.py


示例10: reply_status

 def reply_status(self, response):
     """Check the reply result, update the account and schedule the download.

     Returns ``None`` when the hourly posting-limit message is present or
     when the success message is absent. On success the account counters
     are incremented and stored, and the request from
     ``self.get_down_topic`` is returned; if that step fails, the request
     from ``self.repeat_reply`` is returned instead.

     :param response: response of the reply request (GBK-encoded body);
         ``response.request.meta`` must contain ``topic_url``.
     """
     success = u'非常感谢,你的回复已经发布'.encode('gbk', 'ignore')
     status = re.findall(success, response.body)
     username_sql = sql.select_accountbyusername(self.username, self.platform)
     print('status %s' % (status,))
     # Hourly posting limit reached: nothing to record.
     failture = u'对不起,您所在的用户组每小时限制发帖 '.encode('gbk', 'ignore')
     failture_status = re.findall(failture, response.body, re.I)
     if failture_status:
         print(u'对不起,您所在的用户组每小时限制发帖 ')
         return

     if status:
         log.msg('reply success', log.INFO)
         reply_nums = handle_db(username_sql)['data'][0]
         # Column 5 is read as the points value and the second-to-last
         # column as the reply counter.
         self.point_num = reply_nums[5]
         self.reply_num = reply_nums[-2]
         self.reply_num += 1
         self.point_num += 1
         # Reply succeeded: persist reply_num + 1 and points + 1.
         try:
             update_replynum = sql.update_account_username(self.username, self.platform, self.reply_num, self.point_num)
             handle_db(update_replynum)
             # Report the points value; the original printed reply_num here.
             log.msg(('<username: %s \'s integral is : +1 ,now integral is %s>' % (self.username, self.point_num)), log.INFO)
             # Reply succeeded: go back to the topic page for the download.
             url = response.request.meta['topic_url']
             print(u'回复成功主题:url----> %s' % url)
             self.topic_reply_num -= 1
             log.msg(('reply success, will download software%s' % str(self.topic_reply_num)), log.DEBUG)
             log.msg(str(response), log.DEBUG)
             return self.get_down_topic(response, url)
         except Exception:
             # format_exc() returns the traceback text; print_exc() returns None.
             log.msg(traceback.format_exc(), log.ERROR)
             # Fall back to another reply round and count this attempt too.
             self.topic_reply_num -= 1
             # The original passed a tuple here instead of a formatted string.
             log.msg(('reply success, will download software%s' % str(self.topic_reply_num)), log.DEBUG)
             log.msg(str(response), log.DEBUG)
             return self.repeat_reply(response)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:44,代码来源:goapk.py


示例11: get_apk

 def get_apk(self, response):
     """Write the downloaded package under ``gfan/download/<date>`` and record it.

     The dated directory below the current working directory is created when
     missing, the response body is written there, a software-info row is
     inserted and the topic URL is flagged through
     ``sql.topicurl_withcrawed``.

     :param response: download response; ``response.request.meta`` must
         contain ``filename`` and ``topic_url``.
     """
     filename = response.request.meta['filename']

     # Build the dated directory once so every use agrees even when the
     # call straddles midnight (the original recomputed it three times).
     down_dir = os.path.join(os.getcwd(), 'gfan', 'download', time.strftime('%Y-%m-%d'))
     if not os.path.isdir(down_dir):
         # makedirs also creates missing parents; os.mkdir would fail.
         os.makedirs(down_dir)
     filename = os.path.join(down_dir, str(time.time()).split('.')[0] + filename)
     # Close the handle deterministically instead of relying on the GC.
     with open(filename, 'wb') as package_file:
         package_file.write(response.body)
     # The database is only touched after the file has been written.
     topic_url = response.request.meta['topic_url']
     hashurl = sql.hash_topic(topic_url)
     updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
     # Presumably doubles backslashes so Windows paths survive being
     # embedded in the SQL text -- TODO confirm against sql helpers.
     filename = filename.replace('\\', '\\\\')
     insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
     status = handle_db(insert_sql)
     print(status)
     # Flag the topic URL in the topic table.
     update_topic_url = sql.topicurl_withcrawed(topic_url)
     status = handle_db(update_topic_url)
     print(status)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:19,代码来源:gfan.py


示例12: __init__

 def __init__(self):
     """Pick an account for this spider and mark it as in use.

     The account record comes from ``get_account(self.platform)``; fields 2
     and 3 are stored as user name and password. ``self.user_relax`` is
     connected to the ``spider_closed`` signal.
     """
     account = get_account(self.platform)
     self.username = account[2]
     self.password = account[3]
     self.reply_num = 0
     # Before crawling starts, set the account's use flag to '1' so that
     # other threads do not pick the same account.
     handle_db(sql.update_use_byusernamesql(self.username, self.platform, '1'))
     log.msg(('<username: %s > is being use' % self.username), log.DEBUG)
     # When the spider-closed signal arrives, the first argument is called.
     dispatcher.connect(self.user_relax, signals.spider_closed)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:11,代码来源:gfandown.py


示例13: process_item

 def process_item(self, item, spider):
     """Insert the item's topic URL through ``handle_db`` and pass the item on.

     :param item: mapping with ``topic_url``, ``referer_url`` and
         ``spider_name`` keys.
     :param spider: unused.
     :returns: the unchanged ``item``.
     """
     # Pull the needed fields out of the incoming item.
     topic, referer, name = item['topic_url'], item['referer_url'], item['spider_name']
     stamp = time.strftime('%Y-%m-%d %H:%M:%S')
     # New rows start with is_crawled=0 and priority_rating=0.
     statement = insert_topic_url(name, topic, referer, stamp, 0, 0)
     result = handle_db(statement)
     print(result)
     return item
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:12,代码来源:pipelines.py


示例14: parse_item

 def parse_item(self, response):
     """Extract ``.apk`` links from a topic page and yield download requests.

     * Topic removed: the topic URL is flagged through
       ``sql.topicurl_withcrawed`` and nothing is yielded.
     * No link: the topic is flagged and the request from
       ``self.repeat_reply`` is yielded.
     * Otherwise one GET request per distinct link is yielded with
       ``self.get_apk`` as callback.

     :param response: topic page response (GBK-encoded body);
         ``response.request.meta`` must contain ``topic_url``.
     """
     log.msg(response.url, log.INFO)
     try:
         # Read the topic URL first: the removed-topic branch below needs it
         # (the original assigned it only after that branch).
         topic_url = response.request.meta['topic_url']
         if re.findall(u'抱歉,指定的主题不存在或已被删除或正在被审核'.encode('gbk', 'ignore'), response.body, re.I):
             # The topic is gone: flag it and stop.
             log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             return
     except Exception:
         # format_exc() returns the traceback text; print_exc() returns None.
         log.msg(traceback.format_exc(), log.INFO)
     try:
         # NOTE(review): '.' and '?' in "forum.php?" are unescaped regex
         # metacharacters; the pattern is kept as-is to avoid changing which
         # links match.
         urls = re.findall(r'<a href="(forum.php?[^"]+)".*>.*?\.apk', response.body, re.I)
         if not urls:
             log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
             # Without an apk the topic link is useless: flag it as crawled.
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             yield self.repeat_reply(response)
         else:
             for url in set(urls):
                 url = 'http://bbs.mumayi.com/%s' % url
                 url = url.replace('amp;', '')
                 request = response.request.replace(url=url, method='get', callback=self.get_apk)
                 request.meta['url'] = response.url
                 yield request

     except IndexError:
         log.msg(traceback.format_exc(), log.ERROR)
         # No downloadable apk: flag this URL as crawled.
         update_crawled_sql = sql.topicurl_withcrawed(response.url)
         # Was ``handler_db`` (undefined name) in the original.
         status = handle_db(update_crawled_sql)
         if status['errorNo'] == 1:
             log.msg('this url has no apk', log.INFO)
         return
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:40,代码来源:mumayi.py


示例15: get_topic

 def get_topic(self, response):
     """Build the request for the next topic to reply to.

     The first row returned for ``sql.get_topic_ranone(self.platform, '0')``
     supplies the URL (its first column). The request is a GET copy of
     ``response.request`` with ``self.get_code`` as callback.

     :param response: current response; stored in the request meta.
     :returns: the prepared request.
     """
     query = sql.get_topic_ranone(self.platform, '0')
     row = handle_db(query)['data'][0]
     topic_url = row[0]

     request = response.request.replace(url=topic_url, method='get')
     request.callback = self.get_code
     request.meta['topic_url'] = topic_url
     # Keep the logged-in response so later callbacks can re-enter this
     # method with it.
     request.meta['topic_response'] = response
     return request
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:13,代码来源:goreply.py


示例16: reply_status

 def reply_status(self, response):
     """Check whether a forum reply succeeded and update the account counters.

     When the success message is found in the body, the account row is
     loaded, ``self.reply_num`` and ``self.point_num`` are incremented and
     written back through ``handle_db``.

     :param response: response of the reply request.
     """
     success = '非常感谢,您的回复已经发布'
     status = re.findall(success, response.body)
     username_sql = sql.select_accountbyusername(self.username, self.platform)

     if status:
         log.msg('reply success', log.INFO)
         reply_nums = handle_db(username_sql)['data'][0]
         # Column 5 is read as the points value and the second-to-last
         # column as the reply counter.
         self.point_num = reply_nums[5]
         self.reply_num = reply_nums[-2]
         self.reply_num += 1
         self.point_num += 1
         # Reply succeeded: persist reply_num + 1 and points + 1.
         try:
             update_replynum = sql.update_account_username(self.username, self.platform, self.reply_num, self.point_num)
             handle_db(update_replynum)
             # Report the points value; the original printed reply_num here.
             log.msg(('<username: %s \'s integral is : +1 ,now integral is %s>' % (self.username, self.point_num)), log.INFO)
         except Exception:
             # format_exc() returns the traceback text; print_exc() returns None.
             log.msg(traceback.format_exc(), log.ERROR)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:22,代码来源:gfanreply.py


示例17: parse_item

 def parse_item(self, response):
     """Extract ``.apk`` attachment links and yield attachment requests.

     * Topic removed: the topic URL is flagged through
       ``sql.topicurl_withcrawed`` and nothing is yielded.
     * No link: the topic is flagged as crawled.
     * Otherwise one request per distinct link is yielded with
       ``self.get_attachementpath`` as callback.

     :param response: topic page response; ``response.request.meta`` must
         contain ``topic_url``.
     """
     log.msg(response.url, log.INFO)
     try:
         topic_url = response.request.meta['topic_url']
         if re.findall('指定的主题不存在或已被删除或正在被审核', response.body, re.I):
             # The topic is gone: flag it and stop.
             log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             return
     except Exception:
         # format_exc() returns the traceback text; print_exc() returns None.
         log.msg(traceback.format_exc(), log.INFO)
     try:
         # NOTE(review): '.' and '?' in "attachment.php?" are unescaped regex
         # metacharacters; the pattern is kept as-is to avoid changing which
         # links match.
         urls = re.findall(r'<a href="(attachment.php?[^"]+)".*?>.*\.apk', response.body, re.I)
         if not urls:
             log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
             # Without an apk the topic link is useless: flag it as crawled.
             update_topic_url = sql.topicurl_withcrawed(topic_url)
             handle_db(update_topic_url)
             return
         for url in set(urls):
             url = 'http://bbs.gfan.com/%s' % url
             request = response.request.replace(url=url, callback=self.get_attachementpath)
             request.meta['url'] = response.url
             yield request

     except IndexError:
         log.msg(traceback.format_exc(), log.ERROR)
         # No downloadable apk: flag this URL as crawled.
         update_crawled_sql = sql.topicurl_withcrawed(response.url)
         # Was ``handler_db`` (undefined name) in the original.
         status = handle_db(update_crawled_sql)
         if status['errorNo'] == 1:
             log.msg('this url has no apk', log.INFO)
         return
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:38,代码来源:gfandown.py


示例18: get_apk

    def get_apk(self, response):
        """Save a downloaded ``.ipa`` once and record it in the database.

        A topic-url row is inserted first; when ``handle_db`` reports
        ``errorNo == -1`` the download is rejected before anything is
        written to disk. Otherwise the body is written, a software-info row
        is inserted and the file is passed to ``autocopy.copy``.

        :param response: download response; its request URL is used as the
            topic URL.
        :raises ValueError: when the topic-url insert reports ``errorNo == -1``.
        """
        filename = ''.join([str(random.randrange(1, 100000)), '.ipa'])
        # Directory creation is delegated to the shared helper.
        down_dir = utils.make_spiderdir(self.platform, 'download')
        # Some names may be UTF-8 byte strings; decode them to unicode.
        try:
            filename = filename.decode('utf-8', 'ignore')
        except (AttributeError, UnicodeError):
            pass
        filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])

        topic_url = response.request.url
        hashurl = sql.hash_topic(topic_url)
        updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
        # Presumably doubles backslashes so Windows paths survive being
        # embedded in the SQL text -- TODO confirm against sql helpers.
        filename = filename.replace('\\', '\\\\')
        # Insert the topic page row first.
        # NOTE(review): this row uses self.name while the software-info row
        # below uses self.platform -- confirm both are intended.
        insert_topic_sql = sql.insert_topic_url(self.name, topic_url, '', updatedate, '1', '0')
        topic_status = handle_db(insert_topic_sql)
        if topic_status['errorNo'] == -1:
            raise ValueError("this ipa file has download in my databases")
        # Close the handle deterministically instead of relying on the GC.
        with open(filename, 'wb') as ipa_file:
            ipa_file.write(response.body)
        insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
        status = handle_db(insert_sql)
        log.msg(str(status), log.DEBUG)

        try:
            autocopy.copy(filename, self.platform)
            log.msg('copy job is successed', log.INFO)
        except Exception:
            # format_exc() returns the traceback text; print_exc() returns None.
            log.msg(traceback.format_exc(), log.ERROR)
            log.msg('copy job is failture', log.ERROR)
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:37,代码来源:weiphone.py


示例19: reply_status

 def reply_status(self, response):
     """Check whether the reply succeeded and bump the account's reply count.

     When the success message is found in the body, the account row is
     loaded, ``self.reply_num`` is set to its second-to-last column plus
     one and written back through ``handle_db``.

     :param response: response of the reply request.
     """
     matches = re.findall('非常感谢,您的回复已经发布', response.body)
     print(self.username)
     account_query = sql.select_accountbyusername(self.username, self.platform)

     if not matches:
         return

     print('reply success')
     account_row = handle_db(account_query)['data'][0]

     self.reply_num = account_row[-2]
     self.reply_num += 1
     # Reply succeeded: persist reply_num + 1.
     try:
         stamp = time.strftime('%Y-%m-%d %H:%M:%S')
         statement = sql.update_account_username(self.username, stamp, self.platform, self.reply_num)
         outcome = handle_db(statement)
         print(outcome)
         print('update success! user\'s reply_num + 1')
     except Exception:
         traceback.print_exc()
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:24,代码来源:gfan.py


示例20: get_topic

 def get_topic(self, response):
     """Yield one GET request per row returned for the topic query.

     Rows come from ``sql.get_topic_bycrawed(self.platform, '0')``; column 0
     is used as the URL and column 1 is stored as ``crawled`` in the
     request meta. Every request carries a fixed proxy and uses
     ``self.get_code`` as callback.

     :param response: current response whose request is cloned.
     """
     print('get_topic--------->hello world')
     query = sql.get_topic_bycrawed(self.platform, '0')
     rows = handle_db(query)['data']

     for row in rows:
         print('len(urls)----------> %s' % (row,))
         target = row[0]
         request = response.request.replace(url=target, method='get', meta={'proxy' : 'http://119.115.136.226:443'})
         request.callback = self.get_code
         request.meta['crawled'] = row[1]
         request.meta['topic_url'] = target
         yield request
开发者ID:dalinhuang,项目名称:jnqprojects,代码行数:15,代码来源:gfan.py



注:本文中的src.script.db.handle_db函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python sessionhandler.SESSIONS类代码示例发布时间:2022-05-27
下一篇:
Python pycode.get_temp_folder函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap