• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python http.Http类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中src.tools.http.Http的典型用法代码示例。如果您正苦于以下问题:Python Http类的具体用法?Python Http怎么用?Python Http使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Http类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: login

    def login(self, account, password, captcha=''):
        u"""
        Log in to zhihu.com with an email account.

        Flow: fetch the homepage to scrape the _xsrf token, plant it as a
        cookie, POST the credentials, and on success optionally remember the
        account in Config and record the session cookie in the LoginRecord
        table.

        :param account: email address used as the login name
        :param password: plain-text password
        :param captcha: optional captcha answer; sent only when non-empty
        :return: True on successful login, False otherwise
        """
        content = Http.get_content('https://www.zhihu.com/')
        xsrf = Match.xsrf(content)
        if not xsrf:
            Debug.logger.info(u'登陆失败')
            Debug.logger.info(u'敲击回车重新发送登陆请求')
            return False
        # Token arrives as 'name=value'; keep only the value part.
        xsrf = xsrf.split('=')[1]
        # add xsrf as cookie into cookieJar,
        cookie = Http.make_cookie(name='_xsrf', value=xsrf, domain='www.zhihu.com')
        self.cookieJar.set_cookie(cookie)
        if captcha:
            post_data = {'_xsrf': xsrf, 'email': account, 'password': password, 'remember_me': True,
                         'captcha': captcha}
        else:
            post_data = {'_xsrf': xsrf, 'email': account, 'password': password, 'remember_me': True}

        header = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate',  # key header: with it present zhihu treats the request as non-script traffic
            'Accept-Language': 'zh,zh-CN;q=0.8,en-GB;q=0.6,en;q=0.4',
            'Host': 'www.zhihu.com',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36(KHTML, like Gecko)Chrome/34.0.1847.116 Safari/537.36',
            'Connection': 'keep-alive',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.zhihu.com',
            'Referer': 'https://www.zhihu.com/',
        }
        result = Http.get_content(url=r'https://www.zhihu.com/login/email', data=post_data, extra_header=header)
        if not result:
            Debug.logger.info(u'登陆失败,请敲击回车重新登陆')
            return False
        response = json.loads(result)

        # Zhihu's login endpoint reports success with r == 0.
        if response['r'] == 0:
            print u'登陆成功!'
            print u'登陆账号:', account
            print u'请问是否需要记住帐号密码?输入yes记住,输入其它任意字符跳过,回车确认'
            if raw_input() == 'yes':
                Config.account, Config.password, Config.remember_account = account, password, True
                print u'帐号密码已保存,可通过修改config.json修改设置'
            else:
                Config.account, Config.password, Config.remember_account = '', '', False
                print u'跳过保存环节,进入下一流程'
            Config._save()
            cookie = self.get_cookie()
            DB.execute('delete from LoginRecord')  # clear stale login records after a successful login so the next run does not pick up an old cookie
            data = {}
            data['account'] = account
            data['password'] = password
            data['recordDate'] = ExtraTools.get_today()
            data['cookieStr'] = cookie
            DB.save(data, 'LoginRecord')
            DB.commit()
            return True
        else:
            print u'登陆失败'
            Debug.print_dict(response)
            return False
开发者ID:FengWenPei,项目名称:ZhihuHelp,代码行数:60,代码来源:login.py


示例2: init_config

 def init_config(recipe_kind):
     u"""
     Prepare login state for the given recipe kind.

     Only 'zhihu' needs an authenticated session: either reuse the cookie
     left by a previous successful login or fall back to the interactive
     login flow, then persist the configuration.

     :param recipe_kind: source-site identifier, e.g. 'zhihu'
     :return: None
     """
     if recipe_kind == 'zhihu':      # TODO: drop this hard-coding once a second login-requiring site exists
         login = Login(recipe_kind='zhihu')
     else:
         return
     # !!! Set Config.remember_account to false before release !!! The first run must log in; later runs can reuse the cookie
     # After a successful login the account is recorded automatically
     if Config.remember_account_set:
         Debug.logger.info(u'检测到有设置文件,直接使用之前的设置')
         # if raw_input():
         # login.start()
         # Config.picture_quality = guide.set_picture_quality()
         Config.picture_quality = 1
         # else:
         try:
             Http.set_cookie()   # sinablog, jianshu: DontNeed
         except TypeError:
             # No usable cookie record was found -> force a fresh login.
             print u"没有找到登录成功的cookie记录, 请重新登录"
             login.start()
     else:
         log.warning_log(u"Please login...")
         login.start()
         # Config.picture_quality = guide.set_picture_quality()
         Config.picture_quality = 1
         Config.remember_account_set = True
     # save config
     Config._save()
     return
开发者ID:gitter-badger,项目名称:EE-Book,代码行数:28,代码来源:main.py


示例3: __init__

    def __init__(self, task_list):
        u"""Set up crawl pools and shared http state for one task list."""
        self.task_set = set(task_list)
        self.info_url_set = self.task_set.copy()  # info pages start as a copy of the tasks
        self.work_set = set()  # urls still waiting to be fetched
        self.thread_pool = ThreadPool(Config.max_thread)

        # Result buffers filled during the crawl.
        self.answer_list = []
        self.question_list = []
        self.info_list = []
        self.extra_index_list = []

        self.add_property()  # let subclasses attach their own attributes
        Http.set_cookie()
开发者ID:GTagent,项目名称:ZhihuHelp__Python,代码行数:13,代码来源:worker.py


示例4: get_sinablog_question_list

 def get_sinablog_question_list(self, author_id):
     u"""
     Pull the author's profile info into self.question_list and return the
     total number of articles published on the blog.

     :param author_id: numeric sina-blog author id
     :return: article count as an int
     """
     profile_url = 'http://blog.sina.com.cn/s/profile_{}.html'.format(author_id)
     self.question_list += SinaBlogParser(Http.get_content(profile_url)).get_extra_info()
     listing_url = 'http://blog.sina.com.cn/s/articlelist_{}_0_1.html'.format(author_id)
     return int(self.parse_article_num(Http.get_content(listing_url)))
开发者ID:gitter-badger,项目名称:EE-Book,代码行数:14,代码来源:sinablog_worker.py


示例5: create_work_set

    def create_work_set(self, target_url):
        u"""
        From the blog home page url, first extract the blog id via re, then use
        the content of the "about me" page to fill the data written into
        SinaBlog_Info (this arguably does not belong in this function and could
        be improved); finally walk the listing pages and put every article url
        into work_set.

        :param target_url: url of the blog home page
        :return: None
        """
        Debug.logger.debug(u"target_url是:" + str(target_url))
        if target_url in self.task_complete_set:
            return
        result = Match.SinaBlog(target_url)
        SinaBlog_author_id = int(result.group('SinaBlog_people_id'))

        href_article_list = 'http://blog.sina.com.cn/s/articlelist_{}_0_1.html'.format(SinaBlog_author_id)
        href_profile = 'http://blog.sina.com.cn/s/profile_{}.html'.format(SinaBlog_author_id)

        # ############ The part below really belongs to SinaBlogAuthorWorker (writes SinaBlog_Info); kept here for now, optimise later
        content_profile = Http.get_content(href_profile)

        parser = SinaBlogParser(content_profile)
        self.question_list += parser.get_SinaBlog_info_list()
        # Debug.logger.debug(u"create_work_set中的question_list是什么??" + str(self.question_list))
        # ############# The part above really belongs to SinaBlogAuthorWorker (writes SinaBlog_Info); kept here for now, optimise later

        # content_index = Http.get_content(href_index)
        content_article_list = Http.get_content(href_article_list)

        article_num = int(self.parse_article_num(content_article_list))
        Debug.logger.debug(u"article_num:" + str(article_num))
        if article_num % 50 != 0:
            page_num = article_num/50 + 1      # listing pages hold 50 article links each
        else:
            page_num = article_num / 50

        self.question_list[0]['article_num'] = article_num  # NOTE: this only works when each input line carries a single sina-blog address!
        # The line above has to stay like this for now: the "about me" page does not expose the article count

        self.task_complete_set.add(target_url)

        for page in range(page_num):
            url = 'http://blog.sina.com.cn/s/articlelist_{}_0_{}.html'.format(SinaBlog_author_id, page+1)
            content_article_list = Http.get_content(url)
            article_list = self.parse_get_article_list(content_article_list)
            for item in article_list:
                self.work_set.add(item)
            # self.work_set.add(article_list[0])
        return
开发者ID:knarfeh,项目名称:SinaBlog2e-book,代码行数:49,代码来源:worker.py


示例6: create_work_set

    def create_work_set(self, target_url):
        u"""
        Seed self.work_set with the url of every article of a sina-blog author.

        The author id is extracted from the blog home page url; the "about me"
        page supplies the data recorded in sinablog_info (arguably this belongs
        elsewhere), and the listing pages yield the individual article urls.
        :param target_url: url of the blog home page
        :return: None
        """
        if target_url in self.task_complete_set:
            return
        match_result = Match.sinablog_author(target_url)
        author_id = int(match_result.group('sinablog_people_id'))

        article_num = self.get_sinablog_question_list(author_id)
        # Each listing page carries 50 links, so round the page count upwards.
        page_num = article_num / 50
        if article_num % 50 != 0:
            page_num += 1

        # Only workable because each input line holds a single blog address:
        # the "about me" page itself does not expose the article count.
        self.question_list[0]['article_num'] = article_num

        self.task_complete_set.add(target_url)

        for page_index in range(page_num):
            listing_url = 'http://blog.sina.com.cn/s/articlelist_{}_0_{}.html'.format(author_id, page_index + 1)
            listing_content = Http.get_content(listing_url)
            for article_url in self.parse_get_article_list(listing_content):
                self.work_set.add(article_url)
        return
开发者ID:gitter-badger,项目名称:EE-Book,代码行数:31,代码来源:sinablog_worker.py


示例7: check_update

    def check_update():  # 强制更新
        u"""
            *   功能
                *   检测更新。
                *   若在服务器端检测到新版本,自动打开浏览器进入新版下载页面
                *   网页请求超时或者版本号正确都将自动跳过
            *   输入
                *   无
            *   返回
                *   无
        """
        print u"检查更新。。。"
        try:
            content = Http.get_content(u"http://zhihuhelpbyyzy-zhihu.stor.sinaapp.com/ZhihuHelpUpdateTime.txt")
            if not content:
                raise Exception("HttpError")
        except:
            return
        time, url = [x.strip() for x in content.split("\n")]
        if time == Config.update_time:
            return
        else:
            print u"发现新版本,\n更新日期:{} ,点按回车进入更新页面".format(time)
            print u"新版本下载地址:" + url
            raw_input()
            import webbrowser

            webbrowser.open_new_tab(url)
        return
开发者ID:zuiwan,项目名称:ZhihuHelp,代码行数:29,代码来源:main.py


示例8: create_work_set

    def create_work_set(self, target_url):
        u"""
        Fill self.work_set with the url of every article by a jianshu author.

        target_url looks like http://www.jianshu.com/users/<id>/latest_articles;
        the author id is parsed out of it, the first listing page supplies the
        total article count, and the remaining pages are walked one by one.
        :param target_url:
        :return: None
        """
        if target_url in self.task_complete_set:
            return
        jianshu_id = Match.jianshu_author(target_url).group('jianshu_id')
        article_num, article_list = self.get_jianshu_question_list(target_url)
        self.task_complete_set.add(target_url)
        # 9 article links per listing page -> round the page count upwards.
        page_num = article_num / 9
        if article_num % 9 != 0:
            page_num += 1

        for article_url in article_list:
            self.work_set.add(article_url)
        # The first page was already consumed above, so walking starts at page 2.
        for page_index in range(page_num - 1):
            url = 'http://www.jianshu.com/users/{}/latest_articles?page={}'.format(jianshu_id, page_index + 2)
            for article_url in self.parse_get_article_list(Http.get_content(url)):
                self.work_set.add(article_url)
        return
开发者ID:mozii,项目名称:EE-Book,代码行数:27,代码来源:jianshu_worker.py


示例9: worker

 def worker(self, target_url):
     u"""Fetch one url; on success drop it from the pool and parse the page."""
     page = Http.get_content(target_url)
     if page:
         self.work_set.discard(target_url)
         self.parse_content(page)
     return
开发者ID:hmilyfyj,项目名称:ZhihuHelp__Python,代码行数:7,代码来源:worker.py


示例10: __init__

    def __init__(self, task_list):
        u"""Prepare url pools and result buffers for one crawl run."""
        self.task_set = set(task_list)
        self.task_complete_set = set()
        self.work_set = set()  # urls still waiting to be fetched
        self.work_complete_set = set()  # urls already fetched
        self.content_list = []  # raw page contents collected so far
        self.answer_list = []
        self.question_list = []

        # Extra-info bookkeeping: the info pool starts as a copy of the tasks.
        self.info_url_set = self.task_set.copy()
        self.info_url_complete_set = set()
        self.info_list = []
        self.extra_index_list = []

        self.add_property()  # let subclasses attach their own attributes
        Http.set_cookie()
开发者ID:FengWenPei,项目名称:ZhihuHelp,代码行数:16,代码来源:worker.py


示例11: create_work_set

    def create_work_set(self, target_url):
        u"""
        Fetch a zhihu column's metadata, store it in self.info_list, and queue
        the paged post-listing api urls into self.work_set.

        :param target_url: column homepage url, matched by Match.column
        :return: None
        """
        if target_url in self.task_complete_set:
            return
        result = Match.column(target_url)
        self.column_id = result.group('column_id')
        content = Http.get_content('https://zhuanlan.zhihu.com/api/columns/' + self.column_id)
        if not content:
            return
        raw_info = json.loads(content)
        info = {}
        info['creator_id'] = raw_info['creator']['slug']
        info['creator_hash'] = raw_info['creator']['hash']
        info['creator_sign'] = raw_info['creator']['bio']
        info['creator_name'] = raw_info['creator']['name']
        # Avatar urls are templates containing {id} and _{size} placeholders;
        # substitute the id and strip the size to get a concrete image url.
        info['creator_logo'] = raw_info['creator']['avatar']['template'].replace('{id}', raw_info['creator']['avatar'][
            'id']).replace('_{size}', '')

        info['column_id'] = raw_info['slug']
        info['name'] = raw_info['name']
        # NOTE(review): the column logo mixes the creator's avatar template with
        # the column's own avatar id (raw_info['avatar']['id']) — looks
        # intentional but worth confirming against the api response shape.
        info['logo'] = raw_info['creator']['avatar']['template'].replace('{id}', raw_info['avatar']['id']).replace(
            '_{size}', '')
        info['article'] = raw_info['postsCount']
        info['follower'] = raw_info['followersCount']
        info['description'] = raw_info['description']
        self.info_list.append(info)
        self.task_complete_set.add(target_url)
        # The posts api pages by 10; enqueue one url per offset needed.
        detect_url = 'https://zhuanlan.zhihu.com/api/columns/{}/posts?limit=10&offset='.format(self.column_id)
        for i in range(info['article'] / 10 + 1):
            self.work_set.add(detect_url + str(i * 10))
        return
开发者ID:FengWenPei,项目名称:ZhihuHelp,代码行数:30,代码来源:worker.py


示例12: create_work_set

    def create_work_set(self, target_url):
        u"""
        Record a zhihu column's metadata in self.info_list and queue its paged
        post-listing api urls into self.work_set.

        :param target_url: column homepage url, matched by Match.column
        :return: None
        """
        if target_url in self.task_complete_set:
            return
        self.column_id = Match.column(target_url).group("column_id")
        content = Http.get_content("https://zhuanlan.zhihu.com/api/columns/" + self.column_id)
        if not content:
            return
        raw_info = json.loads(content)
        creator = raw_info["creator"]
        # Avatar urls are templates with {id} / _{size} placeholders.
        template = creator["avatar"]["template"]
        info = {
            "creator_id": creator["slug"],
            "creator_hash": creator["hash"],
            "creator_sign": creator["bio"],
            "creator_name": creator["name"],
            "creator_logo": template.replace("{id}", creator["avatar"]["id"]).replace("_{size}", ""),
            "column_id": raw_info["slug"],
            "name": raw_info["name"],
            "logo": template.replace("{id}", raw_info["avatar"]["id"]).replace("_{size}", ""),
            "article": raw_info["postsCount"],
            "follower": raw_info["followersCount"],
            "description": raw_info["description"],
        }
        self.info_list.append(info)
        self.task_complete_set.add(target_url)
        # The posts api pages by 10; enqueue one url per offset.
        base_url = "https://zhuanlan.zhihu.com/api/columns/{}/posts?limit=10&offset=".format(self.column_id)
        for page in range(info["article"] / 10 + 1):
            self.work_set.add(base_url + str(page * 10))
        return


示例13: check_update

 def check_update():  # 强制更新
     u"""
         *   功能
             *   检测更新。
             *   若在服务器端检测到新版本,自动打开浏览器进入新版下载页面
             *   网页请求超时或者版本号正确都将自动跳过
         *   输入
             *   无
         *   返回
             *   无
     """
     print u"检查更新。。。"
     if Config.debug:
         # 当位于debug模式时,不检查更新
         return
     try:
         content = Http.get_content(u"https://www.yaozeyuan.online/zhihuhelp/upgrade.txt")
         if not content:
             raise Exception(u'HttpError')
         time, url = [x.strip() for x in content.strip('\n').split('\n')]
         if time == Config.update_time:
             return
         else:
             print u"发现新版本,\n更新日期:{} ,点按回车进入更新页面".format(time)
             print u'新版本下载地址:' + url
             raw_input()
             import webbrowser
             webbrowser.open_new_tab(url)
     except Exception:
         # 不论发生任何异常均直接返回
         return
开发者ID:EleVenPerfect,项目名称:OTHERS,代码行数:31,代码来源:main.py


示例14: catch_info

 def catch_info(self, target_url):
     u"""Grab the author's /about page and append its parsed extra info."""
     about_page = Http.get_content(target_url + '/about')
     if not about_page:
         return
     self.info_url_set.discard(target_url)
     self.info_list.append(AuthorParser(about_page).get_extra_info())
     return
开发者ID:GTagent,项目名称:ZhihuHelp__Python,代码行数:8,代码来源:worker.py


示例15: worker

 def worker(self, target_url):
     u"""Log, download one url, then hand the page to the parser."""
     Debug.logger.info(u'开始抓取{}的内容'.format(target_url))
     page = Http.get_content(target_url)
     if page:
         self.work_set.discard(target_url)
         self.parse_content(page)
     return


示例16: init_config

    def init_config():
        login = Login()
        if Config.remember_account:
            print u"检测到有设置文件,是否直接使用之前的设置?(帐号、密码、图片质量)"
            print u"点按回车使用之前设置,敲入任意字符后点按回车进行重新设置"
            if raw_input():
                login.start()
                Config.picture_quality = guide.set_picture_quality()
            else:
                Http.set_cookie()
        else:
            login.start()
            Config.picture_quality = guide.set_picture_quality()

        # 储存设置
        Config._save()
        return
开发者ID:zuiwan,项目名称:ZhihuHelp,代码行数:17,代码来源:main.py


示例17: create_work_set

    def create_work_set(self, target_url):
        u"""
        Queue every article url of a cnblogs author into self.work_set.

        The '?page=2' request is made only to read the maximum page number out
        of the pager; each listing page is then fetched and parsed in turn.

        :param target_url: the author's blog index url
        :return: None
        """
        if target_url in self.task_complete_set:
            return

        self.task_complete_set.add(target_url)
        url = target_url + '?page=2'      # there are page num in this url

        content = Http.get_content(url)
        page_num = self.parse_max_page(content)

        # Fix: the original reused `item` for both loops, so the inner loop
        # shadowed the outer page index — harmless by accident, but fragile.
        for page_index in range(int(page_num)):
            url = target_url + '?page={}'.format(str(page_index + 1))
            content = Http.get_content(url)
            parser = CnblogsAuthorParser(content)
            article_url_list = parser.get_article_list()
            for article_url in article_url_list:
                self.work_set.add(article_url)
        return
开发者ID:mozii,项目名称:EE-Book,代码行数:18,代码来源:cnblogs_worker.py


示例18: catch_info

 def catch_info(self, target_url):
     u"""Fetch an info page at most once and append the parsed extra info."""
     if target_url in self.info_url_complete_set:
         return
     page = Http.get_content(target_url)
     if not page:
         return
     self.info_url_complete_set.add(target_url)
     self.info_list.append(YiibaiParser(page).get_extra_info())
     return
开发者ID:mozii,项目名称:EE-Book,代码行数:10,代码来源:yiibai_worker.py


示例19: create_work_set

 def create_work_set(self, target_url):
     u"""Expand one question url into its per-page urls inside self.work_set."""
     first_page = Http.get_content(target_url + '?nr=1&sort=created')
     if not first_page:
         return
     self.task_set.discard(target_url)
     # Pages are numbered from 1 up to the maximum found in the pager.
     for page in range(1, self.parse_max_page(first_page) + 1):
         self.work_set.add('{}?nr=1&sort=created&page={}'.format(target_url, page))
     return
开发者ID:GTagent,项目名称:ZhihuHelp__Python,代码行数:10,代码来源:worker.py


示例20: create_work_set

    def create_work_set(self, target_url):
        u"""Parse one listing page and queue every article url found on it."""
        if target_url in self.task_complete_set:
            return

        listing_page = Http.get_content(target_url)
        parser = YiibaiParser(listing_page)

        self.task_complete_set.add(target_url)
        for article_url in parser.get_article_list():
            self.work_set.add(article_url)
        return
开发者ID:mozii,项目名称:EE-Book,代码行数:11,代码来源:yiibai_worker.py



注:本文中的src.tools.http.Http类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python match.Match类代码示例发布时间:2022-05-27
下一篇:
Python db.DB类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2023 极客世界.|Sitemap