• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python request.Response类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中wpull.http.request.Response的典型用法代码示例。如果您正苦于以下问题:Python Response类的具体用法?Python Response怎么用?Python Response使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Response类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_html_not_quite_charset

    def test_html_not_quite_charset(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'videogame_top.htm')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertIn(
            'http://example.com/copyright_2001_2006_rtype.gif',
            inline_urls
        )
        self.assertIn(
            'http://www.geocities.jp/gamehouse_grindcrusher/',
            linked_urls
        )
开发者ID:Willianvdv,项目名称:wpull,代码行数:27,代码来源:html_test.py


示例2: test_redirect_loop

    def test_redirect_loop(self):
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            response = Response('HTTP/1.0', 302, 'See else')
            response.url_info = request.url_info
            response.fields['location'] = '/robots.txt'

            http_client.response = response
            yield session.fetch()

        request = session.next_request
        self.assertTrue(request)

        response = Response('HTTP/1.0', 200, 'OK')

        http_client.response = response
        yield session.fetch()

        self.assertEqual(RobotsState.ok, session._robots_state)

        print(session.next_request)
        self.assertTrue(session.done)
开发者ID:imshashank,项目名称:data-mining,代码行数:33,代码来源:robotstxt_test.py


示例3: test_xhtml_invalid

    def test_xhtml_invalid(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'xhtml_invalid.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual(
            {
                'http://example.com/image.png',
                'http://example.com/script.js',
            },
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/link'
            },
            linked_urls
        )
开发者ID:Willianvdv,项目名称:wpull,代码行数:32,代码来源:html_test.py


示例4: test_rss_as_html

    def test_rss_as_html(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()
        response.fields['content-type'] = 'application/rss+xml'

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'rss.xml')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)

        self.assertTrue(scrape_result)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links
        self.assertFalse(
            inline_urls
        )
        self.assertEqual(
            {
                'http://www.someexamplerssdomain.com/main.html',
                'http://www.wikipedia.org/'
            },
            linked_urls
        )
开发者ID:Willianvdv,项目名称:wpull,代码行数:30,代码来源:html_test.py


示例5: test_html_soup

    def test_html_soup(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()
        response.fields['Refresh'] = 'yes'

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'soup.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual(
            {'http://example.com/ABOUTM~1.JPG'},
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/BLOG',
                'http://example.com/web ring/Join.htm',
            },
            linked_urls
        )
开发者ID:Willianvdv,项目名称:wpull,代码行数:30,代码来源:html_test.py


示例6: test_html_krokozyabry

    def test_html_krokozyabry(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()
        response.fields['content-type'] = 'text/html; charset=KOI8-R'

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'krokozyabry.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual('koi8-r', scrape_result.encoding)

        self.assertEqual(
            set(),
            inline_urls
        )
        self.assertEqual(
            {'http://example.com/Кракозябры'},
            linked_urls
        )
开发者ID:Willianvdv,项目名称:wpull,代码行数:30,代码来源:html_test.py


示例7: test_html_krokozyabry

    def test_html_krokozyabry(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'text/html; charset=KOI8-R'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples',
                                          'krokozyabry.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual('koi8-r', scrape_info['encoding'])

        self.assertEqual(
            set(),
            inline_urls
        )
        self.assertEqual(
            {'http://example.com/Кракозябры'},
            linked_urls
        )
开发者ID:lowks,项目名称:wpull,代码行数:27,代码来源:scraper_test.py


示例8: test_html_soup

    def test_html_soup(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['Refresh'] = 'yes'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples', 'soup.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual(
            {'http://example.com/ABOUTM~1.JPG'},
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/BLOG',
                'http://example.com/web ring/Join.htm',
            },
            linked_urls
        )
开发者ID:lowks,项目名称:wpull,代码行数:27,代码来源:scraper_test.py


示例9: test_sitemap_scraper_xml_index

    def test_sitemap_scraper_xml_index(self):
        scraper = SitemapScraper(self.get_html_parser())
        request = Request('http://example.com/sitemap.xml')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            response.body.write(
                b'''<?xml version="1.0" encoding="UTF-8"?>
                <sitemapindex
                xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <sitemap>
                      <loc>http://www.example.com/sitemap1.xml.gz</loc>
                      <lastmod>2004-10-01T18:23:17+00:00</lastmod>
                   </sitemap>
                </sitemapindex>
            '''
            )

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual({
            'http://www.example.com/sitemap1.xml.gz',
        },
            linked_urls
        )
        self.assertFalse(inline_urls)
开发者ID:Willianvdv,项目名称:wpull,代码行数:29,代码来源:sitemap_test.py


示例10: test_rss_as_html

    def test_rss_as_html(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'application/rss+xml'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples', 'rss.xml')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)

        self.assertTrue(scrape_info)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']
        self.assertFalse(
            inline_urls
        )
        self.assertEqual(
            {
                'http://www.someexamplerssdomain.com/main.html',
                'http://www.wikipedia.org/'
            },
            linked_urls
        )
开发者ID:lowks,项目名称:wpull,代码行数:27,代码来源:scraper_test.py


示例11: test_javascript_heavy_inline_monstrosity

    def test_javascript_heavy_inline_monstrosity(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/test.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'twitchplayspokemonfirered.html')
            with open(html_file_path, 'rb') as in_file:
                in_file.seek(0x147)
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertIn(
            'http://cdn.bulbagarden.net/upload/archive/a/a4/'
            '20090718115357%21195Quagsire.png',
            inline_urls
        )
        self.assertIn(
            'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
            'user%2FGoldenSandslash15&sa=D&sntz=1&'
            'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A',
            linked_urls
        )

        print('\n'.join(inline_urls))
        print('\n'.join(linked_urls))
开发者ID:Willianvdv,项目名称:wpull,代码行数:32,代码来源:javascript_test.py


示例12: test_sitemap_scraper_xml

    def test_sitemap_scraper_xml(self):
        scraper = SitemapScraper(self.get_html_parser())
        request = Request('http://example.com/sitemap.xml')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            response.body.write(
                b'''<?xml version="1.0" encoding="UTF-8"?>
                <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <url>
                      <loc>http://www.example.com/</loc>
                      <lastmod>2005-01-01</lastmod>
                      <changefreq>monthly</changefreq>
                      <priority>0.8</priority>
                   </url>
                </urlset>
            '''
            )

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual({
            'http://www.example.com/',
        },
            linked_urls
        )
        self.assertFalse(inline_urls)
开发者ID:Willianvdv,项目名称:wpull,代码行数:30,代码来源:sitemap_test.py


示例13: test_http_response

    def test_http_response(self):
        response = Response(200, 'OK', version='HTTP/1.0')
        response.fields['hello'] = 'world'

        new_response = HTTPResponseInfoWrapper(response)
        info = new_response.info()

        self.assertEqual('world', info.get('hello'))
开发者ID:Willianvdv,项目名称:wpull,代码行数:8,代码来源:wrapper_test.py


示例14: test_response

    def test_response(self):
        response = Response(200, 'OK')
        response.fields['Cake'] = 'dolphin'

        self.assertEqual(
            (b'HTTP/1.1 200 OK\r\n'
             b'Cake: dolphin\r\n'
             b'\r\n'),
            response.to_bytes()
        )
开发者ID:Willianvdv,项目名称:wpull,代码行数:10,代码来源:request_test.py


示例15: test_html_detect

    def test_html_detect(self):
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO('<html><body>hi</body></html>'.encode('utf-16le'))
        ))
        self.assertFalse(HTMLReader.is_file(
            io.BytesIO('hello world!'.encode('utf-16le'))
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(b'<title>hello</title>hi')
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(b'<html><body>hello')
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(
                b'The document has moved <a href="somewhere.html">here</a>'
            )
        ))
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.htm'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.html'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.dhtm'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.xhtml'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.xht'))
        )
        self.assertFalse(
            HTMLReader.is_url(URLInfo.parse('example.com/image.jpg'))
        )
        self.assertTrue(
            HTMLReader.is_request(Request.new('example.com/index.html'))
        )
        self.assertFalse(
            HTMLReader.is_request(Request.new('example.com/image.jpg'))
        )

        response = Response('HTTP/1.0', '200', 'OK')
        response.fields['Content-Type'] = 'text/html'
        self.assertTrue(HTMLReader.is_response(response))

        response = Response('HTTP/1.0', '200', 'OK')
        response.fields['Content-Type'] = 'image/png'
        self.assertFalse(HTMLReader.is_response(response))
开发者ID:lowks,项目名称:wpull,代码行数:50,代码来源:document_test.py


示例16: test_warc_recorder_rollback

    def test_warc_recorder_rollback(self):
        warc_filename = 'asdf.warc'
        warc_prefix = 'asdf'

        with open(warc_filename, 'wb') as warc_file:
            warc_file.write(b'a' * 10)

        warc_recorder = WARCRecorder(
            warc_prefix,
            params=WARCRecorderParams(
                compress=False,
            )
        )

        request = HTTPRequest('http://example.com/')
        request.address = ('0.0.0.0', 80)
        response = HTTPResponse(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            response.body.write(b'KITTEH DOGE')

        with warc_recorder.session() as session:
            session.pre_request(request)
            session.request_data(request.to_bytes())

            class BadRecord(WARCRecord):
                def __init__(self, original_record):
                    super().__init__()
                    self.block_file = original_record.block_file
                    self.fields = original_record.fields

                def __iter__(self):
                    for dummy in range(1000):
                        yield b"where's my elephant?"
                    raise OSError('Oops')

            session._child_session._request_record = \
                BadRecord(session._child_session._request_record)
            original_offset = os.path.getsize(warc_filename)

            with self.assertRaises((OSError, IOError)):
                session.request(request)

            new_offset = os.path.getsize(warc_filename)
            self.assertEqual(new_offset, original_offset)
            self.assertFalse(os.path.exists(warc_filename + '-wpullinc'))

            _logger.debug('original offset {0}'.format(original_offset))
开发者ID:Willianvdv,项目名称:wpull,代码行数:49,代码来源:warc_test.py


示例17: test_css_scraper_reject_type

    def test_css_scraper_reject_type(self):
        scraper = CSSScraper()
        request = Request('http://example.com/styles.css')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'styles.css')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response,
                                       link_type=LinkType.html)
        self.assertFalse(scrape_result)
开发者ID:Willianvdv,项目名称:wpull,代码行数:15,代码来源:css_test.py


示例18: test_html_serious_bad_encoding

    def test_html_serious_bad_encoding(self):
        scraper = HTMLScraper(encoding_override='utf8')
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'text/html; charset=utf8'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'xkcd_1_evil.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)

        self.assertTrue(scrape_info)
开发者ID:yipdw,项目名称:wpull,代码行数:15,代码来源:scraper_test.py


示例19: test_warc_max_size_and_append

    def test_warc_max_size_and_append(self):
        file_prefix = 'asdf'

        with open('asdf-00000.warc', 'w'):
            pass

        with open('asdf-00001.warc', 'w'):
            pass

        warc_recorder = WARCRecorder(
            file_prefix,
            params=WARCRecorderParams(
                compress=False,
                max_size=1,
                appending=True
            ),
        )

        request = HTTPRequest('http://example.com/1')
        request.address = ('0.0.0.0', 80)
        response = HTTPResponse(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            response.body.write(b'BLAH')

        with warc_recorder.session() as session:
            session.pre_request(request)
            session.request_data(request.to_bytes())
            session.request(request)
            session.pre_response(response)
            session.response_data(response.to_bytes())
            session.response_data(response.body.content())
            session.response(response)

        warc_recorder.close()

        self.assertTrue(os.path.exists('asdf-00000.warc'))
        self.assertTrue(os.path.exists('asdf-00001.warc'))
        self.assertTrue(os.path.exists('asdf-00002.warc'))
        self.assertTrue(os.path.exists('asdf-00003.warc'))
        self.assertTrue(os.path.exists('asdf-meta.warc'))

        self.assertEqual(0, os.path.getsize('asdf-00000.warc'))
        self.assertEqual(0, os.path.getsize('asdf-00001.warc'))
        self.assertNotEqual(0, os.path.getsize('asdf-00002.warc'))
        self.assertNotEqual(0, os.path.getsize('asdf-00003.warc'))
        self.assertNotEqual(0, os.path.getsize('asdf-meta.warc'))
开发者ID:Willianvdv,项目名称:wpull,代码行数:48,代码来源:warc_test.py


示例20: test_html_encoding_lxml_name_mismatch

    def test_html_encoding_lxml_name_mismatch(self):
        '''It should accept encoding names with underscore.'''
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'text/html; charset=EUC_KR'

        with wpull.util.reset_file_offset(response.body.content_file):
            response.body.content_file.write(
                '힖'.encode('euc_kr')
            )

        scrape_info = scraper.scrape(request, response)

        self.assertTrue(scrape_info)
        self.assertEqual('euc_kr', scrape_info['encoding'])
开发者ID:lowks,项目名称:wpull,代码行数:16,代码来源:scraper_test.py



注:本文中的wpull.http.request.Response类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python options.AppArgumentParser类代码示例发布时间:2022-05-26
下一篇:
Python request.Request类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap