• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python url.URLInfo类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 wpull.url.URLInfo 的典型用法代码示例。如果您正苦于以下问题:Python URLInfo 类的具体用法?Python URLInfo 怎么用?Python URLInfo 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了 URLInfo 类的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: test_directory_filter

    def test_directory_filter(self):
        '''Check DirectoryFilter verdicts with no rules, an accepted list
        and a rejected list.
        '''
        record = MockURLTableRecord()
        record.url = 'http://example.com/blog/'

        # Each scenario: (constructor kwargs, [(URL, expected verdict), ...]).
        scenarios = [
            (
                {},
                [('http://example.com', True)],
            ),
            (
                {'accepted': ['/blog']},
                [
                    ('http://example.com', False),
                    ('http://example.com/blog/', True),
                ],
            ),
            (
                {'rejected': ['/cgi-bin/']},
                [
                    ('http://example.com/blog/', True),
                    ('http://example.com/cgi-bin', False),
                ],
            ),
        ]

        for filter_kwargs, expectations in scenarios:
            directory_filter = DirectoryFilter(**filter_kwargs)

            for url, should_pass in expectations:
                check = self.assertTrue if should_pass else self.assertFalse
                check(directory_filter.test(URLInfo.parse(url), record))
开发者ID:DanielOaks,项目名称:wpull,代码行数:33,代码来源:url_test.py


示例2: test_url_info_ipv6

 def test_url_info_ipv6(self):
     '''Check ``url`` and ``hostname_with_port`` for bracketed IPv6 hosts,
     with and without an explicit port.
     '''
     # Each case: (expected value, URL to parse, attribute to read).
     cases = (
         (
             'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6',
             'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6',
             'url',
         ),
         (
             '[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080',
             'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6',
             'hostname_with_port',
         ),
         (
             'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/ipv6',
             'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/ipv6',
             'url',
         ),
         (
             '[2001:db8:85a3:8d3:1319:8a2e:370:7348]',
             'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/ipv6',
             'hostname_with_port',
         ),
     )

     for expected, raw_url, attribute in cases:
         parsed = URLInfo.parse(raw_url)
         self.assertEqual(expected, getattr(parsed, attribute))
开发者ID:Willianvdv,项目名称:wpull,代码行数:25,代码来源:url_test.py


示例3: test_url_info_invalids

 def test_url_info_invalids(self):
     '''Check that malformed URLs make ``URLInfo.parse`` raise ValueError.

     Between the two batches of rejected URLs, a bracketed host that does
     parse is checked to expose its hostname without the brackets.
     '''
     overlong_hostname_url = (
         'http://a-long-long-time-ago-the-earth-was-ruled-by-dinosaurs-'
         'they-were-big-so-not-a-lot-of-people-went-around-hassling-them-'
         'actually-no-people-went-around-hassling-them-'
         'because-there-weren-t-any-people-yet-'
         'just-the-first-tiny-mammals-'
         'basically-life-was-good-'
         'lou-it-just-dont-get-no-better-than-this-'
         'yeah-'
         'then-something-happened-'
         'a-giant-meteorite-struck-the-earth-'
         'goodbye-dinosaurs-'
         'but-what-if-the-dinosaurs-werent-all-destroyed-'
         'what-if-the-impact-of-that-meteorite-created-a-parallel-dimension-'
         'where-the-dinosaurs-continue-to-thrive-'
         'and-evolved-into-intelligent-vicious-aggressive-beings-'
         'just-like-us-'
         'and-hey-what-if-they-found-their-way-back.movie'
     )

     rejected_first = (
         '',
         '#',
         'http://',
         'example....com',
         'http://example....com',
         'http://example…com',
         'http://[34.4kf]::4',
         'http://[34.4kf::4',
         'http://dmn3]:3a:45',
         ':38/3',
         'http://][a:@1]',
         'http://[[aa]]:4:]6',
     )

     rejected_second = (
         'http://[[a]',
         'http://[[a]]a]',
         'http://[[a:a]]',
         'http:///',
         'http:///horse',
         'http://?what?',
         'http://#egg=wpull',
         # NOTE(review): '[email protected]' looks like an e-mail obfuscation
         # placeholder rather than the intended text -- confirm upstream.
         'http://:@example.com:[email protected]/',
         'http://\x00/',
         'http:/a',
         'http://@@example.com/@',
         'http://fat32defragmenter.internets::80',
         'http://fat32defragmenter.internets:80/',
         'http:// /spaaaace',
         overlong_hostname_url,
         'http://[...]/python.xml%22',
         'http://[…]/python.xml%22',
         'http://[.]/python.xml%22',
     )

     for bad_url in rejected_first:
         self.assertRaises(ValueError, URLInfo.parse, bad_url)

     for bracket in ('[', ']'):
         self.assertNotIn(bracket, URLInfo.parse('http://[a]').hostname)

     for bad_url in rejected_second:
         self.assertRaises(ValueError, URLInfo.parse, bad_url)
开发者ID:Willianvdv,项目名称:wpull,代码行数:60,代码来源:url_test.py


示例4: test_backward_filename_filter

    def test_backward_filename_filter(self):
        '''Check BackwardFilenameFilter verdicts for a mix of accepted and
        rejected filename patterns.
        '''
        filename_filter = BackwardFilenameFilter(
            accepted=['html', 'image.*.png'],
            rejected=['bmp', 'jp[eg]', 'image.123.png'],
        )

        record = MockURLTableRecord()
        record.url = 'http://example.com/'

        # (URL, expected verdict) pairs, checked in order.
        expectations = (
            ('http://example/index.html', True),
            ('http://example/myimage.1003.png', True),
            ('http://example/myimage.123.png', False),
            ('http://example/blah.png', False),
            ('http://example/image.1003.png.bmp', False),
        )

        for url, should_pass in expectations:
            check = self.assertTrue if should_pass else self.assertFalse
            check(filename_filter.test(URLInfo.parse(url), record))
开发者ID:DanielOaks,项目名称:wpull,代码行数:29,代码来源:url_test.py


示例5: test_regex_filter

    def test_regex_filter(self):
        '''Check RegexFilter verdicts with no pattern, an accepted pattern
        and a rejected pattern.
        '''
        record = MockURLTableRecord()
        record.url = 'http://example.com/blog/'

        # Each scenario: (constructor kwargs, [(URL, expected verdict), ...]).
        scenarios = (
            (
                {},
                (('http://example.net', True),),
            ),
            (
                {'accepted': r'blo[a-z]/$'},
                (
                    ('http://example.net/blob/', True),
                    ('http://example.net/blob/123', False),
                ),
            ),
            (
                {'rejected': r'\.gif$'},
                (
                    ('http://example.net/blob/', True),
                    ('http://example.net/blob/123.gif', False),
                ),
            ),
        )

        for filter_kwargs, expectations in scenarios:
            regex_filter = RegexFilter(**filter_kwargs)

            for url, should_pass in expectations:
                verdict = regex_filter.test(URLInfo.parse(url), record)

                if should_pass:
                    self.assertTrue(verdict)
                else:
                    self.assertFalse(verdict)
开发者ID:DanielOaks,项目名称:wpull,代码行数:29,代码来源:url_test.py


示例6: test_parent_filter

    def test_parent_filter(self):
        '''Check ParentFilter verdicts as the record's ``top_url`` and
        ``inline`` attributes change.
        '''
        record = MockURLTableRecord()
        record.inline = False
        parent_filter = ParentFilter()

        def passes(url):
            # Reads the record's attributes as they are at call time.
            return parent_filter.test(URLInfo.parse(url), record)

        record.top_url = 'http://example.com/blog/topic2/'
        self.assertTrue(passes('http://example.com/blog/topic2/'))

        record.top_url = 'http://example.com/blog/topic1/'
        self.assertTrue(passes('http://example.com/blog/topic1/blah.html'))
        self.assertFalse(passes('http://example.com/blog/'))

        record.inline = True
        self.assertTrue(passes('http://example.com/styles.css'))
开发者ID:DanielOaks,项目名称:wpull,代码行数:25,代码来源:url_test.py


示例7: test_url_info_trailing_dot

    def test_url_info_trailing_dot(self):
        '''Check that a trailing dot on the hostname is kept in ``url``,
        with and without an explicit port.
        '''
        # The expected output is identical to the input for both URLs.
        for url in ('http://example.com./', 'http://example.com.:81/'):
            self.assertEqual(url, URLInfo.parse(url).url)
开发者ID:Willianvdv,项目名称:wpull,代码行数:10,代码来源:url_test.py


示例8: test_url_info_round_trip

    def test_url_info_round_trip(self):
        '''Check that the ``url`` produced by parsing can itself be parsed.

        Nothing is asserted about the values; the test only requires that
        neither parse raises.
        '''
        samples = (
            'http://example.com/blah%20blah/',
            'example.com:81?blah=%c3%B0',
            'http://example.com/a/../../b/style.css',
            (
                'http://example.com/'
                '?blah=http%3A%2F%2Fexample.com%2F%3Ffail%3Dtrue'
            ),
            'http://example.com/??blah=blah[0:]=bl%61h?blah"&d%26_',
            'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/ipv6',
        )

        for sample in samples:
            normalized = URLInfo.parse(sample).url
            URLInfo.parse(normalized)
开发者ID:Willianvdv,项目名称:wpull,代码行数:13,代码来源:url_test.py


示例9: test_to_dir_path_url

 def test_to_dir_path_url(self):
     '''Check ``to_dir_path_url`` for a root URL, a top-level file and a
     nested file.
     '''
     # (expected directory URL, input URL) pairs.
     cases = (
         ('ftp://putfile.com/', 'ftp://putfile.com/'),
         ('ftp://putfile.com/', 'ftp://putfile.com/asdf'),
         ('ftp://putfile.com/asdf/', 'ftp://putfile.com/asdf/qwer'),
     )

     for expected, raw_url in cases:
         parsed = URLInfo.parse(raw_url)
         self.assertEqual(expected, to_dir_path_url(parsed))
开发者ID:Super-Rad,项目名称:wpull,代码行数:13,代码来源:ftp_test.py


示例10: test_url_info_misleading_parts

 def test_url_info_misleading_parts(self):
     '''Check ``url`` for inputs whose ``?``, ``#``, ``:`` and ``@``
     characters appear in misleading positions.
     '''
     # (expected normalized URL, input URL) pairs, checked in order.
     cases = (
         ('http://example.com/?a', 'http://example.com?a'),
         ('http://example.com/?a?', 'http://example.com?a?'),
         ('http://example.com/', 'http://example.com#a'),
         ('http://example.com/', 'http://example.com#a?'),
         ('http://example.com/?a', 'http://example.com?a#'),
         ('http://example.com/:10', 'http://example.com/:10'),
         # NOTE(review): '[email protected]' in the next pair looks like an
         # e-mail obfuscation placeholder rather than the intended text --
         # confirm upstream.
         (
             'http://example.com/[email protected]/',
             'http://:@[email protected]/',
         ),
         (
             'http://example.com/http:/example.com',
             'http://:@example.com/http://example.com',
         ),
     )

     for expected, raw_url in cases:
         self.assertEqual(expected, URLInfo.parse(raw_url).url)
开发者ID:Willianvdv,项目名称:wpull,代码行数:33,代码来源:url_test.py


示例11: test_ip_address_normalization

 def test_ip_address_normalization(self):
     '''Check that alternative IP address spellings are normalized in
     ``url``.

     The IPv4 cases use hexadecimal, octal and single-integer forms that
     are all expected to come out as dotted-decimal ``192.0.2.235``. The
     IPv6 cases cover an address that is returned as written, one that is
     zero-compressed, and one expected in IPv4-mapped form.
     '''
     # (expected normalized URL, input URL) pairs, checked in order.
     # The IPv4 inputs use the same ``http`` scheme as their expected
     # values: the IPv6 cases below expect the scheme to come back
     # unchanged, so an ``https`` input could never yield an ``http`` URL.
     cases = (
         ('http://192.0.2.235/', 'http://0xC0.0x00.0x02.0xEB'),
         ('http://192.0.2.235/', 'http://0301.1680.0002.0353'),
         ('http://192.0.2.235/', 'http://0xC00002EB/'),
         ('http://192.0.2.235/', 'http://3221226219/'),
         ('http://192.0.2.235/', 'http://030000001353/'),
         (
             'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6',
             'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6',
         ),
         ('https://[::1]/', 'https://[0:0:0:0:0:0:0:1]'),
         ('https://[::ffff:192.0.2.128]/', 'https://[::ffff:c000:0280]'),
     )

     for expected, raw_url in cases:
         self.assertEqual(expected, URLInfo.parse(raw_url).url)
开发者ID:Willianvdv,项目名称:wpull,代码行数:35,代码来源:url_test.py


示例12: parse_url

    def parse_url(cls, url, encoding):
        '''Return a URLInfo for the given URL, or None if it is malformed.

        When parsing raises ValueError, a warning is logged and None is
        returned instead of propagating the error.
        '''
        try:
            parsed = URLInfo.parse(url, encoding=encoding)
            # FIXME: workaround detection of bad URL unsplit. See issue #132.
            URLInfo.parse(parsed.url, encoding=encoding)
        except ValueError as error:
            message = _('Discarding malformed URL ‘{url}’: {error}.')
            _logger.warning(__(message, url=url, error=error))
            return None

        return parsed
开发者ID:mback2k,项目名称:wpull,代码行数:15,代码来源:processor.py


示例13: test_url_info_reserved_char_is_ok

 def test_url_info_reserved_char_is_ok(self):
     '''Check that ``@``, ``$``, ``;`` and a bare ``%`` in the path are
     kept in ``url``, while a non-ASCII character is percent-encoded.
     '''
     # (expected normalized URL, input URL) pairs.
     cases = (
         (
             'http://example.com/@49IMG.DLL/$SESSION$/image.png;large',
             'http://example.com/@49IMG.DLL/$SESSION$/image.png;large',
         ),
         (
             'http://example.com/@49IMG.DLL/$SESSION$/imag%C3%A9.png;large',
             'http://example.com/@49IMG.DLL/$SESSION$/imagé.png;large',
         ),
         (
             'http://example.com/$c/%system.exe/',
             'http://example.com/$c/%system.exe/',
         ),
     )

     for expected, raw_url in cases:
         self.assertEqual(expected, URLInfo.parse(raw_url).url)
开发者ID:Willianvdv,项目名称:wpull,代码行数:15,代码来源:url_test.py


示例14: test_url_info_parts

    def test_url_info_parts(self):
        '''Check every individual attribute of a parsed URLInfo.

        Two URLs are covered: an HTTP URL with credentials, a bracketed
        IPv6 host, an explicit port, a non-ASCII path, a query and a
        fragment; and an FTP URL with credentials and no explicit port.
        '''
        # The credentials in the URL literals below are spelled out in
        # full.  They had been replaced by an "[email protected]"
        # placeholder, which contradicted the username, password and host
        # assertions made further down; the literals are rebuilt from
        # those assertions ("pass%3Aword" is the percent-encoded form of
        # the expected password "pass:word").
        url_info = URLInfo.parse(
            'HTTP://userName:pass%3Aword@[A::1]:81/ásdF\u200C/ghjK?a=b=c&D#/?')
        self.assertEqual(
            'http://userName:pass:word@[a::1]:81/'
            '%C3%A1sdF%E2%80%8C/ghjK?a=b=c&D',
            url_info.url
        )
        self.assertEqual('http', url_info.scheme)
        self.assertEqual('userName:pass%3Aword@[A::1]:81',
                         url_info.authority)
        self.assertEqual('/ásdF\u200C/ghjK?a=b=c&D#/?', url_info.resource)
        self.assertEqual('userName', url_info.username)
        self.assertEqual('pass:word', url_info.password)
        self.assertEqual('[A::1]:81', url_info.host)
        self.assertEqual('[a::1]:81', url_info.hostname_with_port)
        self.assertEqual('a::1', url_info.hostname)
        self.assertEqual(81, url_info.port)
        self.assertEqual('/%C3%A1sdF%E2%80%8C/ghjK', url_info.path)
        self.assertEqual('a=b=c&D', url_info.query)
        self.assertEqual('/?', url_info.fragment)
        self.assertEqual('utf-8', url_info.encoding)
        # ``raw`` is expected to be the input string exactly as given.
        self.assertEqual(
            'HTTP://userName:pass%3Aword@[A::1]:81/ásdF\u200C/ghjK?a=b=c&D#/?',
            url_info.raw)
        self.assertEqual(('/%C3%A1sdF%E2%80%8C', 'ghjK'), url_info.split_path())

        url_info = URLInfo.parse(
            'Ftp://N00B:hunter2@LocalHost.Example/mydocs/'
        )
        self.assertEqual('ftp', url_info.scheme)
        self.assertEqual('N00B:hunter2@LocalHost.Example',
                         url_info.authority)
        self.assertEqual('/mydocs/', url_info.resource)
        self.assertEqual('N00B', url_info.username)
        self.assertEqual('hunter2', url_info.password)
        self.assertEqual('LocalHost.Example', url_info.host)
        self.assertEqual('localhost.example', url_info.hostname_with_port)
        self.assertEqual('localhost.example', url_info.hostname)
        # No port is given in the URL; 21 is the value expected for it.
        self.assertEqual(21, url_info.port)
        self.assertEqual('/mydocs/', url_info.path)
        self.assertFalse(url_info.query)
        self.assertFalse(url_info.fragment)
        self.assertEqual('utf-8', url_info.encoding)
        self.assertEqual(
            'Ftp://N00B:hunter2@LocalHost.Example/mydocs/',
            url_info.raw)
        self.assertEqual(('/mydocs', ''), url_info.split_path())
开发者ID:asergi,项目名称:wpull,代码行数:48,代码来源:url_test.py


示例15: test_append_slash_to_path_url

 def test_append_slash_to_path_url(self):
     '''Check that ``append_slash_to_path_url`` adds a trailing slash to
     the path.
     '''
     parsed = URLInfo.parse('ftp://putfile.com/example')
     slashed_url = append_slash_to_path_url(parsed)

     self.assertEqual('ftp://putfile.com/example/', slashed_url)
开发者ID:Super-Rad,项目名称:wpull,代码行数:7,代码来源:ftp_test.py


示例16: test_http_filter

    def test_http_filter(self):
        '''Check that HTTPFilter passes an ``http`` URL and fails
        ``mailto:`` and ``javascript:`` URLs.
        '''
        record = MockURLTableRecord()
        http_filter = HTTPFilter()

        # (URL, expected verdict) pairs, checked in order.
        expectations = (
            ('http://example.net', True),
            ('mailto:[email protected]', False),
            ("javascript:alert('hello!')", False),
        )

        for url, should_pass in expectations:
            verdict = http_filter.test(URLInfo.parse(url), record)

            if should_pass:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)
开发者ID:DanielOaks,项目名称:wpull,代码行数:16,代码来源:url_test.py


示例17: fetch_robots_txt

    def fetch_robots_txt(self, request, file=None):
        '''Fetch robots.txt for the scheme and host of the given request.

        A 200 response is handed to ``_read_content``.  A ProtocolError
        during the fetch, or any other non-5xx status, is recorded through
        ``_accept_as_blank``.  A 5xx status raises ServerError.

        Coroutine.
        '''
        url_info = request.url_info
        robots_location = '{0}://{1}/robots.txt'.format(
            url_info.scheme, url_info.hostname_with_port)
        robots_url = URLInfo.parse(robots_location).url

        if not file:
            file = wpull.body.new_temp_file(os.getcwd(), hint='robots')

        with contextlib.closing(file):
            robots_request = self._web_client.request_factory(robots_url)
            session = self._web_client.session(robots_request)

            while not session.done():
                # The file is truncated before every fetch attempt.
                wpull.util.truncate_file(file.name)

                try:
                    response = yield From(session.fetch(file=file))
                except ProtocolError:
                    self._accept_as_blank(url_info)
                    return

            status_code = response.status_code

            if 500 <= status_code <= 599:
                raise ServerError('Server returned error for robots.txt.')

            if status_code != 200:
                self._accept_as_blank(url_info)
            else:
                self._read_content(response, url_info)
开发者ID:Willianvdv,项目名称:wpull,代码行数:35,代码来源:robots.py


示例18: _get_cookie_referrer_host

    def _get_cookie_referrer_host(self):
        """Return the hostname of the original request's ``Referer`` field.

        Returns None when the field is missing or empty.
        """
        referrer_url = self._original_request.fields.get("Referer")

        if not referrer_url:
            return None

        return URLInfo.parse(referrer_url).hostname
开发者ID:pombredanne,项目名称:wpull,代码行数:8,代码来源:web.py


示例19: test_url_info_query

 def test_url_info_query(self):
     '''Check that query strings with empty values and bare keys are kept
     as written, with a ``/`` path inserted before the ``?``.
     '''
     # (expected normalized URL, input URL) pairs.
     cases = (
         ('http://example.com/?a=', 'http://example.com?a='),
         ('http://example.com/?a=1', 'http://example.com?a=1'),
         ('http://example.com/?a=1&b', 'http://example.com?a=1&b'),
         ('http://example.com/?a=1&b=', 'http://example.com?a=1&b='),
     )

     for expected, raw_url in cases:
         self.assertEqual(expected, URLInfo.parse(raw_url).url)
开发者ID:Willianvdv,项目名称:wpull,代码行数:17,代码来源:url_test.py


示例20: test_url_info_to_dict

 def test_url_info_to_dict(self):
     '''Check selected entries of the dict returned by ``to_dict``.'''
     info_dict = URLInfo.parse('https://example.com/file.jpg').to_dict()

     # (key, expected value) pairs, checked in order.
     expected_entries = (
         ('path', '/file.jpg'),
         ('hostname', 'example.com'),
         ('scheme', 'https'),
         ('port', 443),
         ('encoding', 'utf-8'),
     )

     for key, expected in expected_entries:
         self.assertEqual(expected, info_dict[key])
开发者ID:Willianvdv,项目名称:wpull,代码行数:8,代码来源:url_test.py



注:本文中的wpull.url.URLInfo类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python wrapped.Styled类代码示例发布时间:2022-05-26
下一篇:
Python scraper.HTMLScraper类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap