• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python logging.__函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中wpull.backport.logging.__函数的典型用法代码示例。如果您正苦于以下问题：Python __函数的具体用法？Python __怎么用？Python __使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了__函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: snapshot

    def snapshot(self, remote, html_path=None, render_path=None):
        '''Save the current page as HTML and/or as a rendered file.

        Coroutine. The page content and URL are fetched from ``remote``
        first; each requested output is then written and, when a WARC
        recorder is set, registered through ``_add_warc_snapshot``.

        Args:
            remote: Object whose ``eval``/``call`` results are yielded.
            html_path: Optional filename for the UTF-8 encoded page content.
            render_path: Optional filename given to ``page.render``.

        Returns:
            The page content, delivered via ``tornado.gen.Return``.
        '''
        page_content = yield remote.eval('page.content')
        page_url = yield remote.eval('page.url')

        if html_path:
            _logger.debug(__('Saving snapshot to {0}.', html_path))

            # Make sure the destination directory is present before writing.
            parent_dir = os.path.dirname(html_path)
            parent_dir = os.path.abspath(parent_dir)

            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)

            with open(html_path, 'wb') as html_file:
                html_file.write(page_content.encode('utf-8'))

            if self._warc_recorder:
                self._add_warc_snapshot(html_path, 'text/html', page_url)

        if render_path:
            _logger.debug(__('Saving snapshot to {0}.', render_path))
            yield remote.call('page.render', render_path)

            if self._warc_recorder:
                self._add_warc_snapshot(
                    render_path, 'application/pdf', page_url)

        raise tornado.gen.Return(page_content)
开发者ID:mback2k,项目名称:wpull,代码行数:26,代码来源:processor.py


示例2: process

    def process(self, session: AppSession):
        '''Discover plugins, attach the session and activate them.

        A ``PluginManager`` is stored on ``session.plugin_manager``. Every
        plugin it reports is logged (debug level for plugins under the
        internal plugin path, info level otherwise), receives the session
        as ``app_session`` and, if ``should_activate()`` is true, is
        activated and has its hooks connected.

        Args:
            session: The application session.
        '''
        self._debug_log_registered_hooks(session)

        internal_plugin_path = get_package_filename(
            os.path.join('application', 'plugins'))

        script_path = session.args.plugin_script
        plugin_filenames = [script_path] if script_path else []

        locator = PluginLocator([internal_plugin_path], plugin_filenames)
        manager = PluginManager(plugin_locator=locator)
        session.plugin_manager = manager
        manager.collectPlugins()

        for info in manager.getAllPlugins():
            # Internal plugins are only reported at debug level.
            if info.path.startswith(internal_plugin_path):
                log = _logger.debug
            else:
                log = _logger.info

            log(__(
                _('Found plugin {name} from {filename}.'),
                filename=info.path,
                name=info.name
            ))

            plugin = info.plugin_object
            plugin.app_session = session

            if plugin.should_activate():
                manager.activatePluginByName(info.name)
                self._connect_plugin_hooks(session, plugin)
开发者ID:Super-Rad,项目名称:wpull,代码行数:34,代码来源:plugin.py


示例3: _polling_sleep

    def _polling_sleep(cls, resource_monitor, log=False):
        '''Sleep in 60 second steps until the resource monitor reports OK.

        Coroutine.

        Args:
            resource_monitor: Object whose ``check()`` returns a falsy value
                when nothing is wrong, otherwise an info object with
                ``path`` and ``free`` attributes.
            log: Whether to log the situation. Warnings are repeated on
                every 15th poll.
        '''
        counter = 0

        while True:
            resource_info = resource_monitor.check()

            if not resource_info:
                # Only announce recovery if we actually had to wait.
                if log and counter:
                    _logger.info(_('Situation cleared.'))

                return

            if log and counter % 15 == 0:
                free_text = wpull.string.format_size(resource_info.free)

                if resource_info.path:
                    message = __(
                        _('Low disk space on {path} ({size} free).'),
                        path=resource_info.path,
                        size=free_text
                    )
                else:
                    message = __(
                        _('Low memory ({size} free).'),
                        size=free_text
                    )

                _logger.warning(message)
                _logger.warning(_('Waiting for operator to clear situation.'))

            yield from asyncio.sleep(60)
            counter += 1
开发者ID:Super-Rad,项目名称:wpull,代码行数:26,代码来源:resmon.py


示例4: _scrape_document

    def _scrape_document(self, request, response, url_item):
        '''Ask the hook script for extra URLs and add them to the item.

        The request URL info, the response body info and the body's file
        name are converted with ``to_script_native_type`` and handed to
        ``callbacks.get_urls``. The result is either a table keyed from 1
        upwards (terminated by a ``None`` entry) or a plain iterable.
        '''
        convert = self.to_script_native_type
        url_info_dict = convert(request.url_info.to_dict())
        document_info_dict = convert(response.body.to_dict())
        filename = convert(response.body.content_file.name)

        new_url_dicts = self.callbacks.get_urls(
            filename, url_info_dict, document_info_dict)

        _logger.debug(__('Hooked scrape returned {0}', new_url_dicts))

        if not new_url_dicts:
            return

        if convert(1) not in new_url_dicts:
            for entry in new_url_dicts:
                self._add_hooked_url(url_item, entry)

            return

        # Lua doesn't have sequences
        index = 1

        while True:
            entry = new_url_dicts[convert(index)]

            _logger.debug(__('Got lua new url info {0}', entry))

            if entry is None:
                return

            self._add_hooked_url(url_item, entry)
            index += 1
开发者ID:mback2k,项目名称:wpull,代码行数:28,代码来源:hook.py


示例5: _check_resource_monitor

    def _check_resource_monitor(self):
        '''Wait in 60 second steps while the resource monitor reports trouble.

        Coroutine. Returns immediately when no resource monitor is set.
        Warnings are logged on every 15th poll, and a recovery message is
        logged once the monitor stops reporting a problem after at least
        one wait.
        '''
        if not self._resource_monitor:
            return

        counter = 0

        while True:
            resource_info = self._resource_monitor.check()

            if not resource_info:
                if counter:
                    _logger.info(_('Situation cleared.'))

                return

            if counter % 15 == 0:
                free_text = wpull.string.format_size(resource_info.free)

                if resource_info.path:
                    message = __(
                        _('Low disk space on {path} ({size} free).'),
                        path=resource_info.path,
                        size=free_text
                    )
                else:
                    message = __(
                        _('Low memory ({size} free).'),
                        size=free_text
                    )

                _logger.warning(message)
                _logger.warning(_('Waiting for operator to clear situation.'))

            yield From(trollius.sleep(60))
            counter += 1
开发者ID:Willianvdv,项目名称:wpull,代码行数:28,代码来源:engine.py


示例6: _read_input_urls

    def _read_input_urls(cls, session: AppSession, default_scheme='http'):
        '''Yield parsed URL infos for the URLs provided by the user.

        URLs come from ``session.args.urls`` followed by the lines (or HTML
        links when ``force_html`` is set) of ``session.args.input_file``.
        Each one is joined against ``session.args.base`` when given, parsed,
        and rewritten if a ``URLRewriter`` is available from the factory.
        '''
        args = session.args
        url_string_iter = args.urls or ()
        # FIXME: url rewriter isn't created yet
        url_rewriter = session.factory.get('URLRewriter')

        if args.input_file:
            if args.force_html:
                read_input = cls._input_file_as_html_links
            else:
                read_input = cls._input_file_as_lines

            url_string_iter = itertools.chain(
                url_string_iter, read_input(session))

        base_url = args.base

        for raw_url in url_string_iter:
            _logger.debug(__('Parsing URL {0}', raw_url))

            if base_url:
                full_url = wpull.url.urljoin(base_url, raw_url)
            else:
                full_url = raw_url

            url_info = wpull.url.URLInfo.parse(
                full_url, default_scheme=default_scheme)

            _logger.debug(__('Parsed URL {0}', url_info))

            if url_rewriter:
                # TODO: this logic should be a hook
                url_info = url_rewriter.rewrite(url_info)
                _logger.debug(__('Rewritten URL {0}', url_info))

            yield url_info
开发者ID:Super-Rad,项目名称:wpull,代码行数:34,代码来源:database.py


示例7: _make_socket

    def _make_socket(self):
        '''Resolve the address, create the socket and wrap it in a stream.

        Coroutine. The socket is bound to ``bind_address`` when one is
        configured. An ``SSLIOStream`` is used when SSL is enabled, a plain
        ``IOStream`` otherwise.
        '''
        host, port = self._original_address

        resolved = yield self._resolver.resolve(host, port)
        family, self._resolved_address = resolved

        self._socket = socket.socket(family, socket.SOCK_STREAM)

        _logger.debug(__('Socket to {0}/{1}.', family, self._resolved_address))

        bind_address = self._params.bind_address

        if bind_address:
            _logger.debug(__('Binding socket to {0}', bind_address))
            self._socket.bind(bind_address)

        # Options shared by both stream flavours.
        stream_options = dict(
            max_buffer_size=self._params.buffer_size,
            rw_timeout=self._params.read_timeout,
        )

        if self._ssl:
            stream = SSLIOStream(
                self._socket,
                ssl_options=self._params.ssl_options or {},
                server_hostname=host,
                **stream_options
            )
        else:
            stream = IOStream(self._socket, **stream_options)

        self._io_stream = stream
        stream.set_close_callback(self._stream_closed_callback)
开发者ID:nwpu063291,项目名称:wpull,代码行数:33,代码来源:connection.py


示例8: resolve_all

    def resolve_all(self, host, port=0):
        '''Resolve hostname and return a list of results.

        Generator-based coroutine. The cache, when enabled, is consulted
        before the network and updated afterwards.

        Args:
            host (str): The hostname.
            port (int): The port number.

        Returns:
            list: A list of tuples where each tuple contains the family and
            the socket address. See :meth:`resolve` for the socket address
            format.

        Raises:
            DNSNotFound: If the lookup produced no results.
        '''
        _logger.debug(__('Lookup address {0} {1}.', host, port))

        host = self._lookup_hook(host, port)

        if self._cache:
            results = self._get_cache(host, port, self._family)
        else:
            results = None

        if results is None:
            results = yield From(self._resolve_from_network(host, port))

        if self._cache:
            self._put_cache(host, port, results)

        if not results:
            message = (
                "DNS resolution for {0} did not return any results."
                .format(repr(host))
            )
            raise DNSNotFound(message)

        _logger.debug(__('Resolved addresses: {0}.', results))

        raise Return(results)
开发者ID:Willianvdv,项目名称:wpull,代码行数:35,代码来源:dns.py


示例9: process

    def process(self, item_session: ItemSession, request, response, file_writer_session):
        '''Process PhantomJS.

        Coroutine.

        Nothing is done unless the response status code is 200 and
        ``HTMLReader.is_supported`` accepts the request/response pair.
        Otherwise the driver is run up to ``WPULL_PHANTOMJS_TRIES`` times
        (environment variable, default 5); another attempt is made only
        after a ``PhantomJSCrashed`` error.
        '''
        if response.status_code != 200:
            return

        if not HTMLReader.is_supported(request=request, response=response):
            return

        _logger.debug('Starting PhantomJS processing.')

        self._file_writer_session = file_writer_session

        # FIXME: this is a quick hack for crashes. See #137.
        attempts = int(os.environ.get('WPULL_PHANTOMJS_TRIES', 5))

        for dummy in range(attempts):
            try:
                yield from self._run_driver(item_session, request, response)
            except asyncio.TimeoutError:
                # A timeout is not retried.
                _logger.warning(_('Waiting for page load timed out.'))
                break
            except PhantomJSCrashed as error:
                # Log the crash and fall through to the next attempt.
                _logger.exception(__('PhantomJS crashed: {}', error))
            else:
                # The driver finished without raising; stop retrying.
                break
        else:
            # Reached only when the loop ended without a break, i.e. every
            # attempt crashed.
            _logger.warning(__(
                _('PhantomJS failed to fetch ‘{url}’. I am sorry.'),
                url=request.url_info.url
            ))
开发者ID:Super-Rad,项目名称:wpull,代码行数:33,代码来源:phantomjs.py


示例10: control

    def control(self, remote):
        '''Scroll the page.

        Coroutine. The page is scrolled down by one viewport height up to
        ``self._num_scrolls`` times, waiting ``self._wait_time`` after each
        scroll. With smart scrolling enabled, no scrolling happens on pages
        the remote reports as not dynamic, and scrolling stops as soon as a
        scroll leaves the resource counter values unchanged. Afterwards the
        page is scrolled back to the top and the scroll count is logged.

        Args:
            remote: Object providing ``call``, ``eval`` and a
                ``resource_counter``.
        '''
        num_scrolls = self._num_scrolls

        if self._smart_scroll:
            is_page_dynamic = yield remote.call('isPageDynamic')

            if not is_page_dynamic:
                num_scrolls = 0

        url = yield remote.eval('page.url')
        total_scroll_count = 0

        for scroll_count in range(num_scrolls):
            _logger.debug(__('Scrolling page. Count={0}.', scroll_count))

            # Captured so it can be compared with the values after the wait.
            pre_scroll_counter_values = remote.resource_counter.values()

            scroll_position = yield remote.eval('page.scrollPosition')
            # Advance by the second component of the viewport size.
            scroll_position['top'] += self._viewport_size[1]

            yield self.scroll_to(remote, 0, scroll_position['top'])

            total_scroll_count += 1

            self._log_action('wait', self._wait_time)
            yield wpull.async.sleep(self._wait_time)

            post_scroll_counter_values = remote.resource_counter.values()

            _logger.debug(__(
                'Counter values pre={0} post={1}',
                pre_scroll_counter_values,
                post_scroll_counter_values
            ))

            # Unchanged counters mean the scroll changed nothing; in smart
            # mode that ends the scrolling.
            if post_scroll_counter_values == pre_scroll_counter_values \
               and self._smart_scroll:
                break

        # Wait while resources are still pending; the number of waits is
        # capped at the pending count seen when this loop starts.
        for dummy in range(remote.resource_counter.pending):
            if remote.resource_counter.pending:
                self._log_action('wait', self._wait_time)
                yield wpull.async.sleep(self._wait_time)
            else:
                break

        # Return to the top of the page.
        yield self.scroll_to(remote, 0, 0)

        _logger.info(__(
            gettext.ngettext(
                'Scrolled page {num} time.',
                'Scrolled page {num} times.',
                total_scroll_count,
            ), num=total_scroll_count
        ))

        if self._warc_recorder:
            self._add_warc_action_log(url)
开发者ID:mback2k,项目名称:wpull,代码行数:59,代码来源:processor.py


示例11: run

    def run(self):
        '''Run a PhantomJS driver for the item's URL and record its output.

        Coroutine. Temporary paths are obtained for the scrape snapshot,
        the action log and the event log, the driver is started and its
        process is awaited (bounded by ``self._params.load_time`` when that
        is set). When a WARC recorder is present, the action log and every
        snapshot path are added to it.

        Raises:
            PhantomJSCrashed: If the driver process exits with a non-zero
                return code.
        '''
        scrape_snapshot_path = self._get_temp_path('phantom', suffix='.html')
        action_log_path = self._get_temp_path('phantom-action', suffix='.txt')
        event_log_path = self._get_temp_path('phantom-event', suffix='.txt')
        # The scrape snapshot comes first, followed by the extra paths.
        snapshot_paths = [scrape_snapshot_path]
        snapshot_paths.extend(self._get_snapshot_paths())
        url = self._item_session.url_record.url

        driver_params = PhantomJSDriverParams(
            url=url,
            snapshot_paths=snapshot_paths,
            wait_time=self._params.wait_time,
            num_scrolls=self._params.num_scrolls,
            smart_scroll=self._params.smart_scroll,
            snapshot=self._params.snapshot,
            viewport_size=self._params.viewport_size,
            paper_size=self._params.paper_size,
            event_log_filename=event_log_path,
            action_log_filename=action_log_path,
            custom_headers=self._params.custom_headers,
            page_settings=self._params.page_settings,
        )

        driver = self._phantomjs_driver_factory(params=driver_params)

        _logger.info(__(
            _('PhantomJS fetching ‘{url}’.'),
            url=url
        ))

        # contextlib.closing calls driver.close() on exit, including when an
        # exception is raised inside the block.
        with contextlib.closing(driver):
            yield from driver.start()

            # FIXME: we don't account that things might be scrolling and
            # downloading so it might not be a good idea to timeout like
            # this
            if self._params.load_time:
                yield from asyncio.wait_for(
                    driver.process.wait(), self._params.load_time
                )
            else:
                yield from driver.process.wait()

            if driver.process.returncode != 0:
                raise PhantomJSCrashed(
                    'PhantomJS exited with code {}'
                    .format(driver.process.returncode)
                )

        if self._warc_recorder:
            self._add_warc_action_log(action_log_path, url)
            for path in snapshot_paths:
                self._add_warc_snapshot(path, url)

        _logger.info(__(
            _('PhantomJS fetched ‘{url}’.'),
            url=url
        ))
开发者ID:Super-Rad,项目名称:wpull,代码行数:58,代码来源:phantomjs.py


示例12: write_record

    def write_record(self, record):
        '''Append the record to the WARC file.

        Before writing, a journal file (the WARC filename plus
        ``-wpullinc``) noting the current file size is created; it is
        removed again once the write has finished or failed. If the write
        fails, the WARC file is cut back to its previous size so that no
        partial record remains. When a CDX filename is set, the offset and
        size of the new record are passed to ``_write_cdx_field``.

        Args:
            record: The record to write. Its ``fields`` mapping is updated
                with the ``WARC-Warcinfo-ID`` of this file, and iterating
                it yields the bytes to write.

        Raises:
            OSError, IOError: If writing fails; re-raised after the
                rollback.
        '''
        # FIXME: probably not a good idea to modifiy arguments passed to us
        # TODO: add extra gzip headers that wget uses
        record.fields['WARC-Warcinfo-ID'] = self._warcinfo_record.fields[
            WARCRecord.WARC_RECORD_ID]

        _logger.debug(__('Writing WARC record {0}.',
                         record.fields['WARC-Type']))

        if self._params.compress:
            open_func = gzip.GzipFile
        else:
            open_func = open

        # Use getsize to get actual file size. Avoid tell() because it may
        # not be the raw file position.
        if os.path.exists(self._warc_filename):
            before_offset = os.path.getsize(self._warc_filename)
        else:
            before_offset = 0

        journal_filename = self._warc_filename + '-wpullinc'

        with open(journal_filename, 'w') as file:
            file.write('wpull-journal-version:1\n')
            file.write('offset:{}\n'.format(before_offset))

        try:
            with open_func(self._warc_filename, mode='ab') as out_file:
                for data in record:
                    out_file.write(data)
        except (OSError, IOError):
            _logger.info(__(
                _('Rolling back file {filename} to length {length}.'),
                filename=self._warc_filename, length=before_offset
            ))

            # Open for update without truncating: mode 'wb' would empty the
            # file on open and wipe every record written before this one.
            # The file may be missing if the append could not even open it.
            if os.path.exists(self._warc_filename):
                with open(self._warc_filename, mode='r+b') as out_file:
                    out_file.truncate(before_offset)

            # Bare raise keeps the original traceback.
            raise
        finally:
            os.remove(journal_filename)

        after_offset = os.path.getsize(self._warc_filename)

        if self._cdx_filename:
            raw_file_offset = before_offset
            raw_file_record_size = after_offset - before_offset

            self._write_cdx_field(
                record, raw_file_record_size, raw_file_offset
            )
开发者ID:asergi,项目名称:wpull,代码行数:53,代码来源:warc.py


示例13: _load_ca_certs

    def _load_ca_certs(cls, session: AppSession, clean: bool=True):
        '''Load the Certificate Authority certificates.

        Certificates are gathered from the bundled ``ca-bundle.pem`` (when
        ``use_internal_ca_certs`` is set), from every regular file in
        ``ca_directory`` and from ``ca_certificate``, then written to a
        temporary ``.pem`` file whose name is stored on
        ``session.ca_certs_filename``. If the session already has a
        filename, it is returned without doing any work.

        Args:
            session: The application session.
            clean: Whether to register removal of the temporary file at
                interpreter exit.

        Returns:
            str: The filename of the combined certificate file.
        '''
        args = session.args

        if session.ca_certs_filename:
            return session.ca_certs_filename

        certs = set()

        if args.use_internal_ca_certs:
            pem_filename = os.path.join(
                os.path.dirname(__file__), '..', '..', 'cert', 'ca-bundle.pem'
            )
            certs.update(cls._read_pem_file(pem_filename, from_package=True))

        if args.ca_directory:
            if os.path.isdir(args.ca_directory):
                for name in os.listdir(args.ca_directory):
                    # listdir gives bare names; join with the directory so
                    # the check and the read use the real location rather
                    # than the current working directory.
                    filename = os.path.join(args.ca_directory, name)

                    if os.path.isfile(filename):
                        certs.update(cls._read_pem_file(filename))
            else:
                _logger.warning(__(
                    _('Certificate directory {path} does not exist.'),
                    path=args.ca_directory
                ))

        if args.ca_certificate:
            if os.path.isfile(args.ca_certificate):
                certs.update(cls._read_pem_file(args.ca_certificate))
            else:
                _logger.warning(__(
                    _('Certificate file {path} does not exist.'),
                    path=args.ca_certificate
                ))

        # mkstemp returns an open OS-level descriptor along with the name;
        # keep it so it can be written through and closed, not leaked.
        file_descriptor, certs_filename = tempfile.mkstemp(
            suffix='.pem', prefix='tmp-wpull-')
        session.ca_certs_filename = certs_filename

        def clean_certs_file():
            os.remove(certs_filename)

        if clean:
            atexit.register(clean_certs_file)

        with os.fdopen(file_descriptor, 'w+b') as certs_file:
            for cert in certs:
                certs_file.write(cert)

        _logger.debug('CA certs loaded.')

        # Same result as the early return above for an already-loaded
        # session.
        return certs_filename
开发者ID:Super-Rad,项目名称:wpull,代码行数:50,代码来源:sslcontext.py


示例14: _process_url_item

    def _process_url_item(self, url_item):
        '''Run the processor on one item, logging the start and the end.

        Coroutine.

        Args:
            url_item (:class:`.item.URLItem`): The item to process.

        This function calls :meth:`.processor.BaseProcessor.process`.
        '''
        begin_message = __(
            'Begin session for {0} {1}.',
            url_item.url_record, url_item.url_info
        )
        _logger.debug(begin_message)

        yield self._processor.process(url_item)

        # The attributes are read again here rather than reused from above.
        end_message = __(
            'End session for {0} {1}.',
            url_item.url_record, url_item.url_info
        )
        _logger.debug(end_message)
开发者ID:mback2k,项目名称:wpull,代码行数:15,代码来源:engine.py


示例15: _read_content

    def _read_content(self, response, original_url_info):
        '''Read response and parse the contents into the pool.

        At most 4096 bytes of the response body are read and handed to the
        robots.txt pool. If the pool raises ``ValueError``, a warning is
        logged and the URL is passed to ``_accept_as_blank``.

        Args:
            response: Response whose ``body`` is read.
            original_url_info: URL info the robots.txt belongs to.
        '''
        data = response.body.read(4096)
        url_info = original_url_info

        try:
            self._robots_txt_pool.load_robots_txt(url_info, data)
        except ValueError:
            # The template uses the named field {url}, so the value has to
            # be supplied by keyword for the message to format.
            _logger.warning(__(
                _('Failed to parse {url} for robots exclusion rules. '
                  'Ignoring.'), url=url_info.url))
            self._accept_as_blank(url_info)
        else:
            _logger.debug(__('Got a good robots.txt for {0}.',
                             url_info.url))
开发者ID:Willianvdv,项目名称:wpull,代码行数:15,代码来源:robots.py


示例16: _connect

    def _connect(self):
        '''Connect the socket if not already connected.

        Coroutine.

        Raises:
            SSLVerficationError: On a certificate error.
            ConnectionRefused: When the error number is ``ECONNREFUSED``.
            NetworkError: On any other SSL or socket error.
        '''
        if self.connected:
            # Reset the callback so the context does not leak to another
            self._io_stream.set_close_callback(self._stream_closed_callback)
            return

        yield self._make_socket()

        _logger.debug(__('Connecting to {0}.', self._resolved_address))

        try:
            yield self._io_stream.connect(
                self._resolved_address,
                timeout=self._params.connect_timeout,
            )
        except (tornado.netutil.SSLCertificateError,
                SSLVerficationError) as error:
            message = 'Certificate error: {error}'.format(error=error)
            raise SSLVerficationError(message) from error
        except (ssl.SSLError, socket.error) as error:
            if error.errno != errno.ECONNREFUSED:
                message = 'Connection error: {error}'.format(error=error)
                raise NetworkError(message) from error

            message = 'Connection refused: {error}'.format(error=error)
            raise ConnectionRefused(message) from error

        # Only reached when the connect call did not raise.
        _logger.debug('Connected.')
开发者ID:nwpu063291,项目名称:wpull,代码行数:27,代码来源:connection.py


示例17: scrape

    def scrape(self, request, response, link_type=None):
        '''Collect the links found in a supported response.

        Returns ``None`` when the response is not supported or when a
        ``link_type`` other than ``LinkType.css`` is requested. Links whose
        context is ``'import'`` are tagged as CSS, all others as media. A
        ``UnicodeError`` while reading is logged, and whatever was gathered
        before the error is still returned.

        Returns:
            ScrapeResult: The link contexts and the encoding used.
        '''
        if not self.is_supported(request=request, response=response):
            return

        if link_type and link_type != LinkType.css:
            return

        found_links = set()
        base_url = request.url_info.url

        encoding = self._encoding_override

        if not encoding:
            encoding = detect_response_encoding(response)

        try:
            with wpull.util.reset_file_offset(response.body):
                link_iter = self.iter_processed_links(
                    response.body, encoding, base_url, context=True)

                for link, context in link_iter:
                    if context == 'import':
                        found_type = LinkType.css
                    else:
                        found_type = LinkType.media

                    found_links.add(LinkContext(
                        link, inline=True, link_type=found_type))
        except UnicodeError as error:
            _logger.warning(__(
                _('Failed to read document at ‘{url}’: {error}'),
                url=request.url_info.url, error=error
            ))

        return ScrapeResult(found_links, encoding)
开发者ID:Willianvdv,项目名称:wpull,代码行数:29,代码来源:css.py


示例18: _run_worker

    def _run_worker(self):
        '''Run a single consumer.

        Coroutine.

        Items are taken from the item queue and processed until the poison
        pill is received. Setting the ``OBJGRAPH_DEBUG`` or
        ``FILE_LEAK_DEBUG`` environment variables enables diagnostics after
        each processed item.
        '''
        _logger.debug('Worker start.')

        while True:
            _priority, item = yield From(self._item_queue.get())

            if item == self.POISON_PILL:
                _logger.debug('Worker quitting.')
                return

            _logger.debug(__('Processing item {0}.', item))
            self._item_get_semaphore.release()
            self._token_queue.get_nowait()
            yield From(self._process_item(item))
            self._token_queue.task_done()

            if os.environ.get('OBJGRAPH_DEBUG'):
                import gc
                import objgraph
                gc.collect()
                objgraph.show_most_common_types(25)

            if os.environ.get('FILE_LEAK_DEBUG'):
                import subprocess
                lsof_command = ['lsof', '-p', str(os.getpid()), '-n']
                output = subprocess.check_output(lsof_command)
                text = output.decode('ascii', 'replace')

                for line in text.split('\n'):
                    if 'REG' not in line:
                        continue

                    # Report entries mentioning the working directory or
                    # /tmp/.
                    if os.getcwd() in line or '/tmp/' in line:
                        print('FILELEAK', line)
开发者ID:Willianvdv,项目名称:wpull,代码行数:34,代码来源:engine.py


示例19: _get_next_url_record

    def _get_next_url_record(self):
        '''Return the next available URL from the URL table.

        Items marked as "todo" are tried first, then items marked as
        "error", so items experiencing errors are done last.

        Returns:
            :class:`.item.URLRecord`, or ``None`` when neither status has a
            record to check out.
        '''
        attempts = (
            (Status.todo, 'Get next URL todo.'),
            (Status.error, 'Get next URL error.'),
        )
        url_record = None

        for status, message in attempts:
            _logger.debug(message)

            try:
                url_record = self._url_table.check_out(status)
            except NotFound:
                url_record = None

            if url_record:
                break

        _logger.debug(__('Return record {0}.', url_record))

        return url_record
开发者ID:Willianvdv,项目名称:wpull,代码行数:27,代码来源:engine.py


示例20: _read_response_by_length

    def _read_response_by_length(self, response):
        '''Read the connection specified by a length.

        Coroutine. The length comes from the ``Content-Length`` field. If it
        is not a valid non-negative integer, a warning is logged and the
        response is read until the connection closes instead.
        '''
        _logger.debug('Reading body by length.')

        try:
            body_size = int(response.fields['Content-Length'])

            if body_size < 0:
                raise ValueError('Content length cannot be negative.')
        except ValueError as error:
            _logger.warning(__(
                _('Invalid content length: {error}'), error=error
            ))

            yield self._read_response_until_close(response)
            return

        def on_chunk(chunk):
            # Fire the event with the data as received, then store the
            # result of decompressing it.
            self._events.response_data.fire(chunk)
            decompressed = self._decompress_data(chunk)
            response.body.content_file.write(decompressed)

        yield self._io_stream.read_bytes(
            body_size, streaming_callback=on_chunk,
        )

        remainder = self._flush_decompressor()
        response.body.content_file.write(remainder)
开发者ID:nwpu063291,项目名称:wpull,代码行数:27,代码来源:connection.py



注:本文中的wpull.backport.logging.__函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python request.Request类代码示例发布时间:2022-05-26
下一篇:
Python options.AppArgumentParser类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap