
Python step._interpret_hadoop_jar_command_stderr Function Code Examples


This article collects typical usage examples of the Python function _interpret_hadoop_jar_command_stderr from mrjob.logs.step. If you are wondering what exactly _interpret_hadoop_jar_command_stderr does, how to call it, or what it looks like in real code, the hand-picked examples below should help.



The following presents 12 code examples of the _interpret_hadoop_jar_command_stderr function, sorted by popularity by default.
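Before the examples, here is a minimal sketch of how the function is typically called, distilled from the test cases below (note that _interpret_hadoop_jar_command_stderr is a private mrjob helper, so its exact behavior may vary between versions; the keys of the returned dict depend on what the stderr lines contain):

    # A minimal sketch distilled from the tests below: pass an iterable
    # of Hadoop stderr lines (str or bytes) and get back a dict
    # summarizing what was found (e.g. job_id, application_id, errors).
    from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

    lines = [
        '15/12/11 13:33:11 INFO mapreduce.Job:'
        ' Running job: job_1449857544442_0002\n',
    ]

    result = _interpret_hadoop_jar_command_stderr(lines)
    print(result)  # expected: {'job_id': 'job_1449857544442_0002'}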

Example 1: test_yarn_error

    def test_yarn_error(self):
        lines = [
            "16/01/22 19:14:16 INFO mapreduce.Job: Task Id :"
            " attempt_1453488173054_0001_m_000000_0, Status : FAILED\n",
            "Error: java.lang.RuntimeException: PipeMapRed" ".waitOutputThreads(): subprocess failed with code 1\n",
            "\tat org.apache.hadoop.streaming.PipeMapRed" ".waitOutputThreads(PipeMapRed.java:330)\n",
            "\tat org.apache.hadoop.streaming.PipeMapRed.mapRedFinished" "(PipeMapRed.java:543)\n",
            "\n",
        ]

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(lines),
            dict(
                errors=[
                    dict(
                        attempt_id="attempt_1453488173054_0001_m_000000_0",
                        hadoop_error=dict(
                            message=(
                                "Error: java.lang.RuntimeException: PipeMapRed"
                                ".waitOutputThreads(): subprocess failed with"
                                " code 1\n\tat org.apache.hadoop.streaming"
                                ".PipeMapRed.waitOutputThreads(PipeMapRed.java"
                                ":330)\n\tat org.apache.hadoop.streaming"
                                ".PipeMapRed.mapRedFinished(PipeMapRed.java"
                                ":543)"
                            ),
                            num_lines=5,
                            start_line=0,
                        ),
                        # task ID is implied by attempt ID
                        task_id="task_1453488173054_0001_m_000000",
                    )
                ]
            ),
        )
Developer: davidmarin, Project: mrjob, Lines: 35, Source: test_step.py


Example 2: test_lines_can_be_bytes

    def test_lines_can_be_bytes(self):
        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(
                [b"15/12/11 13:33:11 INFO mapreduce.Job:"
                 b" Running job: job_1449857544442_0002\n"]
            ),
            dict(job_id="job_1449857544442_0002"),
        )
Developer: davidmarin, Project: mrjob, Lines: 7, Source: test_step.py


Example 3: test_yarn_error_without_exception

    def test_yarn_error_without_exception(self):
        # when there's no exception, just use the whole line as the message
        lines = [
            '16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
            ' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
        ]

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(lines),
            dict(
                errors=[
                    dict(
                        attempt_id='attempt_1453488173054_0001_m_000000_0',
                        hadoop_error=dict(
                            message=(
                                'Task Id :'
                                ' attempt_1453488173054_0001_m_000000_0,'
                                ' Status : FAILED'
                            ),
                            num_lines=1,
                            start_line=0,
                        ),
                        # task ID is implied by attempt ID
                        task_id='task_1453488173054_0001_m_000000',
                    )
                ]
            ))
Developer: Affirm, Project: mrjob, Lines: 27, Source: test_step.py


Example 4: test_record_callback

    def test_record_callback(self):
        records = []

        def record_callback(record):
            records.append(record)

        lines = [
            "packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop"
            "-unjar7873615084086492115/] []"
            " /tmp/streamjob737002412080260811.jar tmpDir=null\n",
            "15/12/11 13:33:11 INFO mapreduce.Job:" " Running job: job_1449857544442_0002\n",
            "Streaming Command Failed!\n",
        ]

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(
                lines, record_callback=record_callback),
            dict(job_id="job_1449857544442_0002"),
        )

        self.assertEqual(
            records,
            [
                dict(
                    caller_location="",
                    level="",
                    logger="",
                    message=(
                        "packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop"
                        "-unjar7873615084086492115/] []"
                        " /tmp/streamjob737002412080260811.jar"
                        " tmpDir=null"
                    ),
                    num_lines=1,
                    start_line=0,
                    thread="",
                    timestamp="",
                ),
                dict(
                    caller_location="",
                    level="INFO",
                    logger="mapreduce.Job",
                    message="Running job: job_1449857544442_0002",
                    num_lines=1,
                    start_line=1,
                    thread="",
                    timestamp="15/12/11 13:33:11",
                ),
                dict(
                    caller_location="",
                    level="",
                    logger="",
                    message="Streaming Command Failed!",
                    num_lines=1,
                    start_line=2,
                    thread="",
                    timestamp="",
                ),
            ],
        )
Developer: davidmarin, Project: mrjob, Lines: 59, Source: test_step.py


Example 5: test_treat_eio_as_eof

    def test_treat_eio_as_eof(self):
        def yield_lines():
            yield ("15/12/11 13:33:11 INFO mapreduce.Job:" " Running job: job_1449857544442_0002\n")
            e = IOError()
            e.errno = errno.EIO
            raise e

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(yield_lines()),
            dict(job_id="job_1449857544442_0002"))
Developer: davidmarin, Project: mrjob, Lines: 8, Source: test_step.py
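
Example 5 is best read alongside the runner code in examples 9 and 12 below: when mrjob invokes Hadoop through a PTY, reading from the master side after the child process exits raises IOError/OSError with errno.EIO on Linux instead of signaling a clean EOF, so the parser must treat EIO as end-of-input. Here is a rough sketch of that reading pattern (a hypothetical standalone helper, not mrjob's actual code):

    import errno
    import os
    import pty

    def iter_pty_output(argv):
        # run argv as a child attached to a fake terminal and yield its
        # combined stdout/stderr line by line
        pid, master_fd = pty.fork()
        if pid == 0:  # child: replace ourselves with the command
            os.execvp(argv[0], argv)
        with os.fdopen(master_fd, 'rb') as master:
            try:
                for line in master:
                    yield line
            except (IOError, OSError) as e:
                # on Linux, reading a PTY master after the child exits
                # raises EIO rather than returning b''; treat it as EOF
                if e.errno != errno.EIO:
                    raise
            finally:
                os.waitpid(pid, 0)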


Example 6: test_infer_job_id_from_application_id

    def test_infer_job_id_from_application_id(self):
        lines = [
            "15/12/11 13:32:45 INFO impl.YarnClientImpl:"
            " Submitted application application_1449857544442_0002\n"
        ]

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(lines),
            dict(application_id="application_1449857544442_0002",
                 job_id="job_1449857544442_0002"),
        )
Developer: davidmarin, Project: mrjob, Lines: 9, Source: test_step.py
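
The inference in example 6 amounts to swapping the ID prefix. An illustrative snippet (not mrjob's actual implementation):

    # illustrative only: derive a job ID from a YARN application ID by
    # swapping the 'application_' prefix for 'job_'
    application_id = 'application_1449857544442_0002'
    job_id = 'job_' + application_id[len('application_'):]
    assert job_id == 'job_1449857544442_0002'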


Example 7: test_record_callback

    def test_record_callback(self):
        records = []

        def record_callback(record):
            records.append(record)

        lines = [
            'packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
            '-unjar7873615084086492115/] []'
            ' /tmp/streamjob737002412080260811.jar tmpDir=null\n',
            '15/12/11 13:33:11 INFO mapreduce.Job:'
            ' Running job: job_1449857544442_0002\n',
            'Streaming Command Failed!\n',
        ]

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(
                lines, record_callback=record_callback),
            dict(job_id='job_1449857544442_0002'))

        self.assertEqual(
            records,
            [
                dict(
                    caller_location='',
                    level='',
                    logger='',
                    message=('packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
                             '-unjar7873615084086492115/] []'
                             ' /tmp/streamjob737002412080260811.jar'
                             ' tmpDir=null'),
                    num_lines=1,
                    start_line=0,
                    thread='',
                    timestamp='',
                ),
                dict(
                    caller_location='',
                    level='INFO',
                    logger='mapreduce.Job',
                    message='Running job: job_1449857544442_0002',
                    num_lines=1,
                    start_line=1,
                    thread='',
                    timestamp='15/12/11 13:33:11',
                ),
                dict(
                    caller_location='',
                    level='',
                    logger='',
                    message='Streaming Command Failed!',
                    num_lines=1,
                    start_line=2,
                    thread='',
                    timestamp='',
                ),
            ])
Developer: Affirm, Project: mrjob, Lines: 57, Source: test_step.py


Example 8: test_yarn_error

    def test_yarn_error(self):
        lines = [
            '16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
            ' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
            'Error: java.lang.RuntimeException: PipeMapRed'
            '.waitOutputThreads(): subprocess failed with code 1\n',
            '\tat org.apache.hadoop.streaming.PipeMapRed'
            '.waitOutputThreads(PipeMapRed.java:330)\n',
            '\tat org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
            '(PipeMapRed.java:543)\n',
            '\n',
        ]

        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(lines),
            dict(
                errors=[
                    dict(
                        attempt_id='attempt_1453488173054_0001_m_000000_0',
                        hadoop_error=dict(
                            message=(
                                'Error: java.lang.RuntimeException: PipeMapRed'
                                '.waitOutputThreads(): subprocess failed with'
                                ' code 1\n\tat org.apache.hadoop.streaming'
                                '.PipeMapRed.waitOutputThreads(PipeMapRed.java'
                                ':330)\n\tat org.apache.hadoop.streaming'
                                '.PipeMapRed.mapRedFinished(PipeMapRed.java'
                                ':543)'
                            ),
                            num_lines=5,
                            start_line=0,
                        ),
                        # task ID is implied by attempt ID
                        task_id='task_1453488173054_0001_m_000000',
                    )
                ]
            ))
Developer: Affirm, Project: mrjob, Lines: 37, Source: test_step.py


Example 9: _run_job_in_hadoop

    def _run_job_in_hadoop(self):
        for step_num, step in enumerate(self._get_steps()):
            self._warn_about_spark_archives(step)

            step_args = self._args_for_step(step_num)
            env = _fix_env(self._env_for_step(step_num))

            # log this *after* _args_for_step(), which can start a search
            # for the Hadoop streaming jar
            log.info('Running step %d of %d...' %
                     (step_num + 1, self._num_steps()))
            log.debug('> %s' % cmd_line(step_args))
            log.debug('  with environment: %r' % sorted(env.items()))

            log_interpretation = {}
            self._log_interpretations.append(log_interpretation)

            # try to use a PTY if it's available
            try:
                pid, master_fd = pty.fork()
            except (AttributeError, OSError):
                # no PTYs, just use Popen

                # user won't get much feedback for a while, so tell them
                # Hadoop is running
                log.debug('No PTY available, using Popen() to invoke Hadoop')

                step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)

                step_interpretation = _interpret_hadoop_jar_command_stderr(
                    step_proc.stderr,
                    record_callback=_log_record_from_hadoop)

                # there shouldn't be much output to STDOUT
                for line in step_proc.stdout:
                    _log_line_from_hadoop(to_unicode(line).strip('\r\n'))

                step_proc.stdout.close()
                step_proc.stderr.close()

                returncode = step_proc.wait()
            else:
                # we have PTYs
                if pid == 0:  # we are the child process
                    os.execvpe(step_args[0], step_args, env)
                else:
                    log.debug('Invoking Hadoop via PTY')

                    with os.fdopen(master_fd, 'rb') as master:
                        # reading from master gives us the subprocess's
                        # stderr and stdout (it's a fake terminal)
                        step_interpretation = (
                            _interpret_hadoop_jar_command_stderr(
                                master,
                                record_callback=_log_record_from_hadoop))
                        _, returncode = os.waitpid(pid, 0)

            # make sure output_dir is filled
            if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._step_output_uri(step_num))

            log_interpretation['step'] = step_interpretation

            step_type = step['type']

            if not _is_spark_step_type(step_type):
                counters = self._pick_counters(log_interpretation, step_type)
                if counters:
                    log.info(_format_counters(counters))
                else:
                    log.warning('No counters found')

            if returncode:
                error = self._pick_error(log_interpretation, step_type)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n' %
                              _format_error(error))

                # use CalledProcessError's well-known message format
                reason = str(CalledProcessError(returncode, step_args))
                raise StepFailedException(
                    reason=reason, step_num=step_num,
                    num_steps=self._num_steps())
Developer: okomestudio, Project: mrjob, Lines: 84, Source: hadoop.py


Example 10: test_yarn

    def test_yarn(self):
        self.assertEqual(
            _interpret_hadoop_jar_command_stderr(YARN_STEP_LOG_LINES),
            PARSED_YARN_STEP_LOG_LINES)
Developer: davidmarin, Project: mrjob, Lines: 2, Source: test_step.py


Example 11: test_empty

    def test_empty(self):
        self.assertEqual(_interpret_hadoop_jar_command_stderr([]), {})
Developer: davidmarin, Project: mrjob, Lines: 2, Source: test_step.py


Example 12: _run_job_in_hadoop

    def _run_job_in_hadoop(self):
        for step_num in range(self._num_steps()):
            step_args = self._args_for_step(step_num)

            # log this *after* _args_for_step(), which can start a search
            # for the Hadoop streaming jar
            log.info('Running step %d of %d' %
                      (step_num + 1, self._num_steps()))
            log.debug('> %s' % cmd_line(step_args))

            log_interpretation = {}
            self._log_interpretations.append(log_interpretation)

            # try to use a PTY if it's available
            try:
                pid, master_fd = pty.fork()
            except (AttributeError, OSError):
                # no PTYs, just use Popen

                # user won't get much feedback for a while, so tell them
                # Hadoop is running
                log.debug('No PTY available, using Popen() to invoke Hadoop')

                step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)

                step_interpretation = _interpret_hadoop_jar_command_stderr(
                    step_proc.stderr,
                    record_callback=_log_record_from_hadoop)

                # there shouldn't be much output to STDOUT
                for line in step_proc.stdout:
                    _log_line_from_hadoop(to_string(line).strip('\r\n'))

                step_proc.stdout.close()
                step_proc.stderr.close()

                returncode = step_proc.wait()
            else:
                # we have PTYs
                if pid == 0:  # we are the child process
                    os.execvp(step_args[0], step_args)
                else:
                    log.debug('Invoking Hadoop via PTY')

                    with os.fdopen(master_fd, 'rb') as master:
                        # reading from master gives us the subprocess's
                        # stderr and stdout (it's a fake terminal)
                        step_interpretation = (
                            _interpret_hadoop_jar_command_stderr(
                                master,
                                record_callback=_log_record_from_hadoop))
                        _, returncode = os.waitpid(pid, 0)

            # make sure output_dir is filled
            if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._hdfs_step_output_dir(step_num))

            log_interpretation['step'] = step_interpretation

            if 'counters' not in step_interpretation:
                log.info('Attempting to read counters from history log')
                self._interpret_history_log(log_interpretation)

            # just print counters for this one step
            self._print_counters(step_nums=[step_num])

            if returncode:
                error = self._pick_error(log_interpretation)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n' %
                              _format_error(error))

                raise CalledProcessError(returncode, step_args)
Developer: imtiaz39, Project: mrjob, Lines: 74, Source: hadoop.py



Note: The mrjob.logs.step._interpret_hadoop_jar_command_stderr examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by their developers; copyright remains with the original authors, and distribution and use should follow each project's License. Please do not reproduce without permission.

