• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python util.to_lines函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 mrjob.util.to_lines 函数的典型用法代码示例。如果您正苦于以下问题:Python to_lines 函数的具体用法?Python to_lines 怎么用?Python to_lines 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了 to_lines 函数的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: _assert_output_matches

    def _assert_output_matches(
            self, job_class, input_bytes=b'', input_paths=(), job_args=None):
        """Assert that the harness job and a reference job agree on output.

        Runs *job_class* once through ``self._reference_job()`` and once
        through ``self._harness_job()`` with the same input and arguments,
        then compares the sorted output lines of the two runs.

        :param job_class: job class to run both ways
        :param input_bytes: passed through as ``input_bytes`` to both jobs
        :param input_paths: passed through as ``input_paths`` to both jobs
        :param job_args: passed through as ``job_args`` to both jobs;
                         ``None`` (the default) means an empty list
        """
        # a literal ``[]`` default would be a single list shared by every
        # call; build a fresh one per call instead
        if job_args is None:
            job_args = []

        # run classes defined in this module in inline mode, classes
        # with their own script files in local mode. used by
        # test_skip_combiner_that_runs_cmd()
        if job_class.__module__ == __name__:
            runner_alias = 'inline'
        else:
            runner_alias = 'local'

        reference_job = self._reference_job(
            job_class, input_bytes=input_bytes,
            input_paths=input_paths,
            job_args=job_args,
            runner_alias=runner_alias)

        with reference_job.make_runner() as runner:
            runner.run()

            # sort so that the comparison ignores output line order
            reference_output = sorted(to_lines(runner.cat_output()))

        harness_job = self._harness_job(
            job_class, input_bytes=input_bytes,
            input_paths=input_paths,
            job_args=job_args)

        with harness_job.make_runner() as runner:
            runner.run()

            harness_output = sorted(to_lines(runner.cat_output()))

        self.assertEqual(harness_output, reference_output)
开发者ID:Affirm,项目名称:mrjob,代码行数:33,代码来源:test_mrjob_spark_harness.py


示例2: test_mixed_job

    def test_mixed_job(self):
        """Run a job that combines streaming and spark steps."""
        # the input is converted to 'null\t"foo"', 'null\t"bar"' and then
        # the job counts chars, so these are the per-character counts
        expected_counts = [
            b'\t 2\n',
            b'" 4\n',
            b'a 1\n',
            b'b 1\n',
            b'f 1\n',
            b'l 4\n',
            b'n 2\n',
            b'o 2\n',
            b'r 1\n',
            b'u 2\n',
        ]

        mixed_job = MRStreamingAndSpark(['-r', 'spark'])
        mixed_job.sandbox(stdin=BytesIO(b'foo\nbar\n'))

        with mixed_job.make_runner() as spark_runner:
            spark_runner.run()

            actual_counts = sorted(to_lines(spark_runner.cat_output()))
            self.assertEqual(actual_counts, expected_counts)
开发者ID:Affirm,项目名称:mrjob,代码行数:25,代码来源:test_runner.py


示例3: test_no_trailing_newline

 def test_no_trailing_newline(self):
     """The final line is still yielded when it has no newline."""
     # the second line is split across two chunks and never terminated
     chunks = [b'Alouette,\ngentille', b' Alouette.']
     expected_lines = [b'Alouette,\n', b'gentille Alouette.']

     actual_lines = list(to_lines(iter(chunks)))

     self.assertEqual(actual_lines, expected_lines)
开发者ID:Yelp,项目名称:mrjob,代码行数:7,代码来源:test_util.py


示例4: test_python_dash_v_as_python_bin

    def test_python_dash_v_as_python_bin(self):
        """Use ``python -v`` as --python-bin and check the job still works."""
        verbose_python = cmd_line([sys.executable or 'python', '-v'])
        job_args = ['--python-bin', verbose_python, '--no-conf',
                    '-r', 'local']

        mr_job = MRTwoStepJob(job_args)
        mr_job.sandbox(stdin=[b'bar\n'])

        with mr_job.make_runner() as runner:
            runner.run()

            def any_stderr_line(matches):
                # re-open the first mapper task's stderr on every call so
                # each check scans the file from the beginning
                with open(runner._task_stderr_path('mapper', 0, 0)) as err:
                    return any(matches(text) for text in err)

            # expect python -v crud in stderr
            self.assertTrue(any_stderr_line(
                lambda text: ('import mrjob' in text or  # Python 2
                              "import 'mrjob'" in text)))

            self.assertTrue(any_stderr_line(lambda text: '#' in text))

            # should still get expected results
            expected_output = sorted([b'1\tnull\n', b'1\t"bar"\n'])
            actual_output = sorted(to_lines(runner.cat_output()))
            self.assertEqual(actual_output, expected_output)
开发者ID:Affirm,项目名称:mrjob,代码行数:25,代码来源:test_local.py


示例5: test_loading_bootstrapped_mrjob_library

    def test_loading_bootstrapped_mrjob_library(self):
        """With --bootstrap-mrjob, the script loads mrjob from the tmp dir."""
        # compare directories rather than full file paths, to deal with
        # edge cases where we load from the .py file and the script loads
        # from the .pyc compiled from that .py file
        imported_mrjob_path = os.path.realpath(mrjob.__file__)
        our_mrjob_dir = os.path.dirname(imported_mrjob_path)

        with mrjob_conf_patcher():
            where_job = MRJobWhereAreYou(
                ['-r', 'local', '--bootstrap-mrjob'])
            where_job.sandbox()

            with where_job.make_runner() as runner:
                # sanity check
                self.assertEqual(runner._bootstrap_mrjob(), True)

                tmp_dir = runner._get_local_tmp_dir()
                local_tmp_dir = os.path.realpath(tmp_dir)

                runner.run()

                output_lines = list(to_lines(runner.cat_output()))
                self.assertEqual(len(output_lines), 1)

                # script should load mrjob from its working dir
                first_line = output_lines[0]
                _, script_mrjob_dir = where_job.parse_output_line(first_line)

                self.assertNotEqual(our_mrjob_dir, script_mrjob_dir)
                self.assertTrue(script_mrjob_dir.startswith(local_tmp_dir))
开发者ID:Affirm,项目名称:mrjob,代码行数:25,代码来源:test_local.py


示例6: test_cat_output

    def test_cat_output(self):
        """cat_output() on a dir holding part files, _logs and _SUCCESS.

        The expected output holds only the contents of the three part-*
        files, not those of _logs/log.xml or _SUCCESS.
        """
        for subdir in ('a', 'b', '_logs'):
            os.mkdir(os.path.join(self.tmp_dir, subdir))

        # (path relative to tmp_dir, contents), in creation order
        fixtures = [
            (('a', 'part-00000'), 'A'),
            (('b', 'part-00001'), 'B'),
            (('part-00002',), 'C'),
            (('_logs', 'log.xml'), '<XML XML XML/>'),
            (('_SUCCESS',), 'I win'),
        ]

        for path_parts, contents in fixtures:
            with open(os.path.join(self.tmp_dir, *path_parts), 'w') as out:
                out.write(contents)

        runner = InlineMRJobRunner(conf_paths=[], output_dir=self.tmp_dir)
        actual_output = sorted(to_lines(runner.cat_output()))

        self.assertEqual(actual_output, [b'A', b'B', b'C'])
开发者ID:okomestudio,项目名称:mrjob,代码行数:32,代码来源:test_runner.py


示例7: test_buffered_lines

 def test_buffered_lines(self):
     """Lines split across chunk boundaries are joined back together."""
     # 'jumped' and 'dogs.' each straddle two chunks
     chunks = [
         b'The quick\nbrown fox\nju',
         b'mped over\nthe lazy\ndog',
         b's.\n',
     ]
     expected_lines = [
         b'The quick\n',
         b'brown fox\n',
         b'jumped over\n',
         b'the lazy\n',
         b'dogs.\n',
     ]

     actual_lines = list(to_lines(chunk for chunk in chunks))

     self.assertEqual(actual_lines, expected_lines)
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:8,代码来源:test_util.py


示例8: test_multiple_2

 def test_multiple_2(self):
     """``cat`` mapper plus ``wc -l`` as --reducer-cmd-2 counts 3 lines."""
     cmd_job = MRCmdJob(['--mapper-cmd=cat', '--reducer-cmd-2', 'wc -l',
                         '--runner=local', '--no-conf'])
     cmd_job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

     with cmd_job.make_runner() as runner:
         runner.run()

         # each output line is parsed as an integer; the counts are
         # summed across all output lines
         counts = [int(line) for line in to_lines(runner.cat_output())]
         self.assertEqual(sum(counts), 3)
开发者ID:Affirm,项目名称:mrjob,代码行数:8,代码来源:test_local.py


示例9: test_long_lines

 def test_long_lines(self):
     """Lines longer than a chunk are reassembled from 1024-byte chunks."""
     first_line = b'a' * 10000 + b'\n'
     second_line = b'b' * 1000 + b'\n'
     last_line = b'last\n'
     data = first_line + second_line + last_line

     chunk_size = 1024
     # feed the data in fixed-size slices, so the first line spans
     # several chunks
     chunks = (data[start:start + chunk_size]
               for start in range(0, len(data), chunk_size))

     self.assertEqual(
         list(to_lines(chunks)),
         [first_line, second_line, last_line])
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:8,代码来源:test_util.py


示例10: parse_output

    def parse_output(self, chunks):
        """Decode this job's final output.

        :param chunks: the output, as a stream of byte chunks
        :return: a generator yielding whatever the output protocol's
                 ``read()`` returns for each line (``(key, value)`` pairs)
        """
        # instantiate the protocol once and reuse its bound read() method
        decode_line = self.output_protocol().read

        for raw_line in to_lines(chunks):
            yield decode_line(raw_line)
开发者ID:Yelp,项目名称:mrjob,代码行数:8,代码来源:job.py


示例11: test_read_all_non_hidden_files

    def test_read_all_non_hidden_files(self):
        """Files without a part-* name, also in subdirs, are all read."""
        # (path relative to output_dir, contents)
        fixtures = [
            (('baz',), b'qux\n'),
            (('foo', 'bar'), b'baz\n'),
        ]

        for path_parts, contents in fixtures:
            self.makefile(
                os.path.join(self.output_dir, *path_parts), contents)

        actual_lines = sorted(to_lines(self.runner.cat_output()))

        self.assertEqual(actual_lines, [b'baz\n', b'qux\n'])
开发者ID:Affirm,项目名称:mrjob,代码行数:9,代码来源:test_runner.py


示例12: test_output_dir_not_considered_hidden

    def test_output_dir_not_considered_hidden(self):
        """An output dir whose own path has '_' components is still read."""
        hidden_looking_dir = os.path.join(
            self.tmp_dir, '_hidden', '_output_dir')
        part_path = os.path.join(hidden_looking_dir, 'part-00000')

        self.makefile(part_path, b'cats\n')

        runner = InlineMRJobRunner(
            conf_paths=[], output_dir=hidden_looking_dir)
        actual_lines = sorted(to_lines(runner.stream_output()))

        self.assertEqual(actual_lines, [b'cats\n'])
开发者ID:Affirm,项目名称:mrjob,代码行数:10,代码来源:test_runner.py


示例13: test_eof_without_trailing_newline

 def test_eof_without_trailing_newline(self):
     """An empty chunk ends the pending line even without a newline."""
     chunks = [
         b'Alouette,\ngentille',
         b' Alouette.',
         b'',  # treated as EOF
         b'Allouette,\nje te p',
         b'lumerais.',
     ]
     # the line pending before the empty chunk is emitted as-is, rather
     # than being joined with the data that follows
     expected_lines = [
         b'Alouette,\n',
         b'gentille Alouette.',
         b'Allouette,\n',
         b'je te plumerais.',
     ]

     actual_lines = list(to_lines(iter(chunks)))

     self.assertEqual(actual_lines, expected_lines)
开发者ID:Yelp,项目名称:mrjob,代码行数:11,代码来源:test_util.py


示例14: stream_output

    def stream_output(self):
        """Like :py:meth:`cat_output` except that it groups bytes into
        lines. Equivalent to ``mrjob.util.to_lines(runner.cat_output())``.

        Logs a deprecation warning every time it is called.

        .. deprecated:: 0.6.0
        """
        log.warning('stream_output() is deprecated and will be removed in'
                    ' v0.7.0. use mrjob.util.to_lines(runner.cat_output())'
                    ' instead.')

        return to_lines(self.cat_output())
开发者ID:okomestudio,项目名称:mrjob,代码行数:11,代码来源:runner.py


示例15: _cat_log_lines

def _cat_log_lines(fs, path):
    """Yield lines from the given log.

    Log errors rather than raising them.
    """
    try:
        if not fs.exists(path):
            return
        for line in to_lines(fs.cat(path)):
            yield to_unicode(line)
    except (IOError, OSError) as e:
        log.warning("couldn't cat() %s: %r" % (path, e))
开发者ID:Affirm,项目名称:mrjob,代码行数:12,代码来源:wrap.py


示例16: test_no_file_args_required

    def test_no_file_args_required(self):
        """Launch the sr_wc script on two input files via MRJobLauncher."""
        input_paths = [
            self.makefile('words1', b'kit and caboodle\n'),
            self.makefile('words2', b'baubles\nbangles and beads\n'),
        ]

        launcher = MRJobLauncher(
            args=['-r', 'local', tests.sr_wc.__file__] + input_paths)
        launcher.sandbox()

        with launcher.make_runner() as runner:
            runner.run()

            output_lines = list(to_lines(runner.cat_output()))

            # a single output line holding the number 7 (the two input
            # files contain seven words in total)
            self.assertEqual(len(output_lines), 1)
            self.assertEqual(int(output_lines[0]), 7)
开发者ID:Affirm,项目名称:mrjob,代码行数:14,代码来源:test_local.py


示例17: test_mixed_job

    def test_mixed_job(self):
        """Run only step 0 (the streaming part) of a mixed job."""
        # can we run just the streaming part of a job?
        streaming_only_job = self._harness_job(
            MRStreamingAndSpark,
            input_bytes=b'foo\nbar\n',
            first_step_num=0,
            last_step_num=0)

        # the streaming part is just an identity mapper, but it converts
        # lines to pairs of JSON
        expected_lines = {b'null\t"foo"\n', b'null\t"bar"\n'}

        with streaming_only_job.make_runner() as runner:
            runner.run()

            actual_lines = set(to_lines(runner.cat_output()))
            self.assertEqual(actual_lines, expected_lines)
开发者ID:Affirm,项目名称:mrjob,代码行数:15,代码来源:test_mrjob_spark_harness.py


示例18: test_output_in_subdirs

    def test_output_in_subdirs(self):
        """Output placed in subdirs (for example with nicknack) is read."""
        # (subdir, filename, contents), in creation order
        fixtures = [
            ('a', 'part-00000', b'line-a0\n'),
            ('a', 'part-00001', b'line-a1\n'),
            ('b', 'part-00000', b'line-b0\n'),
            ('b', '.crc.part-00000', b'42\n'),
        ]

        for subdir, filename, contents in fixtures:
            self.makefile(
                os.path.join(self.output_dir, subdir, filename), contents)

        actual_lines = sorted(to_lines(self.runner.cat_output()))

        # the contents of .crc.part-00000 are not expected in the output
        self.assertEqual(
            actual_lines, [b'line-a0\n', b'line-a1\n', b'line-b0\n'])
开发者ID:Affirm,项目名称:mrjob,代码行数:15,代码来源:test_runner.py


示例19: test_cat_mapper

    def test_cat_mapper(self):
        """A ``cat`` mapper command passes the input lines through."""
        data = b'x\ny\nz\n'
        expected_steps = [{
            'type': 'streaming',
            'mapper': {
                'type': 'command',
                'command': 'cat',
            },
        }]

        cat_job = MRCmdJob(['--mapper-cmd=cat', '--runner=local'])
        cat_job.sandbox(stdin=BytesIO(data))

        with cat_job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            # compare stripped output lines with the input's tokens,
            # ignoring order
            stripped = []
            for raw_line in to_lines(runner.cat_output()):
                stripped.append(raw_line.strip())

            self.assertEqual(sorted(stripped), sorted(data.split()))
开发者ID:Affirm,项目名称:mrjob,代码行数:16,代码来源:test_local.py


示例20: test_mapper_pre_filter

    def test_mapper_pre_filter(self):
        """A ``cat -e`` mapper pre-filter shows up in the job's output."""
        expected_steps = [{
            'type': 'streaming',
            'mapper': {
                'type': 'script',
                'pre_filter': 'cat -e',
            },
        }]

        filter_job = MRFilterJob(
            ['--mapper-filter', 'cat -e', '--runner=local'])
        filter_job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

        with filter_job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            stripped = []
            for raw_line in to_lines(runner.cat_output()):
                stripped.append(raw_line.strip())

            # every line is expected to end in the '$' added by the filter
            self.assertEqual(sorted(stripped), [b'x$', b'y$', b'z$'])
开发者ID:Affirm,项目名称:mrjob,代码行数:17,代码来源:test_local.py



注:本文中的mrjob.util.to_lines函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python util.unique函数代码示例发布时间:2022-05-27
下一篇:
Python util.tar_and_gzip函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap