• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python conf.load_opts_from_mrjob_confs函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 mrjob.conf.load_opts_from_mrjob_confs 函数的典型用法代码示例。如果您正苦于以下问题:Python load_opts_from_mrjob_confs 函数的具体用法?Python load_opts_from_mrjob_confs 怎么用?Python load_opts_from_mrjob_confs 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了 load_opts_from_mrjob_confs 函数的 6 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: test_symlink_to_duplicate_conf_path

    def test_symlink_to_duplicate_conf_path(self):
        """A conf file and a symlink to it are loaded as a single entry.

        Both orderings of the two paths are checked; in each case the
        result holds exactly one entry, reported under whichever path was
        listed last.
        """
        real_path = os.path.join(self.tmp_dir, "mrjob.conf")
        link_path = os.path.join(self.tmp_dir, "mrjob.conf.symlink")

        with open(real_path, "w") as conf_file:
            dump_mrjob_conf({}, conf_file)

        # the link target is given relative to the link's own directory
        os.symlink("mrjob.conf", link_path)

        # (paths handed to the loader, path expected in the single result)
        cases = (
            ([real_path, link_path], link_path),
            ([link_path, real_path], real_path),
        )
        for paths, surviving_path in cases:
            self.assertEqual(
                load_opts_from_mrjob_confs("foo", paths),
                [(surviving_path, {})])
开发者ID:kartheek6,项目名称:mrjob,代码行数:11,代码来源:test_conf.py


示例2: __init__

    def __init__(self, alias, opts, conf_paths):
        """Layer validated config-file options underneath the options
        passed in directly, then resolve the final values.

        :param alias: Runner alias (e.g. ``'local'``)
        :param opts: Keyword args to runner's constructor (usually from the
                     command line).
        :param conf_paths: An iterable of paths to config files
        """
        super(RunnerOptionStore, self).__init__()

        # sanitize the directly-passed options (and warn about bad keys)
        # before touching any config file
        direct_opts = self.validated_options(opts)

        # one validated dict per loaded config file, in the order returned
        loaded = load_opts_from_mrjob_confs(alias, conf_paths=conf_paths)
        for conf_path, raw_opts in loaded:
            origin = ' from %s' % conf_path
            validated = self.validated_options(raw_opts, from_where=origin)
            self.cascading_dicts.append(validated)

        # directly-passed options always go last
        self.cascading_dicts.append(direct_opts)

        if len(self.cascading_dicts) > 2:
            # everything between the first two entries and the final
            # (directly-passed) entry, i.e. the dicts appended in the loop
            # above -- presumably the first two are set up by the base
            # class; confirm there
            conf_layers = self.cascading_dicts[2:-1]
            if (not any(len(layer) != 0 for layer in conf_layers) and
                    len(conf_paths or []) > 0):
                log.warning('No configs specified for %s runner' % alias)

        self.populate_values_from_cascading_dicts()

        log.debug('Active configuration:')
        log.debug(pprint.pformat(self))
开发者ID:parastoo-62,项目名称:mrjob,代码行数:30,代码来源:runner.py


示例3: __init__

    def __init__(self, alias, opts, conf_paths):
        """Build the option store from config files plus directly-passed
        options.

        Options from each loaded config file are validated and appended to
        ``self.cascading_dicts`` in the order returned by
        ``load_opts_from_mrjob_confs()``; the validated *opts* are appended
        last. Final values are then populated from the cascade and
        ``self._validate_cleanup()`` is run.

        :param alias: Runner alias (e.g. ``'local'``)
        :param opts: Options from the command line
        :param conf_paths: Either a file path or an iterable of paths to config
                           files
        """
        super(RunnerOptionStore, self).__init__()

        # sanitize incoming options and issue warnings for bad keys
        opts = self.validated_options(
            opts, 'Got unexpected keyword arguments: %s')

        unsanitized_opt_dicts = load_opts_from_mrjob_confs(
            alias, conf_paths=conf_paths)

        for path, mrjob_conf_opts in unsanitized_opt_dicts:
            # The message built here is itself a %-template (it ends in an
            # escaped ``%%s`` placeholder), so any literal '%' in the path
            # has to be doubled; otherwise it would be read as a conversion
            # specifier when the template is filled in later.
            escaped_path = ('%s' % path).replace('%', '%%')
            self.cascading_dicts.append(self.validated_options(
                mrjob_conf_opts,
                'Got unexpected opts from %s: %%s' % escaped_path))

        self.cascading_dicts.append(opts)

        # warn when every dict between the first two entries and the final
        # (directly-passed) entry is empty
        if (len(self.cascading_dicts) > 2 and
                all(len(d) == 0 for d in self.cascading_dicts[2:-1])):
            log.warning('No configs specified for %s runner' % alias)

        self.populate_values_from_cascading_dicts()

        self._validate_cleanup()
开发者ID:AnthonyNystrom,项目名称:mrjob,代码行数:30,代码来源:runner.py


示例4: test_symlink_to_duplicate_conf_path

    def test_symlink_to_duplicate_conf_path(self):
        """Listing a conf file together with a symlink to it yields one
        entry, reported under the path that was listed last."""
        target = os.path.join(self.tmp_dir, 'mrjob.conf')
        with open(target, 'w') as conf_file:
            dump_mrjob_conf({}, conf_file)

        link = os.path.join(self.tmp_dir, 'mrjob.conf.symlink')
        os.symlink('mrjob.conf', link)

        # real file first, symlink second: only the symlink entry remains
        loaded = load_opts_from_mrjob_confs('foo', [target, link])
        self.assertEqual(loaded, [(link, {})])

        # symlink first, real file second: only the real-file entry remains
        loaded = load_opts_from_mrjob_confs('foo', [link, target])
        self.assertEqual(loaded, [(target, {})])
开发者ID:Affirm,项目名称:mrjob,代码行数:17,代码来源:test_conf.py


示例5: test_conf_path_order_beats_include

    def test_conf_path_order_beats_include(self):
        """The order of explicitly listed conf paths wins over ``include``.

        ``conf_path_1`` includes ``conf_path_2`` and both are also listed
        explicitly. The result is expected to contain each file exactly
        once, in the order the paths were listed.
        """
        conf_path_1 = os.path.join(self.tmp_dir, 'mrjob.1.conf')
        conf_path_2 = os.path.join(self.tmp_dir, 'mrjob.2.conf')

        # conf_path_1 must really include conf_path_2; with two empty conf
        # files the test could not tell path order apart from include order
        with open(conf_path_1, 'w') as f:
            dump_mrjob_conf({'include': conf_path_2}, f)

        with open(conf_path_2, 'w') as f:
            dump_mrjob_conf({}, f)

        # shouldn't matter that conf_path_1 includes conf_path_2
        self.assertEqual(
            load_opts_from_mrjob_confs('foo', [conf_path_1, conf_path_2]),
            [(conf_path_1, {}), (conf_path_2, {})])
开发者ID:Affirm,项目名称:mrjob,代码行数:14,代码来源:test_conf.py


示例6: __init__

    def __init__(self, mr_job_script=None, conf_paths=None,
                 extra_args=None, file_upload_args=None,
                 hadoop_input_format=None, hadoop_output_format=None,
                 input_paths=None, output_dir=None, partitioner=None,
                 sort_values=None, stdin=None, step_output_dir=None,
                 **opts):
        """All runners take the following keyword arguments:

        :type mr_job_script: str
        :param mr_job_script: the path of the ``.py`` file containing the
                              :py:class:`~mrjob.job.MRJob`. If this is None,
                              you won't actually be able to :py:meth:`run` the
                              job, but other utilities (e.g. :py:meth:`ls`)
                              will work.
        :type conf_paths: None or list
        :param conf_paths: List of config files to combine and use, or None to
                           search for mrjob.conf in the default locations.
        :type extra_args: list of str
        :param extra_args: a list of extra cmd-line arguments to pass to the
                           mr_job script. This is a hook to allow jobs to take
                           additional arguments.
        :param file_upload_args: a list of tuples of ``('--ARGNAME', path)``.
                                 The file at the given path will be uploaded
                                 to the local directory of the mr_job script
                                 when it runs, and then passed into the script
                                 with ``--ARGNAME``. Useful for passing in
                                 SQLite DBs and other configuration files to
                                 your job.
        :type hadoop_input_format: str
        :param hadoop_input_format: name of an optional Hadoop ``InputFormat``
                                    class. Passed to Hadoop along with your
                                    first step with the ``-inputformat``
                                    option. Note that if you write your own
                                    class, you'll need to include it in your
                                    own custom streaming jar (see
                                    :mrjob-opt:`hadoop_streaming_jar`).
        :type hadoop_output_format: str
        :param hadoop_output_format: name of an optional Hadoop
                                     ``OutputFormat`` class. Passed to Hadoop
                                     along with your first step with the
                                     ``-outputformat`` option. Note that if you
                                     write your own class, you'll need to
                                     include it in your own custom streaming
                                     jar (see
                                     :mrjob-opt:`hadoop_streaming_jar`).
        :type input_paths: list of str
        :param input_paths: Input files for your job. Supports globs and
                            recursively walks directories (e.g.
                            ``['data/common/', 'data/training/*.gz']``). If
                            this is left blank, we'll read from stdin
        :type output_dir: str
        :param output_dir: An empty/non-existent directory where Hadoop
                           should put the final output from the job.
                           If you don't specify an output directory, we'll
                           output into a subdirectory of this job's temporary
                           directory. You can control this from the command
                           line with ``--output-dir``. This option cannot be
                           set from configuration files. If used with the
                           hadoop runner, this path does not need to be fully
                           qualified with ``hdfs://`` URIs because it's
                           understood that it has to be on HDFS.
        :type partitioner: str
        :param partitioner: Optional name of a Hadoop partitioner class, e.g.
                            ``'org.apache.hadoop.mapred.lib.HashPartitioner'``.
                            Hadoop streaming will use this to determine how
                            mapper output should be sorted and distributed
                            to reducers.
        :type sort_values: bool
        :param sort_values: if true, set partitioners and jobconf variables
                            so that reducers to receive the values
                            associated with any key in sorted order (sorted by
                            their *encoded* value). Also known as secondary
                            sort.
        :param stdin: an iterable (can be a ``BytesIO`` or even a list) to use
                      as stdin. This is a hook for testing; if you set
                      ``stdin`` via :py:meth:`~mrjob.job.MRJob.sandbox`, it'll
                      get passed through to the runner. If for some reason
                      your lines are missing newlines, we'll add them;
                      this makes it easier to write automated tests.
        :type step_output_dir: str
        :param step_output_dir: An empty/non-existent directory where Hadoop
                                should put output from all steps other than
                                the last one (this only matters for multi-step
                                jobs). Currently ignored by local runners.
        """
        self._ran_job = False

        # opts are made from:
        #
        # empty defaults (everything set to None)
        # runner-specific defaults
        # opts from config file(s)
        # opts from command line
        self._opts = self._combine_confs(
            [(None, {key: None for key in self.OPT_NAMES})] +
            [(None, self._default_opts())] +
            load_opts_from_mrjob_confs(self.alias, conf_paths) +
            [('the command line', opts)]
        )

#.........这里部分代码省略.........
开发者ID:okomestudio,项目名称:mrjob,代码行数:101,代码来源:runner.py



注:本文中的mrjob.conf.load_opts_from_mrjob_confs函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python emr.EMRJobRunner类代码示例发布时间:2022-05-27
下一篇:
Python conf.load_opts_from_mrjob_conf函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap