• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python options.add_basic_opts函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中mrjob.options.add_basic_opts函数的典型用法代码示例。如果您正苦于以下问题:Python add_basic_opts函数的具体用法?Python add_basic_opts怎么用?Python add_basic_opts使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了add_basic_opts函数的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: main

def main(args):
    """Collect stats about active EMR job flows and print them as JSON.

    Args:
        args: command-line argument list (e.g. ``sys.argv[1:]``); the tool
            takes options only, no positional arguments.

    Prints the compiled stats to stdout as JSON, or pretty-printed when
    ``-p``/``--pretty-print`` is given.
    """
    # parse command-line args
    usage = '%prog [options]'
    description = (
        "Collect EMR stats from active jobflows. "
        "Active jobflows are those in states of: "
        "BOOTSTRAPPING, RUNNING, STARTING, and WAITING. "
        # the trailing spaces below were missing in the original, which made
        # --help print run-on words like "jobflowsand" and "executethese"
        "Collected stats include total number of active jobflows "
        "and total number of Amazon EC2 instances used to execute "
        "these jobflows. The instance counts are not separated by "
        "instance type.")
    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        "-p", "--pretty-print",
        action="store_true", dest="pretty_print", default=False,
        help=('Pretty print the collected stats'))
    add_basic_opts(option_parser)

    options, args = option_parser.parse_args(args)
    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
    log.info('collecting EMR active jobflows...')
    job_flows = collect_active_job_flows(options.conf_paths)
    log.info('compiling stats from collected jobflows...')
    stats = job_flows_to_stats(job_flows)

    if options.pretty_print:
        pretty_print(stats)
    else:
        print(json.dumps(stats))
开发者ID:tempcyc,项目名称:mrjob,代码行数:31,代码来源:collect_emr_stats.py


示例2: make_option_parser

def make_option_parser():
    """Build the option parser for the S3 tmpwatch tool.

    Returns an :class:`OptionParser` with the -t/--test flag, the shared
    mrjob basic options, and the scraped S3 connection options.
    """
    description = (
        'Delete all files in a given URI that are older than a specified'
        ' time.\n\nThe time parameter defines the threshold for removing'
        ' files. If the file has not been accessed for *time*, the file is'
        ' removed. The time argument is a number with an optional'
        ' single-character suffix specifying the units: m for minutes, h for'
        ' hours, d for days.  If no suffix is specified, time is in hours.')

    parser = OptionParser(
        usage='%prog [options] <time-untouched> <URIs>',
        description=description)

    parser.add_option(
        '-t', '--test',
        action='store_true', dest='test', default=False,
        help="Don't actually delete any files; just log that we would")

    # shared mrjob options, plus the S3-connection options scraped from
    # MRJob's own option groups
    add_basic_opts(parser)
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: ('aws_region', 's3_endpoint'),
    })

    alphabetize_options(parser)

    return parser
开发者ID:kartheek6,项目名称:mrjob,代码行数:25,代码来源:s3_tmpwatch.py


示例3: make_option_parser

def make_option_parser():
    """Build the option parser for the S3 tmpwatch tool (basic variant)."""
    usage = "%prog [options] <time-untouched> <URIs>"
    description = (
        "Delete all files in a given URI that are older than a specified"
        " time.\n\nThe time parameter defines the threshold for removing"
        " files. If the file has not been accessed for *time*, the file is"
        " removed. The time argument is a number with an optional"
        " single-character suffix specifying the units: m for minutes, h for"
        " hours, d for days.  If no suffix is specified, time is in hours.")

    parser = OptionParser(usage=usage, description=description)

    # dry-run switch: log deletions instead of performing them
    parser.add_option(
        "-t", "--test", dest="test", action="store_true", default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)

    return parser
开发者ID:tempcyc,项目名称:mrjob,代码行数:25,代码来源:s3_tmpwatch.py


示例4: make_option_parser

def make_option_parser():
    """Build the option parser for the idle-EMR-cluster terminator.

    Returns an :class:`OptionParser` carrying the idleness thresholds,
    pool filters, safety switches, and shared mrjob/EMR-connect options.
    """
    parser = OptionParser(
        usage='%prog [options]',
        description=('Terminate idle EMR clusters that meet the criteria'
                     ' passed in on the command line (or, by default,'
                     ' clusters that have been idle for one hour).'))

    # thresholds that decide which clusters count as terminable
    parser.add_option(
        '--max-hours-idle', dest='max_hours_idle', type='float',
        default=None,
        help=('Max number of hours a cluster can go without bootstrapping,'
              ' running a step, or having a new step created. This will fire'
              ' even if there are pending steps which EMR has failed to'
              ' start. Make sure you set this higher than the amount of time'
              ' your jobs can take to start instances and bootstrap.'))
    parser.add_option(
        '--max-mins-locked', dest='max_mins_locked', type='float',
        default=DEFAULT_MAX_MINUTES_LOCKED,
        help='Max number of minutes a cluster can be locked while idle.')
    parser.add_option(
        '--mins-to-end-of-hour', dest='mins_to_end_of_hour', type='float',
        default=None,
        help=('Terminate clusters that are within this many minutes of'
              ' the end of a full hour since the job started running'
              ' AND have no pending steps.'))

    # pool-based filters
    parser.add_option(
        '--unpooled-only', dest='unpooled_only', default=False,
        action='store_true',
        help='Only terminate un-pooled clusters')
    parser.add_option(
        '--pooled-only', dest='pooled_only', default=False,
        action='store_true',
        help='Only terminate pooled clusters')
    parser.add_option(
        '--pool-name', dest='pool_name', default=None,
        help='Only terminate clusters in the given named pool.')

    # safety switches: report instead of acting
    parser.add_option(
        '--dry-run', dest='dry_run', action='store_true', default=False,
        help="Don't actually kill idle jobs; just log that we would")
    parser.add_option(
        '-t', '--test', dest='test', action='store_true', default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)
    add_emr_connect_opts(parser)
    alphabetize_options(parser)

    return parser
开发者ID:gitbenedict,项目名称:mrjob,代码行数:52,代码来源:terminate_idle_clusters.py


示例5: make_option_parser

def make_option_parser():
    """Build the option parser for the EMR usage report tool."""
    parser = OptionParser(
        usage='%prog [options]',
        description='Print a giant report on EMR usage.')

    # how far back in time to audit; None means "as far as EMR allows"
    parser.add_option(
        '--max-days-ago', dest='max_days_ago', default=None, type='float',
        help=('Max number of days ago to look at jobs. By default, we go back'
              ' as far as EMR supports (currently about 2 months)'))

    add_basic_opts(parser)

    return parser
开发者ID:PythonCharmers,项目名称:mrjob,代码行数:14,代码来源:audit_usage.py


示例6: make_option_parser

def make_option_parser():
    """Build the option parser for the terminate-job-flow tool."""
    parser = OptionParser(
        usage='%prog [options] jobflowid',
        description='Terminate an existing EMR job flow.')

    # dry-run switch: log instead of acting
    parser.add_option(
        '-t', '--test', action='store_true', dest='test', default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)

    return parser
开发者ID:Asana,项目名称:mrjob,代码行数:14,代码来源:terminate_job_flow.py


示例7: make_option_parser

def make_option_parser():
    """Build the option parser for the EMR log-fetching tool.

    Registers the basic mrjob options, the log listing/catting flags,
    EMR connection options, and a few scraped runner options.
    """
    description = (
        'List, display, and parse Hadoop logs associated with EMR job flows.'
        ' Useful for debugging failed jobs for which mrjob did not display a'
        ' useful error message or for inspecting jobs whose output has been'
        ' lost.')
    parser = OptionParser(usage='usage: %prog [options] JOB_FLOW_ID',
                          description=description)

    add_basic_opts(parser)

    # what to do with the logs: search, list, or cat
    parser.add_option(
        '-f', '--find-failure', dest='find_failure', action='store_true',
        default=False,
        help=('Search the logs for information about why'
              ' the job failed'))
    parser.add_option(
        '-l', '--list', dest='list_relevant', action="store_true",
        default=False,
        help='List log files MRJob finds relevant')
    parser.add_option(
        '-L', '--list-all', dest='list_all', action="store_true",
        default=False,
        help='List all log files')
    parser.add_option(
        '-a', '--cat', dest='cat_relevant', action="store_true",
        default=False,
        help='Cat log files MRJob finds relevant')
    parser.add_option(
        '-A', '--cat-all', dest='cat_all', action="store_true",
        default=False,
        help='Cat all log files to JOB_FLOW_ID/')

    # scoping / extras
    parser.add_option(
        '-s', '--step-num', dest='step_num', action='store', type='int',
        default=None,
        help=('Limit results to a single step. To be used'
              ' with --list and --cat.'))
    parser.add_option(
        '--counters', dest='get_counters', action='store_true',
        default=False,
        help='Show counters from the job flow')

    add_emr_connect_opts(parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: ('ec2_key_pair_file', 's3_sync_wait_time', 'ssh_bin')
    })

    alphabetize_options(parser)

    return parser
开发者ID:kartheek6,项目名称:mrjob,代码行数:49,代码来源:fetch_logs.py


示例8: make_option_parser

def make_option_parser():
    """Build the option parser for the long-running-jobs report."""
    # DEFAULT_MIN_HOURS is interpolated into the description text
    description = ('Report jobs running for more than a certain number of'
                   ' hours (by default, %.1f). This can help catch buggy jobs'
                   ' and Hadoop/EMR operational issues.' % DEFAULT_MIN_HOURS)
    parser = OptionParser(usage='%prog [options]', description=description)

    parser.add_option(
        '--min-hours', dest='min_hours', default=DEFAULT_MIN_HOURS,
        type='float',
        help=('Minimum number of hours a job can run before we report it.'
              ' Default: %default'))

    add_basic_opts(parser)

    return parser
开发者ID:DanisHack,项目名称:mrjob,代码行数:17,代码来源:report_long_jobs.py


示例9: make_option_parser

def make_option_parser():
    """Build the option parser for the EMR usage report (basic variant)."""
    parser = OptionParser(
        usage="%prog [options]",
        description="Print a giant report on EMR usage.")

    # audit window; None means "as far back as EMR allows"
    parser.add_option(
        "--max-days-ago", dest="max_days_ago", type="float", default=None,
        help=("Max number of days ago to look at jobs. By default, we go back"
              " as far as EMR supports (currently about 2 months)"))

    add_basic_opts(parser)

    return parser
开发者ID:rfbowen,项目名称:mrjob,代码行数:20,代码来源:audit_usage.py


示例10: make_option_parser

def make_option_parser():
    """Build the option parser for creating a persistent EMR job flow."""
    description = (
        'Create a persistent EMR job flow to run jobs in, and print its ID to'
        ' stdout. WARNING: Do not run'
        ' this without mrjob.tools.emr.terminate_idle_job_flows in your'
        ' crontab; job flows left idle can quickly become expensive!')
    parser = OptionParser(usage='%prog [options]', description=description)

    add_basic_opts(parser)
    # these aren't nicely broken down, just scrape specific options
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: (
            'bootstrap_mrjob',
            'label',
            'owner',
        ),
    })
    add_emr_connect_opts(parser)
    add_emr_launch_opts(parser)

    alphabetize_options(parser)
    return parser
开发者ID:kartheek6,项目名称:mrjob,代码行数:23,代码来源:create_job_flow.py


示例11: main

def main(cl_args=None):
    """Run a shell command on every node of an EMR job flow.

    Parses JOB_FLOW_ID and a quoted command string from *cl_args*, then
    executes the command on master and slaves, storing each node's
    stdout/stderr under the output directory (default: JOB_FLOW_ID).
    """
    usage = 'usage: %prog JOB_FLOW_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-o', '--output-dir', dest='output_dir',
                      default=None,
                      help="Specify an output directory (default:"
                      " JOB_FLOW_ID)")
    add_basic_opts(parser)
    add_emr_connect_opts(parser)
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: ('ec2_key_pair_file', 'ssh_bin'),
    })
    alphabetize_options(parser)

    options, args = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except these local-only keys is forwarded to the runner
    runner_kwargs = {
        key: value for key, value in options.__dict__.items()
        if key not in ('output_dir', 'quiet', 'verbose')
    }

    if len(args) < 2:
        parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = args[0], args[1]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
开发者ID:kartheek6,项目名称:mrjob,代码行数:37,代码来源:mrboss.py


示例12: configure_options

    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options()

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...
        """
        # --help and its per-category variants are registered as plain
        # store_true flags (dest help_*) rather than optparse's built-in
        # help action, so the launcher can choose which help text to print
        self.option_parser.add_option(
            '--help', dest='help_main', action='store_true', default=False,
            help='show this message and exit')

        self.option_parser.add_option(
            '--help-emr', dest='help_emr', action='store_true', default=False,
            help='show EMR-related options')

        self.option_parser.add_option(
            '--help-hadoop', dest='help_hadoop', action='store_true',
            default=False,
            help='show Hadoop-related options')

        self.option_parser.add_option(
            '--help-local', dest='help_local', action='store_true',
            default=False,
            help='show local/inline runner-related options')

        self.option_parser.add_option(
            '--help-runner', dest='help_runner', action='store_true',
            default=False, help='show runner-related options')

        # protocol stuff
        # each OptionGroup below is kept as an instance attribute so other
        # code (presumably help printing / option scraping) can reach it
        self.proto_opt_group = OptionGroup(
            self.option_parser, 'Protocols')
        self.option_parser.add_option_group(self.proto_opt_group)

        add_protocol_opts(self.proto_opt_group)

        # options for running the entire job
        self.runner_opt_group = OptionGroup(
            self.option_parser, 'Running the entire job')
        self.option_parser.add_option_group(self.runner_opt_group)

        add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        add_basic_opts(self.runner_opt_group)

        # options for inline/local runners
        self.local_opt_group = OptionGroup(
            self.option_parser,
            'Running locally (these apply when you set -r inline or -r local)')
        self.option_parser.add_option_group(self.local_opt_group)

        add_local_opts(self.local_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        self.option_parser.add_option_group(self.hadoop_emr_opt_group)

        add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        self.option_parser.add_option_group(self.hadoop_opt_group)

        add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on EMR (these apply when you set -r emr)')
        self.option_parser.add_option_group(self.emr_opt_group)

        add_emr_opts(self.emr_opt_group)
开发者ID:gitbenedict,项目名称:mrjob,代码行数:83,代码来源:launch.py


示例13: make_option_parser

def make_option_parser():
    """Build the option parser for the idle-job-flow terminator.

    Returns an :class:`OptionParser` carrying the idleness thresholds,
    pool filters, safety switches, and the shared mrjob basic options.
    """
    description = (
        "Terminate idle EMR job flows that meet the criteria"
        " passed in on the command line (or, by default,"
        " job flows that have been idle for one hour).")

    parser = OptionParser(usage="%prog [options]", description=description)

    # thresholds that decide which job flows count as terminable
    parser.add_option(
        "--max-hours-idle", dest="max_hours_idle", default=None,
        type="float",
        help=("Max number of hours a job flow can go without bootstrapping,"
              " running a step, or having a new step created. This will fire"
              " even if there are pending steps which EMR has failed to"
              " start. Make sure you set this higher than the amount of time"
              " your jobs can take to start instances and bootstrap."))
    parser.add_option(
        "--max-mins-locked", dest="max_mins_locked",
        default=DEFAULT_MAX_MINUTES_LOCKED, type="float",
        help="Max number of minutes a job flow can be locked while idle.")
    parser.add_option(
        "--mins-to-end-of-hour", dest="mins_to_end_of_hour", default=None,
        type="float",
        help=("Terminate job flows that are within this many minutes of"
              " the end of a full hour since the job started running"
              " AND have no pending steps."))

    # pool-based filters
    parser.add_option(
        "--unpooled-only", dest="unpooled_only", action="store_true",
        default=False,
        help="Only terminate un-pooled job flows")
    parser.add_option(
        "--pooled-only", dest="pooled_only", action="store_true",
        default=False,
        help="Only terminate pooled job flows")
    parser.add_option(
        "--pool-name", dest="pool_name", default=None,
        help="Only terminate job flows in the given named pool.")

    # safety switches: report instead of acting
    parser.add_option(
        "--dry-run", dest="dry_run", action="store_true", default=False,
        help="Don't actually kill idle jobs; just log that we would")
    parser.add_option(
        "-t", "--test", dest="test", action="store_true", default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)

    return parser
开发者ID:dnephin,项目名称:mrjob,代码行数:74,代码来源:terminate_idle_job_flows.py


示例14: configure_options

    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options()

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...
        """
        # --help and its per-category variants are registered as plain
        # store_true flags (dest help_*) rather than optparse's built-in
        # help action, so the launcher can choose which help text to print
        self.option_parser.add_option(
            "--help", dest="help_main", action="store_true", default=False, help="show this message and exit"
        )

        self.option_parser.add_option(
            "--help-emr", dest="help_emr", action="store_true", default=False, help="show EMR-related options"
        )

        self.option_parser.add_option(
            "--help-hadoop", dest="help_hadoop", action="store_true", default=False, help="show Hadoop-related options"
        )

        self.option_parser.add_option(
            "--help-runner", dest="help_runner", action="store_true", default=False, help="show runner-related options"
        )

        # protocol stuff
        # each OptionGroup below is kept as an instance attribute so other
        # code (presumably help printing / option scraping) can reach it
        self.proto_opt_group = OptionGroup(self.option_parser, "Protocols")
        self.option_parser.add_option_group(self.proto_opt_group)

        add_protocol_opts(self.proto_opt_group)

        # options for running the entire job
        self.runner_opt_group = OptionGroup(self.option_parser, "Running the entire job")
        self.option_parser.add_option_group(self.runner_opt_group)

        add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        add_basic_opts(self.runner_opt_group)

        # options shared by every Hadoop-like runner (hadoop, emr, local)
        self.hadoop_opts_opt_group = OptionGroup(
            self.option_parser,
            "Configuring or emulating Hadoop (these apply when you set -r" " hadoop, -r emr, or -r local)",
        )
        self.option_parser.add_option_group(self.hadoop_opts_opt_group)

        add_hadoop_shared_opts(self.hadoop_opts_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            self.option_parser, "Running on Hadoop or EMR (these apply when you set -r hadoop or" " -r emr)"
        )
        self.option_parser.add_option_group(self.hadoop_emr_opt_group)

        add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            self.option_parser, "Running on Hadoop (these apply when you set -r hadoop)"
        )
        self.option_parser.add_option_group(self.hadoop_opt_group)

        add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            self.option_parser, "Running on Amazon Elastic MapReduce (these apply when you set -r" " emr)"
        )
        self.option_parser.add_option_group(self.emr_opt_group)

        add_emr_opts(self.emr_opt_group)
开发者ID:senseb,项目名称:mrjob,代码行数:74,代码来源:launch.py



注:本文中的mrjob.options.add_basic_opts函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python parse.find_python_traceback函数代码示例发布时间:2022-05-27
下一篇:
Python step._interpret_hadoop_jar_command_stderr函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap