
Python java_gateway.launch_gateway Function Code Examples


This article collects typical usage examples of the launch_gateway function from pyspark.java_gateway. If you have been wondering how launch_gateway works, how it is called, or what real-world usage looks like, the curated examples below should help.



The following shows eight code examples of the launch_gateway function, sorted by popularity by default.
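Before the individual examples, here is a minimal sketch of the basic calling pattern. This is an illustration rather than code from any of the projects below: launch_gateway is an internal PySpark helper, and its signature varies across versions (older releases take no arguments, as in Examples 3-6; newer ones accept an optional SparkConf, as in Examples 1 and 7).

    from pyspark import SparkConf
    from pyspark.java_gateway import launch_gateway

    # Start the JVM-side gateway process and connect to it over Py4J.
    # The SparkConf argument is accepted in newer Spark versions only.
    gateway = launch_gateway(SparkConf())

    # The gateway's jvm attribute is the Py4J entry point to JVM classes.
    jvm = gateway.jvm
    print(jvm.java.lang.System.getProperty("java.version"))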

Example 1: _ensure_initialized

    def _ensure_initialized(cls, instance=None, gateway=None, conf=None):
        """
        Checks whether a SparkContext is initialized or not.
        Throws error if a SparkContext is already running.
        """
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = gateway or launch_gateway(conf)
                SparkContext._jvm = SparkContext._gateway.jvm

            if instance:
                if (SparkContext._active_spark_context and
                        SparkContext._active_spark_context != instance):
                    currentMaster = SparkContext._active_spark_context.master
                    currentAppName = SparkContext._active_spark_context.appName
                    callsite = SparkContext._active_spark_context._callsite

                    # Raise error if there is already a running Spark context
                    raise ValueError(
                        "Cannot run multiple SparkContexts at once; "
                        "existing SparkContext(app=%s, master=%s)"
                        " created by %s at %s:%s "
                        % (currentAppName, currentMaster,
                            callsite.function, callsite.file, callsite.linenum))
                else:
                    SparkContext._active_spark_context = instance
Developer: AllenShi, Project: spark, Lines: 26, Source file: context.py


Example 2: sparkSession

 def sparkSession(cls):
     if not hasattr(cls, "spark"):
         # We can't use the SparkSession Builder here, since we need to call
         # Scala side's SmvTestHive.createContext to create the HiveTestContext's
         # SparkSession.
         # So we need to
         #   * Create a java_gateway
         #   * Create a SparkConf using the jgw (since without it SparkContext will ignore the given conf)
         #   * Create python SparkContext using the SparkConf (so we can specify the warehouse.dir)
         #   * Create Scala side HiveTestContext SparkSession
         #   * Create python SparkSession
         jgw = launch_gateway(None)
         jvm = jgw.jvm
         import tempfile
         import getpass
         hivedir = "file://{0}/{1}/smv_hive_test".format(tempfile.gettempdir(), getpass.getuser())
         sConf = SparkConf(False, _jvm=jvm).set("spark.sql.test", "")\
                                           .set("spark.sql.hive.metastore.barrierPrefixes",
                                                "org.apache.spark.sql.hive.execution.PairSerDe")\
                                           .set("spark.sql.warehouse.dir", hivedir)\
                                           .set("spark.ui.enabled", "false")
         sc = SparkContext(master="local[1]", appName="SMV Python Test", conf=sConf, gateway=jgw).getOrCreate()
         jss = sc._jvm.org.apache.spark.sql.hive.test.SmvTestHive.createContext(sc._jsc.sc())
         cls.spark = SparkSession(sc, jss.sparkSession())
     return cls.spark
Developer: TresAmigosSD, Project: SMV, Lines: 25, Source file: testconfig.py
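A hedged usage sketch of the classmethod above: in a test suite it would typically be called once per test class so that all tests share the Hive-backed session. The test class below is hypothetical, and TestConfig is assumed to be the class that defines sparkSession(); only the helper itself comes from the example.

    import unittest

    class SmvHiveTestCase(unittest.TestCase):  # hypothetical test class
        @classmethod
        def setUpClass(cls):
            # Builds the gateway and Hive-backed SparkSession once per class.
            cls.spark = TestConfig.sparkSession()

        def test_simple_dataframe(self):
            df = self.spark.createDataFrame([(1, "a")], ["id", "val"])
            self.assertEqual(df.count(), 1)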


Example 3: _ensure_initialized

    def _ensure_initialized(cls, instance=None, gateway=None):
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = gateway or launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

            if instance:
                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                    raise ValueError("Cannot run multiple SparkContexts at once")
                else:
                    SparkContext._active_spark_context = instance
Developer: ComplexQubit, Project: incubator-spark, Lines: 12, Source file: context.py


Example 4: _ensure_initialized

    def _ensure_initialized(cls, instance=None):
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeIteratorToPickleFile = \
                    SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
                SparkContext._takePartition = \
                    SparkContext._jvm.PythonRDD.takePartition

            if instance:
                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                    raise ValueError("Cannot run multiple SparkContexts at once")
                else:
                    SparkContext._active_spark_context = instance
Developer: AustinBGibbons, Project: incubator-spark, Lines: 15, Source file: context.py


Example 5: _ensure_initialized

    def _ensure_initialized(cls, instance=None, gateway=None):
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = gateway or launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

            if instance:
                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                    currentMaster = SparkContext._active_spark_context.master
                    currentAppName = SparkContext._active_spark_context.appName
                    callsite = SparkContext._active_spark_context._callsite

                    # Raise error if there is already a running Spark context
                    raise ValueError("Cannot run multiple SparkContexts at once; existing SparkContext(app=%s, master=%s)" \
                        " created by %s at %s:%s " \
                        % (currentAppName, currentMaster, callsite.function, callsite.file, callsite.linenum))
                else:
                    SparkContext._active_spark_context = instance
Developer: AndreSchumacher, Project: spark, Lines: 19, Source file: context.py


Example 6: __init__

    def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
        environment=None, batchSize=1024):
        """
        Create a new SparkContext.

        @param master: Cluster URL to connect to
               (e.g. mesos://host:port, spark://host:port, local[4]).
        @param jobName: A name for your job, to display on the cluster web UI
        @param sparkHome: Location where Spark is installed on cluster nodes.
        @param pyFiles: Collection of .zip or .py files to send to the cluster
               and add to PYTHONPATH.  These can be paths on the local file
               system or HDFS, HTTP, HTTPS, or FTP URLs.
        @param environment: A dictionary of environment variables to set on
               worker nodes.
        @param batchSize: The number of Python objects represented as a single
               Java object.  Set 1 to disable batching or -1 to use an
               unlimited batch size.
        """
        with SparkContext._lock:
            if SparkContext._active_spark_context:
                raise ValueError("Cannot run multiple SparkContexts at once")
            else:
                SparkContext._active_spark_context = self
                if not SparkContext._gateway:
                    SparkContext._gateway = launch_gateway()
                    SparkContext._jvm = SparkContext._gateway.jvm
                    SparkContext._writeIteratorToPickleFile = \
                        SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
                    SparkContext._takePartition = \
                        SparkContext._jvm.PythonRDD.takePartition
        self.master = master
        self.jobName = jobName
        self.sparkHome = sparkHome or None # None becomes null in Py4J
        self.environment = environment or {}
        self.batchSize = batchSize  # -1 represents an unlimited batch size

        # Create the Java SparkContext through Py4J
        empty_string_array = self._gateway.new_array(self._jvm.String, 0)
        self._jsc = self._jvm.JavaSparkContext(master, jobName, sparkHome,
                                              empty_string_array)

        # Create a single Accumulator in Java that we'll send all our updates through;
        # they will be passed back to us through a TCP server
        self._accumulatorServer = accumulators._start_update_server()
        (host, port) = self._accumulatorServer.server_address
        self._javaAccumulator = self._jsc.accumulator(
                self._jvm.java.util.ArrayList(),
                self._jvm.PythonAccumulatorParam(host, port))

        self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')
        # Broadcast's __reduce__ method stores Broadcast instances here.
        # This allows other code to determine which Broadcast instances have
        # been pickled, so it can determine which Java broadcast objects to
        # send.
        self._pickled_broadcast_vars = set()

        # Deploy any code dependencies specified in the constructor
        for path in (pyFiles or []):
            self.addPyFile(path)
        SparkFiles._sc = self
        sys.path.append(SparkFiles.getRootDirectory())

        # Create a temporary directory inside spark.local.dir:
        local_dir = self._jvm.spark.Utils.getLocalDir()
        self._temp_dir = \
            self._jvm.spark.Utils.createTempDir(local_dir).getAbsolutePath()
Developer: WuErPing, Project: spark, Lines: 66, Source file: context.py
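For context, a sketch of how this legacy constructor would have been invoked; the values are illustrative and drawn from the docstring above, and this API predates SparkConf, so it is not how current Spark releases are initialized.

    # Illustrative only: the legacy positional/keyword API documented above.
    # launch_gateway() is invoked internally the first time a context is built.
    sc = SparkContext(master="local[4]",
                      jobName="example-job",
                      batchSize=512)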


Example 7: setUpClass

 def setUpClass(cls):
     gateway = launch_gateway(SparkConf())
     cls._jvm = gateway.jvm
     cls.longMessage = True
     random.seed(42)
Developer: JkSelf, Project: spark, Lines: 5, Source file: test_broadcast.py
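A hedged follow-up sketch of what a test body might do with the gateway created in setUpClass above; the test method and assertion are hypothetical, and only launch_gateway(SparkConf()) and gateway.jvm come from the example.

    def test_jvm_roundtrip(self):
        # cls._jvm was stored by setUpClass; build a Java object through Py4J.
        java_list = self._jvm.java.util.ArrayList()
        java_list.add("x")
        self.assertEqual(java_list.size(), 1)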


Example 8: __init__

    def __init__(self, arglist, _sparkSession, py_module_hotload=True):
        self.smvHome = os.environ.get("SMV_HOME")
        if (self.smvHome is None):
            raise SmvRuntimeError("SMV_HOME env variable not set!")

        self.sparkSession = _sparkSession

        if (self.sparkSession is not None):
            sc = self.sparkSession.sparkContext
            sc.setLogLevel("ERROR")

            self.sc = sc
            self.sqlContext = self.sparkSession._wrapped
            self._jvm = sc._jvm
            self.j_smvPyClient = self._jvm.org.tresamigos.smv.python.SmvPyClientFactory.init(self.sparkSession._jsparkSession)
            self.j_smvApp = self.j_smvPyClient.j_smvApp()
        else:
            _gw = launch_gateway(None)
            self._jvm = _gw.jvm

        self.py_module_hotload = py_module_hotload

        java_import(self._jvm, "org.tresamigos.smv.ColumnHelper")
        java_import(self._jvm, "org.tresamigos.smv.SmvDFHelper")
        java_import(self._jvm, "org.tresamigos.smv.dqm.*")
        java_import(self._jvm, "org.tresamigos.smv.panel.*")
        java_import(self._jvm, "org.tresamigos.smv.python.SmvPythonHelper")
        java_import(self._jvm, "org.tresamigos.smv.SmvHDFS")
        java_import(self._jvm, "org.tresamigos.smv.DfCreator")

        self.smvSchemaObj = self._jvm.SmvPythonHelper.getSmvSchema()

        self.py_smvconf = SmvConfig(arglist)

        # configure spark sql params
        if (self.sparkSession is not None):
            for k, v in self.py_smvconf.spark_sql_props().items():
                self.sqlContext.setConf(k, v)

        # issue #429 set application name from smv config
        if (self.sparkSession is not None):
            sc._conf.setAppName(self.appName())

        # CmdLine is static, so can be an attribute
        cl = self.py_smvconf.cmdline
        self.cmd_line = namedtuple("CmdLine", cl.keys())(*cl.values())

        # shortcut is meant for internal use only
        self.dsm = DataSetMgr(self._jvm, self.py_smvconf)

        # computed df cache, keyed by m.versioned_fqn
        self.data_cache = {}

        # AFTER app is available but BEFORE stages,
        # use the dynamically configured app dir to set the source path, library path
        self.prependDefaultDirs()

        self.repoFactory = DataSetRepoFactory(self)
        self.dsm.register(self.repoFactory)

        # provider cache, keyed by providers' fqn
        self.provider_cache = {}
        self.refresh_provider_cache()

        # Initialize DataFrame and Column with helper methods
        smv.helpers.init_helpers()
Developer: TresAmigosSD, Project: SMV, Lines: 66, Source file: smvapp.py



Note: The pyspark.java_gateway.launch_gateway examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets come from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution or use should follow each project's license. Do not republish without permission.

