本文整理汇总了Python中pyspark.profiler.ProfilerCollector类的典型用法代码示例。如果您正苦于以下问题:Python ProfilerCollector类的具体用法?Python ProfilerCollector怎么用?Python ProfilerCollector使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ProfilerCollector类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: _do_init
#.........这里部分代码省略.........
self._batchSize = batchSize # -1 represents an unlimited batch size
self._unbatched_serializer = serializer
if batchSize == 0:
self.serializer = AutoBatchedSerializer(self._unbatched_serializer)
else:
self.serializer = BatchedSerializer(self._unbatched_serializer,
batchSize)
# Set any parameters passed directly to us on the conf
if master:
self._conf.setMaster(master)
if appName:
self._conf.setAppName(appName)
if sparkHome:
self._conf.setSparkHome(sparkHome)
if environment:
for key, value in environment.items():
self._conf.setExecutorEnv(key, value)
for key, value in DEFAULT_CONFIGS.items():
self._conf.setIfMissing(key, value)
# Check that we have at least the required parameters
if not self._conf.contains("spark.master"):
raise Exception("A master URL must be set in your configuration")
if not self._conf.contains("spark.app.name"):
raise Exception("An application name must be set in your configuration")
# Read back our properties from the conf in case we loaded some of them from
# the classpath or an external config file
self.master = self._conf.get("spark.master")
self.appName = self._conf.get("spark.app.name")
self.sparkHome = self._conf.get("spark.home", None)
for (k, v) in self._conf.getAll():
if k.startswith("spark.executorEnv."):
varName = k[len("spark.executorEnv."):]
self.environment[varName] = v
self.environment["PYTHONHASHSEED"] = os.environ.get("PYTHONHASHSEED", "0")
# Create the Java SparkContext through Py4J
self._jsc = jsc or self._initialize_context(self._conf._jconf)
# Reset the SparkConf to the one actually used by the SparkContext in JVM.
self._conf = SparkConf(_jconf=self._jsc.sc().conf())
# Create a single Accumulator in Java that we'll send all our updates through;
# they will be passed back to us through a TCP server
self._accumulatorServer = accumulators._start_update_server()
(host, port) = self._accumulatorServer.server_address
self._javaAccumulator = self._jvm.PythonAccumulatorV2(host, port)
self._jsc.sc().register(self._javaAccumulator)
self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')
self.pythonVer = "%d.%d" % sys.version_info[:2]
# Broadcast's __reduce__ method stores Broadcast instances here.
# This allows other code to determine which Broadcast instances have
# been pickled, so it can determine which Java broadcast objects to
# send.
self._pickled_broadcast_vars = BroadcastPickleRegistry()
SparkFiles._sc = self
root_dir = SparkFiles.getRootDirectory()
sys.path.insert(1, root_dir)
# Deploy any code dependencies specified in the constructor
self._python_includes = list()
for path in (pyFiles or []):
self.addPyFile(path)
# Deploy code dependencies set by spark-submit; these will already have been added
# with SparkContext.addFile, so we just need to add them to the PYTHONPATH
for path in self._conf.get("spark.submit.pyFiles", "").split(","):
if path != "":
(dirname, filename) = os.path.split(path)
if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
self._python_includes.append(filename)
sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
# Create a temporary directory inside spark.local.dir:
local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir(self._jsc.sc().conf())
self._temp_dir = \
self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir, "pyspark") \
.getAbsolutePath()
# profiling stats collected for each PythonRDD
if self._conf.get("spark.python.profile", "false") == "true":
dump_path = self._conf.get("spark.python.profile.dump", None)
self.profiler_collector = ProfilerCollector(profiler_cls, dump_path)
else:
self.profiler_collector = None
# create a signal handler which would be invoked on receiving SIGINT
def signal_handler(signal, frame):
self.cancelAllJobs()
raise KeyboardInterrupt()
# see http://stackoverflow.com/questions/23206787/
if isinstance(threading.current_thread(), threading._MainThread):
signal.signal(signal.SIGINT, signal_handler)
开发者ID:AllenShi,项目名称:spark,代码行数:101,代码来源:context.py
示例2: SparkContext
#.........这里部分代码省略.........
# Broadcast's __reduce__ method stores Broadcast instances here.
# This allows other code to determine which Broadcast instances have
# been pickled, so it can determine which Java broadcast objects to
# send.
self._pickled_broadcast_vars = BroadcastPickleRegistry()
SparkFiles._sc = self
root_dir = SparkFiles.getRootDirectory()
sys.path.insert(1, root_dir)
# Deploy any code dependencies specified in the constructor
self._python_includes = list()
for path in (pyFiles or []):
self.addPyFile(path)
# Deploy code dependencies set by spark-submit; these will already have been added
# with SparkContext.addFile, so we just need to add them to the PYTHONPATH
for path in self._conf.get("spark.submit.pyFiles", "").split(","):
if path != "":
(dirname, filename) = os.path.split(path)
if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
self._python_includes.append(filename)
sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
# Create a temporary directory inside spark.local.dir:
local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir(self._jsc.sc().conf())
self._temp_dir = \
self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir, "pyspark") \
.getAbsolutePath()
# profiling stats collected for each PythonRDD
if self._conf.get("spark.python.profile", "false") == "true":
dump_path = self._conf.get("spark.python.profile.dump", None)
self.profiler_collector = ProfilerCollector(profiler_cls, dump_path)
else:
self.profiler_collector = None
# create a signal handler which would be invoked on receiving SIGINT
def signal_handler(signal, frame):
self.cancelAllJobs()
raise KeyboardInterrupt()
# see http://stackoverflow.com/questions/23206787/
if isinstance(threading.current_thread(), threading._MainThread):
signal.signal(signal.SIGINT, signal_handler)
def __repr__(self):
return "<SparkContext master={master} appName={appName}>".format(
master=self.master,
appName=self.appName,
)
def _repr_html_(self):
return """
<div>
<p><b>SparkContext</b></p>
<p><a href="{sc.uiWebUrl}">Spark UI</a></p>
<dl>
<dt>Version</dt>
<dd><code>v{sc.version}</code></dd>
<dt>Master</dt>
<dd><code>{sc.master}</code></dd>
<dt>AppName</dt>
<dd><code>{sc.appName}</code></dd>
开发者ID:AllenShi,项目名称:spark,代码行数:67,代码来源:context.py
示例3: _do_init
def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
conf, jsc, profiler_cls):
self.environment = environment or {}
self._conf = conf or SparkConf(_jvm=self._jvm)
self._batchSize = batchSize # -1 represents an unlimited batch size
self._unbatched_serializer = serializer
if batchSize == 0:
self.serializer = AutoBatchedSerializer(self._unbatched_serializer)
else:
self.serializer = BatchedSerializer(self._unbatched_serializer,
batchSize)
# Set any parameters passed directly to us on the conf
if master:
self._conf.setMaster(master)
if appName:
self._conf.setAppName(appName)
if sparkHome:
self._conf.setSparkHome(sparkHome)
if environment:
for key, value in environment.items():
self._conf.setExecutorEnv(key, value)
for key, value in DEFAULT_CONFIGS.items():
self._conf.setIfMissing(key, value)
# Check that we have at least the required parameters
if not self._conf.contains("spark.master"):
raise Exception("A master URL must be set in your configuration")
if not self._conf.contains("spark.app.name"):
raise Exception("An application name must be set in your configuration")
# Read back our properties from the conf in case we loaded some of them from
# the classpath or an external config file
self.master = self._conf.get("spark.master")
self.appName = self._conf.get("spark.app.name")
self.sparkHome = self._conf.get("spark.home", None)
for (k, v) in self._conf.getAll():
if k.startswith("spark.executorEnv."):
varName = k[len("spark.executorEnv."):]
self.environment[varName] = v
if sys.version >= '3.3' and 'PYTHONHASHSEED' not in os.environ:
# disable randomness of hash of string in worker, if this is not
# launched by spark-submit
self.environment["PYTHONHASHSEED"] = "0"
# Create the Java SparkContext through Py4J
self._jsc = jsc or self._initialize_context(self._conf._jconf)
# Create a single Accumulator in Java that we'll send all our updates through;
# they will be passed back to us through a TCP server
self._accumulatorServer = accumulators._start_update_server()
(host, port) = self._accumulatorServer.server_address
self._javaAccumulator = self._jsc.accumulator(
self._jvm.java.util.ArrayList(),
self._jvm.PythonAccumulatorParam(host, port))
self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')
self.pythonVer = "%d.%d" % sys.version_info[:2]
# Broadcast's __reduce__ method stores Broadcast instances here.
# This allows other code to determine which Broadcast instances have
# been pickled, so it can determine which Java broadcast objects to
# send.
self._pickled_broadcast_vars = set()
SparkFiles._sc = self
root_dir = SparkFiles.getRootDirectory()
sys.path.insert(1, root_dir)
# Deploy any code dependencies specified in the constructor
self._python_includes = list()
for path in (pyFiles or []):
self.addPyFile(path)
# Deploy code dependencies set by spark-submit; these will already have been added
# with SparkContext.addFile, so we just need to add them to the PYTHONPATH
for path in self._conf.get("spark.submit.pyFiles", "").split(","):
if path != "":
(dirname, filename) = os.path.split(path)
if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
self._python_includes.append(filename)
sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
# Create a temporary directory inside spark.local.dir:
local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir(self._jsc.sc().conf())
self._temp_dir = \
self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir, "pyspark") \
.getAbsolutePath()
# profiling stats collected for each PythonRDD
if self._conf.get("spark.python.profile", "false") == "true":
dump_path = self._conf.get("spark.python.profile.dump", None)
self.profiler_collector = ProfilerCollector(profiler_cls, dump_path)
else:
self.profiler_collector = None
开发者ID:HodaAlemi,项目名称:spark,代码行数:95,代码来源:context.py
注:本文中的pyspark.profiler.ProfilerCollector类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论