This page collects typical usage examples of the Python function sklearn.externals.joblib.cpu_count. If you are wondering what cpu_count does, how to call it, or simply want to see it used in real code, the curated examples below should help.
Twenty code examples of the cpu_count function are shown below, ordered by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python samples.
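As a quick orientation before the examples: cpu_count() simply reports the number of CPUs joblib can use, and most of the snippets below convert a joblib-style n_jobs argument into a concrete worker count. The short sketch below illustrates that convention; the helper resolve_n_jobs and the printed values are illustrative only. Note that sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23, so on recent versions you would import from the standalone joblib package instead.

from sklearn.externals.joblib import cpu_count  # on modern scikit-learn: from joblib import cpu_count

def resolve_n_jobs(n_jobs):
    """Map a joblib-style n_jobs value to a worker count (illustrative helper, not a library API)."""
    if n_jobs == 0:
        raise ValueError("n_jobs == 0 is not a valid choice")
    if n_jobs < 0:
        # -1 -> all CPUs, -2 -> all but one, and so on
        return max(1, cpu_count() + 1 + n_jobs)
    return n_jobs

print(cpu_count())          # e.g. 8 on an 8-core machine
print(resolve_n_jobs(-1))   # same as cpu_count()
print(resolve_n_jobs(-2))   # cpu_count() - 1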
Example 1: check_n_jobs
def check_n_jobs(n_jobs):
    """Check and adjust the number of CPUs that can work in parallel.

    Parameters
    ----------
    n_jobs : int,
        Number of parallel workers, specified according to joblib's
        conventions: If 0 is provided, all CPUs are used. A negative
        number indicates that all the CPUs except (|n_jobs| - 1) ones
        will be used.

    Returns
    -------
    n_jobs : int,
        Actual number of CPUs that will be used according to their
        availability.
    """
    if n_jobs == 0:  # invalid according to joblib's conventions
        raise ValueError(
            "'n_jobs == 0' is not a valid choice. "
            "Please provide a positive number of CPUs, or -1 "
            "for all CPUs, or a negative number (-i) for "
            "'all but (i-1)' CPUs (joblib conventions)."
        )
    elif n_jobs < 0:
        n_jobs = max(1, joblib.cpu_count() + n_jobs + 1)
    else:
        n_jobs = min(n_jobs, joblib.cpu_count())
    return n_jobs
Author: nilearn, Project: nilearn_sandbox, Lines: 30, Source: common_checks.py
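A quick usage sketch for the function above; the expected results assume a hypothetical machine on which joblib.cpu_count() returns 8:

import joblib  # check_n_jobs above expects the joblib module in scope

# assuming joblib.cpu_count() == 8 on this machine:
check_n_jobs(4)    # -> 4
check_n_jobs(16)   # -> 8   (capped at the available CPUs)
check_n_jobs(-1)   # -> 8   (all CPUs)
check_n_jobs(-2)   # -> 7   (all but one)
check_n_jobs(0)    # raises ValueError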
Example 2: _parallel_learning
def _parallel_learning(self, X, Y, w):
    n_samples = len(X)
    objective, positive_slacks = 0, 0
    verbose = max(0, self.verbose - 3)
    if self.batch_size is not None:
        raise ValueError("If n_jobs != 1, batch_size needs to "
                         "be None")
    # generate batches of size n_jobs
    # to speed up inference
    if self.n_jobs == -1:
        n_jobs = cpu_count()
    else:
        n_jobs = self.n_jobs

    n_batches = int(np.ceil(float(len(X)) / n_jobs))
    slices = gen_even_slices(n_samples, n_batches)
    for batch in slices:
        X_b = X[batch]
        Y_b = Y[batch]
        candidate_constraints = Parallel(n_jobs=self.n_jobs, verbose=verbose)(
            delayed(find_constraint)(self.model, x, y, w)
            for x, y in zip(X_b, Y_b))
        dpsi = np.zeros(self.model.size_psi)
        for x, y, constraint in zip(X_b, Y_b, candidate_constraints):
            y_hat, delta_psi, slack, loss = constraint
            if slack > 0:
                objective += slack
                dpsi += delta_psi
                positive_slacks += 1
        w = self._solve_subgradient(dpsi, n_samples, w)
    return objective, positive_slacks, w
Author: huyng, Project: pystruct, Lines: 30, Source: subgradient_ssvm.py
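The batching idiom in this example (one Parallel call per batch of roughly n_jobs items, sliced with gen_even_slices) can be tried in isolation. The following is a minimal sketch with a toy task standing in for find_constraint; it is not part of pystruct:

import numpy as np
from sklearn.externals.joblib import Parallel, delayed, cpu_count
from sklearn.utils import gen_even_slices

data = np.arange(10)
n_jobs = cpu_count()
n_batches = int(np.ceil(float(len(data)) / n_jobs))

for batch in gen_even_slices(len(data), n_batches):
    chunk = data[batch]
    # one parallel call per batch, one task per element, as in _parallel_learning
    results = Parallel(n_jobs=n_jobs)(delayed(pow)(int(x), 2) for x in chunk)
    print(batch, results)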
Example 3: _get_n_jobs
def _get_n_jobs(n_jobs):
    """Get number of jobs for the computation.

    See sklearn/utils/__init__.py for more information.

    This function reimplements the logic of joblib to determine the actual
    number of jobs depending on the cpu count. If -1 all CPUs are used.
    If 1 is given, no parallel computing code is used at all, which is useful
    for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.
    Thus for n_jobs = -2, all CPUs but one are used.

    Parameters
    ----------
    n_jobs : int
        Number of jobs stated in joblib convention.

    Returns
    -------
    n_jobs : int
        The actual number of jobs as positive integer.

    Examples
    --------
    >>> from sklearn.utils import _get_n_jobs
    >>> _get_n_jobs(4)
    4
    >>> jobs = _get_n_jobs(-2)
    >>> assert jobs == max(cpu_count() - 1, 1)
    >>> _get_n_jobs(0)
    Traceback (most recent call last):
    ...
    ValueError: Parameter n_jobs == 0 has no meaning.
    """
    if n_jobs < 0:
        return max(cpu_count() + 1 + n_jobs, 1)
    elif n_jobs == 0:
        raise ValueError('Parameter n_jobs == 0 has no meaning.')
    else:
        return n_jobs
Author: flaviassantos, Project: pyod, Lines: 35, Source: sklearn_base.py
Example 4: _fit_multiclass_task
def _fit_multiclass_task(self, X, y, sample_weight, params):
    if params['init_model'] is not None:
        max_digits = len(str(len(self._classes)))
        init_model_filenames = ['{}.{}'.format(params['init_model'],
                                               str(i + 1).zfill(max_digits))
                                for i in range(self._n_classes)]
    ovr_list = [None] * self._n_classes
    for i, cls_num in enumerate(self._classes):
        if params['init_model'] is not None:
            params['init_model'] = init_model_filenames[i]
        self._classes_map[i] = cls_num
        ovr_list[i] = (y == cls_num).astype(int)
        self._estimators[i] = RGFExecuter(**params)

    n_jobs = self.n_jobs if self.n_jobs > 0 else cpu_count() + self.n_jobs + 1
    substantial_n_jobs = max(n_jobs, self.n_classes_)
    if substantial_n_jobs < n_jobs and self.verbose:
        print('n_jobs = {0}, but RGFClassifier uses {1} CPUs because '
              'classes_ is {2}'.format(n_jobs, substantial_n_jobs,
                                       self.n_classes_))

    self._estimators = Parallel(n_jobs=self.n_jobs)(
        delayed(utils.fit_ovr_binary)(self._estimators[i],
                                      X,
                                      ovr_list[i],
                                      sample_weight)
        for i in range(self._n_classes))
Author: fukatani, Project: rgf_python, Lines: 25, Source: rgf_model.py
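The pattern here is one parallel task per class for one-vs-rest training. Below is a hedged, self-contained sketch of the same idea using a plain scikit-learn estimator; fit_binary and LogisticRegression are illustrative stand-ins for RGFExecuter and utils.fit_ovr_binary, not part of rgf_python:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.externals.joblib import Parallel, delayed, cpu_count
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
classes = np.unique(y)
n_jobs = min(cpu_count(), len(classes))   # no point in more workers than classes

def fit_binary(cls):
    # fit one "class vs. rest" problem, as fit_ovr_binary does for RGF
    return LogisticRegression(solver="liblinear").fit(X, (y == cls).astype(int))

estimators = Parallel(n_jobs=n_jobs)(delayed(fit_binary)(c) for c in classes)
print(len(estimators))  # one fitted binary estimator per class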
Example 5: get_split_scores
def get_split_scores(factory, thresholds, formula,
                     metric=None,  # e.g. usability entropy
                     use_joblib=False,
                     joblib_backend='threading',
                     n_jobs=-1,
                     min_events_fraction_leaf=0., verbose=False):
    if metric == None:
        metric = penalized_usability_entropy
    if min_events_fraction_leaf <= 1:
        min_events_fraction_leaf = int(min_events_fraction_leaf * sum(factory.weights))
    if verbose:
        print min_events_fraction_leaf, sum(factory.weights)

    if not use_joblib:
        scores = np.repeat(float("inf"), len(thresholds))
        for i, (feature, cut, _) in enumerate(thresholds):
            predicate = (factory.events[:, feature] > cut)
            # skip the edge cases... (inf penalty)
            if np.all(predicate) or (not np.any(predicate)):
                # if this split does not split, fuggedaboutit
                continue
            if min_events_fraction_leaf > 0:
                # get rid of too uneven a cuts
                sum_weight = np.sum(factory.weights)
                true_weight = np.sum(factory.weights[predicate])
                false_weight = sum_weight - true_weight
                if true_weight < min_events_fraction_leaf or false_weight < min_events_fraction_leaf:
                    if verbose: print "t:", true_weight, "f:", false_weight, "discarded"
                    continue
                if verbose: print "t:", true_weight, "f:", false_weight, "passed"
            # compute score
            subFactories = factory.split_by(predicate)
            scores[i] = metric(formula, *subFactories)
    else:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count() + 1 - n_jobs
        indices = [0] + [len(thresholds) * (i + 1) / n_jobs for i in range(n_jobs)]
        thresholdSections = [thresholds[indices[i]:indices[i + 1]] for i in range(n_jobs)]

        if joblib_backend == 'threading':
            factory = [deepcopy(factory) for i in range(n_jobs)]
            formula = [deepcopy(formula) for i in range(n_jobs)]
            metric = [deepcopy(metric) for i in range(n_jobs)]  # in case it has some internal data
            jobs = (joblib.delayed(get_split_scores)(factory[i], thresholdSection, formula[i],
                                                     metric=metric[i], use_joblib=False,
                                                     min_events_fraction_leaf=min_events_fraction_leaf,
                                                     verbose=verbose)
                    for i, thresholdSection in enumerate(thresholdSections))
        else:
            jobs = (joblib.delayed(get_split_scores)(factory, thresholdSection, formula,
                                                     metric=metric, use_joblib=False,
                                                     min_events_fraction_leaf=min_events_fraction_leaf,
                                                     verbose=verbose)
                    for thresholdSection in thresholdSections)

        scores = np.hstack(joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(jobs))
    return scores
Author: justheuristic, Project: pruner, Lines: 60, Source: alt_hierarchy.py
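This example switches between joblib's 'threading' and 'multiprocessing' backends and hands each worker its own deep copy of shared objects. The stand-alone sketch below only illustrates the backend choice and the contiguous-section split; the summing task is a placeholder and not part of the project above:

from sklearn.externals.joblib import Parallel, delayed, cpu_count

work = list(range(20))
n_jobs = cpu_count()

# contiguous sections, one per worker, as in get_split_scores
bounds = [len(work) * i // n_jobs for i in range(n_jobs + 1)]
sections = [work[bounds[i]:bounds[i + 1]] for i in range(n_jobs)]

# threading backend: threads share memory, hence the per-thread deepcopy in the example above
res_threads = Parallel(n_jobs=n_jobs, backend="threading")(delayed(sum)(s) for s in sections)

# multiprocessing backend: workers receive pickled copies of the arguments automatically
res_procs = Parallel(n_jobs=n_jobs, backend="multiprocessing")(delayed(sum)(s) for s in sections)

print(res_threads == res_procs)  # True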
Example 6: test_multi_output_classification_partial_fit_parallelism
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert_false(est1 is est2)
Author: dominicSchiller, Project: DataScience_EA12_Clustering_Exercise, Lines: 10, Source: test_multioutput.py
Example 7: fit
def fit(self, X, y=None, groups=None):
    """Run fit on the estimator with randomly drawn parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output]
        Target relative to X for classification or regression.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.
    """
    # check if the list of parameter spaces is provided. If not, then
    # only step in manual mode can be used.
    if len(self.search_spaces_) == 0:
        raise ValueError(
            "Please provide search space using `add_spaces` first before "
            "calling fit method."
        )

    n_jobs = self.n_jobs

    # account for case n_jobs < 0
    if n_jobs < 0:
        n_jobs = max(1, cpu_count() + n_jobs + 1)

    for space_id in sorted(self.search_spaces_.keys()):
        elem = self.search_spaces_[space_id]

        # if not provided with search subspace, n_iter is taken as
        # self.n_iter
        if isinstance(elem, tuple):
            space, n_iter = elem
        else:
            n_iter = self.n_iter

        # do the optimization for particular search space
        while n_iter > 0:
            # when n_iter < n_jobs points left for evaluation
            n_jobs_adjusted = min(n_iter, self.n_jobs)

            self.step(
                X, y, space_id,
                groups=groups, n_jobs=n_jobs_adjusted
            )
            n_iter -= n_jobs
Author: MechCoder, Project: scikit-optimize, Lines: 52, Source: searchcv.py
Example 8: _partition_X
def _partition_X(X, n_jobs):
    """Private function used to partition X between jobs."""
    n_nodes = X.shape[1]

    # Compute the number of jobs
    n_jobs = min(cpu_count() if n_jobs == -1 else n_jobs, n_nodes)

    # Partition estimators between jobs
    n_node_per_job = (n_nodes // n_jobs) * np.ones(n_jobs, dtype=np.int)
    n_node_per_job[:n_nodes % n_jobs] += 1
    starts = np.cumsum(n_node_per_job)

    return n_jobs, [0] + starts.tolist()
Author: ZeitgeberH, Project: kaggle-connectomics, Lines: 13, Source: directivity.py
Example 9: _partition_estimators
def _partition_estimators(ensemble):
    """Private function used to partition estimators between jobs."""
    # Compute the number of jobs
    if ensemble.n_jobs == -1:
        n_jobs = min(cpu_count(), ensemble.n_estimators)
    else:
        n_jobs = min(ensemble.n_jobs, ensemble.n_estimators)

    # Partition estimators between jobs
    n_estimators = (ensemble.n_estimators // n_jobs) * np.ones(n_jobs,
                                                               dtype=np.int)
    n_estimators[:ensemble.n_estimators % n_jobs] += 1
    starts = np.cumsum(n_estimators)

    return n_jobs, n_estimators.tolist(), [0] + starts.tolist()
Author: orazaro, Project: kgml, Lines: 16, Source: bag.py
Example 10: _partition_estimators
def _partition_estimators(n_estimators, n_jobs):
    """Private function used to partition estimators between jobs."""
    # Compute the number of jobs
    if n_jobs == -1:
        n_jobs = min(cpu_count(), n_estimators)
    else:
        n_jobs = min(n_jobs, n_estimators)

    # Partition estimators between jobs
    n_estimators_per_job = (n_estimators // n_jobs) * np.ones(n_jobs,
                                                              dtype=np.int)
    n_estimators_per_job[:n_estimators % n_jobs] += 1
    starts = np.cumsum(n_estimators_per_job)

    return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()
Author: albahnsen, Project: pyea, Lines: 16, Source: ga.py
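Traced by hand, the function above distributes work as follows (the cpu_count() value of 4 is assumed purely for illustration):

# 10 estimators split on a machine where cpu_count() reports 4:
#   n_jobs   -> min(4, 10) = 4
#   per job  -> [3, 3, 2, 2]      (10 // 4 each, +1 for the first 10 % 4 jobs)
#   starts   -> [0, 3, 6, 8, 10]  (cumulative offsets used to slice the work)
n_jobs, counts, starts = _partition_estimators(10, -1)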
Example 11: try_add1_bfs
def try_add1_bfs(allTrees, factory, learning_rate,
                 loss, breadth, y_pred, regularizer=0.,
                 use_joblib=False, n_jobs=-1):
    '''
    select best tree to add (1 step)
    '''
    if factory.__class__ is BinaryClassificationFactory:
        y_sign = factory.labels_sign
        margin = y_sign * y_pred
    elif factory.__class__ is RegressionFactory:
        margin = factory.labels - y_pred
    else:
        raise Exception("Factory type not supported")

    if use_joblib:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count() + 1 - n_jobs

        indices = [0] + [len(allTrees) * (i + 1) / n_jobs for i in range(n_jobs)]
        treeSections = [allTrees[indices[i]:indices[i + 1]] for i in range(n_jobs)]

        tasks = [joblib.delayed(_inthread_try_add)(
            treeSection,
            factory,
            loss,
            margin,
            y_pred,
            learning_rate,
            regularizer) for treeSection in treeSections]

        _res = joblib.Parallel(n_jobs=n_jobs,
                               backend="multiprocessing")(tasks)
        triples = reduce(lambda a, b: a + b, _res)
    else:
        triples = [_try_add(tree, factory, loss, margin, y_pred, learning_rate, regularizer)
                   for tree in allTrees]

    triples.sort(key=lambda el: el[0])

    return ([triple[1] for triple in triples[:breadth]],
            [triple[0] for triple in triples[:breadth]],
            [triple[2] for triple in triples[:breadth]])
Author: mindis, Project: pruner, Lines: 45, Source: greedy.py
Example 12: computePartition
def computePartition(self, nbTasks, dataSize):
    """
    Compute data partitioning for parallel computation :
    min(nbTasks, dataSize)

    Parameters
    ----------
    nbTasks : int (!=0)
        If >0 : the parallelization factor.
        If <0 : nbTasks = #cpu+nbTasks+1 (-1 -> nbTasks = #cpu)
    dataSize : int > 0
        The size of the data to process

    Return
    ------
    triplet = (nbTasks, counts, starts)
    nbTasks : int
        The final parallelization factor. It is computed as
        min(#cpu/nbTasks, dataSize)
    counts : list of int
        The number of data pieces for each parallel task
    starts : list of int
        The start indexes of the data for each parallel task
    """
    if nbTasks < 0:
        cpu = cpu_count() + nbTasks + 1
        if cpu <= 0:
            cpu = 1
        nbTasks = min(cpu, dataSize)
    else:
        if nbTasks == 0:
            nbTasks = 1
        nbTasks = min(nbTasks, dataSize)

    counts = [dataSize / nbTasks] * nbTasks
    for i in xrange(dataSize % nbTasks):
        counts[i] += 1

    starts = [0] * (nbTasks + 1)
    for i in xrange(1, nbTasks + 1):
        starts[i] = starts[i - 1] + counts[i - 1]

    return nbTasks, counts, starts
Author: jm-begon, Project: masterthesis, Lines: 45, Source: TaskManager.py
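Traced on an assumed 8-CPU machine, the method above resolves a negative nbTasks first and then caps it by the data size; task_manager below is a hypothetical instance of the surrounding class, and the values are illustrative only:

# nbTasks = -2, dataSize = 5, cpu_count() == 8 (assumed):
#   cpu     = 8 + (-2) + 1 = 7     "all CPUs but one"
#   nbTasks = min(7, 5)     = 5    never more tasks than data pieces
#   counts  = [1, 1, 1, 1, 1]
#   starts  = [0, 1, 2, 3, 4, 5]
nbTasks, counts, starts = task_manager.computePartition(-2, 5)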
Example 13: _partition_clips
def _partition_clips(n_jobs, n_clips):
    if n_jobs == -1:
        n_jobs = min(cpu_count(), n_clips)
    else:
        n_jobs = min(n_jobs, n_clips)

    counts = [n_clips / n_jobs] * n_jobs
    for i in xrange(n_clips % n_jobs):
        counts[i] += 1

    starts = [0] * (n_jobs + 1)
    for i in xrange(1, n_jobs + 1):
        starts[i] = starts[i - 1] + counts[i - 1]

    return n_jobs, counts, starts
Author: Sandy4321, Project: kaggle-marinexplore, Lines: 18, Source: estimator.py
Example 14: _set_params_with_dependencies
def _set_params_with_dependencies(self):
    if self.max_bin is None:
        if self._is_sparse_train_X:
            self._max_bin = 200
        else:
            self._max_bin = 65000
    else:
        self._max_bin = self.max_bin

    if isinstance(self.min_samples_leaf, utils.FLOATS):
        self._min_samples_leaf = ceil(self.min_samples_leaf * self._n_samples)
    else:
        self._min_samples_leaf = self.min_samples_leaf

    if self.n_jobs == -1:
        self._n_jobs = 0
    elif self.n_jobs < 0:
        self._n_jobs = cpu_count() + self.n_jobs + 1
    else:
        self._n_jobs = self.n_jobs

    self._set_target_and_loss()
Author: fukatani, Project: rgf_python, Lines: 22, Source: fastrgf_model.py
Example 15: _e_step
def _e_step(self, X, cal_delta):
    """
    E-step

    set `cal_delta == True` when we need to run _m_step;
    for inference, set it to False
    """
    # parallel run of the e-step
    if self.n_jobs == -1:
        n_jobs = cpu_count()
    else:
        n_jobs = self.n_jobs

    results = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
        delayed(_update_gamma)
        (X[idx_slice, :], self.expElogbeta, self.alpha,
         self.rng, 100, self.mean_change_tol, cal_delta)
        for idx_slice in gen_even_slices(X.shape[0], n_jobs))

    # merge result
    gammas, deltas = zip(*results)
    gamma = np.vstack(gammas)

    if cal_delta:
        # This step finishes computing the sufficient statistics for the
        # M step, so that
        # sstats[k, w] = \sum_d n_{dw} * phi_{dwk}
        # = \sum_d n_{dw} * exp{Elogtheta_{dk} + Elogbeta_{kw}} / phinorm_{dw}.
        delta_component = np.zeros(self.components_.shape)
        for delta in deltas:
            delta_component += delta
        delta_component *= self.expElogbeta
    else:
        delta_component = None

    return (gamma, delta_component)
Author: praveenkottayi, Project: topicModels, Lines: 37, Source: lda.py
Example 16: fit
def fit(self, X, Y, H_init=None):
    """Learn parameters using subgradient descent.

    Parameters
    ----------
    X : iterable
        Training instances. Contains the structured input objects.
        No requirement on the particular form of entries of X is made.

    Y : iterable
        Training labels. Contains the structured labels for inputs in X.
        Needs to have the same length as X.

    constraints : None
        Discarded. Only for API compatibility currently.
    """
    print("Training latent subgradient structural SVM")
    self.w = getattr(self, "w", np.random.normal(
        0, .001, size=self.model.size_psi))
    #constraints = []
    self.objective_curve_ = []
    n_samples = len(X)
    try:
        # catch ctrl+c to stop training
        for iteration in xrange(self.max_iter):
            positive_slacks = 0
            objective = 0.
            #verbose = max(0, self.verbose - 3)
            if self.n_jobs == 1:
                # online learning
                for x, y in zip(X, Y):
                    h = self.model.latent(x, y, self.w)
                    h_hat = self.model.loss_augmented_inference(
                        x, h, self.w, relaxed=True)
                    delta_psi = (self.model.psi(x, h)
                                 - self.model.psi(x, h_hat))
                    slack = (-np.dot(delta_psi, self.w)
                             + self.model.loss(h, h_hat))
                    objective += np.maximum(slack, 0)
                    if slack > 0:
                        positive_slacks += 1
                    self._solve_subgradient(delta_psi, n_samples)
            else:
                # generate batches of size n_jobs
                # to speed up inference
                if self.n_jobs == -1:
                    n_jobs = cpu_count()
                else:
                    n_jobs = self.n_jobs

                n_batches = int(np.ceil(float(len(X)) / n_jobs))
                slices = gen_even_slices(n_samples, n_batches)
                for batch in slices:
                    X_b = X[batch]
                    Y_b = Y[batch]
                    verbose = self.verbose - 1
                    candidate_constraints = Parallel(
                        n_jobs=self.n_jobs,
                        verbose=verbose)(delayed(find_constraint_latent)(
                            self.model, x, y, self.w)
                            for x, y in zip(X_b, Y_b))
                    dpsi = np.zeros(self.model.size_psi)
                    for x, y, constraint in zip(X_b, Y_b,
                                                candidate_constraints):
                        y_hat, delta_psi, slack, loss = constraint
                        objective += slack
                        dpsi += delta_psi
                        if slack > 0:
                            positive_slacks += 1
                    dpsi /= float(len(X_b))
                    self._solve_subgradient(dpsi, n_samples)

            # some statistics
            objective += np.sum(self.w ** 2) / self.C / 2.
            #objective /= float(n_samples)

            if positive_slacks == 0:
                print("No additional constraints")
                if self.break_on_no_constraints:
                    break
            if self.verbose > 0:
                print(self)
                print("iteration %d" % iteration)
                print("positive slacks: %d, "
                      "objective: %f" %
                      (positive_slacks, objective))
            self.objective_curve_.append(objective)

            if self.verbose > 2:
                print(self.w)

            self._compute_training_loss(X, Y, iteration)
            if self.logger is not None:
                self.logger(self, iteration)
    except KeyboardInterrupt:
        pass
    print("final objective: %f" % self.objective_curve_[-1])
    print("calls to inference: %d" % self.model.inference_calls)
    # ......... remainder of this method omitted .........
Author: hushell, Project: pystruct, Lines: 101, Source: subgradient_latent_ssvm.py
Example 17: _fit
def _fit(self, X, y):
    X, y = check_X_y(X, y, "csr")
    # Initialization
    cv = check_cv(self.cv, y, is_classifier(self.estimator))
    scorer = check_scoring(self.estimator, scoring=self.scoring)
    n_features = X.shape[1]

    if self.max_features is not None:
        if not isinstance(self.max_features, numbers.Integral):
            raise TypeError("'max_features' should be an integer between 1 and {} features."
                            " Got {!r} instead."
                            .format(n_features, self.max_features))
        elif self.max_features < 1 or self.max_features > n_features:
            raise ValueError("'max_features' should be between 1 and {} features."
                             " Got {} instead."
                             .format(n_features, self.max_features))
        max_features = self.max_features
    else:
        max_features = n_features

    if not isinstance(self.n_gen_no_change, (numbers.Integral, np.integer, type(None))):
        raise ValueError("'n_gen_no_change' should either be None or an integer."
                         " {} was passed."
                         .format(self.n_gen_no_change))

    estimator = clone(self.estimator)

    # Genetic Algorithm
    toolbox = base.Toolbox()

    toolbox.register("attr_bool", random.randint, 0, 1)
    toolbox.register("individual", tools.initRepeat,
                     creator.Individual, toolbox.attr_bool, n=n_features)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", _evalFunction, gaobject=self, estimator=estimator, X=X, y=y,
                     cv=cv, scorer=scorer, verbose=self.verbose, fit_params=self.fit_params,
                     max_features=max_features, caching=self.caching)
    toolbox.register("mate", tools.cxUniform, indpb=self.crossover_independent_proba)
    toolbox.register("mutate", tools.mutFlipBit, indpb=self.mutation_independent_proba)
    toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)

    if self.n_jobs == 0:
        raise ValueError("n_jobs == 0 has no meaning.")
    elif self.n_jobs > 1:
        pool = multiprocessing.Pool(processes=self.n_jobs)
        toolbox.register("map", pool.map)
    elif self.n_jobs < 0:
        pool = multiprocessing.Pool(processes=max(cpu_count() + 1 + self.n_jobs, 1))
        toolbox.register("map", pool.map)

    pop = toolbox.population(n=self.n_population)
    hof = tools.HallOfFame(1, similar=np.array_equal)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean, axis=0)
    stats.register("std", np.std, axis=0)
    stats.register("min", np.min, axis=0)
    stats.register("max", np.max, axis=0)

    if self.verbose > 0:
        print("Selecting features with genetic algorithm.")

    _, log = _eaFunction(pop, toolbox, cxpb=self.crossover_proba, mutpb=self.mutation_proba,
                         ngen=self.n_generations, ngen_no_change=self.n_gen_no_change,
                         stats=stats, halloffame=hof, verbose=self.verbose)
    if self.n_jobs != 1:
        pool.close()
        pool.join()

    # Set final attributes
    support_ = np.array(hof, dtype=np.bool)[0]
    self.estimator_ = clone(self.estimator)
    self.estimator_.fit(X[:, support_], y)

    self.generation_scores_ = np.array([score for score, _ in log.select("max")])
    self.n_features_ = support_.sum()
    self.support_ = support_

    return self
Author: manuel-calzolari, Project: sklearn-genetic, Lines: 78, Source: __init__.py
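The Pool-sizing logic above can be isolated into a few lines. This is a hedged sketch (square is a placeholder task, not part of sklearn-genetic) showing how a negative, joblib-style n_jobs is turned into a process count for multiprocessing.Pool:

import multiprocessing
from sklearn.externals.joblib import cpu_count

def square(x):
    return x * x

n_jobs = -2                                    # joblib-style: all CPUs but one
processes = max(cpu_count() + 1 + n_jobs, 1)   # same formula as in _fit above

if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=processes)
    print(pool.map(square, range(10)))
    pool.close()
    pool.join()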
Example 18: wheel_up_features_bfs
def wheel_up_features_bfs(initialBunch,
                          trees,
                          factory,
                          loss,
                          learning_rate=0.25,
                          nIters=100,
                          trees_sample_size=100,
                          verbose=True,
                          vali_factory=None,
                          learning_rate_decay=1.,
                          trees_sample_increase=0,
                          regularizer=0.,
                          random_walk=True,
                          use_joblib=False,
                          n_jobs=-1,
                          joblib_backend="threading",
                          copy_pred=False):
    """
    Iterative BFS over best ADD-1 results for [nTrees] iterations
    """
    allTrees = copy.copy(trees)
    bunch = copy.copy(initialBunch)

    pred = factory.predict(bunch)
    bestScore = loss.score(factory, pred)

    if vali_factory is not None:
        vali_pred = vali_factory.predict(bunch)
        vali_score = loss.score(vali_factory, vali_pred)
        vali_scores = [vali_score]

    if use_joblib:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count()
        if joblib_backend == "threading":
            # create copies of data once to escape GIL forever
            factory = [copy.deepcopy(factory) for i in range(n_jobs)]
            losses = [copy.deepcopy(loss) for i in range(n_jobs)]
        elif joblib_backend == "multiprocessing":
            pass
        else:
            raise ValueError, "joblib_backend must be either 'threading' or 'multiprocessing'"

    if verbose:
        print "\niteration #", 0, " ntrees = ", len(bunch), "\nbest loss = ", bestScore
        print "learning_rate = ", learning_rate
        print "sample_size", trees_sample_size

    for itr in xrange(1, nIters + 1):
        change_index = random.randint(0, len(bunch) - 1) if random_walk else (i - 1) % len(bunch)
        trees_sample = random.sample(allTrees, trees_sample_size) + [bunch[change_index]]

        bunch_wo = copy.copy(bunch)
        replaced_tree = bunch_wo.pop(change_index)

        if use_joblib and joblib_backend == "threading":
            # split trees into sections
            indices = [0] + [len(trees_sample) * (i + 1) / n_jobs for i in range(n_jobs)]
            treeSections = [trees_sample[indices[i]:indices[i + 1]] for i in range(n_jobs)]

            pred_wo = pred - factory[0].predict(PrunedFormula([bunch[change_index]], bias=0.))
            if copy_pred:
                pred_wo = [copy.deepcopy(pred) for i in range(n_jobs)]
            else:
                pred_wo = [pred for i in range(n_jobs)]

            # execute sections in parallel
            tasks = [joblib.delayed(try_add1_bfs)(treeSections[ithread], factory[ithread],
                                                  learning_rate, losses[ithread],
                                                  1, pred_wo[ithread], regularizer=regularizer,
                                                  use_joblib=False)
                     for ithread in range(n_jobs)]
            _res = joblib.Parallel(n_jobs=n_jobs,
                                   backend="threading")(tasks)
            _additions, newScores, newPreds = reduce(lambda a, b: [a[i] + b[i] for i in range(3)], _res)
        else:
            pred_wo = pred - factory.predict(PrunedFormula([bunch[change_index]], bias=0.))
            _additions, newScores, newPreds = try_add1_bfs(trees_sample, factory,
                                                           learning_rate, loss,
                                                           1, pred_wo, regularizer=regularizer,
                                                           use_joblib=use_joblib, n_jobs=n_jobs)

        learning_rate *= learning_rate_decay
        trees_sample_size = min(len(allTrees), trees_sample_size + trees_sample_increase)

        triples = zip(newScores, _additions, newPreds)
        triples.sort(key=lambda el: el[0])

        newBestScore = min(newScores)
        # ......... remainder of this function omitted .........
Author: justheuristic, Project: pruner, Lines: 101, Source: greedy.py
Example 19: __init__
def __init__(self, n_features, n_jobs=1):
    self.n_features = n_features
    if n_jobs == -1:
        n_jobs = cpu_count()
    self.n_jobs = n_jobs
Author: pgervais, Project: nilearn, Lines: 5, Source: searchlight.py
Example 20: __init__
def __init__(self, *files, columns=None, ngrams=2, decap=False, patterns=None, mask=None):
    """
    Create a new data object with the following attributes:

    * instances - list of raw text instances
    * labels - array of instance labels in same order as raw text
    * features - matrix of feature vectors per text instance
    * names - array of feature names in same order as features

    Both features and names are undefined until extracted
    using some Vectorizer.

    Exclusive options for either BIO-NER vs. plain-text input:

    1. **BIO-NER** parameters: Define a `columns` integer to define the number of disregarded
       columns and thereby declare that the input will be in BIO-NER format. In addition, the
       `ngram` option can be set to define the ngram size of the tokens to generate.
       All other keyword parameters will be ignored.
    2. **plain-text** keyword parameters: Set `decap=True` to lower-case the first letter of
       each plain-text line. Use a list of regex `patterns` and a replacement string `mask` to
       "mask" pattern-matched words in regular (non-`column`) input.
    """
    try:
        if columns is None:
            inputs = [[l.strip('\r\n') for l in f] for f in files]

            if decap:
                for i in range(len(inputs)):
                    inputs[i] = ["{}{}".format(l[0].lower(), l[1:])
                                 for l in inputs[i] if len(l)]

            if patterns and mask:
                self.instances = []
                splits = joblib.cpu_count()

                for lines in inputs:
                    jobs = tuple(lines[i::splits] for i in range(splits))
                    jobs = joblib.Parallel(n_jobs=splits)(
                        delayed(subAll)(patterns, mask, lines) for lines in jobs
                    )
                    self.instances.append(list(zip(lines, chain(*jobs))))
            else:
                self.instances = [list(zip(lines, lines)) for lines in inputs]
        else:
            self.instances = []

            for f in files:
                # FIXME: instead of two hardcoded entity masks,
                # FIXME: this has to be dynamic or generic...
                sentences = SentenceParser(f, ('FACTOR', 'TARGET'), id_columns=columns)

                if not columns:
                    sentences = list(enumerate(sentences, start=1))

                data = [(sid, asDict(s, ngrams)) for sid, s in sentences]
                self.instances.append(data)
    except UnicodeDecodeError as e:
        import sys
        print('decoding error:', e.reason, 'in input file')
        sys.exit(1)

    # ensure the minority label(s) come first (important for the evaluation, too!)
    self.instances = sorted(self.instances, key=len)
    self.classes = len(self.instances)
    self.labels = np.concatenate([
        (np.zeros(len(data), dtype=np.uint8) + i)
        for i, data in enumerate(self.instances)
    ])
    self.ids = None
    self.raw = None
    self.features = None
    self.names = None

    if columns is None:
        self.raw, self.instances = zip(*list(chain.from_iterable(self.instances)))

        if len(self.raw) and '\t' in self.raw[0]:
            self.ids = [l.split('\t', 1)[0] for l in self.raw]
        else:
            self.ids = self.raw
    else:
        self.ids, self.instances = zip(*list(chain.from_iterable(self.instances)))
Author: fnl, Project: libfnl, Lines: 85, Source: textclass.py
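The lines[i::splits] expression above deals work out round-robin rather than in contiguous chunks. Below is a small, self-contained sketch with an upper-casing task standing in for subAll; note that chaining the results back together follows the round-robin grouping, not the original line order:

from itertools import chain
from sklearn.externals.joblib import Parallel, delayed, cpu_count

def shout(lines):
    return [l.upper() for l in lines]

lines = ["alpha", "beta", "gamma", "delta", "epsilon"]
splits = cpu_count()

# round-robin split: worker i gets lines i, i+splits, i+2*splits, ...
jobs = tuple(lines[i::splits] for i in range(splits))
results = Parallel(n_jobs=splits)(delayed(shout)(chunk) for chunk in jobs)
print(list(chain(*results)))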
Note: the sklearn.externals.joblib.cpu_count examples on this page were compiled from source-code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their respective authors, and copyright remains with them; please consult each project's license before redistributing or reusing the code. Do not republish without permission.