
Python joblib.cpu_count Function Code Examples


This article collects typical usage examples of the Python function sklearn.externals.joblib.cpu_count. If you are trying to work out what cpu_count does, how to call it, and what real-world code that uses it looks like, the curated examples below should help.



Below are 20 code examples of the cpu_count function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
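
Before the project code, here is a minimal standalone sketch (not taken from any of the projects below) of how cpu_count is typically imported and used. Note that sklearn.externals.joblib is a deprecated vendored copy; recent scikit-learn versions expect the standalone joblib package instead.

# Minimal sketch: querying the CPU count with joblib (assumes joblib is installed).
try:
    from joblib import cpu_count                      # standalone joblib (preferred today)
except ImportError:
    from sklearn.externals.joblib import cpu_count    # older vendored copy used by the examples below

n_cpus = cpu_count()
print("joblib reports %d CPUs" % n_cpus)

# A pattern most of the examples build on: resolve n_jobs=-1 to "use all CPUs".
n_jobs = -1
n_workers = cpu_count() if n_jobs == -1 else n_jobs
print("n_jobs=%r -> %d workers" % (n_jobs, n_workers))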

Example 1: check_n_jobs

def check_n_jobs(n_jobs):
    """Check and adjust the number of CPUs that can work in parallel.

    Parameters
    ----------
    n_jobs : int,
      Number of parallel workers, specified according to joblib's conventions:
      If 0 is provided, all CPUs are used.
      A negative number indicates that all the CPUs except (|n_jobs| - 1) ones
      will be used.

    Returns
    -------
    n_jobs : int,
      Actual number of CPUs that will be used according to their availability.

    """
    if n_jobs == 0:  # invalid according to joblib's conventions
        raise ValueError(
            "'n_jobs == 0' is not a valid choice. "
            "Please provide a positive number of CPUs, or -1 "
            "for all CPUs, or a negative number (-i) for "
            "'all but (i-1)' CPUs (joblib conventions)."
        )
    elif n_jobs < 0:
        n_jobs = max(1, joblib.cpu_count() + n_jobs + 1)
    else:
        n_jobs = min(n_jobs, joblib.cpu_count())

    return n_jobs
Developer: nilearn | Project: nilearn_sandbox | Lines: 30 | Source: common_checks.py
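
A hedged usage sketch of the helper above; it assumes the check_n_jobs function just defined and, purely for concreteness, an 8-CPU machine, so the printed numbers are illustrative only.

# Illustrative only: assumes check_n_jobs() from above and an 8-CPU machine.
from sklearn.externals import joblib   # the (deprecated) import the snippet relies on; standalone joblib also works

print(joblib.cpu_count())   # e.g. 8 on the assumed machine
print(check_n_jobs(4))      # 4 (a positive request is capped at cpu_count())
print(check_n_jobs(-1))     # 8 (all CPUs)
print(check_n_jobs(-2))     # 7 (all CPUs but one)
check_n_jobs(0)             # raises ValueError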


Example 2: _parallel_learning

    def _parallel_learning(self, X, Y, w):
        n_samples = len(X)
        objective, positive_slacks = 0, 0
        verbose = max(0, self.verbose - 3)
        if self.batch_size is not None:
            raise ValueError("If n_jobs != 1, batch_size needs to" "be None")
        # generate batches of size n_jobs
        # to speed up inference
        if self.n_jobs == -1:
            n_jobs = cpu_count()
        else:
            n_jobs = self.n_jobs

        n_batches = int(np.ceil(float(len(X)) / n_jobs))
        slices = gen_even_slices(n_samples, n_batches)
        for batch in slices:
            X_b = X[batch]
            Y_b = Y[batch]
            candidate_constraints = Parallel(n_jobs=self.n_jobs, verbose=verbose)(
                delayed(find_constraint)(self.model, x, y, w) for x, y in zip(X_b, Y_b)
            )
            dpsi = np.zeros(self.model.size_psi)
            for x, y, constraint in zip(X_b, Y_b, candidate_constraints):
                y_hat, delta_psi, slack, loss = constraint
                if slack > 0:
                    objective += slack
                    dpsi += delta_psi
                    positive_slacks += 1
            w = self._solve_subgradient(dpsi, n_samples, w)
        return objective, positive_slacks, w
Developer: huyng | Project: pystruct | Lines: 30 | Source: subgradient_ssvm.py
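
The Parallel/delayed idiom used above is worth isolating. This is a generic, self-contained sketch of the same pattern; nothing here comes from pystruct itself.

# Generic sketch of the joblib Parallel/delayed pattern used in the example above.
from joblib import Parallel, delayed, cpu_count

def slow_square(x):
    return x * x

n_jobs = cpu_count()                      # one worker per CPU
results = Parallel(n_jobs=n_jobs)(
    delayed(slow_square)(x) for x in range(10)
)
print(results)                            # [0, 1, 4, ..., 81]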


Example 3: _get_n_jobs

def _get_n_jobs(n_jobs):
    """Get number of jobs for the computation.
    See sklearn/utils/__init__.py for more information.

    This function reimplements the logic of joblib to determine the actual
    number of jobs depending on the cpu count. If -1 all CPUs are used.
    If 1 is given, no parallel computing code is used at all, which is useful
    for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.
    Thus for n_jobs = -2, all CPUs but one are used.
    Parameters
    ----------
    n_jobs : int
        Number of jobs stated in joblib convention.
    Returns
    -------
    n_jobs : int
        The actual number of jobs as positive integer.
    Examples
    --------
    >>> from sklearn.utils import _get_n_jobs
    >>> _get_n_jobs(4)
    4
    >>> jobs = _get_n_jobs(-2)
    >>> assert jobs == max(cpu_count() - 1, 1)
    >>> _get_n_jobs(0)
    Traceback (most recent call last):
    ...
    ValueError: Parameter n_jobs == 0 has no meaning.
    """
    if n_jobs < 0:
        return max(cpu_count() + 1 + n_jobs, 1)
    elif n_jobs == 0:
        raise ValueError('Parameter n_jobs == 0 has no meaning.')
    else:
        return n_jobs
Developer: flaviassantos | Project: pyod | Lines: 35 | Source: sklearn_base.py
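
Recent standalone joblib releases expose the same n_jobs convention directly as joblib.effective_n_jobs, so a sketch like the following (an assumption about your installed joblib version, not part of the pyod code above) can replace the hand-rolled helper.

# Sketch: joblib.effective_n_jobs implements the same convention as _get_n_jobs above.
import joblib

print(joblib.effective_n_jobs(1))    # 1  -> no parallelism
print(joblib.effective_n_jobs(-1))   # number of CPUs
print(joblib.effective_n_jobs(-2))   # all CPUs but one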


Example 4: _fit_multiclass_task

    def _fit_multiclass_task(self, X, y, sample_weight, params):
        if params['init_model'] is not None:
            max_digits = len(str(len(self._classes)))
            init_model_filenames = ['{}.{}'.format(params['init_model'],
                                                   str(i + 1).zfill(max_digits)) for i in range(self._n_classes)]
        ovr_list = [None] * self._n_classes
        for i, cls_num in enumerate(self._classes):
            if params['init_model'] is not None:
                params['init_model'] = init_model_filenames[i]
            self._classes_map[i] = cls_num
            ovr_list[i] = (y == cls_num).astype(int)
            self._estimators[i] = RGFExecuter(**params)

        n_jobs = self.n_jobs if self.n_jobs > 0 else cpu_count() + self.n_jobs + 1
        substantial_n_jobs = max(n_jobs, self.n_classes_)
        if substantial_n_jobs < n_jobs and self.verbose:
            print('n_jobs = {0}, but RGFClassifier uses {1} CPUs because '
                  'classes_ is {2}'.format(n_jobs, substantial_n_jobs,
                                           self.n_classes_))

        self._estimators = Parallel(n_jobs=self.n_jobs)(delayed(utils.fit_ovr_binary)(self._estimators[i],
                                                                                      X,
                                                                                      ovr_list[i],
                                                                                      sample_weight)
                                                        for i in range(self._n_classes))
Developer: fukatani | Project: rgf_python | Lines: 25 | Source: rgf_model.py


Example 5: get_split_scores

def get_split_scores(factory,thresholds,formula,
                     metric = None,#p.e. usability entropy
                     use_joblib = False,
                     joblib_backend = 'threading',
                     n_jobs = -1,
                     min_events_fraction_leaf = 0.,verbose = False):

    if metric == None:
        metric = penalized_usability_entropy
    if min_events_fraction_leaf <=1:
        min_events_fraction_leaf = int(min_events_fraction_leaf*sum(factory.weights))
    if verbose:
        print min_events_fraction_leaf, sum(factory.weights)

    if not use_joblib:
        scores = np.repeat(float("inf"),len(thresholds))
        for i,(feature,cut,_) in enumerate(thresholds):
            predicate =  (factory.events[:,feature] > cut)

            #skip the edge cases... (inf penalty)
            if np.all(predicate) or (not np.any(predicate)):
                #if this split does not split, fuggedaboutit
                continue 
            if min_events_fraction_leaf>0:
                #get rid of too uneven a cuts
                sum_weight = np.sum(factory.weights)
                true_weight = np.sum(factory.weights[predicate])
                false_weight = sum_weight - true_weight
                if true_weight < min_events_fraction_leaf or false_weight < min_events_fraction_leaf:
                    if verbose: print "t:",true_weight,"f:",false_weight, "discarded"
                    continue
                if verbose: print "t:",true_weight,"f:",false_weight, "passed"
            #compute score
            subFactories = factory.split_by(predicate)
            scores[i] = metric(formula,*subFactories)
    else:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count() +1 - n_jobs
       
        indices = [0]+[len(thresholds)*(i+1)/n_jobs for i in range(n_jobs)]
        thresholdSections = [thresholds[indices[i]:indices[i+1]] for i in range(n_jobs)]
        
        if joblib_backend == 'threading':
            factory = [deepcopy(factory) for i in range(n_jobs)]
            formula = [deepcopy(formula) for i in range(n_jobs)]
            metric = [deepcopy(metric) for i in range(n_jobs)] #in case it has some internal data
            
            jobs = (joblib.delayed(get_split_scores)(factory[i],thresholdSection, formula[i],
                                                 metric=metric[i],use_joblib = False,
                                                 min_events_fraction_leaf = min_events_fraction_leaf,
                                                 verbose = verbose)
                                    for i,thresholdSection in enumerate(thresholdSections))
        else:
            jobs = (joblib.delayed(get_split_scores)(factory,thresholdSection, formula,
                                                 metric=metric,use_joblib = False,
                                                 min_events_fraction_leaf = min_events_fraction_leaf,
                                                 verbose = verbose)
                                    for thresholdSection in thresholdSections)
        scores = np.hstack(joblib.Parallel(n_jobs = n_jobs, backend = joblib_backend)(jobs))
    return scores
Developer: justheuristic | Project: pruner | Lines: 60 | Source: alt_hierarchy.py
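
The example above switches joblib between the 'threading' backend (deep-copying shared objects first) and process-based execution. A minimal, hedged sketch of explicit backend selection, independent of the pruner code:

# Sketch: choosing a joblib backend explicitly.
from joblib import Parallel, delayed

def work(chunk):
    return sum(chunk)

chunks = [range(0, 5), range(5, 10)]

# Threads share memory (useful when the work releases the GIL, e.g. NumPy-heavy code)...
thread_res = Parallel(n_jobs=2, backend="threading")(delayed(work)(c) for c in chunks)
# ...while the default process-based backend isolates workers instead.
proc_res = Parallel(n_jobs=2)(delayed(work)(c) for c in chunks)
print(thread_res, proc_res)    # [10, 35] [10, 35]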


Example 6: test_multi_output_classification_partial_fit_parallelism

def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert_false(est1 is est2)
Developer: dominicSchiller | Project: DataScience_EA12_Clustering_Exercise | Lines: 10 | Source: test_multioutput.py


Example 7: fit

    def fit(self, X, y=None, groups=None):
        """Run fit on the estimator with randomly drawn parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples in the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output]
            Target relative to X for classification or regression;

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.
        """

        # check if the list of parameter spaces is provided. If not, then
        # only step in manual mode can be used.

        if len(self.search_spaces_) == 0:
            raise ValueError(
                "Please provide search space using `add_spaces` first before"
                "calling fit method."
            )

        n_jobs = self.n_jobs

        # account for case n_jobs < 0
        if n_jobs < 0:
            n_jobs = max(1, cpu_count() + n_jobs + 1)

        for space_id in sorted(self.search_spaces_.keys()):
            elem = self.search_spaces_[space_id]

            # if not provided with search subspace, n_iter is taken as
            # self.n_iter
            if isinstance(elem, tuple):
                space, n_iter = elem
            else:
                n_iter = self.n_iter

            # do the optimization for particular search space
            while n_iter > 0:
                # when n_iter < n_jobs points left for evaluation
                n_jobs_adjusted = min(n_iter, self.n_jobs)

                self.step(
                    X, y, space_id,
                    groups=groups, n_jobs=n_jobs_adjusted
                )
                n_iter -= n_jobs
Developer: MechCoder | Project: scikit-optimize | Lines: 52 | Source: searchcv.py
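
The while-loop above spends an n_iter budget in batches of at most n_jobs points per step. A compact, generic sketch of that control flow (not the scikit-optimize implementation itself):

# Sketch: consuming an evaluation budget in batches of at most n_jobs points.
def run_in_batches(n_iter, n_jobs, evaluate_batch):
    while n_iter > 0:
        batch = min(n_iter, n_jobs)   # the last batch may be smaller
        evaluate_batch(batch)
        n_iter -= batch

run_in_batches(10, 4, lambda b: print("evaluating", b, "points"))
# evaluating 4 points / evaluating 4 points / evaluating 2 points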


Example 8: _partition_X

def _partition_X(X, n_jobs):
    """Private function used to partition X between jobs."""
    n_nodes = X.shape[1]

    # Compute the number of jobs
    n_jobs = min(cpu_count() if n_jobs == -1 else n_jobs, n_nodes)

    # Partition estimators between jobs
    n_node_per_job = (n_nodes // n_jobs) * np.ones(n_jobs, dtype=np.int)
    n_node_per_job[:n_nodes % n_jobs] += 1
    starts = np.cumsum(n_node_per_job)

    return n_jobs, [0] + starts.tolist()
Developer: ZeitgeberH | Project: kaggle-connectomics | Lines: 13 | Source: directivity.py


Example 9: _partition_estimators

def _partition_estimators(ensemble):
    """Private function used to partition estimators between jobs."""
    # Compute the number of jobs
    if ensemble.n_jobs == -1:
        n_jobs = min(cpu_count(), ensemble.n_estimators)

    else:
        n_jobs = min(ensemble.n_jobs, ensemble.n_estimators)

    # Partition estimators between jobs
    n_estimators = (ensemble.n_estimators // n_jobs) * np.ones(n_jobs,
                                                               dtype=np.int)
    n_estimators[:ensemble.n_estimators % n_jobs] += 1
    starts = np.cumsum(n_estimators)

    return n_jobs, n_estimators.tolist(), [0] + starts.tolist()
Developer: orazaro | Project: kgml | Lines: 16 | Source: bag.py


Example 10: _partition_estimators

def _partition_estimators(n_estimators, n_jobs):
    """Private function used to partition estimators between jobs."""
    # Compute the number of jobs
    if n_jobs == -1:
        n_jobs = min(cpu_count(), n_estimators)

    else:
        n_jobs = min(n_jobs, n_estimators)

    # Partition estimators between jobs
    n_estimators_per_job = (n_estimators // n_jobs) * np.ones(n_jobs,
                                                              dtype=np.int)
    n_estimators_per_job[:n_estimators % n_jobs] += 1
    starts = np.cumsum(n_estimators_per_job)

    return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()
Developer: albahnsen | Project: pyea | Lines: 16 | Source: ga.py
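
For concreteness, here is a worked sketch of the partition arithmetic used in Examples 8-10 (it uses plain int because np.int has been removed from recent NumPy releases): with n_estimators=10 and n_jobs=3 the split is [4, 3, 3] estimators per job and start offsets [0, 4, 7, 10].

# Worked sketch of the estimator-partitioning arithmetic.
import numpy as np

n_estimators, n_jobs = 10, 3
per_job = (n_estimators // n_jobs) * np.ones(n_jobs, dtype=int)
per_job[:n_estimators % n_jobs] += 1          # spread the remainder over the first jobs
starts = [0] + np.cumsum(per_job).tolist()
print(per_job.tolist(), starts)               # [4, 3, 3] [0, 4, 7, 10]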


Example 11: try_add1_bfs

def try_add1_bfs(allTrees,factory,learning_rate,
                 loss,breadth,y_pred,regularizer = 0.,
                 use_joblib = False,n_jobs = -1):
    '''
    select best tree to add (1 step)
    '''
    if factory.__class__ is BinaryClassificationFactory:
        y_sign = factory.labels_sign
        margin = y_sign*y_pred
    elif factory.__class__ is RegressionFactory:
        margin = factory.labels - y_pred
    else:
        raise Exception("Factory type not supported")

    if use_joblib:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count() + 1 - n_jobs
        
        indices = [0]+[len(allTrees)*(i+1)/n_jobs for i in range(n_jobs)]
        treeSections = [allTrees[indices[i]:indices[i+1]] for i in range(n_jobs)]

        tasks = [joblib.delayed(_inthread_try_add)(
                    treeSection,
                    factory,
                    loss,
                    margin,
                    y_pred,
                    learning_rate,
                    regularizer) for treeSection in treeSections]
        _res = joblib.Parallel(n_jobs = n_jobs,
                               backend = "multiprocessing")(tasks)
        triples = reduce(lambda a,b:a+b, _res)

    else:
        triples = [_try_add(tree,factory,loss,margin,y_pred,learning_rate,regularizer) for tree in allTrees]   

    
    triples.sort(key = lambda el: el[0])
    



    return ([triple[1] for triple in triples[:breadth]],
            [triple[0] for triple in triples[:breadth]],
            [triple[2] for triple in triples[:breadth]])
Developer: mindis | Project: pruner | Lines: 45 | Source: greedy.py


Example 12: computePartition

    def computePartition(self, nbTasks, dataSize):
        """
        Compute data partitioning for parallel computation :
        min(nbTasks, dataSize)

        Parameters
        ----------
        nbTasks : int (!=0)
            If >0 : the parallelization factor.
            If <0 : nbTasks = #cpu+nbTasks+1 (-1 -> nbTasks = #cpu)
        dataSize : int > 0
            The size of the data to process

        Return
        ------
        triplet = (nbTasks, counts, starts)
        nbTasks : int
            The final parallelization factor. It is computed as
            min(#cpu/nbTasks, dataSize)
        counts : list of int
            The number of data pieces for each parallel task
        starts : list of int
            The start indexes of the data for each parallel task
        """
        if nbTasks < 0:
            cpu = cpu_count()+nbTasks+1
            if cpu <= 0:
                cpu = 1
            nbTasks = min(cpu, dataSize)
        else:
            if nbTasks == 0:
                nbTasks = 1
            nbTasks = min(nbTasks, dataSize)

        counts = [dataSize / nbTasks] * nbTasks

        for i in xrange(dataSize % nbTasks):
            counts[i] += 1

        starts = [0] * (nbTasks + 1)

        for i in xrange(1, nbTasks + 1):
            starts[i] = starts[i - 1] + counts[i - 1]

        return nbTasks, counts, starts
Developer: jm-begon | Project: masterthesis | Lines: 45 | Source: TaskManager.py


Example 13: _partition_clips

def _partition_clips(n_jobs, n_clips):
    if n_jobs == -1:
        n_jobs = min(cpu_count(), n_clips)

    else:
        n_jobs = min(n_jobs, n_clips)

    counts = [n_clips / n_jobs] * n_jobs

    for i in xrange(n_clips % n_jobs):
        counts[i] += 1

    starts = [0] * (n_jobs + 1)

    for i in xrange(1, n_jobs + 1):
        starts[i] = starts[i - 1] + counts[i - 1]

    return n_jobs, counts, starts
Developer: Sandy4321 | Project: kaggle-marinexplore | Lines: 18 | Source: estimator.py


Example 14: _set_params_with_dependencies

    def _set_params_with_dependencies(self):
        if self.max_bin is None:
            if self._is_sparse_train_X:
                self._max_bin = 200
            else:
                self._max_bin = 65000
        else:
            self._max_bin = self.max_bin

        if isinstance(self.min_samples_leaf, utils.FLOATS):
            self._min_samples_leaf = ceil(self.min_samples_leaf * self._n_samples)
        else:
            self._min_samples_leaf = self.min_samples_leaf

        if self.n_jobs == -1:
            self._n_jobs = 0
        elif self.n_jobs < 0:
            self._n_jobs = cpu_count() + self.n_jobs + 1
        else:
            self._n_jobs = self.n_jobs

        self._set_target_and_loss()
Developer: fukatani | Project: rgf_python | Lines: 22 | Source: fastrgf_model.py


Example 15: _e_step

    def _e_step(self, X, cal_delta):
        """
        E-step

        set `cal_delta == True` when we need to run _m_step
        for inference, set it to False
        """

        # parell run e-step
        if self.n_jobs == -1:
            n_jobs = cpu_count()
        else:
            n_jobs = self.n_jobs

        results = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_update_gamma)
            (X[idx_slice, :], self.expElogbeta, self.alpha,
             self.rng, 100, self.mean_change_tol, cal_delta)
            for idx_slice in gen_even_slices(X.shape[0], n_jobs))

        # merge result
        gammas, deltas = zip(*results)
        gamma = np.vstack(gammas)

        if cal_delta:
            # This step finishes computing the sufficient statistics for the
            # M step, so that
            # sstats[k, w] = \sum_d n_{dw} * phi_{dwk}
            # = \sum_d n_{dw} * exp{Elogtheta_{dk} + Elogbeta_{kw}} / phinorm_{dw}.
            delta_component = np.zeros(self.components_.shape)
            for delta in deltas:
                delta_component += delta
            delta_component *= self.expElogbeta
        else:
            delta_component = None

        return (gamma, delta_component)
Developer: praveenkottayi | Project: topicModels | Lines: 37 | Source: lda.py
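
The gen_even_slices helper used to batch the E-step comes from sklearn.utils; a small sketch of what it yields (illustrative, not part of the LDA code above):

# Sketch: how sklearn.utils.gen_even_slices splits n samples into n_packs slices.
from sklearn.utils import gen_even_slices

print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]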


Example 16: fit

    def fit(self, X, Y, H_init=None):
        """Learn parameters using subgradient descent.

        Parameters
        ----------
        X : iterable
            Traing instances. Contains the structured input objects.
            No requirement on the particular form of entries of X is made.

        Y : iterable
            Training labels. Contains the strctured labels for inputs in X.
            Needs to have the same length as X.

        constraints : None
            Discarded. Only for API compatibility currently.
        """
        print("Training latent subgradient structural SVM")
        self.w = getattr(self, "w", np.random.normal(
            0, .001, size=self.model.size_psi))
        #constraints = []
        self.objective_curve_ = []
        n_samples = len(X)
        try:
            # catch ctrl+c to stop training
            for iteration in xrange(self.max_iter):
                positive_slacks = 0
                objective = 0.
                #verbose = max(0, self.verbose - 3)

                if self.n_jobs == 1:
                    # online learning
                    for x, y in zip(X, Y):
                        h = self.model.latent(x, y, self.w)
                        h_hat = self.model.loss_augmented_inference(
                            x, h, self.w, relaxed=True)
                        delta_psi = (self.model.psi(x, h)
                                     - self.model.psi(x, h_hat))
                        slack = (-np.dot(delta_psi, self.w)
                                 + self.model.loss(h, h_hat))
                        objective += np.maximum(slack, 0)
                        if slack > 0:
                            positive_slacks += 1
                        self._solve_subgradient(delta_psi, n_samples)
                else:
                    #generate batches of size n_jobs
                    #to speed up inference
                    if self.n_jobs == -1:
                        n_jobs = cpu_count()
                    else:
                        n_jobs = self.n_jobs

                    n_batches = int(np.ceil(float(len(X)) / n_jobs))
                    slices = gen_even_slices(n_samples, n_batches)
                    for batch in slices:
                        X_b = X[batch]
                        Y_b = Y[batch]
                        verbose = self.verbose - 1
                        candidate_constraints = Parallel(
                            n_jobs=self.n_jobs,
                            verbose=verbose)(delayed(find_constraint_latent)(
                                self.model, x, y, self.w)
                                for x, y in zip(X_b, Y_b))
                        dpsi = np.zeros(self.model.size_psi)
                        for x, y, constraint in zip(X_b, Y_b,
                                                    candidate_constraints):
                            y_hat, delta_psi, slack, loss = constraint
                            objective += slack
                            dpsi += delta_psi
                            if slack > 0:
                                positive_slacks += 1
                        dpsi /= float(len(X_b))
                        self._solve_subgradient(dpsi, n_samples)

                # some statistics
                objective += np.sum(self.w ** 2) / self.C / 2.
                #objective /= float(n_samples)

                if positive_slacks == 0:
                    print("No additional constraints")
                    if self.break_on_no_constraints:
                        break
                if self.verbose > 0:
                    print(self)
                    print("iteration %d" % iteration)
                    print("positive slacks: %d, "
                          "objective: %f" %
                          (positive_slacks, objective))
                self.objective_curve_.append(objective)

                if self.verbose > 2:
                    print(self.w)

                self._compute_training_loss(X, Y, iteration)
                if self.logger is not None:
                    self.logger(self, iteration)

        except KeyboardInterrupt:
            pass
        print("final objective: %f" % self.objective_curve_[-1])
        print("calls to inference: %d" % self.model.inference_calls)
#......... part of the code omitted here .........
Developer: hushell | Project: pystruct | Lines: 101 | Source: subgradient_latent_ssvm.py


Example 17: _fit

    def _fit(self, X, y):
        X, y = check_X_y(X, y, "csr")
        # Initialization
        cv = check_cv(self.cv, y, is_classifier(self.estimator))
        scorer = check_scoring(self.estimator, scoring=self.scoring)
        n_features = X.shape[1]

        if self.max_features is not None:
            if not isinstance(self.max_features, numbers.Integral):
                raise TypeError("'max_features' should be an integer between 1 and {} features."
                                " Got {!r} instead."
                                .format(n_features, self.max_features))
            elif self.max_features < 1 or self.max_features > n_features:
                raise ValueError("'max_features' should be between 1 and {} features."
                                 " Got {} instead."
                                 .format(n_features, self.max_features))
            max_features = self.max_features
        else:
            max_features = n_features

        if not isinstance(self.n_gen_no_change, (numbers.Integral, np.integer, type(None))):
            raise ValueError("'n_gen_no_change' should either be None or an integer."
                             " {} was passed."
                             .format(self.n_gen_no_change))

        estimator = clone(self.estimator)

        # Genetic Algorithm
        toolbox = base.Toolbox()

        toolbox.register("attr_bool", random.randint, 0, 1)
        toolbox.register("individual", tools.initRepeat,
                         creator.Individual, toolbox.attr_bool, n=n_features)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        toolbox.register("evaluate", _evalFunction, gaobject=self, estimator=estimator, X=X, y=y,
                         cv=cv, scorer=scorer, verbose=self.verbose, fit_params=self.fit_params,
                         max_features=max_features, caching=self.caching)
        toolbox.register("mate", tools.cxUniform, indpb=self.crossover_independent_proba)
        toolbox.register("mutate", tools.mutFlipBit, indpb=self.mutation_independent_proba)
        toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)

        if self.n_jobs == 0:
            raise ValueError("n_jobs == 0 has no meaning.")
        elif self.n_jobs > 1:
            pool = multiprocessing.Pool(processes=self.n_jobs)
            toolbox.register("map", pool.map)
        elif self.n_jobs < 0:
            pool = multiprocessing.Pool(processes=max(cpu_count() + 1 + self.n_jobs, 1))
            toolbox.register("map", pool.map)

        pop = toolbox.population(n=self.n_population)
        hof = tools.HallOfFame(1, similar=np.array_equal)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean, axis=0)
        stats.register("std", np.std, axis=0)
        stats.register("min", np.min, axis=0)
        stats.register("max", np.max, axis=0)

        if self.verbose > 0:
            print("Selecting features with genetic algorithm.")

        _, log = _eaFunction(pop, toolbox, cxpb=self.crossover_proba, mutpb=self.mutation_proba,
                             ngen=self.n_generations, ngen_no_change=self.n_gen_no_change,
                             stats=stats, halloffame=hof, verbose=self.verbose)
        if self.n_jobs != 1:
            pool.close()
            pool.join()

        # Set final attributes
        support_ = np.array(hof, dtype=np.bool)[0]
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X[:, support_], y)

        self.generation_scores_ = np.array([score for score, _ in log.select("max")])
        self.n_features_ = support_.sum()
        self.support_ = support_

        return self
Developer: manuel-calzolari | Project: sklearn-genetic | Lines: 78 | Source: __init__.py
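
The n_jobs handling above sizes a multiprocessing.Pool and registers pool.map as DEAP's map function. A stripped-down sketch of that idea using only the standard library (hedged: the real toolbox wiring depends on DEAP and is omitted here):

# Sketch: sizing a multiprocessing.Pool from a joblib-style n_jobs value and using pool.map.
import multiprocessing

def square(x):
    return x * x

if __name__ == "__main__":
    n_jobs = -1   # joblib convention: -1 means "all CPUs"
    processes = max(multiprocessing.cpu_count() + 1 + n_jobs, 1)
    with multiprocessing.Pool(processes=processes) as pool:
        print(pool.map(square, range(8)))   # [0, 1, 4, 9, 16, 25, 36, 49]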


Example 18: wheel_up_features_bfs

def wheel_up_features_bfs (initialBunch,
                           trees,
                           factory,
                           loss,
                           learning_rate=0.25,
                           nIters=100,
                           trees_sample_size=100,
                           verbose = True,
                           vali_factory = None,
                           learning_rate_decay = 1.,
                           trees_sample_increase = 0,
                           regularizer = 0.,
                           random_walk = True,
                           use_joblib = False,
                           n_jobs = -1,
                           joblib_backend = "threading",
                           copy_pred = False):
    """
    Iterative BFS over best ADD-1 results for [nTrees] iterations
    """
    allTrees = copy.copy(trees)
    
    bunch = copy.copy(initialBunch)
    pred = factory.predict(bunch)
    bestScore = loss.score(factory,pred)
    
    if vali_factory is not None:
        vali_pred = vali_factory.predict(bunch)
        vali_score = loss.score(vali_factory,vali_pred)
        vali_scores = [vali_score]
    
    
    if use_joblib:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count()
                
        if joblib_backend == "threading":
            #create copies of data once to escape GIL forever
            factory = [copy.deepcopy(factory) for i in range(n_jobs)]
            losses = [copy.deepcopy(loss) for i in range(n_jobs)]

        elif joblib_backend == "multiprocessing":
            pass
        else:
            raise ValueError, "joblib_backend must be either 'threading' or 'multiprocessing'"
    
  
    if verbose:
        print "\niteration #",0," ntrees = ", len(bunch),"\nbest loss = ",bestScore
        print "learning_rate = ", learning_rate
        print "sample_size", trees_sample_size

    
    for itr in xrange(1,nIters+1):
        change_index = random.randint(0, len(bunch)-1) if random_walk else (itr-1) % len(bunch)
        trees_sample = random.sample(allTrees,trees_sample_size)+ [bunch[change_index]]
        bunch_wo = copy.copy(bunch)
        replaced_tree = bunch_wo.pop(change_index)

        if use_joblib and joblib_backend=="threading":
            #split trees into sections
            indices = [0]+[len(trees_sample)*(i+1)/n_jobs for i in range(n_jobs)]
            treeSections = [trees_sample[indices[i]:indices[i+1]] for i in range(n_jobs)]
            
            pred_wo = pred - factory[0].predict(PrunedFormula([bunch[change_index]],bias=0.))

            if copy_pred:
                pred_wo = [copy.deepcopy(pred) for i in range(n_jobs)]
            else:
                pred_wo = [pred for i in range(n_jobs)]

            #execute sections in parallel
            tasks = [joblib.delayed(try_add1_bfs)(treeSections[ithread],factory[ithread],
                                                          learning_rate,losses[ithread],
                                                          1,pred_wo[ithread],regularizer=regularizer,
                                                          use_joblib=False)
                                                for ithread in range(n_jobs)]
                                                    
            _res = joblib.Parallel(n_jobs = n_jobs,
                           backend = "threading")(tasks)
            _additions,newScores,newPreds = reduce(lambda a,b:[a[i]+b[i] for i in range(3)], _res)
            
        else:
            pred_wo = pred - factory.predict(PrunedFormula([bunch[change_index]],bias=0.))

            _additions,newScores,newPreds = try_add1_bfs(trees_sample,factory,
                                                         learning_rate,loss,
                                                          1,pred_wo,regularizer=regularizer,
                                                          use_joblib=use_joblib,n_jobs=n_jobs)
            
            
            

        learning_rate *= learning_rate_decay
        trees_sample_size = min(len(allTrees),trees_sample_size + trees_sample_increase)
            
        triples = zip(newScores,_additions,newPreds)
        triples.sort(key = lambda el: el[0])

        newBestScore = min(newScores)
#......... part of the code omitted here .........
Developer: justheuristic | Project: pruner | Lines: 101 | Source: greedy.py


Example 19: __init__

 def __init__(self, n_features, n_jobs=1):
     self.n_features = n_features
     if n_jobs == -1:
         n_jobs = cpu_count()
     self.n_jobs = n_jobs
Developer: pgervais | Project: nilearn | Lines: 5 | Source: searchlight.py


Example 20: __init__

    def __init__(self, *files, columns=None, ngrams=2, decap=False, patterns=None, mask=None):
        """
        Create a new data object with the following attributes:

            * instances - list of raw text instances
            * labels - array of instance labels in same order as raw text
            * features - matrix of feature vectors per text instance
            * names - array of feature names in same order as features

        Both features and names are undefined until extracted
        using some Vectorizer.

        Exclusive options for either BIO-NER vs. plain-text input:

        1. **BIO-NER** paramters: Define a `columns` integer to define the number of disregarded
           columns and thereby declare that the input will be in BIO-NER format. In addtion, the
           `ngram` option can be set to define the ngram size of the tokens to generate.
           All other keyword parameter will be ignored.

        2. **plain-text** keyword parameters: Set `decap=True` to lower-case the first letter of
           each plain-text line. Use a list of regex `patterns` and a repacement string `mask` to
           "mask" pattern-matched words in regular (non-`column`) input.
        """
        try:
            if columns is None:
                inputs = [[l.strip('\r\n') for l in f] for f in files]

                if decap:
                    for i in range(len(inputs)):
                        inputs[i] = ["{}{}".format(l[0].lower(), l[1:])
                                     for l in inputs[i] if len(l)]

                if patterns and mask:
                    self.instances = []
                    splits = joblib.cpu_count()

                    for lines in inputs:
                        jobs = tuple(lines[i::splits] for i in range(splits))
                        jobs = joblib.Parallel(n_jobs=splits)(
                            delayed(subAll)(patterns, mask, lines) for lines in jobs
                        )
                        self.instances.append(list(zip(lines, chain(*jobs))))
                else:
                    self.instances = [list(zip(lines, lines)) for lines in inputs]

            else:
                self.instances = []

                for f in files:
                    # FIXME: instead of two hardcoded entity masks,
                    # FIXME: this has to be dynamic or generic...
                    sentences = SentenceParser(f, ('FACTOR', 'TARGET'), id_columns=columns)

                    if not columns:
                        sentences = list(enumerate(sentences, start=1))

                    data = [(sid, asDict(s, ngrams)) for sid, s in sentences]
                    self.instances.append(data)
        except UnicodeDecodeError as e:
            import sys
            print('decoding error:', e.reason, 'in input file')
            sys.exit(1)

        # ensure the minority label(s) come first (important for the evaluation, too!)
        self.instances = sorted(self.instances, key=len)

        self.classes = len(self.instances)
        self.labels = np.concatenate([
            (np.zeros(len(data), dtype=np.uint8) + i)
            for i, data in enumerate(self.instances)
        ])
        self.ids = None
        self.raw = None
        self.features = None
        self.names = None

        if columns is None:
            self.raw, self.instances = zip(*list(chain.from_iterable(self.instances)))

            if len(self.raw) and '\t' in self.raw[0]:
                self.ids = [l.split('\t', 1)[0] for l in self.raw]
            else:
                self.ids = self.raw
        else:
            self.ids, self.instances = zip(*list(chain.from_iterable(self.instances)))
Developer: fnl | Project: libfnl | Lines: 85 | Source: textclass.py
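
The masking branch above distributes input lines round-robin (lines[i::splits]) over one joblib job per CPU. Below is a self-contained sketch of that splitting pattern; process_chunk is a hypothetical stand-in for the regex masking (subAll) that is specific to libfnl.

# Sketch: round-robin splitting of work across cpu_count() joblib workers.
from itertools import chain
from joblib import Parallel, delayed, cpu_count

def process_chunk(chunk):
    return [s.upper() for s in chunk]     # stand-in for the real per-line masking step

lines = ["alpha", "beta", "gamma", "delta", "epsilon"]
splits = cpu_count()
chunks = [lines[i::splits] for i in range(splits)]
processed = Parallel(n_jobs=splits)(delayed(process_chunk)(c) for c in chunks)
flat = list(chain(*processed))            # note: round-robin order, not the original order
print(flat)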



Note: the sklearn.externals.joblib.cpu_count examples in this article were compiled by 纯净天空 from source code and documentation hosted on GitHub, MSDocs and similar platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright in the source code remains with the original authors, and any further distribution or use should follow the corresponding project's license. Do not reproduce without permission.

