• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python api.logit函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中statsmodels.formula.api.logit函数的典型用法代码示例。如果您正苦于以下问题:Python logit函数的具体用法?Python logit怎么用?Python logit使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了logit函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: reduce_multi_model

def reduce_multi_model(orig_fitted, base_string, res, df, fit=None):
    """orig_fitted = an object returned from calling .fit() on a statsmodels logit model
    base_string = the right hand side of the formula used to estimate orig_fitted
    res = The string for the column name in df that has the classes.
    df = the pandas dataframe from which orig_fitted was estimated
    ==========
    Returns a fitted logistic regression model, and the base string used to estimate
    the model.
    
    If at least one variable has a p-value which is > 0.05, this function will
    removes the variable with the worst p-value, estimate a new logistic regression,
    and repeat the process until no more insignificant variables can be removed."""
    
    #Check the class of the function inputs
    assert isinstance(base_string, str)
    assert isinstance(res, str)
    assert isinstance(df, pd.DataFrame)
    
    #Try to reduce the number of variables in the original model
    new_bvars = whittle_multi_model_vars(orig_fitted, base_string)
    #Initialize a variable for the smallest model
    small_model = orig_fitted
    #Initialize a variable for the smallest model base_string
    small_base = base_string
    
    node_variables = isolate_node_cols(df)
    
    while new_bvars is not None: #If a reduced set of variables has been found
        #new_base = " + ".join(["0"] + new_bvars) #Create a new base_string
        #new_fstring = res + " ~ " + new_base #Create a new statsmodels formula string
        
        model_vars = combat_multi_collinearity(df, new_bvars, node_variables, max_cond=2000)
        new_base = " + ".join(model_vars) #Create a string of all variables using in the multivariate regression
        new_fstring = res + " ~ " + "0 + " + new_base #Create the new formula string
        
        try: #Try to fit a new logistic regression model
        #Use the if...else statement to accomodate various optimization methods
            if fit is None:
                new_model = smf.logit(new_fstring, data = df).fit(maxiter=2000, disp=False)
            else:
                new_model = smf.logit(new_fstring, data = df).fit(method=fit, maxiter=2000, disp=False)
        #Assign small_base to the smallest identified set of base variables so far
            small_base = " + ".join(new_bvars)  
        #Assign small_model to the model with smallest set of base variables so far
            small_model = new_model
        #Search for new base variables
            new_bvars =  whittle_multi_model_vars(new_model, new_base)
        except Exception as inst: #If the model could not be fit, print a message saying so
            #print "Estimating logit model failed when using formula: {}".format(new_fstring)
            #Note the line below is un-tested, but I added it because it seemed
            #that an infinite loop would result without it.
            #print inst
            new_bvars = None

    #Print the model results of the most reduced model.            
    #print "="*10
    #print "The reduced model results are:"
    #print small_model.summary()
    
    return small_model, small_base
开发者ID:timothyb0912,项目名称:cs289_final_project,代码行数:60,代码来源:hybrid_logit.py


示例2: RunLogisticModels

def RunLogisticModels(live):
    """Runs regressions that predict sex.

    live: DataFrame of pregnancy records
    """
    #live = linear.ResampleRowsWeighted(live)

    df = live[live.prglngth>30]
    # df = JoinFemResp(df)

    df['boy'] = (df.babysex==1).astype(int)
    df['isyoung'] = (df.agepreg<20).astype(int)
    df['isold'] = (df.agepreg<35).astype(int)
    df['season'] = (((df.datend+1) % 12) / 3).astype(int)

    # run the simple model
    model = smf.logit('boy ~ agepreg', data=df)    
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # run the complex model
    model = smf.logit('boy ~ agepreg + hpagelb + birthord + C(race)', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # make the scatter plot
    exog = pandas.DataFrame(model.exog, columns=model.exog_names)
    endog = pandas.DataFrame(model.endog, columns=[model.endog_names])
    
    xs = exog['agepreg']
    lo = results.fittedvalues
    o = np.exp(lo)
    p = o / (o+1)

    #thinkplot.Scatter(xs, p, alpha=0.1)
    #thinkplot.Show()

    # compute accuracy
    actual = endog['boy']
    baseline = actual.mean()

    predict = (results.predict() >= 0.5)
    true_pos = predict * actual
    true_neg = (1 - predict) * (1 - actual)

    acc = (sum(true_pos) + sum(true_neg)) / len(actual)
    print(acc, baseline)

    columns = ['agepreg', 'hpagelb', 'birthord', 'race']
    new = pandas.DataFrame([[35, 39, 3, 1]], columns=columns)
    y = results.predict(new)
    print(y)
开发者ID:Bercio,项目名称:ThinkStats2,代码行数:56,代码来源:regression.py


示例3: calculate_odds_ratio

def calculate_odds_ratio(genotypes, phen_vector1, phen_vector2, reg_type, covariates, response='',
                         phen_vector3=''):  # diff - done
    """
    Runs the regression for a specific phenotype vector relative to the genotype data and covariates.

    :param genotypes: a DataFrame containing the genotype information
    :param phen_vector: a array containing the phenotype vector
    :param covariates: a string containing all desired covariates
    :type genotypes: pandas DataFrame
    :type phen_vector: numpy array
    :type covariates: string

    .. note::
        The covariates must be a string that is delimited by '+', not a list.
        If you are using a list of covariates and would like to convert it to the pyPhewas format, use the following::

            l = ['genotype', 'age'] # a list of your covariates
            covariates = '+'.join(l) # pyPhewas format

        The covariates that are listed here *must* be headers to your genotype CSV file.
    """

    data = genotypes
    data['y'] = phen_vector1
    data['MaxAgeAtCPT'] = phen_vector2
    # f='y~'+covariates
    if response:
        f = response + '~ y + genotype +' + covariates
        if phen_vector3.any():
            data['phe'] = phen_vector3
            f = response + '~ y + phe + genotype' + covariates
    else:
        f = 'genotype ~ y +' + covariates
        if phen_vector3.any():
            data['phe'] = phen_vector3
            f = 'genotype ~ y + phe +' + covariates
    try:
        if reg_type == 0:
            logreg = smf.logit(f, data).fit(method='bfgs', disp=False)
            p = logreg.pvalues.y
            odds = logreg.params.y
            conf = logreg.conf_int()
            od = [-math.log10(p), logreg.params.y, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])]
        else:
            linreg = smf.logit(f, data).fit(method='bfgs', disp=False)
            p = linreg.pvalues.y
            odds = linreg.params.y
            conf = linreg.conf_int()
            od = [-math.log10(p), linreg.params.y, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])]
    except:
        odds = 0
        p = np.nan
        od = [np.nan, np.nan, np.nan]
    return (odds, p, od)
开发者ID:BennettLandman,项目名称:pyPheWAS,代码行数:54,代码来源:pyProWAS.py


示例4: logistic_regression

 def logistic_regression(self, use_glm=True):
     """
     (b) it seems the statistical significant predict variable is only Lag2. How disappointing...
     """
     formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"
     model = (
         smf.glm(formula, data=self.df, family=sm.families.Binomial())
         if use_glm
         else smf.logit(formula, data=self.transformedDF)
     )
     result = model.fit()
     if use_glm:
         probs = result.fittedvalues
         """Beware the prob here is the index 0's prob, so we should use the lambda function below"""
         pred_values = probs.map(lambda x: 0 if x > 0.5 else 1)
     else:
         """The probability of being 1"""
         probs = Series(result.predict(sm.add_constant(self.df[["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]])))
         pred_values = probs.map(lambda x: 1 if x > 0.5 else 0)
     """
     (c) Percentage of currect predictions: (54+557)/(54+557+48+430) = 56.1%.
         Weeks the market goes up the logistic regression is right most of the time, 557/(557+48) = 92.1%.
         Weeks the market goes up the logistic regression is wrong most of the time 54/(430+54) = 11.2%.
     """
     tp.output_table(pred_values.values, self.transformedDF[self.y_col].values)
开发者ID:Aran00,项目名称:ISIRExerciseCode,代码行数:25,代码来源:Exec10.py


示例5: logistic_model

def logistic_model(data, explanatory_variables, response_variable, 
                   maxiter = 35, verbose = True):
    explanatory_vars = ' + '.join(explanatory_variables)
    formula = response_variable + ' ~ ' + explanatory_vars

    try:
        model = smf.logit(formula = formula, data = data).fit(maxiter = maxiter)
    except:
        print('Error "' + str(sys.exc_info()[1]) + '" while processing model', formula)
        model = None
    
    if verbose and model != None:
        print()
        print('MODEL:', formula, '\n')
        print(model.summary())
        print()

        # odds ratios with 95% confidence intervals
        print ("Odds Ratios")
        params = model.params
        conf = model.conf_int()
        conf['OR'] = params
        conf.columns = ['Lower CI', 'Upper CI', 'Odds Ratios']
        print (numpy.exp(conf))
        
    return(model)
开发者ID:MColosso,项目名称:Forest-Fires,代码行数:26,代码来源:Forest+Fires+-+week+4.py


示例6: LogisticRegressionExample

def LogisticRegressionExample():
    """Runs a simple example of logistic regression and prints results.
    """
    y = np.array([0, 1, 0, 1])
    x1 = np.array([0, 0, 0, 1])
    x2 = np.array([0, 1, 1, 1])

    beta = [-1.5, 2.8, 1.1]

    log_o = beta[0] + beta[1] * x1 + beta[2] * x2 
    print(log_o)

    o = np.exp(log_o)
    print(o)

    p = o / (o+1)
    print(p)

    like = y * p + (1-y) * (1-p)
    print(like)
    print(np.prod(like))

    df = pandas.DataFrame(dict(y=y, x1=x1, x2=x2))
    results = smf.logit('y ~ x1 + x2', data=df).fit()
    print(results.summary())
开发者ID:13tsuyoshi,项目名称:ThinkStats2,代码行数:25,代码来源:regression.py


示例7: _corr

    def _corr(self, sel, suffix):
        formula = str('model_accuracy ~ human_accuracy')
        logreg = smf.logit(formula=formula, data=sel).fit()
        summ = logreg.summary()
        if self.html is None:
            print(summ)
        else:
            summ = summ.as_html().replace('class="simpletable"',
                                          'class="simpletable table"')

        sel = sel.rename(columns={'human_accuracy': 'human accuracy',
                                  'model_accuracy': 'model accuracy'})

        sns.lmplot('human accuracy', 'model accuracy', data=sel, x_jitter=.01,
                    y_jitter=.05, logistic=True, truncate=True)

        bins = np.digitize(sel['human accuracy'], np.arange(.05,1,.1))
        #bins[bins==11] = 10
        count = sel['model accuracy'].groupby(bins).count()
        mean = sel['model accuracy'].groupby(bins).mean()
        sns.plt.scatter(.1*mean.index, mean, s=10*count, c='.15',
                        linewidths=0, alpha=.8)
        sns.plt.title(models.NICE_NAMES[self.model_name])
        sns.plt.xlim([-.1, 1.1])
        sns.plt.ylim([-.1, 1.1])
        self.show(pref='corr_sil', suffix=self.model_name + '_' + suffix,
                  caption=suffix + summ)
开发者ID:mageed,项目名称:conv-exp,代码行数:27,代码来源:run.py


示例8: run_logits

def run_logits(grouped, formula, var):
    for code, group in grouped:
        country = get_country(code).ljust(14)
        model = smf.logit(formula, data=group)    
        results = model.fit(disp=False)
        nobs, param, stars = extract_res(results, var=var)
        arrow = '<--' if stars and param > 0 else ''
        print(country, nobs, '%0.3g'%param, stars, arrow, sep='\t')
开发者ID:Libardo1,项目名称:ProbablyOverthinkingIt,代码行数:8,代码来源:ess.py


示例9: log_reg

def log_reg(formula, df):
    try:
        model1 = smf.logit(formula = formula, data=df).fit()
        print model1.summary()
    except Exception:
        print "+" * 40
        print "bad formula"
        print "+" * 40
开发者ID:AlexanderMwangi,项目名称:DAT3-students,代码行数:8,代码来源:logit_reg_Patient2.py


示例10: fit_model

def fit_model(formula, model_file):
    """
    Saves a model
    :param formula: formula for the model
    :param model_file: name of file to save the model to
    """
    data = load_data()
    model = logit(formula=formula, data=data)
    fitted = model.fit()
    fitted.save(model_file)
开发者ID:NathanDeMaria,项目名称:Football,代码行数:10,代码来源:save_model.py


示例11: logistic_regression_test

def logistic_regression_test():
  df = pandas.DataFrame.from_csv('./generated_logistic_data.csv')

  generated_model = smf.logit('y ~ variable_a + variable_b + variable_c', df)
  generated_fit = generated_model.fit()
  roc_data = sklearn.metrics.roc_curve(df['y'], generated_fit.predict(df))
  auc = sklearn.metrics.auc(roc_data[0], roc_data[1])
  print generated_fit.summary()
  print "AUC score: {0}".format(auc)
  assert auc > .8, 'AUC should be significantly above random'
开发者ID:zbxzc35,项目名称:Machine-Learning-Test-by-Test,代码行数:10,代码来源:logistic_regression_tests.py


示例12: generate_model

def generate_model(df):
    '''
    Create a logistic regression model from loans data based on fields
    FICO.score, Interest.Rate, and Interest.below12
    :param df: a dataframe with fields for the independent vars fico and interest
    and the dependent var discrete_rate
    :return: a fitted logistic model
    '''
    model = smf.logit(formula='discrete_rate  ~ fico + interest', data=df)
    fitted_model = model.fit()
    return fitted_model
开发者ID:AkiraKane,项目名称:thinkful,代码行数:11,代码来源:U2L4P3_logistic_regression_analysis.py


示例13: logRegR

    def logRegR(self, event):
        # would have to mess with Patsy formula parser to get more powerful...
        # too much work
        dlg = wx.TextEntryDialog(self.parent, "Enter the linear regression formula")
        if dlg.ShowModal() == wx.ID_OK:
            model = smf.logit(formula=dlg.GetValue(), data=self.parent.data.data)
            results = model.fit()
            self.parent.write("\n" + str(results.summary()) + "\n")
            sns.regplot(results.predict(), model.endog, ci=False, y_jitter=0.2)
            plt.show()

        dlg.Destroy()
开发者ID:alanhdu,项目名称:Dex,代码行数:12,代码来源:Stats.py


示例14: fit_model

def fit_model(y, formula, df):
    from statsmodels.formula.api import ols, logit

    # If you have a dichotomous variable then
    # we're going to run a logistic regression
    if df[y].nunique() == 2:
        lm = logit(formula, df).fit()
    # otherwise we'll run an ordinary least
    # squares regression
    else:
        lm = ols(formula, df).fit()

    return lm
开发者ID:KirstieJane,项目名称:DESCRIBING_DATA,代码行数:13,代码来源:create_correlation_matrix.py


示例15: check_initial_specification

def check_initial_specification(dataframe, result_string, new_var, min_specification, fit_word=None):
    assert isinstance(dataframe, pd.DataFrame) #Make sure dataframe is a pandas dataframe.
    assert isinstance(result_string, str) #Make sure the result_string is actually a string
    assert isinstance(new_var, list) #Make sure new_var is a list
    assert isinstance(min_specification, str) #Make sure the min_specification is a string
    
    base_vars = min_specification.split(" + ") #Extract the variables used in the minimum specification
    if "0" in base_vars: #Remove any zeros from the variables used in the minimum specification
        base_vars.remove("0")
    
    #Initialize starting values for the optimization
    start_vals = np.random.rand(len(base_vars + new_var))
    
    #Create the formula string for the logistic regression
    fString = result_string + " ~ " + min_specification + " + " + " + ".join(new_var)
    
    #Make sure the matrix for the logistic regression is invertible
    if not check_full_rank(dataframe, base_vars + new_var):
        #If not, raise an error
        raise Exception("The base model plus {} is not of full rank.".format(new_var))
    
    #Fit the logistic regression
    if fit_word is None:
        model = smf.logit(fString, data=dataframe).fit(start_params = start_vals, maxiter=2000, disp=False)
    else:
        model = smf.logit(fString, data=dataframe).fit(method=fit_word, start_params = start_vals, maxiter=2000, disp=False)
        
    if not model.mle_retvals["converged"]: #Check if the model converged
        #If it did not, raise an error
        raise Exception("The model for {} did not converge".format(new_var))
        
    lowest_pval = model.pvalues[new_var[0]] #Initialize a value for the lowest p-value
    for orig_var in new_var: #Iterate through the new variables
        current_pval = model.pvalues[orig_var]
        #If the current variables p-value is less than the lowest p-value
        if current_pval < lowest_pval:
            #Keep track of this number
            lowest_pval = current_pval
    return lowest_pval
开发者ID:timothyb0912,项目名称:cs289_final_project,代码行数:39,代码来源:hybrid_logit.py


示例16: log_regression

def log_regression(wine_set):
    # # examining the data before recoding
    # print(wine_set["sulphates"].describe())
    # wine_set["sulphates_c"] = pd.qcut(wine_set["sulphates"], 4)
    # print(wine_set.groupby("sulphates_c").size())
    # print()
    # #
    # print(wine_set["alcohol"].describe())
    # wine_set["alcohol_c"] = pd.qcut(wine_set["alcohol"], 4)
    # print(wine_set.groupby("alcohol_c").size())
    # print()
    #
    # print(wine_set["quality"].describe())
    # wine_set["quality_c"] = pd.qcut(wine_set["quality"], 3)
    # print(wine_set.groupby("quality_c").size())
    # print()


    # recode quality into 2 groups: 0:{3,4,5,6}, 1:{7,8,9}
    recode = {3: 0, 4: 0, 5:0, 6:0, 7:1, 8:1, 9:1}
    wine_set['quality_c'] = wine_set['quality'].map(recode)

    # recode sulphates into 2 groups: 0: <= mean, 1: > mean
    def sulphates_to_cat(x):
       if x['sulphates'] <= wine_set['sulphates'].mean():
          return 0
       else:
          return 1
    wine_set['sulphates_c'] = wine_set.apply(lambda x: sulphates_to_cat(x), axis=1)

    # recode alcohol into 2 groups: 0: <= mean , 1: > mean
    def alcohol_to_cat(x):
       if x['alcohol'] <= wine_set['alcohol'].mean():
          return 0
       else:
          return 1
    wine_set['alcohol_c'] = wine_set.apply(lambda x: alcohol_to_cat(x), axis=1)
    # print(wine_set.head(10))

    # logistic regression for sulphates+alcohol -> quality
    print ("Logistic regression model for the association between wine's quality and sulphates&alcohol")
    model1 = smf.logit(formula="quality_c ~ sulphates_c + alcohol_c", data=wine_set)
    results1 = model1.fit()
    print(results1.summary())

    # odds ratios with 95% confidence intervals
    print("\nConfidence intervals")
    conf = results1.conf_int()
    conf['Odds ratio'] = results1.params
    conf.columns = ['Lower conf.int.', 'Upper conf.int.', 'Odds ratio']
    print(numpy.exp(conf))
开发者ID:ekolik,项目名称:-Python-Analysis_of_wine_quality,代码行数:51,代码来源:regression_modeling.py


示例17: report_logitreg

def report_logitreg(formula, data, verbose=True):
    """Fit logistic regression, print a report, and return the fit object."""
    results = smf.logit(formula, data=data).fit()
    summary = results.summary()
    margeff = results.get_margeff().summary()


    if verbose:
        report = """
{summary}\n\n
{margeff}\n""".format(summary=summary,margeff=margeff)

        print(report)

    return results
开发者ID:xguse,项目名称:crunchers,代码行数:15,代码来源:lazy_stats.py


示例18: test_log_regression

def test_log_regression():
	"""Tests the results of logistic regression.
	Explore on the beta coefficient

	"""
	run = load_in_dataframe(2)
	run_added = add_gainlossratio(run)
	run_final = organize_columns(run_added)

	#fit the logistic regression line
	fitted = logit("respcat ~ gain + loss", run_final).fit()
	#get the parameters
	fitted_params = fitted.params.as_matrix()
	test_fitted_params = log_regression(run_final).as_matrix()
	assert_array_equal(fitted_params,test_fitted_params)
开发者ID:gastonstat,项目名称:project-epsilon,代码行数:15,代码来源:test_logistic_reg.py


示例19: logistic_fit

 def logistic_fit(self, glm_fit=True):
     '''
     The logit function would report error when y(Direction) is not transformed to 0/1
     So glm looks easier to use
     '''
     formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"
     if glm_fit is True:
         model = smf.glm(formula, data=self.df, family=sm.families.Binomial())
     else:
         # In fact, this function has wrong fittedvalues, but it's predict value is still right.
         model = smf.logit(formula, data=self.df)
     result = model.fit()
     print result.summary()
     # In logit fit there are errors here. Not sure why...
     if glm_fit:
         self.output_binary_table(result, result.fittedvalues, model.endog.astype(int), glm_fit)
开发者ID:Aran00,项目名称:ISIRBookCode,代码行数:16,代码来源:LogisticRegression.py


示例20: test_compare_logit

    def test_compare_logit(self):

        vs = Independence()
        family = Binomial()

        Y = 1 * (np.random.normal(size=100) < 0)
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.random.randint(0, 4, size=100)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        md = GEE.from_formula("Y ~ X1 + X2 + X3", D, None, groups=groups, family=family, covstruct=vs).fit()

        sml = sm.logit("Y ~ X1 + X2 + X3", data=D).fit()

        assert_almost_equal(sml.params.values, md.params, decimal=10)
开发者ID:lema655,项目名称:statsmodels,代码行数:18,代码来源:test_gee.py



注:本文中的statsmodels.formula.api.logit函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python api.ols函数代码示例发布时间:2022-05-27
下一篇:
Python api.glm函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap