本文整理汇总了Python中statsmodels.formula.api.logit函数的典型用法代码示例。如果您正苦于以下问题：Python logit函数的具体用法？Python logit怎么用？Python logit使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了logit函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: reduce_multi_model
def reduce_multi_model(orig_fitted, base_string, res, df, fit=None):
    """Iteratively shrink a fitted logit model by re-fitting on fewer variables.

    orig_fitted = an object returned from calling .fit() on a statsmodels logit model
    base_string = the right hand side of the formula used to estimate orig_fitted
    res = the name of the column in df that holds the classes
    df = the pandas dataframe from which orig_fitted was estimated
    fit = optional optimisation method name forwarded to .fit(method=...)
    ==========
    Returns a tuple (fitted model, base string) for the most reduced model
    that could be estimated.

    The set of variables to keep is proposed by whittle_multi_model_vars; the
    loop stops when that helper returns None or when a re-fit raises.
    """
    # Check the class of the function inputs
    assert isinstance(base_string, str)
    assert isinstance(res, str)
    assert isinstance(df, pd.DataFrame)

    # Only pass `method` to .fit() when the caller asked for a specific one
    fit_options = {"maxiter": 2000, "disp": False}
    if fit is not None:
        fit_options["method"] = fit

    # The smallest model found so far starts out as the original one
    best_model, best_base = orig_fitted, base_string
    candidates = whittle_multi_model_vars(orig_fitted, base_string)
    node_cols = isolate_node_cols(df)

    while candidates is not None:
        regressors = combat_multi_collinearity(df, candidates, node_cols, max_cond=2000)
        rhs = " + ".join(regressors)
        formula = "{} ~ 0 + {}".format(res, rhs)
        try:
            refit = smf.logit(formula, data=df).fit(**fit_options)
            # Record the reduced model before looking for a further reduction
            best_base = " + ".join(candidates)
            best_model = refit
            candidates = whittle_multi_model_vars(refit, rhs)
        except Exception:
            # A failed estimation ends the search; without this the loop
            # would never terminate.
            candidates = None

    return best_model, best_base
开发者ID:timothyb0912,项目名称:cs289_final_project,代码行数:60,代码来源:hybrid_logit.py
示例2: RunLogisticModels
def RunLogisticModels(live):
    """Runs regressions that predict sex.

    Fits a simple and a more complex logit model of `boy`, prints summaries
    for both, then prints the in-sample accuracy of the complex model next to
    the base rate and a prediction for one hypothetical record.

    live: DataFrame of pregnancy records
    """
    # Copy the filtered rows: the derived columns below must be written to an
    # independent frame, not to a slice of `live`.
    df = live[live.prglngth > 30].copy()

    df['boy'] = (df.babysex == 1).astype(int)
    df['isyoung'] = (df.agepreg < 20).astype(int)
    # "old" means above 35; the previous `< 35` flagged the opposite group
    df['isold'] = (df.agepreg > 35).astype(int)
    df['season'] = (((df.datend + 1) % 12) / 3).astype(int)

    # run the simple model
    model = smf.logit('boy ~ agepreg', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # run the complex model
    model = smf.logit('boy ~ agepreg + hpagelb + birthord + C(race)', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # compute accuracy of the complex model on the rows it was fitted to
    endog = pandas.DataFrame(model.endog, columns=[model.endog_names])
    actual = endog['boy']
    baseline = actual.mean()

    predict = (results.predict() >= 0.5)
    true_pos = predict * actual
    true_neg = (1 - predict) * (1 - actual)

    acc = (sum(true_pos) + sum(true_neg)) / len(actual)
    print(acc, baseline)

    # predict for a single made-up record
    columns = ['agepreg', 'hpagelb', 'birthord', 'race']
    new = pandas.DataFrame([[35, 39, 3, 1]], columns=columns)
    y = results.predict(new)
    print(y)
开发者ID:Bercio,项目名称:ThinkStats2,代码行数:56,代码来源:regression.py
示例3: calculate_odds_ratio
def calculate_odds_ratio(genotypes, phen_vector1, phen_vector2, reg_type, covariates, response='',
                         phen_vector3=''):
    """
    Runs the regression for a specific phenotype vector relative to the genotype data and covariates.

    :param genotypes: a DataFrame containing the genotype information; the columns
        ``y``, ``MaxAgeAtCPT`` and (when given) ``phe`` are added to it in place
    :param phen_vector1: values stored in the ``y`` column
    :param phen_vector2: values stored in the ``MaxAgeAtCPT`` column
    :param reg_type: regression selector; every value currently fits the same
        logit model (NOTE(review): confirm whether a non-zero value was meant
        to select a different estimator)
    :param covariates: a string containing all desired covariates
    :param response: optional name of the response column; when empty,
        ``genotype`` is the response
    :param phen_vector3: optional second phenotype vector, added as ``phe`` when
        it contains at least one truthy value
    :type genotypes: pandas DataFrame
    :type covariates: string
    :returns: ``(coefficient of y, p-value of y, [-log10(p), coefficient, CI string])``;
        ``(0, nan, [nan, nan, nan])`` when the model cannot be estimated

    .. note::
        The covariates must be a string that is delimited by '+', not a list.
        If you are using a list of covariates and would like to convert it to the pyPhewas format, use the following::

            l = ['genotype', 'age'] # a list of your covariates
            covariates = '+'.join(l) # pyPhewas format

        The covariates that are listed here *must* be headers to your genotype CSV file.
    """
    data = genotypes
    data['y'] = phen_vector1
    data['MaxAgeAtCPT'] = phen_vector2

    # The default for phen_vector3 is '', which has no .any(); treat strings as
    # "not supplied" instead of crashing.
    has_phe = (not isinstance(phen_vector3, str)) and bool(np.any(phen_vector3))
    if has_phe:
        data['phe'] = phen_vector3

    # Assemble the right-hand side from its parts so every term is separated
    # by '+' (the old phe/response branch glued 'genotype' to the covariates).
    terms = ['y']
    if has_phe:
        terms.append('phe')
    if response:
        terms.append('genotype')
    if covariates:
        terms.append(covariates)
    f = (response if response else 'genotype') + ' ~ ' + ' + '.join(terms)

    try:
        fitted = smf.logit(f, data).fit(method='bfgs', disp=False)
        p = fitted.pvalues.y
        odds = fitted.params.y
        conf = fitted.conf_int()
        od = [-math.log10(p), odds, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])]
    except Exception:
        # Best effort: a model that cannot be estimated yields placeholder values
        odds = 0
        p = np.nan
        od = [np.nan, np.nan, np.nan]
    return (odds, p, od)
开发者ID:BennettLandman,项目名称:pyPheWAS,代码行数:54,代码来源:pyProWAS.py
示例4: logistic_regression
def logistic_regression(self, use_glm=True):
    """Fit Direction on the five lags and Volume, then tabulate predictions.

    With ``use_glm`` a binomial GLM is fitted on ``self.df``; otherwise a logit
    model is fitted on ``self.transformedDF``. The 0/1 predictions are passed
    to ``tp.output_table`` together with ``self.transformedDF[self.y_col]``.

    Original author's note (b): the only statistically significant predictor
    appears to be Lag2.
    """
    formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"

    def _label_from_index0_prob(prob):
        # Original note: the GLM probability is that of index 0, so the
        # labels are inverted relative to the logit case.
        return 0 if prob > 0.5 else 1

    def _label_from_prob_of_one(prob):
        # The logit prediction is the probability of being 1
        return 1 if prob > 0.5 else 0

    if use_glm:
        model = smf.glm(formula, data=self.df, family=sm.families.Binomial())
    else:
        model = smf.logit(formula, data=self.transformedDF)
    result = model.fit()

    if use_glm:
        probs = result.fittedvalues
        pred_values = probs.map(_label_from_index0_prob)
    else:
        predictors = self.df[["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]]
        probs = Series(result.predict(sm.add_constant(predictors)))
        pred_values = probs.map(_label_from_prob_of_one)

    # Original author's note (c):
    # Percentage of correct predictions: (54+557)/(54+557+48+430) = 56.1%.
    # Weeks the market goes up the logistic regression is right most of the time, 557/(557+48) = 92.1%.
    # Weeks the market goes up the logistic regression is wrong most of the time 54/(430+54) = 11.2%.
    tp.output_table(pred_values.values, self.transformedDF[self.y_col].values)
开发者ID:Aran00,项目名称:ISIRExerciseCode,代码行数:25,代码来源:Exec10.py
示例5: logistic_model
def logistic_model(data, explanatory_variables, response_variable,
                   maxiter=35, verbose=True):
    """Fit ``response_variable ~ explanatory_variables`` as a logit model.

    data: frame handed to ``smf.logit``
    explanatory_variables: iterable of term strings, joined with ' + '
    response_variable: name of the response column
    maxiter: iteration limit forwarded to ``.fit()``
    verbose: when true, print the summary and the exponentiated
        coefficients with their confidence bounds

    Returns the fitted results, or None when estimation raised (the error
    is printed rather than propagated).
    """
    formula = response_variable + ' ~ ' + ' + '.join(explanatory_variables)

    try:
        model = smf.logit(formula=formula, data=data).fit(maxiter=maxiter)
    except Exception as err:
        # Catch Exception, not everything: Ctrl-C and SystemExit must still
        # be able to stop the program.
        print('Error "' + str(err) + '" while processing model', formula)
        return None

    if verbose:
        print()
        print('MODEL:', formula, '\n')
        print(model.summary())
        print()

        # odds ratios with 95% confidence intervals
        print("Odds Ratios")
        conf = model.conf_int()
        conf['OR'] = model.params
        conf.columns = ['Lower CI', 'Upper CI', 'Odds Ratios']
        print(numpy.exp(conf))

    return model
开发者ID:MColosso,项目名称:Forest-Fires,代码行数:26,代码来源:Forest+Fires+-+week+4.py
示例6: LogisticRegressionExample
def LogisticRegressionExample():
    """Runs a simple example of logistic regression and prints results.

    Prints, for a four-row toy data set and fixed coefficients, the log-odds,
    odds, probabilities, per-row likelihoods and their product, followed by
    the summary of a logit model fitted to the same data.
    """
    outcome = np.array([0, 1, 0, 1])
    feat1 = np.array([0, 0, 0, 1])
    feat2 = np.array([0, 1, 1, 1])

    intercept, slope1, slope2 = -1.5, 2.8, 1.1

    log_odds = intercept + slope1 * feat1 + slope2 * feat2
    print(log_odds)

    odds = np.exp(log_odds)
    print(odds)

    prob = odds / (odds + 1)
    print(prob)

    # Likelihood of each observed outcome under the assumed coefficients
    likelihoods = outcome * prob + (1 - outcome) * (1 - prob)
    print(likelihoods)
    print(np.prod(likelihoods))

    frame = pandas.DataFrame(dict(y=outcome, x1=feat1, x2=feat2))
    fitted = smf.logit('y ~ x1 + x2', data=frame).fit()
    print(fitted.summary())
开发者ID:13tsuyoshi,项目名称:ThinkStats2,代码行数:25,代码来源:regression.py
示例7: _corr
def _corr(self, sel, suffix):
    """Fit model accuracy on human accuracy and plot the logistic fit.

    sel: frame with ``model_accuracy`` and ``human_accuracy`` columns
    suffix: text used in the output name and at the start of the caption

    The regression summary is printed when ``self.html`` is None and rendered
    as HTML otherwise; in both cases its text is appended to the caption
    passed to ``self.show``.
    """
    formula = str('model_accuracy ~ human_accuracy')
    logreg = smf.logit(formula=formula, data=sel).fit()
    summ = logreg.summary()
    if self.html is None:
        print(summ)
        # The caption is built by string concatenation below, so the Summary
        # object has to be rendered to text first (str + Summary raises).
        summ = str(summ)
    else:
        summ = summ.as_html().replace('class="simpletable"',
                                      'class="simpletable table"')

    sel = sel.rename(columns={'human_accuracy': 'human accuracy',
                              'model_accuracy': 'model accuracy'})

    # x/y are passed by keyword, which every seaborn version accepts
    grid = sns.lmplot(x='human accuracy', y='model accuracy', data=sel,
                      x_jitter=.01, y_jitter=.05, logistic=True, truncate=True)
    # Draw on the facet's own axes instead of going through the `sns.plt`
    # alias, which newer seaborn releases no longer provide.
    ax = grid.ax

    # Overlay the per-bin mean model accuracy, sized by the bin count
    bins = np.digitize(sel['human accuracy'], np.arange(.05, 1, .1))
    per_bin = sel['model accuracy'].groupby(bins)
    count = per_bin.count()
    mean = per_bin.mean()
    ax.scatter(.1 * mean.index, mean, s=10 * count, c='.15',
               linewidths=0, alpha=.8)
    ax.set_title(models.NICE_NAMES[self.model_name])
    ax.set_xlim([-.1, 1.1])
    ax.set_ylim([-.1, 1.1])
    self.show(pref='corr_sil', suffix=self.model_name + '_' + suffix,
              caption=suffix + summ)
开发者ID:mageed,项目名称:conv-exp,代码行数:27,代码来源:run.py
示例8: run_logits
def run_logits(grouped, formula, var):
    """Fit ``formula`` as a logit model per group and print one row each.

    grouped: iterable of (code, group frame) pairs
    formula: model formula fitted on every group
    var: name forwarded to ``extract_res`` as ``var``

    Each printed row is tab-separated: padded country name, nobs, the
    parameter, its stars, and an arrow when the starred parameter is positive.
    """
    for code, group in grouped:
        label = get_country(code).ljust(14)
        fitted = smf.logit(formula, data=group).fit(disp=False)
        nobs, param, stars = extract_res(fitted, var=var)
        if stars and param > 0:
            arrow = '<--'
        else:
            arrow = ''
        print(label, nobs, '%0.3g' % param, stars, arrow, sep='\t')
开发者ID:Libardo1,项目名称:ProbablyOverthinkingIt,代码行数:8,代码来源:ess.py
示例9: log_reg
def log_reg(formula, df):
    """Fit ``formula`` on ``df`` as a logit model and print its summary.

    Any exception raised while fitting or summarising is reported as a
    "bad formula" banner instead of being propagated. Returns None.
    """
    try:
        model1 = smf.logit(formula=formula, data=df).fit()
        # print() with a single argument behaves the same on Python 2 and 3
        print(model1.summary())
    except Exception:
        banner = "+" * 40
        print(banner)
        print("bad formula")
        print(banner)
开发者ID:AlexanderMwangi,项目名称:DAT3-students,代码行数:8,代码来源:logit_reg_Patient2.py
示例10: fit_model
def fit_model(formula, model_file):
    """
    Fits a logit model on the data from ``load_data()`` and saves it

    :param formula: formula for the model
    :param model_file: name of file to save the fitted model to
    """
    training_data = load_data()
    results = logit(formula=formula, data=training_data).fit()
    results.save(model_file)
开发者ID:NathanDeMaria,项目名称:Football,代码行数:10,代码来源:save_model.py
示例11: logistic_regression_test
def logistic_regression_test():
    """Fit a logit model on the generated CSV and require an AUC above 0.8."""
    # DataFrame.from_csv no longer exists in current pandas; read_csv with
    # index_col=0 and parse_dates=True is its documented replacement.
    df = pandas.read_csv('./generated_logistic_data.csv', index_col=0, parse_dates=True)
    generated_model = smf.logit('y ~ variable_a + variable_b + variable_c', df)
    generated_fit = generated_model.fit()
    roc_data = sklearn.metrics.roc_curve(df['y'], generated_fit.predict(df))
    # roc_curve returns (fpr, tpr, thresholds); the first two feed auc
    auc = sklearn.metrics.auc(roc_data[0], roc_data[1])
    print(generated_fit.summary())
    print("AUC score: {0}".format(auc))
    assert auc > .8, 'AUC should be significantly above random'
开发者ID:zbxzc35,项目名称:Machine-Learning-Test-by-Test,代码行数:10,代码来源:logistic_regression_tests.py
示例12: generate_model
def generate_model(df):
    '''
    Fit a logistic regression of discrete_rate on fico and interest.

    :param df: a dataframe with the independent columns fico and interest
        and the dependent column discrete_rate
    :return: the fitted logistic model
    '''
    return smf.logit(formula='discrete_rate ~ fico + interest', data=df).fit()
开发者ID:AkiraKane,项目名称:thinkful,代码行数:11,代码来源:U2L4P3_logistic_regression_analysis.py
示例13: logRegR
def logRegR(self, event):
    """Ask for a formula, fit it as a logit model and show the result.

    The summary is written through ``self.parent.write`` and the observed
    outcomes are plotted against the predicted probabilities. Nothing is
    fitted when the dialog is cancelled.
    """
    # would have to mess with Patsy formula parser to get more powerful...
    # too much work
    # The model fitted below is logistic, so the prompt says so
    dlg = wx.TextEntryDialog(self.parent, "Enter the logistic regression formula")
    try:
        if dlg.ShowModal() == wx.ID_OK:
            model = smf.logit(formula=dlg.GetValue(), data=self.parent.data.data)
            results = model.fit()
            self.parent.write("\n" + str(results.summary()) + "\n")
            # x/y by keyword: accepted by every seaborn version
            sns.regplot(x=results.predict(), y=model.endog, ci=False, y_jitter=0.2)
            plt.show()
    finally:
        # Release the dialog even when the formula is invalid and fitting raises
        dlg.Destroy()
开发者ID:alanhdu,项目名称:Dex,代码行数:12,代码来源:Stats.py
示例14: fit_model
def fit_model(y, formula, df):
    """Fit ``formula`` on ``df``, choosing the estimator from column ``y``.

    A column with exactly two distinct values is treated as dichotomous and
    fitted with a logistic regression; anything else gets ordinary least
    squares. Returns the fitted results.
    """
    from statsmodels.formula.api import ols, logit

    is_dichotomous = df[y].nunique() == 2
    estimator = logit if is_dichotomous else ols
    return estimator(formula, df).fit()
开发者ID:KirstieJane,项目名称:DESCRIBING_DATA,代码行数:13,代码来源:create_correlation_matrix.py
示例15: check_initial_specification
def check_initial_specification(dataframe, result_string, new_var, min_specification, fit_word=None):
    """Fit the minimum specification plus ``new_var`` and return the smallest
    p-value among the new variables.

    dataframe = pandas dataframe the model is estimated from
    result_string = name of the response column
    new_var = list of variable names added to the minimum specification
    min_specification = right hand side of the base formula, terms joined by " + "
    fit_word = optional optimisation method name forwarded to .fit(method=...)

    Raises Exception when check_full_rank rejects the variables or when the
    optimiser reports that it did not converge.
    """
    assert isinstance(dataframe, pd.DataFrame)  # must be a pandas dataframe
    assert isinstance(result_string, str)  # must be a string
    assert isinstance(new_var, list)  # must be a list
    assert isinstance(min_specification, str)  # must be a string

    # Variables of the minimum specification, without the first "0" term
    base_vars = min_specification.split(" + ")
    try:
        base_vars.remove("0")
    except ValueError:
        pass
    all_vars = base_vars + new_var

    # Random starting values for the optimisation, one per variable
    start_vals = np.random.rand(len(all_vars))

    fString = "{} ~ {} + {}".format(result_string, min_specification, " + ".join(new_var))

    # The design matrix has to be invertible
    if not check_full_rank(dataframe, all_vars):
        raise Exception("The base model plus {} is not of full rank.".format(new_var))

    # Only pass `method` to .fit() when the caller asked for a specific one
    fit_options = {"start_params": start_vals, "maxiter": 2000, "disp": False}
    if fit_word is not None:
        fit_options["method"] = fit_word
    model = smf.logit(fString, data=dataframe).fit(**fit_options)

    if not model.mle_retvals["converged"]:
        raise Exception("The model for {} did not converge".format(new_var))

    # Smallest p-value among the newly added variables
    return min(model.pvalues[name] for name in new_var)
开发者ID:timothyb0912,项目名称:cs289_final_project,代码行数:39,代码来源:hybrid_logit.py
示例16: log_regression
def log_regression(wine_set):
    """Recode quality, sulphates and alcohol to 0/1 and fit a logit model.

    Adds three columns to ``wine_set`` in place:
      quality_c   -- 0 for quality 3-6, 1 for quality 7-9
      sulphates_c -- 0 when sulphates <= column mean, else 1
      alcohol_c   -- 0 when alcohol <= column mean, else 1
    then prints the model summary and the exponentiated coefficients with
    their confidence bounds. Returns None.
    """
    # recode quality into 2 groups: 0:{3,4,5,6}, 1:{7,8,9}
    recode = {3: 0, 4: 0, 5: 0, 6: 0, 7: 1, 8: 1, 9: 1}
    wine_set['quality_c'] = wine_set['quality'].map(recode)

    # recode sulphates and alcohol into 2 groups: 0: <= mean, 1: > mean.
    # The mean is computed once per column instead of once per row; values
    # that are not <= mean (including NaN) map to 1, as before.
    for column in ('sulphates', 'alcohol'):
        values = wine_set[column]
        wine_set[column + '_c'] = numpy.where(values <= values.mean(), 0, 1)

    # logistic regression for sulphates+alcohol -> quality
    print ("Logistic regression model for the association between wine's quality and sulphates&alcohol")
    model1 = smf.logit(formula="quality_c ~ sulphates_c + alcohol_c", data=wine_set)
    results1 = model1.fit()
    print(results1.summary())

    # odds ratios with 95% confidence intervals
    print("\nConfidence intervals")
    conf = results1.conf_int()
    conf['Odds ratio'] = results1.params
    conf.columns = ['Lower conf.int.', 'Upper conf.int.', 'Odds ratio']
    print(numpy.exp(conf))
开发者ID:ekolik,项目名称:-Python-Analysis_of_wine_quality,代码行数:51,代码来源:regression_modeling.py
示例17: report_logitreg
def report_logitreg(formula, data, verbose=True):
    """Fit a logit model and return the fit object.

    The model summary and the marginal-effects summary are always computed;
    they are printed only when ``verbose`` is true.
    """
    results = smf.logit(formula, data=data).fit()
    summary = results.summary()
    margeff = results.get_margeff().summary()

    if not verbose:
        return results

    print("\n{summary}\n\n\n{margeff}\n".format(summary=summary, margeff=margeff))
    return results
开发者ID:xguse,项目名称:crunchers,代码行数:15,代码来源:lazy_stats.py
示例18: test_log_regression
def test_log_regression():
    """Tests the results of logistic regression.

    Fits ``respcat ~ gain + loss`` directly and checks that the coefficients
    equal the ones returned by ``log_regression`` for the same data.
    """
    run = load_in_dataframe(2)
    run_added = add_gainlossratio(run)
    run_final = organize_columns(run_added)

    # fit the logistic regression line
    fitted = logit("respcat ~ gain + loss", run_final).fit()
    # get the parameters; `.values` replaces `.as_matrix()`, which current
    # pandas no longer provides
    fitted_params = fitted.params.values
    test_fitted_params = log_regression(run_final).values
    assert_array_equal(fitted_params, test_fitted_params)
开发者ID:gastonstat,项目名称:project-epsilon,代码行数:15,代码来源:test_logistic_reg.py
示例19: logistic_fit
def logistic_fit(self, glm_fit=True):
    '''
    Fit Direction on the five lags and Volume and print the summary.

    glm_fit: when true, fit a binomial GLM and also print the binary output
        table; otherwise fit a logit model and print only the summary.

    Original note: the logit function reports an error when y (Direction) is
    not transformed to 0/1, so glm looks easier to use.
    '''
    formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"
    # One truthiness test for both decisions, so the table below is only ever
    # produced for a model that was actually fitted as a GLM.
    if glm_fit:
        model = smf.glm(formula, data=self.df, family=sm.families.Binomial())
    else:
        # Original note: this model has wrong fittedvalues, but its predict
        # value is still right.
        model = smf.logit(formula, data=self.df)
    result = model.fit()
    print(result.summary())

    # Original note: the table step errors for a logit fit, so it is GLM only
    if glm_fit:
        self.output_binary_table(result, result.fittedvalues, model.endog.astype(int), glm_fit)
开发者ID:Aran00,项目名称:ISIRBookCode,代码行数:16,代码来源:LogisticRegression.py
示例20: test_compare_logit
def test_compare_logit(self):
    """GEE with an Independence structure and a Binomial family should give
    the same coefficients as a plain logit fit of the same formula."""
    vs = Independence()
    family = Binomial()

    n_obs = 100
    formula = "Y ~ X1 + X2 + X3"

    # Dict values are evaluated in order, so the random draws happen in the
    # sequence Y, X1, X2, X3, followed by the groups.
    D = pd.DataFrame({
        "Y": 1 * (np.random.normal(size=n_obs) < 0),
        "X1": np.random.normal(size=n_obs),
        "X2": np.random.normal(size=n_obs),
        "X3": np.random.normal(size=n_obs),
    })
    groups = np.random.randint(0, 4, size=n_obs)

    md = GEE.from_formula(formula, D, None, groups=groups, family=family, covstruct=vs).fit()
    sml = sm.logit(formula, data=D).fit()

    assert_almost_equal(sml.params.values, md.params, decimal=10)
开发者ID:lema655,项目名称:statsmodels,代码行数:18,代码来源:test_gee.py
注：本文中的statsmodels.formula.api.logit函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论