本文整理汇总了Python中statsmodels.stats.anova.anova_lm函数的典型用法代码示例。如果您正苦于以下问题:Python anova_lm函数的具体用法?Python anova_lm怎么用?Python anova_lm使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了anova_lm函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: model_formulas
def model_formulas():
    """Define models through formulas and print their summaries and ANOVAs.

    Reads ``swim100m.csv`` (development of world record times for the 100m
    Freestyle, for men and women), fits three OLS models of increasing
    complexity and prints each model summary followed by each ANOVA table.

    Returns
    -------
    The first entry of the ``F`` column of the ANOVA table of the model
    with interaction (expected to be 156.1407931415788).
    """
    # Get the data
    data = pd.read_csv('swim100m.csv')

    # Different models
    model1 = ols("time ~ sex", data).fit()          # one factor
    model2 = ols("time ~ sex + year", data).fit()   # two factors
    model3 = ols("time ~ sex * year", data).fit()   # two factors with interaction

    # Model information
    print(model1.summary())
    print(model2.summary())
    print(model3.summary())

    # ANOVAs
    print('----------------- Results ANOVAs: Model 1 -----------------------')
    print(anova_lm(model1))

    print('--------------------- Model 2 -----------------------------------')
    print(anova_lm(model2))

    print('--------------------- Model 3 -----------------------------------')
    model3Results = anova_lm(model3)
    print(model3Results)

    # Just to check the correct run.
    # The ANOVA table is indexed by term name, so the first row has to be
    # taken by position explicitly; a plain [0] is a label lookup in recent
    # pandas versions and fails there.
    return model3Results['F'].iloc[0]  # should be 156.1407931415788
开发者ID:ChengduoZhao,项目名称:statsintro_python,代码行数:30,代码来源:ISP_simpleModels.py
示例2: anova_oneway
def anova_oneway():
    """One-way ANOVA: test if results from 3 groups are equal.

    Loads ``altman_910.txt`` through ``getData``, checks equality of
    variances with the Levene test, runs ``scipy.stats.f_oneway`` on the
    three groups and finally prints the equivalent statsmodels ANOVA table.
    All results are printed; nothing is returned.
    """
    # Get the data
    data = getData('altman_910.txt')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results. print() is called with a single argument each time
    # so the output is the same under Python 2 and Python 3.
    print('Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    print(anova_lm(model))
开发者ID:josef-pkt,项目名称:statsintro,代码行数:29,代码来源:anovaOneway.py
示例3: model_formulas
def model_formulas():
    """Define models through formulas and print their summaries and ANOVAs.

    Reads the swim100m data set, fits three OLS models (one factor, two
    factors, two factors with interaction) and prints every model summary
    followed by every ANOVA table.

    Returns
    -------
    The first entry of the ``F`` column of the ANOVA table of the model
    with interaction (expected to be 156.1407931415788).
    """
    # Get the data
    data = read_csv(r'..\Data\data_kaplan\swim100m.csv')

    # Different models
    model1 = ols("time ~ sex", data).fit()          # one factor
    model2 = ols("time ~ sex + year", data).fit()   # two factors
    model3 = ols("time ~ sex * year", data).fit()   # two factors with interaction

    # Model information
    print(model1.summary())
    print(model2.summary())
    print(model3.summary())

    # ANOVAs
    print('-----------------------------------------------------------------')
    print(anova_lm(model1))

    print('-----------------------------------------------------------------')
    print(anova_lm(model2))

    print('-----------------------------------------------------------------')
    model3Results = anova_lm(model3)
    print(model3Results)

    # Just to check the correct run.
    # The table is indexed by term name, so select the first row by position
    # with .iloc; a plain [0] is treated as a label in recent pandas.
    return model3Results['F'].iloc[0]  # should be 156.1407931415788
开发者ID:nsonnad,项目名称:statsintro,代码行数:28,代码来源:modeling.py
示例4: run_anova
def run_anova(self):
    """Run the two-way ANOVAs on ``self.ps_table`` and publish F / p values.

    The table is first restricted to the areas listed in
    ``self.params.anova_areas``. One ANOVA (Area x Pulse_Frequency) is run on
    that table; then, separately for the pulse frequencies [10, 25] ("low")
    and [100, 200] ("high"), Area x Duration and Area x Amplitude ANOVAs are
    run. For every ANOVA the first three entries of the ``F`` and ``PR(>F)``
    columns are handed to ``self.pass_object`` under the names
    ``fvalue_<suffix>`` / ``pvalue_<suffix>``.
    """
    def _publish(formula, table, suffix):
        # Fit the OLS model, run the ANOVA and pass on the first three rows.
        anova = anova_lm(ols(formula, data=table).fit())
        self.pass_object('fvalue_' + suffix, anova['F'].values[0:3])
        self.pass_object('pvalue_' + suffix, anova['PR(>F)'].values[0:3])

    ps_table_for_anova = self.ps_table[self.ps_table['Area'].isin(self.params.anova_areas)]

    # NOTE: a mixedlm variant grouped by 'Subject' was tried here before and
    # left disabled in favour of plain OLS.
    _publish('prob_diff ~ C(Area) * C(Pulse_Frequency)', ps_table_for_anova, 'rf')

    for label, frequencies in (('low', [10, 25]), ('high', [100, 200])):
        subset = ps_table_for_anova[ps_table_for_anova['Pulse_Frequency'].isin(frequencies)]
        # Single formatted argument: identical output on Python 2 and 3.
        print('nsamples = {0}'.format(len(subset)))
        _publish('prob_diff ~ C(Area) * C(Duration)', subset, 'rd_' + label)
        _publish('prob_diff ~ C(Area) * C(Amplitude)', subset, 'ra_' + label)
开发者ID:maciekswat,项目名称:ram_utils,代码行数:34,代码来源:RunAnalysis.py
示例5: anova_interaction
def anova_interaction(data_lastDV):
    """
    Two-way ANOVA and interaction analysis of given data
    http://statsmodels.sourceforge.net/devel/examples/generated/example_interactions.html

    Note: 2way ANOVAs are for 2+ categorical independent/causal variables, with 2+ levels each
    :param data_lastDV: data frame containing the independent variables in first two columns, dependent in the third
    :return: None (all results are printed; an error line is printed and
             nothing else is done when fewer than three columns are given)
    """
    col_names = data_lastDV.columns.values  # get the columns' names
    if len(col_names) < 3:
        # str() is required: concatenating the int directly raises TypeError.
        print("ERROR in statsMOOC.py: Not enough columns in dataframe to do interaction analysis: " + str(len(col_names)))
        # Nothing below can work without a third column, so stop here.
        return

    factor_groups = data_lastDV[col_names].dropna()
    iv1, iv2, dv = col_names[0], col_names[1], col_names[2]

    # two-way anova
    formula = dv + " ~ C(" + iv1 + ") + C(" + iv2 + ")"
    formula_interaction = formula.replace('+', '*')
    additive_lm = ols(formula, data=factor_groups).fit()  # linear model, main effects only
    print(additive_lm.summary())
    print(FORMAT_LINE)

    print("- " + dv + " = " + iv1 + " * " + iv2 + " Interaction -")
    interaction_lm = ols(formula_interaction, data=factor_groups).fit()
    # anova_lm compares models in the order given and takes the last one as
    # the largest, so the additive model has to come first.
    print(anova_lm(additive_lm, interaction_lm))
    print(FORMAT_LINE)

    # Both remaining comparisons use the same two-factor model; fit it once.
    both_factors_lm = ols(dv + " ~ C(" + iv1 + ") + C(" + iv2 + ", Sum)", data=factor_groups).fit()

    print("- " + dv + " = " + iv1 + " + " + iv2 + " ANOVA -")
    print(anova_lm(ols(dv + " ~ C(" + iv1 + ")", data=factor_groups).fit(), both_factors_lm))
    print(FORMAT_LINE)

    print("- " + dv + " = " + iv2 + " + " + iv1 + " ANOVA -")
    print(anova_lm(ols(dv + " ~ C(" + iv2 + ")", data=factor_groups).fit(), both_factors_lm))
开发者ID:UberHowley,项目名称:spoc-file-processing,代码行数:32,代码来源:statsSPOC.py
示例6: anova_interaction
def anova_interaction():
    """ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses.

    Loads ``altman_12_6.txt`` through ``getData``, fits an OLS model with
    both main effects and their interaction, and prints the ANOVA table.
    """
    # Get the data
    data = getData('altman_12_6.txt')

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=['hs', 'fetus', 'observer'])

    # Determine the ANOVA with interaction
    formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
    lm = ols(formula, df).fit()
    # print() call instead of the Python 2 print statement.
    print(anova_lm(lm))
开发者ID:josef-pkt,项目名称:statsintro,代码行数:14,代码来源:anovaTwoway.py
示例7: anova
def anova(df, fmla, typ=3):
    """Fit an OLS model from a formula and collect its ANOVA results.

    :param df: data passed to ``ols`` together with the formula
    :param fmla: model formula string
    :param typ: ANOVA type forwarded to ``anova_lm`` (Type III by default)
    :return: dict with keys
        ``lm``    - the fitted model,
        ``aov``   - the ANOVA table,
        ``pvals`` - the ``PR(>F)`` column, labels prefixed with ``p_``,
        ``ess``   - sum of squares of every row except ``Residual``,
                    normalised to sum to one, labels prefixed with ``ess_``,
        ``fit``   - the model parameters, labels prefixed with ``fit_``.
    :raises ZeroDivisionError: if the ANOVA table contains a negative sum of
        squares (the same exception type the former ``1 / 0`` produced).
    """
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    # Anova/OLS
    lm = ols(fmla, df).fit()  # 'data' <==> 'df' keyword change with version

    # Grab the pvalues (note we use Type III by default).
    aov = anova_lm(lm, typ=typ)
    # Work on a copy so the relabelling is applied to a separate Series.
    pvals = aov["PR(>F)"].copy()
    # A list, not a map() iterator, so the new index is a concrete sequence.
    pvals.index = ["p_" + s for s in pvals.index]

    # Grab the explainable sum of squares
    ess = aov.drop("Residual").sum_sq
    ess = ess / ess.sum()
    ess.index = ["ess_" + s for s in ess.index]

    # Grab the fit
    fit = lm.params.copy()
    fit.index = ["fit_" + s for s in fit.index]

    # I think this happens with pathological inputs
    if np.any(aov["sum_sq"] < 0):
        raise ZeroDivisionError("ANOVA table contains a negative sum of squares")

    return {"lm": lm, "aov": aov, "pvals": pvals, "ess": ess, "fit": fit}
开发者ID:cxrodgers,项目名称:my,代码行数:26,代码来源:stats.py
示例8: test_results
def test_results(self):
    """Check the two-model ANOVA comparison table against reference values.

    A model with only the ``C(Duration)`` and ``C(Weight)`` terms is fitted
    on ``self.data`` and compared with ``self.kidney_lm`` via ``anova_lm``;
    every column of the resulting table is checked.
    """
    smaller_fit = ols("np.log(Days+1) ~ C(Duration) + C(Weight)", self.data).fit()
    table = anova_lm(smaller_fit, self.kidney_lm)

    # Reference values, one entry per compared model.
    expected_df_resid = np.array([56, 54])
    expected_ssr = np.array([29.62486, 28.9892])
    expected_df_diff = np.array([0, 2])
    expected_ss_diff = np.array([np.nan, 0.6356584])
    expected_f = np.array([np.nan, 0.5920404])
    expected_pr_f = np.array([np.nan, 0.5567479])

    np.testing.assert_equal(table["df_resid"].values, expected_df_resid)
    # The residual sums of squares are only compared to 4 decimals.
    np.testing.assert_almost_equal(table["ssr"].values, expected_ssr, 4)

    default_precision_checks = (
        ("df_diff", expected_df_diff),
        ("ss_diff", expected_ss_diff),
        ("F", expected_f),
        ("Pr(>F)", expected_pr_f),
    )
    for column, expected in default_precision_checks:
        np.testing.assert_almost_equal(table[column].values, expected)
开发者ID:Tskatom,项目名称:Embers_VT,代码行数:30,代码来源:test_anova.py
示例9: anova
def anova(dv):
    """Perform ANOVA.

    Fits ``<dv> ~ C(group) * age * iq`` on the frame returned by
    ``make_summary()`` and prints a header line followed by the Type 2
    ANOVA table computed with the 'hc3' robust option.

    :param dv: name of the dependent variable, inserted into the formula
    """
    df = make_summary()
    lm = ols('%s ~ C(group) * age * iq' % dv, data=df).fit()
    divider = '---------'
    table = anova_lm(lm, typ=2, robust='hc3')
    # One pre-formatted string reproduces the layout of the former Python 2
    # print statement and prints the same on Python 3.
    print('{0} {1} {0} \n{2}'.format(divider, dv, table))
开发者ID:sammosummo,项目名称:PsychosisDDM,代码行数:7,代码来源:conventional.py
示例10: startanova
def startanova(self):
    """Run a one-way ANOVA of ``Time ~ C(Fitness)`` on the rehab data set.

    The data are read from the local cache file ``rehab.table``; if that
    cannot be read they are downloaded and the cache file is written. The
    table, the ANOVA table, the design matrix and the model summary are
    printed, and a box plot of ``Time`` grouped by ``Fitness`` is shown.

    ``self.test`` and ``self.robust`` are forwarded to ``anova_lm``.
    """
    import pandas
    import matplotlib.pyplot as plt
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    try:
        rehab_table = pandas.read_csv('rehab.table')
    except (IOError, OSError, ValueError):
        # Missing, unreadable or unparsable cache: fetch the data again.
        # pandas reads from the URL directly, so no urllib2/urlopen (which is
        # Python 2 only) is needed.
        url = 'http://stats191.stanford.edu/data/rehab.csv'
        rehab_table = pandas.read_csv(url)
        # index=False keeps the cache identical to the downloaded table;
        # otherwise a re-read would carry an extra unnamed index column.
        rehab_table.to_csv('rehab.table', index=False)

    print(rehab_table)

    plt.figure(figsize=(6, 6))
    rehab_table.boxplot('Time', 'Fitness', ax=plt.gca())

    rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
    table9 = anova_lm(rehab_lm, test=self.test, robust=self.robust)
    print(table9)
    print(rehab_lm.model.data.orig_exog)
    print(rehab_lm.summary())
    plt.show()
开发者ID:vishnumani2009,项目名称:OpenSource-Open-Ended-Statistical-toolkit,代码行数:29,代码来源:anovafront.py
示例11: doAnova
def doAnova(data):
    """One-way ANOVA of ``StressReduction`` by ``Treatment``.

    :param data: anything ``pd.DataFrame`` accepts, providing the columns
        ``StressReduction`` and ``Treatment``

    Prints the ANOVA table and a note when the p-value of the first row is
    below 0.05.
    """
    df = pd.DataFrame(data)
    model = ols('StressReduction ~ C(Treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)
    # The table is indexed by term name; take the first row by position
    # (.iloc) because a plain [0] is a label lookup in recent pandas.
    if anovaResults['PR(>F)'].iloc[0] < 0.05:
        print('One of the groups is different.')
开发者ID:ejmurray,项目名称:statsintro_python,代码行数:10,代码来源:C8_3_multipleTesting.py
示例12: anova_statsmodels
def anova_statsmodels():
    """Do the ANOVA with a function.

    Reads the galton data set, fits ``height ~ 1 + sex`` and prints the
    ANOVA table.

    :return: the first entry of the ``F`` column of the ANOVA table
    """
    # Get the data
    data = pd.read_csv(r'..\Data\data_kaplan\galton.csv')

    anova_results = anova_lm(ols('height ~ 1 + sex', data).fit())
    print('\nANOVA with "statsmodels" ------------------------------')
    print(anova_results)

    # Positional access via .iloc: the table index holds term names, so a
    # plain [0] is a label lookup in recent pandas.
    return anova_results['F'].iloc[0]
开发者ID:ing7t,项目名称:kod,代码行数:11,代码来源:anovaOneway.py
示例13: anova_statsmodels
def anova_statsmodels():
    """Do the ANOVA with a function.

    Reads the galton data set, fits ``height ~ 1 + sex`` and prints the
    ANOVA table.

    :return: the first entry of the ``F`` column of the ANOVA table
    """
    # Get the data
    data = pd.read_csv(r"..\Data\data_kaplan\galton.csv")

    anova_results = anova_lm(ols("height ~ 1 + sex", data).fit())
    print('\nANOVA with "statsmodels" ------------------------------')
    print(anova_results)

    # The table is indexed by term name, so the first row is selected by
    # position; a plain [0] is a label lookup in recent pandas.
    return anova_results["F"].iloc[0]
开发者ID:phaustin,项目名称:statsintro,代码行数:11,代码来源:anovaOneway.py
示例14: test_results
def test_results(self):
    """Check the single-model ANOVA table of ``self.kidney_lm``.

    The ``df``, ``sum_sq``, ``F`` and ``PR(>F)`` columns are compared with
    reference values.
    """
    table = anova_lm(self.kidney_lm)

    # Reference values, one entry per row of the table.
    expected_df = np.array([2, 2, 2, 54])
    expected_sum_sq = np.array([158.6415227, 16.97129, 0.6356584, 28.9892])
    # Reference mean squares are kept for completeness; they are not asserted.
    expected_mean_sq = np.array([79.3207613, 8.485645, 0.3178292, 0.536837])
    expected_f = np.array([147.7557648, 15.80674, 0.5920404, np.nan])
    expected_pr_f = np.array([1.262324e-22, 3.944502e-06, 0.5567479, np.nan])

    np.testing.assert_equal(table["df"].values, expected_df)
    for column, expected in (("sum_sq", expected_sum_sq), ("F", expected_f)):
        # These two columns are only compared to 4 decimals.
        np.testing.assert_almost_equal(table[column].values, expected, 4)
    np.testing.assert_almost_equal(table["PR(>F)"].values, expected_pr_f)
开发者ID:JerWatson,项目名称:statsmodels,代码行数:12,代码来源:test_anova.py
示例15: one_stats
def one_stats(data_lastDV):
    """
    Do basic analysis of one IV onto one DV
    :param data_lastDV: pandas dataframe we are exploring (IV-of-interest in first column, followed by IVs, and DV in last index)
    :return: None
    """
    other_ivs = data_lastDV.columns.values.tolist()  # all column names
    causal = other_ivs.pop(0)   # first column: the IV of interest
    outcome = other_ivs.pop()   # last column: the DV; what remains are the other IVs
    pair = data_lastDV[[causal, outcome]]

    # descriptive stats
    print(FORMAT_LINE)
    print(pair[causal].describe())
    print(FORMAT_LINE)

    overview = plt.figure()

    # left panel: bar chart with the number of instances per condition
    overview.add_subplot(121)
    counts = pair.groupby(causal)[causal].count()
    left = counts.plot(kind='bar', title=causal)
    left.set_xlabel(causal)
    left.set_ylabel("count instances")

    # right panel: bar chart with the mean outcome per condition
    overview.add_subplot(122)
    means = data_lastDV.groupby(causal)[outcome].mean()
    right = means.plot(kind='bar', title=causal)
    right.set_xlabel(causal)
    right.set_ylabel("mean " + outcome)
    plt.show()

    # One Way ANOVA
    cond_lm = ols(outcome + " ~ C(" + causal + ")", data=pair).fit()
    anova_table = anova_lm(cond_lm)

    print("\n"+FORMAT_LINE)
    print("One-Way ANOVA: " + causal + " --> " + outcome)
    print(FORMAT_LINE)
    print(anova_table)
    print(cond_lm.summary())

    # boxplot of the outcome, grouped by condition
    box_fig = plt.figure()
    box_fig.add_subplot(111)
    box_ax = pair.boxplot(outcome, causal, ax=plt.gca())
    box_ax.set_xlabel(causal)
    box_ax.set_ylabel(outcome)
    plt.show()

    # interaction analysis of the IV of interest with each remaining IV
    for cond in other_ivs:
        triple = [causal, cond, outcome]
        anova_interaction(data_lastDV[triple])
        plot_interaction(data_lastDV[triple])
开发者ID:StrategicC,项目名称:mooc-file-processing,代码行数:53,代码来源:statsMOOC.py
示例16: anova_oneway
def anova_oneway():
    """One-way ANOVA: test if results from 3 groups are equal.

    Twenty-two patients undergoing cardiac bypass surgery were randomized to one of three ventilation groups:

    Group I: Patients received 50% nitrous oxide and 50% oxygen mixture continuously for 24 h.
    Group II: Patients received a 50% nitrous oxide and 50% oxygen mixture only during the operation.
    Group III: Patients received no nitrous oxide but received 35-50% oxygen for 24 h.

    The data show red cell folate levels for the three groups after 24h' ventilation.

    Returns
    -------
    (F_statistic, pVal) from ``scipy.stats.f_oneway``; should be
    (3.711335988266943, 0.043589334959179327).

    Raises
    ------
    AssertionError
        If the F statistic of the statsmodels ANOVA differs from the scipy one.
    """
    # Get the data
    print('One-way ANOVA: -----------------')
    inFile = 'altman_910.txt'
    data = np.genfromtxt(inFile, delimiter=',')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # --- >>> START stats <<< ---
    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)
    # --- >>> STOP stats <<< ---

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output.
    # The ANOVA table is indexed by term name, so its first row is selected
    # by position with .iloc; a plain [0] is a label lookup in recent pandas.
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'].iloc[0])

    return (F_statistic, pVal)  # should be (3.711335988266943, 0.043589334959179327)
开发者ID:ChengduoZhao,项目名称:statsintro_python,代码行数:49,代码来源:ISP_anovaOneway.py
示例17: do_ANOVA
def do_ANOVA(data):
    """4.3.2. Perform an ANOVA on the data.

    :param data: DataFrame which contains "weight" and "group"

    Prints the ANOVA table and a note when the p-value of the first row is
    below 0.05.
    """
    print('ANOVA: ----------------------------------------------')

    # First, I fit a statistical "ordinary least square (ols)"-model to the data, using the
    # formula language from "patsy". The formula 'weight ~ C(group)' says:
    # "weight" is a function of the categorical value "group"
    # and the data are taken from the DataFrame "data", which contains "weight" and "group"
    model = ols('weight ~ C(group)', data).fit()

    # "anova_lm" (where "lm" stands for "linear model") extracts the ANOVA-parameters
    # from the fitted model.
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Select the first row by position: the table index holds term names, so
    # a plain [0] is a label lookup in recent pandas.
    if anovaResults['PR(>F)'].iloc[0] < 0.05:
        print('One of the groups is different.')
开发者ID:ejmurray,项目名称:statsintro_python,代码行数:18,代码来源:S8_multipleGroups.py
示例18: anova_interaction
def anova_interaction():
    """ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses, from a study investigating the
    reproducibility of ultrasonic fetal head circumference data.

    :return: the first entry of the ``F`` column of the ANOVA table
    """
    # Get the data. Raw string: the value is unchanged, but '\D' and '\d' are
    # no longer parsed as (invalid) escape sequences.
    data = getData('altman_12_6.txt', subDir=r'..\Data\data_altman')

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=['hs', 'fetus', 'observer'])

    # --- >>> START stats <<< ---
    # Determine the ANOVA with interaction
    formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
    lm = ols(formula, df).fit()
    anovaResults = anova_lm(lm)
    # --- >>> STOP stats <<< ---
    print(anovaResults)

    # Positional access via .iloc: the table is indexed by term name, so a
    # plain [0] is a label lookup in recent pandas.
    return anovaResults['F'].iloc[0]
开发者ID:mlskit,项目名称:astromlskit,代码行数:20,代码来源:2-way.py
示例19: anova_interaction
def anova_interaction():
    """ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses, from a study investigating the
    reproducibility of ultrasonic fetal head circumference data.

    :return: the first entry of the ``F`` column of the ANOVA table
    """
    # Get the data. The path is written as a raw string so that the
    # backslashes are taken literally; the resulting value is unchanged.
    data = getData("altman_12_6.txt", subDir=r"..\Data\data_altman")

    # Bring them in DataFrame-format
    df = pd.DataFrame(data, columns=["hs", "fetus", "observer"])

    # --- >>> START stats <<< ---
    # Determine the ANOVA with interaction
    formula = "hs ~ C(fetus) + C(observer) + C(fetus):C(observer)"
    lm = ols(formula, df).fit()
    anovaResults = anova_lm(lm)
    # --- >>> STOP stats <<< ---
    print(anovaResults)

    # The table index holds term names, so take the first row by position;
    # a plain [0] is a label lookup in recent pandas.
    return anovaResults["F"].iloc[0]
开发者ID:ejmurray,项目名称:statsintro_python,代码行数:21,代码来源:C8_3_anovaTwoway.py
示例20: ancova
def ancova(data_covar_lastDV):
    """
    ANCOVA for when you have a numerical covariate to control for. Read more about ANOVA/ANCOVA/etc here: http://www.statsmakemecry.com/smmctheblog/stats-soup-anova-ancova-manova-mancova
    http://elderlab.yorku.ca/~elder/teaching/psyc3031/lectures/Lecture%207%20Analysis%20of%20Covariance%20-%20ANCOVA%20%28GLM%202%29.pdf (slide 24)

    For every condition column an OLS model ``outcome ~ covariate + condition``
    is fitted; its ANOVA table and summary are printed and a box plot of the
    outcome grouped by that condition is added to a shared figure. The figure
    is only shown if the user answers yes to the prompt.

    :param data_covar_lastDV: data frame containing the independent and dependent variables (covariate is second to last, DV is last item in list)
    :return: None
    """
    col_names = data_covar_lastDV.columns.values.tolist()  # get the columns' names
    outcome = col_names.pop()  # remove the last item in the list
    covariate = col_names.pop()  # remove the [second to] last item in the list

    fig = plt.figure()
    # One panel per condition. The grid keeps its former width of two for up
    # to two conditions and grows beyond that, where a fixed 1x2 grid would
    # make add_subplot fail on the third condition.
    n_panels = max(2, len(col_names))

    for i, cond in enumerate(col_names, start=1):
        cond_table = data_covar_lastDV[[cond, covariate, outcome]].dropna()
        cond_lm = ols(outcome + " ~ " + covariate + " + " + cond, data=cond_table).fit()
        anova_table = anova_lm(cond_lm)

        print("\n"+FORMAT_LINE)
        print("ANCOVA: " + cond + " + " + covariate + " --> " + outcome)
        print(FORMAT_LINE)
        print(anova_table)
        print(cond_lm.summary())

        # box plot
        # TODO: need to remove the effect of the covariate before plotting
        # http://statsmodels.sourceforge.net/devel/examples/generated/example_interactions.html
        panel = fig.add_subplot(1, n_panels, i)
        # Draw into the panel just created instead of relying on plt.gca().
        ax = cond_table.boxplot(outcome, cond, ax=panel)
        ax.set_xlabel(cond)
        ax.set_ylabel(outcome)

    user_input = input(">> Display boxplot of conditions? [y/n]: ")
    if is_yes(user_input):
        fig.tight_layout()
        plt.show()
开发者ID:UberHowley,项目名称:spoc-file-processing,代码行数:40,代码来源:statsSPOC.py
注:本文中的statsmodels.stats.anova.anova_lm函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论