This article collects typical usage examples of the Python function statsmodels.compat.python.lmap. If you are wondering what exactly lmap does, how to call it, or what real-world uses look like, the hand-picked code examples below should help.
The following shows 20 code examples of the lmap function, sorted by popularity by default.
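Before the examples, it helps to know what the helper does. In statsmodels' Python 2/3 compatibility layer, lmap is essentially the built-in map with the result materialized as a list, so the return type is the same on both Python versions. A rough, illustrative sketch (not the verbatim statsmodels source):

def lmap(*args, **kwargs):
    # map(...) that always returns a list, regardless of Python version
    return list(map(*args, **kwargs))

# like map, it accepts one or several iterables in parallel:
lmap(int, ['1', '2', '3'])          # [1, 2, 3]
lmap(max, [3, 5, 1], [4, 2, 6])     # [4, 5, 6]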
Example 1: handle_missing
def handle_missing(cls, endog, exog, missing, **kwargs):
    """
    This returns a dictionary with keys endog, exog and the keys of
    kwargs. It preserves Nones.
    """
    none_array_names = []

    if exog is not None:
        combined = (endog, exog)
        combined_names = ['endog', 'exog']
    else:
        combined = (endog,)
        combined_names = ['endog']
        none_array_names += ['exog']

    # deal with other arrays
    combined_2d = ()
    combined_2d_names = []
    if len(kwargs):
        for key, value_array in iteritems(kwargs):
            if value_array is None or value_array.ndim == 0:
                none_array_names += [key]
                continue
            # grab 1d arrays
            if value_array.ndim == 1:
                combined += (value_array,)
                combined_names += [key]
            elif value_array.squeeze().ndim == 1:
                combined += (value_array,)
                combined_names += [key]
            # grab 2d arrays that are _assumed_ to be symmetric
            elif value_array.ndim == 2:
                combined_2d += (value_array,)
                combined_2d_names += [key]
            else:
                raise ValueError("Arrays with more than 2 dimensions "
                                 "aren't yet handled")

    nan_mask = _nan_rows(*combined)
    if combined_2d:
        nan_mask = _nan_rows(*(nan_mask[:, None],) + combined_2d)

    if missing == 'raise' and np.any(nan_mask):
        raise MissingDataError("NaNs were encountered in the data")
    elif missing == 'drop':
        nan_mask = ~nan_mask
        drop_nans = lambda x: cls._drop_nans(x, nan_mask)
        drop_nans_2d = lambda x: cls._drop_nans_2d(x, nan_mask)
        combined = dict(zip(combined_names, lmap(drop_nans, combined)))

        if combined_2d:
            combined.update(dict(zip(combined_2d_names,
                                     lmap(drop_nans_2d, combined_2d))))
        if none_array_names:
            combined.update(dict(zip(none_array_names,
                                     [None] * len(none_array_names))))

        return combined, np.where(~nan_mask)[0].tolist()
    else:
        raise ValueError("missing option %s not understood" % missing)
Developer: andrewclegg | Project: statsmodels | Lines of code: 60 | Source file: data.py
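The lmap call that matters here pairs each variable name with its NaN-filtered array. Pulled out of the class, the pattern looks roughly like this (array values invented for illustration, with a plain mask standing in for cls._drop_nans):

import numpy as np
from statsmodels.compat.python import lmap

endog = np.array([1.0, np.nan, 3.0])
exog = np.array([4.0, 5.0, 6.0])

keep = ~np.isnan(endog)                      # rows to keep
drop_nans = lambda x: x[keep]                # stand-in for cls._drop_nans

cleaned = dict(zip(['endog', 'exog'], lmap(drop_nans, (endog, exog))))
# {'endog': array([1., 3.]), 'exog': array([4., 6.])}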
Example 2: prob_mv_grid
def prob_mv_grid(bins, cdf, axis=-1):
    '''helper function for probability of a rectangle grid in a multivariate distribution

    how does this generalize to more than 2 variates ?

    bins : tuple
        tuple of bin edges, currently it is assumed that they broadcast
        correctly
    '''
    if not isinstance(bins, np.ndarray):
        bins = lmap(np.asarray, bins)
        n_dim = len(bins)
        bins_ = []
        # broadcast if binedges are 1d
        if all(lmap(np.ndim, bins) == np.ones(n_dim)):
            for d in range(n_dim):
                sl = [None] * n_dim
                sl[d] = slice(None)
                bins_.append(bins[d][sl])
    else:  # assume it is already correctly broadcasted
        n_dim = bins.shape[0]
        bins_ = bins

    print(len(bins))
    cdf_values = cdf(bins_)
    probs = cdf_values.copy()
    for d in range(n_dim):
        probs = np.diff(probs, axis=d)

    return probs
Developer: ChadFulton | Project: statsmodels | Lines of code: 31 | Source file: quantize.py
Example 3: date_range_str
def date_range_str(start, end=None, length=None):
    """
    Returns a list of abbreviated date strings.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'
    end : str, optional
        The last abbreviated date if length is None.
    length : int, optional
        The length of the returned array if end is None.

    Returns
    -------
    date_range : list
        List of strings
    """
    flags = re.IGNORECASE | re.VERBOSE
    #_check_range_inputs(end, length, freq)
    start = start.lower()
    if re.search(_m_pattern, start, flags):
        annual_freq = 12
        split = 'm'
    elif re.search(_q_pattern, start, flags):
        annual_freq = 4
        split = 'q'
    elif re.search(_y_pattern, start, flags):
        annual_freq = 1
        start += 'a1'  # hack
        if end:
            end += 'a1'
        split = 'a'
    else:
        raise ValueError("Date %s not understood" % start)

    yr1, offset1 = lmap(int, start.replace(":", "").split(split))
    if end is not None:
        end = end.lower()
        yr2, offset2 = lmap(int, end.replace(":", "").split(split))
        length = (yr2 - yr1) * annual_freq + offset2
    elif length:
        yr2 = yr1 + length // annual_freq
        offset2 = length % annual_freq + (offset1 - 1)

    years = np.repeat(lrange(yr1+1, yr2), annual_freq).tolist()
    years = np.r_[[str(yr1)]*(annual_freq+1-offset1), years]  # tack on first year
    years = np.r_[years, [str(yr2)]*offset2]  # tack on last year
    if split != 'a':
        offset = np.tile(np.arange(1, annual_freq+1), yr2-yr1-1)
        offset = np.r_[np.arange(offset1, annual_freq+1).astype('a2'), offset]
        offset = np.r_[offset, np.arange(1, offset2+1).astype('a2')]
        date_arr_range = [''.join([i, split, asstr(j)]) for i, j in
                          zip(years, offset)]
    else:
        date_arr_range = years.tolist()

    return date_arr_range
Developer: Inoryy | Project: statsmodels | Lines of code: 55 | Source file: datetools.py
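For orientation, a hypothetical call to the function above. In older statsmodels releases this helper lived in statsmodels.tsa.base.datetools; the import path and the exact output are version dependent and shown only as an illustration:

from statsmodels.tsa.base.datetools import date_range_str

date_range_str('1965q1', '1966q1')
# roughly ['1965q1', '1965q2', '1965q3', '1965q4', '1966q1'];
# lmap(int, ...) does the year/period parsing inside the function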
Example 4: setup_class
def setup_class(cls):
    XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split()

    endog_name = 'docvis'
    exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const']
    instrument_names = 'income ssiratio'.split() + XLISTEXOG2 + ['const']

    endog = DATA[endog_name]
    exog = DATA[exog_names]
    instrument = DATA[instrument_names]

    asarray = lambda x: np.asarray(x, float)
    endog, exog, instrument = lmap(asarray, [endog, exog, instrument])

    cls.bse_tol = [5e-6, 5e-7]
    q_tol = [0.04, 0]
    # compare to Stata default options, iterative GMM
    # with const at end
    start = OLS(np.log(endog+1), exog).fit().params

    nobs, k_instr = instrument.shape
    w0inv = np.dot(instrument.T, instrument) / nobs

    mod = gmm.NonlinearIVGMM(endog, exog, instrument, moment_exponential_add)
    res0 = mod.fit(start, maxiter=0, inv_weights=w0inv,
                   optim_method='bfgs', optim_args={'gtol': 1e-8, 'disp': 0},
                   wargs={'centered': False})
    cls.res1 = res0

    from .results_gmm_poisson import results_addonestep as results
    cls.res2 = results
Developer: statsmodels | Project: statsmodels | Lines of code: 31 | Source file: test_gmm_poisson.py
Example 5: anova_oneway
def anova_oneway(y, x, seq=0):
    # new version to match NIST
    # no generalization or checking of arguments, tested only for 1d
    yrvs = y[:, np.newaxis]  #- min(y)
    #subtracting mean increases numerical accuracy for NIST test data sets
    xrvs = x[:, np.newaxis] - x.mean()  #for 1d  #- 1e12 trick for 'SmLs09.dat'

    meang, varg, xdevmeangr, countg = groupsstats_dummy(yrvs[:, :1], xrvs[:, :1])  #, seq=0)
    #the following does not work as replacement
    #gcount, gmean , meanarr, withinvar, withinvararr = groupstatsbin(y, x)  #, seq=0)
    sswn = np.dot(xdevmeangr.T, xdevmeangr)
    ssbn = np.dot((meang - xrvs.mean())**2, countg.T)
    nobs = yrvs.shape[0]
    ncat = meang.shape[1]
    dfbn = ncat - 1
    dfwn = nobs - ncat
    msb = ssbn / float(dfbn)
    msw = sswn / float(dfwn)
    f = msb / msw
    prob = stats.f.sf(f, dfbn, dfwn)
    R2 = (ssbn / (sswn + ssbn))  #R-squared
    resstd = np.sqrt(msw)  #residual standard deviation
    #print(f, prob)

    def _fix2scalar(z):  # return number
        if np.shape(z) == (1, 1):
            return z[0, 0]
        else:
            return z
    f, prob, R2, resstd = lmap(_fix2scalar, (f, prob, R2, resstd))
    return f, prob, R2, resstd
Developer: 0ceangypsy | Project: statsmodels | Lines of code: 28 | Source file: anova_nistcertified.py
Example 6: dataset
def dataset(self, as_dict=False):
    """
    Returns a Python generator object for iterating over the dataset.

    Parameters
    ----------
    as_dict : bool, optional
        If as_dict is True, yield each row of observations as a dict.
        If False, yields each row of observations as a list.

    Returns
    -------
    Generator object for iterating over the dataset. Yields each row of
    observations as a list by default.

    Notes
    -----
    If missing_values is True during instantiation of StataReader then
    observations with _StataMissingValue(s) are not filtered and should
    be handled by your application.
    """
    try:
        self._file.seek(self._data_location)
    except Exception:
        pass

    if as_dict:
        vars = lmap(str, self.variables())
        for i in range(len(self)):
            yield dict(zip(vars, self._next()))
    else:
        for i in range(self._header['nobs']):
            yield self._next()
Developer: statsmodels | Project: statsmodels | Lines of code: 35 | Source file: foreign.py
Example 7: bootstrap
def bootstrap(distr, args=(), nobs=200, nrep=100, value=None, batch_size=None):
    '''Monte Carlo (or parametric bootstrap) p-values for gof

    currently hardcoded for A^2 only

    assumes vectorized fit_vec method,
    builds and analyses (nobs, nrep) sample in one step

    rename function to less generic

    this works also with nrep=1
    '''
    #signature similar to kstest ?
    #delegate to fn ?

    #rvs_kwds = {'size':(nobs, nrep)}
    #rvs_kwds.update(kwds)

    #it will be better to build a separate batch function that calls bootstrap
    #keep batch if value is true, but batch iterate from outside if stat is returned
    if batch_size is not None:
        if value is None:
            raise ValueError('using batching requires a value')
        n_batch = int(np.ceil(nrep / float(batch_size)))
        count = 0
        for irep in range(n_batch):
            rvs = distr.rvs(args, **{'size': (batch_size, nobs)})
            params = distr.fit_vec(rvs, axis=1)
            params = lmap(lambda x: np.expand_dims(x, 1), params)
            cdfvals = np.sort(distr.cdf(rvs, params), axis=1)
            stat = asquare(cdfvals, axis=1)
            count += (stat >= value).sum()
        return count / float(n_batch * batch_size)
    else:
        #rvs = distr.rvs(args, **kwds)  #extension to distribution kwds ?
        rvs = distr.rvs(args, **{'size': (nrep, nobs)})
        params = distr.fit_vec(rvs, axis=1)
        params = lmap(lambda x: np.expand_dims(x, 1), params)
        cdfvals = np.sort(distr.cdf(rvs, params), axis=1)
        stat = asquare(cdfvals, axis=1)
        if value is None:   #return all bootstrap results
            stat_sorted = np.sort(stat)
            return stat_sorted
        else:   #calculate and return specific p-value
            return (stat >= value).mean()
Developer: bashtage | Project: statsmodels | Lines of code: 47 | Source file: gof_new.py
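The only non-obvious lmap use above is params = lmap(lambda x: np.expand_dims(x, 1), params), which reshapes each fitted parameter array so it broadcasts against the (nrep, nobs) sample. The pattern in isolation:

import numpy as np
from statsmodels.compat.python import lmap

params = (np.array([1.0, 2.0, 3.0]), np.array([0.5, 0.6, 0.7]))   # one value per replication
params = lmap(lambda x: np.expand_dims(x, 1), params)
print([p.shape for p in params])    # [(3, 1), (3, 1)] -- ready to broadcast along axis 1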
Example 8: _col_size
def _col_size(self, k=None):
    """Calculate size of a data record."""
    if len(self._col_sizes) == 0:
        self._col_sizes = lmap(lambda x: self._calcsize(x), self._header["typlist"])
    if k is None:
        return self._col_sizes
    else:
        return self._col_sizes[k]
Developer: Inoryy | Project: statsmodels | Lines of code: 8 | Source file: foreign.py
Example 9: variables
def variables(self):
    """
    Returns a list of the dataset's StataVariables objects.
    """
    return lmap(_StataVariable, zip(lrange(self._header['nvar']),
                                    self._header['typlist'],
                                    self._header['varlist'],
                                    self._header['srtlist'],
                                    self._header['fmtlist'],
                                    self._header['lbllist'],
                                    self._header['vlblist']))
Developer: statsmodels | Project: statsmodels | Lines of code: 9 | Source file: foreign.py
Example 10: test_panel_robust_cov
def test_panel_robust_cov():
    import pandas as pa
    import statsmodels.datasets.grunfeld as gr
    from .results.results_panelrobust import results as res_stata

    dtapa = gr.data.load_pandas()
    # Stata example/data seems to miss last firm
    dtapa_endog = dtapa.endog[:200]
    dtapa_exog = dtapa.exog[:200]
    res = OLS(dtapa_endog,
              add_constant(dtapa_exog[["value", "capital"]], prepend=False)).fit()

    # time indicator in range(max Ti)
    time = np.asarray(dtapa_exog[["year"]])
    time -= time.min()
    time = np.squeeze(time).astype(int)

    # sw.cov_nw_panel requires bounds instead of index
    tidx = [(i * 20, 20 * (i + 1)) for i in range(10)]

    # firm index in range(n_firms)
    firm_names, firm_id = np.unique(np.asarray(dtapa_exog[["firm"]], "S20"),
                                    return_inverse=True)

    # panel newey west standard errors
    cov = sw.cov_nw_panel(res, 0, tidx, use_correction="hac")
    # dropping numpy 1.4 soon
    # np.testing.assert_allclose(cov, res_stata.cov_pnw0_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw0_stata, decimal=4)

    cov = sw.cov_nw_panel(res, 1, tidx, use_correction="hac")
    # np.testing.assert_allclose(cov, res_stata.cov_pnw1_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw1_stata, decimal=4)

    cov = sw.cov_nw_panel(res, 4, tidx)  # check default
    # np.testing.assert_allclose(cov, res_stata.cov_pnw4_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw4_stata, decimal=4)

    # cluster robust standard errors
    cov_clu = sw.cov_cluster(res, firm_id)
    assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)

    # cluster robust standard errors, non-int groups
    cov_clu = sw.cov_cluster(res, lmap(str, firm_id))
    assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)

    # Driscoll and Kraay panel robust standard errors
    rcov = sw.cov_nw_groupsum(res, 0, time, use_correction=0)
    assert_almost_equal(rcov, res_stata.cov_dk0_stata, decimal=4)

    rcov = sw.cov_nw_groupsum(res, 1, time, use_correction=0)
    assert_almost_equal(rcov, res_stata.cov_dk1_stata, decimal=4)

    rcov = sw.cov_nw_groupsum(res, 4, time)  # check default
    assert_almost_equal(rcov, res_stata.cov_dk4_stata, decimal=4)
Developer: JerWatson | Project: statsmodels | Lines of code: 53 | Source file: test_panel_robustcov.py
Example 11: data2proddummy
def data2proddummy(x):
    '''creates product dummy variables from 2 columns of 2d array

    drops last dummy variable, but not from each category
    singular with simple dummy variable but not with constant
    quickly written, no safeguards
    '''
    #brute force, assumes x is 2d
    #replace with encoding if possible
    groups = np.unique(lmap(tuple, x.tolist()))
    #includes singularity with additive factors
    return (x == groups[:, None, :]).all(-1).T.astype(int)[:, :-1]
Developer: ChadFulton | Project: statsmodels | Lines of code: 14 | Source file: ols_anova_original.py
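Depending on the NumPy version, np.unique(lmap(tuple, x.tolist())) may not return unique rows: modern NumPy flattens the input unless an axis is given. A small sketch of the same idea on current NumPy, with a made-up sample array for illustration:

import numpy as np

x = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1],
              [1, 1]])

groups = np.unique(x, axis=0)     # unique (col1, col2) combinations, i.e. unique rows
dummies = (x == groups[:, None, :]).all(-1).T.astype(int)[:, :-1]
print(dummies)                    # one product-dummy column per combination, last one dropped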
Example 12: _next
def _next(self):
    typlist = self._header["typlist"]
    if self._has_string_data:
        data = [None] * self._header["nvar"]
        for i in range(len(data)):
            if isinstance(typlist[i], int):
                data[i] = self._null_terminate(self._file.read(typlist[i]),
                                               self._encoding)
            else:
                data[i] = self._unpack(typlist[i],
                                       self._file.read(self._col_size(i)))
        return data
    else:
        return lmap(
            lambda i: self._unpack(typlist[i], self._file.read(self._col_size(i))),
            lrange(self._header["nvar"])
        )
Developer: Inoryy | Project: statsmodels | Lines of code: 14 | Source file: foreign.py
Example 13: variables
def variables(self):
    """
    Returns a list of the dataset's StataVariables objects.
    """
    return lmap(
        _StataVariable,
        zip(
            lrange(self._header["nvar"]),
            self._header["typlist"],
            self._header["varlist"],
            self._header["srtlist"],
            self._header["fmtlist"],
            self._header["lbllist"],
            self._header["vlblist"],
        ),
    )
Developer: Inoryy | Project: statsmodels | Lines of code: 16 | Source file: foreign.py
Example 14: dates_from_str
def dates_from_str(dates):
    """
    Takes a sequence of date strings and returns a list of datetime objects.

    Parameters
    ----------
    dates : array-like
        A sequence of abbreviated dates as strings. For instance,
        '1996m1' or '1996Q1'. The datetime dates are at the end of the
        period.

    Returns
    -------
    date_list : list
        A list of datetime types.
    """
    return lmap(date_parser, dates)
Developer: Inoryy | Project: statsmodels | Lines of code: 17 | Source file: datetools.py
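A quick usage sketch; the import path below is the one used in statsmodels documentation examples and is assumed to match the release these snippets come from:

from statsmodels.tsa.base.datetools import dates_from_str

dates_from_str(['1996m1', '1996m2', '1996m3'])
# a list of datetime.datetime objects marking the end of each period,
# produced internally via lmap(date_parser, dates)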
Example 15: test_plot_quarter
def test_plot_quarter(close_figures):
    dta = sm.datasets.macrodata.load_pandas().data
    dates = lmap('Q'.join, zip(dta.year.astype(int).apply(str),
                               dta.quarter.astype(int).apply(str)))

    # test dates argument
    quarter_plot(dta.unemp.values, dates)

    # test with a DatetimeIndex with no freq
    dta.set_index(pd.to_datetime(dates), inplace=True)
    quarter_plot(dta.unemp)

    # w freq
    # see pandas #6631
    dta.index = pd.DatetimeIndex(pd.to_datetime(dates), freq='QS-Oct')
    quarter_plot(dta.unemp)

    # w PeriodIndex
    dta.index = pd.PeriodIndex(pd.to_datetime(dates), freq='Q')
    quarter_plot(dta.unemp)
Developer: bashtage | Project: statsmodels | Lines of code: 19 | Source file: test_tsaplots.py
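Stripped of the pandas context, the lmap('Q'.join, zip(...)) line simply builds 'YYYYQq' labels:

from statsmodels.compat.python import lmap

years = ['1959', '1959', '1959']
quarters = ['1', '2', '3']
lmap('Q'.join, zip(years, quarters))   # ['1959Q1', '1959Q2', '1959Q3']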
Example 16: summary_return
def summary_return(tables, return_fmt='text'):
    # join table parts then print
    if return_fmt == 'text':
        strdrop = lambda x: str(x).rsplit('\n', 1)[0]
        # convert to string drop last line
        return '\n'.join(lmap(strdrop, tables[:-1]) + [str(tables[-1])])
    elif return_fmt == 'tables':
        return tables
    elif return_fmt == 'csv':
        return '\n'.join(x.as_csv() for x in tables)
    elif return_fmt == 'latex':
        # TODO: insert \hline after updating SimpleTable
        table = copy.deepcopy(tables[0])
        del table[-1]
        for part in tables[1:]:
            table.extend(part)
        return table.as_latex_tabular()
    elif return_fmt == 'html':
        return "\n".join(table.as_html() for table in tables)
    else:
        raise ValueError('available output formats are text, csv, latex, html')
Developer: bashtage | Project: statsmodels | Lines of code: 21 | Source file: summary.py
Example 17: _get_colwidths
def _get_colwidths(self, output_format, **fmt_dict):
    """Return list, the calculated widths of each column."""
    output_format = get_output_format(output_format)
    fmt = self.output_formats[output_format].copy()
    fmt.update(fmt_dict)
    ncols = max(len(row) for row in self)
    request = fmt.get('colwidths')
    if request == 0:  # assume no extra space desired (e.g. CSV)
        return [0] * ncols
    elif request is None:  # assume no extra space desired (e.g. CSV)
        request = [0] * ncols
    elif isinstance(request, (int, long)):
        request = [request] * ncols
    elif len(request) < ncols:
        request = [request[i % len(request)] for i in range(ncols)]

    min_widths = []
    for col in zip(*self):
        maxwidth = max(len(c.format(0, output_format, **fmt)) for c in col)
        min_widths.append(maxwidth)

    result = lmap(max, min_widths, request)
    return result
Developer: ChadFulton | Project: statsmodels | Lines of code: 21 | Source file: table.py
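The final lmap(max, min_widths, request) line is a reminder that lmap, like map, walks several iterables in parallel; a tiny illustration (assuming a statsmodels version that still ships lmap):

from statsmodels.compat.python import lmap

# element-wise maximum of measured column widths and requested widths
lmap(max, [8, 3, 12], [10, 0, 0])   # [10, 3, 12]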
Example 18: print_summary
def print_summary(self, stats, orientation='auto'):
    #TODO: need to specify a table formatting for the numbers, using default
    title = 'Summary Statistics'
    header = stats
    stubs = self.univariate['obs'][1]
    data = [[self.univariate[astat][2][col] for astat in stats] for col in
            range(len(self.univariate['obs'][2]))]

    if (orientation == 'varcols') or \
       (orientation == 'auto' and len(stubs) < len(header)):
        # swap rows and columns
        data = lmap(lambda *row: list(row), *data)
        header, stubs = stubs, header

    part_fmt = dict(data_fmts=["%#8.4g"] * (len(header) - 1))
    table = SimpleTable(data,
                        header,
                        stubs,
                        title=title,
                        txt_fmt=part_fmt)

    return table
Developer: bashtage | Project: statsmodels | Lines of code: 22 | Source file: descriptivestats.py
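The lmap(lambda *row: list(row), *data) call is a compact transpose of a list of rows, the same idea as list(map(list, zip(*data))):

from statsmodels.compat.python import lmap

data = [[1, 2, 3],
        [4, 5, 6]]
lmap(lambda *row: list(row), *data)   # [[1, 4], [2, 5], [3, 6]]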
Example 19: module-level script (ar_model.py)
## Try with a pandas series
import pandas
import scikits.timeseries as ts

d1 = ts.Date(year=1700, freq='A')
#NOTE: have to have yearBegin offset for annual data until parser rewrite
#should this be up to the user, or should it be done in TSM init?
#NOTE: not anymore, it's end of year now
ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
pandas_dr = pandas.DateRange(start=d1.datetime,
                             periods=len(sunspots.endog), timeRule='[email protected]')
#pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

dates = np.arange(1700, 1700 + len(sunspots.endog))
dates = ts.date_array(dates, freq='A')
#sunspots = pandas.Series(sunspots.endog, index=dates)

#NOTE: pandas only does business days for dates it looks like
import datetime
dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                           ts_dr.toordinal().astype(int)))
sunspots = pandas.Series(sunspots.endog, index=dt_dates)

#NOTE: pandas can't handle pre-1900 dates
mod = AR(sunspots, freq='A')
res = mod.fit(method='mle', maxlag=9)

# some data for an example in Box Jenkins
IBM = np.asarray([460, 457, 452, 459, 462, 459, 463, 479, 493, 490.])
w = np.diff(IBM)
theta = .5
Developer: 0ceangypsy | Project: statsmodels | Lines of code: 30 | Source file: ar_model.py
Example 20: lstsq
def lstsq(a, b, cond=None, overwrite_a=0, overwrite_b=0):
    """Compute least-squares solution to the equation :m:`a x = b`.

    Compute a vector x such that the 2-norm :m:`|b - a x|` is minimised.

    Parameters
    ----------
    a : array, shape (M, N)
    b : array, shape (M,) or (M, K)
    cond : float
        Cutoff for 'small' singular values; used to determine effective
        rank of a. Singular values smaller than rcond*largest_singular_value
        are considered zero.
    overwrite_a : boolean
        Discard data in a (may enhance performance)
    overwrite_b : boolean
        Discard data in b (may enhance performance)

    Returns
    -------
    x : array, shape (N,) or (N, K) depending on shape of b
        Least-squares solution
    residues : array, shape () or (1,) or (K,)
        Sums of residues, squared 2-norm for each column in :m:`b - a x`.
        If rank of matrix a is < N or > M this is an empty array.
        If b was 1-d, this is an (1,) shape array, otherwise the shape is (K,).
    rank : integer
        Effective rank of matrix a
    s : array, shape (min(M,N),)
        Singular values of a. The condition number of a is abs(s[0]/s[-1]).

    Raises LinAlgError if computation does not converge.
    """
    a1, b1 = lmap(asarray_chkfinite, (a, b))
    if a1.ndim != 2:
        raise ValueError('expected matrix')
    m, n = a1.shape
    if b1.ndim == 2:
        nrhs = b1.shape[1]
    else:
        nrhs = 1
    if m != b1.shape[0]:
        raise ValueError('incompatible dimensions')

    gelss, = get_lapack_funcs(('gelss',), (a1, b1))
    if n > m:
        # need to extend b matrix as it will be filled with
        # a larger solution matrix
        b2 = zeros((n, nrhs), dtype=gelss.dtype)
        if b1.ndim == 2:
            b2[:m, :] = b1
        else:
            b2[:m, 0] = b1
        b1 = b2

    overwrite_a = overwrite_a or (a1 is not a and not hasattr(a, '__array__'))
    overwrite_b = overwrite_b or (b1 is not b and not hasattr(b, '__array__'))

    if gelss.module_name[:7] == 'flapack':
        lwork = calc_lwork.gelss(gelss.prefix, m, n, nrhs)[1]
        v, x, s, rank, info = gelss(a1, b1, cond=cond, lwork=lwork,
                                    overwrite_a=overwrite_a,
                                    overwrite_b=overwrite_b)
    else:
        raise NotImplementedError('calling gelss from %s' %
                                  gelss.module_name)

    if info > 0:
        raise LinAlgError("SVD did not converge in Linear Least Squares")
    if info < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal gelss' % -info)

    resids = asarray([], dtype=x.dtype)
    if n < m:
        x1 = x[:n]
        if rank == n:
            resids = sum(x[n:]**2, axis=0)
        x = x1

    return x, resids, rank, s
Developer: DevSinghSachan | Project: statsmodels | Lines of code: 76 | Source file: linalg.py
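Here lmap only sanitises the inputs: a1, b1 = lmap(asarray_chkfinite, (a, b)) converts both operands in one go and raises if NaN or inf values sneak in. The same pattern with plain NumPy, solving the system with numpy.linalg.lstsq rather than the LAPACK gelss wrapper above (sample data made up for illustration):

import numpy as np
from statsmodels.compat.python import lmap

a = [[1.0, 2.0], [3.0, 4.0], [5.0, 7.0]]
b = [1.0, 2.0, 3.0]

a1, b1 = lmap(np.asarray_chkfinite, (a, b))      # reject NaN/inf inputs
x, resids, rank, s = np.linalg.lstsq(a1, b1, rcond=None)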
Note: the statsmodels.compat.python.lmap examples in this article were compiled by 纯净天空 from source-code and documentation hosting platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.