本文整理汇总了Python中pylearn2.utils.string_utils.preprocess函数的典型用法代码示例。如果您正苦于以下问题:Python preprocess函数的具体用法?Python preprocess怎么用?Python preprocess使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了preprocess函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_preprocess
def test_preprocess():
    """
    Tests that `preprocess` fills in environment variables using various
    interfaces and raises a ValueError if a needed environment variable
    definition is missing.
    """
    # Build the unique variable names before entering the `try` so that the
    # cleanup in `finally` can never run into an unbound `keys`.
    keys = ["PYLEARN2_" + str(uuid.uuid1())[:8] for _ in range(3)]
    strs = ["${%s}" % k for k in keys]
    try:
        os.environ[keys[0]] = keys[1]
        # Test with os.environ only.
        assert preprocess(strs[0]) == keys[1]
        # Test with provided dict only.
        assert preprocess(strs[1], environ={keys[1]: keys[2]}) == keys[2]
        # Provided overrides os.environ.
        assert preprocess(strs[0], environ={keys[0]: keys[2]}) == keys[2]
        # keys[2] is defined neither in os.environ nor in the provided dict,
        # so the substitution has to fail.
        raised = False
        try:
            preprocess(strs[2], environ={keys[1]: keys[0]})
        except ValueError:
            raised = True
        assert raised
    finally:
        # Never leak the temporary variables into the rest of the test run.
        for key in keys:
            if key in os.environ:
                del os.environ[key]
开发者ID:123fengye741,项目名称:pylearn2,代码行数:27,代码来源:test_string_utils.py
示例2: get_key
def get_key(config_file = '${HOME}/.key_chain'):
    """
    Read and return the auth key stored in a config file.

    Parameters
    ----------
    config_file : str
        Path of the config file. Environment variables written as
        ``${NAME}`` are expanded with `preprocess`.

    Returns
    -------
    key : str
        Value of the ``key`` option in the ``mashape`` section.

    Raises
    ------
    IOError
        If the config file cannot be opened.
    """
    config_file = preprocess(config_file)
    config = ConfigParser.RawConfigParser()
    # Parse through an explicitly opened handle: a missing file still raises
    # IOError (as the original bare `open` did), but the handle is now
    # actually used and reliably closed.
    with open(config_file) as handle:
        config.readfp(handle)
    return config.get('mashape', 'key')
开发者ID:LeonBai,项目名称:lisa_emotiw-1,代码行数:10,代码来源:face_api.py
示例3: __init__
def __init__(self):
    """
    Record the current process id and resolve the remote and local dataset
    directories from ``${PYLEARN2_DATA_PATH}`` and
    ``${PYLEARN2_LOCAL_DATA_PATH}``.

    If either variable cannot be resolved, both directories are set to the
    empty string.
    """
    self.pid = os.getpid()
    remote_template = "${PYLEARN2_DATA_PATH}"
    local_template = "${PYLEARN2_LOCAL_DATA_PATH}"
    try:
        remote_dir = string_utils.preprocess(remote_template)
        local_dir = string_utils.preprocess(local_template)
    except (ValueError, string_utils.NoDataPathError, string_utils.EnvironmentVariableError):
        # Local cache seems to be deactivated
        remote_dir = local_dir = ""
    self.dataset_remote_dir = remote_dir
    self.dataset_local_dir = local_dir
开发者ID:syhw,项目名称:pylearn2,代码行数:12,代码来源:cache.py
示例4: __init__
def __init__(self, whichset, path=None):
    """
    Open the HDF5 array file for one split of the galaxy data, creating it
    first when it does not exist yet.

    Parameters
    ----------
    whichset : str
        One of 'train', 'test', 'valid', 'final' or 'bigtrain'. 'final'
        refers to the unlabeled images from which predictions should be
        made (images_test_rev1); 'train'/'test'/'valid' come from
        images_training_rev1 and 'bigtrain' is that whole set, unsplit.
    path : str, optional
        Data directory; defaults to ``${PYLEARN2_DATA_PATH}/galaxy-data/``.
        Environment variables are expanded with `preprocess`.
    """
    assert whichset in ['train','test','valid','final','bigtrain']
    self.whichset = whichset
    # Final desired shape; the original images are 424 x 424.
    self.img_shape = (100,100,3)
    self.target_shape = (37,)

    data_root = preprocess('${PYLEARN2_DATA_PATH}/galaxy-data/' if path is None else path)
    file_n = os.path.join(data_root, "h5", whichset) + "_arrays.h5"
    if not os.path.isfile(file_n):
        # create file and fill with data
        self.first_time(whichset, data_root, file_n)
    else:
        # just open file
        self.h5file = tables.openFile(file_n, mode='r')

    # NOTE(review): neither `root` nor `axes` is assigned anywhere in this
    # method (the only `axes` assignment is commented out in the original);
    # presumably they are module-level names -- confirm.
    super(galaxy_zoo_dataset, self).__init__(X=root.images, y=root.targets,
                                             axes=axes)
开发者ID:nhnifong,项目名称:Ed-galaxy-zoo,代码行数:30,代码来源:make_dataset.py
示例5: __init__
def __init__(self, path, n_labels=2, start=None, stop=None, del_raw=True, x_only=False):
    """
    Load a CSV file and pass (a row window of) it to the parent dataset.

    Parameters
    ----------
    path : str
        Location of the data; expanded with `preprocess` before loading.
    n_labels : int, optional
        Forwarded to the parent constructor as `y_labels`; replaced by
        None when `x_only` is set.
    start, stop : int, optional
        Row window ``[start, stop)``. Only applied when `start` is given;
        `stop` then defaults to the number of examples.
    del_raw : bool, optional
        Stored on the instance and forwarded to `CSVDataset._load_data`.
    x_only : bool, optional
        If True, the targets are discarded.

    Raises
    ------
    ValueError
        If `stop` exceeds the number of examples.
    """
    self.del_raw = del_raw
    features, targets = CSVDataset._load_data(preprocess(path), del_raw=del_raw)

    # np.min propagates NaN, so one NaN target marks the data as unlabeled.
    if np.isnan(np.min(targets)):
        targets = None
    else:
        targets = targets.astype(int).reshape(-1, 1)

    if start is not None:
        n_examples = features.shape[0]
        if stop is None:
            stop = n_examples
        assert start >= 0
        assert start < stop
        if stop > n_examples:
            raise ValueError("stop must be less than the # of examples but " +
                             "stop is " + str(stop) + " and there are " + str(n_examples) +
                             " examples.")
        features = features[start:stop, :]
        if targets is not None:
            targets = targets[start:stop, :]

    if x_only:
        targets = None
        n_labels = None

    super(CSVDataset, self).__init__(X=features, y=targets, y_labels=n_labels)
开发者ID:ChenglongChen,项目名称:avito_context_click_2015,代码行数:28,代码来源:train_pylearn.py
示例6: load
def load(filepath, recurse_depth=0, retry=True):
    """
    Load an object from a NumPy or Matlab file.

    Parameters
    ----------
    filepath : str
        A path to a file to load. Should be a pickle, Matlab, or NumPy
        file.
    recurse_depth : int
        End users should not use this argument. It is used by the function
        itself to implement the `retry` option recursively. Environment
        variables in `filepath` are only expanded when it is 0.
    retry : bool
        If True, will make a handful of attempts to load the file before
        giving up. This can be useful if you are for example calling
        show_weights.py on a file that is actively being written to by a
        training script--sometimes the load attempt might fail if the
        training script writes at the same time show_weights tries to
        read, but if you try again after a few seconds you should be able
        to open the file.

    Returns
    -------
    loaded_object : object
        The object that was stored in the file.

    Notes
    -----
    NOTE(review): the code visible here only handles ``.npy``/``.npz`` and
    ``.mat`` paths and never reads `retry` or `joblib_available`; the
    pickle branch is presumably missing from this excerpt -- confirm
    against the full source.

    .. todo::
        Refactor to hide recurse_depth from end users
    """
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False

    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith((".npy", ".npz")):
        return np.load(filepath)

    if filepath.endswith(".mat"):
        # scipy.io is imported lazily and cached in a module-level global.
        global io
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            # Only fall back to h5py when scipy's message mentions the HDF
            # reader; any other NotImplementedError is re-raised.
            if "HDF reader" in str(nei):
                global hdf_reader
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath)
            else:
                raise
        # this code should never be reached
        assert False
开发者ID:pangyuteng,项目名称:chalearn2014,代码行数:60,代码来源:serial.py
示例7: __init__
def __init__(self, dataset, model, algorithm=None, save_path=None,
             save_freq=0, extensions=None, allow_overwrite=True):
    """
    Construct a Train instance.

    Parameters
    ----------
    dataset : `pylearn2.datasets.dataset.Dataset`
    model : `pylearn2.models.model.Model`
    algorithm : <Optional>
        `pylearn2.training_algorithms.training_algorithm.TrainingAlgorithm`
    save_path : <Optional> str
        Path to save (with pickle / joblib) the model.
    save_freq : <Optional> int
        Frequency of saves, in epochs. A frequency of zero disables
        automatic saving altogether. A frequency of 1 saves every
        epoch. A frequency of 2 saves every other epoch, etc.
        (default=0, i.e. never save). Note: when automatic saving is
        enabled (eg save_freq > 0), the model is always saved after
        learning, even when the final epoch is not a multiple of
        `save_freq`.
    extensions : <Optional> iterable
        A collection of `TrainExtension` objects whose callbacks are
        triggered at various points in learning.
    allow_overwrite : <Optional> bool
        If `True`, will save the model to save_path even if there is already
        something there. Otherwise, will raise an error if the `save_path`
        is already occupied.
    """
    self.allow_overwrite = allow_overwrite
    self.first_save = True
    self.dataset = dataset
    self.model = model
    self.algorithm = algorithm

    if save_path is not None:
        if save_freq == 0:
            warnings.warn('save_path specified but save_freq is 0 '
                          '(never save). Is this intentional?')
        self.save_path = preprocess(save_path)
    elif save_freq > 0:
        # No explicit path: derive "<stem>[.phase<value>].pkl" from the
        # environment.
        stem = os.environ['PYLEARN2_TRAIN_FILE_FULL_STEM']
        phase_variable = 'PYLEARN2_TRAIN_PHASE'
        if phase_variable in os.environ:
            # Environment values are always strings, so formatting them
            # with '%d' raised TypeError; '%s' accepts them as they are.
            phase = 'phase%s' % os.environ[phase_variable]
            tokens = [stem, phase, 'pkl']
        else:
            tokens = [stem, 'pkl']
        self.save_path = '.'.join(tokens)
    self.save_freq = save_freq

    if hasattr(self.dataset, 'yaml_src'):
        self.model.dataset_yaml_src = self.dataset.yaml_src
    else:
        warnings.warn("dataset has no yaml src, model won't know what " +
                      "data it was trained on")

    self.extensions = extensions if extensions is not None else []
    self.training_seconds = sharedX(value=0, name='training_seconds_this_epoch')
    self.total_seconds = sharedX(value=0, name='total_seconds_last_epoch')
开发者ID:fancyspeed,项目名称:pylearn2,代码行数:60,代码来源:train.py
示例8: __enter__
def __enter__(self):
    """
    Return the file handle to use inside the ``with`` block.

    When `self._f` is a string it is expanded with `preprocess` and opened
    using the stored mode and buffering; any other object is used as the
    handle unchanged.
    """
    if not isinstance(self._f, basestring):
        self._handle = self._f
        return self._handle
    self._f = preprocess(self._f)
    self._handle = open(self._f, self._mode, self._buffering)
    return self._handle
开发者ID:deigen,项目名称:pylearn,代码行数:7,代码来源:mnist_ubyte.py
示例9: main
def main():
    """
    Count the words in the ESP Game label files and write the 4000 most
    frequent ones to ``wordlist.txt`` in the current directory, one word
    per line, most frequent first.

    Label files are read from
    ``${PYLEARN2_DATA_PATH}/esp_game/ESPGame100k/labels/``; exactly 100000
    files are expected there.
    """
    base = preprocess('${PYLEARN2_DATA_PATH}/esp_game/ESPGame100k/labels/')
    paths = sorted(os.listdir(base))
    assert len(paths) == 100000

    words = {}
    for i, path in enumerate(paths):
        if i % 1000 == 0:
            # progress indicator
            print(i)
        # `with` closes each label file; the original leaked one handle
        # per file.
        with open(base + path, 'r') as label_file:
            for line in label_file:
                # Strip only the newline: slicing off the last character
                # truncated a final line that had no trailing newline.
                word = line.rstrip('\n')
                words[word] = words.get(word, 0) + 1

    ranked_words = sorted(words, key=lambda w: -words[w])
    with open('wordlist.txt', 'w') as out:
        out.writelines(word_ + '\n' for word_ in ranked_words[:4000])
开发者ID:123fengye741,项目名称:pylearn2,代码行数:29,代码来源:make_wordlist.py
示例10: load
def load(filepath, recurse_depth=0):
    """
    Load an object from a NumPy (``.npy``) or Matlab (``.mat``) file.

    Parameters
    ----------
    filepath : str
        Path of the file to load. Environment variables are expanded with
        `preprocess` when `recurse_depth` is 0.
    recurse_depth : int
        Only a value of 0 triggers the `preprocess` expansion.

    Returns
    -------
    loaded_object : object
        Result of `np.load`, `scipy.io.loadmat`, or an `h5py.File` when
        scipy reports that the file needs the HDF reader.

    Notes
    -----
    NOTE(review): `joblib_available` is never read and other extensions
    fall through in the code visible here; the rest of the function is
    presumably missing from this excerpt -- confirm against the full
    source.
    """
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False

    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith('.npy'):
        return np.load(filepath)

    if filepath.endswith('.mat'):
        # scipy.io is imported lazily and cached in a module-level global.
        global io
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            # Only fall back to h5py when scipy's message mentions the HDF
            # reader; any other NotImplementedError is re-raised.
            if 'HDF reader' in str(nei):
                global hdf_reader
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath)
            else:
                raise
        #this code should never be reached
        assert False
开发者ID:gdesjardins,项目名称:pylearn,代码行数:31,代码来源:serial.py
示例11: load_ndarray_label
def load_ndarray_label(name):
    """
    Load the train, valid and test labels of dataset `name` as ndarrays.
    Labels are only available for the toy dataset 'ule'.

    Parameters
    ----------
    name : 'ule'
        Must be 'ule'

    Returns
    -------
    train_l, valid_l, test_l : ndarray
        Label data loaded from
        ``${PYLEARN2_DATA_PATH}/UTLC/filetensor/<name>_<subset>.tf``.
    """
    assert name in ['ule']
    data_root = preprocess('${PYLEARN2_DATA_PATH}')
    prefix = os.path.join(data_root, 'UTLC', 'filetensor', name + '_')
    # Load the three subsets in train / valid / test order.
    return tuple(load_filetensor(prefix + subset + '.tf')
                 for subset in ('trainl', 'validl', 'testl'))
开发者ID:123fengye741,项目名称:pylearn2,代码行数:27,代码来源:utlc.py
示例12: __init__
def __init__(self, dataset, model, algorithm=None, save_path=None,
             save_freq=0, extensions=None, allow_overwrite=True):
    """
    Store the training components and work out where the model is saved.

    Parameters
    ----------
    dataset : object
        Dataset to train on. Its `yaml_src` attribute, when present, is
        copied onto the model.
    model : object
        Model to train.
    algorithm : object, optional
        Training algorithm; stored unchanged.
    save_path : str, optional
        Where to save the model; expanded with `preprocess`. When omitted
        and `save_freq` > 0, the path is built from the
        ``PYLEARN2_TRAIN_FILE_FULL_STEM`` and (optionally)
        ``PYLEARN2_TRAIN_PHASE`` environment variables.
    save_freq : int, optional
        Stored on the instance; 0 triggers a warning when `save_path` is
        also given.
    extensions : iterable, optional
        Stored on the instance; defaults to an empty list.
    allow_overwrite : bool, optional
        Stored on the instance.
    """
    self.allow_overwrite = allow_overwrite
    self.first_save = True
    self.dataset = dataset
    self.model = model
    self.algorithm = algorithm

    if save_path is not None:
        if save_freq == 0:
            warnings.warn('save_path specified but save_freq is 0 '
                          '(never save). Is this intentional?')
        self.save_path = preprocess(save_path)
    elif save_freq > 0:
        # No explicit path: derive "<stem>[.phase<value>].pkl" from the
        # environment.
        stem = os.environ['PYLEARN2_TRAIN_FILE_FULL_STEM']
        phase_variable = 'PYLEARN2_TRAIN_PHASE'
        if phase_variable in os.environ:
            # Environment values are always strings, so formatting them
            # with '%d' raised TypeError; '%s' accepts them as they are.
            phase = 'phase%s' % os.environ[phase_variable]
            tokens = [stem, phase, 'pkl']
        else:
            tokens = [stem, 'pkl']
        self.save_path = '.'.join(tokens)
    self.save_freq = save_freq

    if hasattr(self.dataset, 'yaml_src'):
        self.model.dataset_yaml_src = self.dataset.yaml_src
    else:
        warnings.warn("dataset has no yaml src, model won't know what " +
                      "data it was trained on")

    self.extensions = extensions if extensions is not None else []
    self.training_seconds = sharedX(value=0,
                                    name='training_seconds_this_epoch')
    self.total_seconds = sharedX(value=0, name='total_seconds_last_epoch')
开发者ID:Bowen-C,项目名称:pylearn2,代码行数:34,代码来源:train.py
示例13: Transform
def Transform():
    """
    Test smaller version of convolutional_network.ipynb

    Fills the ``experiment_base_transform.yaml`` template found in the
    parent of this file's directory with fixed hyper-parameters, parses it
    with `yaml_parse.load` and runs the resulting training loop. Save
    paths live under ``${PYLEARN2_DATA_PATH}/cifar10``.
    """
    which_experiment = "S100"
    skip.skip_if_no_data()
    yaml_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    data_dir = string_utils.preprocess("${PYLEARN2_DATA_PATH}")
    base_save_path = os.path.join(data_dir, "cifar10")
    # str.lower() replaces string.lower(), which no longer exists in
    # Python 3's `string` module.
    save_path = os.path.join(base_save_path, "experiment_" + which_experiment.lower())
    # NOTE: backslashes in Windows file names are processed by the YAML
    # parser when it reads the path as a string; the original left the
    # corresponding escaping step commented out.
    # `with` closes the template file, which the original left open.
    with open("{0}/experiment_base_transform.yaml".format(yaml_file_path), "r") as template_file:
        yaml = template_file.read()
    hyper_params = {
        "batch_size": 64,
        "output_channels_h1": 64,
        "output_channels_h2": 128,
        "output_channels_h3": 600,
        "max_epochs": 100,
        "save_path": save_path,
        "base_save_path": base_save_path,
    }
    yaml = yaml % (hyper_params)
    train = yaml_parse.load(yaml)
    train.main_loop()
开发者ID:CKehl,项目名称:pylearn2,代码行数:26,代码来源:train_experiment_base.py
示例14: __init__
def __init__(self,
             path='train.csv',
             one_hot=False,
             expect_labels=True,
             expect_headers=True,
             delimiter=',',
             col_number=10):
    """
    Store the CSV reading options, load the data and initialise the
    parent dataset with it.

    Parameters
    ----------
    path : str
        Location of the CSV file; expanded with `preprocess`.
    one_hot, expect_labels, expect_headers, delimiter, col_number
        Stored on the instance under the same names. Presumably consumed
        by `_load_data` -- confirm there.
    """
    self.one_hot = one_hot
    self.expect_labels = expect_labels
    self.expect_headers = expect_headers
    self.delimiter = delimiter
    self.col_number = col_number
    self.view_converter = None

    # Resolve environment variables before anything reads the path.
    self.path = preprocess(path)
    features, targets = self._load_data()
    super(CSVModified, self).__init__(X=features, y=targets)
开发者ID:lluiscastrejonsubira,项目名称:Network-Oracle,代码行数:27,代码来源:csv_modified.py
示例15: __init__
def __init__(self, save_dir):
    """
    Copy the training YAML file into `save_dir`.

    The source is ``${PYLEARN2_TRAIN_DIR}/${PYLEARN2_TRAIN_BASE_NAME}``;
    the copy keeps the same base name. `save_dir` is created when missing.

    Raises
    ------
    IOError
        If `save_dir` exists but is not a directory, or is not writable.
    """
    train_dir = preprocess('${PYLEARN2_TRAIN_DIR}')
    base_name = preprocess('${PYLEARN2_TRAIN_BASE_NAME}')
    src = os.path.join(train_dir, base_name)
    dst = os.path.join(save_dir, base_name)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    occupied_by_non_dir = os.path.exists(save_dir) and not os.path.isdir(save_dir)
    if occupied_by_non_dir:
        raise IOError("save path %s exists, not a directory" % save_dir)
    if not os.access(save_dir, os.W_OK):
        raise IOError("permission error creating %s" % dst)

    with log_timing(log, 'copying yaml from {} to {}'.format(src, dst)):
        copyfile(src, dst)
开发者ID:Qi0116,项目名称:deepthought,代码行数:16,代码来源:util.py
示例16: __init__
def __init__(self,
             path='../filtered-seizure-data',
             target='Dog_1',
             one_hot=False,
             scale_option='usf',
             nwindows=60,
             skip=5,
             window_size=None,
             expect_labels=True):
    """
    Store the loading options, load the data and initialise the parent
    dataset with it.

    Parameters
    ----------
    path : str
        Base directory, location of the directories of filtered hkl
        files; expanded with `preprocess`.
    target : str
        Added both to the path and as a prefix to each file name.
    one_hot, scale_option, skip, expect_labels
        Stored on the instance under the same names. Presumably consumed
        by `_load_data` -- confirm there.
    nwindows : int
        Number of windows; sets the default `window_size`.
    window_size : int, optional
        Defaults to ``Nsamples // nwindows``.
    """
    self.target = target
    self.one_hot = one_hot
    self.scale_option = scale_option
    self.nwindows = nwindows
    self.expect_labels = expect_labels
    self.skip = skip
    self.view_converter = None
    self.Nsamples = 239766  # 10 min at 399.61 Hz
    self.window_size = (self.Nsamples // self.nwindows
                        if window_size is None else window_size)

    # Resolve environment variables before anything reads the path.
    self.path = preprocess(path)
    features, targets = self._load_data()
    super(MyPyLearn2Dataset, self).__init__(X=features, y=targets)
开发者ID:udibr,项目名称:seizure-prediction,代码行数:35,代码来源:my_pylearn2_dataset.py
示例17: _unpickle
def _unpickle(cls, file):
    """
    Load one pickled CIFAR-10 batch file.

    Parameters
    ----------
    file : str
        File name inside
        ``${PYLEARN2_DATA_PATH}/cifar10/cifar-10-batches-py``.

    Returns
    -------
    batch : object
        Whatever object was pickled in the file.

    Raises
    ------
    IOError
        If the batch file does not exist.

    .. todo::
        What is this? why not just use serial.load like the CIFAR-100
        class? Whoever wrote it shows up as "unknown" in git blame.
    """
    from pylearn2.utils import string_utils
    fname = os.path.join(string_utils.preprocess('${PYLEARN2_DATA_PATH}'),
                         'cifar10', 'cifar-10-batches-py', file)
    if not os.path.exists(fname):
        raise IOError(fname+" was not found. You probably need to "
                      "download the CIFAR-10 dataset by using the "
                      "download script in "
                      "pylearn2/scripts/datasets/download_cifar10.sh "
                      "or manually from "
                      "http://www.cs.utoronto.ca/~kriz/cifar.html")
    fname = cache.datasetCache.cache_file(fname)
    _logger.info('loading file %s' % fname)
    # NOTE: unpickling can execute arbitrary code; only point this at
    # trusted dataset files.
    # `with` closes the file even when unpickling fails, and the result no
    # longer shadows the builtin `dict`.
    with open(fname, 'rb') as fo:
        batch = cPickle.load(fo)
    return batch
开发者ID:Lupino,项目名称:nupic_image_recognition,代码行数:25,代码来源:cifar10.py
示例18: show
def show(image):
    """
    Save an image to a temporary PNG file and open it with the viewer
    command configured in ``${PYLEARN2_VIEWER_COMMAND}``. The shell
    command removes the file after the viewer command finishes.

    Parameters
    ----------
    image : PIL Image object or ndarray
        If ndarray, integer formats are assumed to use 0-255
        and float formats are assumed to use 0-1

    Raises
    ------
    ValueError
        If an array input does not have 2 or 3 dimensions.
    TypeError
        If PIL rejects the array.
    """
    if hasattr(image, '__array__'):
        # do some shape checking because PIL just raises a tuple indexing
        # error that doesn't make it very clear what the problem is
        if len(image.shape) < 2 or len(image.shape) > 3:
            raise ValueError('image must have either 2 or 3 dimensions but its shape is '+str(image.shape))

        # np.cast was removed in NumPy 2.0; asarray(...).astype(...) is the
        # documented equivalent and also works on older releases.
        if image.dtype == 'int8':
            image = np.asarray(image).astype('uint8')
        elif str(image.dtype).startswith('float'):
            # don't use *=, we don't want to modify the input array
            image = image * 255.
            image = np.asarray(image).astype('uint8')

        # PIL cannot handle single-channel 3-D arrays; drop the last axis
        if len(image.shape) == 3 and image.shape[2] == 1:
            image = image[:,:,0]

        try:
            ensure_Image()
            image = Image.fromarray(image)
        except TypeError:
            raise TypeError("PIL issued TypeError on ndarray of shape " +
                            str(image.shape) + " and dtype " +
                            str(image.dtype))

    try:
        f = NamedTemporaryFile(mode='r', suffix='.png', delete=False)
    except TypeError:
        # before python2.7, we can't use the delete argument
        f = NamedTemporaryFile(mode='r', suffix='.png')
        # TODO: prior to python 2.7, NamedTemporaryFile has no delete = False
        # argument unfortunately, that means f.close() deletes the file. we
        # then save an image to the file in the next line, so there's a race
        # condition where for an instant we don't actually have the file on
        # the filesystem reserving the name, and then write to that name
        # anyway
        # TODO: see if this can be remedied with lower level calls (mkstemp)
        warnings.warn('filesystem race condition')

    name = f.name
    f.flush()
    f.close()
    image.save(name)

    viewer_command = string.preprocess('${PYLEARN2_VIEWER_COMMAND}')
    # The shell deletes the temporary file after the viewer command.
    if os.name == 'nt':
        subprocess.Popen(viewer_command + ' ' + name +' && del ' + name, shell = True)
    else:
        subprocess.Popen(viewer_command + ' ' + name +' ; rm ' + name, shell = True)
开发者ID:SuperElectric,项目名称:pylearn2,代码行数:59,代码来源:image.py
示例19: __init__
def __init__(self, which_set,
             base_path='${PYLEARN2_DATA_PATH}/hoge',
             start=None,
             stop=None,
             preprocessor=None,
             fit_preprocessor=False,
             axes=('b', 0, 1, 'c'),
             fit_test_preprocessor=False):
    """
    Load one portion of the dataset from its CSV file.

    Parameters
    ----------
    which_set : str
        Which portion of the dataset to load. Valid values are 'train'
        or 'public_test'.
    base_path : str
        The directory containing the .csv files from kaggle.com.
        This directory should be writable; if the .csv files haven't
        already been converted to npy, this class will convert them
        to save memory the next time they are loaded.
    start, stop : int, optional
        Row window ``[start, stop)``; only applied when `start` is given,
        in which case both must be ints.
    preprocessor : object, optional
        If truthy, its `apply` method is called on this dataset.
    fit_preprocessor : bool
        True if the preprocessor is allowed to fit the data.
    axes : tuple
        Passed to `DefaultViewConverter`.
    fit_test_preprocessor : bool
        If we construct a test set based on this dataset, should it be
        allowed to fit the test set?

    Raises
    ------
    ValueError
        If `which_set` is not a recognised name.
    """
    # Keep this as the very first statement: at this point locals() holds
    # exactly the constructor arguments (plus `self`).
    self.test_args = locals()
    self.test_args['which_set'] = 'public_test'
    self.test_args['fit_preprocessor'] = fit_test_preprocessor
    del self.test_args['start']
    del self.test_args['stop']
    del self.test_args['self']

    csv_names = {'train': 'train.csv', 'public_test' : 'test.csv'}
    if which_set not in csv_names:
        raise ValueError("Unrecognized dataset name: " + which_set)

    path = preprocess(base_path + '/' + csv_names[which_set])
    X, y = self._load_data(path, which_set == 'train')

    if start is not None:
        assert which_set != 'test'
        assert isinstance(start, int)
        assert isinstance(stop, int)
        assert start >= 0
        assert start < stop
        assert stop <= X.shape[0]
        window = slice(start, stop)
        X = X[window, :]
        if y is not None:
            y = y[window, :]

    converter = DefaultViewConverter(shape=[48,48,1], axes=axes)
    super(HogeDataset, self).__init__(X=X, y=y, view_converter=converter)

    if preprocessor:
        preprocessor.apply(self, can_fit=fit_preprocessor)
开发者ID:bemoregt,项目名称:grbm_sample,代码行数:59,代码来源:hoge_dataset.py
示例20: __init__
def __init__(self,
             which_set = 'full',
             path = 'train.mat',
             one_hot = False,
             colorspace = 'none',
             step = 1,
             start = None,
             stop = None,
             center = False,
             rescale = False,
             gcn = None,
             toronto_prepro = False,
             axes=('b', 0, 1, 'c')):
    """
    Load the data, optionally normalise it, and initialise the parent
    dataset with the result.

    Every constructor argument is also stored on the instance under its
    own name.

    Parameters
    ----------
    which_set : str
        Only the value 'test' is treated specially here (see
        `toronto_prepro`).
    path : str
        Location of the data; expanded with `preprocess`.
    one_hot, colorspace, step, start, stop
        Not read in this method beyond being stored on the instance.
        Presumably consumed by `_load_data` -- confirm there.
    center : bool
        If True, subtract 127.5 from the data in place.
    rescale : bool
        If True, divide the data by 127.5 in place.
    gcn : float, optional
        If not None, used as the `scale` of `global_contrast_normalize`.
    toronto_prepro : bool
        If True, divide by 255 and subtract the per-feature mean. For
        which_set == 'test' the mean comes from a freshly built 'train'
        dataset. Cannot be combined with `center` or `gcn`.
    axes : tuple
        Passed to `DefaultViewConverter`.
    """
    # Copy all arguments onto the instance in one go, then drop the
    # self-reference that locals() necessarily contains.
    self.__dict__.update(locals())
    del self.self
    self.view_converter = None
    self.path = preprocess(self.path)
    X, y = self._load_data()
    if center:
        X -= 127.5
    if rescale:
        X /= 127.5
    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.
        if which_set == 'test':
            # Note that only which_set is passed on; every other argument
            # of the training dataset takes its default value.
            other = MATDATA(which_set='train')
            oX = other.X
            # In-place division: this also rescales other.X itself.
            oX /= 255.
            X = X - oX.mean(axis=0)
        else:
            X = X - X.mean(axis=0)
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn, min_divisor=1e-8)
    # NOTE(review): windowSize and channels are not assigned in this
    # method; presumably _load_data sets them -- confirm.
    view_converter = DefaultViewConverter((
        self.windowSize,self.windowSize,self.channels), axes)
    super(MATDATA, self).__init__(X=X, y=y, view_converter=view_converter)
开发者ID:mlisicki,项目名称:pylearn2,代码行数:58,代码来源:mat_data.py
注:本文中的pylearn2.utils.string_utils.preprocess函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论