This article collects typical usage examples of the Python class sklearn.ensemble.IsolationForest. If you are unsure what IsolationForest does or how to use it, the curated class examples below should help.
Twenty code examples of the IsolationForest class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
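Before the collected examples, here is a minimal, self-contained sketch of the typical fit/predict workflow. The synthetic data and parameter values are illustrative assumptions only and are not taken from any of the examples below.

import numpy as np
from sklearn.ensemble import IsolationForest

# Synthetic data: 100 roughly normal points plus a few obvious outliers (illustrative only).
rng = np.random.RandomState(42)
X_normal = 0.3 * rng.randn(100, 2)
X_outliers = rng.uniform(low=-4, high=4, size=(5, 2))
X = np.vstack([X_normal, X_outliers])

# contamination is the assumed fraction of outliers; n_estimators is the number of isolation trees.
clf = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
clf.fit(X)

labels = clf.predict(X)            # +1 for inliers, -1 for outliers
scores = clf.decision_function(X)  # lower scores mean more anomalous
print("flagged outliers:", int((labels == -1).sum()))

Most of the examples that follow vary only the constructor parameters (n_estimators, max_samples, contamination, bootstrap, max_features) and the way the predictions are consumed.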
Example 1: _predict_self
def _predict_self(self):
    clf = IsolationForest(contamination=self.frac)
    clf.fit(self.num_X)
    return clf.predict(self.num_X)
Developer: xiangnanyue, Project: Pyod, Lines: 7, Source: pyador.py
Example 2: test_iforest_sparse
def test_iforest_sparse():
    """Check IForest for various parameter settings on sparse input."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                         boston.target[:50],
                                                         random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "bootstrap": [True, False]})

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        for params in grid:
            # Trained on sparse format
            sparse_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train_sparse)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train)
            dense_results = dense_classifier.predict(X_test)

            assert_array_equal(sparse_results, dense_results)
Developer: AndyMelendezCuesta, Project: scikit-learn, Lines: 26, Source: test_iforest.py
Example 3: outlier_rejection
def outlier_rejection(X, y):
    model = IsolationForest(max_samples=100,
                            contamination=0.4,
                            random_state=rng)
    model.fit(X)
    y_pred = model.predict(X)
    return X[y_pred == 1], y[y_pred == 1]
Developer: zzhhoubin, Project: imbalanced-learn, Lines: 7, Source: plot_outlier_rejections.py
Example 4: test_iforest_subsampled_features
def test_iforest_subsampled_features():
    # Non-regression test for #5732, which failed at predict.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                         boston.target[:50],
                                                         random_state=rng)
    clf = IsolationForest(max_features=0.8)
    clf.fit(X_train, y_train)
    clf.predict(X_test)
Developer: perimosocordiae, Project: scikit-learn, Lines: 7, Source: test_iforest.py
Example 5: outlier_rejection
def outlier_rejection(X, y):
    """This will be our function used to resample our dataset."""
    model = IsolationForest(max_samples=100,
                            contamination=0.4,
                            random_state=rng)
    model.fit(X)
    y_pred = model.predict(X)
    return X[y_pred == 1], y[y_pred == 1]
Developer: bodycat, Project: imbalanced-learn, Lines: 8, Source: plot_outlier_rejections.py
Example 6: IsolationForest_calulate
def IsolationForest_calulate(train_data_one, test_data):
    # Use the anomaly-detection method
    clf = IsolationForest()
    # Train the anomaly-detection model
    clf.fit(train_data_one)
    # Predict with the model
    Pre_result = clf.predict(test_data)
    # Compute the fraction of samples predicted as normal (+1)
    prob = len([x for x in Pre_result if x == 1]) / len(Pre_result)
    return prob
Developer: Ayo616, Project: KDD-workshop-second, Lines: 10, Source: ITPA.py
Example 7: test_iforest_works
def test_iforest_works():
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]

    # Test IsolationForest
    clf = IsolationForest(random_state=rng)
    clf.fit(X)
    pred = clf.predict(X)

    # assert that the outliers are detected
    assert_greater(np.min(pred[-2:]), np.max(pred[:-2]))
Developer: ElDeveloper, Project: scikit-learn, Lines: 11, Source: test_iforest.py
Example 8: test_iforest_works
def test_iforest_works(contamination):
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]

    # Test IsolationForest
    clf = IsolationForest(random_state=rng, contamination=contamination)
    clf.fit(X)
    decision_func = -clf.decision_function(X)
    pred = clf.predict(X)

    # assert that the outliers are detected
    assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2]))
    assert_array_equal(pred, 6 * [1] + 2 * [-1])
Developer: manhhomienbienthuy, Project: scikit-learn, Lines: 12, Source: test_iforest.py
Example 9: fit
def fit(self, X, y=None):
    """Fit detector. y is optional for unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : numpy array of shape (n_samples,), optional (default=None)
        The ground truth of the input samples (labels).
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    self.detector_ = IsolationForest(n_estimators=self.n_estimators,
                                     max_samples=self.max_samples,
                                     contamination=self.contamination,
                                     max_features=self.max_features,
                                     bootstrap=self.bootstrap,
                                     n_jobs=self.n_jobs,
                                     random_state=self.random_state,
                                     verbose=self.verbose)
    self.detector_.fit(X=X, y=None, sample_weight=None)

    # invert decision_scores_: outliers come with higher outlier scores
    self.decision_scores_ = invert_order(
        self.detector_.decision_function(X))
    self._process_decision_scores()
    return self
Developer: flaviassantos, Project: pyod, Lines: 32, Source: iforest.py
Example 10: isolationForest
def isolationForest(self, settings, mname, data):
    '''
    :param settings: -> settings dictionary
    :param mname: -> name of serialized cluster
    :return: -> isolation forest instance
    :example settings: -> {n_estimators: 100, max_samples: 100, contamination: 0.1, bootstrap: False,
        max_features: 1.0, n_jobs: 1, random_state: None, verbose: 0}
    '''
    # rng = np.random.RandomState(42)
    if settings['random_state'] == 'None':
        settings['random_state'] = None
    if isinstance(settings['bootstrap'], str):
        settings['bootstrap'] = str2Bool(settings['bootstrap'])
    if isinstance(settings['verbose'], str):
        settings['verbose'] = str2Bool(settings['verbose'])
    if settings['max_samples'] != 'auto':
        settings['max_samples'] = int(settings['max_samples'])
    # print type(settings['max_samples'])
    for k, v in settings.iteritems():
        logger.info('[%s] : [INFO] IsolationForest %s set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v)
        print "IsolationForest %s set to %s" % (k, v)
    try:
        clf = IsolationForest(n_estimators=int(settings['n_estimators']),
                              max_samples=settings['max_samples'],
                              contamination=float(settings['contamination']),
                              bootstrap=settings['bootstrap'],
                              max_features=float(settings['max_features']),
                              n_jobs=int(settings['n_jobs']),
                              random_state=settings['random_state'],
                              verbose=settings['verbose'])
    except Exception as inst:
        logger.error('[%s] : [ERROR] Cannot instantiate isolation forest with %s and %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
        print "Error while instantiating isolation forest with %s and %s" % (type(inst), inst.args)
        sys.exit(1)
    # clf = IsolationForest(max_samples=100, random_state=rng)
    # print "*&*&*&& %s" % type(data)
    try:
        clf.fit(data)
    except Exception as inst:
        logger.error('[%s] : [ERROR] Cannot fit isolation forest model with %s and %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
        sys.exit(1)
    predict = clf.predict(data)
    print "Anomaly Array:"
    print predict
    self.__serializemodel(clf, 'isoforest', mname)
    return clf
Developer: igabriel85, Project: dmon-adp, Lines: 46, Source: dmonscilearncluster.py
Example 11: test_score_samples
def test_score_samples():
    X_train = [[1, 1], [1, 2], [2, 1]]
    clf1 = IsolationForest(contamination=0.1).fit(X_train)
    clf2 = IsolationForest().fit(X_train)
    assert_array_equal(clf1.score_samples([[2., 2.]]),
                       clf1.decision_function([[2., 2.]]) + clf1.offset_)
    assert_array_equal(clf2.score_samples([[2., 2.]]),
                       clf2.decision_function([[2., 2.]]) + clf2.offset_)
    assert_array_equal(clf1.score_samples([[2., 2.]]),
                       clf2.score_samples([[2., 2.]]))
Developer: AlexisMignon, Project: scikit-learn, Lines: 10, Source: test_iforest.py
Example 12: predict
def predict(self, X, window=DEFAULT_WINDOW):
    """
    Predict whether a particular sample is an outlier or not.

    :param X: the time series to detect on
    :type X: pandas.Series
    :param window: the length of the window
    :type window: int
    :return: 1 denotes normal, 0 denotes abnormal.
    """
    x_train = list(range(0, 2 * window + 1)) + list(range(0, 2 * window + 1)) + list(range(0, window + 1))
    sample_features = list(zip(x_train, X))
    clf = IsolationForest(self.n_estimators, self.max_samples, self.contamination, self.max_feature,
                          self.bootstrap, self.n_jobs, self.random_state, self.verbose)
    clf.fit(sample_features)
    predict_res = clf.predict(sample_features)
    if predict_res[-1] == -1:
        return 0
    return 1
Developer: lixuefeng123, Project: Metis, Lines: 18, Source: isolation_forest.py
Example 13: test_iforest_parallel_regression
def test_iforest_parallel_regression():
    """Check parallel regression."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                         boston.target,
                                                         random_state=rng)

    ensemble = IsolationForest(n_jobs=3, random_state=0).fit(X_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = IsolationForest(n_jobs=1, random_state=0).fit(X_train)
    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Developer: AyushKesar, Project: scikit-learn, Lines: 18, Source: test_iforest.py
Example 14: test_iforest_performance
def test_iforest_performance():
    """Test that Isolation Forest performs well."""

    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = np.r_[X + 2, X - 2]
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = -clf.decision_function(X_test)

    # check that the model separates outliers from normal samples (high ROC AUC)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Developer: AndyMelendezCuesta, Project: scikit-learn, Lines: 22, Source: test_iforest.py
Example 15: test_iforest_warm_start
def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest."""
    rng = check_random_state(0)
    X = rng.randn(20, 2)

    # fit first 10 trees
    clf = IsolationForest(n_estimators=10, max_samples=20,
                          random_state=rng, warm_start=True)
    clf.fit(X)
    # remember the 1st tree
    tree_1 = clf.estimators_[0]
    # fit another 10 trees
    clf.set_params(n_estimators=20)
    clf.fit(X)
    # expecting 20 fitted trees and no overwritten trees
    assert len(clf.estimators_) == 20
    assert clf.estimators_[0] is tree_1
Developer: allefpablo, Project: scikit-learn, Lines: 18, Source: test_iforest.py
Example 16: outlier_removal
def outlier_removal(df, col, method, params):
    if method == 'Isolation Forest':
        do_outlier_removal = IsolationForest(**params)
    elif method == 'Local Outlier Factor':
        do_outlier_removal = LocalOutlierFactor(**params)
    else:
        do_outlier_removal = None
    do_outlier_removal.fit(np.array(df[col]))

    if method == 'Isolation Forest':
        outlier_scores = do_outlier_removal.decision_function(np.array(df[col]))
        df[('meta', 'Outlier Scores - ' + method + str(params))] = outlier_scores
        is_outlier = do_outlier_removal.predict(np.array(df[col]))
        df[('meta', 'Outliers - ' + method + str(params))] = is_outlier
    if method == 'Local Outlier Factor':
        is_outlier = do_outlier_removal.fit_predict(np.array(df[col]))
        df[('meta', 'Outliers - ' + method + str(params))] = is_outlier
        df[('meta', 'Outlier Factor - ' + method + str(params))] = do_outlier_removal.negative_outlier_factor_
    return df, do_outlier_removal
Developer: USGS-Astrogeology, Project: PySAT, Lines: 18, Source: outlier_removal.py
Example 17: in
# add the class column back in (it wasn't featurized by itself)
featureMatrix['class'] = enhancedDf['class']

# randomly assign 3/4 of the feature df to training and 1/4 to test
featureMatrix['is_train'] = np.random.uniform(0, 1, len(featureMatrix)) <= .75

# split out the train and test df's into separate objects
train, test = featureMatrix[featureMatrix['is_train'] == True], featureMatrix[featureMatrix['is_train'] == False]

# drop the is_train column, we don't need it anymore
train = train.drop('is_train', axis=1)
test = test.drop('is_train', axis=1)

# create the isolation forest class and factorize the class column
clf = IsolationForest(n_estimators=opts.numtrees)

# train the isolation forest on the training set, dropping the class column
# (since the trainer takes that as a separate argument)
print('\nTraining')
clf.fit(train.drop('class', axis=1))

# remove the 'answers' from the test set
testnoclass = test.drop('class', axis=1)

print('\nPredicting (class 1 is normal, class -1 is malicious)')

# evaluate our results on the test set
test.is_copy = False
test['prediction'] = clf.predict(testnoclass)
print()
Developer: DavidJBianco, Project: Clearcut, Lines: 30, Source: train_flows_iforest.py
Example 18: ohEncoding
    vecData.index = data.index
    if replace is True:
        data = data.drop(cols, axis=1)
        data = data.join(vecData)
    return data, vecData, vec

df, t, v = ohEncoding(df, col, replace=True)
print "Shape after encoding"
print type(df.shape)

df_unlabeled = df.drop("Anomaly", axis=1)
print "Shape of the dataframe without anomaly column: "
print df_unlabeled.shape

clf = IsolationForest(max_samples=6444, verbose=1, n_jobs=-1, contamination=0.255555,
                      bootstrap=True, max_features=9)
clf.fit(df_unlabeled)
pred = clf.predict(df_unlabeled)
# print type(pred)
# print data.shape
# print len(pred)
# print pred
anomalies = np.argwhere(pred == -1)
normal = np.argwhere(pred == 1)
# print anomalies
# print type(anomalies)
df['ISO1'] = pred
# iterate over rows
nLabAno = 0
Developer: igabriel85, Project: dmon-adp, Lines: 32, Source: CEP_Exp_Two.py
Example 19: pairwise_distances
speed.drop(['vehicle_id', 'ride_id', 'type'], axis=1, inplace=True)

# merge
vehicle = pd.merge(rpm, speed, how='outer', on='timestamp')

# drop null values and zero speeds --> neutral gear
# speed < 200 to remove outliers
vh = vehicle.dropna(axis=0)
vh = vh[(vh['rpm'] > 0) & ((vh['speed'] > 0) & (vh['speed'] < 200))]

# detect outliers using IsolationForest
# assume contamination at the 0.01 level
distances = pairwise_distances(vh[['rpm', 'speed']], vh[['rpm', 'speed']], metric='cosine')
clf = IsolationForest(max_samples=100, contamination=0.01, verbose=1)
clf.fit(distances)
labels = clf.predict(distances)
vh['outlier'] = labels

# remove outliers found by IsolationForest
vh = vh[['rpm', 'speed']][vh['outlier'] == 1]

# recompute distances after outlier removal
distances = pairwise_distances(vh[['rpm', 'speed']], vh[['rpm', 'speed']], metric='cosine')

# initialize a variable to keep the best model, its silhouette score and predicted labels
best_model = (None, -1, None)

# iterate over possible numbers of gears
# since we want to pick the model with the best silhouette score, we can't start with a single cluster (k=1)
Developer: PrzemekPobrotyn, Project: gear_prediction, Lines: 31, Source: first_approach.py
Example 20: IsolationForest
# ## Improving the Prediction Model ##
# This part is about finding a better metric for predicting future house sales with regard to their price.
#
# First, I will detect outliers and delete them from the dataset if needed.

# ### Detecting Outliers ###
# The first step to improve our learning behaviour is to find outliers and then remove them from the data set if needed.
# To detect outliers I will use the Isolation Forest algorithm, which works well for high-dimensional data sets such as the one we have here.

# In[ ]:
from sklearn.ensemble import IsolationForest

clf = IsolationForest(max_samples=100, random_state=rng)
clf.fit(df)
y = clf.predict(df)
print y

# ### Location-Based Prices ###
# House prices don't only depend on the size of the house or the number of rooms; they are also strongly dependent on the location of the house. To get an idea of how the location might impact my data, I analyse the relationship between location and price in my dataset.

# In[ ]:
import gmaps
gmaps.configure(api_key="AIzaSyDPWAl8lcrK9q-tOkrl64sGkxDnbWz47Ko")
locations = df[["lat", "long"]]
prices = df["price"]
Developer: Sourge, Project: udacity, Lines: 29, Source: housing.py
Note: The sklearn.ensemble.IsolationForest class examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers, and the copyright of the source code belongs to its original authors. Please refer to each project's license before distributing or using the code; do not reproduce this article without permission.