• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python tests.locate函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中tests.locate函数的典型用法代码示例。如果您正苦于以下问题:Python tests.locate函数的具体用法?Python tests.locate怎么用?Python tests.locate使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了tests.locate函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: smallcatGBM

def smallcatGBM():
  """Fit a one-tree, depth-one GBM in H2O and in scikit-learn on alphabet_cattest.csv.

  The training set has 26 categories, A through Z. Categories A, C, E, G, ...
  are perfect predictors of y = 1; categories B, D, F, H, ... are perfect
  predictors of y = 0.
  """

  def _letter_code(field):
    # The first CSV column is a quoted letter; keep the text between the
    # first pair of double quotes and map it to its character code.
    return ord(field.split('"')[1])

  # H2O side: import the data and make the response categorical.
  h2o_frame = h2o.import_file(path=tests.locate("smalldata/gbm_test/alphabet_cattest.csv"))
  h2o_frame["y"] = h2o_frame["y"].asfactor()

  # scikit side: the same file as a numeric array (header row skipped).
  csv_path = tests.locate("smalldata/gbm_test/alphabet_cattest.csv")
  raw = np.loadtxt(csv_path, delimiter=',', skiprows=1, converters={0: _letter_code})
  response = raw[:, 1]
  feature = raw[:, 0]

  # H2O GBM (naive split): ntrees = 1, max_depth = 1, nbins = 100.
  h2o_model = h2o.gbm(
      x=h2o_frame[['X']],
      y=h2o_frame["y"],
      distribution="bernoulli",
      ntrees=1,
      max_depth=1,
      nbins=100,
  )
  h2o_model.show()

  # scikit GBM with the same tree count and depth; the single feature is
  # passed as a column vector.
  sci_model = ensemble.GradientBoostingClassifier(n_estimators=1, max_depth=1, max_features=None)
  sci_model.fit(feature[:, np.newaxis], response)
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:29,代码来源:pyunit_smallcatGBM.py


示例2: offsets_and_distributions

def offsets_and_distributions():
    """Smoke-test H2O deep learning with an offset column.

    Builds models on the insurance data for the gamma, gaussian, poisson and
    tweedie distributions, each with ``offset_column="offset"``, and scores
    the training frame with every model. Nothing is asserted; the test passes
    if no call raises.
    """

    # cars: drop rows with a missing response, then attach a constant offset
    # column. The frame is only needed by the disabled bernoulli case below.
    cars = h2o.upload_file(tests.locate("smalldata/junit/cars_20mpg.csv"))
    cars = cars[cars["economy_20mpg"].isna() == 0]
    cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
    # One offset value per remaining row. This was a hard-coded 398, which
    # silently breaks the cbind if the data file or the NA filter changes.
    offset = h2o.H2OFrame(python_obj=[[.5] for _ in range(cars.nrow)])
    offset.set_name(0, "x1")
    cars = cars.cbind(offset)

    # insurance: the offset is the log of the number of policy holders
    insurance = h2o.import_file(tests.locate("smalldata/glm_test/insurance.csv"))
    insurance["offset"] = insurance["Holders"].log()

    # bernoulli - offset not supported
    #dl = h2o.deeplearning(x=cars[2:8], y=cars["economy_20mpg"], distribution="bernoulli", offset_column="x1",
    #                       training_frame=cars)
    #predictions = dl.predict(cars)

    # gamma
    dl = h2o.deeplearning(x=insurance[0:3], y=insurance["Claims"], distribution="gamma", offset_column="offset",
                          training_frame=insurance)
    dl.predict(insurance)

    # gaussian
    dl = h2o.deeplearning(x=insurance[0:3], y=insurance["Claims"], distribution="gaussian", offset_column="offset",
                          training_frame=insurance)
    dl.predict(insurance)

    # poisson
    dl = h2o.deeplearning(x=insurance[0:3], y=insurance["Claims"], distribution="poisson", offset_column="offset",
                          training_frame=insurance)
    dl.predict(insurance)

    # tweedie (deliberately kept in the name-based x/y form used by the original)
    dl = h2o.deeplearning(x=insurance.names[0:3], y="Claims", distribution="tweedie", offset_column="offset",
                          training_frame=insurance)
    dl.predict(insurance)
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:34,代码来源:pyunit_offsets_and_distributionsDeeplearning.py


示例3: link_functions_gaussian

def link_functions_gaussian():
    """Compare an H2O gaussian/identity GLM against statsmodels on the prostate data.

    Both libraries fit GLEASON on the same eight predictors. The test asserts
    that H2O's residual-to-null deviance ratio is not worse than the
    statsmodels ratio by 0.01 or more.
    """

    print("Read in prostate data.")
    h2o_data = h2o.import_file(path=tests.locate("smalldata/prostate/prostate_complete.csv.zip"))
    h2o_data.head()

    # Read the same CSV out of the zip for statsmodels. The archive and the
    # member handle are closed deterministically, and ``.values`` replaces
    # ``DataFrame.as_matrix()``, which was removed in pandas 1.0.
    zip_path = tests.locate("smalldata/prostate/prostate_complete.csv.zip")
    with zipfile.ZipFile(zip_path) as archive:
        with archive.open("prostate_complete.csv") as member:
            sm_data = pd.read_csv(member).values
    sm_data_response = sm_data[:, 9]
    sm_data_features = sm_data[:, 1:9]

    print("Testing for family: GAUSSIAN")
    print("Set variables for h2o.")
    myY = "GLEASON"
    myX = ["ID", "AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]

    print("Create models with canonical link: IDENTITY")
    h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gaussian", link="identity", alpha=[0.5],
                        Lambda=[0])
    sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features,
                      family=sm.families.Gaussian(sm.families.links.identity)).fit()

    print("Compare model deviances for link function identity")
    h2o_deviance = h2o_model.residual_deviance() / h2o_model.null_deviance()
    sm_deviance = sm_model.deviance / sm_model.null_deviance
    # One-sided on purpose: a lower H2O ratio (a better fit) always passes.
    assert h2o_deviance - sm_deviance < 0.01, "expected h2o to have an equivalent or better deviance measures"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:27,代码来源:pyunit_link_functions_gaussianGLM.py


示例4: link_functions_binomial

def link_functions_binomial():
  """Compare an H2O binomial/logit GLM against statsmodels on the prostate data.

  Both libraries fit CAPSULE on the same eight predictors. The test asserts
  that H2O's residual-to-null deviance ratio is not worse than the
  statsmodels ratio by 0.01 or more.
  """

  print("Read in prostate data.")
  h2o_data = h2o.import_file(path=tests.locate("smalldata/prostate/prostate_complete.csv.zip"))
  h2o_data.head()

  # Read the same CSV out of the zip for statsmodels. The archive and the
  # member handle are closed deterministically, and ``.values`` replaces
  # ``DataFrame.as_matrix()``, which was removed in pandas 1.0.
  zip_path = tests.locate("smalldata/prostate/prostate_complete.csv.zip")
  with zipfile.ZipFile(zip_path) as archive:
    with archive.open("prostate_complete.csv") as member:
      sm_data = pd.read_csv(member).values
  sm_data_response = sm_data[:, 2]
  sm_data_features = sm_data[:, [1, 3, 4, 5, 6, 7, 8, 9]]

  print("Testing for family: BINOMIAL")
  print("Set variables for h2o.")
  myY = "CAPSULE"
  myX = ["ID", "AGE", "RACE", "GLEASON", "DCAPS", "PSA", "VOL", "DPROS"]

  print("Create models with canonical link: LOGIT")
  h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY].asfactor(), family="binomial", link="logit",
                      alpha=[0.5], Lambda=[0])
  sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features,
                    family=sm.families.Binomial(sm.families.links.logit)).fit()

  print("Compare model deviances for link function logit")
  h2o_deviance = h2o_model.residual_deviance() / h2o_model.null_deviance()
  sm_deviance = sm_model.deviance / sm_model.null_deviance
  # One-sided on purpose: a lower H2O ratio (a better fit) always passes.
  assert h2o_deviance - sm_deviance < 0.01, "expected h2o to have an equivalent or better deviance measures"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:25,代码来源:pyunit_link_functions_binomialGLM.py


示例5: frame_slicing

def frame_slicing():
    """Check the five H2OFrame indexing forms against known cell values.

    Covers frame[int], frame[int, int], frame[int, slice], frame[slice, int]
    and frame[slice, slice] on the iris, prostate and airlines data sets.
    """
    tol = 1e-10
    failure = "incorrect values"

    iris = h2o.import_file(path=tests.locate("smalldata/iris/iris_wheader.csv"))
    prostate = h2o.import_file(path=tests.locate("smalldata/prostate/prostate.csv.zip"))
    airlines = h2o.import_file(path=tests.locate("smalldata/airlines/allyears2k.zip"))
    for frame in (iris, prostate, airlines):
        frame.show()

    # H2OFrame[int] (column slice)
    first_col = iris[0]
    assert abs(first_col[8,:] - 4.4) < tol, failure

    # H2OFrame[int,int]
    cell = prostate[13, 3]
    assert abs(cell - 1) < tol, failure

    # H2OFrame[int, slice]
    row_part = airlines[12, 0:3]
    assert abs(row_part[0,0] - 1987) < tol, failure
    assert abs(row_part[0,1] - 10) < tol, failure
    assert abs(row_part[0,2] - 29) < tol, failure

    # H2OFrame[slice, int]
    col_part = iris[5:8, 1]
    assert abs(col_part[0,:] - 3.9) < tol, failure
    assert abs(col_part[1,:] - 3.4) < tol, failure
    assert abs(col_part[2,:] - 3.4) < tol, failure

    # H2OFrame[slice, slice]
    sub_frame = prostate[5:8, 0:3]
    assert abs(sub_frame[0,0] - 6) < tol, failure
    assert abs(sub_frame[1,1] - 0) < tol, failure
    assert abs(sub_frame[2,2] - 61) < tol, failure
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:33,代码来源:pyunit_frame_slicing.py


示例6: dim_checks

def dim_checks():
  """Check that H2OFrame dimensions agree with numpy for prostate.csv.

  Compares the full frame, a single-column slice, and the row count of the
  result of applying ``& 5`` to that column.
  """

  # Load smalldata/logreg/prostate.csv into both H2O and numpy.
  h2o_data = h2o.import_file(path=tests.locate("smalldata/logreg/prostate.csv"))
  np_data = np.loadtxt(tests.locate("smalldata/logreg/prostate.csv"), delimiter=',', skiprows=1)

  h2o_rows, h2o_cols = h2o_data.dim
  np_rows, np_cols = list(np_data.shape)

  # print() with a single argument behaves the same on Python 2 and 3; the
  # original print statements were syntax errors on Python 3.
  print('The dimensions of h2o frame is: {0} x {1}'.format(h2o_rows, h2o_cols))
  print('The dimensions of numpy array is: {0} x {1}'.format(np_rows, np_cols))

  assert [h2o_rows, h2o_cols] == [np_rows, np_cols], "expected equal number of columns and rows"

  # Slice out a column and check its dimensions.
  h2o_slice = h2o_data[4]
  np_slice = np_data[:,4]

  h2o_rows, h2o_cols = h2o_slice.dim
  np_rows = np_slice.shape[0]

  print('The dimensions of h2o column slice is: {0} x {1}'.format(h2o_rows, h2o_cols))
  print('The dimensions of numpy array column slice is: {0} x 1'.format(np_rows))

  assert [h2o_rows, h2o_cols] == [np_rows, 1], "expected equal number of columns and rows"

  # Now apply an operator, e.g. '&', and check the dimensions again.
  h2oColAmpFive = h2o_slice & 5

  assert h2oColAmpFive.nrow == h2o_rows, "expected the number of rows to remain unchanged"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:34,代码来源:pyunit_dim.py


示例7: fiftycatGBM

def fiftycatGBM():
  """Train a bernoulli GBM on 50_cattest_train.csv and score 50_cattest_test.csv.

  Per the data notes, the training set has only categories cat1 through
  cat45 while the test set has all 50, so scoring exercises categorical
  levels not seen during training. Nothing is asserted; the test passes if
  no call raises.
  """
  train_path = tests.locate("smalldata/gbm_test/50_cattest_train.csv")
  train_frame = h2o.import_file(path=train_path)
  train_frame["y"] = train_frame["y"].asfactor()

  # H2O GBM with ntrees = 10, max_depth = 5, nbins = 20.
  gbm = h2o.gbm(
      x=train_frame[["x1", "x2"]],
      y=train_frame["y"],
      distribution="bernoulli",
      ntrees=10,
      max_depth=5,
      nbins=20,
  )
  gbm.show()

  test_path = tests.locate("smalldata/gbm_test/50_cattest_test.csv")
  test_frame = h2o.import_file(path=test_path)

  # Predict on the test data set.
  scored = gbm.predict(test_frame)
  scored.show()

  # Fetch the confusion matrix and AUC; the values are retrieved but not checked.
  perf = gbm.model_performance(test_frame)
  confusion = perf.confusion_matrix()
  auc_value = perf.auc()
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:33,代码来源:pyunit_fiftycatGBM.py


示例8: pubdev_1953

def pubdev_1953():
    """Build a poisson GLM with a validation frame on the small citibike data.

    The commented-out pipeline below is kept for reference: it shows how
    train/test frames were derived from the full citibike and weather files.
    The test itself uses the pre-split small CSVs instead.
    """

    # small_test = [tests.locate("bigdata/laptop/citibike-nyc/2013-10.csv")]
    # data = h2o.import_file(path=small_test)
    # startime = data["starttime"]
    # secsPerDay=1000*60*60*24
    # data["Days"] = (startime/secsPerDay).floor()
    # grouped = data.group_by(["Days","start station name"])
    # bpd = grouped.count(name="bikes").get_frame()
    # secs = bpd["Days"]*secsPerDay
    # bpd["Month"]     = secs.month().asfactor()
    # bpd["DayOfWeek"] = secs.dayOfWeek()
    # wthr1 = h2o.import_file(path=[tests.locate("bigdata/laptop/citibike-nyc/31081_New_York_City__Hourly_2013.csv"), tests.locate("bigdata/laptop/citibike-nyc/31081_New_York_City__Hourly_2014.csv")])
    # wthr2 = wthr1[["Year Local","Month Local","Day Local","Hour Local","Dew Point (C)","Humidity Fraction","Precipitation One Hour (mm)","Temperature (C)","Weather Code 1/ Description"]]
    # wthr2.set_name(wthr2.index("Precipitation One Hour (mm)"), "Rain (mm)")
    # wthr2.set_name(wthr2.index("Weather Code 1/ Description"), "WC1")
    # wthr3 = wthr2[ wthr2["Hour Local"]==12 ]
    # wthr3["msec"] = h2o.H2OFrame.mktime(year=wthr3["Year Local"], month=wthr3["Month Local"]-1, day=wthr3["Day Local"]-1, hour=wthr3["Hour Local"])
    # secsPerDay=1000*60*60*24
    # wthr3["Days"] = (wthr3["msec"]/secsPerDay).floor()
    # wthr4 = wthr3.drop("Year Local").drop("Month Local").drop("Day Local").drop("Hour Local").drop("msec")
    # rain = wthr4["Rain (mm)"]
    # rain[ rain.isna() ] = 0
    # bpd_with_weather = bpd.merge(wthr4,allLeft=True,allRite=False)
    # r = bpd_with_weather['Days'].runif(seed=356964763)
    # train = bpd_with_weather[  r  < 0.6]
    # test  = bpd_with_weather[(0.6 <= r) & (r < 0.9)]

    feature_cols = [
        'DayOfWeek',
        'WC1',
        'start station name',
        'Temperature (C)',
        'Days',
        'Month',
        'Humidity Fraction',
        'Rain (mm)',
        'Dew Point (C)',
    ]
    target = "bikes"

    train_frame = h2o.import_file(tests.locate("smalldata/glm_test/citibike_small_train.csv"))
    valid_frame = h2o.import_file(tests.locate("smalldata/glm_test/citibike_small_test.csv"))

    # The model is built only to confirm that training completes.
    glm_model = h2o.glm(
        x=train_frame[feature_cols],
        y=train_frame[target],
        validation_x=valid_frame[feature_cols],
        validation_y=valid_frame[target],
        family="poisson",
    )
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:34,代码来源:pyunit_NOPASS_pubdev_1953.py


示例9: wide_dataset_large

def wide_dataset_large():
    """Fit a binomial GLM on the wide Arcene data and require validation AUC > 0.5."""

    def _load_arcene(labels_file, data_file):
        # Read the labels, recode -1 as 0 and every other value as 1, then put
        # the label in column 0 in front of the features and upload to H2O.
        labels = np.genfromtxt(tests.locate(labels_file), delimiter=' ')
        labels = np.where(labels == -1, 0, 1)
        features = np.genfromtxt(tests.locate(data_file), delimiter=' ')
        stacked = np.column_stack((labels, features))
        return h2o.H2OFrame(stacked.tolist())

    print("Reading in Arcene training data for binomial modeling.")
    train_frame = _load_arcene("smalldata/arcene/arcene_train_labels.labels",
                               "smalldata/arcene/arcene_train.data")

    print("Run model on 3250 columns of Arcene with strong rules off.")
    glm_model = h2o.glm(
        x=train_frame[1:3250],
        y=train_frame[0].asfactor(),
        family="binomial",
        lambda_search=False,
        alpha=[1],
    )

    print("Test model on validation set.")
    valid_frame = _load_arcene("smalldata/arcene/arcene_valid_labels.labels",
                               "smalldata/arcene/arcene_valid.data")
    scored = glm_model.predict(valid_frame)

    print("Check performance of predictions.")
    perf = glm_model.model_performance(valid_frame)

    print("Check that prediction AUC better than guessing (0.5).")
    assert perf.auc() > 0.5, "predictions should be better then pure chance"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:25,代码来源:pyunit_wide_dataset_largeGLM.py


示例10: anomaly

def anomaly():
    """Train a deep learning autoencoder on MNIST and score the test set for anomalies.

    Nothing is asserted; the test passes if training, ``anomaly`` and
    ``predict`` all complete without raising.
    """

    # print() with a single argument behaves the same on Python 2 and 3; the
    # original print statement was a syntax error on Python 3.
    print("Deep Learning Anomaly Detection MNIST")

    train = h2o.import_file(tests.locate("bigdata/laptop/mnist/train.csv.gz"))
    test = h2o.import_file(tests.locate("bigdata/laptop/mnist/test.csv.gz"))

    # Columns 0..783 are the predictors. An explicit list is built because
    # range() is not a list on Python 3 and is used below as a column selector.
    predictors = list(range(0, 784))

    # unsupervised -> drop the response column (digit: 0-9), which is column 784
    train = train[predictors]
    test = test[predictors]

    # 1) LEARN WHAT'S NORMAL
    # train unsupervised Deep Learning autoencoder model on train_hex
    ae_model = h2o.deeplearning(x=train[predictors], training_frame=train, activation="Tanh", autoencoder=True,
                                hidden=[50], l1=1e-5, ignore_const_cols=False, epochs=1)

    # 2) DETECT OUTLIERS
    # anomaly app computes the per-row reconstruction error for the test data set
    # (passing it through the autoencoder model and computing mean square error (MSE) for each row)
    test_rec_error = ae_model.anomaly(test)

    # 3) VISUALIZE OUTLIERS
    # Let's look at the test set points with low/median/high reconstruction errors.
    # We will now visualize the original test set points and their reconstructions obtained
    # by propagating them through the narrow neural net.

    # Convert the test data into its autoencoded representation (pass through narrow neural net)
    test_recon = ae_model.predict(test)
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:32,代码来源:pyunit_anomaly_largeDeepLearning.py


示例11: group_by

def group_by():
    """Smoke-test H2OFrame.group_by aggregations on iris for every NA-handling mode.

    For each of "ignore", "rm" and "all", the frame is grouped by "class" and
    the count/min/max/mean/var/sd/ss/sum aggregations are chained; the
    resulting frame is printed. Nothing is asserted.
    """

    h2o_iris = h2o.import_file(path=tests.locate("smalldata/iris/iris_wheader.csv"))
    # The pandas copy and the column names are loaded but not used by the
    # smoke test below; they are kept so the test still performs the same reads.
    pd_iris = pd.read_csv(tests.locate("smalldata/iris/iris_wheader.csv"))

    na_handling = ["ignore","rm","all"]
    col_names = h2o_iris.col_names[0:4]

    # print() with a single argument behaves the same on Python 2 and 3; the
    # original print statements were syntax errors on Python 3.
    print("Running smoke test")

    # smoke test
    for na in na_handling:
      grouped = h2o_iris.group_by("class")
      grouped \
        .count(na=na) \
        .min(  na=na) \
        .max(  na=na) \
        .mean( na=na) \
        .var(  na=na) \
        .sd(   na=na) \
        .ss(   na=na) \
        .sum(  na=na)
      print(grouped.get_frame())
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:25,代码来源:pyunit_groupby.py


示例12: fiftycatRF

def fiftycatRF():
    """Train a random forest on 50_cattest_train.csv and score 50_cattest_test.csv.

    Per the data notes, the training set has only categories cat1 through
    cat45 while the test set has all 50. Nothing is asserted; the performance
    summary and confusion matrix are printed.
    """
    train_path = tests.locate("smalldata/gbm_test/50_cattest_train.csv")
    train_frame = h2o.import_file(path=train_path)
    train_frame["y"] = train_frame["y"].asfactor()

    # H2O DRF with ntrees = 50, max_depth = 20, nbins = 500.
    forest = h2o.random_forest(
        x=train_frame[["x1", "x2"]],
        y=train_frame["y"],
        ntrees=50,
        max_depth=20,
        nbins=500,
    )

    test_path = tests.locate("smalldata/gbm_test/50_cattest_test.csv")
    test_frame = h2o.import_file(path=test_path)

    # Predict on the test data set.
    scored = forest.predict(test_frame)
    scored.head()

    # Show the performance summary, then print the confusion matrix.
    performance = forest.model_performance(test_frame)
    performance.show()
    confusion = performance.confusion_matrix()
    print(confusion)
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:34,代码来源:pyunit_fiftycatRF.py


示例13: iris_h2o_vs_sciKmeans

def iris_h2o_vs_sciKmeans():
  """Compare H2O k-means cluster centers with scikit-learn on iris.

  Both implementations run with k = 3 from the same three user-supplied
  starting points (H2O with standardize=False), and the resulting centers
  are compared coordinate by coordinate.
  """

  iris_h2o = h2o.import_file(path=tests.locate("smalldata/iris/iris.csv"))
  iris_sci = np.genfromtxt(tests.locate("smalldata/iris/iris.csv"), delimiter=',')
  iris_sci = iris_sci[:,0:4]

  # Shared initial centers for both implementations.
  s =[[4.9,3.0,1.4,0.2],
  [5.6,2.5,3.9,1.1],
  [6.5,3.0,5.2,2.0]]

  start = h2o.H2OFrame(s)

  h2o_km = h2o.kmeans(x=iris_h2o[0:4], k=3, user_points=start, standardize=False)

  sci_km = KMeans(n_clusters=3, init=np.asarray(s), n_init=1)
  sci_km.fit(iris_sci)

  # print() with a single argument behaves the same on Python 2 and 3; the
  # original print statements were syntax errors on Python 3.
  print("Cluster centers from H2O:")
  h2o_centers = h2o_km.centers()
  print(h2o_centers)

  print("Cluster centers from scikit:")
  sci_centers = sci_km.cluster_centers_.tolist()
  print(sci_centers)

  for hcenter, scenter in zip(h2o_centers, sci_centers):
    for hpoint, spoint in zip(hcenter,scenter):
      # Compare the absolute difference: without abs() any H2O coordinate
      # smaller than the scikit one passed regardless of how far off it was.
      assert abs(hpoint - spoint) < 1e-10, "expected centers to be the same"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:32,代码来源:pyunit_iris_h2o_vs_sciKmeans.py


示例14: plot_test

def plot_test():
    """Train a bernoulli GBM on the airlines data and request its ROC plots.

    ROC plots are requested for the training metrics, the validation metrics
    and the performance on a separate test file. Every plot call receives
    server=True. Nothing is asserted.
    """
    plot_options = {'server': True}

    airlines = h2o.import_file(tests.locate("smalldata/airlines/AirlinesTrain.csv.zip"))

    # Split by thresholding a random-uniform column at 0.8 (train <= 0.8 < valid).
    draw = airlines[0].runif()
    train_part = airlines[draw <= 0.8]
    valid_part = airlines[draw > 0.8]

    predictors = ["Origin", "Dest", "Distance", "UniqueCarrier", "fMonth", "fDayofMonth", "fDayOfWeek"]
    response = "IsDepDelayed"

    gbm_model = h2o.gbm(
        x=train_part[predictors],
        y=train_part[response],
        validation_x=valid_part[predictors],
        validation_y=valid_part[response],
        distribution="bernoulli",
        ntrees=100,
        max_depth=3,
        learn_rate=0.01,
    )

    # ROC for the training and validation sets
    gbm_model.plot(type="roc", train=True, **plot_options)
    gbm_model.plot(type="roc", valid=True, **plot_options)

    # ROC for the held-out test file
    holdout = h2o.import_file(tests.locate("smalldata/airlines/AirlinesTest.csv.zip"))
    holdout_perf = gbm_model.model_performance(holdout)
    holdout_perf.plot(type="roc", **plot_options)
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:28,代码来源:pyunit_plot.py


示例15: frame_as_list

def frame_as_list():
    """Check h2o.as_list(..., use_pandas=False) against known cell values.

    For each of the iris, prostate and airlines frames, the first three
    entries of one row of the returned list are converted to float and
    compared with the expected values.
    """
    tol = 1e-10

    iris = h2o.import_file(path=tests.locate("smalldata/iris/iris_wheader.csv"))
    prostate = h2o.import_file(path=tests.locate("smalldata/prostate/prostate.csv.zip"))
    airlines = h2o.import_file(path=tests.locate("smalldata/airlines/allyears2k.zip"))

    iris_rows = h2o.as_list(iris, use_pandas=False)
    for col, want in enumerate((4.4, 2.9, 1.4)):
        assert abs(float(iris_rows[9][col]) - want) < tol, "incorrect values"

    prostate_rows = h2o.as_list(prostate, use_pandas=False)
    for col, want in enumerate((7, 0, 68)):
        assert abs(float(prostate_rows[7][col]) - want) < tol, "incorrect values"

    airlines_rows = h2o.as_list(airlines, use_pandas=False)
    for col, want in enumerate((1987, 10, 18)):
        assert abs(float(airlines_rows[4][col]) - want) < tol, "incorrect values"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:26,代码来源:pyunit_frame_as_list.py


示例16: additional_parameters

def additional_parameters():
    """Check import_file's destination_frame, col_names and col_types arguments.

    The same file is imported twice, once with col_types given as a list and
    once as a dict keyed by column name. Each time the frame id, the column
    names and the per-column types reported by h2o.frame() are verified.
    """
    csv_path = "smalldata/jira/hexdev_29.csv"
    names = ["a", "b", "c"]

    # --- col_types as list ---
    requested_id = "dev29&hex%"
    types_by_position = ["enum", "enum", "string"]

    frame = h2o.import_file(
        tests.locate(csv_path),
        destination_frame=requested_id,
        col_names=names,
        col_types=types_by_position,
    )
    frame.describe()

    # The frame id is expected to have '%' and '&' replaced by '.'.
    assert frame._id == requested_id.replace("%",".").replace("&",".")
    assert frame.col_names == names
    columns = h2o.frame(frame._id)["frames"][0]["columns"]
    for position, column in enumerate(columns):
        assert column["type"] == types_by_position[position]

    # --- col_types as dictionary ---
    requested_id = "dev29&hex%"
    types_by_name = {"c":"string", "a":"enum", "b": "enum"}

    frame = h2o.import_file(
        tests.locate(csv_path),
        destination_frame=requested_id,
        col_names=names,
        col_types=types_by_name,
    )
    frame.describe()

    assert frame._id == requested_id.replace("%",".").replace("&",".")
    assert frame.col_names == names
    columns = h2o.frame(frame._id)["frames"][0]["columns"]
    for position, column in enumerate(columns):
        assert column["type"] == types_by_name[names[position]]
开发者ID:xc35,项目名称:h2o-3,代码行数:35,代码来源:pyunit_hexdev_29_additional_parameters.py


示例17: col_names_check

def col_names_check():
  """Check column names and types for imported files and for frames built from lists.

  Covers a CSV with a header, a CSV without one (default C1..C5 names), a
  frame built with explicit column_names/column_types, and a frame built
  with neither.
  """
  # File with a header row: names come from the header.
  header_names = ["sepal_len","sepal_wid","petal_len","petal_wid","class"]
  with_header = h2o.import_file(tests.locate("smalldata/iris/iris_wheader.csv"))
  assert with_header.col_names == header_names, \
      "Expected {0} for column names but got {1}".format(header_names, with_header.col_names)

  # File without a header row: default names are expected.
  default_names5 = ["C1","C2","C3","C4","C5"]
  no_header = h2o.import_file(tests.locate("smalldata/iris/iris.csv"))
  assert no_header.col_names == default_names5, \
      "Expected {0} for column names but got {1}".format(default_names5, no_header.col_names)

  # Frame from Python data with explicit names and types.
  abcd = list("ABCD")
  enum_types = {"A": "Enum", "C": "Enum", "B": "Enum", "D": "Enum"}
  named = h2o.H2OFrame(np.random.randn(100,4).tolist(), column_names=list("ABCD"), column_types=["Enum"]*4)
  named.head()
  assert named.col_names == abcd, "Expected {} for column names but got {}".format(abcd, named.col_names)
  assert named.types == enum_types, \
      "Expected {} for column types but got {}".format(enum_types, named.types)

  # Frame from Python data with no names or types given.
  default_names4 = ["C1","C2","C3","C4"]
  numeric_types = {"C3": "Numeric", "C2": "Numeric", "C1": "Numeric", "C4": "Numeric"}
  unnamed = h2o.H2OFrame(np.random.randn(100,4).tolist())
  unnamed.head()
  assert unnamed.col_names == default_names4, \
      "Expected {} for column names but got {}".format(default_names4, unnamed.col_names)
  assert unnamed.types == numeric_types, \
      "Expected {} for column types but got {}".format(numeric_types, unnamed.types)
开发者ID:MarcLafon,项目名称:h2o-3,代码行数:25,代码来源:pyunit_colnames.py


示例18: test_locate

def test_locate():
    """Check that tests.locate resolves an existing file and rejects a missing one.

    Raises:
        AssertionError: if locating a non-existent file does not raise ValueError.
    """

    # Locating an existing file must not raise. The original called
    # h2o.locate here while everything else, including the check below, uses
    # tests.locate; the same helper is now used for both lookups.
    tests.locate("smalldata/iris/iris.csv")

    try:
        tests.locate("smalldata/iris/afilethatdoesnotexist.csv")
    except ValueError:
        pass  # expected: the file does not exist
    else:
        assert False, "Expected tests.locate to raise a ValueError"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:9,代码来源:pyunit_locate.py


示例19: get_model_test

def _assert_same_predictions(expected, actual, label):
    """Assert that column 0 of two prediction frames matches row by row.

    Args:
        expected: predictions from the originally built model.
        actual: predictions from the model fetched back with h2o.get_model.
        label: model-category word used in the failure message.

    Raises:
        AssertionError: on the first row whose values differ.
    """
    for row in range(expected.nrow):
        p1 = expected[row, 0]
        p2 = actual[row, 0]
        assert p1 == p2, "expected {0} predictions to be the same for row {1}, but got {2} and {3}" \
                         "".format(label, row, p1, p2)


def get_model_test():
    """Check that h2o.get_model returns models equivalent to the ones just built.

    For regression, binomial and multinomial models, the model fetched by id
    must report the expected model_category and give the same predictions as
    the original. For clustering only the category is checked.
    """

    prostate = h2o.import_file(path=tests.locate("smalldata/logreg/prostate.csv"))

    # Split by thresholding a random-uniform column at 0.70. The row loops
    # now live in the helper, so ``r`` is no longer rebound to an int later.
    r = prostate[0].runif()
    train = prostate[r < 0.70]
    test = prostate[r >= 0.70]

    # Regression
    regression_gbm1 = h2o.gbm(y=train[1], x=train[2:9], distribution="gaussian")
    predictions1 = regression_gbm1.predict(test)

    regression_gbm2 = h2o.get_model(regression_gbm1._id)
    assert regression_gbm2._model_json['output']['model_category'] == "Regression"
    predictions2 = regression_gbm2.predict(test)

    _assert_same_predictions(predictions1, predictions2, "regression")

    # Binomial
    train[1] = train[1].asfactor()
    bernoulli_gbm1 = h2o.gbm(y=train[1], x=train[2:], distribution="bernoulli")
    predictions1 = bernoulli_gbm1.predict(test)

    bernoulli_gbm2 = h2o.get_model(bernoulli_gbm1._id)
    assert bernoulli_gbm2._model_json['output']['model_category'] == "Binomial"
    predictions2 = bernoulli_gbm2.predict(test)

    _assert_same_predictions(predictions1, predictions2, "binomial")

    # Clustering
    benign_h2o = h2o.import_file(path=tests.locate("smalldata/logreg/benign.csv"))
    km_h2o = h2o.kmeans(x=benign_h2o, k=3)
    benign_km = h2o.get_model(km_h2o._id)
    assert benign_km._model_json['output']['model_category'] == "Clustering"

    # Multinomial
    train[4] = train[4].asfactor()
    multinomial_dl1 = h2o.deeplearning(x=train[0:2], y=train[4], loss='CrossEntropy')
    predictions1 = multinomial_dl1.predict(test)

    multinomial_dl2 = h2o.get_model(multinomial_dl1._id)
    assert multinomial_dl2._model_json['output']['model_category'] == "Multinomial"
    predictions2 = multinomial_dl2.predict(test)

    _assert_same_predictions(predictions1, predictions2, "multinomial")
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:57,代码来源:pyunit_get_model.py


示例20: bernoulliGBM

def bernoulliGBM():
  """Compare test-set AUC of an H2O bernoulli GBM against scikit-learn's GBM.

  Both models are trained on prostate_train.csv with the same tree count,
  learning rate, depth and minimum leaf size, then scored on
  prostate_test.csv. The test asserts that H2O's AUC is at least scikit's.
  """
  train_csv = "smalldata/logreg/prostate_train.csv"
  test_csv = "smalldata/logreg/prostate_test.csv"

  # Hyperparameters shared by both implementations.
  n_trees = 100
  rate = 0.1
  tree_depth = 5
  leaf_min = 10

  # Training data for H2O, with the response as a factor.
  h2o_train = h2o.import_file(path=tests.locate(train_csv))
  h2o_train["CAPSULE"] = h2o_train["CAPSULE"].asfactor()

  # Training data as a numpy array for scikit: column 0 is the response.
  np_train = np.loadtxt(tests.locate(train_csv), delimiter=',', skiprows=1)
  train_y = np_train[:, 0]
  train_X = np_train[:, 1:]

  # H2O GBM classification model
  h2o_model = h2o.gbm(
      x=h2o_train[1:],
      y=h2o_train["CAPSULE"],
      ntrees=n_trees,
      learn_rate=rate,
      max_depth=tree_depth,
      min_rows=leaf_min,
      distribution="bernoulli",
  )

  # scikit GBM classification model with the same parameters
  sci_model = ensemble.GradientBoostingClassifier(
      learning_rate=rate,
      n_estimators=n_trees,
      max_depth=tree_depth,
      min_samples_leaf=leaf_min,
      max_features=None,
  )
  sci_model.fit(train_X, train_y)

  # Test data for H2O, with the response as a factor.
  h2o_test = h2o.import_file(path=tests.locate(test_csv))
  h2o_test["CAPSULE"] = h2o_test["CAPSULE"].asfactor()

  # Test data as a numpy array for scikit.
  np_test = np.loadtxt(tests.locate(test_csv), delimiter=',', skiprows=1)
  test_y = np_test[:, 0]
  test_X = np_test[:, 1:]

  # Score both models on the test data.
  positive_proba = sci_model.predict_proba(test_X)[:, 1]
  auc_sci = roc_auc_score(test_y, positive_proba)

  h2o_perf = h2o_model.model_performance(h2o_test)
  auc_h2o = h2o_perf.auc()

  assert auc_h2o >= auc_sci, "h2o (auc) performance degradation, with respect to scikit"
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:56,代码来源:pyunit_bernoulliGBM.py



注:本文中的tests.locate函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tests.logging_disabled函数代码示例发布时间:2022-05-27
下一篇:
Python tests.json_response函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap