本文整理汇总了Python中window_based_tagger_config.get_config函数的典型用法代码示例。如果您正苦于以下问题:Python get_config函数的具体用法?Python get_config怎么用?Python get_config使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_config函数的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, models_folder, essays_folder, spell_check_dict):
    """Load config, annotated essays, a spelling corrector, feature extractors and pickled models.

    Defect fixed: the scraped snippet had all indentation stripped (invalid Python);
    the structure below is reconstructed from the two `if not ...endswith` guards and
    the flat statement sequence of the constructor body.

    Args:
        models_folder:    folder holding the pickled models ("/" appended if missing).
        essays_folder:    folder holding the Bratt-annotated essays ("/" appended if missing).
        spell_check_dict: folder handed to build_spelling_corrector as its dictionary source.
    """
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    # Normalize folder paths so later string concatenation yields valid paths.
    if not models_folder.endswith("/"):
        models_folder += "/"
    if not essays_folder.endswith("/"):
        essays_folder += "/"
    self.logger = logging.getLogger()
    cfg = get_config(essays_folder)
    self.config = cfg
    self.essays_folder = essays_folder
    # Create spell checker.
    # Need annotations here purely to load the tags.
    tagged_essays = load_bratt_essays(essays_folder, include_vague=cfg["include_vague"],
                                      include_normal=cfg["include_normal"], load_annotations=True)
    self.__set_tags_(tagged_essays)
    self.wd_sent_freq = defaultdict(int)
    self.spelling_corrector = build_spelling_corrector(
        tagged_essays, self.config["lower_case"], self.wd_sent_freq, folder=spell_check_dict)
    # Has to be an int as it is used in slices; in Python 3.x plain "/" would yield a float.
    offset = int((self.config["window_size"] - 1) / 2)
    unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
    bigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)
    extractors = [unigram_window_stemmed, bigram_window_stemmed]
    # Most params below exist ONLY for the purposes of the hashing to and from disk.
    self.feature_extractor = FeatureExtractorTransformer(extractors)
    # Load pickled models.
    self.logger.info("Loading pickled models")
    store = ModelStore(models_folder=models_folder)
    self.feature_transformer = store.get_transformer()
    self.logger.info("Loaded Transformer")
    self.tag_2_wd_classifier = store.get_tag_2_wd_classifier()
    self.logger.info("Loaded word tagging model")
    self.tag_2_sent_classifier = store.get_tag_2_sent_classifier()
    self.logger.info("Loaded sentence classifier")
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:41,代码来源:annotator.py
示例2: get_config
# --- Truncated example snippet (scraped from feature_selection.py); indentation was lost. ---
CV_FOLDS = 5
MIN_TAG_FREQ = 5
LOOK_BACK = 0 # how many sentences to look back when predicting tags
# end not hashed
# construct unique key using settings for pickling
settings = Settings.Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
folder = root_folder + "Training/"
processed_essay_filename_prefix = root_folder + "Pickled/essays_proc_pickled_"
features_filename_prefix = root_folder + "Pickled/feats_pickled_"
config = get_config(folder)
""" Load Essays """
# Memoize the essay pre-processing to disk under processed_essay_filename_prefix.
mem_process_essays = memoize_to_disk(filename_prefix=processed_essay_filename_prefix)(load_process_essays)
tagged_essays = mem_process_essays( **config )
logger.info("Essays loaded")
""" End load Essays """
# NOTE(review): the body of this function continues past the end of the snippet.
def evaluate_window_size(config, window_size, features_filename_prefix):
config["window_size"] = window_size
""" FEATURE EXTRACTION """
# NOTE(review): "/" floors only on Python 2; on Python 3 it yields a float.
# Other examples on this page wrap the same expression in int(...) — confirm interpreter.
offset = (config["window_size"] - 1) / 2
unigram_window = fact_extract_positional_word_features(offset)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:feature_selection.py
示例3: open
# --- Truncated example snippet (scraped from a causal-relation training script); indentation lost. ---
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"
coref_root = root_folder + "CoReference/"
coref_output_folder = coref_root + "CRel/"
# Load previously tagged train/test essays from dill pickles.
train_fname = coref_output_folder + "training_crel_anatagged_essays_most_recent_code.dill"
with open(train_fname, "rb") as f:
pred_tagged_essays_train = dill.load(f)
test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
with open(test_fname, "rb") as f:
pred_tagged_essays_test = dill.load(f)
# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
config = get_config(coref_output_folder)
results_processor = ResultsProcessor(dbname="metrics_causal_model")
########################################################
logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays_train)) # should be 902
cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)
# Single "fold": train on the training essays, evaluate on the held-out test essays.
cv_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)] # type: List[Tuple[Any,Any]]
# NOTE(review): signature is cut off here — the remaining parameters and body lie past the snippet.
def evaluate_model(
collection_prefix: str,
folds: List[Tuple[Any, Any]],
extractor_fn_names_lst: List[str],
cost_function_name: str,
beta: float,
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:train_causal_relation_extractor_hyperparam_tuning_logreg_test.py
示例4: get_config
# --- Truncated example snippet (scraped from CreateTrainingData.py); indentation lost. ---
from featureextractortransformer import FeatureExtractorTransformer
from load_data import load_process_essays
from featureextractionfunctions import *
from window_based_tagger_config import get_config
# NOTE(review): cPickle exists only on Python 2 (Python 3 uses pickle) — this snippet targets Py2.
import cPickle as pickle
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()
# not hashed as don't affect persistence of feature processing
# NOTE(review): `data` is not defined in the visible snippet — presumably the essays folder path.
config = get_config(data)
""" FEATURE EXTRACTION """
# NOTE(review): "/" floors only on Python 2; on Python 3 this yields a float.
offset = (config["window_size"] - 1) / 2
unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)
extractors = [unigram_window_stemmed, biigram_window_stemmed]
# NOTE(review): dict(items() + [...]) works only on Py2 where items() is a list;
# the Py3 equivalent is {**config, "extractors": extractors}.
feat_config = dict(config.items() + [("extractors", extractors)])
""" LOAD DATA """
tagged_essays = load_process_essays( **config )
logger.info("Essays loaded")
# most params below exist ONLY for the purposes of the hashing to and from disk
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:29,代码来源:CreateTrainingData.py
示例5: Settings
# --- Truncated example snippet (scraped from a causal-relation feature-selection script); indentation lost. ---
# Data Set Partition
CV_FOLDS = 5
MIN_FEAT_FREQ = 5
# Global settings
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
# NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook
# used as inputs to parsing model
rnn_predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"
config = get_config(training_folder)
results_processor = ResultsProcessor(dbname="metrics_causal")
# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)
########################################################
# Load RNN word-tag predictions (dill pickle) produced by the notebook referenced above.
fname = rnn_predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
pred_tagged_essays = dill.load(f)
logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays)) # should be 902
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:30,代码来源:train_causal_relation_extractor_feature_selection_logreg.py
示例6: get_config
# --- Truncated example snippet (scraped from a window-based tagger test script); indentation lost. ---
MIN_FEAT_FREQ = 5 # 5 best so far
CV_FOLDS = 5
MIN_TAG_FREQ = 5
LOOK_BACK = 0 # how many sentences to look back when predicting tags
# end not hashed
# construct unique key using settings for pickling
settings = Settings.Settings()
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"
training_folder = root_folder + "Training/"
test_folder = root_folder + "Test/"
train_config = get_config(training_folder)
""" FEATURE EXTRACTION """
# Override the configured window size before computing the positional offset.
train_config["window_size"] = 9
# NOTE(review): "/" floors only on Python 2; on Python 3 this yields a float.
offset = (train_config["window_size"] - 1) / 2
unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)
triigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 3)
unigram_bow_window = fact_extract_bow_ngram_features(offset, 1)
#optimal SC feature set
# NOTE(review): this list literal is cut off — its closing bracket lies past the snippet.
extractors = [unigram_bow_window,
unigram_window_stemmed,
biigram_window_stemmed,
#trigram_window_stemmed,
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:windowbasedtagger_most_common_tag_multiclass_test.py
示例7: ModelStore
# --- Truncated example snippet (scraped from windowbasedclassifier_train_THEN_predict.py); indentation lost. ---
MIN_TAG_FREQ = 5
LOOK_BACK = 0 # how many sentences to look back when predicting tags
# end not hashed
# construct unique key using settings for pickling
settings = Settings.Settings()
model_store = ModelStore()
""" PETER - CHANGE THESE FILE PATHS """
folder = settings.data_directory + "CoralBleaching/BrattData/EBA1415_Merged/" # Location where the training data is, use EBA_Pre and Post test essays preferably
test_folder= settings.data_directory + "CoralBleaching/BrattData/Merged/" # Location where the new essays to tag are located
out_predictions_file = settings.data_directory + "CoralBleaching/Results/predictions.txt" # File to dump the predictions to
config = get_config(folder)
""" FEATURE EXTRACTION """
# NOTE(review): "/" floors only on Python 2; on Python 3 this yields a float.
offset = (config["window_size"] - 1) / 2
unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)
#pos_tag_window = fact_extract_positional_POS_features(offset)
#pos_tag_plus_wd_window = fact_extract_positional_POS_features_plus_word(offset)
#head_wd_window = fact_extract_positional_head_word_features(offset)
#pos_dep_vecs = fact_extract_positional_dependency_vectors(offset)
extractors = [unigram_window_stemmed, biigram_window_stemmed]
# NOTE(review): dict(items() + [...]) is Python-2-only; Py3 equivalent: {**config, "extractors": extractors}.
feat_config = dict(config.items() + [("extractors", extractors)])
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:29,代码来源:windowbasedclassifier_train_THEN_predict.py
示例8: Settings
# --- Truncated example snippet (scraped from CVTrainWordWindowTaggingModel.py); indentation lost. ---
SPARSE_WD_FEATS = True
MIN_FEAT_FREQ = 5 # 5 best so far
CV_FOLDS = 5
MIN_TAG_FREQ = 5
LOOK_BACK = 0 # how many sentences to look back when predicting tags
# end not hashed
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
partition = "Training" # Training | Test
target_folder = root_folder + partition + "/"
processed_essay_filename_prefix = root_folder + "Pickled/essays_proc_pickled_"
config = get_config(target_folder)
# LOAD ESSAYS
# Memoize the essay pre-processing to disk, keyed by the config settings.
mem_process_essays = memoize_to_disk(filename_prefix=processed_essay_filename_prefix)(load_process_essays)
tagged_essays = mem_process_essays(**config)
# map parsed essays to essay name
print("{0} essays loaded".format(len(tagged_essays)))
# LOAD COREF RESULTS
coref_root = root_folder + "CoReference/"
coref_folder = coref_root + partition
# NOTE(review): non-raw regex string — "\." is harmless here but r".*\.tagged" is the safer spelling.
coref_files = find_files(coref_folder, ".*\.tagged")
print("{0} co-ref tagged files loaded".format(len(coref_files)))
# Every essay must have a matching co-reference file.
assert len(coref_files) == len(tagged_essays)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:30,代码来源:CVTrainWordWindowTaggingModel.py
示例9: get_config
# --- Truncated example snippet (scraped from windowBased_classifier_cause_effect_lbls.py); indentation lost. ---
settings = Settings.Settings()
root = settings.data_directory + "/GlobalWarming/BrattFiles/merged/"
""" INPUT - two serialized files, one for the pre-processed essays, the other for the features """
""" OUTPUT """
processed_essay_filename_prefix = root + "Pickled/essays_proc_pickled_"
features_filename_prefix = root + "Pickled/feats_pickled_"
out_predictions_file = root + "Experiment/Output/predictions.txt"
out_predicted_margins_file = root + "Experiment/Output/predicted_confidence.txt"
out_metrics_file = root + "Experiment/Output/metrics.txt"
out_categories_file = root + "Experiment/Output/categories.txt"
config = get_config(root)
""" FEATURE EXTRACTION """
# NOTE(review): "/" floors only on Python 2; on Python 3 this yields a float.
offset = (config["window_size"] - 1) / 2
unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)
extractors = [unigram_window_stemmed, biigram_window_stemmed]
# NOTE(review): dict(items() + [...]) is Python-2-only; Py3 equivalent: {**config, "extractors": extractors}.
feat_config = dict(config.items() + [("extractors", extractors)])
""" LOAD DATA """
# Memoize the essay pre-processing to disk, keyed by the config settings.
mem_process_essays = memoize_to_disk(filename_prefix=processed_essay_filename_prefix)(load_process_essays)
tagged_essays = mem_process_essays( **config )
# replace periods in tags so that we can store results in mongo
replace_periods(tagged_essays)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:windowBased_classifier_cause_effect_lbls.py
注:本文中的window_based_tagger_config.get_config函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论