This article collects typical usage examples of the Python function nltk.corpus.treebank.parsed_sents. If you are wondering what exactly parsed_sents does, how to call it, or want worked examples, the curated code samples below should help.
Twenty code examples of parsed_sents are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python samples.
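Before the examples, a minimal sketch of the basic API (assuming NLTK is installed; nltk.download('treebank') fetches the 10% WSJ sample on first use):

import nltk
from nltk.corpus import treebank

# nltk.download('treebank')              # one-time download of the corpus sample
files = treebank.fileids()               # ['wsj_0001.mrg', 'wsj_0002.mrg', ...]
trees = treebank.parsed_sents(files[0])  # parsed sentences of one file, as nltk.Tree objects
first = treebank.parsed_sents()[0]       # or index into the whole corpus
print(first.leaves())                    # the tokens of the first parsed sentence
first.pretty_print()                     # ASCII drawing of the parse tree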
Example 1: pcfg_demo
def pcfg_demo():
    """
    A demonstration showing how C{WeightedGrammar}s can be created and used.
    """
    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart
    from nltk.grammar import toy_pcfg1, toy_pcfg2, Nonterminal  # module-level names in the original grammar.py

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use string.replace(...) to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26))
    print()

    print('Coverage of input words by a grammar:')
    print(grammar.covers(['a', 'boy']))
    print(grammar.covers(['a', 'girl']))

    # extract productions from the first two treebank files and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    for item in treebank.items[:2]:
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)
            tree.chomsky_normal_form(horzMarkov=2)
            productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")
    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    # sent = treebank.tokenized('wsj_0001.mrg')[0]
    sent = treebank.parsed_sents('wsj_0001.mrg')[0].leaves()
    print(sent)
    for parse in parser.nbest_parse(sent):
        print(parse)
Developer: ggosline | Project: taxonparser | Lines: 60 | Source: grammar.py
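The demo above uses NLTK 2-era names (treebank.items, grammar.covers, parser.nbest_parse). A minimal sketch of the same induce-and-parse loop against NLTK 3, assuming a current install:

from nltk import Nonterminal, induce_pcfg
from nltk.corpus import treebank
from nltk.parse import pchart

productions = []
for fileid in treebank.fileids()[:2]:       # treebank.items became fileids()
    for tree in treebank.parsed_sents(fileid):
        tree.collapse_unary(collapsePOS=False)
        tree.chomsky_normal_form(horzMarkov=2)
        productions += tree.productions()

grammar = induce_pcfg(Nonterminal('S'), productions)
parser = pchart.InsideChartParser(grammar)
sent = treebank.parsed_sents('wsj_0001.mrg')[0].leaves()
for parse in parser.parse(sent):            # nbest_parse() became parse(), which returns an iterator
    print(parse)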
Example 2: grammar_development_with_treebank
def grammar_development_with_treebank():
    from nltk.corpus import treebank
    # _grammar_filter is module-level in ch08.py; a plausible definition follows this example
    t = treebank.parsed_sents("wsj_0001.mrg")[0]
    print t
    print "identify verbs for SV in VP -> SV S", [
        subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(_grammar_filter)
    ]
Developer: prashiyn | Project: nltk-examples | Lines: 8 | Source: ch08.py
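The helper _grammar_filter is not shown in the snippet. A plausible definition, matching the filter spelled out in Example 4 (written with NLTK 3's label(); older NLTK exposed the same attribute as tree.node):

import nltk

def _grammar_filter(tree):
    # keep VP subtrees that take a sentential complement (VP -> ... S ...)
    child_nodes = [child.label() for child in tree if isinstance(child, nltk.Tree)]
    return tree.label() == 'VP' and 'S' in child_nodes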
Example 3: learn_treebank
from nltk.corpus import treebank
# learn_trees is defined elsewhere in learn_pcfg.py; a sketch follows this example

def learn_treebank(files=None, markov_order=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.

    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can give the filename of a Treebank file; 'wsj-02-21.mrg' will
    learn from the entire training section of the Treebank.
    """
    if files is None: bank = treebank.parsed_sents()
    else: bank = treebank.parsed_sents(files)
    return learn_trees(bank, collapse=True, markov_order=markov_order)
Developer: salmanahmad | Project: 6.863 | Lines: 11 | Source: learn_pcfg.py
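learn_trees is not shown. A sketch of the obvious reading, with markov_order mapped to NLTK's horzMarkov parameter; the definition here is an assumption, not the project's actual code:

from nltk import Nonterminal, induce_pcfg

def learn_trees(trees, collapse=True, markov_order=None):
    productions = []
    for tree in trees:
        if collapse:
            tree.collapse_unary(collapsePOS=False)
        if markov_order is None:
            tree.chomsky_normal_form()
        else:
            tree.chomsky_normal_form(horzMarkov=markov_order)
        productions += tree.productions()
    return induce_pcfg(Nonterminal('S'), productions)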
Example 4: grammarDevelopment
import nltk  # needed for nltk.Tree below

def grammarDevelopment():
    print "page 315 8.6 Grammar Development"
    print "=============== Treebanks and Grammars ==============="
    from nltk.corpus import treebank
    t = treebank.parsed_sents('wsj_0001.mrg')[0]
    print t

    def filter(tree):
        child_nodes = [child.node for child in tree if isinstance(child, nltk.Tree)]
        return (tree.node == 'VP') and ('S' in child_nodes)

    print [subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(filter)]
Developer: hbdhj | Project: python | Lines: 12 | Source: chapter8.py
Example 5: test
from nltk.tree import Tree
from nltk.treeprettyprinter import TreePrettyPrinter  # this demo lives inside treeprettyprinter.py, where both names are local

def test():
    """Do some tree drawing tests."""
    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        print()
        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank
    for n in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    print(TreePrettyPrinter(tree).text(nodedist=2))

    tree = Tree.fromstring(
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
                ' zwemmen of terrassen .'.split())
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
Developer: CaptainAL | Project: Spyder | Lines: 28 | Source: treeprettyprinter.py
Example 6: main
from nltk.corpus import treebank
from nltk.parse.stanford import StanfordParser
# precision_and_recall_stat and get_nodes_with_range are project helpers; a sketch follows this example

def main(transform_func=None, n=10):
    parser = StanfordParser(
        path_to_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser.jar",
        path_to_models_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models.jar",
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    )

    test_sents = treebank.sents()[-n:]
    print "len(test_sents) = %d" % (len(test_sents))

    if transform_func and callable(transform_func):
        print "transforming it using ", transform_func
        test_sents = [[transform_func(w) for w in s]
                      for s in test_sents]  # transform it

    print test_sents[:10]

    print "predicting"
    pred_parses = parser.parse_sents(test_sents)

    gold_parses = treebank.parsed_sents()[-n:]  # align the gold trees with the last n test sentences

    print "evaluating"
    correct_n = gold_n = predicted_n = 0.0
    for gparse, pparse in zip(gold_parses, pred_parses):
        cn, gn, pn = precision_and_recall_stat(get_nodes_with_range(gparse),
                                               get_nodes_with_range(pparse))
        correct_n += cn
        gold_n += gn
        predicted_n += pn

    print "Precision: %f, Recall: %f" % (correct_n / predicted_n, correct_n / gold_n)
Developer: xiaohan2012 | Project: capitalization-restoration-train | Lines: 35 | Source: parse.py
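get_nodes_with_range and precision_and_recall_stat are not shown. A plausible PARSEVAL-style reading: collect (label, start, end) brackets from each tree and count the overlap. Both definitions below are assumptions:

import nltk

def get_nodes_with_range(tree):
    # set of (label, start, end) spans, one per constituent
    spans = set()
    def walk(t, start):
        end = start
        for child in t:
            if isinstance(child, nltk.Tree):
                walk(child, end)
                end += len(child.leaves())
            else:
                end += 1
        spans.add((t.label(), start, end))
    walk(tree, 0)
    return spans

def precision_and_recall_stat(gold_nodes, pred_nodes):
    # correct, gold, and predicted bracket counts for one sentence
    common = gold_nodes & pred_nodes
    return len(common), len(gold_nodes), len(pred_nodes)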
Example 7: sentences
from nltk.corpus import treebank

def sentences():
    for f in treebank.fileids():
        for t in treebank.parsed_sents(f):
            t.chomsky_normal_form(horzMarkov=1)
            t.collapse_unary(collapsePOS=True)
            yield (t, t.leaves())
Developer: brucespang | Project: pcyk | Lines: 7 | Source: pcyk.py
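A quick way to exercise the generator without walking the whole corpus:

from itertools import islice

for tree, words in islice(sentences(), 3):
    print(len(words), 'tokens:', ' '.join(words))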
Example 8: convert_wsj
import sys
# TreebankConverter comes from the surrounding nltk_contrib demo module

def convert_wsj(file_obj):
    from nltk.corpus import treebank

    sys.stderr.write("Converting Penn Treebank sampler...\n")
    tb = TreebankConverter()
    for sentence in treebank.parsed_sents():
        tb.add_sentence(sentence)
    tb.write(file_obj)
Developer: Sandy4321 | Project: nltk_contrib | Lines: 8 | Source: demo.py
Example 9: main
from collections import deque
from nltk.corpus import treebank
from nltk.tree import ParentedTree
# FILENAMES, find_pronouns, hobbs, hobbs_to_string, update_pronoun_results and
# PRONOUN_RESULTS are defined elsewhere in HobbsImplementation.py

def main():
    answers = open('coref_key.txt', 'r')
    this_correct = 0
    correct = 0
    total = 0
    prev_sentences = deque()

    for file in FILENAMES:
        this_correct = 0
        this_total = 0
        prev_sentences.clear()
        for tree in treebank.parsed_sents(file):
            tree = ParentedTree.convert(tree)
            for pronoun, np_node in find_pronouns(tree):
                # i = 0
                # for t in list(prev_sentences)[-3:]:
                #     t.pretty_print()
                #     print("-"*25)
                #     i = i + 1
                #     if i == 3: break
                proposed = hobbs_to_string(hobbs(np_node, pronoun.lower(), prev_sentences))
                tree.pretty_print()
                actual = answers.readline()
                if proposed == actual[:-1]:
                    update_pronoun_results(pronoun, 1)
                    correct += 1
                    this_correct += 1
                else:  # assumed branch: the flattened source lost the else, but misses must be logged as 0
                    update_pronoun_results(pronoun, 0)
                total += 1
                this_total += 1
                print "Pronoun: '" + pronoun + "' Proposed: '" + proposed + "' Actual: '" + actual + "'"
                if total: print "Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct/float(total), "\n"
                print("*"*100)
                print("*"*100)
            prev_sentences.append(tree)
            print("-"*50)
        if this_correct: print file, ":\tCorrect:", this_correct, "\tTotal:", this_total, "\tPercentage:", this_correct/float(this_total), "\n"

    if total: print "Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct/float(total), "\n"
    print("-"*50)
    print "Male correct:", PRONOUN_RESULTS['male'], "\tMale total:", PRONOUN_RESULTS['male_total'], "\tPercent correct:", PRONOUN_RESULTS['male_pct']
    print "Female correct:", PRONOUN_RESULTS['female'], "\tFemale total:", PRONOUN_RESULTS['female_total'], "\tPercent correct:", PRONOUN_RESULTS['female_pct']
    print "Neutral correct:", PRONOUN_RESULTS['neutral'], "\tNeutral total:", PRONOUN_RESULTS['neutral_total'], "\tPercent correct:", PRONOUN_RESULTS['neutral_pct']
    print "Plural correct:", PRONOUN_RESULTS['they'], "\tPlural total:", PRONOUN_RESULTS['they_total'], "\tPercent correct:", PRONOUN_RESULTS['they_pct']
    print "Reflexive correct:", PRONOUN_RESULTS['reflexive'], "\tReflexive total:", PRONOUN_RESULTS['reflexive_total'], "\tPercent correct:", PRONOUN_RESULTS['reflexive_pct']
    print "Total correct:", correct, "\tTotal:", total, "\tPercent correct:", correct/float(total)
Developer: treyfeldman | Project: Hobb-s-Algorithm | Lines: 56 | Source: HobbsImplementation.py
Example 10: getTrees
def getTrees(source, size):
    '''Load the trees from source, return the first SIZE trees'''
    if source == 'treebank':
        from nltk.corpus import treebank
        trees = treebank.parsed_sents()
        # inds = random.permutation(range(0, len(trees)))[0:size]
        trees = trees[:size]
        return trees
    else:
        return list()
Developer: Jsalim | Project: NLP-Stuff | Lines: 10 | Source: PCFG_util.py
Example 11: TreebankNoTraces
from nltk.corpus import treebank
# RemoveFunctionTags and RemoveTraces are project helpers; a sketch follows this example

def TreebankNoTraces():
    tb = []
    for t in treebank.parsed_sents():
        if t.label() != "S": continue
        RemoveFunctionTags(t)
        RemoveTraces(t)
        t.collapse_unary(collapsePOS=True, collapseRoot=True)
        t.chomsky_normal_form()
        tb.append(t)
    return tb
Developer: weitongruan | Project: Comp150NLP | Lines: 10 | Source: pset4.py
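RemoveFunctionTags and RemoveTraces are defined elsewhere in pset4.py. A sketch under the usual Penn conventions (function tags glued on with '-' or '=', traces under the -NONE- preterminal); both definitions here are assumptions:

import nltk

def RemoveFunctionTags(tree):
    # strip function tags in place: 'NP-SBJ-1' -> 'NP' (leave the trace label '-NONE-' alone)
    for subtree in tree.subtrees():
        label = subtree.label()
        if label != '-NONE-':
            subtree.set_label(label.split('-')[0].split('=')[0])

def RemoveTraces(tree):
    # delete -NONE- trace subtrees in place; a fuller version would also
    # prune constituents left empty by the deletion
    for subtree in reversed(list(tree.subtrees())):
        for i in reversed(range(len(subtree))):
            child = subtree[i]
            if isinstance(child, nltk.Tree) and child.label() == '-NONE-':
                del subtree[i]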
Example 12: learn_treebank
from nltk.corpus import treebank
# learn_trees is the same helper sketched after Example 3

def learn_treebank(trees=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.

    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can also pass a set of trees.
    """
    if trees is None: bank = treebank.parsed_sents()
    else: bank = trees
    return learn_trees(bank, collapse=True)
Developer: JakeBrawer | Project: org | Lines: 10 | Source: learn_pcfg.py
Example 13: write_example_tree
from nltk.corpus import treebank
# treebank_helper is a project module from the same repository

def write_example_tree(features, f):
    filename = features['_filename']
    sen = features['_sentence_id']
    phr = features['_phrase_id']
    tree = treebank.parsed_sents(filename)[sen]
    phrase = tree[tree.treepositions('preorder')[phr]]
    l = treebank_helper.get_label(phrase)
    treebank_helper.set_label(phrase, '***' + l + '***')
    f.write(str(tree))
    f.write('\n')
    treebank_helper.set_label(phrase, l)
Developer: EddieNejadi | Project: Machine_Learning | Lines: 11 | Source: funtag.py
Example 14: treebank_accessor
from nltk.corpus import treebank
# TREEBANK_FILES is a module-level constant (the number of sample files to read)

def treebank_accessor():
    '''
    Function that reads the Penn Treebank and returns all the trees
    for each sentence in the corpus.
    '''
    trees = []
    for i in range(1, TREEBANK_FILES + 1):
        file_number = "%03d" % (i,)
        t = treebank.parsed_sents('wsj_0' + file_number + '.mrg')
        for sentence in range(len(t)):
            # For each sentence in the file, add its tree to trees[]
            trees.append(t[sentence])
    return trees
Developer: barbaragabriela | Project: inlp-probabilistic-parsing | Lines: 16 | Source: helper.py
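Assuming TREEBANK_FILES is 199 (the NLTK sample spans wsj_0001.mrg through wsj_0199.mrg), the corpus reader can produce the same list without any filename formatting:

trees = list(treebank.parsed_sents())   # every tree in NLTK's 10% sample
# or per file: [treebank.parsed_sents(f) for f in treebank.fileids()]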
Example 15: get_treebank_rules
# cache_utils, log and lexical_rules are project helpers; a sketch of lexical_rules follows this example

def get_treebank_rules(cutoff=0, include_counts=False):
    all_rules = cache_utils.cache_get('treebank_rules', 'rules')
    if not all_rules:
        log('Generating lexical rules from Penn Treebank', 4)
        from nltk.corpus import treebank
        all_rules = dict()
        for tree in treebank.parsed_sents():
            for rule, count in lexical_rules(tree).items():
                all_rules[rule] = all_rules.get(rule, 0) + count
        cache_utils.cache_set('treebank_rules', 'rules', all_rules)

    if include_counts:
        return {k: v for (k, v) in all_rules.items() if v > cutoff}
    else:
        rules_set = set([rule for rule, count in all_rules.items() if count > cutoff])
        return rules_set
Developer: snyderp | Project: cs412-scorer | Lines: 17 | Source: syntactic_formation.py
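lexical_rules is project code that is not shown. A plausible stand-in that counts lexical productions (POS -> word) in one tree, leaning on NLTK's Production.is_lexical(); the definition is an assumption:

from collections import Counter

def lexical_rules(tree):
    # count productions whose right-hand side is a terminal (POS -> word)
    return Counter(prod for prod in tree.productions() if prod.is_lexical())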
Example 16: read_treebank_files
from nltk.corpus import treebank
from nltk.tree import ParentedTree
# treebank_helper and find_examples_in_tree are defined elsewhere in funtag.py

def read_treebank_files(files, extractor, fe):
    """Read the listed treebank files and collect function tagging examples
    from each tree.

    The user-provided feature extractor is applied to each phrase in each
    tree. The extracted feature dicts and the true function tags for each
    phrase are stored in two separate lists, which are returned.
    """
    X = []
    Y = []
    for filename in files:
        scount = 0
        for tree in treebank.parsed_sents(filename):
            tree = ParentedTree.convert(tree)
            treebank_helper.postprocess(tree)
            find_examples_in_tree(tree, X, Y, extractor, fe, filename, scount, 0)
            scount += 1
    return X, Y
Developer: EddieNejadi | Project: Machine_Learning | Lines: 18 | Source: funtag.py
Example 17: create_forests
# Document, Forest, classes, load_lexicon, as_list, has_nltk and NLTK_TREE_RANGE
# come from the surrounding Kataja module

def create_forests(self, filename=None, treelist=None, clear=False):
    """ This will read sentences to parse. One sentence per line, no periods etc.

    :param filename: treeset file to read when no treelist is given and NLTK is unavailable
    :param treelist: ready-made list of bracketed trees or sentence strings
    :param clear: start with empty
    """
    filename = filename or Document.get_default_treeset_file()
    forests = []
    input_trees = []
    shared_lexicon = load_lexicon(Document.get_default_lexicon_file())
    print('loaded shared_lexicon: ', shared_lexicon)

    if treelist:
        input_trees = treelist
    elif has_nltk:
        print(f"reading trees {NLTK_TREE_RANGE[0]}-{NLTK_TREE_RANGE[1]} from NLTK's treebank")
        for i in range(*NLTK_TREE_RANGE):  # 199
            trees = treebank.parsed_sents(f'wsj_0{str(i).rjust(3, "0")}.mrg')
            for j, tree in enumerate(trees):
                tree.chomsky_normal_form()
                tree.collapse_unary()
                input_trees.append(as_list(tree))
    else:
        readfile = open(filename, 'r')
        for line in readfile:
            line = line.strip()
            if line:
                if line.startswith('[') and line.endswith(']'):
                    input_trees.append(ast.literal_eval(line))
                else:
                    input_trees.append(line)

    for input_tree in input_trees:
        syn = classes.SyntaxAPI()
        syn.lexicon = shared_lexicon
        if isinstance(input_tree, list):
            syn.input_tree = input_tree
        else:
            syn.input_text = input_tree
        forest = Forest(heading_text=str(input_tree), syntax=syn)
        forests.append(forest)
    return forests
Developer: jpurma | Project: Kataja | Lines: 43 | Source: Document.py
Example 18: get_trees
from nltk.corpus import treebank
# ctc is the project's CNF tree-converter module

def get_trees(fileids=None, verbose=False):
    """
    Get the CNF trees for the treebank fileids given, or for the entire treebank
    """
    if not fileids:
        # Get the Penn Treebank corpus
        fileids = treebank.fileids()

    # Get the sentence-trees in each file
    tree_lists = [treebank.parsed_sents(file_id) for file_id in fileids]
    trees = [sent for sent_list in tree_lists for sent in sent_list]
    if verbose:
        print("obtained", len(trees), "trees from the corpus.")

    cnf_trees = [ctc.convert_tree(t) for t in trees]
    if verbose:
        print("converted", len(trees), "trees to cnf.")
    return cnf_trees
Developer: hmc-cs159-spring2016 | Project: banana | Lines: 19 | Source: main.py
Example 19: train_pcfg
import nltk
from nltk.corpus import treebank

def train_pcfg():
    print 'training grammar'
    productions = []
    # print len(treebank.fileids())
    trees = []
    # up to 199; fewer files give a smaller grammar and quicker training
    for fileid in treebank.fileids()[0:20]:
        for tree in treebank.parsed_sents(fileid):
            # perform optional tree transformations, e.g.:
            # Collapse unary branches A->B->C into A->B+C so we can avoid infinite
            # productions
            tree.collapse_unary(collapsePOS=False)
            # Binarize A->(B,C,D) into A->B,C+D->D (binarization required by a CKY parser)
            # horizontal and vertical Markovization: remember parents and siblings in the tree
            # This gives a performance boost, but makes the grammar HUGE
            # If we used these we would need to implement a tag-forgetting method
            #tree.chomsky_normal_form(horzMarkov=0, vertMarkov=0)
            tree.chomsky_normal_form()
            productions += tree.productions()
    S = nltk.Nonterminal('S')
    grammar = nltk.induce_pcfg(S, productions)
    print "grammar trained!"
    return grammar
Developer: owenst | Project: geotweets | Lines: 23 | Source: write.py
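One way to put the returned grammar to work is NLTK's ViterbiParser; a minimal sketch (parsing raises a coverage error if the sentence uses words the 20 training files never saw, so a training sentence is used here):

import nltk
from nltk.corpus import treebank

grammar = train_pcfg()
parser = nltk.ViterbiParser(grammar)
sent = treebank.parsed_sents('wsj_0001.mrg')[0].leaves()
for tree in parser.parse(sent):   # an iterator; empty if no parse is found
    tree.pretty_print()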
Example 20: getAvgNodeLength
import nltk
from nltk.corpus import treebank
from nltk.probability import *
from nltk.grammar import *

### RETRIEVE ALL TREES AND THEN SELECT THE FIRST 100.
all_trees = treebank.parsed_sents()
trees_100 = all_trees[0:100]

### FUNCTION EXTRACTING THE LEAVES OF NODES WHOSE LABEL MATCHES THE PARAMETER OF getAvgNodeLength().
def getAvgNodeLength(label):
    l_leaves = list()
    for tree in trees_100:
        for node in tree:
            if node.label() == label:
                l_leaves.append(node.leaves())

    ### CREATED OWN LIST OF PUNCTUATION TO EXCLUDE SINCE USING string.punctuation WOULD
    ### HAVE DELETED WORDS SUCH AS "Dr.", "World-Wide", "U.S.", etc. WHICH ARE OF INTEREST.
    punct = [u"*", u",", u"&", u"'"]
    # filter with a comprehension: the source removed items from the list while
    # iterating over it (and referred to it as l_sbj), which skips elements
    l_leaves = [[word for word in wordlist if not any(p in word for p in punct)]
                for wordlist in l_leaves]

    ### CREATE LIST OF LENGTHS (IN WORDS) OF NODES.
    l_len = list()
    for wordlist in l_leaves:
        l_len.append(len(wordlist))
    # the source snippet is truncated at this point; a plausible completion:
    return sum(l_len) / float(len(l_len)) if l_len else 0.0
Developer: albertomh | Project: py-nltk-parsing | Lines: 31 | Source: avg+viterbi.py
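A quick check of the averaging function, assuming the function-tagged labels that sit directly under the root in the unstripped treebank trees (e.g. 'NP-SBJ' and 'VP'):

print(getAvgNodeLength('NP-SBJ'))   # average subject-NP length, in words
print(getAvgNodeLength('VP'))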
Note: the nltk.corpus.treebank.parsed_sents examples in this article were compiled by 纯净天空 from open-source projects on GitHub, MSDocs, and similar code-hosting platforms. The snippets were selected from projects contributed by various developers; copyright in each snippet remains with its original author, and redistribution or use should follow the corresponding project's license. Do not repost without permission.