本文整理汇总了Java中dragon.nlp.tool.lemmatiser.EngLemmatiser类的典型用法代码示例。如果您正苦于以下问题：Java EngLemmatiser类的具体用法？Java EngLemmatiser怎么用？Java EngLemmatiser使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
EngLemmatiser类属于dragon.nlp.tool.lemmatiser包,在下文中一共展示了EngLemmatiser类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: inform
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Creates the {@link EngLemmatiser} once the resource loader is available.
 *
 * <p>Does nothing when {@code lemmatiserResourceDir} is {@code null}. Otherwise the
 * directory name is appended to the config directory reported by the loader (a file
 * separator is inserted if the config directory does not already end with one).
 *
 * @param loader resource loader; it is cast to {@code SolrResourceLoader} to obtain
 *               the config directory
 * @throws IOException declared for the overridden method; every failure inside the
 *                     initialisation is converted to {@link IllegalArgumentException}
 * @throws IllegalArgumentException if the lemmatiser cannot be created for any reason;
 *                                  the original failure is attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (lemmatiserResourceDir == null) {
        return;
    }
    try {
        String path = ((SolrResourceLoader) loader).getConfigDir();
        if (!path.endsWith(File.separator)) {
            path = path + File.separator;
        }
        lemmatiser = new EngLemmatiser(path + lemmatiserResourceDir, false, false);
    } catch (Exception e) {
        // The message keeps the full stack trace text as before (existing logs may rely
        // on it); the exception itself is now also chained so callers can inspect it.
        StringBuilder sb = new StringBuilder("Initiating ");
        sb.append(this.getClass().getName()).append(" failed due to:\n");
        sb.append(ExceptionUtils.getFullStackTrace(e));
        throw new IllegalArgumentException(sb.toString(), e);
    }
}
开发者ID:ziqizhang,项目名称:jate,代码行数:18,代码来源:EnglishLemmatisationFilterFactory.java
示例2: Lemmatiser
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
public Lemmatiser(EngLemmatiser lemmatiser) {
this.lemmatiser=lemmatiser;
tagLookUp = new HashMap<>();
tagLookUp.put("NN", 1);
tagLookUp.put("NNS", 1);
tagLookUp.put("NNP", 1);
tagLookUp.put("NNPS", 1);
tagLookUp.put("VB", 2);
tagLookUp.put("VBG", 2);
tagLookUp.put("VBD", 2);
tagLookUp.put("VBN", 2);
tagLookUp.put("VBP", 2);
tagLookUp.put("VBZ", 2);
tagLookUp.put("JJ", 3);
tagLookUp.put("JJR", 3);
tagLookUp.put("JJS", 3);
tagLookUp.put("RB", 4);
tagLookUp.put("RBR", 4);
tagLookUp.put("RBS", 4);
}
开发者ID:ziqizhang,项目名称:jate,代码行数:22,代码来源:Lemmatiser.java
示例3: main
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Converts every entry of an input directory with {@code transformToKEAOutput}, writing
 * each result into its own sub-folder of the output root.
 *
 * <p>Arguments, as used by this method:
 * <ul>
 *   <li>{@code args[0]} - directory passed to the {@link EngLemmatiser} constructor</li>
 *   <li>{@code args[1]} - input directory whose entries are processed one by one</li>
 *   <li>{@code args[2]} - output root; a sub-folder named after each input entry is created</li>
 *   <li>{@code args[3]} - forwarded unchanged as the third argument of {@code transformToKEAOutput}</li>
 *   <li>{@code args[4]} - Solr home path</li>
 *   <li>{@code args[5]} - Solr core name</li>
 *   <li>{@code args[6]} - argument for {@code App.getJateProperties}</li>
 * </ul>
 *
 * @param args the seven command-line arguments described above
 * @throws IllegalArgumentException if fewer than seven arguments are supplied
 * @throws IOException if the input directory cannot be listed, or on I/O failure while processing
 * @throws ParseException propagated from the processing calls
 * @throws JATEException propagated from the processing calls
 */
public static void main(String[] args) throws IOException, ParseException, JATEException {
    /*mergeGS("/home/zqz/Work/data/semeval2017-scienceie/scienceie2017_test/scienceie2017_test_gs",
    "/home/zqz/Work/data/semeval2017-scienceie/scienceie2017_test/all_key_phrases.txt");*/
    if (args == null || args.length < 7) {
        throw new IllegalArgumentException(
                "Expected 7 arguments but got " + (args == null ? 0 : args.length));
    }

    Lemmatiser lem = new Lemmatiser(new EngLemmatiser(args[0], false, false));

    // File.listFiles() returns null when the path is not a directory or cannot be read;
    // fail with a clear message before the Solr server is started.
    File[] inputs = new File(args[1]).listFiles();
    if (inputs == null) {
        throw new IOException("Cannot list input directory: " + args[1]);
    }

    String solrHomePath = args[4];
    String solrCoreName = args[5];
    final EmbeddedSolrServer solrServer = new EmbeddedSolrServer(Paths.get(solrHomePath), solrCoreName);
    try {
        JATEProperties jateProp = App.getJateProperties(args[6]);
        for (File f : inputs) {
            File outFolder = new File(args[2] + "/" + f.getName() + "/");
            outFolder.mkdirs();
            System.out.println(outFolder);
            transformToKEAOutput(f.toString(), 0, args[3], outFolder.toString(), lem,
                    solrServer.getCoreContainer().getCore(solrCoreName), jateProp);
        }
    } finally {
        // Close the embedded server even when processing throws.
        solrServer.close();
    }
    System.exit(0);
}
开发者ID:ziqizhang,项目名称:jate,代码行数:21,代码来源:ScienceIECorpusParser.java
示例4: test
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Smoke test: checks that the lemmatiser data directory exists, builds an
 * {@link EngLemmatiser} on it and prints the lemma of a few sample words
 * (including the empty string). No assertion is made on the lemmas themselves.
 *
 * @throws Exception if creating the lemmatiser or lemmatising fails
 */
@Test
public void test() throws Exception {
    final String dataPath = BANNER_ROOT
            + "src/main/resources/pear_resources/nlpdata/lemmatiser";
    final File dataDir = new File(dataPath);
    assertTrue(dataDir.exists());

    final EngLemmatiser engLemmatiser =
            new EngLemmatiser(dataDir.getAbsolutePath(), false, true);

    final String[] samples = { "gone", "went", "" };
    for (int i = 0; i < samples.length; i++) {
        final String word = samples[i];
        final String lemma = engLemmatiser.lemmatize(word);
        System.out.println(word + "\t--> " + lemma);
    }
}
开发者ID:BlueBrain,项目名称:bluima,代码行数:19,代码来源:LemmatizerTest.java
示例5: initialize
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Initialises the annotator and creates the {@link EngLemmatiser} from the configured
 * data location.
 *
 * @param context context handed on to the superclass initialisation
 * @throws ResourceInitializationException if resolving or checking the lemmatiser data
 *         raises a {@link FileNotFoundException}; that exception is attached as the cause
 */
@Override
public void initialize(UimaContext context)
        throws ResourceInitializationException {
    super.initialize(context);
    try {
        final File dataDir = getFile(lemmatiserData);
        checkFileExists(dataDir);
        final String dataPath = dataDir.getAbsolutePath();
        lemmatiser = new EngLemmatiser(dataPath, false, true);
    } catch (FileNotFoundException notFound) {
        final Object[] messageArgs = { lemmatiserData, "lemmatiserDataDirectory" };
        throw new ResourceInitializationException(RESOURCE_DATA_NOT_VALID,
                messageArgs, notFound);
    }
}
开发者ID:BlueBrain,项目名称:bluima,代码行数:16,代码来源:DragonLemmatiserAnnotator.java
示例6: init
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Creates the {@link EngLemmatiser} and fills the tag lookup table.
 *
 * <p>When the configured NLP path starts with {@code "jar:"}, the lemmatiser data files
 * are first copied into {@code <workPath>/lemmatizer} and that directory is used
 * instead. Files that already exist there are left untouched; missing ones are copied,
 * so a previously interrupted extraction is completed on the next call instead of being
 * skipped because the directory already exists.
 *
 * @throws UnsupportedOperationException if the target directory cannot be created or a
 *         data file cannot be copied
 */
private void init() {
    final JATEProperties instance = JATEProperties.getInstance();
    String path = instance.getNLPPath();
    if (path.startsWith("jar:")) {
        // Initializes the temporary directory if required.
        path = instance.getWorkPath() + File.separator + "lemmatizer";
        File directory = new File(path);
        if (!directory.exists() && !directory.mkdirs()) {
            throw new UnsupportedOperationException("Could not initialize " + directory);
        }
        for (String file : new String[]{ "adj.exc", "adj.index", "adv.exc", "adv.index", "noun.exc",
                "stopwordexc.list", "umlserror.list", "verb.exc", "verb.index" }) {
            File target = new File(directory, file);
            if (!target.exists()) {
                copyLemmatizerResource(instance, file, target);
            }
        }
    }
    lemmatizer = new EngLemmatiser(path, false, true);

    // Tag -> integer code. NOTE(review): the codes presumably match the POS indices
    // expected by the lemmatiser - confirm against its API.
    tagLookUp.put("NN", 1);
    tagLookUp.put("NNS", 1);
    tagLookUp.put("NNP", 1);
    tagLookUp.put("NNPS", 1);
    tagLookUp.put("VB", 2);
    tagLookUp.put("VBG", 2);
    tagLookUp.put("VBD", 2);
    tagLookUp.put("VBN", 2);
    tagLookUp.put("VBP", 2);
    tagLookUp.put("VBZ", 2);
    tagLookUp.put("JJ", 3);
    tagLookUp.put("JJR", 3);
    tagLookUp.put("JJS", 3);
    tagLookUp.put("RB", 4);
    tagLookUp.put("RBR", 4);
    tagLookUp.put("RBS", 4);
}

/**
 * Copies one lemmatiser data file from the NLP resources into {@code target}.
 *
 * @param instance properties object used to open the resource stream
 * @param file     plain file name of the resource below {@code lemmatizer}
 * @param target   destination file
 * @throws UnsupportedOperationException if the resource is missing or the copy fails
 */
private static void copyLemmatizerResource(JATEProperties instance, String file, File target) {
    try {
        // NOTE(review): the resource name is built with File.separator, as before; if
        // getNLPInputStream resolves class-path/jar entries this may need "/" on
        // Windows - confirm against its implementation.
        try (InputStream input = instance.getNLPInputStream("lemmatizer" + File.separator + file)) {
            if (input == null) {
                throw new IOException("Unable to read: " + file);
            }
            try (OutputStream output = new FileOutputStream(target)) {
                byte[] buffer = new byte[4096];
                int len;
                while ((len = input.read(buffer)) != -1) {
                    output.write(buffer, 0, len);
                }
            }
        }
    } catch (IOException ioe) {
        // Best effort: remove a possibly truncated file so the next call copies it
        // again; the result is ignored because the copy failure is reported below.
        target.delete();
        throw new UnsupportedOperationException("Unable to copy data", ioe);
    }
}
开发者ID:etheriau,项目名称:jatetoolkit,代码行数:67,代码来源:Lemmatizer.java
示例7: load
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Builds a {@link BannerProperties} from already-loaded {@link Properties},
 * instantiating the configured lemmatiser, POS tagger, tokenizer and optional
 * dictionary pre-tagger.
 *
 * <p>Keys read: {@code lemmatiserDataDirectory}, {@code posTaggerDataDirectory},
 * {@code posTagger}, {@code tokenizer}, {@code tagFormat},
 * {@code useParenthesisPostProcessing}, {@code useNumericNormalization}, {@code order},
 * {@code useFeatureInduction}, {@code textDirection}, {@code dictionary} and
 * {@code regexFilename}.
 *
 * @param properties source of the configuration values
 * @return the populated configuration object
 * @throws IOException if the dictionary file cannot be read
 * @throws ClassNotFoundException if the tokenizer class cannot be found
 * @throws InstantiationException if the tokenizer class cannot be instantiated
 * @throws IllegalAccessException if the tokenizer's no-argument constructor is not accessible
 * @throws IllegalArgumentException if the POS tagger type is unknown
 */
public static BannerProperties load(Properties properties) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    BannerProperties bannerProperties = new BannerProperties();
    String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
    if (lemmatiserDataDirectory != null) {
        bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
    }
    String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
    if (posTaggerDataDirectory != null) {
        String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
        if (posTagger.equals(HeppleTagger.class.getName())) {
            bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
        } else if (posTagger.equals(MedPostTagger.class.getName())) {
            bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
        } else {
            throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
        }
    }
    String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
    // Note assumption that the tokenizer constructor takes no parameters
    bannerProperties.tokenizer = (Tokenizer) Class.forName(tokenizer).newInstance();
    bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
    if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true"))) {
        bannerProperties.postProcessor = new ParenthesisPostProcessor();
    }
    bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
    bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
    bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
    bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    String dictionaryFileName = properties.getProperty("dictionary");
    if (dictionaryFileName != null) {
        // FIXME This is a temporary hack
        DictionaryTagger dictTagger = new GeneDictionaryTagger(bannerProperties.tokenizer, true);
        // try-with-resources closes the reader even when loading the dictionary throws.
        // NOTE(review): FileReader decodes with the platform default charset.
        try (FileReader reader = new FileReader(dictionaryFileName)) {
            dictTagger.add(reader, MentionType.getType("GENE"));
        }
        System.out.println("Dict size - " + dictTagger.size());
        bannerProperties.preTagger = dictTagger;
    }
    bannerProperties.regexFilename = properties.getProperty("regexFilename");
    return bannerProperties;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:43,代码来源:BannerProperties.java
示例8: getLemmatiser
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Returns the configured lemmatiser.
 *
 * @return the {@link EngLemmatiser} to use for training and tagging
 */
public EngLemmatiser getLemmatiser()
{
    return this.lemmatiser;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:9,代码来源:BannerProperties.java
示例9: EnglishLemmatisationFilter
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Creates a filter over {@code input} whose lemmatisation is delegated to a
 * {@code Lemmatiser} wrapping the supplied dragon-toolkit lemmatiser.
 *
 * @param dragontoolLemmatiser lemmatiser to wrap
 * @param input                upstream token stream handed to the superclass
 */
public EnglishLemmatisationFilter(EngLemmatiser dragontoolLemmatiser, TokenStream input) {
    super(input);
    this.lemmatiser = new Lemmatiser(dragontoolLemmatiser);
}
开发者ID:ziqizhang,项目名称:jate,代码行数:5,代码来源:EnglishLemmatisationFilter.java
示例10: main
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Command-line entry point: evaluates ATE output against a gold-standard file and writes
 * an evaluation report.
 *
 * <p>Five arguments are required: dataset name, ATE output folder, ATE output file type,
 * evaluation output file and gold-standard file. With fewer arguments the usage text is
 * printed and the JVM exits with status {@code -1}. The dataset name {@code "genia"}
 * selects {@code createReportGenia}; any other name selects {@code createReportACLRD}.
 *
 * <p>The lemmatiser resources are loaded from {@code src/test/resource/lemmatiser} below
 * the current working directory ({@code user.dir}).
 *
 * @param args command-line arguments as described above
 * @throws IOException propagated from report creation
 * @throws JATEException propagated from report creation
 * @throws ParseException propagated from report creation
 */
public static void main(String[] args) throws IOException, JATEException, ParseException {
    // args[4] is read below, so five arguments are required (the guard used to accept four).
    if (args == null || args.length < 5) {
        StringBuilder sb = new StringBuilder("Usage:\n");
        sb.append("java -cp 'jate.jar' ").append(Scorer.class.getName()).append(" ")
                .append("[CORPUS_NAME] [ATE_OUTPUT_DIR] [ATE_OUTPUT_FILE_TYPE] [EVAL_OUTPUT_FILE] [GOLD_STANDARD_FILE] ").append("\n\n");
        sb.append("Example: java -cp 'jate.jar' ").append(Scorer.class.getName())
                .append(" genia /c/jate/outputDir/ csv genia_eval.csv /c/jate/eval/GENIA/concept.txt \n\n");
        sb.append("[OPTIONS]:\n")
                .append("\t\targs[0]:\t\t 'genia', 'aclrdtec1' or any other dataset name.\n")
                .append("\t\targs[1]:\t\t ATE algorithms output folder that contains one or more ranked term candidates output.\n")
                .append("\t\targs[2]:\t\t ATE algorithms output file type. Two options are 'csv' and 'json'. If file type is 'csv', it should contain a header row. \n")
                .append("\t\targs[3]:\t\t A file name & path to save evaluation output (should not be the same folder of ATE algorithm output.\n")
                .append("\t\targs[4]:\t\t Gold standard file to evaluate against (e.g. the GENIA concept file or the ACL RD-TEC terms file).\n");
        System.out.println(sb);
        System.exit(-1);
    }

    String workingDir = System.getProperty("user.dir");
    Lemmatiser lemmatiser = new Lemmatiser(new EngLemmatiser(
            Paths.get(workingDir, "src", "test", "resource", "lemmatiser").toString(), false, false
    ));

    String datasetName = args[0];
    String ateOutputFolder = args[1];
    String ateOutputType = args[2];
    String outFile = args[3];
    // The gold standard comes from the command line; the formerly hard-coded defaults were
    // src/test/resource/eval/GENIA/concept.txt and src/test/resource/eval/ACL_RD-TEC/terms.txt.
    String gsFile = args[4];

    if (datasetName.equals("genia")) {
        createReportGenia(lemmatiser, ateOutputFolder, ateOutputType,
                gsFile, outFile,
                EVAL_CONDITION_IGNORE_SYMBOL, EVAL_CONDITION_IGNORE_DIGITS, EVAL_CONDITION_CASE_INSENSITIVE,
                EVAL_CONDITION_CHAR_RANGE_MIN, EVAL_CONDITION_CHAR_RANGE_MAX,
                EVAL_CONDITION_TOKEN_RANGE_MIN, EVAL_CONDITION_TOKEN_RANGE_MAX,
                EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
    } else {
        //DOES NOT REALLY NEED ANY PRUNE/LEMMATISATION FOR ACL RD-TEC 1.0
        // RECOMMENDED EVALUATION OF ACL RD-TEC 1.0 AS FOLLOWS
        // createReportACLRD(null, ateOutputFolder, ateOutputType,
        // gsFile, outFile,
        // false, false, false,
        // -1, -1,
        // -1, -1,
        // EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
        createReportACLRD(lemmatiser, ateOutputFolder, ateOutputType,
                gsFile, outFile,
                EVAL_CONDITION_IGNORE_SYMBOL, EVAL_CONDITION_IGNORE_DIGITS, EVAL_CONDITION_CASE_INSENSITIVE,
                EVAL_CONDITION_CHAR_RANGE_MIN, EVAL_CONDITION_CHAR_RANGE_MAX,
                EVAL_CONDITION_TOKEN_RANGE_MIN, EVAL_CONDITION_TOKEN_RANGE_MAX,
                EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
    }
}
开发者ID:ziqizhang,项目名称:jate,代码行数:58,代码来源:Scorer.java
示例11: load
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename, and instantiates any objects to
 * be used, such as the lemmatiser and part-of-speech (pos) tagger.
 *
 * <p>The file stream is closed as soon as the properties have been read.
 *
 * @param filename path of the properties file to read
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException wrapping any exception raised while reading the file or
 *         building the configured objects (including an unknown POS tagger or tokenizer type)
 */
public static BannerProperties load(String filename)
{
    Properties properties = new Properties();
    BannerProperties bannerProperties = new BannerProperties();
    try {
        // try-with-resources: the stream used to be left open after loading.
        try (FileInputStream in = new FileInputStream(filename)) {
            properties.load(in);
        }
        String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory != null) {
            bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
        }
        String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
        if (posTaggerDataDirectory != null) {
            String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
            if (posTagger.equals(HeppleTagger.class.getName())) {
                bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
            } else if (posTagger.equals(MedPostTagger.class.getName())) {
                bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
            } else {
                throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
            }
        }
        String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
        if (tokenizer.equals(NaiveTokenizer.class.getName())) {
            bannerProperties.tokenizer = new NaiveTokenizer();
        } else if (tokenizer.equals(SimpleTokenizer.class.getName())) {
            bannerProperties.tokenizer = new SimpleTokenizer();
        } else if (tokenizer.equals(BaseTokenizer.class.getName())) {
            bannerProperties.tokenizer = new BaseTokenizer();
        } else {
            throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
        }
        bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
        if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true"))) {
            bannerProperties.postProcessor = new ParenthesisPostProcessor();
        }
        bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
        bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
        bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
        bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return bannerProperties;
}
开发者ID:leebird,项目名称:legonlp,代码行数:50,代码来源:BannerProperties.java
示例12: getLemmatiser
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Returns the configured lemmatiser.
 *
 * @return the {@link EngLemmatiser} to use for training and tagging
 */
public EngLemmatiser getLemmatiser() {
    return this.lemmatiser;
}
开发者ID:leebird,项目名称:legonlp,代码行数:7,代码来源:BannerProperties.java
示例13: load
import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Loads the properties file and instantiates the configured lemmatiser, POS tagger and
 * tokenizer, resolving the data directories relative to {@code dataroot}.
 *
 * <p>{@code dataroot} is prepended to the {@code lemmatiserDataDirectory} and
 * {@code posTaggerDataDirectory} values by plain string concatenation, so it must already
 * end with a path separator if one is needed. The file stream is closed as soon as the
 * properties have been read.
 *
 * @param filename path of the properties file to read
 * @param dataroot prefix prepended to the lemmatiser and POS tagger data directories
 * @return the populated {@link BannerProperties}
 * @throws RuntimeException wrapping any exception raised while reading the file or
 *         building the configured objects (including an unknown POS tagger or tokenizer type)
 */
public static BannerProperties load(String filename, String dataroot)
{
    Properties properties = new Properties();
    BannerProperties bannerProperties = new BannerProperties();
    try {
        // try-with-resources: the stream used to be left open after loading.
        try (FileInputStream in = new FileInputStream(filename)) {
            properties.load(in);
        }
        String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory != null) {
            bannerProperties.lemmatiser = new EngLemmatiser(dataroot+lemmatiserDataDirectory, false, true);
        }
        String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
        if (posTaggerDataDirectory != null) {
            String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
            if (posTagger.equals(HeppleTagger.class.getName())) {
                bannerProperties.posTagger = new HeppleTagger(dataroot+posTaggerDataDirectory);
            } else if (posTagger.equals(MedPostTagger.class.getName())) {
                bannerProperties.posTagger = new MedPostTagger(dataroot+posTaggerDataDirectory);
            } else {
                throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
            }
        }
        String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
        if (tokenizer.equals(NaiveTokenizer.class.getName())) {
            bannerProperties.tokenizer = new NaiveTokenizer();
        } else if (tokenizer.equals(SimpleTokenizer.class.getName())) {
            bannerProperties.tokenizer = new SimpleTokenizer();
        } else if (tokenizer.equals(BaseTokenizer.class.getName())) {
            bannerProperties.tokenizer = new BaseTokenizer();
        } else {
            throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
        }
        bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
        if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true"))) {
            bannerProperties.postProcessor = new ParenthesisPostProcessor();
        }
        bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
        bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
        bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
        bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return bannerProperties;
}
开发者ID:BlueBrain,项目名称:bluima,代码行数:43,代码来源:BannerProperties.java
注：本文中的dragon.nlp.tool.lemmatiser.EngLemmatiser类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论