本文整理汇总了Java中org.pdfbox.pdmodel.PDDocument类的典型用法代码示例。如果您正苦于以下问题：Java PDDocument类的具体用法？Java PDDocument怎么用？Java PDDocument使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
PDDocument类属于org.pdfbox.pdmodel包，在下文中一共展示了PDDocument类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getWordsToHighlight
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Runs an NRC_PDFHighlighter over this object's document and returns the words it reports.
 *
 * The XML produced by the highlighter goes to an in-memory writer that is not read
 * afterwards; only the highlighter's word list is returned.
 *
 * @param highlightWords The words handed to the highlighter as search terms.
 * @return The array returned by the highlighter's getWordsToHighlight(), or null when an
 *         IOException occurred (its stack trace is printed in that case).
 */
public String[] getWordsToHighlight(String[] highlightWords) {
    String[] result = null;
    try {
        NRC_PDFHighlighter highlighter = new NRC_PDFHighlighter();
        PDDocument wrapped = new PDDocument(document);
        CharArrayWriter sink = new CharArrayWriter();
        highlighter.generateXMLHighlight(wrapped, highlightWords, sink);
        result = highlighter.getWordsToHighlight();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:17,代码来源:NRC_PDFDocument.java
示例2: generateXMLHighlight
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Writes an XML highlight document for the given PDF to the supplied writer.
 *
 * The method records the writer and the search words in fields, resets the found-word
 * list, writes the XML prologue, runs writeText over the document (into a UTF-16
 * in-memory buffer), then writes the XML epilogue and flushes the writer.
 *
 * @param pdDocument The PDF to find words in.
 * @param sWords The words to search for.
 * @param xmlOutput The writer that receives the resulting XML.
 *
 * @throws IOException If there is an error reading from the PDF, or writing to the XML.
 */
public void generateXMLHighlight(PDDocument pdDocument, String[] sWords, Writer xmlOutput ) throws IOException
{
    final String eol = System.getProperty("line.separator");

    highlighterOutput = xmlOutput;
    searchedWords = sWords;
    foundWords = new Vector(); // starts empty; per the original note it is filled in endPage()

    // color and mode are not implemented by the highlight spec, so they are not emitted
    final String prologue = "<XML>"+eol+"<Body units=characters " +
        " version=2>"+eol+"<Highlight>";
    highlighterOutput.write(prologue);
    highlighterOutput.write(eol);

    textOS = new ByteArrayOutputStream();
    textWriter = new OutputStreamWriter( textOS, "UTF-16" );
    writeText(pdDocument, textWriter);

    final String epilogue = "</Highlight>"+eol+"</Body>"+eol+"</XML>";
    highlighterOutput.write(epilogue);
    highlighterOutput.flush();
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:29,代码来源:NRC_PDFHighlighter.java
示例3: getPDFdocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Loads a PDF document from the given stream.
 *
 * @param inputStream The stream handed to PDDocument.load.
 * @param contentHandler Passed to addErrorTagToOutput together with the exception text
 *                       when loading fails.
 * @return The loaded document, or null if loading threw an IOException.
 */
private PDDocument getPDFdocument(InputStream inputStream, ContentHandler contentHandler) {
    try {
        return PDDocument.load(inputStream);
    } catch (IOException loadFailure) {
        logger.error("PDFParser(InputStream)", loadFailure);
        // Also put the failure into the output document so there is a chance
        // to see there what went wrong.
        addErrorTagToOutput(contentHandler, loadFailure.toString());
        return null;
    }
}
开发者ID:evlist,项目名称:orbeon-forms,代码行数:23,代码来源:FromPdfConverter.java
示例4: textContentOf
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Extracts the text of a PDF held in memory.
 *
 * @param pdfData The raw bytes of the PDF.
 * @return The text produced by PDFTextStripper for the loaded document.
 * @throws IOException If PDDocument.load, the text extraction or closing the document fails.
 */
private static String textContentOf(byte[] pdfData) throws IOException {
    ByteArrayInputStream source = new ByteArrayInputStream(pdfData);
    PDDocument loaded = PDDocument.load(source);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(loaded);
    } finally {
        // Always close the document, whether or not extraction succeeded.
        loaded.close();
    }
}
开发者ID:hmcts,项目名称:cmc-pdf-service,代码行数:9,代码来源:GeneratedPDFContentV2Test.java
示例5: getHighlightPositions
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Generates the XML highlight output for this object's document into a file.
 *
 * @param highlightWord The search term passed to the highlighter.
 * @param filePath The file the XML is written to, encoded as UTF-8.
 * @return The writer opened on filePath; this method does not close it. The value is
 *         null when an IOException occurred before the writer was created. When an
 *         IOException occurs later, the stack trace is printed and the writer is still
 *         returned.
 */
public OutputStreamWriter getHighlightPositions(String highlightWord, File filePath) {
    OutputStreamWriter writer = null;
    try {
        NRC_PDFHighlighter highlighter = new NRC_PDFHighlighter();
        PDDocument wrapped = new PDDocument(document);
        FileOutputStream fileStream = new FileOutputStream(filePath);
        writer = new OutputStreamWriter(fileStream,"UTF-8");
        highlighter.generateXMLHighlight(wrapped, highlightWord, writer);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return writer;
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:14,代码来源:NRC_PDFDocument.java
示例6: main
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Command line application: loads the PDF named by the first argument and writes the
 * XML highlight output for the remaining arguments to standard output.
 *
 * @param args The command line arguments to the application: args[0] is passed to
 *             PDDocument.load, args[1] and onwards are the strings to highlight.
 *
 * @throws IOException If there is an error generating the highlight file.
 */
public static void main(String[] args) throws IOException
{
    NRC_PDFHighlighter xmlExtractor = new NRC_PDFHighlighter();
    if( args.length < 2 )
    {
        usage();
        // usage() is not visible here; if it returns instead of terminating the JVM,
        // stop explicitly. Otherwise zero arguments would reach new String[-1]
        // (NegativeArraySizeException) and one argument would run with no search words.
        return;
    }

    String[] highlightStrings = new String[ args.length - 1 ];
    System.arraycopy( args, 1, highlightStrings, 0, highlightStrings.length );

    PDDocument doc = null;
    try
    {
        doc = PDDocument.load( args[0] );
        xmlExtractor.generateXMLHighlight(
            doc,
            highlightStrings,
            new OutputStreamWriter( System.out ) );
    }
    finally
    {
        // Close the document if it was opened, even when highlighting failed.
        if( doc != null )
        {
            doc.close();
        }
    }
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:35,代码来源:NRC_PDFHighlighter.java
示例7: getText
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Returns the text of a document as a single string. See writeText. <br />
 * NOTE: The document must not be encrypted when coming into this method.
 *
 * The list that writeText also requires is created here and discarded; only the text
 * written to the string buffer is returned.
 *
 * @param doc The document to get the text from.
 *
 * @return The text of the PDF document.
 *
 * @throws IOException if the doc state is invalid or it is encrypted.
 */
public String getText (PDDocument doc) throws IOException
{
    StringWriter textBuffer = new StringWriter();
    List discarded = new ArrayList();
    writeText( doc, textBuffer, discarded );
    return textBuffer.toString();
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:21,代码来源:NRC_PDFTextStripperWithFonts.java
示例8: startDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Hook called before processing of the document starts. This implementation passes an
 * iterator over getCharactersByArticle() to guessTitle, writes the header and resets
 * the page counter to zero.
 *
 * @param pdf The PDF document that is being processed (not used by this implementation).
 * @throws IOException If an IO error occurs.
 */
protected void startDocument(PDDocument pdf) throws IOException
{
    guessTitle(getCharactersByArticle().iterator());
    writeHeader();
    pageNumber = 0;
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:15,代码来源:NRC_PDFText2XML.java
示例9: endDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Writes the closing body and xmlstream tags to the output and flushes it.
 *
 * @param pdf The PDF document that was processed (not used by this implementation).
 * @throws IOException If writing to or flushing the output fails.
 * @see PDFTextStripper#endDocument( PDDocument )
 */
public void endDocument(PDDocument pdf) throws IOException
{
    // Close the elements in the order body, then xmlstream.
    this.output.write("</body>");
    this.output.write("</xmlstream>");
    this.output.flush();
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:10,代码来源:NRC_PDFText2XML.java
示例10: toPDDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Opens this PDF as a PDFBox document.
 *
 * The source is chosen in this order: the in-memory bytes (barr) when present; the
 * file itself when the resource is a FileResource; otherwise the bytes read from the
 * resource via IOUtil.toBytes. When a password is set the document is decrypted with it.
 * Every call loads a new document; this method does not close it.
 *
 * @return The loaded (and, if a password is set, decrypted) document.
 * @throws CryptographyException Declared for the decrypt call.
 * @throws InvalidPasswordException Declared for the decrypt call.
 * @throws IOException If loading the document or reading the resource fails.
 */
public PDDocument toPDDocument() throws CryptographyException, InvalidPasswordException, IOException {
    PDDocument doc;
    if (barr != null) {
        doc = PDDocument.load(new ByteArrayInputStream(barr, 0, barr.length));
    } else if (resource instanceof FileResource) {
        doc = PDDocument.load((File) resource);
    } else {
        // barr is null on this path, so the length must come from the bytes just read;
        // using barr.length here threw a NullPointerException.
        byte[] bytes = IOUtil.toBytes(resource);
        doc = PDDocument.load(new ByteArrayInputStream(bytes, 0, bytes.length));
    }
    if (password != null) {
        doc.decrypt(password);
    }
    return doc;
}
开发者ID:lucee,项目名称:Lucee4,代码行数:16,代码来源:PDFDocument.java
示例11: extractText
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Extracts the content of a PDF as HTML text using PDFText2HTML.
 *
 * The PDFBox document obtained from doc.toPDDocument() is closed before this method
 * returns, whether or not extraction succeeded.
 *
 * @param doc The PDF to extract text from.
 * @param pageNumbers Currently ignored: the whole document is always processed.
 * @return The output of the stripper as a String.
 * @throws IOException If loading, extracting or closing the document fails.
 * @throws CryptographyException Propagated from doc.toPDDocument().
 * @throws InvalidPasswordException Propagated from doc.toPDDocument().
 */
public static Object extractText(PDFDocument doc, Set<Integer> pageNumbers) throws IOException, CryptographyException, InvalidPasswordException {
    PDDocument pdDoc = doc.toPDDocument();
    try {
        PDFText2HTML stripper = new PDFText2HTML();
        StringWriter writer = new StringWriter();
        stripper.writeText(pdDoc, writer);
        return writer.toString();
    } finally {
        // The document was opened for this call only; release it so it is not leaked.
        pdDoc.close();
    }
}
开发者ID:lucee,项目名称:Lucee4,代码行数:33,代码来源:PDFUtil.java
示例12: getDocumentInformation
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Reads the document information from a PDF document.
 *
 * @param doc The document to query.
 * @param contentHandler Passed to addErrorTagToOutput together with the exception text
 *                       when the lookup fails.
 * @return The document information, or null if any exception was thrown while reading
 *         it (the exception is logged).
 */
private PDDocumentInformation getDocumentInformation(PDDocument doc, ContentHandler contentHandler) {
    try {
        return doc.getDocumentInformation();
    } catch (Exception failure) {
        logger.error(failure);
        addErrorTagToOutput(contentHandler, failure.toString());
        return null;
    }
}
开发者ID:evlist,项目名称:orbeon-forms,代码行数:15,代码来源:FromPdfConverter.java
示例13: addPageCountAttribute
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Adds the page count of the document as an attribute, when it is greater than zero.
 *
 * If reading the page count throws an IOException, the error is logged and no
 * attribute is added.
 *
 * @param atts The attribute list that receives the ATT_PAGES attribute.
 * @param doc The document whose page count is read.
 */
private void addPageCountAttribute(AttributesImpl atts, PDDocument doc) {
    int pages;
    try {
        pages = doc.getPageCount();
    } catch (IOException e) {
        logger.error(e);
        pages = 0;
    }
    if (pages <= 0) {
        return;
    }
    atts.addAttribute("", ATT_PAGES, ATT_PAGES, ATT_CDATA, String.valueOf(pages));
}
开发者ID:evlist,项目名称:orbeon-forms,代码行数:18,代码来源:FromPdfConverter.java
示例14: endDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Writes the closing body and html tags to the output.
 *
 * @param pdf The PDF document that was processed (not used by this implementation).
 * @throws IOException If writing to the output fails.
 * @see PDFTextStripper#endDocument( PDDocument )
 */
public void endDocument(PDDocument pdf) throws IOException
{
    this.output.write("</body></html>");
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:8,代码来源:NRC_PDFText2HTML.java
示例15: getText
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Runs writeText over the document and returns the entries it collected in the list
 * argument, converted to an Object[][]. The text written to the string buffer is
 * discarded. <br />
 * NOTE: The document must not be encrypted when coming into this method.
 *
 * @param doc The document to process.
 *
 * @return The contents of the list filled by writeText, as an array of Object arrays.
 *
 * @throws IOException if the doc state is invalid or it is encrypted.
 */
public Object[][] getText( PDDocument doc ) throws IOException
{
    StringWriter discardedText = new StringWriter();
    List collected = new ArrayList();
    writeText( doc, discardedText, collected );
    Object[][] rows = (Object[][]) collected.toArray(new Object[0][]);
    return rows;
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:18,代码来源:NRC_PDFFonttedTextStripper.java
示例16: getText
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Returns the text of a document by running writeText into an in-memory buffer. <br />
 * NOTE: The document must not be encrypted when coming into this method.
 *
 * @param doc The document to get the text from.
 *
 * @return The text of the PDF document.
 *
 * @throws IOException if the doc state is invalid or it is encrypted.
 */
public String getText( PDDocument doc ) throws IOException
{
    StringWriter buffer = new StringWriter();
    writeText( doc, buffer );
    String text = buffer.toString();
    return text;
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:17,代码来源:NRC_PDFTextStripper.java
示例17: parseDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Parses a PDF document from a stream using PDFParser.
 *
 * @param input The input stream for the document.
 * @return The document produced by the parser.
 * @throws IOException If there is an error parsing the document.
 */
private static PDDocument parseDocument(InputStream input) throws IOException {
    PDFParser pdfParser = new PDFParser(input);
    pdfParser.parse();
    PDDocument parsed = pdfParser.getPDDocument();
    return parsed;
}
开发者ID:NCAR,项目名称:joai-project,代码行数:13,代码来源:PageDesc.java
示例18: writeText
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Wraps the COSDocument in a PDDocument and delegates to the writeText overload that
 * takes a PDDocument, a Writer and a List.
 *
 * @deprecated
 * @param doc The document to extract the text.
 * @param outputStream The stream to write the text to.
 * @param ft The list passed through unchanged to the delegated call.
 * @throws IOException If there is an error extracting the text.
 */
public void writeText( COSDocument doc, Writer outputStream, List ft ) throws IOException
{
    PDDocument wrapped = new PDDocument( doc );
    writeText( wrapped, outputStream, ft );
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:12,代码来源:NRC_PDFFonttedTextStripper.java
示例19: startDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Extension hook for subclasses, documented as being called before processing of the
 * document starts. The body here is intentionally empty.
 *
 * @param pdf The PDF document that is being processed.
 * @throws IOException If an IO error occurs (declared so that overrides may throw it;
 *                     this empty implementation never does).
 */
protected void startDocument(PDDocument pdf) throws IOException
{
// no default implementation, but available for subclasses
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:12,代码来源:NRC_PDFFonttedTextStripper.java
示例20: endDocument
import org.pdfbox.pdmodel.PDDocument; //导入依赖的package包/类
/**
 * Extension hook for subclasses, documented as being called after processing of the
 * document finishes. The body here is intentionally empty.
 *
 * @param pdf The PDF document that is being processed.
 * @throws IOException If an IO error occurs (declared so that overrides may throw it;
 *                     this empty implementation never does).
 */
protected void endDocument(PDDocument pdf ) throws IOException
{
// no default implementation, but available for subclasses
}
开发者ID:LowResourceLanguages,项目名称:InuktitutComputing,代码行数:12,代码来源:NRC_PDFFonttedTextStripper.java
注：本文中的org.pdfbox.pdmodel.PDDocument类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论