Java example code for reading PDF and Word documents
- 2020-07-21 08:01:15
- OfStack
Maven dependency (jar) required for reading PDF files:
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>1.8.13</version>
</dependency>
Maven dependencies (jars) required for reading Word files:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.16-beta1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16-beta1</version>
</dependency>
Method for reading a Word file:
/**
 * Extracts the text content of a Word document.
 *
 * <p>I/O failures are not propagated to the caller: if the file cannot be
 * opened or read, the stack trace is printed and {@code null} is returned.
 *
 * @param filePath path of the Word file to read
 * @return the text produced by {@code WordExtractor.getText()}, or
 *         {@code null} if an {@link IOException} occurred
 */
public static String getTextFromWord(String filePath) {
    String result = null;
    File file = new File(filePath);
    // try-with-resources closes the extractor first and then the stream,
    // even when getText() throws, so no resource warning needs suppressing.
    try (FileInputStream fis = new FileInputStream(file);
            WordExtractor wordExtractor = new WordExtractor(fis)) {
        result = wordExtractor.getText();
    } catch (IOException e) {
        // Also covers FileNotFoundException (a subclass of IOException) and
        // failures raised while closing; result keeps whatever was read.
        e.printStackTrace();
    }
    return result;
}
Method for reading a PDF file:
/**
 * Extracts the text content of a PDF file.
 *
 * <p>I/O failures are not propagated to the caller: the stack trace is
 * printed and {@code null} is returned instead.
 *
 * @param filePath path of the PDF file to read
 * @return the text produced by {@code PDFTextStripper.getText}, or
 *         {@code null} if an {@link IOException} occurred
 */
public static String getTextFromPdf(String filePath) {
    String text = null;
    PDDocument pdf = null;
    FileInputStream pdfStream = null;
    try {
        pdfStream = new FileInputStream(filePath);
        PDFParser pdfParser = new PDFParser(pdfStream);
        pdfParser.parse();
        pdf = pdfParser.getPDDocument();
        text = new PDFTextStripper().getText(pdf);
    } catch (IOException readFailure) {
        // FileNotFoundException is an IOException, so a missing file is
        // reported here as well.
        readFailure.printStackTrace();
    } finally {
        // Release the input stream first, then the parsed document; a failure
        // in one close must not prevent the other.
        try {
            if (pdfStream != null) {
                pdfStream.close();
            }
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
        try {
            if (pdf != null) {
                pdf.close();
            }
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
    }
    return text;
}
Hopefully, this example code will be helpful to you.