Sample: calling OpenOffice from Java to convert Office documents to PDF

  • 2020-11-26 18:49:03
  • OfStack

Introduction:

During development, Java is often used to convert Microsoft Office documents to PDF; OpenOffice together with JODConverter is generally used to perform the conversion.

OpenOffice is available for both Windows and Linux, so there is no need to worry if the production environment is a Linux system.

1. The jar dependencies required for OpenOffice conversion, taking Maven as an example:


<dependency> 
      <groupId>com.artofsolving</groupId> 
      <artifactId>jodconverter</artifactId> 
      <version>2.2.1</version> 
    </dependency> 
    <dependency> 
      <groupId>org.openoffice</groupId> 
      <artifactId>jurt</artifactId> 
      <version>3.0.1</version> 
    </dependency> 
    <dependency> 
      <groupId>org.openoffice</groupId> 
      <artifactId>ridl</artifactId> 
      <version>3.0.1</version> 
    </dependency> 
    <dependency> 
      <groupId>org.openoffice</groupId> 
      <artifactId>juh</artifactId> 
      <version>3.0.1</version> 
    </dependency> 
    <dependency> 
      <groupId>org.openoffice</groupId> 
      <artifactId>unoil</artifactId> 
      <version>3.0.1</version> 
    </dependency> 
 
    <!-- jodconverter 2.2.1 relies on slf4j-jdk14, and it must be exactly this version; otherwise the library's logging code reports an error (a very basic problem). --> 
    <dependency> 
      <groupId>org.slf4j</groupId> 
      <artifactId>slf4j-jdk14</artifactId> 
      <version>1.4.3</version> 
    </dependency> 

2. The conversion code. The OpenOffice application must be running and listening on port 8100.


/**
 * Converts {@code sourceFile} into {@code targetFile} through an OpenOffice service
 * reached on port 8100. The input and output formats are looked up from the two files'
 * extensions.
 *
 * <p>If the connection cannot be established the failure is logged (with its cause) and
 * the method returns without attempting a conversion. The connection is always released
 * before returning, so repeated calls do not leak sockets.
 *
 * @param sourceFile the document to convert
 * @param targetFile the file to write; its extension selects the output format
 */
public void convert(File sourceFile, File targetFile) {

  // 1: Create the connection object; nothing is opened until connect() is called.
  OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
  try {
    connection.connect();

    DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
    // 2: Look up the input and output formats by file extension.
    // NOTE(review): a BasicDocumentFormatRegistry holds no formats until
    // addDocumentFormat() is called, so both lookups below presumably return null with
    // this registry; DefaultDocumentFormatRegistry looks like the intended class - confirm.
    DocumentFormatRegistry factory = new BasicDocumentFormatRegistry();
    DocumentFormat inputDocumentFormat = factory
        .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
    DocumentFormat outputDocumentFormat = factory
        .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));
    // 3: Perform the conversion
    converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
  } catch (ConnectException e) {
    // Keep the cause in the log so a refused or unreachable port can be diagnosed.
    log.error(" Document conversion PDF failure ", e);
  } finally {
    // Release the socket on every path, including a failed conversion.
    if (connection.isConnected()) {
      connection.disconnect();
    }
  }
}

3. Note: with jodconverter 2.2.1, converting an Office 2007 (or later) document such as xxx.docx reports an error, because Office 2003 documents use the suffix xxx.doc while Office 2007 and later use xxx.docx.

Reading the jodconverter source shows that no DocumentFormat is registered for the xxx.docx format: the method `public DocumentFormat getFormatByFileExtension(String extension)` in BasicDocumentFormatRegistry only matches registered extensions, and by default Word documents are supported only through the doc format.

Source of the BasicDocumentFormatRegistry class:


// 
// JODConverter - Java OpenDocument Converter 
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com> 
// 
// This library is free software; you can redistribute it and/or 
// modify it under the terms of the GNU Lesser General Public 
// License as published by the Free Software Foundation; either 
// version 2.1 of the License, or (at your option) any later version. 
// 
// This library is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// Lesser General Public License for more details. 
// http://www.gnu.org/copyleft/lesser.html 
// 
package com.artofsolving.jodconverter; 
 
import java.util.ArrayList; 
import java.util.Iterator; 
import java.util.List; 
 
/**
 * List-backed {@link DocumentFormatRegistry}. Formats are kept in registration order and
 * every lookup is a linear scan that returns the first match.
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

  private List/*<DocumentFormat>*/ documentFormats = new ArrayList();

  /** Appends {@code documentFormat} to the end of the registry. */
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }

  /** Gives subclasses the backing list itself (not a copy). */
  protected List/*<DocumentFormat>*/ getDocumentFormats() {
    return documentFormats;
  }

  /**
   * Looks a format up by file extension; the argument is lower-cased before comparing.
   *
   * @param extension the file extension
   * @return the first registered DocumentFormat with that extension, or null if the
   *         argument is null or no registered format matches
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    DocumentFormat match = null;
    if (extension != null) {
      String wanted = extension.toLowerCase();
      Iterator cursor = documentFormats.iterator();
      while (match == null && cursor.hasNext()) {
        DocumentFormat candidate = (DocumentFormat) cursor.next();
        String candidateExtension = candidate.getFileExtension();
        if (candidateExtension.equals(wanted)) {
          match = candidate;
        }
      }
    }
    return match;
  }

  /**
   * Looks a format up by MIME type using an exact, case-sensitive comparison.
   *
   * @param mimeType the MIME type to search for
   * @return the first registered DocumentFormat with that MIME type, or null if none matches
   */
  public DocumentFormat getFormatByMimeType(String mimeType) {
    DocumentFormat match = null;
    Iterator cursor = documentFormats.iterator();
    while (match == null && cursor.hasNext()) {
      DocumentFormat candidate = (DocumentFormat) cursor.next();
      String candidateMimeType = candidate.getMimeType();
      if (candidateMimeType.equals(mimeType)) {
        match = candidate;
      }
    }
    return match;
  }
}

DefaultDocumentFormatRegistry, the default subclass of BasicDocumentFormatRegistry, registers the following supported file formats:


// 
// JODConverter - Java OpenDocument Converter 
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com> 
// 
// This library is free software; you can redistribute it and/or 
// modify it under the terms of the GNU Lesser General Public 
// License as published by the Free Software Foundation; either 
// version 2.1 of the License, or (at your option) any later version. 
// 
// This library is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// Lesser General Public License for more details. 
// http://www.gnu.org/copyleft/lesser.html 
// 
package com.artofsolving.jodconverter; 
 
/**
 * Registry pre-populated with the built-in formats. The constructor registers them in a
 * fixed order: PDF, Flash, XHTML, then the text, spreadsheet, presentation and drawing
 * groups.
 */
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {

  public DefaultDocumentFormatRegistry() {
    registerPdfFlashAndXhtml();
    registerTextFormats();
    registerSpreadsheetFormats();
    registerPresentationFormats();
    registerDrawingFormats();
  }

  /** Registers pdf, swf and xhtml, which are created without a document family. */
  private void registerPdfFlashAndXhtml() {
    DocumentFormat pdfFormat = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
    pdfFormat.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
    pdfFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
    pdfFormat.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
    pdfFormat.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
    addDocumentFormat(pdfFormat);

    DocumentFormat flashFormat = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
    flashFormat.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
    flashFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
    addDocumentFormat(flashFormat);

    DocumentFormat xhtmlFormat = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
    xhtmlFormat.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
    xhtmlFormat.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
    xhtmlFormat.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
    addDocumentFormat(xhtmlFormat);
  }

  /** Registers html, odt, sxw, doc, rtf, wpd, txt and wiki, in that order. */
  private void registerTextFormats() {
    // As an input HTML is handled as Text; as an output it can also be produced
    // from Spreadsheet and Presentation documents.
    DocumentFormat htmlFormat = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
    htmlFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
    htmlFormat.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
    htmlFormat.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
    addDocumentFormat(htmlFormat);

    DocumentFormat odtFormat = new DocumentFormat(
        "OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
    odtFormat.setExportFilter(DocumentFamily.TEXT, "writer8");
    addDocumentFormat(odtFormat);

    DocumentFormat sxwFormat = new DocumentFormat(
        "OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
    sxwFormat.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
    addDocumentFormat(sxwFormat);

    DocumentFormat docFormat = new DocumentFormat(
        "Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
    docFormat.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
    addDocumentFormat(docFormat);

    DocumentFormat rtfFormat = new DocumentFormat(
        "Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
    rtfFormat.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
    addDocumentFormat(rtfFormat);

    // WordPerfect gets no export filter.
    DocumentFormat wpdFormat = new DocumentFormat(
        "WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
    addDocumentFormat(wpdFormat);

    DocumentFormat txtFormat = new DocumentFormat(
        "Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
    // FilterName is set to "Text" to prevent OOo from trying to display the
    // "ASCII Filter Options" dialog; alternatively FilterName could be "Text (encoded)"
    // with FilterOptions used to set the encoding if needed.
    txtFormat.setImportOption("FilterName", "Text");
    txtFormat.setExportFilter(DocumentFamily.TEXT, "Text");
    addDocumentFormat(txtFormat);

    DocumentFormat wikiFormat = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
    wikiFormat.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
    addDocumentFormat(wikiFormat);
  }

  /** Registers ods, sxc, xls, csv and tsv, in that order. */
  private void registerSpreadsheetFormats() {
    DocumentFormat odsFormat = new DocumentFormat(
        "OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET,
        "application/vnd.oasis.opendocument.spreadsheet", "ods");
    odsFormat.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
    addDocumentFormat(odsFormat);

    DocumentFormat sxcFormat = new DocumentFormat(
        "OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET,
        "application/vnd.sun.xml.calc", "sxc");
    sxcFormat.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
    addDocumentFormat(sxcFormat);

    DocumentFormat xlsFormat = new DocumentFormat(
        "Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
    xlsFormat.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
    addDocumentFormat(xlsFormat);

    // Field Separator: ','; Text Delimiter: '"'
    addDocumentFormat(newDelimitedTextFormat("CSV", "text/csv", "csv", "44,34,0"));
    // Field Separator: '\t'; Text Delimiter: '"'
    addDocumentFormat(newDelimitedTextFormat(
        "Tab-separated Values", "text/tab-separated-values", "tsv", "9,34,0"));
  }

  /**
   * Builds a spreadsheet format that is imported and exported through the StarCalc
   * text/csv filter, using the same FilterOptions string in both directions.
   */
  private DocumentFormat newDelimitedTextFormat(
      String name, String mimeType, String fileExtension, String filterOptions) {
    String starCalcTextFilter = "Text - txt - csv (StarCalc)";
    DocumentFormat delimited =
        new DocumentFormat(name, DocumentFamily.SPREADSHEET, mimeType, fileExtension);
    delimited.setImportOption("FilterName", starCalcTextFilter);
    delimited.setImportOption("FilterOptions", filterOptions);
    delimited.setExportFilter(DocumentFamily.SPREADSHEET, starCalcTextFilter);
    delimited.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", filterOptions);
    return delimited;
  }

  /** Registers odp, sxi and ppt, in that order. */
  private void registerPresentationFormats() {
    DocumentFormat odpFormat = new DocumentFormat(
        "OpenDocument Presentation", DocumentFamily.PRESENTATION,
        "application/vnd.oasis.opendocument.presentation", "odp");
    odpFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
    addDocumentFormat(odpFormat);

    DocumentFormat sxiFormat = new DocumentFormat(
        "OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION,
        "application/vnd.sun.xml.impress", "sxi");
    sxiFormat.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
    addDocumentFormat(sxiFormat);

    DocumentFormat pptFormat = new DocumentFormat(
        "Microsoft PowerPoint", DocumentFamily.PRESENTATION,
        "application/vnd.ms-powerpoint", "ppt");
    pptFormat.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
    addDocumentFormat(pptFormat);
  }

  /** Registers odg and svg, in that order. */
  private void registerDrawingFormats() {
    DocumentFormat odgFormat = new DocumentFormat(
        "OpenDocument Drawing", DocumentFamily.DRAWING,
        "application/vnd.oasis.opendocument.graphics", "odg");
    odgFormat.setExportFilter(DocumentFamily.DRAWING, "draw8");
    addDocumentFormat(odgFormat);

    DocumentFormat svgFormat = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
    svgFormat.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
    addDocumentFormat(svgFormat);
  }
}

Solution: replace the BasicDocumentFormatRegistry class with your own copy (same package and class name) and change its `public DocumentFormat getFormatByFileExtension(String extension)` method so that any suffix containing "doc" uses the doc DocumentFormat (and likewise for "ppt" and "xls").


// 
// JODConverter - Java OpenDocument Converter 
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com> 
// 
// This library is free software; you can redistribute it and/or 
// modify it under the terms of the GNU Lesser General Public 
// License as published by the Free Software Foundation; either 
// version 2.1 of the License, or (at your option) any later version. 
// 
// This library is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// Lesser General Public License for more details. 
// http://www.gnu.org/copyleft/lesser.html 
// 
package com.artofsolving.jodconverter; 
 
import java.util.ArrayList; 
import java.util.Iterator; 
import java.util.List; 
import java.util.Locale; 
 
/**
 * Rewritten copy of jodconverter's BasicDocumentFormatRegistry. Extension lookups treat
 * any extension containing "doc", "ppt" or "xls" (for example docx, pptx, xlsx) as the
 * plain doc, ppt or xls format, so newer Office files resolve to a registered format.
 *
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

  private List/* <DocumentFormat> */ documentFormats = new ArrayList();

  /** Appends {@code documentFormat} to the end of the registry. */
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }

  /** Gives subclasses the backing list itself (not a copy). */
  protected List/* <DocumentFormat> */ getDocumentFormats() {
    return documentFormats;
  }

  /**
   * Looks a format up by file extension. The extension is lower-cased and unified
   * (see {@link #normalizeExtension(String)}) before it is compared.
   *
   * @param extension
   *      the file extension, in any letter case
   * @return the DocumentFormat for this extension, or null if the extension
   *     is null or not mapped
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    if (extension == null) {
      return null;
    }
    String lowerExtension = normalizeExtension(extension);
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      // Compare from the known non-null side so a format without an extension is skipped
      // instead of throwing a NullPointerException.
      if (lowerExtension.equals(format.getFileExtension())) {
        return format;
      }
    }
    return null;
  }

  /**
   * Lower-cases the extension and collapses the Office variants onto their base format.
   * Lower-casing happens first so that upper- or mixed-case suffixes such as "DOCX" are
   * unified as well, and it uses a fixed locale so the result does not depend on the
   * JVM's default locale.
   */
  private static String normalizeExtension(String extension) {
    String lower = extension.toLowerCase(Locale.ENGLISH);
    if (lower.indexOf("doc") >= 0) {
      return "doc";
    }
    if (lower.indexOf("ppt") >= 0) {
      return "ppt";
    }
    if (lower.indexOf("xls") >= 0) {
      return "xls";
    }
    return lower;
  }

  /**
   * Looks a format up by MIME type using an exact, case-sensitive comparison.
   *
   * @param mimeType
   *      the MIME type to search for
   * @return the first registered DocumentFormat with that MIME type, or null if
   *     the argument is null or no format matches
   */
  public DocumentFormat getFormatByMimeType(String mimeType) {
    if (mimeType == null) {
      return null;
    }
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (mimeType.equals(format.getMimeType())) {
        return format;
      }
    }
    return null;
  }
}

Related articles: