Introduction:
During development, Java is often used to convert Office documents (Word, Excel, PowerPoint) into PDF. A common approach is to use OpenOffice together with the open-source JODConverter library to perform the conversion.
OpenOffice is available for both Windows and Linux, so it is not a problem if the production environment is a Linux system.
1. The jar dependencies required for OpenOffice/JODConverter, using Maven as an example:
<dependency> <groupId>com.artofsolving</groupId> <artifactId>jodconverter</artifactId> <version>2.2.1</version> </dependency> <dependency> <groupId>org.openoffice</groupId> <artifactId>jurt</artifactId> <version>3.0.1</version> </dependency> <dependency> <groupId>org.openoffice</groupId> <artifactId>ridl</artifactId> <version>3.0.1</version> </dependency> <dependency> <groupId>org.openoffice</groupId> <artifactId>juh</artifactId> <version>3.0.1</version> </dependency> <dependency> <groupId>org.openoffice</groupId> <artifactId>unoil</artifactId> <version>3.0.1</version> </dependency> <!--jodconverter2.2.1 must rely on slf4j-jdk14, otherwise the logs will report an error in the source code. It is a very low problem--> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-jdk14</artifactId> <version>1.4.3</version> </dependency>
2. Straight to the code. Note that the OpenOffice application must be running as a service and listening on port 8100.
public void convert(File sourceFile, File targetFile) { try { // 1: Open the connection OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100); connection.connect(); DocumentConverter converter = new OpenOfficeDocumentConverter(connection); // 2: Get Format DocumentFormatRegistry factory = new BasicDocumentFormatRegistry(); DocumentFormat inputDocumentFormat = factory .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath())); DocumentFormat outputDocumentFormat = factory .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath())); // 3: Execute the conversion converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat); } catch (ConnectException e) { log.info("Document conversion failed"); } }3. Note: jodconverter will report an error after converting the xxx.docx document after the 2007 version. Everyone knows that the 03 suffix is xxx.doc. The 07 version will be xxx.docx.
Checking the JODConverter source code shows that no DocumentFormat is registered for the docx extension. BasicDocumentFormatRegistry's method public DocumentFormat getFormatByFileExtension(String extension) only returns a format whose registered extension matches exactly, and the default registry supports doc but not docx.
BasicDocumentFormatRegistry class source code
// // JODConverter - Java OpenDocument Converter // Copyright (C) 2004-2007 - Mirko Nasato <[email protected]> // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // http://www.gnu.org/copyleft/lesser.html // package com.artofsolving.jodconverter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class BasicDocumentFormatRegistry implements DocumentFormatRegistry { private List/*<DocumentFormat>*/ documentFormats = new ArrayList(); public void addDocumentFormat(DocumentFormat documentFormat) { documentFormats.add(documentFormat); } protected List/*<DocumentFormat>*/ getDocumentFormats() { return documentFormats; } /** * @param extension the file extension * @return the DocumentFormat for this extension, or null if the extension is not mapped */ public DocumentFormat getFormatByFileExtension(String extension) { if (extension == null) { return null; } String lowerExtension = extension.toLowerCase(); for (Iterator it = documentFormats.iterator(); it.hasNext();) { DocumentFormat format = (DocumentFormat) it.next(); if (format.getFileExtension().equals(lowerExtension)) { return format; } } return null; } public DocumentFormat getFormatByMimeType(String mimeType) { for (Iterator it = documentFormats.iterator(); it.hasNext();) { DocumentFormat format = (DocumentFormat) it.next(); if (format.getMimeType().equals(mimeType)) { return format; } } return null; } }
The file formats supported by the default implementation class, DefaultDocumentFormatRegistry, are as follows:
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <[email protected]>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

/**
 * Registry pre-populated with the built-in document formats. Every format is
 * created and registered in the constructor, in the order listed below.
 *
 * <p>The Microsoft Office entries are registered under the extensions
 * "doc", "xls" and "ppt" only; no format is registered here for "docx",
 * "xlsx" or "pptx".
 */
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {

    /**
     * Creates the registry and registers all built-in formats.
     */
    public DefaultDocumentFormatRegistry() {
        // --- Export-only formats (no document family given to the constructor) ---
        final DocumentFormat pdf = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
        pdf.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
        pdf.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
        pdf.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
        pdf.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
        addDocumentFormat(pdf);

        final DocumentFormat swf = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
        swf.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
        swf.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
        addDocumentFormat(swf);

        final DocumentFormat xhtml = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
        xhtml.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
        xhtml.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
        xhtml.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
        addDocumentFormat(xhtml);

        // HTML is treated as Text when supplied as input, but as an output it is also
        // available for exporting Spreadsheet and Presentation formats
        final DocumentFormat html = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
        html.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
        html.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
        html.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
        addDocumentFormat(html);

        // --- Text documents ---
        final DocumentFormat odt = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
        odt.setExportFilter(DocumentFamily.TEXT, "writer8");
        addDocumentFormat(odt);

        final DocumentFormat sxw = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
        sxw.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
        addDocumentFormat(sxw);

        final DocumentFormat doc = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
        doc.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
        addDocumentFormat(doc);

        final DocumentFormat rtf = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
        rtf.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
        addDocumentFormat(rtf);

        // WordPerfect is registered without any export filter.
        final DocumentFormat wpd = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
        addDocumentFormat(wpd);

        final DocumentFormat txt = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
        // set FilterName to "Text" to prevent OOo from trying to display the "ASCII Filter Options" dialog
        // alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed
        txt.setImportOption("FilterName", "Text");
        txt.setExportFilter(DocumentFamily.TEXT, "Text");
        addDocumentFormat(txt);

        final DocumentFormat wikitext = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
        wikitext.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
        addDocumentFormat(wikitext);

        // --- Spreadsheets ---
        final DocumentFormat ods = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.oasis.opendocument.spreadsheet", "ods");
        ods.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
        addDocumentFormat(ods);

        final DocumentFormat sxc = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");
        sxc.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
        addDocumentFormat(sxc);

        final DocumentFormat xls = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
        xls.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
        addDocumentFormat(xls);

        final DocumentFormat csv = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");
        csv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
        csv.setImportOption("FilterOptions", "44,34,0"); // Field Separator: ','; Text Delimiter: '"'
        csv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
        csv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "44,34,0");
        addDocumentFormat(csv);

        final DocumentFormat tsv = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");
        tsv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
        tsv.setImportOption("FilterOptions", "9,34,0"); // Field Separator: '\t'; Text Delimiter: '"'
        tsv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
        tsv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "9,34,0");
        addDocumentFormat(tsv);

        // --- Presentations ---
        final DocumentFormat odp = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");
        odp.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
        addDocumentFormat(odp);

        final DocumentFormat sxi = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi");
        sxi.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
        addDocumentFormat(sxi);

        final DocumentFormat ppt = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");
        ppt.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
        addDocumentFormat(ppt);

        // --- Drawings ---
        final DocumentFormat odg = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");
        odg.setExportFilter(DocumentFamily.DRAWING, "draw8");
        addDocumentFormat(odg);

        final DocumentFormat svg = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
        svg.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
        addDocumentFormat(svg);
    }
}
Solution: rewrite the public DocumentFormat getFormatByFileExtension(String extension) method by providing your own copy of the BasicDocumentFormatRegistry class (same package and class name). As long as the extension contains doc, the doc DocumentFormat is used; ppt and xls are handled the same way.
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <[email protected]>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/**
 * Rewritten BasicDocumentFormatRegistry that additionally resolves
 * extensions containing "doc", "ppt" or "xls" (for example "docx", "pptx",
 * "xlsx") to the formats registered as "doc", "ppt" and "xls".
 *
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

    // Registered formats, in the order they were added.
    private List/* <DocumentFormat> */ documentFormats = new ArrayList();

    /**
     * Appends a format to the registry.
     *
     * @param documentFormat the format to register
     */
    public void addDocumentFormat(DocumentFormat documentFormat) {
        documentFormats.add(documentFormat);
    }

    /**
     * @return the internal list of registered formats (not a copy)
     */
    protected List/* <DocumentFormat> */ getDocumentFormats() {
        return documentFormats;
    }

    /**
     * Looks up a format by file extension, ignoring case.
     *
     * <p>An exactly matching registered extension always wins, so a format
     * that is explicitly registered as e.g. "docx" is still reachable. Only
     * when there is no exact match is an extension containing "doc", "ppt"
     * or "xls" resolved to the format registered under that short name.
     *
     * @param extension
     *            the file extension
     * @return the DocumentFormat for this extension, or null if the extension
     *         is not mapped
     */
    public DocumentFormat getFormatByFileExtension(String extension) {
        if (extension == null) {
            return null;
        }
        // Normalise the case BEFORE testing for doc/ppt/xls; otherwise "DOCX"
        // would skip the aliasing and end up unmapped. A fixed locale keeps the
        // result independent of the JVM default locale.
        String lowerExtension = extension.toLowerCase(Locale.ENGLISH);

        DocumentFormat exactMatch = findByExtension(lowerExtension);
        if (exactMatch != null) {
            return exactMatch;
        }

        String unifiedExtension = unifyOfficeExtension(lowerExtension);
        if (unifiedExtension == null) {
            return null;
        }
        return findByExtension(unifiedExtension);
    }

    /**
     * Returns the first registered format whose MIME type equals
     * {@code mimeType}.
     *
     * @param mimeType the MIME type to look up
     * @return the matching DocumentFormat, or null if none is registered
     */
    public DocumentFormat getFormatByMimeType(String mimeType) {
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getMimeType().equals(mimeType)) {
                return format;
            }
        }
        return null;
    }

    /** Returns the first registered format with exactly this lower-case extension, or null. */
    private DocumentFormat findByExtension(String lowerExtension) {
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getFileExtension().equals(lowerExtension)) {
                return format;
            }
        }
        return null;
    }

    /** Maps an extension containing doc/ppt/xls to that short name; null when none applies. */
    private static String unifyOfficeExtension(String lowerExtension) {
        if (lowerExtension.indexOf("doc") >= 0) {
            return "doc";
        }
        if (lowerExtension.indexOf("ppt") >= 0) {
            return "ppt";
        }
        if (lowerExtension.indexOf("xls") >= 0) {
            return "xls";
        }
        return null;
    }
}
That is all for this article. I hope it is helpful for your learning, and I hope you will continue to support Wulin.com.