使用 poi 将 word 转换为 html，支持 doc、docx，转换后可以保持图片、样式。
1. 导入 maven 包
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>
2. 转换代码
importar org.apache.poi.hwpf.hwpfdocument; importar org.apache.poi.hwpf.converter.wordtohtmlconverter; importar org.apache.poi.xwpf.converter.core.basicuriresolver; importar org.apache.poi.xwpf.converter.core.fileImageExtractor; importar org.apache.poi.xwpf.converter.xhtml.xhtmlconverter; importar org.apache.poi.xwpf.converter.xhtml.xhtmloptions; importar org.apache.poi.xwpf.usermodel.xwpfdocument; importar org.w3c.dom.document; importar javax.xml.parsers.documentBuilderFactory; importar javax.xml.transform.outputkeys; importar javax.xml.transform.transformer; importar javax.xml.transform.transformerFactory; importar javax.xml.transform.dom.domsource; importar javax.xml.transform.stream.streamResult; importar java.io.file; importar java.io.fileInputStream; importar java.io.fileOutputStream; importar java.io.OutputStreamWriter; classe pública teste {// doc 转换为 html void doctohtml () lança exceção {string fontefilename = "c: //doc//test.doc"; String TargetFilename = "c: //html//test.html"; String imagepathstr = "c: // html // imagem //"; Hwpfdocument wordDocument = new hwpfdocument (new FileInputStream (SourceFileName)); Documento do documento = documentBuilderFactory.NewInstance (). NewDocumentBuilder (). 
NewDocument (); Wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter (document); // 保存图片 , 并返回图片的相对路径 wordtohtmlconverter.setPictureManager ((conteúdo, pictureType, nome, largura, altura) -> {try (fileOutputStream out = new FileOutputStream (imagePathstr + name)) {out.write (content);} catch (Exception e) {E.PrintSTACHTRACE (); wordtohtmlconverter.processdocument (wordDocument); Document htmldocument = wordtohtmlconverter.getDocument (); Domsource domsource = new DomSource (htmldocument); StreamResult StreamResult = new StreamResult (novo arquivo (TargetFileName)); TransformerFactory tf = transformerFactory.NewInstance (); Serializador de transformador = tf.newtransformer (); serializer.setOutputProperty (outputKeys.encoding, "UTF-8"); serializer.setOutputProperty (outputkeys.indent, "sim"); serializer.setOutputProperty (outputkeys.method, "html"); serializer.Transform (Domsource, StreamResult); } // docx 转换为 html public void dochohtml () lança exceção {string fontefilename = "d: //ac//00.docx"; String TargetFilename = "D: //ac//test.html"; String imagepathstr = "d: // ac // imagem //"; OutputStreamWriter outputStreamWriter = null; tente {xwpfdocument document = new xwpfdocument (new FileInputStream (SourceFileName)); Opções xhtmloptions = xhtmloptions.create (); // 存放图片的文件夹 options.setextractor (new FileImageExtractor (new File (ImagePathstr)); // html 中图片的路径 options.uriresolver (new Basicuriresolver ("Image")); outputStreamWriter = new OutputStreamWriter (new FileOutputStream (TargetFilename), "UTF-8"); XhtmlConverter xhtmlConverter = (xhtmlConverter) xhtmlconverter.getInstance (); xhtmlconverter.convert (documento, outputStreamWriter, opções); } finalmente {if (outputStreamWriter! = null) {outputStreamWriter.Close (); }}}演示地址: https://www.xiaoyun.studio/app/preview.html
以上就是本文的全部内容 , 希望对大家的学习有所帮助 , 也希望大家多多支持武林网。