Java использует POI для преобразования документов Word в HTML

Автор：Eve Cole Время обновления：2025-06-10 18:00:04

使用 POI 将 Word 转换为 HTML，支持 doc、docx，转换后可以保持图片、样式。

1. 导入 Maven 包

<!-- Apache POI core -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<!-- Apache POI scratchpad (provides the org.apache.poi.hwpf classes used for .doc) -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<!-- Apache POI OOXML (provides the org.apache.poi.xwpf classes used for .docx) -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<!-- XDocReport -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<!-- OOXML schema classes -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>

2. 转换代码

Импорт org.apache.poi.hwpf.hwpfdocument; Import org.apache.poi.hwpf.converter.wordtohtmlConverter; Импорт org.apache.poi.xwpf.converter.core.basicuriresolver; Import org.apache.poi.xwpf.converter.core.fileimageExtractor; Import org.apache.poi.xwpf.converter.xhtml.xhtmlConverter; Import org.apache.poi.xwpf.converter.xhtml.xhtmloptions; Import org.apache.poi.xwpf.usermodel.xwpfdocument; Импорт org.w3c.dom.document; Импорт javax.xml.parsers.documentbuilderfactory; импортировать javax.xml.transform.outputkeys; импортировать javax.xml.transform.transformer; Импорт javax.xml.transform.transformerfactory; импортировать javax.xml.transform.dom.domsource; Импорт javax.xml.transform.stream.streamResult; Импорт java.io.file; импортировать java.io.fileinputstream; Импорт java.io.fileOutputStream; импортировать java.io.outputstreamwriter; открытый тест класса {// doc 转换为 html void doctohtml () throws exception {string sourcefilename = "c: //doc//test.doc"; String targetfilename = "c: //html//test.html"; String ImagePathstr = "c: // html // Image //"; Hwpfdocument worddocument = new hwpfdocument (new FileInputStream (SourceFilename)); Document Document = DocumentBuilderFactory.newinStance (). NewDocumentBuilder (). 
NewDocument (); Wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter (document); // 保存图片 ， 并返回图片的相对路径 wordtohtmlconverter.setpicturesmanager ((content, pictureType, name, width, height) -> {try (fileOutputStream out = new FileOutputStream (ImagePathstr + name)) {out.write (content);} catch (Exception e) {e.printStacktrace ();} return »;} watch (Exception e) {e.printStacktrace ();} stacht/stame; wordtohtmlconverter.processdocument (WordDocument); Документ htmldocument = wordtohtmlconverter.getDocument (); Domsource domsource = new domsource (htmldocument); StreamResult streamResult = new StreamResult (новый файл (TargetFilename)); Transformerffactory TF = TransformerFactory.newinStance (); Transformer serializer = tf.newTransformer (); serializer.setOutputProperty (outputKeys.encoding, "UTF-8"); serializer.setOutputProperty (outputKeys.indent, "Да"); serializer.setOutputProperty (outputKeys.method, "html"); serializer.transform (domsource, Streamresult); } // docx 转换为 html public void docxtohtml () бросает exception {string sourcefilename = "d: //ac//00.docx"; String TargetFilEname = "d: //ac//test.html"; String imagepathstr = "d: // ac // image //"; OutputStreamWriter outputStreamWriter = null; try {xwpfdocument document = new xwpfdocument (new FileInputStream (SourceFilename)); Xhtmloptions options = xhtmloptions.create (); // 存放图片的文件夹 Options.setExtractor (новый FileImageExtractor (новый файл (imagePathstr))); // html 中图片的路径 options.uriresolver (new Basicuriresolver ("Image")); outputStreamWriter = new outputStreamWriter (new FileOutputStream (TargetFilename), "UTF-8"); Xhtmlconverter xhtmlconverter = (xhtmlconverter) xhtmlconverter.getinstance (); xhtmlconverter.convert (документ, outputstreamwriter, options); } наконец {if (outputStreamWriter! = null) {outputStreamWriter.Close (); }}}

演示地址: https://www.xiaoyun.studio/app/preview.html

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持武林网。