Java는 POI를 사용하여 Word를 HTML로 변환합니다

저자：Eve Cole 업데이트 시간：2025-06-10 18:00:04

使用 POI 将 Word 文档（doc、docx）转换为 HTML，转换后可以保持图片、样式。

1. Maven 包导入

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>

2. 转换代码

import org.apache.poi.hwpf.hwpfdocument; import org.apache.poi.hwpf.converter.wordtohtmlconverter; import org.apache.poi.xwpf.converter.core.basicuriresolver; import org.apache.poi.xwpf.converter.core.fileimageextractor; import org.apache.poi.xwpf.converter.xhtml.xhtmlconverter; import org.apache.poi.xwpf.converter.xhtml.xhtmloptions; import org.apache.poi.xwpf.usermodel.xwpfdocument; import org.w3c.dom.document; import javax.xml.parsers.documentbuilderfactory; javax.xml.transform.outputkeys import; import javax.xml.transform.transformer; import javax.xml.transform.transformerFactory; import javax.xml.transform.dom.domsource; import javax.xml.transform.stream.streamresult; import java.io.file; import java.io.fileInputStream; import java.io.fileoutputStream; import java.io.outputStreamwriter; 공개 클래스 테스트 {// doc 转换为 html void doctohtml ()는 예외 {string sourcefilename = "c : //doc//test.doc"; 문자열 targetfilename = "c : //html//test.html"; 문자열 imagePathStr = "C : // html // image //"; hwpfdocument wordDocument = new hwpfdocument (new FileInputStream (sourceFilename)); 문서 문서 = DocumentBuilDerfactory.newinstance (). newDocumentBuilder (). 
newDocument (); WordToHtMlConverter WordTohtmlConverter = new WordToHtmlConverter (Document); // 保存图片, 保存图片 wordtohtmlconverter.setPicturesManager ((내용, pictureType, 이름, 너비, 높이) -> {try (fileoutputStream out = new FileOutputStream (imagePathStr + name)) {out.write (content);} {e.printstacktrace ();}); WordToHtMlConverter.ProcessDocument (WordDocument); 문서 htmldocument = wordtohtmlconverter.getDocument (); domsource domsource = new domsource (htmldocument); StreamResult StreamResult = new StreamResult (새 파일 (TargetFilename)); TransformerFactory tf = transformerFactory.newinstance (); 변압기 Serializer = tf.newtransformer (); Serializer.setoutPutProperty (outputKeys.encoding, "UTF-8"); Serializer.setOutputProperty (outputKeys.Indent, "Yes"); Serializer.setoutPutProperty (outputKeys.Method, "HTML"); Serializer.Transform (Domsource, StreamResult); } // docx/html public void docxtohtml ()는 예외 {string sourcefilename = "d : //ac/00.docx"; 문자열 targetfilename = "d : //ac//test.html"; 문자열 imagePathStr = "d : // ac // image //"; outputStreamWriter outputStreamWriter = null; try {xwpfdocument document = new xwpfdocument (new FileInputStream (sourceFilename)); xhtmloptions 옵션 = xhtmloptions.create (); // 存放图片的文件夹 옵션 .Setextractor (새 FileImageExtractor (새 파일 (imagePathStrs)); // html t 옵션 .URESOLVER (new BasicIRIRESOLVER ( "image")); outputStreamWriter = new OutputStreamWriter (new FileOutputStream (TargetFilename), "UTF-8"); XHTMLCONVERTER XHTMLCONVERTER = (XHTMLCONVERTER) XHTMLCONVERTER.getInstance (); xhtmlconverter.convert (문서, outputstreamwriter, 옵션); } 마침내 {if (outputStreamWriter! = null) {outputStreamWriter.close (); }}}

演示地址 : https://www.xiaoyun.studio/app/preview.html

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持武林网。