java实现word文件转html(图片用base64转化)


1.添加需要的jar包:

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.xdocreport.document</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.15</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.15</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.1</version>
        </dependency>

2.来一个小demo吧。

对于该demo,描述几个我觉得需要注意的点:

2.1:不知道有没有小伙伴发生了jar包冲突的现象呢,可以考虑修改一下jar包版本号哦,基本上应该没什么问题呢;

2.2:word文档的后缀有.doc和.docx两种,二者的转换方法并不相同。所以,对于不同的文档,需要先判断其后缀是什么,才能选择对应的转换方法进行下一步操作;

2.3:此demo,我选择通过接口直接返回动态的html,当然,如果想生成一个静态的html,可以自己修改输出方式;

2.4:对于文档中的图片如何转化的问题,暂时选择用base64编码后直接嵌入到html中;

2.5:最后:此demo中测试转化的文档,目前只测试了简单的文本加图片,所以可能有别的问题待发现并解决。

    /**
     * HTTP GET endpoint ({@code /convertWordToHtml}) that converts a Word document to HTML.
     *
     * <p>This method performs no conversion itself; it hands both arguments straight to
     * {@code demoService.convertWordToHtml}.
     *
     * @param id                  required request parameter, forwarded unchanged to the service
     *                            (presumably identifies the Word document to convert; confirm
     *                            against the service implementation)
     * @param httpServletResponse servlet response, forwarded unchanged to the service
     * @throws Exception whatever the service call throws
     */
    @ApiOperation(value = "将word转成html")
    @GetMapping(value = "/convertWordToHtml")
    public void convertWordToHtml(
            @RequestParam(required = true) String id,
            HttpServletResponse httpServletResponse
    ) throws Exception {
        demoService.convertWordToHtml(id, httpServletResponse);
    }

   // Some unimportant code is omitted here: all that is needed is to obtain the
   // Word document to be converted as an InputStream.
   // Placeholder: initialised to null in this snippet; the omitted code is expected to assign the real stream.
   InputStream inputStream = null;
   // The HTML is written to the servlet response's output stream.
   OutputStream outputStream = httpServletResponse.getOutputStream();

   /**
    * Converts a .docx document to HTML and writes the result to {@code outputStream}.
    *
    * <p>The document is read from the surrounding {@code inputStream}. Images are embedded
    * in the generated HTML as base64 data via {@code Base64EmbedImgManager}.
    *
    * <p>Both {@code outputStream} and {@code inputStream} are closed before this method
    * returns, whether the conversion succeeds or fails, so that a failed conversion does
    * not leak either stream.
    *
    * @param outputStream destination for the generated HTML; closed by this method
    * @throws Exception if the document cannot be read or the conversion fails
    */
   public static void convertDocxFileToHtml(OutputStream outputStream) throws Exception {
      try {
         // try-with-resources releases the document even when the conversion throws.
         try (XWPFDocument document = new XWPFDocument(inputStream)) {
            XHTMLOptions options = XHTMLOptions.create();
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);
            // Embed images as base64 data rather than referencing external files.
            options.setImageManager(new Base64EmbedImgManager());
            // Convert to HTML.
            XHTMLConverter.getInstance().convert(document, outputStream, options);
            outputStream.flush();
         }
      } finally {
         // Close the output first, then the input (same order as before); the nested
         // finally guarantees the input is closed even if closing the output fails.
         try {
            if (outputStream != null) {
               outputStream.close();
            }
         } finally {
            if (inputStream != null) {
               inputStream.close();
            }
         }
      }
   }

   /**
    * Converts a .doc (binary Word) document to HTML and writes the result to
    * {@code outputStream}.
    *
    * <p>The document is read from the surrounding {@code inputStream}, which is closed
    * before this method returns. Pictures are inlined as {@code data:} URIs whose MIME type
    * is taken from the picture type reported by POI; when that type is missing or unknown,
    * {@code image/png} is used.
    *
    * <p>The output is flushed but {@code outputStream} is left open for the caller.
    *
    * @param outputStream destination for the generated HTML
    * @throws Exception if the document cannot be loaded or the HTML cannot be produced
    */
   public static void convertDocFileToHtml(OutputStream outputStream) throws Exception {
      try {
         // NOTE: if inputStream != null but loading the document fails, check whether the
         // file was actually saved by Office Word.
         HWPFDocument wordDocument = (HWPFDocument) WordToHtmlUtils.loadDoc(inputStream);
         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
               DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
         );
         // Inline every picture as a base64 data URI, labelled with its real MIME type
         // instead of always claiming PNG.
         PicturesManager pictureRunMapper = (bytes, pictureType, s, v, v1) -> {
            String mime = (pictureType == null) ? null : pictureType.getMime();
            if (mime == null || "image/unknown".equals(mime)) {
               // Type could not be determined: keep the previous default.
               mime = "image/png";
            }
            return "data:" + mime + ";base64," + Base64.encodeBase64String(bytes);
         };
         wordToHtmlConverter.setPicturesManager(pictureRunMapper);
         // Parse the Word document into the converter's DOM.
         wordToHtmlConverter.processDocument(wordDocument);
         Document htmlDocument = wordToHtmlConverter.getDocument();
         DOMSource domSource = new DOMSource(htmlDocument);
         StreamResult streamResult = new StreamResult(outputStream);
         // Serialise the DOM as indented UTF-8 HTML.
         TransformerFactory factory = TransformerFactory.newInstance();
         Transformer serializer = factory.newTransformer();
         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
         serializer.setOutputProperty(OutputKeys.METHOD, "html");
         serializer.transform(domSource, streamResult);
         outputStream.flush();
      } finally {
         // Release the source stream whether or not the conversion succeeded.
         if (inputStream != null) {
            inputStream.close();
         }
      }
   }


相关