java word轉pdf openoffice aspose ...

背景

　　之前一直是用戶點擊下載word文件到本地，然後使用office或者wps打開。需求優化，要實現可以直接線上預覽，無需下載到本地然後再打開。

　　隨後開始上網找資料，網上資料一大堆，方案也各有不同，大概有這麼幾種方案：

　　1.word轉html然後轉pdf

　　2.Openoffice + swftools + Flexmapper + jodconverter

　　3.kkFileView

　　分析之後最後決定使用Openoffice+PDF.js方式實現

環境搭建

　　1.安裝Openoffice，下載地址：http://www.openoffice.org/download/index.html

　　安裝完成之後，cmd進入安裝目錄執行命令：soffice "-accept=socket,host=localhost,port=8100;urp;StarOffice.ServiceManager" -nologo -headless -nofirststartwizard

　　2.PDF.js，下載地址：http://mozilla.github.io/pdf.js/

　　下載之後解壓，目錄結構如下：

代碼實現

　　編碼方面，分前端和後端：

　　後端：java後端使用openoffice把word文檔轉換成pdf文件，返回流

　　前端：把PDF.js解壓後的文件加到項目中，修改對應路徑，PDF.js拿到後端返回的流直接展示

後端

　　項目使用springboot，pom文件添加依賴

<!-- openoffice word轉pdf -->
        <dependency>
            <groupId>com.artofsolving</groupId>
            <artifactId>jodconverter</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.openoffice</groupId>
            <artifactId>jurt</artifactId>
            <version>3.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.openoffice</groupId>
            <artifactId>ridl</artifactId>
            <version>3.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.openoffice</groupId>
            <artifactId>juh</artifactId>
            <version>3.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.openoffice</groupId>
            <artifactId>unoil</artifactId>
            <version>3.0.1</version>
        </dependency>

　　application.properties配置openoffice服務地址與埠

openoffice.host=127.0.0.1
openoffice.port=8100

　　doc文件轉pdf文件

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.ConnectException;

import javax.servlet.http.HttpServletResponse;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;

import com.xxx.utils.Doc2PdfUtil;

/**
 * Spring MVC controller that converts a Word document to PDF through an
 * OpenOffice service and streams the resulting PDF back to the client.
 *
 * <p>The OpenOffice host and port are injected from the
 * {@code openoffice.host} / {@code openoffice.port} properties.
 */
@Controller
@RequestMapping("/doc2PdfController")
public class Doc2PdfController {
    @Value("${openoffice.host}")
    private String OpenOfficeHost;
    @Value("${openoffice.port}")
    private Integer OpenOfficePort;
    
    private Logger logger = LoggerFactory.getLogger(Doc2PdfController.class);
    
    /**
     * Converts {@code fileName + ".doc"} to {@code fileName + ".pdf"} and writes
     * the PDF bytes to the HTTP response.
     *
     * <p>Responds with 400 when {@code fileName} is missing, 500 when the
     * OpenOffice service cannot be reached, and 404 when no PDF is available
     * after the conversion attempt.
     *
     * @param fileName path of the document without its extension
     * @param response servlet response the PDF is streamed to
     */
    @RequestMapping("/doc2pdf")
    public void doc2pdf(String fileName,HttpServletResponse response){
        if (fileName == null || fileName.isEmpty()) {
            sendError(response, HttpServletResponse.SC_BAD_REQUEST);
            return;
        }
        // NOTE(review): fileName comes straight from the request and is used as a
        // filesystem path by Doc2PdfUtil; it should be resolved against a fixed
        // base directory and checked for path traversal before production use.
        Doc2PdfUtil doc2PdfUtil = new Doc2PdfUtil(OpenOfficeHost, OpenOfficePort);

        File pdfFile;
        try {
            // Convert doc to pdf; the util returns the pdf file handle.
            pdfFile = doc2PdfUtil.doc2Pdf(fileName);
        } catch (ConnectException e) {
            logger.error("****調用Doc2PdfUtil doc轉pdf失敗****", e);
            sendError(response, HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
            return;
        }

        // The util returns a File handle even when the source document is missing,
        // so make sure there is really something to stream.
        if (pdfFile == null || !pdfFile.isFile()) {
            logger.warn("PDF file is not available for fileName={}", fileName);
            sendError(response, HttpServletResponse.SC_NOT_FOUND);
            return;
        }

        response.setContentType("application/pdf;charset=UTF-8");
        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(pdfFile));
             OutputStream out = response.getOutputStream()) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
        } catch (IOException e) {
            // The response may already be partially written, so only log here.
            logger.error("Failed to stream PDF " + pdfFile.getPath(), e);
        }
    }

    /** Sends an HTTP error status; a failure to do so is logged rather than propagated. */
    private void sendError(HttpServletResponse response, int status) {
        try {
            response.sendError(status);
        } catch (IOException e) {
            logger.warn("Failed to send HTTP error status " + status, e);
        }
    }
}

import java.io.File;
import java.net.ConnectException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.StreamOpenOfficeDocumentConverter;

/**
 * Converts Word documents to PDF by delegating to an OpenOffice service
 * through JODConverter.
 */
public class Doc2PdfUtil {
    private String OpenOfficeHost; // OpenOffice service host
    private Integer OpenOfficePort; // OpenOffice service port
    
    public Doc2PdfUtil(){
    }

    public Doc2PdfUtil(String OpenOfficeHost, Integer OpenOfficePort){
        this.OpenOfficeHost = OpenOfficeHost;
        this.OpenOfficePort = OpenOfficePort;
    }
    
    private Logger logger = LoggerFactory.getLogger(Doc2PdfUtil.class);
    
    /**
     * Converts {@code fileName + ".doc"} to {@code fileName + ".pdf"}.
     *
     * <p>If the PDF already exists it is returned as-is without reconverting.
     * If the source document does not exist, nothing is converted and the
     * returned handle may point to a file that does not exist.
     *
     * @param fileName document path without extension
     * @return handle of the target PDF file (callers should check that it exists)
     * @throws ConnectException if the OpenOffice service cannot be reached
     */
    public File doc2Pdf(String fileName) throws ConnectException{
        File docFile = new File(fileName + ".doc");
        File pdfFile = new File(fileName + ".pdf");

        if (!docFile.exists()) {
            logger.info("****pdf轉換異常，需要轉換的doc文檔不存在，無法轉換****");
            return pdfFile;
        }
        if (pdfFile.exists()) {
            // Already converted earlier; reuse the existing PDF.
            return pdfFile;
        }

        OpenOfficeConnection connection = new SocketOpenOfficeConnection(OpenOfficeHost, OpenOfficePort);
        try {
            connection.connect();
            DocumentConverter converter = new StreamOpenOfficeDocumentConverter(connection);
            // The core operation: doc -> pdf
            converter.convert(docFile, pdfFile);
            logger.info("****pdf轉換成功，PDF輸出：" + pdfFile.getPath() + "****");
        } catch (java.net.ConnectException e) {
            logger.error("****pdf轉換異常，openoffice服務未啟動！****", e);
            throw e;
        } catch (com.artofsolving.jodconverter.openoffice.connection.OpenOfficeException e) {
            logger.error("****pdf轉換器異常，讀取轉換文件失敗****", e);
            throw e;
        } finally {
            // Always release the connection, including when the conversion fails;
            // only disconnect if connect() actually succeeded.
            if (connection.isConnected()) {
                connection.disconnect();
            }
        }
        return pdfFile;
    }
}

前端

　　把pdfjs-2.0.943-dist下的兩個文件夾build、web整體加到項目中，然後把viewer.html改成viewer.jsp，並調整了位置，去掉了預設的pdf文件compressed.tracemonkey-pldi-09.pdf，將來使用我們生成的文件

　　viewer.jsp、viewer.js注意點：

　　1.引用的js、css路徑要修改過來

　　2.viewer.jsp中調用pdf/web/viewer.js，viewer.js中配置了預設的pdf文件路徑，我們要動態生成pdf，因此需要修改，在jsp中定義一個參數DEFAULT_URL，然後在js中使用它

　　3.jsp中寫了一個ajax獲取pdf流，之後賦值給DEFAULT_URL，然後再讓viewer.js去載入，因此需要把/pdf/web/viewer.js放到ajax方法後面

　　4.viewer.js中把compressed.tracemonkey-pldi-09.pdf改成我們定義的變數DEFAULT_URL；pdf.worker.js的路徑修改成對應路徑

<%@ page language="java" contentType="text/html; charset=utf-8"
    pageEncoding="utf-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<!DOCTYPE html>
<!--
Copyright 2012 Mozilla Foundation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Adobe CMap resources are covered by their own copyright but the same license:

    Copyright 1990-2015 Adobe Systems Incorporated.

See https://github.com/adobe-type-tools/cmap-resources
-->
<html dir="ltr" mozdisallowselectionprint>
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
    <meta name="google" content="notranslate">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <c:set var="qtpath" value="${pageContext.request.contextPath}"/>
    <script>
        var qtpath = '${qtpath}';
        var fileName = '${fileName}';
    </script>
    
    <title>PDF.js viewer</title>


    <link rel="stylesheet" href="${qtpath}/res/pdf/web/viewer.css">


<!-- This snippet is used in production (included from viewer.html) -->
<link rel="resource" type="application/l10n" href="${qtpath}/res/pdf/web/locale/locale.properties">
<script type="text/javascript" src="${qtpath}/res/js/jquery/jquery-2.1.4.min.js"></script>
<script type="text/javascript">
    // Note: the DEFAULT_URL variable removed from viewer.js is redefined here.
    var DEFAULT_URL = "";
    var PDFData = "";
    $.ajax({
        type:"post",
        // Synchronous on purpose: PDFData is read immediately below, and
        // viewer.js (loaded after this script) expects DEFAULT_URL to be ready.
        async:false,
        // x-user-defined keeps the response as a raw byte string (one char per byte).
        mimeType: 'text/plain; charset=x-user-defined',
        url:'${qtpath}/doc2PdfController/doc2pdf',
        data:{'fileName':fileName},
        success:function(data){
           PDFData = data;
        }
    });
    var rawLength = PDFData.length;
    // Convert to a Uint8Array, which pdf.js can parse directly (see pdf.js issue 4068).
    var array = new Uint8Array(new ArrayBuffer(rawLength));
    // Declare the counter locally so it does not leak into the global scope.
    for (var i = 0; i < rawLength; i++) {
      array[i] = PDFData.charCodeAt(i) & 0xff;
    }
    DEFAULT_URL = array;
</script>
<script type="text/javascript" src="${qtpath}/res/pdf/build/pdf.js"></script>
<script type="text/javascript" src="${qtpath}/res/pdf/web/viewer.js"></script>

  </head>

  ...

效果

分割線

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

　　本以為完美的實現了doc線上預覽，上測試環境後發現了一個大坑，我們的doc文件不是在本地office創建後上傳的，是其他同事用freemarker ftl模板生成的，這種生成的doc文件根本不是微軟標準的doc，本質是xml數據結構，openoffice拿這種文件去轉換pdf文件直接就報錯了

　　上網查資料查了半天也沒找到這種問題的解決方案，想想只能是放棄openoffice改用其他方法了（freemarker ftl生成doc這個肯定是不能動的）

　　看到一些博客使用word--html--pdf生成pdf，還有的使用freemarker ftl xml 生成pdf感覺還是太繁瑣了，我只是想拿現有的doc（雖然是freemarker ftl生成的）轉換成pdf啊

　　繼續看博客查資料，看到一種方法，使用aspose把doc轉換成pdf，抱著試一試的心態在本地測試了下，沒想到竟然成了，感覺太意外了，aspose方法超級簡單，只要導入jar包，幾行代碼就可以搞定，並且轉換速度比openoffice要快很多。很是奇怪，這麼好用這麼簡單的工具為什麼沒在我一開始搜索word轉pdf的時候就出現呢

aspose doc轉pdf

　　在maven倉庫搜索aspose，然後把依賴加入pom.xml發現jar包下載不下來，沒辦法，最後在csdn下載aspose jar包，然後mvn deploy到倉庫

　　pom.xml

<!-- word轉pdf maven倉庫沒有需要本地jar包發佈到私服 -->
        <dependency>
            <groupId>com.aspose.words</groupId>
            <artifactId>aspose-words-jdk16</artifactId>
            <version>14.9.0</version>
        </dependency>

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.ConnectException;

import javax.servlet.http.HttpServletResponse;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;

import com.xxx.utils.Doc2PdfUtil;

/**
 * Spring MVC controller that converts a Word document to PDF with
 * Aspose.Words (via {@code Doc2PdfUtil}) and streams the PDF to the client.
 */
@Controller
@RequestMapping("/doc2PdfController")
public class Doc2PdfController {
    
    private Logger logger = LoggerFactory.getLogger(Doc2PdfController.class);
    
    /**
     * Converts {@code fileName + ".doc"} to {@code fileName + ".pdf"} and writes
     * the PDF bytes to the HTTP response.
     *
     * <p>Responds with 400 when {@code fileName} is missing and 404 when no PDF
     * file is available after the conversion attempt.
     *
     * @param fileName path of the document without its extension
     * @param response servlet response the PDF is streamed to
     */
    @RequestMapping("/doc2pdf")
    public void doc2pdf(String fileName,HttpServletResponse response){
        if (fileName == null || fileName.isEmpty()) {
            sendError(response, HttpServletResponse.SC_BAD_REQUEST);
            return;
        }
        // NOTE(review): fileName comes straight from the request and is used as a
        // filesystem path; it should be resolved against a fixed base directory
        // and checked for path traversal before production use.
        String docPath = fileName + ".doc";
        String pdfPath = fileName + ".pdf";

        File pdfFile = Doc2PdfUtil.doc2Pdf(docPath, pdfPath);

        // Doc2PdfUtil logs and swallows conversion errors, so the returned handle
        // may point to a file that was never created.
        if (pdfFile == null || !pdfFile.isFile()) {
            logger.error("****調用Doc2PdfUtil doc轉pdf失敗****");
            sendError(response, HttpServletResponse.SC_NOT_FOUND);
            return;
        }

        response.setContentType("application/pdf;charset=UTF-8");
        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(pdfFile));
             OutputStream out = response.getOutputStream()) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
        } catch (IOException e) {
            // The response may already be partially written, so only log here.
            logger.error("Failed to stream PDF " + pdfFile.getPath(), e);
        }
    }

    /** Sends an HTTP error status; a failure to do so is logged rather than propagated. */
    private void sendError(HttpServletResponse response, int status) {
        try {
            response.sendError(status);
        } catch (IOException e) {
            logger.warn("Failed to send HTTP error status " + status, e);
        }
    }
}

　　Doc2PdfUtil.java

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.aspose.words.License;
import com.aspose.words.SaveFormat;

/**
 * Converts Word documents to PDF using Aspose.Words.
 */
public class Doc2PdfUtil {
    
    private static Logger logger = LoggerFactory.getLogger(Doc2PdfUtil.class);
    
    /**
     * Converts a Word document to PDF.
     *
     * <p>Conversion errors are logged and not rethrown; in that case the
     * returned handle may point to a missing or incomplete file, so callers
     * should verify it before use.
     *
     * @param docPath path of the source document, including {@code .doc}
     * @param pdfPath path of the target PDF, including {@code .pdf}
     * @return handle of the target PDF file
     */
    public static File doc2Pdf(String docPath, String pdfPath){
        File pdfFile = new File(pdfPath);
        try {
            // NOTE(review): the license XML is embedded in source; consider loading
            // it from an external, properly licensed resource instead.
            String s = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
            // Use an explicit charset so the bytes do not depend on the platform default.
            ByteArrayInputStream is = new ByteArrayInputStream(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            License license = new License();
            license.setLicense(is);
            com.aspose.words.Document document = new com.aspose.words.Document(docPath);
            // try-with-resources guarantees the output file handle is released
            // even when saving fails.
            try (FileOutputStream out = new FileOutputStream(pdfFile)) {
                document.save(out, SaveFormat.PDF);
            }
        } catch (Exception e) {
            logger.error("****aspose doc轉pdf異常", e);
        }
        return pdfFile;
    }
}

　　aspose-words-jdk16-14.9.0.jar下載地址

　　https://download.csdn.net/download/u013279345/10868189

window下正常，linux下亂碼的解決方案

　　使用com.aspose.words將word模板轉為PDF文件時，在開發平臺window下轉換沒有問題，中文也不會出現亂碼。但是將服務部署在正式伺服器（Linux）上，轉換出來的PDF中文就出現了亂碼。在網上找了很久，才找到原因，現將解決辦法分享給大家。

一、問題原因分析

在window下沒有問題但是在linux下有問題，就說明不是代碼或者輸入輸出流編碼的問題，根本原因是兩個平臺環境的問題。出現亂碼說明linux環境中沒有相應的字體以供使用，所以就會導致亂碼的出現。將轉換無問題的windows主機中的字體拷貝到linux平臺下進行安裝，重啟伺服器後轉換就不會出現亂碼了。

二、window字體複製到linux環境並安裝

按照教程安裝完成後重啟linux伺服器即可搞定亂碼問題。

1. From Windows

Windows下字體庫的位置為C:\Windows\fonts，這裡面包含所有windows下可用的字體。

2. To Linux　　

linux的字體庫是 /usr/share/fonts 。

在該目錄下新建一個目錄，比如目錄名叫 windows（根據個人的喜好，自己理解就行，當然這裡是有許可權要求的，你可以用sudo來執行）。

然後將 windows 字體庫中你要的字體文件複製到新建的目錄下(只需要複製*.ttc，和*.ttf的文件).

複製所有字體：
   sudo cp *.ttc /usr/share/fonts/windows/
   sudo cp *.ttf /usr/share/fonts/windows/

更改這些字體庫的許可權：
    sudo chmod 755 /usr/share/fonts/windows/*

然後進入Linux字體庫：
cd /usr/share/fonts/windows/

接著根據當前目錄下的字體建立scale文件
    sudo mkfontscale

接著建立dir文件
   sudo mkfontdir

然後運行
   sudo fc-cache

重啟 Linux 操作系統就可以使用這些字體了。

linux下亂碼問題解決方案轉載自:

https://blog.csdn.net/hanchuang213/article/details/64905214

https://blog.csdn.net/shanelooli/article/details/7212812

java實現word轉pdf線上預覽（前端使用PDF.js；後端使用openoffice、aspose）

背景

環境搭建

代碼實現

後端

前端

效果

分割線

aspose doc轉pdf

window下正常，linux下亂碼的解決方案

一、問題原因分析

二、window字體複製到linux環境並安裝

1. From Windows

2. To Linux

2. To Linux