|
下面是将 Word 文档转换为 HTML 的 Java 代码。运行前需要把 JACOB 的 DLL 文件(见附件)放到 C:\WINDOWS\system32 目录下(或 PATH 中的其他目录),并把 jacob 的 jar 文件加入 classpath。Java 代码如下:
import java.io.File;import java.util.Calendar;import java.util.Date;import com.hifly.common.db.Db;import com.jacob.activeX.ActiveXComponent;import com.jacob.com.ComThread;import com.jacob.com.Dispatch;import com.jacob.com.Variant;/** * Word转换成Html * jacob jar与dll文件下载: http://www.matrix.org.cn/down_view.asp?id=13 * 下载了jacob并放到指定的路径之后(dll放到path,jar文件放到classpath),就可以写你自己的抽取程序了,下面是一个简单的例子: * * */public class WordToHtml {/* * 转换单个word文件 * origPath为原地址,包括扩展名doc * destPath为转换后的文件地址,包括扩展名htm */ public static void word_To_Htm(String origPath , String destPath) throws Exception{ File ofile = new File(origPath); File dfile = new File(destPath); if(!destPath.endsWith(".htm")){ dfile = new File(destPath+".htm"); } if(ofile.exists() && !dfile.exists()){ ComThread.InitSTA(); ActiveXComponent app = new ActiveXComponent( "Word.Application"); try { app.setProperty("Visible", new Variant(false)); Dispatch docs = app.getProperty("Documents").toDispatch(); Dispatch doc = Dispatch.invoke( docs, "Open", Dispatch.Method, new Object[] {origPath, new Variant(false), new Variant(true) }, new int[1]) .toDispatch(); Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[] { destPath, new Variant(8) }, new int[1]); Variant f = new Variant(false); Dispatch.call(doc, "Close", f); } catch (Exception e) { throw e; } finally { app.invoke("Quit", new Variant[] {}); ComThread.Release(); } } } /** * 把word文件转换成mht文件.转换完成后并自动关闭WORD文件 */public static void word_To_Mht(String wordFileName, String htmlFile) {ComThread.InitSTA();// 初始化com的线程,非常重要!!使用结束后要调用 realease方法// Instantiate objWord //Declare word objectActiveXComponent objWord = new ActiveXComponent("Word.Application");// Assign a local word objectDispatch wordObject = (Dispatch) objWord.getObject();// Create a Dispatch Parameter to show the document that is openedDispatch.put((Dispatch) wordObject, "Visible", new Variant(true));// new// Variant(true)表示word应用程序可见// Instantiate the Documents PropertyDispatch documents = 
objWord.getProperty("Documents").toDispatch(); // documents表示word的所有文档窗口,(word是多文档应用程序)// Add a new word document, Current Active DocumentDispatch document = Dispatch.call(documents, "Open", wordFileName).toDispatch(); // 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档Dispatch.invoke(document, "SaveAs", Dispatch.Method, new Object[] {htmlFile, new Variant(8) }, new int[1]);Dispatch.call(document, "Close");Dispatch.call(wordObject, "quit");ComThread.Release();// 释放com线程。根据jacob的帮助文档,com的线程回收不由java的垃圾回收器处理}/** * 文档转换函数 * * @param docfile * word文档的绝对路径加文件名(包含扩展名) * @param htmlfile * 转换后的html文件绝对路径和文件名(不含扩展名) */public static void change(String docfile, String htmlfile) {ActiveXComponent app = new ActiveXComponent("Word.Application"); // 启动wordtry {app.setProperty("Visible", new Variant(false));// 设置word不可见Object docs = app.getProperty("Documents").toDispatch();// Assign a local word objectDispatch wordObject = (Dispatch) app.getObject();// Create a Dispatch Parameter to show the document that is opened// Dispatch.put((Dispatch) wordObject, "Visible", new// Variant(true));// new Variant(true)表示word应用程序可见// Tip:设置一个对象的属性的时候,利用Dispatch的put方法,给属性赋值。上面这行语句相当于vb的// wordObject.Visible = true 语句//// //Instantiate the Documents Property// Dispatch documents =// objWord.getProperty("Documents").toDispatch();// //documents表示word的所有文档窗口,(word是多文档应用程序)// Add a new word document, Current Active Document// Dispatch document = Dispatch.call(app, "Add").toDispatch(); //// 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档Object doc = Dispatch.invoke(app,"Open",Dispatch.Method,new Object[] { docfile, new Variant(false),new Variant(true) }, new int[1]).toDispatch();// 打开word文件Dispatch.invoke(app, "SaveAs", Dispatch.Method, new Object[] {htmlfile, new Variant(8) }, new int[1]);// 作为html格式保存到临时文件Variant f = new Variant(false);Dispatch.call(app, "Close", f);} catch (Exception e) {e.printStackTrace();} finally {app.invoke("Quit", new Variant[] {});}}public static void wordToHtmlExpm(String wordFileName, String htmlFile) 
{ComThread.InitSTA();// 初始化com的线程,非常重要!!使用结束后要调用 realease方法// Instantiate objWord //Declare word objectActiveXComponent objWord = new ActiveXComponent("Word.Application");// Assign a local word objectDispatch wordObject = (Dispatch) objWord.getObject();// Create a Dispatch Parameter to show the document that is openedDispatch.put((Dispatch) wordObject, "Visible", new Variant(true));// new// Variant(true)表示word应用程序可见// Instantiate the Documents PropertyDispatch documents = objWord.getProperty("Documents").toDispatch(); // documents表示word的所有文档窗口,(word是多文档应用程序)// Add a new word document, Current Active DocumentDispatch document = Dispatch.call(documents, "Open", wordFileName).toDispatch(); // 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档// Dispatch document = Dispatch.call(documents, "Add").toDispatch(); //// 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档// Dispatch wordContent = Dispatch.get(document,// "Content").toDispatch(); // 取得word文件的内容// Dispatch.call(wordContent, "InsertAfter", "这里是一个段落的内容");//插入一个段落// Dispatch paragraphs = Dispatch.get(wordContent,// "Paragraphs").toDispatch(); // 所有段落// int paragraphCount = Dispatch.get(paragraphs, "Count").toInt(); //// 一共的段落数// 找到刚输入的段落,设置格式/* * Dispatch lastParagraph = Dispatch.call(paragraphs, "Item", new * Variant(paragraphCount)). toDispatch(); // 最后一段 Dispatch * lastParagraphRange = Dispatch.get(lastParagraph, "Range"). 
* toDispatch(); Dispatch font = Dispatch.get(lastParagraphRange, * "Font").toDispatch(); Dispatch.put(font, "Bold", new Variant(true)); // * 设置为黑体 Dispatch.put(font, "Italic", new Variant(true)); // 设置为斜体 * Dispatch.put(font, "Name", new Variant("宋体")); // Dispatch.put(font, * "Size", new Variant(12)); //小四 */// Dispatch.call(document, "SaveAs", new Variant("C:\\abc.doc"));// //保存一个新文档// Dispatch.get(document, "SaveAs")// Dispatch.call(document, "SaveAs", new Variant(htmlFile)); // 保存一个新文档Dispatch.invoke(document, "SaveAs", Dispatch.Method, new Object[] {htmlFile, new Variant(9) }, new int[1]);Dispatch.call(document, "Close");Dispatch.call(wordObject, "quit");ComThread.Release();// 释放com线程。根据jacob的帮助文档,com的线程回收不由java的垃圾回收器处理}public static void deal(File file,Date lastTime){File[] files = file.listFiles();for(File f : files){if(f.isDirectory()){deal(f,lastTime);}else {//if (f.getName().toLowerCase().endsWith(".doc") || f.getName().toLowerCase().endsWith(".docx")){Long time = f.lastModified();Calendar cd = Calendar.getInstance(); cd.setTimeInMillis(time); //System.out.println(DateUtils.format(cd.getTime(),"yyyy-MM-dd HH:mm:ss SSS")); //if(cd.getTime().before(lastTime)){ String name = f.getName().replace("“", "").replace("”", "");String docfile = f.getPath();String htmlfile = f.getParent()+"\\"+name.substring(0, name.indexOf("."))+".htm";word_To_Mht(docfile,htmlfile);//String fileName = name.substring(0,name.indexOf("."))+".htm";//isnertReport(f.getName(),"doc");//isnertReport(fileName,"html"); //}}try {Thread.sleep(1000*2);} catch (InterruptedException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}public static void start(){String path = "e:\\file\\ab\\";Date lastTime = new Date();File file = new File(path);deal(file,lastTime);}public static void main(String[] strs) {start();String wordFile = "D:\\temp\\1234.doc";String htmlFile = "d:\\temp\\alarm\\" + System.currentTimeMillis()+ ".mht";WordToHtml.word_To_Mht(wordFile, htmlFile);}} |
|