Word 转成HTML

0
(0)

来自CSDN的代码:

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.junit.Assert;
import org.junit.Test;

/**
 * Sample test that converts a Word 2007+ ({@code .docx}) document to XHTML
 * using the XWPF-to-XHTML converter.
 *
 * <p>All paths are hard-coded: the input is {@code d:/test/test.docx}, the
 * generated markup is written to {@code d:/test/test.htm}, and
 * {@code d:/test/media} is handed to both the image extractor and the URI
 * resolver.
 */
public class word07toHtml {

	/** File-name suffix that identifies an Office Open XML Word document. */
	private static final String DOCX_SUFFIX = ".docx";

	/**
	 * Converts {@code d:/test/test.docx} to {@code d:/test/test.htm}.
	 *
	 * <p>If the input file is missing, or its name does not end in
	 * {@code .docx} (compared case-insensitively), a message is printed to
	 * standard output and nothing is converted.
	 *
	 * @throws IOException if the document cannot be read or the HTML cannot
	 *                     be written
	 */
	@Test
	public void canExtractImage() throws IOException {
		File f = new File("d:/test/test.docx");
		if (!f.exists()) {
			System.out.println("Sorry File does not Exists!");
			return;
		}
		if (!hasDocxExtension(f.getName())) {
			System.out.println("Enter only MS Office 2007+ files");
			return;
		}

		// Prepare XHTML options: the same folder is given to the image
		// extractor and to the URI resolver used for image references.
		File imageFolderFile = new File("d:/test/media");
		XHTMLOptions options = XHTMLOptions.create().URIResolver(
				new FileURIResolver(imageFolderFile));
		options.setExtractor(new FileImageExtractor(imageFolderFile));

		// try-with-resources guarantees both streams are closed (and the
		// output flushed) even when loading or conversion throws.
		try (InputStream in = new FileInputStream(f)) {
			// 1) Load DOCX into XWPFDocument
			XWPFDocument document = new XWPFDocument(in);

			// 2) Convert XWPFDocument to XHTML. The output file is opened only
			// after the document loaded successfully, so a failed load does
			// not leave an empty .htm file behind.
			try (OutputStream out = new FileOutputStream(new File(
					"d:/test/test.htm"))) {
				XHTMLConverter.getInstance().convert(document, out, options);
			}
		}
	}

	/**
	 * Tells whether a file name ends with {@code .docx}, ignoring case.
	 *
	 * @param fileName the bare file name to inspect
	 * @return {@code true} if the name ends with ".docx" in any letter case
	 */
	private static boolean hasDocxExtension(String fileName) {
		int suffixLength = DOCX_SUFFIX.length();
		// regionMatches returns false (rather than throwing) when the start
		// offset is negative, i.e. when the name is shorter than the suffix.
		return fileName.regionMatches(true, fileName.length() - suffixLength,
				DOCX_SUFFIX, 0, suffixLength);
	}
}

 

这篇文章有用吗?

平均评分 0 / 5. 投票数: 0

到目前为止还没有投票!成为第一位为此文章评分的人。

很抱歉,这篇文章对您没有用!

让我们改善这篇文章!

请告诉我们如何改进这篇文章。

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据