Tika content transformer support for OOXML office

Enable explicit Tika content transform for OOXML files
Allow the Excel transformer (which does CSV as well as text/html) to handle .xlsx as well as .xls
Also update the .doc parser test to ensure that the older word 6 and word 95 files are correctly handled too


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20781 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-06-23 15:51:03 +00:00
parent 228d111c56
commit 325f8e7923
10 changed files with 393 additions and 121 deletions

View File

@@ -39,6 +39,7 @@ import org.alfresco.repo.content.transform.OpenOfficeContentTransformerTest;
import org.alfresco.repo.content.transform.PdfBoxContentTransformerTest;
import org.alfresco.repo.content.transform.PoiContentTransformerTest;
import org.alfresco.repo.content.transform.PoiHssfContentTransformerTest;
import org.alfresco.repo.content.transform.PoiOOXMLContentTransformerTest;
import org.alfresco.repo.content.transform.RuntimeExecutableContentTransformerTest;
import org.alfresco.repo.content.transform.StringExtractingContentTransformerTest;
import org.alfresco.repo.content.transform.TextMiningContentTransformerTest;
@@ -107,6 +108,7 @@ public class ContentMinimalContextTestSuite extends TestSuite
suite.addTestSuite(PdfBoxContentTransformerTest.class);
suite.addTestSuite(PoiContentTransformerTest.class);
suite.addTestSuite(PoiHssfContentTransformerTest.class);
suite.addTestSuite(PoiOOXMLContentTransformerTest.class);
suite.addTestSuite(RuntimeExecutableContentTransformerTest.class);
suite.addTestSuite(StringExtractingContentTransformerTest.class);
suite.addTestSuite(TextMiningContentTransformerTest.class);