diff --git a/source/java/org/alfresco/repo/content/transform/PoiHssfContentTransformer.java b/source/java/org/alfresco/repo/content/transform/PoiHssfContentTransformer.java
index 077906ea5e..02f6405aad 100644
--- a/source/java/org/alfresco/repo/content/transform/PoiHssfContentTransformer.java
+++ b/source/java/org/alfresco/repo/content/transform/PoiHssfContentTransformer.java
@@ -85,6 +85,25 @@ public class PoiHssfContentTransformer extends TikaPoweredContentTransformer
         return super.isTransformable(sourceMimetype, targetMimetype, options);
     }
 
+    /**
+     * Reports HTML and XHTML output from any of our source mimetypes as an explicit
+     * transformation, so we win over openoffice for HTML; else defers to the parent.
+     */
+    @Override
+    public boolean isExplicitTransformation(String sourceMimetype, String targetMimetype, TransformationOptions options)
+    {
+        if(sourceMimeTypes.contains(sourceMimetype) &&
+           (MimetypeMap.MIMETYPE_HTML.equals(targetMimetype) ||
+            MimetypeMap.MIMETYPE_XHTML.equals(targetMimetype)) )
+        {
+            // Special case to win for HTML
+            return true;
+        }
+
+        // Not HTML: ask the parent whether it is explicit, not merely transformable
+        return super.isExplicitTransformation(sourceMimetype, targetMimetype, options);
+    }
+
     @Override
     protected ContentHandler getContentHandler(String targetMimeType, Writer output)
             throws TransformerConfigurationException
diff --git a/source/java/org/alfresco/repo/content/transform/TextMiningContentTransformer.java b/source/java/org/alfresco/repo/content/transform/TextMiningContentTransformer.java
index 90c138ed6b..5a415cf587 100644
--- a/source/java/org/alfresco/repo/content/transform/TextMiningContentTransformer.java
+++ b/source/java/org/alfresco/repo/content/transform/TextMiningContentTransformer.java
@@ -39,7 +39,8 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  * do this, pending TIKA-408. When Apache POI 3.7 beta 2 has been
  * released, we can switch to Tika and then handle Word 6,
  * Word 95, Word 97, 2000, 2003, 2007 and 2010 formats.
- * TODO Switch to Tika in August 2010
+ *
+ * TODO Switch to Tika in November 2010 once 3.4 is out
  *
  * @author Nick Burch
  */