When listing the transformers for a given mimetype via the /mimetypes webscript, detect and report if a proxy transformer uses openoffice

Also tweak the Tika .xls transformer so that it takes priority when the target is HTML


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@23078 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-10-13 14:05:05 +00:00
parent 2d7ac6bf91
commit 5aa58d6b6e
2 changed files with 21 additions and 1 deletions

View File

@@ -85,6 +85,25 @@ public class PoiHssfContentTransformer extends TikaPoweredContentTransformer
return super.isTransformable(sourceMimetype, targetMimetype, options); return super.isTransformable(sourceMimetype, targetMimetype, options);
} }
/**
 * Make sure we win over openoffice when it comes to producing
 *  HTML.
 * <p>
 * Returns <code>true</code> when the source mimetype is one of this
 *  transformer's source mimetypes and the target is HTML or XHTML.
 *  Every other combination is left to the superclass to decide.
 *
 * @param sourceMimetype the mimetype of the content being transformed
 * @param targetMimetype the mimetype being requested
 * @param options the transformation options, passed through unchanged
 *  to the superclass
 * @return <code>true</code> for the HTML / XHTML special case, otherwise
 *  whatever the superclass reports for explicit transformations
 */
@Override
public boolean isExplicitTransformation(String sourceMimetype, String targetMimetype, TransformationOptions options)
{
    boolean htmlTarget = MimetypeMap.MIMETYPE_HTML.equals(targetMimetype) ||
                         MimetypeMap.MIMETYPE_XHTML.equals(targetMimetype);
    if(htmlTarget && sourceMimeTypes.contains(sourceMimetype))
    {
        // Special case to win for HTML
        return true;
    }

    // Otherwise fall back on the default rules for explicit transformations.
    // This must delegate to the matching super method: asking
    //  isTransformable here would answer a different question (can we do
    //  the transformation at all) and so would flag far more than the
    //  HTML case as explicit.
    return super.isExplicitTransformation(sourceMimetype, targetMimetype, options);
}
@Override @Override
protected ContentHandler getContentHandler(String targetMimeType, Writer output) protected ContentHandler getContentHandler(String targetMimeType, Writer output)
throws TransformerConfigurationException throws TransformerConfigurationException

View File

@@ -39,7 +39,8 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* do this, pending TIKA-408. When Apache POI 3.7 beta 2 has been * do this, pending TIKA-408. When Apache POI 3.7 beta 2 has been
* released, we can switch to Tika and then handle Word 6, * released, we can switch to Tika and then handle Word 6,
* Word 95, Word 97, 2000, 2003, 2007 and 2010 formats. * Word 95, Word 97, 2000, 2003, 2007 and 2010 formats.
* TODO Switch to Tika in August 2010 *
* TODO Switch to Tika in November 2010 once 3.4 is out
* *
* @author Nick Burch * @author Nick Burch
*/ */