From ff9a4ba6cce98fc9f73ecddf342f2cb7246b644c Mon Sep 17 00:00:00 2001 From: Ray Gauss Date: Tue, 18 Dec 2012 19:13:57 +0000 Subject: [PATCH] ALF-16841: Converting files without a title to HTML generates invalid HTML - Added unit test which confirms the issue - Wrapped the html content handler in Tika's new ExpandedTitleContentHandler git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@44819 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../repo/content/transform/TikaPoweredContentTransformer.java | 2 ++ .../content/transform/TikaPoweredContentTransformerTest.java | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformer.java b/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformer.java index 34c2c9d25d..e1e499bcb3 100644 --- a/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformer.java +++ b/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformer.java @@ -41,6 +41,7 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; +import org.apache.tika.sax.ExpandedTitleContentHandler; import org.xml.sax.ContentHandler; /** @@ -135,6 +136,7 @@ public abstract class TikaPoweredContentTransformer extends AbstractContentTrans if(MimetypeMap.MIMETYPE_HTML.equals(targetMimeType)) { handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html"); + return new ExpandedTitleContentHandler(handler); } else if(MimetypeMap.MIMETYPE_XHTML.equals(targetMimeType) || MimetypeMap.MIMETYPE_XML.equals(targetMimeType)) diff --git a/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformerTest.java b/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformerTest.java index 9751807049..d03fcd421a 100644 --- 
a/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformerTest.java +++ b/source/java/org/alfresco/repo/content/transform/TikaPoweredContentTransformerTest.java @@ -80,6 +80,10 @@ public abstract class TikaPoweredContentTransformerTest extends AbstractContentT "HTML footer not found", contents.contains("") ); + assertTrue( + "Expanded HTML title not found", + contents.contains("<title></title>") + ); } else if(targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN)) {