ALF-16841: Converting files without a title to HTML generates invalid HTML

- Added a unit test that confirms the issue
- Wrapped the HTML content handler in Tika's new ExpandedTitleContentHandler


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@44819 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Ray Gauss 2012-12-18 19:13:57 +00:00
parent 80344e8cdf
commit ff9a4ba6cc
2 changed files with 6 additions and 0 deletions

View File

@ -41,6 +41,7 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser; import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler; import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.xml.sax.ContentHandler; import org.xml.sax.ContentHandler;
/** /**
@ -135,6 +136,7 @@ public abstract class TikaPoweredContentTransformer extends AbstractContentTrans
if(MimetypeMap.MIMETYPE_HTML.equals(targetMimeType)) if(MimetypeMap.MIMETYPE_HTML.equals(targetMimeType))
{ {
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html"); handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
return new ExpandedTitleContentHandler(handler);
} }
else if(MimetypeMap.MIMETYPE_XHTML.equals(targetMimeType) || else if(MimetypeMap.MIMETYPE_XHTML.equals(targetMimeType) ||
MimetypeMap.MIMETYPE_XML.equals(targetMimeType)) MimetypeMap.MIMETYPE_XML.equals(targetMimeType))

View File

@ -80,6 +80,10 @@ public abstract class TikaPoweredContentTransformerTest extends AbstractContentT
"HTML footer not found", "HTML footer not found",
contents.contains("</html>") contents.contains("</html>")
); );
assertTrue(
"Expanded HTML title not found",
contents.contains("</title>")
);
} }
else if(targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN)) else if(targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
{ {