Fixed AR-327: Error during extraction of metadata from XHTML documents

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2097 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2006-01-10 16:53:40 +00:00
parent 5311248be8
commit 2f45f69dcd
2 changed files with 11 additions and 6 deletions

View File

@@ -44,6 +44,7 @@ public class MimetypeMap implements MimetypeService
public static final String MIMETYPE_TEXT_CSS = "text/css"; public static final String MIMETYPE_TEXT_CSS = "text/css";
public static final String MIMETYPE_XML = "text/xml"; public static final String MIMETYPE_XML = "text/xml";
public static final String MIMETYPE_HTML = "text/html"; public static final String MIMETYPE_HTML = "text/html";
public static final String MIMETYPE_XHTML = "application/xhtml+xml";
public static final String MIMETYPE_PDF = "application/pdf"; public static final String MIMETYPE_PDF = "application/pdf";
public static final String MIMETYPE_WORD = "application/msword"; public static final String MIMETYPE_WORD = "application/msword";
public static final String MIMETYPE_EXCEL = "application/vnd.excel"; public static final String MIMETYPE_EXCEL = "application/vnd.excel";

View File

@@ -22,7 +22,9 @@ import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.Map; import java.util.Map;
import java.util.Set;
import javax.swing.text.ChangedCharSetException; import javax.swing.text.ChangedCharSetException;
import javax.swing.text.MutableAttributeSet; import javax.swing.text.MutableAttributeSet;
@@ -35,8 +37,6 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/** /**
* *
@@ -44,12 +44,16 @@ import org.apache.commons.logging.LogFactory;
*/ */
public class HtmlMetadataExtracter extends AbstractMetadataExtracter public class HtmlMetadataExtracter extends AbstractMetadataExtracter
{ {
private static final Set<String> MIMETYPES = new HashSet<String>(5);
private static final Log logger = LogFactory.getLog(HtmlMetadataExtracter.class); static
{
MIMETYPES.add(MimetypeMap.MIMETYPE_HTML);
MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML);
}
public HtmlMetadataExtracter() public HtmlMetadataExtracter()
{ {
super(MimetypeMap.MIMETYPE_HTML, 1.0, 1000); super(MIMETYPES, 1.0, 1000);
} }
public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
@@ -95,7 +99,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
{ {
inHead = false; inHead = false;
} }
else if (HTML.Tag.TITLE.equals(t)) else if (HTML.Tag.TITLE.equals(t) && title != null)
{ {
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination); trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
title = null; title = null;