mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Fixed AR-327: Error during extraction of metadata from xhtml documents
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2097 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -44,6 +44,7 @@ public class MimetypeMap implements MimetypeService
|
|||||||
public static final String MIMETYPE_TEXT_CSS = "text/css";
|
public static final String MIMETYPE_TEXT_CSS = "text/css";
|
||||||
public static final String MIMETYPE_XML = "text/xml";
|
public static final String MIMETYPE_XML = "text/xml";
|
||||||
public static final String MIMETYPE_HTML = "text/html";
|
public static final String MIMETYPE_HTML = "text/html";
|
||||||
|
public static final String MIMETYPE_XHTML = "application/xhtml+xml";
|
||||||
public static final String MIMETYPE_PDF = "application/pdf";
|
public static final String MIMETYPE_PDF = "application/pdf";
|
||||||
public static final String MIMETYPE_WORD = "application/msword";
|
public static final String MIMETYPE_WORD = "application/msword";
|
||||||
public static final String MIMETYPE_EXCEL = "application/vnd.excel";
|
public static final String MIMETYPE_EXCEL = "application/vnd.excel";
|
||||||
|
@@ -22,7 +22,9 @@ import java.io.InputStreamReader;
|
|||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import javax.swing.text.ChangedCharSetException;
|
import javax.swing.text.ChangedCharSetException;
|
||||||
import javax.swing.text.MutableAttributeSet;
|
import javax.swing.text.MutableAttributeSet;
|
||||||
@@ -35,8 +37,6 @@ import org.alfresco.repo.content.MimetypeMap;
|
|||||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
import org.alfresco.service.namespace.QName;
|
import org.alfresco.service.namespace.QName;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
@@ -44,12 +44,16 @@ import org.apache.commons.logging.LogFactory;
|
|||||||
*/
|
*/
|
||||||
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||||
{
|
{
|
||||||
|
private static final Set<String> MIMETYPES = new HashSet<String>(5);
|
||||||
private static final Log logger = LogFactory.getLog(HtmlMetadataExtracter.class);
|
static
|
||||||
|
{
|
||||||
|
MIMETYPES.add(MimetypeMap.MIMETYPE_HTML);
|
||||||
|
MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML);
|
||||||
|
}
|
||||||
|
|
||||||
public HtmlMetadataExtracter()
|
public HtmlMetadataExtracter()
|
||||||
{
|
{
|
||||||
super(MimetypeMap.MIMETYPE_HTML, 1.0, 1000);
|
super(MIMETYPES, 1.0, 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
|
public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
|
||||||
@@ -95,7 +99,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
{
|
{
|
||||||
inHead = false;
|
inHead = false;
|
||||||
}
|
}
|
||||||
else if (HTML.Tag.TITLE.equals(t))
|
else if (HTML.Tag.TITLE.equals(t) && title != null)
|
||||||
{
|
{
|
||||||
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
|
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
|
||||||
title = null;
|
title = null;
|
||||||
|
Reference in New Issue
Block a user