mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Complete initial Tika-ification of the metadata extractor
The remaining extractors that were due to be converted to Tika have now been converted, tests have been added for the image metadata extraction, and some extension points for future extractors have been created. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20669 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -66,7 +66,7 @@ public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
Map<String, Serializable> properties) {
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
|
putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
|
||||||
putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
|
putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
|
||||||
return properties;
|
return properties;
|
||||||
|
@@ -81,7 +81,7 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
Map<String, Serializable> properties) {
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties);
|
putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties);
|
||||||
putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties);
|
putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties);
|
||||||
putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties);
|
putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties);
|
||||||
|
@@ -18,20 +18,14 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content.metadata;
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
|
||||||
import org.apache.poi.hsmf.MAPIMessage;
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
//import org.apache.tika.parser.microsoft.OutlookExtractor; // TODO fix import
|
import org.apache.tika.parser.microsoft.OfficeParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Outlook MAPI format email meta-data extractor extracting the following values:
|
* Outlook MAPI format email meta-data extractor extracting the following values:
|
||||||
@@ -41,6 +35,9 @@ import org.apache.tika.parser.Parser;
|
|||||||
* <b>addressee:</b> -- cm:addressee
|
* <b>addressee:</b> -- cm:addressee
|
||||||
* <b>addressees:</b> -- cm:addressees
|
* <b>addressees:</b> -- cm:addressees
|
||||||
* <b>subjectLine:</b> -- cm:subjectline, cm:description
|
* <b>subjectLine:</b> -- cm:subjectline, cm:description
|
||||||
|
* <b>toNames:</b> --
|
||||||
|
* <b>ccNames:</b> --
|
||||||
|
* <b>bccNames:</b> --
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* TIKA note - to/cc/bcc go into the html part, not the metadata.
|
* TIKA note - to/cc/bcc go into the html part, not the metadata.
|
||||||
@@ -56,6 +53,9 @@ public class MailMetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
private static final String KEY_ADDRESSEE = "addressee";
|
private static final String KEY_ADDRESSEE = "addressee";
|
||||||
private static final String KEY_ADDRESSEES = "addressees";
|
private static final String KEY_ADDRESSEES = "addressees";
|
||||||
private static final String KEY_SUBJECT = "subjectLine";
|
private static final String KEY_SUBJECT = "subjectLine";
|
||||||
|
private static final String KEY_TO_NAMES = "toNames";
|
||||||
|
private static final String KEY_CC_NAMES = "ccNames";
|
||||||
|
private static final String KEY_BCC_NAMES = "bccNames";
|
||||||
|
|
||||||
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||||
new String[] {MimetypeMap.MIMETYPE_OUTLOOK_MSG},
|
new String[] {MimetypeMap.MIMETYPE_OUTLOOK_MSG},
|
||||||
@@ -69,58 +69,29 @@ public class MailMetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Parser getParser() {
|
protected Parser getParser() {
|
||||||
//return new OutlookExtractor(); // TODO fix import
|
// The office parser does Outlook as well as Word, Excel etc
|
||||||
return null;
|
return new OfficeParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
Map<String, Serializable> properties) {
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
// TODO move things from extractRaw to here
|
putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties);
|
||||||
|
putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties);
|
||||||
|
putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties);
|
||||||
|
putRawValue(KEY_SENT_DATE, metadata.get(Metadata.LAST_SAVED), properties);
|
||||||
|
|
||||||
|
// Store the TO, but not cc/bcc in the addressee field
|
||||||
|
putRawValue(KEY_ADDRESSEE, metadata.get(Metadata.MESSAGE_TO), properties);
|
||||||
|
|
||||||
|
// Store each of To, CC and BCC in their own fields
|
||||||
|
putRawValue(KEY_TO_NAMES, metadata.get(Metadata.MESSAGE_TO), properties);
|
||||||
|
putRawValue(KEY_CC_NAMES, metadata.get(Metadata.MESSAGE_CC), properties);
|
||||||
|
putRawValue(KEY_BCC_NAMES, metadata.get(Metadata.MESSAGE_BCC), properties);
|
||||||
|
|
||||||
|
// But store all email addresses (to/cc/bcc) in the addresses field
|
||||||
|
putRawValue(KEY_ADDRESSEES, metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS), properties);
|
||||||
|
|
||||||
return properties;
|
return properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
|
||||||
{
|
|
||||||
// TODO remove this in favour of extractSpecific
|
|
||||||
final Map<String, Serializable> rawProperties = newRawMap();
|
|
||||||
|
|
||||||
InputStream is = null;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
is = reader.getContentInputStream();
|
|
||||||
MAPIMessage msg;
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
msg = new MAPIMessage(is);
|
|
||||||
msg.setReturnNullOnMissingChunk(true);
|
|
||||||
|
|
||||||
putRawValue(KEY_ORIGINATOR, msg.getDisplayFrom(), rawProperties);
|
|
||||||
putRawValue(KEY_SUBJECT, msg.getSubject(), rawProperties);
|
|
||||||
putRawValue(KEY_SENT_DATE, msg.getMessageDate().getTime(), rawProperties);
|
|
||||||
|
|
||||||
// Store the TO, but not cc/bcc in the addressee field
|
|
||||||
putRawValue(KEY_ADDRESSEE, msg.getDisplayTo(), rawProperties);
|
|
||||||
// But store all email addresses (to/cc/bcc) in the addresses field
|
|
||||||
putRawValue(KEY_ADDRESSEES, msg.getRecipientEmailAddressList(), rawProperties);
|
|
||||||
}
|
|
||||||
catch (IOException err)
|
|
||||||
{
|
|
||||||
// probably not an Outlook format MSG - ignore for now
|
|
||||||
if (logger.isWarnEnabled())
|
|
||||||
logger.warn("Unable to extract meta-data from message: " + err.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
if (is != null)
|
|
||||||
{
|
|
||||||
try { is.close(); } catch (IOException e) {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Done
|
|
||||||
return rawProperties;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@@ -113,19 +113,14 @@ public class MailMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_ADDRESSEE)));
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_ADDRESSEE)));
|
||||||
|
|
||||||
// Addressees
|
// Addressees
|
||||||
Collection<String> addressees = (Collection<String>)properties.get(ContentModel.PROP_ADDRESSEES);
|
|
||||||
assertTrue(
|
assertTrue(
|
||||||
"Property " + ContentModel.PROP_ADDRESSEES + " not found for mimetype " + mimetype,
|
"Property " + ContentModel.PROP_ADDRESSEES + " not found for mimetype " + mimetype,
|
||||||
addressees != null
|
properties.get(ContentModel.PROP_ADDRESSEES) != null
|
||||||
);
|
);
|
||||||
assertEquals(
|
|
||||||
"Property " + ContentModel.PROP_ADDRESSEES + " wrong size for mimetype " + mimetype,
|
|
||||||
1,
|
|
||||||
addressees.size());
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Property " + ContentModel.PROP_ADDRESSEES + " wrong content for mimetype " + mimetype,
|
"Property " + ContentModel.PROP_ADDRESSEES + " wrong content for mimetype " + mimetype,
|
||||||
"kevin.roast@alfresco.org",
|
"kevin.roast@alfresco.org",
|
||||||
DefaultTypeConverter.INSTANCE.convert(String.class, addressees.iterator().next()));
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_ADDRESSEES)));
|
||||||
|
|
||||||
// Subject Line
|
// Subject Line
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@@ -49,7 +49,7 @@ import org.apache.tika.parser.microsoft.OfficeParser;
|
|||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* Uses Apache Tika
|
* Uses Apache Tika
|
||||||
|
*
|
||||||
* @author Derek Hulley
|
* @author Derek Hulley
|
||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
*/
|
*/
|
||||||
@@ -92,7 +92,7 @@ public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
Map<String, Serializable> properties) {
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
|
putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
|
||||||
putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
|
putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
|
||||||
putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
|
putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
|
||||||
|
@@ -18,25 +18,19 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content.metadata;
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.Arrays;
|
import java.util.ArrayList;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
|
||||||
import org.alfresco.service.namespace.QName;
|
import org.alfresco.service.namespace.QName;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.parser.odf.OpenDocumentParser;
|
import org.apache.tika.parser.odf.OpenDocumentParser;
|
||||||
import org.apache.tika.sax.BodyContentHandler;
|
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -59,31 +53,30 @@ import org.xml.sax.ContentHandler;
|
|||||||
* <b>All user properties</b>
|
* <b>All user properties</b>
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* TIKA Note - this has been converted to deep-call into Tika.
|
* Uses Apache Tika
|
||||||
* This will be replaced with proper calls to Tika at a later date.
|
*
|
||||||
* Everything except some Print info has been ported to Tika.
|
* TODO decide if we need the few print info bits that
|
||||||
|
* Tika currently doesn't handle
|
||||||
*
|
*
|
||||||
* @author Antti Jokipii
|
* @author Antti Jokipii
|
||||||
* @author Derek Hulley
|
* @author Derek Hulley
|
||||||
*/
|
*/
|
||||||
public class OpenDocumentMetadataExtracter extends AbstractMappingMetadataExtracter
|
public class OpenDocumentMetadataExtracter extends TikaPoweredMetadataExtracter
|
||||||
{
|
{
|
||||||
private static final String KEY_CREATION_DATE = "creationDate";
|
private static final String KEY_CREATION_DATE = "creationDate";
|
||||||
private static final String KEY_CREATOR = "creator";
|
private static final String KEY_CREATOR = "creator";
|
||||||
private static final String KEY_DATE = "date";
|
private static final String KEY_DATE = "date";
|
||||||
private static final String KEY_DESCRIPTION = "description";
|
|
||||||
private static final String KEY_GENERATOR = "generator";
|
private static final String KEY_GENERATOR = "generator";
|
||||||
private static final String KEY_INITIAL_CREATOR = "initialCreator";
|
private static final String KEY_INITIAL_CREATOR = "initialCreator";
|
||||||
private static final String KEY_KEYWORD = "keyword";
|
private static final String KEY_KEYWORD = "keyword";
|
||||||
private static final String KEY_LANGUAGE = "language";
|
private static final String KEY_LANGUAGE = "language";
|
||||||
private static final String KEY_PRINT_DATE = "printDate";
|
private static final String KEY_PRINT_DATE = "printDate";
|
||||||
private static final String KEY_PRINTED_BY = "printedBy";
|
private static final String KEY_PRINTED_BY = "printedBy";
|
||||||
private static final String KEY_SUBJECT = "subject";
|
|
||||||
private static final String KEY_TITLE = "title";
|
|
||||||
|
|
||||||
private static final String CUSTOM_PREFIX = "custom:";
|
private static final String CUSTOM_PREFIX = "custom:";
|
||||||
|
|
||||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||||
|
new String[] {
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE,
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE,
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS,
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS,
|
||||||
@@ -100,71 +93,55 @@ public class OpenDocumentMetadataExtracter extends AbstractMappingMetadataExtrac
|
|||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE,
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE,
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_MASTER,
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_MASTER,
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_WEB,
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_WEB,
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_DATABASE };
|
MimetypeMap.MIMETYPE_OPENDOCUMENT_DATABASE
|
||||||
|
}, new OpenDocumentParser()
|
||||||
|
);
|
||||||
|
|
||||||
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss");
|
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss");
|
||||||
|
|
||||||
public OpenDocumentMetadataExtracter()
|
public OpenDocumentMetadataExtracter()
|
||||||
{
|
{
|
||||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
super(SUPPORTED_MIMETYPES);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Parser getParser() {
|
||||||
|
return new OpenDocumentParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
{
|
Map<String, Serializable> properties, Map<String, String> headers) {
|
||||||
Map<String, Serializable> rawProperties = newRawMap();
|
putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
|
||||||
|
putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
|
||||||
InputStream is = null;
|
putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
|
||||||
try
|
putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties);
|
||||||
{
|
putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
|
||||||
is = reader.getContentInputStream();
|
putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
|
||||||
|
putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
|
||||||
OpenDocumentParser docParser = new OpenDocumentParser();
|
putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties);
|
||||||
ContentHandler handler = new BodyContentHandler() ;
|
// putRawValue(KEY_PRINT_DATE, getDateOrNull(metadata.get(Metadata.)), rawProperties);
|
||||||
Metadata metadata = new Metadata();
|
// putRawValue(KEY_PRINTED_BY, metadata.get(Metadata.), rawProperties);
|
||||||
ParseContext context = new ParseContext();
|
|
||||||
|
// Handle user-defined properties dynamically
|
||||||
docParser.parse(is, handler, metadata, context);
|
Map<String, Set<QName>> mapping = super.getMapping();
|
||||||
|
for (String key : mapping.keySet())
|
||||||
putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), rawProperties);
|
{
|
||||||
putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), rawProperties);
|
if (metadata.get(CUSTOM_PREFIX + key) != null)
|
||||||
putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), rawProperties);
|
{
|
||||||
putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), rawProperties);
|
putRawValue(key, metadata.get(CUSTOM_PREFIX + key), properties);
|
||||||
putRawValue(KEY_GENERATOR, metadata.get("generator"), rawProperties);
|
}
|
||||||
putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), rawProperties);
|
}
|
||||||
putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), rawProperties);
|
|
||||||
putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), rawProperties);
|
return properties;
|
||||||
// putRawValue(KEY_PRINT_DATE, getDateOrNull(metadata.get(Metadata.)), rawProperties);
|
|
||||||
// putRawValue(KEY_PRINTED_BY, metadata.get(Metadata.), rawProperties);
|
|
||||||
putRawValue(KEY_SUBJECT, metadata.get(Metadata.SUBJECT), rawProperties);
|
|
||||||
putRawValue(KEY_TITLE, metadata.get(Metadata.TITLE), rawProperties);
|
|
||||||
|
|
||||||
// Handle user-defined properties dynamically
|
|
||||||
Map<String, Set<QName>> mapping = super.getMapping();
|
|
||||||
for (String key : mapping.keySet())
|
|
||||||
{
|
|
||||||
if (metadata.get(CUSTOM_PREFIX + key) != null)
|
|
||||||
{
|
|
||||||
putRawValue(key, metadata.get(CUSTOM_PREFIX + key), rawProperties);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
if (is != null)
|
|
||||||
{
|
|
||||||
try { is.close(); } catch (IOException e) {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Done
|
|
||||||
return rawProperties;
|
|
||||||
}
|
}
|
||||||
|
private Date getDateOrNull(String dateString)
|
||||||
private Date getDateOrNull(String dateString) throws ParseException
|
|
||||||
{
|
{
|
||||||
if (dateString != null && dateString.length() != 0)
|
if (dateString != null && dateString.length() != 0)
|
||||||
{
|
{
|
||||||
return dateFormat.parse(dateString);
|
try {
|
||||||
|
return dateFormat.parse(dateString);
|
||||||
|
} catch(ParseException e) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
@@ -35,8 +35,8 @@ import org.alfresco.util.PropertyCheck;
|
|||||||
* <b>description:</b> -- cm:description
|
* <b>description:</b> -- cm:description
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* TIKA Note - this probably won't be ported to TIKA. There's currently
|
* Note - not converted to Apache Tika, as currently Tika
|
||||||
* no support for these old formats in tika.
|
* lacks support for these older formats
|
||||||
*
|
*
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
*/
|
*/
|
||||||
|
@@ -18,22 +18,13 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content.metadata;
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.util.ArrayList;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Calendar;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
import org.apache.tika.parser.pdf.PDFParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Metadata extractor for the PDF documents.
|
* Metadata extractor for the PDF documents.
|
||||||
@@ -42,115 +33,31 @@ import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
|||||||
* <b>title:</b> -- cm:title
|
* <b>title:</b> -- cm:title
|
||||||
* <b>subject:</b> -- cm:description
|
* <b>subject:</b> -- cm:description
|
||||||
* <b>created:</b> -- cm:created
|
* <b>created:</b> -- cm:created
|
||||||
* <b>Any custom property:</b> -- [not mapped]
|
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* TIKA Note - all the fields (plus a few others) are present
|
* Uses Apache Tika
|
||||||
* in the tika metadata.
|
*
|
||||||
|
* TODO - Update Tika to handle custom metadata
|
||||||
*
|
*
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
* @author Derek Hulley
|
* @author Derek Hulley
|
||||||
*/
|
*/
|
||||||
public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
|
public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter
|
||||||
{
|
{
|
||||||
protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
|
protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
|
||||||
|
|
||||||
private static final String KEY_AUTHOR = "author";
|
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||||
private static final String KEY_TITLE = "title";
|
new String[] { MimetypeMap.MIMETYPE_PDF },
|
||||||
private static final String KEY_SUBJECT = "subject";
|
new PDFParser()
|
||||||
private static final String KEY_CREATED = "created";
|
);
|
||||||
|
|
||||||
public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_PDF };
|
|
||||||
|
|
||||||
public PdfBoxMetadataExtracter()
|
public PdfBoxMetadataExtracter()
|
||||||
{
|
{
|
||||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
super(SUPPORTED_MIMETYPES);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
protected Parser getParser() {
|
||||||
{
|
return new PDFParser();
|
||||||
Map<String, Serializable> rawProperties = newRawMap();
|
|
||||||
|
|
||||||
PDDocument pdf = null;
|
|
||||||
InputStream is = null;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
is = reader.getContentInputStream();
|
|
||||||
// stream the document in
|
|
||||||
pdf = PDDocument.load(is);
|
|
||||||
if (!pdf.isEncrypted())
|
|
||||||
{
|
|
||||||
// Scoop out the metadata
|
|
||||||
PDDocumentInformation docInfo = pdf.getDocumentInformation();
|
|
||||||
|
|
||||||
putRawValue(KEY_AUTHOR, docInfo.getAuthor(), rawProperties);
|
|
||||||
putRawValue(KEY_TITLE, docInfo.getTitle(), rawProperties);
|
|
||||||
putRawValue(KEY_SUBJECT, docInfo.getSubject(), rawProperties);
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
Calendar created = docInfo.getCreationDate();
|
|
||||||
if (created != null)
|
|
||||||
{
|
|
||||||
// Work around https://issues.apache.org/jira/browse/PDFBOX-598
|
|
||||||
created.set(Calendar.MILLISECOND, 0);
|
|
||||||
|
|
||||||
// Save
|
|
||||||
putRawValue(KEY_CREATED, created.getTime(), rawProperties);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (IOException iox)
|
|
||||||
{
|
|
||||||
// This sometimes fails because the date is a string: ETHREEOH-1936
|
|
||||||
// Alfresco bug ETHREEOH-801 refers to a bug in PDFBox (http://issues.apache.org/jira/browse/PDFBOX-145)
|
|
||||||
// where the above call to docInfo.getCreationDate() throws an IOException for some PDFs.
|
|
||||||
//
|
|
||||||
// The code below is a workaround for that issue.
|
|
||||||
|
|
||||||
// This creationDate has format: D:20080429+01'00'
|
|
||||||
String creationDate = docInfo.getCustomMetadataValue("CreationDate");
|
|
||||||
|
|
||||||
if (pdfLogger.isWarnEnabled())
|
|
||||||
{
|
|
||||||
pdfLogger.warn("IOException caught when extracting metadata from pdf file.");
|
|
||||||
pdfLogger.warn("This may be caused by a PDFBox bug that can often be worked around. The stack trace below is provided for information purposes only.");
|
|
||||||
pdfLogger.warn("", iox);
|
|
||||||
}
|
|
||||||
|
|
||||||
final SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
|
|
||||||
if (creationDate != null && creationDate.length() > 10) // 10 allows for "D:yyyyMMdd"
|
|
||||||
{
|
|
||||||
String dateWithoutLeadingDColon = creationDate.substring(2);
|
|
||||||
Date parsedDate = sdf.parse(dateWithoutLeadingDColon);
|
|
||||||
putRawValue(KEY_CREATED, parsedDate, rawProperties);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Extract remaining custom properties
|
|
||||||
for (String customProp : super.getMapping().keySet())
|
|
||||||
{
|
|
||||||
if (rawProperties.keySet().contains(customProp))
|
|
||||||
{
|
|
||||||
// Ignore it
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
String customValue = docInfo.getCustomMetadataValue(customProp);
|
|
||||||
putRawValue(customProp, customValue, rawProperties);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
if (is != null)
|
|
||||||
{
|
|
||||||
try { is.close(); } catch (IOException e) {}
|
|
||||||
}
|
|
||||||
if (pdf != null)
|
|
||||||
{
|
|
||||||
try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Done
|
|
||||||
return rawProperties;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -30,8 +30,9 @@ import java.util.Map;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.alfresco.model.ContentModel;
|
import org.alfresco.model.ContentModel;
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||||
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||||
import org.alfresco.service.namespace.QName;
|
import org.alfresco.service.namespace.QName;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
@@ -39,7 +40,6 @@ import org.apache.tika.mime.MediaType;
|
|||||||
import org.apache.tika.parser.AutoDetectParser;
|
import org.apache.tika.parser.AutoDetectParser;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.parser.dwg.DWGParser;
|
|
||||||
import org.apache.tika.parser.microsoft.OfficeParser;
|
import org.apache.tika.parser.microsoft.OfficeParser;
|
||||||
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
|
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
|
||||||
import org.apache.tika.parser.mp3.Mp3Parser;
|
import org.apache.tika.parser.mp3.Mp3Parser;
|
||||||
@@ -181,5 +181,53 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
// mimetype,
|
// mimetype,
|
||||||
// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(TIKA_MIMETYPE_TEST_PROPERTY)));
|
// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(TIKA_MIMETYPE_TEST_PROPERTY)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We don't have explicit extractors for most image and video formats.
|
||||||
|
* Instead, these will be handled by the Auto Tika Parser, and
|
||||||
|
* this test ensures that they are
|
||||||
|
*/
|
||||||
|
public void testImageVideo() throws Throwable {
|
||||||
|
Map<String, Serializable> p;
|
||||||
|
|
||||||
|
// Image
|
||||||
|
p = openAndCheck(".jpg", "image/jpeg");
|
||||||
|
assertEquals("409 pixels", p.get("Image Width"));
|
||||||
|
assertEquals("92 pixels", p.get("Image Height"));
|
||||||
|
assertEquals("8 bits", p.get("Data Precision"));
|
||||||
|
|
||||||
|
p = openAndCheck(".gif", "image/gif");
|
||||||
|
assertEquals("409", p.get("width"));
|
||||||
|
assertEquals("92", p.get("height"));
|
||||||
|
|
||||||
|
p = openAndCheck(".png", "image/png");
|
||||||
|
assertEquals("409", p.get("width"));
|
||||||
|
assertEquals("92", p.get("height"));
|
||||||
|
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
||||||
|
assertEquals("none", p.get("Transparency Alpha"));
|
||||||
|
|
||||||
|
p = openAndCheck(".bmp", "image/bmp");
|
||||||
|
assertEquals("409", p.get("width"));
|
||||||
|
assertEquals("92", p.get("height"));
|
||||||
|
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
||||||
|
}
|
||||||
|
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
|
||||||
|
String filename = "quick" + fileBase;
|
||||||
|
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
|
||||||
|
File file = new File(url.getFile());
|
||||||
|
|
||||||
|
// Cheat and ask Tika for the mime type!
|
||||||
|
AutoDetectParser ap = new AutoDetectParser();
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
|
metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
|
||||||
|
MediaType mt = ap.getDetector().detect(
|
||||||
|
new BufferedInputStream(new FileInputStream(file)), metadata);
|
||||||
|
String mimetype = mt.toString();
|
||||||
|
|
||||||
|
assertEquals("Wrong mimetype for " + fileBase, mimetype, expMimeType);
|
||||||
|
|
||||||
|
ContentReader sourceReader = new FileContentReader(file);
|
||||||
|
sourceReader.setMimetype(mimetype);
|
||||||
|
return extracter.extractRaw(sourceReader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -26,6 +26,7 @@ import java.text.ParseException;
|
|||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -35,11 +36,17 @@ import org.apache.commons.logging.Log;
|
|||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.parser.AutoDetectParser;
|
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.sax.BodyContentHandler;
|
import org.apache.tika.sax.ContentHandlerDecorator;
|
||||||
|
import org.apache.tika.sax.XHTMLContentHandler;
|
||||||
|
import org.apache.tika.sax.xpath.Matcher;
|
||||||
|
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
||||||
|
import org.apache.tika.sax.xpath.XPathParser;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.ContentHandler;
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.Locator;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The parent of all Metadata Extractors which use
|
* The parent of all Metadata Extractors which use
|
||||||
@@ -145,11 +152,20 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
*/
|
*/
|
||||||
protected abstract Parser getParser();
|
protected abstract Parser getParser();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do we care about the contents of the
|
||||||
|
* extracted header, or nothing at all?
* <p>
* When this returns true, the header tags and attributes of the
* parsed document are captured into a map which is then passed to
* extractSpecific. When it returns false, all of the parsed content
* is ignored and extractSpecific is given null for the headers.
* This default implementation returns false.
*
* @return true if the header contents should be captured
|
||||||
|
*/
|
||||||
|
protected boolean needHeaderContents() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows implementation specific mappings
|
* Allows implementation specific mappings
|
||||||
* to be done.
|
* to be done.
|
||||||
*/
|
*/
|
||||||
protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties) {
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
return properties;
|
return properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,9 +179,19 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
{
|
{
|
||||||
is = reader.getContentInputStream();
|
is = reader.getContentInputStream();
|
||||||
Parser parser = getParser();
|
Parser parser = getParser();
|
||||||
ContentHandler handler = new BodyContentHandler() ;
|
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
ParseContext context = new ParseContext();
|
ParseContext context = new ParseContext();
|
||||||
|
|
||||||
|
ContentHandler handler;
|
||||||
|
Map<String,String> headers = null;
|
||||||
|
if(needHeaderContents()) {
|
||||||
|
MapCaptureContentHandler headerCapture =
|
||||||
|
new MapCaptureContentHandler();
|
||||||
|
headers = headerCapture.tags;
|
||||||
|
handler = new HeadContentHandler(headerCapture);
|
||||||
|
} else {
|
||||||
|
handler = new NullContentHandler();
|
||||||
|
}
|
||||||
|
|
||||||
parser.parse(is, handler, metadata, context);
|
parser.parse(is, handler, metadata, context);
|
||||||
|
|
||||||
@@ -213,7 +239,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
// instance to map the Tika keys onto its
|
// instance to map the Tika keys onto its
|
||||||
// existing namespace so that older properties
|
// existing namespace so that older properties
|
||||||
// files continue to map correctly
|
// files continue to map correctly
|
||||||
rawProperties = extractSpecific(metadata, rawProperties);
|
rawProperties = extractSpecific(metadata, rawProperties, headers);
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
@@ -225,4 +251,123 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
|
|
||||||
return rawProperties;
|
return rawProperties;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This content handler will capture entries from within
|
||||||
|
* the header of the Tika content XHTML, but ignore the
|
||||||
|
* rest.
|
||||||
|
*/
|
||||||
|
protected static class HeadContentHandler extends ContentHandlerDecorator {
|
||||||
|
/**
|
||||||
|
* XHTML XPath parser.
|
||||||
|
*/
|
||||||
|
private static final XPathParser PARSER =
|
||||||
|
new XPathParser("xhtml", XHTMLContentHandler.XHTML);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The XPath matcher used to select the XHTML body contents.
|
||||||
|
*/
|
||||||
|
private static final Matcher MATCHER =
|
||||||
|
PARSER.parse("/xhtml:html/xhtml:head/descendant:node()");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a content handler that passes all XHTML body events to the
|
||||||
|
* given underlying content handler.
|
||||||
|
*
|
||||||
|
* @param handler content handler
|
||||||
|
*/
|
||||||
|
protected HeadContentHandler(ContentHandler handler) {
|
||||||
|
super(new MatchingContentHandler(handler, MATCHER));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* This content handler will grab all tags and attributes,
|
||||||
|
* and record the textual content of the last seen one
|
||||||
|
* of them.
|
||||||
|
* Normally only used with {@link HeadContentHandler}
|
||||||
|
*/
|
||||||
|
protected static class MapCaptureContentHandler implements ContentHandler {
|
||||||
|
protected Map<String,String> tags =
|
||||||
|
new HashMap<String, String>();
|
||||||
|
private StringBuffer text;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void characters(char[] ch, int start, int len) {
|
||||||
|
if(text != null) {
|
||||||
|
text.append(ch, start, len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public void endElement(String namespace, String localname,
|
||||||
|
String qname) {
|
||||||
|
if(text != null && text.length() > 0) {
|
||||||
|
tags.put(qname, text.toString());
|
||||||
|
}
|
||||||
|
text = null;
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public void startElement(String namespace, String localname,
|
||||||
|
String qname, Attributes attrs) {
|
||||||
|
for(int i=0; i<attrs.getLength(); i++) {
|
||||||
|
tags.put(attrs.getQName(i), attrs.getValue(i));
|
||||||
|
}
|
||||||
|
text = new StringBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endDocument() throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void endPrefixMapping(String paramString) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void ignorableWhitespace(char[] paramArrayOfChar, int paramInt1,
|
||||||
|
int paramInt2) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void processingInstruction(String paramString1, String paramString2)
|
||||||
|
throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void setDocumentLocator(Locator paramLocator) {}
|
||||||
|
@Override
|
||||||
|
public void skippedEntity(String paramString) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void startDocument() throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void startPrefixMapping(String paramString1, String paramString2)
|
||||||
|
throws SAXException {}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* A content handler that ignores all the content it finds.
|
||||||
|
* Normally used when we only want the metadata, and don't
|
||||||
|
* care about the file contents.
|
||||||
|
*/
|
||||||
|
protected static class NullContentHandler implements ContentHandler {
|
||||||
|
@Override
|
||||||
|
public void characters(char[] paramArrayOfChar, int paramInt1,
|
||||||
|
int paramInt2) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void endDocument() throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void endElement(String paramString1, String paramString2,
|
||||||
|
String paramString3) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void endPrefixMapping(String paramString) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void ignorableWhitespace(char[] paramArrayOfChar, int paramInt1,
|
||||||
|
int paramInt2) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void processingInstruction(String paramString1, String paramString2)
|
||||||
|
throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void setDocumentLocator(Locator paramLocator) {}
|
||||||
|
@Override
|
||||||
|
public void skippedEntity(String paramString) throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void startDocument() throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void startElement(String paramString1, String paramString2,
|
||||||
|
String paramString3, Attributes paramAttributes)
|
||||||
|
throws SAXException {}
|
||||||
|
@Override
|
||||||
|
public void startPrefixMapping(String paramString1, String paramString2)
|
||||||
|
throws SAXException {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user