mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Make all Tika metadata properties available, as well as existing specific ones
Following discussions with Neil, make all the Tika-supplied properties available after the extraction, in case users wish to map them in a standard way onto their content model. Per-extractor specific names are still retained too. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20649 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -117,6 +117,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
* Version which also tries the ISO-8601 formats (in order..),
|
||||
* and similar formats, which Tika makes use of
|
||||
*/
|
||||
@Override
|
||||
protected Date makeDate(String dateStr) {
|
||||
// Try our formats first, in order
|
||||
for(DateFormat df : this.tikaDateFormats) {
|
||||
@@ -168,11 +169,25 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
|
||||
parser.parse(is, handler, metadata, context);
|
||||
|
||||
// First up, copy all the Tika metadata over
|
||||
// This allows people to map any of the Tika
|
||||
// keys onto their own content model
|
||||
for(String tikaKey : metadata.names()) {
|
||||
putRawValue(tikaKey, metadata.get(tikaKey), rawProperties);
|
||||
}
|
||||
|
||||
// Now, map the common Tika metadata keys onto
|
||||
// the common Alfresco metadata keys. This allows
|
||||
// existing mapping properties files to continue
|
||||
// to work without needing any changes
|
||||
|
||||
// The simple ones
|
||||
putRawValue(KEY_AUTHOR, metadata.get(Metadata.AUTHOR), rawProperties);
|
||||
putRawValue(KEY_TITLE, metadata.get(Metadata.TITLE), rawProperties);
|
||||
putRawValue(KEY_COMMENTS, metadata.get(Metadata.COMMENTS), rawProperties);
|
||||
|
||||
// Not everything is as consistent about these two as you might hope
|
||||
// Get the subject and description, despite things not
|
||||
// being nearly as consistent as one might hope
|
||||
String subject = metadata.get(Metadata.SUBJECT);
|
||||
String description = metadata.get(Metadata.DESCRIPTION);
|
||||
if(subject != null && description != null) {
|
||||
@@ -193,6 +208,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties);
|
||||
}
|
||||
|
||||
// If people created a specific instance
|
||||
// (eg OfficeMetadataExtractor), then allow that
|
||||
// instance to map the Tika keys onto its
|
||||
// existing namespace so that older properties
|
||||
// files continue to map correctly
|
||||
rawProperties = extractSpecific(metadata, rawProperties);
|
||||
}
|
||||
finally
|
||||
|
Reference in New Issue
Block a user