Merged HEAD-BUG-FIX (5.1/Cloud) to HEAD (5.1/Cloud)

100990: Merged 5.0.N (5.0.2) to HEAD-BUG-FIX (5.1/Cloud)
      100834: Merged V4.2-BUG-FIX (4.2.5) to 5.0.N (5.0.2)
         100784: Merged DEV to V4.2-BUG-FIX (4.2.5)
            100732: MNT-13655 : Just first keyword of the IPTC keywords list is extracted as metadata and put into description field of an image
               - Added special handling for multi-valued metadata properties retrieved from the parser.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@101005 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Alan Davis
2015-04-01 01:27:45 +00:00
parent fd90fbf2a7
commit f70dc05311
2 changed files with 34 additions and 7 deletions

View File

@@ -27,8 +27,10 @@ import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.repo.content.MimetypeMap;
@@ -386,7 +388,7 @@ public abstract class TikaPoweredMetadataExtracter
// keys onto their own content model
for(String tikaKey : metadata.names())
{
putRawValue(tikaKey, metadata.get(tikaKey), rawProperties);
putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties);
}
// Now, map the common Tika metadata keys onto
@@ -395,14 +397,14 @@ public abstract class TikaPoweredMetadataExtracter
// to work without needing any changes
// The simple ones
putRawValue(KEY_AUTHOR, metadata.get(Metadata.AUTHOR), rawProperties);
putRawValue(KEY_TITLE, metadata.get(Metadata.TITLE), rawProperties);
putRawValue(KEY_COMMENTS, metadata.get(Metadata.COMMENTS), rawProperties);
putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);
// Get the subject and description, despite things not
// being nearly as consistent as one might hope
String subject = metadata.get(Metadata.SUBJECT);
String description = metadata.get(Metadata.DESCRIPTION);
String subject = getMetadataValue(metadata, Metadata.SUBJECT);
String description = getMetadataValue(metadata, Metadata.DESCRIPTION);
if(subject != null && description != null)
{
putRawValue(KEY_DESCRIPTION, description, rawProperties);
@@ -503,6 +505,31 @@ public abstract class TikaPoweredMetadataExtracter
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
}
/**
 * Looks up the value held for <code>key</code> in the supplied metadata.
 * <p>
 * When the metadata reports the key as multi-valued, every value is
 * collected, repeated values are dropped (the first occurrence keeps
 * its position) and the remainder is returned as one string with the
 * entries separated by <code>", "</code>. Otherwise the result of
 * <code>metadata.get(key)</code> is returned unchanged.
 *
 * @param metadata the metadata to read from
 * @param key      the name of the metadata entry to look up
 * @return the single value, or the comma-separated list of distinct values
 */
private String getMetadataValue(Metadata metadata, String key)
{
    // Single-valued entries need no special treatment
    if (!metadata.isMultiValued(key))
    {
        return metadata.get(key);
    }

    // An insertion-ordered set removes duplicates without reordering
    Set<String> distinctValues = new LinkedHashSet<String>();
    for (String rawValue : metadata.getValues(key))
    {
        distinctValues.add(rawValue);
    }

    // Join with ", " - the same text a collection's toString() yields
    // between its enclosing brackets
    StringBuilder joined = new StringBuilder();
    boolean firstEntry = true;
    for (String distinctValue : distinctValues)
    {
        if (!firstEntry)
        {
            joined.append(", ");
        }
        joined.append(distinctValue);
        firstEntry = false;
    }
    return joined.toString();
}
/**
* This content handler will capture entries from within
* the header of the Tika content XHTML, but ignore the