Merged 5.2.N (5.2.1) to HEAD (5.2)

124364 adragoi: Merged 5.1.N (5.1.2) to 5.2.N (5.2.1)
      124313 adragoi: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2)
         124244 abalmus: MNT-15497 : Keyword tags generated from metadata extraction are formed into a single string rather than split on delimiter
            - Fixed tag separation on delimiter
            - Enhanced existing test


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@127752 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Alan Davis
2016-06-03 16:03:24 +00:00
parent bffa610ff9
commit c8aa156dda
2 changed files with 43 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
* Copyright (C) 2005-2016 Alfresco Software Limited.
*
* This file is part of Alfresco
*
@@ -23,6 +23,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
@@ -97,6 +98,7 @@ public abstract class TikaPoweredMetadataExtracter
// Keys under which extracted values are placed into the raw properties map.
// NOTE(review): only KEY_COMMENTS and KEY_TAGS are seen being used that way in this
// excerpt; KEY_CREATED and KEY_DESCRIPTION presumably follow the same pattern - confirm.
protected static final String KEY_CREATED = "created";
protected static final String KEY_DESCRIPTION = "description";
protected static final String KEY_COMMENTS = "comments";
// Used both as the raw property key for tags and as the metadata name the tag values are read from.
protected static final String KEY_TAGS = "dc:subject";
// NOTE(review): presumably the formatters used for date values reported by Tika;
// their initialisation and use are not visible in this excerpt - confirm.
private DateTimeFormatter tikaUTCDateFormater;
private DateTimeFormatter tikaDateFormater;
@@ -104,6 +106,18 @@ public abstract class TikaPoweredMetadataExtracter
private String extractorContext = null;
// Separator applied to multi-valued metadata (e.g. tags). The value is handed to
// String.split(...) as-is, so it is interpreted as a regular expression.
private String metadataSeparator = ","; // Default separator.
/**
 * @return the separator currently used to split multi-valued metadata
 *         (<code>","</code> unless it has been changed through the setter)
 */
public String getMetadataSeparator()
{
    return this.metadataSeparator;
}
/**
 * Replaces the separator used to split multi-valued metadata.
 * <p>
 * The value is passed unchanged to <code>String.split(...)</code>, so it is treated as a
 * regular expression; characters such as <code>|</code> or <code>.</code> need escaping
 * if they are meant literally.
 *
 * @param metadataSeparator the new separator
 */
public void setMetadataSeparator(String metadataSeparator)
{
    this.metadataSeparator = metadataSeparator;
}
/**
* Builds up a list of supported mime types by merging
* an explicit list with any that Tika also claims to support
@@ -400,7 +414,10 @@ public abstract class TikaPoweredMetadataExtracter
putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);
// Tags
putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
// Get the subject and description, despite things not
// being nearly as consistent as one might hope
String subject = getMetadataValue(metadata, Metadata.SUBJECT);
@@ -504,6 +521,28 @@ public abstract class TikaPoweredMetadataExtracter
OutputStream outputStream = writer.getContentOutputStream();
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
}
/**
 * Reads every value stored against <code>key</code>, splits each one on the configured
 * metadata separator and returns the distinct, trimmed, non-blank parts in the order in
 * which they were first seen.
 * <p>
 * The separator is applied through <code>String.split(...)</code> and is therefore
 * interpreted as a regular expression. A <code>null</code> or empty separator disables
 * splitting, so each value is taken as a whole.
 *
 * @param metadata the metadata to read the values from
 * @param key      the metadata name whose values should be collected
 * @return <code>null</code> when there is no non-blank value, a <code>String</code> when
 *         there is exactly one, otherwise a <code>String[]</code> holding all of them
 */
private Serializable getMetadataValues(Metadata metadata, String key)
{
    // Splitting on a null separator would throw and splitting on "" would break the
    // value into single characters, so in both cases keep each value intact.
    boolean splitValues = metadataSeparator != null && !metadataSeparator.isEmpty();

    // Use Set to prevent duplicates; the linked variant keeps first-seen order.
    Set<String> valuesSet = new LinkedHashSet<String>();
    for (String value : metadata.getValues(key))
    {
        if (value == null)
        {
            continue;
        }

        String[] parts = splitValues ? value.split(metadataSeparator) : new String[] { value };
        for (String part : parts)
        {
            String trimmed = part.trim();
            // Consecutive or padded separators ("a,,b", "a, ,b") must not produce blank tags.
            if (!trimmed.isEmpty())
            {
                valuesSet.add(trimmed);
            }
        }
    }

    if (valuesSet.isEmpty())
    {
        return null;
    }

    String[] values = valuesSet.toArray(new String[valuesSet.size()]);
    if (values.length == 1)
    {
        // A single value is returned as a plain String rather than a one-element array.
        return values[0];
    }
    return values;
}
private String getMetadataValue(Metadata metadata, String key)
{