MNT-15909: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2)

124244 abalmus: MNT-15497 : Keyword tags generated from metadata extraction are formed into a single string rather than split on delimiter
      - Fixed tag separation on delimiter
      - Enhanced existing test


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.1.N/root@124313 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andreea Dragoi
2016-03-21 15:24:58 +00:00
parent 88c3c0e730
commit 77cbbce481
2 changed files with 43 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
* Copyright (C) 2005-2016 Alfresco Software Limited.
*
* This file is part of Alfresco
*
@@ -23,6 +23,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
@@ -97,6 +98,7 @@ public abstract class TikaPoweredMetadataExtracter
// Names under which extracted values are stored in the raw property map
// (see the putRawValue(KEY_..., ..., rawProperties) calls in this class).
protected static final String KEY_CREATED = "created";
protected static final String KEY_DESCRIPTION = "description";
protected static final String KEY_COMMENTS = "comments";
// Doubles as the metadata name the tag values are read from and the raw
// property name they are stored under.
protected static final String KEY_TAGS = "dc:subject";
private DateTimeFormatter tikaUTCDateFormater;
private DateTimeFormatter tikaDateFormater;
@@ -104,6 +106,18 @@ public abstract class TikaPoweredMetadataExtracter
private String extractorContext = null;
private String metadataSeparator = ","; // Default separator.
/**
 * Returns the separator used when a single metadata value is split into
 * several values.
 *
 * @return the current separator; {@code ","} unless it has been replaced
 *         through {@link #setMetadataSeparator(String)}
 */
public String getMetadataSeparator()
{
    return this.metadataSeparator;
}
/**
 * Replaces the separator used when a single metadata value is split into
 * several values.
 * <p>
 * The value is handed unchanged to {@link String#split(String)}, so it is
 * interpreted as a regular expression: characters such as {@code |} or
 * {@code .} must be escaped by the caller to be matched literally.
 *
 * @param metadataSeparator the new separator; no validation is performed here
 */
public void setMetadataSeparator(String metadataSeparator)
{
this.metadataSeparator = metadataSeparator;
}
/**
* Builds up a list of supported mime types by merging
* an explicit list with any that Tika also claims to support
@@ -400,7 +414,10 @@ public abstract class TikaPoweredMetadataExtracter
putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);
// Tags
putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
// Get the subject and description, despite things not
// being nearly as consistent as one might hope
String subject = getMetadataValue(metadata, Metadata.SUBJECT);
@@ -504,6 +521,28 @@ public abstract class TikaPoweredMetadataExtracter
OutputStream outputStream = writer.getContentOutputStream();
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
}
/**
 * Reads every value stored under {@code key}, splits each one on the
 * configured metadata separator and returns the distinct, trimmed,
 * non-blank tokens in first-seen order.
 * <p>
 * Tokens that are empty after trimming (for example the middle token of
 * {@code "a,,b"}, or a value that is entirely whitespace) are discarded so
 * that they never surface as empty tags.
 *
 * @param metadata the extracted metadata to read from
 * @param key      the metadata name whose values should be collected
 * @return {@code null} when no usable token was found, a {@code String}
 *         when exactly one distinct token was found, otherwise a
 *         {@code String[]} holding all distinct tokens
 */
private Serializable getMetadataValues(Metadata metadata, String key)
{
    // LinkedHashSet removes duplicates while preserving encounter order.
    Set<String> distinctValues = new LinkedHashSet<String>();

    for (String rawValue : metadata.getValues(key))
    {
        if (rawValue == null)
        {
            continue;
        }

        // NOTE: String.split interprets the separator as a regular expression.
        for (String part : rawValue.split(metadataSeparator))
        {
            String token = part.trim();
            // Skip blanks produced by doubled separators or whitespace-only input.
            if (!token.isEmpty())
            {
                distinctValues.add(token);
            }
        }
    }

    if (distinctValues.isEmpty())
    {
        return null;
    }
    if (distinctValues.size() == 1)
    {
        // A single value is returned as a plain String rather than a one-element array.
        return distinctValues.iterator().next();
    }
    return distinctValues.toArray(new String[distinctValues.size()]);
}
private String getMetadataValue(Metadata metadata, String key)
{

View File

@@ -22,6 +22,7 @@ import java.io.File;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -274,6 +275,7 @@ public void testImageVideo() throws Throwable {
// Check regular Tika properties
assertEquals(QUICK_TITLE, p.get(Metadata.COMMENT));
assertEquals("canon-55-250, moscow-birds, serbor", p.get(Metadata.SUBJECT));
assertTrue(Arrays.equals(new String[] { "canon-55-250", "moscow-birds", "serbor" }, (String[]) p.get("dc:subject")));
// Check namespace'd Tika properties
assertEquals("12.54321", p.get("geo:lat"));
assertEquals("-54.1234", p.get("geo:long"));