mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-10-08 14:51:49 +00:00
MNT-15909: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2)
124244 abalmus: MNT-15497 : Keyword tags generated from metadata extraction are formed into a single string rather than split on delimiter - Fixed tag separation on delimiter - Enhanced existing test git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.1.N/root@124313 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2014 Alfresco Software Limited.
|
||||
* Copyright (C) 2005-2016 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
@@ -23,6 +23,7 @@ import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
@@ -97,6 +98,7 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
protected static final String KEY_CREATED = "created";
|
||||
protected static final String KEY_DESCRIPTION = "description";
|
||||
protected static final String KEY_COMMENTS = "comments";
|
||||
protected static final String KEY_TAGS = "dc:subject";
|
||||
|
||||
private DateTimeFormatter tikaUTCDateFormater;
|
||||
private DateTimeFormatter tikaDateFormater;
|
||||
@@ -104,6 +106,18 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
|
||||
private String extractorContext = null;
|
||||
|
||||
private String metadataSeparator = ","; // Default separator; used as the String.split pattern when splitting multi-valued metadata.
|
||||
|
||||
public String getMetadataSeparator()
|
||||
{
|
||||
return metadataSeparator;
|
||||
}
|
||||
|
||||
public void setMetadataSeparator(String metadataSeparator)
|
||||
{
|
||||
this.metadataSeparator = metadataSeparator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds up a list of supported mime types by merging
|
||||
* an explicit list with any that Tika also claims to support
|
||||
@@ -400,7 +414,10 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
|
||||
putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
|
||||
putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);
|
||||
|
||||
|
||||
// Tags
|
||||
putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
|
||||
|
||||
// Get the subject and description, despite things not
|
||||
// being nearly as consistent as one might hope
|
||||
String subject = getMetadataValue(metadata, Metadata.SUBJECT);
|
||||
@@ -504,6 +521,28 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
OutputStream outputStream = writer.getContentOutputStream();
|
||||
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
|
||||
}
|
||||
|
||||
private Serializable getMetadataValues(Metadata metadata, String key)
|
||||
{
|
||||
// Use Set to prevent duplicates.
|
||||
Set<String> valuesSet = new LinkedHashSet<String>();
|
||||
String[] values = metadata.getValues(key);
|
||||
|
||||
for (int i = 0; i < values.length; i++)
|
||||
{
|
||||
String[] parts = values[i].split(metadataSeparator);
|
||||
|
||||
for (String subPart : parts)
|
||||
{
|
||||
valuesSet.add(subPart.trim());
|
||||
}
|
||||
}
|
||||
|
||||
Object[] objArrayValues = valuesSet.toArray();
|
||||
values = Arrays.copyOf(objArrayValues, objArrayValues.length, String[].class);
|
||||
|
||||
return values.length == 0 ? null : (values.length == 1 ? values[0] : values);
|
||||
}
|
||||
|
||||
private String getMetadataValue(Metadata metadata, String key)
|
||||
{
|
||||
|
@@ -22,6 +22,7 @@ import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
@@ -274,6 +275,7 @@ public void testImageVideo() throws Throwable {
|
||||
// Check regular Tika properties
|
||||
assertEquals(QUICK_TITLE, p.get(Metadata.COMMENT));
|
||||
assertEquals("canon-55-250, moscow-birds, serbor", p.get(Metadata.SUBJECT));
|
||||
assertTrue(Arrays.equals(new String[] { "canon-55-250", "moscow-birds", "serbor" }, (String[]) p.get("dc:subject")));
|
||||
// Check namespace'd Tika properties
|
||||
assertEquals("12.54321", p.get("geo:lat"));
|
||||
assertEquals("-54.1234", p.get("geo:long"));
|
||||
|
Reference in New Issue
Block a user