mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-10-08 14:51:49 +00:00
Merged 5.2.N (5.2.1) to HEAD (5.2)
124364 adragoi: Merged 5.1.N (5.1.2) to 5.2.N (5.2.1)
124313 adragoi: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2)
124244 abalmus: MNT-15497: Keyword tags generated from metadata extraction are formed into a single string rather than split on the delimiter
- Fixed tag separation on delimiter
- Enhanced existing test
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@127752 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2005-2014 Alfresco Software Limited.
|
* Copyright (C) 2005-2016 Alfresco Software Limited.
|
||||||
*
|
*
|
||||||
* This file is part of Alfresco
|
* This file is part of Alfresco
|
||||||
*
|
*
|
||||||
@@ -23,6 +23,7 @@ import java.io.InputStream;
|
|||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@@ -97,6 +98,7 @@ public abstract class TikaPoweredMetadataExtracter
|
|||||||
protected static final String KEY_CREATED = "created";
|
protected static final String KEY_CREATED = "created";
|
||||||
protected static final String KEY_DESCRIPTION = "description";
|
protected static final String KEY_DESCRIPTION = "description";
|
||||||
protected static final String KEY_COMMENTS = "comments";
|
protected static final String KEY_COMMENTS = "comments";
|
||||||
|
protected static final String KEY_TAGS = "dc:subject";
|
||||||
|
|
||||||
private DateTimeFormatter tikaUTCDateFormater;
|
private DateTimeFormatter tikaUTCDateFormater;
|
||||||
private DateTimeFormatter tikaDateFormater;
|
private DateTimeFormatter tikaDateFormater;
|
||||||
@@ -104,6 +106,18 @@ public abstract class TikaPoweredMetadataExtracter
|
|||||||
|
|
||||||
private String extractorContext = null;
|
private String extractorContext = null;
|
||||||
|
|
||||||
|
private String metadataSeparator = ","; // Default separator.
|
||||||
|
|
||||||
|
public String getMetadataSeparator()
|
||||||
|
{
|
||||||
|
return metadataSeparator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMetadataSeparator(String metadataSeparator)
|
||||||
|
{
|
||||||
|
this.metadataSeparator = metadataSeparator;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds up a list of supported mime types by merging
|
* Builds up a list of supported mime types by merging
|
||||||
* an explicit list with any that Tika also claims to support
|
* an explicit list with any that Tika also claims to support
|
||||||
@@ -400,7 +414,10 @@ public abstract class TikaPoweredMetadataExtracter
|
|||||||
putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
|
putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
|
||||||
putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
|
putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
|
||||||
putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);
|
putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);
|
||||||
|
|
||||||
|
// Tags
|
||||||
|
putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
|
||||||
|
|
||||||
// Get the subject and description, despite things not
|
// Get the subject and description, despite things not
|
||||||
// being nearly as consistent as one might hope
|
// being nearly as consistent as one might hope
|
||||||
String subject = getMetadataValue(metadata, Metadata.SUBJECT);
|
String subject = getMetadataValue(metadata, Metadata.SUBJECT);
|
||||||
@@ -504,6 +521,28 @@ public abstract class TikaPoweredMetadataExtracter
|
|||||||
OutputStream outputStream = writer.getContentOutputStream();
|
OutputStream outputStream = writer.getContentOutputStream();
|
||||||
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
|
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Serializable getMetadataValues(Metadata metadata, String key)
|
||||||
|
{
|
||||||
|
// Use Set to prevent duplicates.
|
||||||
|
Set<String> valuesSet = new LinkedHashSet<String>();
|
||||||
|
String[] values = metadata.getValues(key);
|
||||||
|
|
||||||
|
for (int i = 0; i < values.length; i++)
|
||||||
|
{
|
||||||
|
String[] parts = values[i].split(metadataSeparator);
|
||||||
|
|
||||||
|
for (String subPart : parts)
|
||||||
|
{
|
||||||
|
valuesSet.add(subPart.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Object[] objArrayValues = valuesSet.toArray();
|
||||||
|
values = Arrays.copyOf(objArrayValues, objArrayValues.length, String[].class);
|
||||||
|
|
||||||
|
return values.length == 0 ? null : (values.length == 1 ? values[0] : values);
|
||||||
|
}
|
||||||
|
|
||||||
private String getMetadataValue(Metadata metadata, String key)
|
private String getMetadataValue(Metadata metadata, String key)
|
||||||
{
|
{
|
||||||
|
@@ -22,6 +22,7 @@ import java.io.File;
|
|||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -274,6 +275,7 @@ public void testImageVideo() throws Throwable {
|
|||||||
// Check regular Tika properties
|
// Check regular Tika properties
|
||||||
assertEquals(QUICK_TITLE, p.get(Metadata.COMMENT));
|
assertEquals(QUICK_TITLE, p.get(Metadata.COMMENT));
|
||||||
assertEquals("canon-55-250, moscow-birds, serbor", p.get(Metadata.SUBJECT));
|
assertEquals("canon-55-250, moscow-birds, serbor", p.get(Metadata.SUBJECT));
|
||||||
|
assertTrue(Arrays.equals(new String[] { "canon-55-250", "moscow-birds", "serbor" }, (String[]) p.get("dc:subject")));
|
||||||
// Check namespace'd Tika properties
|
// Check namespace'd Tika properties
|
||||||
assertEquals("12.54321", p.get("geo:lat"));
|
assertEquals("12.54321", p.get("geo:lat"));
|
||||||
assertEquals("-54.1234", p.get("geo:long"));
|
assertEquals("-54.1234", p.get("geo:long"));
|
||||||
|
Reference in New Issue
Block a user