ATS-829 Release T-Engines 2.3.6 (#307)

ATS-829: Release T-Core (T-Engines) 2.3.6 [trigger release]

Linked to REPO-5219 Allow AGS AMP to specify metadata extract mapping

Added an extractMapping transform option to all metadata extractors to override the default one.

Upgraded 3rd party libraries to get a green build:
* Upgrade cxf-rt-transports-http and woodstox-core to avoid issues
* Upgrade to org.springframework.boot:spring-boot-starter-parent:2.3.5.RELEASE to avoid problem in org.springframework:spring-web
* Upgrade to activemq 5.15.13 to avoid problem in activemq-broker 5.15.12
This commit is contained in:
Alan Davis 2020-11-19 18:35:22 +00:00 committed by GitHub
parent 3ef6a7a788
commit 00fbb6405a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 216 additions and 28 deletions

View File

@ -48,6 +48,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
@ -97,7 +98,7 @@ public class AIOTransformRegistryTest
"Archive", "OutlookMsg", "PdfBox", "Office", "Poi", "OOXML", "TikaAuto", "TextMining");
List<String> expectedTransformOptionNames = Arrays.asList("tikaOptions", "archiveOptions", "pdfboxOptions",
"textToPdfOptions", "stringOptions");
"textToPdfOptions", "stringOptions", "metadataOptions");
TransformConfig miscConfig = loadConfig("misc_engine_config.json");
TransformConfig tikaConfig = loadConfig("tika_engine_config.json");
@ -116,8 +117,11 @@ public class AIOTransformRegistryTest
}
// check correct number of options
long distinctOptionCount = Stream.concat(
miscConfig.getTransformOptions().keySet().stream(),
tikaConfig.getTransformOptions().keySet().stream()).distinct().count();
assertEquals("Number of expected transformers",
miscConfig.getTransformOptions().size() + tikaConfig.getTransformOptions().size(),
distinctOptionCount,
aioTransformerRegistry.getTransformConfig().getTransformOptions().size());
Set<String> actualOptionNames = aioTransformerRegistry.getTransformConfig().getTransformOptions().keySet();
@ -125,7 +129,7 @@ public class AIOTransformRegistryTest
// check all options are there
for (String optionName : expectedTransformOptionNames)
{
assertTrue("Expected transform option missing.", actualOptionNames.contains(optionName));
assertTrue("Expected transform option missing:"+optionName, actualOptionNames.contains(optionName));
}
}

View File

@ -5,6 +5,9 @@
],
"stringOptions": [
{"value": {"name": "targetEncoding"}}
],
"metadataOptions": [
{"value": {"name": "extractMapping"}}
]
},
"transformers": [
@ -77,6 +80,7 @@
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -85,6 +89,7 @@
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
}
]

View File

@ -10,6 +10,9 @@
"pdfboxOptions": [
{"value": {"name": "notExtractBookmarksText"}},
{"value": {"name": "targetEncoding"}}
],
"metadataOptions": [
{"value": {"name": "extractMapping"}}
]
},
"transformers": [
@ -520,6 +523,7 @@
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -528,6 +532,7 @@
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -536,6 +541,7 @@
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -556,6 +562,7 @@
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -601,6 +608,7 @@
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -610,6 +618,7 @@
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -645,6 +654,7 @@
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -662,6 +672,7 @@
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -968,6 +979,7 @@
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
}
]

View File

@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@ -150,6 +150,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
null,
readTestFile("eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -169,7 +170,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("spanish.eml"));
null, readTestFile("spanish.eml"));
String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8);
assertTrue("Content from eml transform didn't contain expected value. ",
@ -191,6 +192,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
null,
readTestFile("attachment.eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -211,6 +213,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
null,
readTestFile("alternative.eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -230,11 +233,77 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
null,
readTestFile("nested.alternative.eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
}
/**
 * Sends an eml file for metadata extraction without an {@code extractMapping} option and checks
 * that the returned JSON matches the expected set of system properties exactly.
 */
@Test
public void testExtractMetadataRFC822() throws Exception
{
    // The comparison is on the raw JSON text, so property order matters here.
    final String expectedJson =
        "{\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," +
        "\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\"," +
        "\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop <nevinn@alfresco.com>\"," +
        "\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000," +
        "\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," +
        "\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\"," +
        "\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"," +
        "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," +
        "\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop <nevinn@alfresco.com>\"," +
        "\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000," +
        "\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\"," +
        "\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
        "\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"}";

    final MvcResult response = sendRequest("eml",
        null,
        MIMETYPE_RFC822,
        "json",
        "alfresco-metadata-extract",
        null,   // targetEncoding
        null,   // pageLimit
        null,   // extractMapping: none supplied
        readTestFile("eml"));

    assertEquals("Metadata extract", expectedJson, response.getResponse().getContentAsString());
}
/**
 * Sends an eml file for metadata extraction together with an {@code extractMapping} option and
 * checks that the returned JSON contains only the properties named in that mapping.
 */
@Test
public void testExtractMetadataOptionRFC822() throws Exception
{
    // {"messageSubject":["{http://www.alfresco.org/model/imap/1.0}messageSubject","{http://www.alfresco.org/model/content/1.0}subjectline","{http://www.alfresco.org/model/content/1.0}description","{http://www.alfresco.org/model/content/1.0}title"],"Thread-Index":["{http://www.alfresco.org/model/imap/1.0}threadIndex"],"messageTo":["{http://www.alfresco.org/model/imap/1.0}messageTo","{http://www.alfresco.org/model/content/1.0}addressee"],"messageSent":["{http://www.alfresco.org/model/content/1.0}sentdate","{http://www.alfresco.org/model/imap/1.0}dateSent"],"Message-ID":["{http://www.alfresco.org/model/imap/1.0}messageId"],"messageCc":["{http://www.alfresco.org/model/imap/1.0}messageCc","{http://www.alfresco.org/model/content/1.0}addressees"],"messageReceived":["{http://www.alfresco.org/model/imap/1.0}dateReceived"],"messageFrom":["{http://www.alfresco.org/model/imap/1.0}messageFrom","{http://www.alfresco.org/model/content/1.0}originator"]}

    // Mapping passed as the option: three document properties. The expected output below
    // contains no threadIndex property.
    final String mappingOption =
        "{\"messageSubject\":[" +
        "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\"," +
        "\"{http://www.alfresco.org/model/content/1.0}title\"]," +
        "\"Thread-Index\":[" +
        "\"{http://www.alfresco.org/model/imap/1.0}threadIndex\"]," +
        "\"messageFrom\":[" +
        "\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n";

    // The comparison is on the raw JSON text, so property order matters here.
    final String expectedJson =
        "{\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," +
        "\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
        "\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"}";

    final MvcResult response = sendRequest("eml",
        null,
        MIMETYPE_RFC822,
        "json",
        "alfresco-metadata-extract",
        null,   // targetEncoding
        null,   // pageLimit
        mappingOption,
        readTestFile("eml"));

    assertEquals("Option metadata extract", expectedJson, response.getResponse().getContentAsString());
}
/**
* Test transforming a valid eml with a html part containing html special characters to text
*/
@ -249,6 +318,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
null,
readTestFile("htmlChars.eml"));
assertFalse(result.getResponse().getContentAsString().contains(expected));
}
@ -275,6 +345,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
null,
null,
null,
expected.getBytes());
String contentResult = new String(result.getResponse().getContentAsByteArray(),
@ -304,6 +375,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
"UTF-8",
null,
null,
content);
String contentResult = new String(result.getResponse().getContentAsByteArray(),
@ -324,6 +396,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_TEXT_PLAIN,
"UTF-8",
null,
null,
content);
assertEquals("Returned content should be empty for an empty source file", 0,
@ -349,6 +422,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
MIMETYPE_PDF,
null,
"1",
null,
expected.getBytes());
// Read back in the PDF and check it
@ -368,7 +442,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testAppleIWorksPages() throws Exception
{
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("pages"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("pages"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -377,7 +451,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testAppleIWorksNumbers() throws Exception
{
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("numbers"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("numbers"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -386,7 +460,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testAppleIWorksKey() throws Exception
{
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("key"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("key"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -396,7 +470,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testOOXML() throws Exception
{
MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING,
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("docx"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("docx"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -408,6 +482,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
String targetMimetype,
String targetEncoding,
String pageLimit,
String extractMapping,
byte[] content) throws Exception
{
final MockMultipartFile sourceFile = new MockMultipartFile("file",
@ -433,6 +508,10 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
{
requestBuilder.param("pageLimit", pageLimit);
}
if (extractMapping != null)
{
requestBuilder.param("extractMapping", extractMapping);
}
return mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))

View File

@ -5,6 +5,9 @@
],
"stringOptions": [
{"value": {"name": "targetEncoding"}}
],
"metadataOptions": [
{"value": {"name": "extractMapping"}}
]
},
"transformers": [
@ -77,6 +80,7 @@
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -85,6 +89,7 @@
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
}
]

View File

@ -78,8 +78,7 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
mapMetadataAndWrite(targetFile, metadata);
extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
}
@Override

View File

@ -86,8 +86,7 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractor implement
public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
mapMetadataAndWrite(targetFile, metadata);
extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
}
@Override

View File

@ -5,6 +5,9 @@
],
"stringOptions": [
{"value": {"name": "targetEncoding"}}
],
"metadataOptions": [
{"value": {"name": "extractMapping"}}
]
},
"transformers": [
@ -77,6 +80,7 @@
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -85,6 +89,7 @@
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
}
]

View File

@ -10,6 +10,9 @@
"pdfboxOptions": [
{"value": {"name": "notExtractBookmarksText"}},
{"value": {"name": "targetEncoding"}}
],
"metadataOptions": [
{"value": {"name": "extractMapping"}}
]
},
"transformers": [
@ -520,6 +523,7 @@
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -528,6 +532,7 @@
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -536,6 +541,7 @@
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -556,6 +562,7 @@
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -601,6 +608,7 @@
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -610,6 +618,7 @@
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -645,6 +654,7 @@
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -662,6 +672,7 @@
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -968,6 +979,7 @@
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
}
]

View File

@ -168,8 +168,7 @@ public class TikaJavaExecutor implements JavaExecutor
throws Exception
{
AbstractTikaMetadataExtractor metadataExtractor = this.metadataExtractor.get(transformName);
Map<String, Serializable> metadata = metadataExtractor.extractMetadata(sourceMimetype, transformOptions, sourceFile);
metadataExtractor.mapMetadataAndWrite(targetFile, metadata);
metadataExtractor.extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
}
/**

View File

@ -10,6 +10,9 @@
"pdfboxOptions": [
{"value": {"name": "notExtractBookmarksText"}},
{"value": {"name": "targetEncoding"}}
],
"metadataOptions": [
{"value": {"name": "extractMapping"}}
]
},
"transformers": [
@ -520,6 +523,7 @@
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -528,6 +532,7 @@
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -536,6 +541,7 @@
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -556,6 +562,7 @@
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -601,6 +608,7 @@
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -610,6 +618,7 @@
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -645,6 +654,7 @@
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -662,6 +672,7 @@
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{
@ -968,6 +979,7 @@
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
}
]

View File

@ -63,7 +63,7 @@ import java.util.StringTokenizer;
* <li>The T-Engine's Controller class will call a method in a class that extends {@link AbstractMetadataExtractor}
* based on the source and target mediatypes in the normal way.</li>
* <li>The method extracts ALL available metadata from the document and then calls
* {@link #mapMetadataAndWrite(File, Map)}.</li>
* {@link #mapMetadataAndWrite(File, Map, Map)}.</li>
* <li>Selected values from the available metadata are mapped into content repository property names and values,
* depending on what is defined in a {@code "<classname>_metadata_extract.properties"} file.</li>
* <li>The selected values are set back to the content repository as a JSON representation of a Map, where the values
@ -95,6 +95,7 @@ public abstract class AbstractMetadataExtractor
private static final String EXTRACT = "extract";
private static final String EMBED = "embed";
private static final String METADATA = "metadata";
private static final String EXTRACT_MAPPING = "extractMapping";
private static final String NAMESPACE_PROPERTY_PREFIX = "namespace.prefix.";
private static final char NAMESPACE_PREFIX = ':';
@ -110,17 +111,18 @@ public abstract class AbstractMetadataExtractor
private static final ObjectMapper jsonObjectMapper = new ObjectMapper();
protected final Logger logger;
private Map<String, Set<String>> extractMapping;
private Map<String, Set<String>> defaultExtractMapping;
private ThreadLocal<Map<String, Set<String>>> extractMapping = new ThreadLocal<>();
private Map<String, Set<String>> embedMapping;
public AbstractMetadataExtractor(Logger logger)
{
this.logger = logger;
extractMapping = Collections.emptyMap();
defaultExtractMapping = Collections.emptyMap();
embedMapping = Collections.emptyMap();
try
{
extractMapping = buildExtractMapping();
defaultExtractMapping = buildExtractMapping();
embedMapping = buildEmbedMapping();
}
catch (Exception e)
@ -148,7 +150,7 @@ public abstract class AbstractMetadataExtractor
try
{
TypeReference<HashMap<String, String>> typeRef = new TypeReference<HashMap<String, String>>() {};
TypeReference<HashMap<String, String>> typeRef = new TypeReference<>() {};
return jsonObjectMapper.readValue(metadataAsJson, typeRef);
}
catch (JsonProcessingException e)
@ -159,7 +161,7 @@ public abstract class AbstractMetadataExtractor
/**
 * Returns the extract mapping that applies to the current thread.
 *
 * While {@link #extractMetadata(String, Map, File, File)} is running this is the mapping it selected
 * (the one from the {@code extractMapping} transform option, or the default). Outside such a call no
 * per-thread mapping is set, so the default mapping read at construction time is returned instead of
 * failing with a {@link NullPointerException}.
 *
 * @return an unmodifiable view of the mapping from document properties to system properties
 */
protected Map<String, Set<String>> getExtractMapping()
{
    Map<String, Set<String>> currentMapping = extractMapping.get();
    // ThreadLocal.get() yields null when nothing has been set on this thread.
    return Collections.unmodifiableMap(currentMapping != null ? currentMapping : defaultExtractMapping);
}
public Map<String, Set<String>> getEmbedMapping()
@ -432,7 +434,60 @@ public abstract class AbstractMetadataExtractor
return true;
}
/**
 * Extracts metadata from {@code sourceFile}, maps it to system properties and writes the result to
 * {@code targetFile}.
 *
 * The {@code transformOptions} may contain a replacement set of mappings. If supplied, these are used
 * in place of the default mappings read from file.
 *
 * @param sourceMimetype   mimetype of the source file
 * @param transformOptions options of the request; may hold an {@code extractMapping} entry
 * @param sourceFile       file to extract the metadata from
 * @param targetFile       file the mapped metadata is written to
 * @throws IllegalArgumentException if the {@code extractMapping} option cannot be parsed
 * @throws Exception                if the extraction or the write fails
 */
public void extractMetadata(String sourceMimetype, Map<String, String> transformOptions, File sourceFile,
        File targetFile) throws Exception
{
    Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);

    // Use a ThreadLocal to avoid changing method signatures of methods that currently call getExtractMapping.
    try
    {
        extractMapping.set(mapping);
        Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
        mapMetadataAndWrite(targetFile, metadata, mapping);
    }
    finally
    {
        // remove() rather than set(null): drops the entry from the thread's map so that reused
        // (pooled) threads do not keep a stale slot for this extractor.
        extractMapping.remove();
    }
}
/**
 * Picks the mapping to use for a request: the {@code extractMapping} option parsed as JSON when it is
 * present, otherwise the supplied default.
 *
 * @param transformOptions      options of the request
 * @param defaultExtractMapping mapping returned when the option is absent
 * @return the mapping from document properties to system properties
 * @throws IllegalArgumentException if the option is present but cannot be parsed
 */
private Map<String, Set<String>> getExtractMappingFromOptions(Map<String, String> transformOptions, Map<String,
        Set<String>> defaultExtractMapping)
{
    final String requestedMappingJson = transformOptions.get(EXTRACT_MAPPING);
    if (requestedMappingJson == null)
    {
        return defaultExtractMapping;
    }

    final TypeReference<HashMap<String, Set<String>>> mappingType = new TypeReference<>() {};
    try
    {
        return jsonObjectMapper.readValue(requestedMappingJson, mappingType);
    }
    catch (JsonProcessingException e)
    {
        throw new IllegalArgumentException("Failed to read "+ EXTRACT_MAPPING +" from request", e);
    }
}
/**
 * Maps the raw metadata with the default {@code extractMapping} and writes it to {@code targetFile},
 * by delegating to the three argument overload.
 *
 * @param targetFile file the mapped metadata is written to
 * @param metadata   raw metadata keyed by document properties
 * @throws IOException declared by the overload this method delegates to
 * @deprecated use {@link #extractMetadata(String, Map, File, File)} rather than calling this method.
 */
@Deprecated
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata) throws IOException
{
    this.mapMetadataAndWrite(targetFile, metadata, this.defaultExtractMapping);
}
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata,
Map<String, Set<String>> extractMapping) throws IOException
{
if (logger.isDebugEnabled())
{
@ -440,17 +495,19 @@ public abstract class AbstractMetadataExtractor
metadata.forEach((k,v) -> logger.debug(" "+k+"="+v));
}
metadata = mapRawToSystem(metadata);
metadata = mapRawToSystem(metadata, extractMapping);
writeMetadata(targetFile, metadata);
}
/**
* Based on AbstractMappingMetadataExtracter#mapRawToSystem.
*
* @param rawMetadata Metadata keyed by document properties
* @return Returns the metadata keyed by the system properties
* @param rawMetadata Metadata keyed by document properties
* @param extractMapping Mapping between document and system properties
* @return Returns the metadata keyed by the system properties
*/
private Map<String, Serializable> mapRawToSystem(Map<String, Serializable> rawMetadata)
private Map<String, Serializable> mapRawToSystem(Map<String, Serializable> rawMetadata,
Map<String, Set<String>> extractMapping)
{
boolean debugEnabled = logger.isDebugEnabled();
if (debugEnabled)

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.1.RELEASE</version>
<version>2.3.5.RELEASE</version>
<relativePath />
</parent>
@ -24,9 +24,9 @@
<dependency.alfresco-jodconverter-core.version>3.0.1.1</dependency.alfresco-jodconverter-core.version>
<env.project_version>${project.version}</env.project_version>
<dependency.alfresco-transform-model.version>1.0.2.11</dependency.alfresco-transform-model.version>
<dependency.activemq.version>5.15.9</dependency.activemq.version>
<dependency.activemq.version>5.15.13</dependency.activemq.version>
<dependency.jackson.version>2.10.3</dependency.jackson.version>
<dependency.cxf.version>3.3.5</dependency.cxf.version>
<dependency.cxf.version>3.4.1</dependency.cxf.version>
<dependency.tika.version>1.24.1</dependency.tika.version>
<dependency.poi.version>4.1.2</dependency.poi.version>
<dependency.ooxml-schemas.version>1.4</dependency.ooxml-schemas.version>