mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-05-12 17:04:48 +00:00
ATS-829 Release T-Engines 2.3.6 (#307)
ATS-829: Release T-Core (T-Engines) 2.3.6 [trigger release]

Linked to REPO-5219 (Allow AGS AMP to specify metadata extract mapping): added an extractMapping transform option to all metadata extractors to override the default mapping.

Upgraded 3rd-party libraries to get a green build:
* Upgrade cxf-rt-transports-http and woodstox-core to avoid issues
* Upgrade to org.springframework.boot:spring-boot-starter-parent:2.3.5.RELEASE to avoid a problem in org.springframework:spring-web
* Upgrade to activemq 5.15.13 to avoid a problem in activemq-broker 5.15.12
This commit is contained in:
parent
3ef6a7a788
commit
00fbb6405a
@ -48,6 +48,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
|
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
|
||||||
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
|
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
|
||||||
@ -97,7 +98,7 @@ public class AIOTransformRegistryTest
|
|||||||
"Archive", "OutlookMsg", "PdfBox", "Office", "Poi", "OOXML", "TikaAuto", "TextMining");
|
"Archive", "OutlookMsg", "PdfBox", "Office", "Poi", "OOXML", "TikaAuto", "TextMining");
|
||||||
|
|
||||||
List<String> expectedTransformOptionNames = Arrays.asList("tikaOptions", "archiveOptions", "pdfboxOptions",
|
List<String> expectedTransformOptionNames = Arrays.asList("tikaOptions", "archiveOptions", "pdfboxOptions",
|
||||||
"textToPdfOptions", "stringOptions");
|
"textToPdfOptions", "stringOptions", "metadataOptions");
|
||||||
|
|
||||||
TransformConfig miscConfig = loadConfig("misc_engine_config.json");
|
TransformConfig miscConfig = loadConfig("misc_engine_config.json");
|
||||||
TransformConfig tikaConfig = loadConfig("tika_engine_config.json");
|
TransformConfig tikaConfig = loadConfig("tika_engine_config.json");
|
||||||
@ -116,8 +117,11 @@ public class AIOTransformRegistryTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
// check correct number of options
|
// check correct number of options
|
||||||
|
long distinctOptionCount = Stream.concat(
|
||||||
|
miscConfig.getTransformOptions().keySet().stream(),
|
||||||
|
tikaConfig.getTransformOptions().keySet().stream()).distinct().count();
|
||||||
assertEquals("Number of expected transformers",
|
assertEquals("Number of expected transformers",
|
||||||
miscConfig.getTransformOptions().size() + tikaConfig.getTransformOptions().size(),
|
distinctOptionCount,
|
||||||
aioTransformerRegistry.getTransformConfig().getTransformOptions().size());
|
aioTransformerRegistry.getTransformConfig().getTransformOptions().size());
|
||||||
|
|
||||||
Set<String> actualOptionNames = aioTransformerRegistry.getTransformConfig().getTransformOptions().keySet();
|
Set<String> actualOptionNames = aioTransformerRegistry.getTransformConfig().getTransformOptions().keySet();
|
||||||
@ -125,7 +129,7 @@ public class AIOTransformRegistryTest
|
|||||||
// check all options are there
|
// check all options are there
|
||||||
for (String optionName : expectedTransformOptionNames)
|
for (String optionName : expectedTransformOptionNames)
|
||||||
{
|
{
|
||||||
assertTrue("Expected transform option missing.", actualOptionNames.contains(optionName));
|
assertTrue("Expected transform option missing:"+optionName, actualOptionNames.contains(optionName));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
],
|
],
|
||||||
"stringOptions": [
|
"stringOptions": [
|
||||||
{"value": {"name": "targetEncoding"}}
|
{"value": {"name": "targetEncoding"}}
|
||||||
|
],
|
||||||
|
"metadataOptions": [
|
||||||
|
{"value": {"name": "extractMapping"}}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"transformers": [
|
"transformers": [
|
||||||
@ -77,6 +80,7 @@
|
|||||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -85,6 +89,7 @@
|
|||||||
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -10,6 +10,9 @@
|
|||||||
"pdfboxOptions": [
|
"pdfboxOptions": [
|
||||||
{"value": {"name": "notExtractBookmarksText"}},
|
{"value": {"name": "notExtractBookmarksText"}},
|
||||||
{"value": {"name": "targetEncoding"}}
|
{"value": {"name": "targetEncoding"}}
|
||||||
|
],
|
||||||
|
"metadataOptions": [
|
||||||
|
{"value": {"name": "extractMapping"}}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"transformers": [
|
"transformers": [
|
||||||
@ -520,6 +523,7 @@
|
|||||||
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -528,6 +532,7 @@
|
|||||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -536,6 +541,7 @@
|
|||||||
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -556,6 +562,7 @@
|
|||||||
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -601,6 +608,7 @@
|
|||||||
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -610,6 +618,7 @@
|
|||||||
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -645,6 +654,7 @@
|
|||||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -662,6 +672,7 @@
|
|||||||
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -968,6 +979,7 @@
|
|||||||
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@ -150,6 +150,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
readTestFile("eml"));
|
readTestFile("eml"));
|
||||||
assertTrue("Content from eml transform didn't contain expected value. ",
|
assertTrue("Content from eml transform didn't contain expected value. ",
|
||||||
result.getResponse().getContentAsString().contains(expected));
|
result.getResponse().getContentAsString().contains(expected));
|
||||||
@ -169,7 +170,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
readTestFile("spanish.eml"));
|
null, readTestFile("spanish.eml"));
|
||||||
|
|
||||||
String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8);
|
String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8);
|
||||||
assertTrue("Content from eml transform didn't contain expected value. ",
|
assertTrue("Content from eml transform didn't contain expected value. ",
|
||||||
@ -191,6 +192,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
readTestFile("attachment.eml"));
|
readTestFile("attachment.eml"));
|
||||||
assertTrue("Content from eml transform didn't contain expected value. ",
|
assertTrue("Content from eml transform didn't contain expected value. ",
|
||||||
result.getResponse().getContentAsString().contains(expected));
|
result.getResponse().getContentAsString().contains(expected));
|
||||||
@ -211,6 +213,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
readTestFile("alternative.eml"));
|
readTestFile("alternative.eml"));
|
||||||
assertTrue("Content from eml transform didn't contain expected value. ",
|
assertTrue("Content from eml transform didn't contain expected value. ",
|
||||||
result.getResponse().getContentAsString().contains(expected));
|
result.getResponse().getContentAsString().contains(expected));
|
||||||
@ -230,11 +233,77 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
readTestFile("nested.alternative.eml"));
|
readTestFile("nested.alternative.eml"));
|
||||||
assertTrue("Content from eml transform didn't contain expected value. ",
|
assertTrue("Content from eml transform didn't contain expected value. ",
|
||||||
result.getResponse().getContentAsString().contains(expected));
|
result.getResponse().getContentAsString().contains(expected));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test extracting default metadata from a valid eml file
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testExtractMetadataRFC822() throws Exception
|
||||||
|
{
|
||||||
|
String expected =
|
||||||
|
"{\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop <nevinn@alfresco.com>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000," +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop <nevinn@alfresco.com>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"}";
|
||||||
|
MvcResult result = sendRequest("eml",
|
||||||
|
null,
|
||||||
|
MIMETYPE_RFC822,
|
||||||
|
"json",
|
||||||
|
"alfresco-metadata-extract",
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
readTestFile("eml"));
|
||||||
|
String metadata = result.getResponse().getContentAsString();
|
||||||
|
assertEquals("Metadata extract", expected, metadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test extracting metadata specified in an option from a valid eml file
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testExtractMetadataOptionRFC822() throws Exception
|
||||||
|
{
|
||||||
|
// {"messageSubject":["{http://www.alfresco.org/model/imap/1.0}messageSubject","{http://www.alfresco.org/model/content/1.0}subjectline","{http://www.alfresco.org/model/content/1.0}description","{http://www.alfresco.org/model/content/1.0}title"],"Thread-Index":["{http://www.alfresco.org/model/imap/1.0}threadIndex"],"messageTo":["{http://www.alfresco.org/model/imap/1.0}messageTo","{http://www.alfresco.org/model/content/1.0}addressee"],"messageSent":["{http://www.alfresco.org/model/content/1.0}sentdate","{http://www.alfresco.org/model/imap/1.0}dateSent"],"Message-ID":["{http://www.alfresco.org/model/imap/1.0}messageId"],"messageCc":["{http://www.alfresco.org/model/imap/1.0}messageCc","{http://www.alfresco.org/model/content/1.0}addressees"],"messageReceived":["{http://www.alfresco.org/model/imap/1.0}dateReceived"],"messageFrom":["{http://www.alfresco.org/model/imap/1.0}messageFrom","{http://www.alfresco.org/model/content/1.0}originator"]}
|
||||||
|
String extractMapping =
|
||||||
|
"{\"messageSubject\":[" +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}title\"]," +
|
||||||
|
"\"Thread-Index\":[" +
|
||||||
|
"\"{http://www.alfresco.org/model/imap/1.0}threadIndex\"]," +
|
||||||
|
"\"messageFrom\":[" +
|
||||||
|
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n";
|
||||||
|
String expected =
|
||||||
|
"{\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
|
||||||
|
"\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"}";
|
||||||
|
MvcResult result = sendRequest("eml",
|
||||||
|
null,
|
||||||
|
MIMETYPE_RFC822,
|
||||||
|
"json",
|
||||||
|
"alfresco-metadata-extract",
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
extractMapping,
|
||||||
|
readTestFile("eml"));
|
||||||
|
String metadata = result.getResponse().getContentAsString();
|
||||||
|
assertEquals("Option metadata extract", expected, metadata);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test transforming a valid eml with a html part containing html special characters to text
|
* Test transforming a valid eml with a html part containing html special characters to text
|
||||||
*/
|
*/
|
||||||
@ -249,6 +318,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
readTestFile("htmlChars.eml"));
|
readTestFile("htmlChars.eml"));
|
||||||
assertFalse(result.getResponse().getContentAsString().contains(expected));
|
assertFalse(result.getResponse().getContentAsString().contains(expected));
|
||||||
}
|
}
|
||||||
@ -275,6 +345,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
expected.getBytes());
|
expected.getBytes());
|
||||||
|
|
||||||
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
||||||
@ -304,6 +375,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
content);
|
content);
|
||||||
|
|
||||||
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
||||||
@ -324,6 +396,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
null,
|
null,
|
||||||
|
null,
|
||||||
content);
|
content);
|
||||||
|
|
||||||
assertEquals("Returned content should be empty for an empty source file", 0,
|
assertEquals("Returned content should be empty for an empty source file", 0,
|
||||||
@ -349,6 +422,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
MIMETYPE_PDF,
|
MIMETYPE_PDF,
|
||||||
null,
|
null,
|
||||||
"1",
|
"1",
|
||||||
|
null,
|
||||||
expected.getBytes());
|
expected.getBytes());
|
||||||
|
|
||||||
// Read back in the PDF and check it
|
// Read back in the PDF and check it
|
||||||
@ -368,7 +442,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
public void testAppleIWorksPages() throws Exception
|
public void testAppleIWorksPages() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("pages"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("pages"));
|
||||||
assertTrue("Expected image content but content is empty.",
|
assertTrue("Expected image content but content is empty.",
|
||||||
result.getResponse().getContentLengthLong() > 0L);
|
result.getResponse().getContentLengthLong() > 0L);
|
||||||
}
|
}
|
||||||
@ -377,7 +451,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
public void testAppleIWorksNumbers() throws Exception
|
public void testAppleIWorksNumbers() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("numbers"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("numbers"));
|
||||||
assertTrue("Expected image content but content is empty.",
|
assertTrue("Expected image content but content is empty.",
|
||||||
result.getResponse().getContentLengthLong() > 0L);
|
result.getResponse().getContentLengthLong() > 0L);
|
||||||
}
|
}
|
||||||
@ -386,7 +460,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
public void testAppleIWorksKey() throws Exception
|
public void testAppleIWorksKey() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
|
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("key"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("key"));
|
||||||
assertTrue("Expected image content but content is empty.",
|
assertTrue("Expected image content but content is empty.",
|
||||||
result.getResponse().getContentLengthLong() > 0L);
|
result.getResponse().getContentLengthLong() > 0L);
|
||||||
}
|
}
|
||||||
@ -396,7 +470,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
public void testOOXML() throws Exception
|
public void testOOXML() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING,
|
MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("docx"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("docx"));
|
||||||
assertTrue("Expected image content but content is empty.",
|
assertTrue("Expected image content but content is empty.",
|
||||||
result.getResponse().getContentLengthLong() > 0L);
|
result.getResponse().getContentLengthLong() > 0L);
|
||||||
}
|
}
|
||||||
@ -408,6 +482,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
String targetMimetype,
|
String targetMimetype,
|
||||||
String targetEncoding,
|
String targetEncoding,
|
||||||
String pageLimit,
|
String pageLimit,
|
||||||
|
String extractMapping,
|
||||||
byte[] content) throws Exception
|
byte[] content) throws Exception
|
||||||
{
|
{
|
||||||
final MockMultipartFile sourceFile = new MockMultipartFile("file",
|
final MockMultipartFile sourceFile = new MockMultipartFile("file",
|
||||||
@ -433,6 +508,10 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
|
|||||||
{
|
{
|
||||||
requestBuilder.param("pageLimit", pageLimit);
|
requestBuilder.param("pageLimit", pageLimit);
|
||||||
}
|
}
|
||||||
|
if (extractMapping != null)
|
||||||
|
{
|
||||||
|
requestBuilder.param("extractMapping", extractMapping);
|
||||||
|
}
|
||||||
|
|
||||||
return mockMvc.perform(requestBuilder)
|
return mockMvc.perform(requestBuilder)
|
||||||
.andExpect(status().is(OK.value()))
|
.andExpect(status().is(OK.value()))
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
],
|
],
|
||||||
"stringOptions": [
|
"stringOptions": [
|
||||||
{"value": {"name": "targetEncoding"}}
|
{"value": {"name": "targetEncoding"}}
|
||||||
|
],
|
||||||
|
"metadataOptions": [
|
||||||
|
{"value": {"name": "extractMapping"}}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"transformers": [
|
"transformers": [
|
||||||
@ -77,6 +80,7 @@
|
|||||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -85,6 +89,7 @@
|
|||||||
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -78,8 +78,7 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
|
|||||||
public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||||
File sourceFile, File targetFile) throws Exception
|
File sourceFile, File targetFile) throws Exception
|
||||||
{
|
{
|
||||||
Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
|
extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
|
||||||
mapMetadataAndWrite(targetFile, metadata);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -86,8 +86,7 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractor implement
|
|||||||
public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||||
File sourceFile, File targetFile) throws Exception
|
File sourceFile, File targetFile) throws Exception
|
||||||
{
|
{
|
||||||
Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
|
extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
|
||||||
mapMetadataAndWrite(targetFile, metadata);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
],
|
],
|
||||||
"stringOptions": [
|
"stringOptions": [
|
||||||
{"value": {"name": "targetEncoding"}}
|
{"value": {"name": "targetEncoding"}}
|
||||||
|
],
|
||||||
|
"metadataOptions": [
|
||||||
|
{"value": {"name": "extractMapping"}}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"transformers": [
|
"transformers": [
|
||||||
@ -77,6 +80,7 @@
|
|||||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -85,6 +89,7 @@
|
|||||||
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -10,6 +10,9 @@
|
|||||||
"pdfboxOptions": [
|
"pdfboxOptions": [
|
||||||
{"value": {"name": "notExtractBookmarksText"}},
|
{"value": {"name": "notExtractBookmarksText"}},
|
||||||
{"value": {"name": "targetEncoding"}}
|
{"value": {"name": "targetEncoding"}}
|
||||||
|
],
|
||||||
|
"metadataOptions": [
|
||||||
|
{"value": {"name": "extractMapping"}}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"transformers": [
|
"transformers": [
|
||||||
@ -520,6 +523,7 @@
|
|||||||
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -528,6 +532,7 @@
|
|||||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -536,6 +541,7 @@
|
|||||||
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -556,6 +562,7 @@
|
|||||||
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -601,6 +608,7 @@
|
|||||||
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -610,6 +618,7 @@
|
|||||||
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -645,6 +654,7 @@
|
|||||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -662,6 +672,7 @@
|
|||||||
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -968,6 +979,7 @@
|
|||||||
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -168,8 +168,7 @@ public class TikaJavaExecutor implements JavaExecutor
|
|||||||
throws Exception
|
throws Exception
|
||||||
{
|
{
|
||||||
AbstractTikaMetadataExtractor metadataExtractor = this.metadataExtractor.get(transformName);
|
AbstractTikaMetadataExtractor metadataExtractor = this.metadataExtractor.get(transformName);
|
||||||
Map<String, Serializable> metadata = metadataExtractor.extractMetadata(sourceMimetype, transformOptions, sourceFile);
|
metadataExtractor.extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
|
||||||
metadataExtractor.mapMetadataAndWrite(targetFile, metadata);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -10,6 +10,9 @@
|
|||||||
"pdfboxOptions": [
|
"pdfboxOptions": [
|
||||||
{"value": {"name": "notExtractBookmarksText"}},
|
{"value": {"name": "notExtractBookmarksText"}},
|
||||||
{"value": {"name": "targetEncoding"}}
|
{"value": {"name": "targetEncoding"}}
|
||||||
|
],
|
||||||
|
"metadataOptions": [
|
||||||
|
{"value": {"name": "extractMapping"}}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"transformers": [
|
"transformers": [
|
||||||
@ -520,6 +523,7 @@
|
|||||||
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -528,6 +532,7 @@
|
|||||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -536,6 +541,7 @@
|
|||||||
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -556,6 +562,7 @@
|
|||||||
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -601,6 +608,7 @@
|
|||||||
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -610,6 +618,7 @@
|
|||||||
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -645,6 +654,7 @@
|
|||||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -662,6 +672,7 @@
|
|||||||
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -968,6 +979,7 @@
|
|||||||
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
|
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"metadataOptions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -63,7 +63,7 @@ import java.util.StringTokenizer;
|
|||||||
* <li>The T-Engine's Controller class will call a method in a class that extends {@link AbstractMetadataExtractor}
|
* <li>The T-Engine's Controller class will call a method in a class that extends {@link AbstractMetadataExtractor}
|
||||||
* based on the source and target mediatypes in the normal way.</li>
|
* based on the source and target mediatypes in the normal way.</li>
|
||||||
* <li>The method extracts ALL available metadata from the document and then calls
|
* <li>The method extracts ALL available metadata from the document and then calls
|
||||||
* {@link #mapMetadataAndWrite(File, Map)}.</li>
|
* {@link #mapMetadataAndWrite(File, Map, Map)}.</li>
|
||||||
* <li>Selected values from the available metadata are mapped into content repository property names and values,
|
* <li>Selected values from the available metadata are mapped into content repository property names and values,
|
||||||
* depending on what is defined in a {@code "<classname>_metadata_extract.properties"} file.</li>
|
* depending on what is defined in a {@code "<classname>_metadata_extract.properties"} file.</li>
|
||||||
* <li>The selected values are set back to the content repository as a JSON representation of a Map, where the values
|
* <li>The selected values are set back to the content repository as a JSON representation of a Map, where the values
|
||||||
@ -95,6 +95,7 @@ public abstract class AbstractMetadataExtractor
|
|||||||
private static final String EXTRACT = "extract";
|
private static final String EXTRACT = "extract";
|
||||||
private static final String EMBED = "embed";
|
private static final String EMBED = "embed";
|
||||||
private static final String METADATA = "metadata";
|
private static final String METADATA = "metadata";
|
||||||
|
private static final String EXTRACT_MAPPING = "extractMapping";
|
||||||
|
|
||||||
private static final String NAMESPACE_PROPERTY_PREFIX = "namespace.prefix.";
|
private static final String NAMESPACE_PROPERTY_PREFIX = "namespace.prefix.";
|
||||||
private static final char NAMESPACE_PREFIX = ':';
|
private static final char NAMESPACE_PREFIX = ':';
|
||||||
@ -110,17 +111,18 @@ public abstract class AbstractMetadataExtractor
|
|||||||
private static final ObjectMapper jsonObjectMapper = new ObjectMapper();
|
private static final ObjectMapper jsonObjectMapper = new ObjectMapper();
|
||||||
|
|
||||||
protected final Logger logger;
|
protected final Logger logger;
|
||||||
private Map<String, Set<String>> extractMapping;
|
private Map<String, Set<String>> defaultExtractMapping;
|
||||||
|
private ThreadLocal<Map<String, Set<String>>> extractMapping = new ThreadLocal<>();
|
||||||
private Map<String, Set<String>> embedMapping;
|
private Map<String, Set<String>> embedMapping;
|
||||||
|
|
||||||
public AbstractMetadataExtractor(Logger logger)
|
public AbstractMetadataExtractor(Logger logger)
|
||||||
{
|
{
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
extractMapping = Collections.emptyMap();
|
defaultExtractMapping = Collections.emptyMap();
|
||||||
embedMapping = Collections.emptyMap();
|
embedMapping = Collections.emptyMap();
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
extractMapping = buildExtractMapping();
|
defaultExtractMapping = buildExtractMapping();
|
||||||
embedMapping = buildEmbedMapping();
|
embedMapping = buildEmbedMapping();
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
@ -148,7 +150,7 @@ public abstract class AbstractMetadataExtractor
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
TypeReference<HashMap<String, String>> typeRef = new TypeReference<HashMap<String, String>>() {};
|
TypeReference<HashMap<String, String>> typeRef = new TypeReference<>() {};
|
||||||
return jsonObjectMapper.readValue(metadataAsJson, typeRef);
|
return jsonObjectMapper.readValue(metadataAsJson, typeRef);
|
||||||
}
|
}
|
||||||
catch (JsonProcessingException e)
|
catch (JsonProcessingException e)
|
||||||
@ -159,7 +161,7 @@ public abstract class AbstractMetadataExtractor
|
|||||||
|
|
||||||
protected Map<String, Set<String>> getExtractMapping()
|
protected Map<String, Set<String>> getExtractMapping()
|
||||||
{
|
{
|
||||||
return Collections.unmodifiableMap(extractMapping);
|
return Collections.unmodifiableMap(extractMapping.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Set<String>> getEmbedMapping()
|
public Map<String, Set<String>> getEmbedMapping()
|
||||||
@ -432,7 +434,60 @@ public abstract class AbstractMetadataExtractor
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The {@code transformOptions} may contain a replacement set of mappings. These will be used in place of the
|
||||||
|
* default mappings read from file if supplied.
|
||||||
|
*/
|
||||||
|
public void extractMetadata(String sourceMimetype, Map<String, String> transformOptions, File sourceFile,
|
||||||
|
File targetFile) throws Exception
|
||||||
|
{
|
||||||
|
Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
|
||||||
|
|
||||||
|
// Use a ThreadLocal to avoid changing method signatures of methods that currently call getExtractMapping.
|
||||||
|
try
|
||||||
|
{
|
||||||
|
extractMapping.set(mapping);
|
||||||
|
Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
|
||||||
|
mapMetadataAndWrite(targetFile, metadata, mapping);
|
||||||
|
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
extractMapping.set(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Set<String>> getExtractMappingFromOptions(Map<String, String> transformOptions, Map<String,
|
||||||
|
Set<String>> defaultExtractMapping)
|
||||||
|
{
|
||||||
|
String extractMappingOption = transformOptions.get(EXTRACT_MAPPING);
|
||||||
|
if (extractMappingOption != null)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
TypeReference<HashMap<String, Set<String>>> typeRef = new TypeReference<>() {};
|
||||||
|
return jsonObjectMapper.readValue(extractMappingOption, typeRef);
|
||||||
|
}
|
||||||
|
catch (JsonProcessingException e)
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Failed to read "+ EXTRACT_MAPPING +" from request", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return defaultExtractMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated use {@link #extractMetadata(String, Map, File, File)} rather than calling this method.
|
||||||
|
* By default call the overloaded method with the default {@code extractMapping}.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata) throws IOException
|
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata) throws IOException
|
||||||
|
{
|
||||||
|
mapMetadataAndWrite(targetFile, metadata, defaultExtractMapping);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata,
|
||||||
|
Map<String, Set<String>> extractMapping) throws IOException
|
||||||
{
|
{
|
||||||
if (logger.isDebugEnabled())
|
if (logger.isDebugEnabled())
|
||||||
{
|
{
|
||||||
@ -440,17 +495,19 @@ public abstract class AbstractMetadataExtractor
|
|||||||
metadata.forEach((k,v) -> logger.debug(" "+k+"="+v));
|
metadata.forEach((k,v) -> logger.debug(" "+k+"="+v));
|
||||||
}
|
}
|
||||||
|
|
||||||
metadata = mapRawToSystem(metadata);
|
metadata = mapRawToSystem(metadata, extractMapping);
|
||||||
writeMetadata(targetFile, metadata);
|
writeMetadata(targetFile, metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Based on AbstractMappingMetadataExtracter#mapRawToSystem.
|
* Based on AbstractMappingMetadataExtracter#mapRawToSystem.
|
||||||
*
|
*
|
||||||
* @param rawMetadata Metadata keyed by document properties
|
* @param rawMetadata Metadata keyed by document properties
|
||||||
* @return Returns the metadata keyed by the system properties
|
* @param extractMapping Mapping between document and system properties
|
||||||
|
* @return Returns the metadata keyed by the system properties
|
||||||
*/
|
*/
|
||||||
private Map<String, Serializable> mapRawToSystem(Map<String, Serializable> rawMetadata)
|
private Map<String, Serializable> mapRawToSystem(Map<String, Serializable> rawMetadata,
|
||||||
|
Map<String, Set<String>> extractMapping)
|
||||||
{
|
{
|
||||||
boolean debugEnabled = logger.isDebugEnabled();
|
boolean debugEnabled = logger.isDebugEnabled();
|
||||||
if (debugEnabled)
|
if (debugEnabled)
|
||||||
|
6
pom.xml
6
pom.xml
@ -5,7 +5,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-parent</artifactId>
|
<artifactId>spring-boot-starter-parent</artifactId>
|
||||||
<version>2.3.1.RELEASE</version>
|
<version>2.3.5.RELEASE</version>
|
||||||
<relativePath />
|
<relativePath />
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
@ -24,9 +24,9 @@
|
|||||||
<dependency.alfresco-jodconverter-core.version>3.0.1.1</dependency.alfresco-jodconverter-core.version>
|
<dependency.alfresco-jodconverter-core.version>3.0.1.1</dependency.alfresco-jodconverter-core.version>
|
||||||
<env.project_version>${project.version}</env.project_version>
|
<env.project_version>${project.version}</env.project_version>
|
||||||
<dependency.alfresco-transform-model.version>1.0.2.11</dependency.alfresco-transform-model.version>
|
<dependency.alfresco-transform-model.version>1.0.2.11</dependency.alfresco-transform-model.version>
|
||||||
<dependency.activemq.version>5.15.9</dependency.activemq.version>
|
<dependency.activemq.version>5.15.13</dependency.activemq.version>
|
||||||
<dependency.jackson.version>2.10.3</dependency.jackson.version>
|
<dependency.jackson.version>2.10.3</dependency.jackson.version>
|
||||||
<dependency.cxf.version>3.3.5</dependency.cxf.version>
|
<dependency.cxf.version>3.4.1</dependency.cxf.version>
|
||||||
<dependency.tika.version>1.24.1</dependency.tika.version>
|
<dependency.tika.version>1.24.1</dependency.tika.version>
|
||||||
<dependency.poi.version>4.1.2</dependency.poi.version>
|
<dependency.poi.version>4.1.2</dependency.poi.version>
|
||||||
<dependency.ooxml-schemas.version>1.4</dependency.ooxml-schemas.version>
|
<dependency.ooxml-schemas.version>1.4</dependency.ooxml-schemas.version>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user