REPO-4639 Content conversion failed using Tika (#108)

* REPO-4639: Split tika engine_config.json into separate transformers.

* WIP: REPO-4639 Content conversion failed using Tika

The Tika T-Engine "transform" option does not exist when called via the Transform Service or Local transforms, which resulted in no transforms taking place. However, this value is not really needed, as the T-Engine should be able to read its own engine_config.json to work out which sub transform to use. Transforms only worked via Legacy transforms, which used a T-Engine.

This code is based on tried and tested ACS repository code. It has been further simplified.

TODO:
- replace the ConfigFileFinder class just added with something that uses Spring to read the JSON. i.e. simplify it.
- replace the CombinedConfig class just added with something that does not need the InLineTransformer. i.e. simplify it.
- create tests based on the repo tests
- remove the source and target mimetype checks in Tika, as a check against engine_config.json is cleaner.
- repeat the process for the Misc T-Engine as it has similar code checking source and target mimetypes.
- remove the transform option passed by the legacy transforms.

* Removed CombinedConfig and ConfigFileFinder classes.

* Extracted AbstractTransformRegistry so that it may be used in the ACS repository too.

TODO AbstractTransformRegistry and AbstractTransformRegistry need to be moved to the alfresco-transform-model package

* tidy up only

* REPO-4639: Add priority to duplicate transforms.

* REPO-4639: Refactor TikaTransformationIT to use the new Tika /transform specifications
Changes AbstractTransformerControllerTest as the engine_config is now loaded in TransformRegistryImpl instead of AbstractTransformerController

* Rename to TransformServiceRegistry, so we don't have to change the repo code.

* Added the baseUrl parameter to the register method and fixed the missed rename in the last commit.

* Javadoc change only

* Moved common classes (with repo) AbstractTransformRegistry and TransformServiceRegistry to alfresco-transform-model

* Replace (simplify) all the isTransformable calls with a check against the JSON.
- Tests now only pass targetEncoding to the 'string' transformer.

* Fix failing tests.

* Revert port change

* REPO-4639 : Add priorities to misc engine_config

* REPO-4639 : Add priorities to pdf-renderer and  imagemagick engine_config

* Remove test that is @Ignored

* Pick up alfresco-transformer-model 1.0.2.7-REPO-4639-1

* REPO-4639 : Add priorities to libreoffice engine_config

* REPO-4639 : Add priorities to tika engine_config

* REPO-4639 : Remove all priorities with value equal to 50 (default) from engine_config

* Switch over to using TransformServiceRegistry in org.alfresco.transform.client.registry
Reintroduce the noExtensionSourceFilenameTest having removed @Ignore.

* New whitesource issue on commons-compress 1.18. Upgrading to 1.19.

* Removed the text/javascript -> text/plain test as this is not supported

* Modifications as a result of changes to method names in alfresco-transform-model

* Pick up alfresco-transform-model 1.0.2.7-ATS545-2

* Remove unused imports
This commit is contained in:
alandavis
2019-09-12 13:34:42 +01:00
committed by CezarLeahu
parent 614bdbe52f
commit d6777b58eb
28 changed files with 2087 additions and 1875 deletions

View File

@@ -41,13 +41,14 @@ import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.client.model.TransformRequestValidator;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.clients.AlfrescoSharedFileStoreClient;
import org.alfresco.transformer.logging.LogEntry;
@@ -55,7 +56,6 @@ import org.alfresco.transformer.model.FileRefResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
@@ -70,8 +70,6 @@ import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.client.HttpClientErrorException;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
* <p>Abstract Controller, provides structure and helper methods to sub-class transformer controllers.</p>
*
@@ -105,8 +103,6 @@ public abstract class AbstractTransformerController implements TransformControll
private static final Logger logger = LoggerFactory.getLogger(
AbstractTransformerController.class);
private static String ENGINE_CONFIG = "engine_config.json";
@Autowired
private AlfrescoSharedFileStoreClient alfrescoSharedFileStoreClient;
@@ -114,26 +110,14 @@ public abstract class AbstractTransformerController implements TransformControll
private TransformRequestValidator transformRequestValidator;
@Autowired
private ObjectMapper objectMapper;
private TransformServiceRegistry transformRegistry;
@GetMapping(value = "/transform/config")
public ResponseEntity<TransformConfig> info()
{
logger.info("GET Transform Config.");
try
{
ClassPathResource classPathResource = new ClassPathResource(ENGINE_CONFIG);
InputStream engineConfigFile = classPathResource.getInputStream();
TransformConfig transformConfig = objectMapper.readValue(engineConfigFile,
TransformConfig.class);
return new ResponseEntity<>(transformConfig, OK);
}
catch (IOException e)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Could not read Transform Config file.", e);
}
TransformConfig transformConfig = ((TransformRegistryImpl)transformRegistry).getTransformConfig();
return new ResponseEntity<>(transformConfig, OK);
}
/**
@@ -352,4 +336,37 @@ public abstract class AbstractTransformerController implements TransformControll
return sb.toString();
}
/**
 * Asks the transform registry for the name of a transformer that can handle the request.
 *
 * The size passed to the registry is the length of {@code sourceFile} in bytes. The final argument of
 * {@code findTransformerName} is always passed as {@code null} here.
 *
 * @param sourceFile       the file to be transformed; only its length is read
 * @param sourceMimetype   mimetype of the source content
 * @param targetMimetype   mimetype of the requested target content
 * @param transformOptions option names and values supplied with the request
 * @return the transformer name returned by the registry, never {@code null}
 * @throws TransformException with a BAD_REQUEST status when the registry returns {@code null}
 */
protected String getTransformerName(File sourceFile, String sourceMimetype, String targetMimetype,
    Map<String, String> transformOptions)
{
    final String name = transformRegistry.findTransformerName(sourceMimetype, sourceFile.length(),
        targetMimetype, transformOptions, null);
    if (name != null)
    {
        return name;
    }
    throw new TransformException(BAD_REQUEST.value(), "No transforms were able to handle the request");
}
/**
 * Builds a transform options map from alternating name and value arguments.
 *
 * Arguments are read as pairs: {@code name0, value0, name1, value1, ...}. Both names and values are
 * converted with {@code toString()}. Pairs whose value is {@code null} are left out of the map.
 *
 * If an odd number of arguments is supplied an error is logged and the trailing name, which has no
 * value, is ignored. Previously that case ran off the end of the array and threw an
 * {@code ArrayIndexOutOfBoundsException} straight after logging.
 *
 * @param namesAndValues alternating option names and option values
 * @return a new mutable map of option name to option value (as Strings)
 */
protected Map<String, String> createTransformOptions(Object... namesAndValues)
{
    if (namesAndValues.length % 2 != 0)
    {
        logger.error("Incorrect number of parameters. Should have an even number as they are names and values.");
    }

    Map<String, String> transformOptions = new HashMap<>();
    // Only visit complete (name, value) pairs so a dangling final name cannot index past the array.
    for (int i = 0; i + 1 < namesAndValues.length; i += 2)
    {
        String name = namesAndValues[i].toString();
        Object value = namesAndValues[i + 1];
        if (value != null)
        {
            transformOptions.put(name, value.toString());
        }
    }
    return transformOptions;
}
}

View File

@@ -0,0 +1,95 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.client.registry.AbstractTransformRegistry;
import org.alfresco.transform.client.registry.TransformCache;
import org.alfresco.transform.exceptions.TransformException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import javax.annotation.PostConstruct;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
/**
 * Transform registry backed by the {@code engine_config.json} classpath resource.
 * Used by clients to work out if a transformation is supported.
 */
public class TransformRegistryImpl extends AbstractTransformRegistry
{
    private static final Logger log = LoggerFactory.getLogger(TransformRegistryImpl.class);

    private static final String ENGINE_CONFIG_JSON = "classpath:engine_config.json";

    // Injected by Spring from the location above.
    @Value(ENGINE_CONFIG_JSON)
    private Resource engineConfig;

    // The lookup structures handed to AbstractTransformRegistry via getData().
    // Created once here; nothing in this class replaces the instance afterwards.
    private TransformCache data = new TransformCache();

    private ObjectMapper jsonObjectMapper = new ObjectMapper();

    /**
     * Reads and parses the engine config resource. The resource is read again on every call.
     *
     * @return the parsed transform configuration
     * @throws TransformException with an INTERNAL_SERVER_ERROR status if the resource cannot be read
     */
    TransformConfig getTransformConfig()
    {
        try (Reader configReader = new InputStreamReader(engineConfig.getInputStream(), UTF_8))
        {
            TransformConfig parsed = jsonObjectMapper.readValue(configReader, TransformConfig.class);
            return parsed;
        }
        catch (IOException ioe)
        {
            String message = "Could not read " + ENGINE_CONFIG_JSON;
            throw new TransformException(INTERNAL_SERVER_ERROR.value(), message, ioe);
        }
    }

    /**
     * Registers every transformer found in the engine config once the bean has been constructed.
     * The second argument to {@code registerAll} is passed as {@code null}.
     */
    @PostConstruct
    public void afterPropertiesSet()
    {
        registerAll(getTransformConfig(), null, ENGINE_CONFIG_JSON);
    }

    /**
     * @return the cache instance owned by this registry
     */
    @Override
    public TransformCache getData()
    {
        return data;
    }

    /**
     * Writes the message to this class's logger at error level.
     */
    @Override
    protected void logError(String msg)
    {
        log.error(msg);
    }
}

View File

@@ -27,6 +27,8 @@
package org.alfresco.transformer.config;
import org.alfresco.transform.client.model.TransformRequestValidator;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.transformer.TransformRegistryImpl;
import org.alfresco.transformer.TransformInterceptor;
import org.alfresco.transformer.clients.AlfrescoSharedFileStoreClient;
import org.springframework.context.annotation.Bean;
@@ -69,4 +71,10 @@ public class WebApplicationConfig implements WebMvcConfigurer
{
return new TransformRequestValidator();
}
/**
 * Provides the {@link TransformServiceRegistry} bean, implemented by a new {@link TransformRegistryImpl}.
 */
@Bean
public TransformServiceRegistry transformRegistry()
{
    final TransformServiceRegistry registry = new TransformRegistryImpl();
    return registry;
}
}

View File

@@ -106,7 +106,7 @@ public abstract class AbstractHttpRequestTest
getTransformerName() + " - Request parameter '" + name + "' is missing");
}
private void assertTransformError(boolean addFile, String errorMessage)
protected void assertTransformError(boolean addFile, String errorMessage)
{
LinkedMultiValueMap<String, Object> parameters = new LinkedMultiValueMap<>();
if (addFile)
@@ -118,8 +118,14 @@ public abstract class AbstractHttpRequestTest
headers.setContentType(MULTIPART_FORM_DATA);
HttpEntity<LinkedMultiValueMap<String, Object>> entity = new HttpEntity<>(parameters,
headers);
sendTranformationRequest(entity, errorMessage);
}
protected void sendTranformationRequest(HttpEntity<LinkedMultiValueMap<String, Object>> entity, String errorMessage)
{
ResponseEntity<String> response = restTemplate.exchange("/transform", POST, entity,
String.class, "");
String.class, "");
assertEquals(errorMessage, getErrorMessage(response.getBody()));
}

View File

@@ -59,13 +59,16 @@ import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.client.model.config.TransformOption;
import org.alfresco.transform.client.model.config.TransformOptionGroup;
import org.alfresco.transform.client.model.config.TransformOptionValue;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.transform.client.model.config.Transformer;
import org.alfresco.transformer.clients.AlfrescoSharedFileStoreClient;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.boot.test.mock.mockito.SpyBean;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.MockMvc;
@@ -91,6 +94,9 @@ public abstract class AbstractTransformerControllerTest
@MockBean
protected AlfrescoSharedFileStoreClient alfrescoSharedFileStoreClient;
@SpyBean
protected TransformServiceRegistry transformRegistry;
protected String sourceExtension;
protected String targetExtension;
protected String sourceMimetype;
@@ -100,6 +106,13 @@ public abstract class AbstractTransformerControllerTest
protected String expectedSourceSuffix;
protected Long expectedTimeout = 0L;
protected byte[] expectedSourceFileBytes;
/**
* The expected result. Taken from the target quick file's bytes.
*
* Note: These checks generally don't work on Windows (Mac and Linux are okay). Possibly to do with byte order
* loading.
*/
protected byte[] expectedTargetFileBytes;
// Called by sub class
@@ -228,7 +241,7 @@ public abstract class AbstractTransformerControllerTest
}
@Test
// Is okay, as the target filename is built up from the whole source filename and the targetExtenstion
// Is okay, as the target filename is built up from the whole source filename and the targetExtension
public void noExtensionSourceFilenameTest() throws Exception
{
sourceFile = new MockMultipartFile("file", "../quick", sourceMimetype,
@@ -334,8 +347,8 @@ public abstract class AbstractTransformerControllerTest
.readValue(new ClassPathResource("engine_config.json").getFile(),
TransformConfig.class);
ReflectionTestUtils
.setField(AbstractTransformerController.class, "ENGINE_CONFIG", "engine_config.json");
ReflectionTestUtils.setField(transformRegistry,"engineConfig",
new ClassPathResource("engine_config.json"));
String response = mockMvc
.perform(MockMvcRequestBuilders.get("/transform/config"))
@@ -353,8 +366,8 @@ public abstract class AbstractTransformerControllerTest
{
TransformConfig expectedResult = buildCompleteTransformConfig();
ReflectionTestUtils.setField(AbstractTransformerController.class, "ENGINE_CONFIG",
"engine_config_with_duplicates.json");
ReflectionTestUtils.setField(transformRegistry,"engineConfig",
new ClassPathResource("engine_config_with_duplicates.json"));
String response = mockMvc
.perform(MockMvcRequestBuilders.get("/transform/config"))
@@ -380,8 +393,8 @@ public abstract class AbstractTransformerControllerTest
TransformConfig expectedResult = new TransformConfig();
expectedResult.setTransformers(ImmutableList.of(transformer));
ReflectionTestUtils.setField(AbstractTransformerController.class, "ENGINE_CONFIG",
"engine_config_incomplete.json");
ReflectionTestUtils.setField(transformRegistry,"engineConfig",
new ClassPathResource("engine_config_incomplete.json"));
String response = mockMvc
.perform(MockMvcRequestBuilders.get("/transform/config"))
@@ -403,8 +416,8 @@ public abstract class AbstractTransformerControllerTest
TransformConfig expectedResult = new TransformConfig();
expectedResult.setTransformers(ImmutableList.of(transformer));
ReflectionTestUtils.setField(AbstractTransformerController.class, "ENGINE_CONFIG",
"engine_config_no_transform_options.json");
ReflectionTestUtils.setField(transformRegistry,"engineConfig",
new ClassPathResource("engine_config_no_transform_options.json"));
String response = mockMvc
.perform(MockMvcRequestBuilders.get("/transform/config"))