REPO-4334 Move metadata extraction into T-Engines (#247)

* Metadata extract code added to T-Engines
* Required a refactor of duplicate code to avoid 3x more duplication:
        - try catches used to return exit codes
        - calls to java libraries or commands to external processes
        - building of transform options in controllers, adaptors
* integration tests based on current extracts performed in the repo
* included extract code for libreoffice, and embed code even though not used out of the box any more. There may well be custom extracts using them that move to T-Engines
* removal of unused imports
* minor autoOrient / allowEnlargement bug fixes that were not included in Paddington on the T-Engine side.
This commit is contained in:
Alan Davis
2020-06-11 20:20:22 +01:00
committed by GitHub
parent ca394440bb
commit 06109dee75
158 changed files with 10288 additions and 1454 deletions

View File

@@ -26,59 +26,32 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_EXTENSION;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_MIMETYPE;
import static org.alfresco.transformer.util.RequestParamMap.TARGET_EXTENSION;
import static org.alfresco.transformer.util.RequestParamMap.TARGET_MIMETYPE;
import static org.alfresco.transformer.util.RequestParamMap.TEST_DELAY;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.Arrays;
import java.util.HashMap;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.Transformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;
@Controller
public class AIOController extends AbstractTransformerController
{
private static final Logger logger = LoggerFactory.getLogger(AIOController.class);
// This property can be sent by acs repository's legacy transformers to force a transform,
// instead of letting this T-Engine determine it based on the request parameters.
// This allows clients to specify transform names as they appear in the engine config files, for example:
// imagemagick, libreoffice, PdfBox, TikaAuto, ....
// See ATS-731.
@Deprecated
private static final String TRANSFORM_NAME_PROPERTY = "transformName";
@Autowired
private AIOTransformRegistry transformRegistry;
@@ -94,19 +67,6 @@ public class AIOController extends AbstractTransformerController
return getTransformerName() + " available";
}
/**
 * Handles a transform request received via the queue endpoint.
 * <p>
 * The transform name is resolved from the source file, the mimetypes and the transform options,
 * and the work is then delegated to {@code transformInternal}. The {@code timeout} argument is
 * accepted but not used by this method.
 */
@Override
public void processTransform(final File sourceFile, final File targetFile, final String sourceMimetype,
    final String targetMimetype, final Map<String, String> transformOptions, final Long timeout)
{
    // Parameterized SLF4J call: the message is only formatted when debug is enabled,
    // so no explicit level check is needed.
    logger.debug("Processing request via queue endpoint. Params: sourceMimetype: '{}', targetMimetype: '{}', "
        + "transformOptions: {}", sourceMimetype, targetMimetype, transformOptions);

    final String resolvedTransformName =
        getTransformerName(sourceFile, sourceMimetype, targetMimetype, transformOptions);
    transformInternal(resolvedTransformName, sourceFile, targetFile, sourceMimetype, targetMimetype,
        transformOptions);
}
// TODO ATS-713 Currently uses the Misc probeTest. The implementation will need to be changed such that the test can be selected based on the required transform
@Override
public ProbeTestTransform getProbeTestTransform()
@@ -122,69 +82,11 @@ public class AIOController extends AbstractTransformerController
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformInternal( "html", sourceFile, targetFile, MIMETYPE_HTML,
MIMETYPE_TEXT_PLAIN, parameters);
transform("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
/**
 * HTTP multipart endpoint that transforms the uploaded file and returns the result as an attachment.
 * <p>
 * All request parameters other than the required/control ones (source and target extension,
 * source and target mimetype, test delay, transform name) are treated as transform options;
 * options with an empty value are dropped. If a transform name is supplied in the request it is
 * used as-is, otherwise the name is resolved via {@code getTransformerName}.
 *
 * @param request               the servlet request, used when creating the source and target files
 * @param sourceMultipartFile   the uploaded source content
 * @param targetExtension       extension used to build the target file name
 * @param sourceMimetype        mimetype of the source
 * @param targetMimetype        mimetype of the requested target
 * @param requestParameters     all request parameters, from which the transform options are derived
 * @param testDelay             optional delay added to the recorded transform time
 * @param requestTransformName  optional explicit transform name (sent by ACS legacy transformers)
 * @return the response carrying the target file as an attachment
 */
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
    @RequestParam("file") MultipartFile sourceMultipartFile,
    @RequestParam(TARGET_EXTENSION) String targetExtension,
    @RequestParam(SOURCE_MIMETYPE) String sourceMimetype,
    @RequestParam(TARGET_MIMETYPE) String targetMimetype,
    @RequestParam Map<String, String> requestParameters,
    @RequestParam (value = TEST_DELAY, required = false) Long testDelay,
    // The TRANSFORM_NAME_PROPERTY param allows ACS legacy transformers to specify which transform to use,
    // It can be removed once legacy transformers are removed from ACS.
    @RequestParam (value = TRANSFORM_NAME_PROPERTY, required = false) String requestTransformName)
{
    if (logger.isDebugEnabled())
    {
        logger.debug("Processing request via HTTP endpoint. Params: sourceMimetype: '{}', targetMimetype: '{}', "
            + "targetExtension: '{}', requestParameters: {}", sourceMimetype, targetMimetype, targetExtension, requestParameters);
    }

    // Remove all required parameters from request parameters to get the list of options
    List<String> optionsToFilter = Arrays.asList(SOURCE_EXTENSION, TARGET_EXTENSION, TARGET_MIMETYPE,
        SOURCE_MIMETYPE, TEST_DELAY, TRANSFORM_NAME_PROPERTY);
    Map<String, String> transformOptions = new HashMap<>(requestParameters);
    transformOptions.keySet().removeAll(optionsToFilter);
    transformOptions.values().removeIf(v -> v.isEmpty());

    if (logger.isDebugEnabled())
    {
        // Pass the map as an argument: concatenating it onto the format string left the "{}"
        // placeholder unfilled and printed it literally in front of the map.
        logger.debug("Filtered requestParameters into transformOptions: {}", transformOptions);
    }

    final String targetFilename = createTargetFileName(
        sourceMultipartFile.getOriginalFilename(), targetExtension);
    getProbeTestTransform().incrementTransformerCount();
    final File sourceFile = createSourceFile(request, sourceMultipartFile);
    final File targetFile = createTargetFile(request, targetFilename);

    // Check if transformName was provided in the request (this can happen for ACS legacy transformers)
    String transform = requestTransformName;
    if (transform == null || transform.isEmpty())
    {
        transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype, transformOptions);
    }
    else if (logger.isInfoEnabled())
    {
        logger.info("Using transform name provided in the request: {}", requestTransformName);
    }

    transformInternal(transform, sourceFile, targetFile, sourceMimetype, targetMimetype, transformOptions);

    final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);

    // Record size, status and timing (including any requested test delay) for logging and probes.
    LogEntry.setTargetSize(targetFile.length());
    long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
    time += LogEntry.addDelay(testDelay);
    getProbeTestTransform().recordTransformTime(time);
    return body;
}
@Override
public ResponseEntity<TransformConfig> info()
{
@@ -193,9 +95,9 @@ public class AIOController extends AbstractTransformerController
return new ResponseEntity<>(transformConfig, OK);
}
protected void transformInternal(final String transformName, final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions)
@Override
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing transform with: transformName; '{}', sourceFile '{}', targetFile '{}', transformOptions" +
" {}", transformName, sourceFile, targetFile, transformOptions);
@@ -212,25 +114,7 @@ public class AIOController extends AbstractTransformerController
logger.debug("Performing transform with name '{}' using transformer with id '{}'.", transformName, transformer.getTransformerId());
}
try
{
Map<String, String> optionsWithTransformName = new HashMap<>(transformOptions);
optionsWithTransformName.put(Transformer.TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceFile, targetFile, sourceMimetype, targetMimetype, optionsWithTransformName);
}
catch (TransformException e)
{
throw e;
}
catch (IllegalArgumentException e)
{
throw new TransformException(BAD_REQUEST.value(), e.getMessage(), e);
}
catch (Exception e)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), "Failed transform - transform:"
+ transformName + " sourceMimetype:" + sourceMimetype + " targetMimetype:" + targetMimetype);
}
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -27,11 +27,11 @@
package org.alfresco.transformer;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.transformer.transformers.ImageMagickAdapter;
import org.alfresco.transformer.transformers.LibreOfficeAdapter;
import org.alfresco.transformer.transformers.MiscAdapter;
import org.alfresco.transformer.transformers.PdfRendererAdapter;
import org.alfresco.transformer.transformers.TikaAdapter;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -70,11 +70,11 @@ public class AIOCustomConfig
public TransformServiceRegistry aioTransformRegistry() throws Exception
{
AIOTransformRegistry aioTransformRegistry = new AIOTransformRegistry();
aioTransformRegistry.registerTransformer(new MiscAdapter());
aioTransformRegistry.registerTransformer(new TikaAdapter());
aioTransformRegistry.registerTransformer(new ImageMagickAdapter(imageMagickExePath, imageMagickDynPath, imageMagickRootPath, imageMagickCodersPath, imageMagickConfigPath));
aioTransformRegistry.registerTransformer(new LibreOfficeAdapter(libreofficePath));
aioTransformRegistry.registerTransformer(new PdfRendererAdapter(pdfRendererPath));
aioTransformRegistry.registerTransformer(new SelectingTransformer());
aioTransformRegistry.registerTransformer(new TikaJavaExecutor());
aioTransformRegistry.registerTransformer(new ImageMagickCommandExecutor(imageMagickExePath, imageMagickDynPath, imageMagickRootPath, imageMagickCodersPath, imageMagickConfigPath));
aioTransformRegistry.registerTransformer(new LibreOfficeJavaExecutor(libreofficePath));
aioTransformRegistry.registerTransformer(new PdfRendererCommandExecutor(pdfRendererPath));
return aioTransformRegistry;
}
}

View File

@@ -26,10 +26,7 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
@@ -47,7 +44,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})

View File

@@ -0,0 +1 @@
#logging.level.org.alfresco.transformer.metadataExtractors=debug

View File

@@ -26,15 +26,7 @@
*/
package org.alfresco.transformer;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.alfresco.transformer.transformers.ImageMagickAdapter;
import org.alfresco.transformer.transformers.Transformer;
import org.alfresco.transformer.executors.Transformer;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -46,6 +38,11 @@ import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import java.io.IOException;
import java.util.Map;
import static org.junit.Assert.assertTrue;
@RunWith(SpringRunner.class)
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
@@ -57,26 +54,17 @@ public class AIOControllerImageMagickTest extends ImageMagickControllerTest
{
// All tests contained in ImageMagickControllerTest
ImageMagickAdapter adapter;
@Autowired
AIOTransformRegistry transformRegistry;
@PostConstruct
private void init() throws Exception
{
adapter = new ImageMagickAdapter(EXE, DYN, ROOT, CODERS, CONFIG);
}
@Before @Override
public void before() throws IOException
{
ReflectionTestUtils.setField(commandExecutor, "transformCommand", mockTransformCommand);
ReflectionTestUtils.setField(commandExecutor, "checkCommand", mockCheckCommand);
ReflectionTestUtils.setField(adapter, "commandExecutor", commandExecutor);
//Need to wire in the mocked adapter into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerTransformMapping();
transformers.replace("imagemagick", adapter);
//Need to wire in the mocked commandExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("imagemagick", commandExecutor);
mockTransformCommand("jpg", "png", "image/jpeg", true);
}

View File

@@ -26,15 +26,8 @@
*/
package org.alfresco.transformer;
import static org.junit.Assert.assertTrue;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.transformers.LibreOfficeAdapter;
import org.alfresco.transformer.transformers.Transformer;
import org.alfresco.transformer.executors.Transformer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
@@ -42,9 +35,12 @@ import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import java.util.Map;
import static org.junit.Assert.assertTrue;
@RunWith(SpringRunner.class)
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
@@ -54,7 +50,6 @@ import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilde
*/
public class AIOControllerLibreOfficeTest extends LibreOfficeControllerTest
{
//Tests contained in LibreOfficeControllerTest
@Test
@@ -64,25 +59,16 @@ public class AIOControllerLibreOfficeTest extends LibreOfficeControllerTest
assertTrue("Wrong controller wired for test", controller instanceof AIOController);
}
LibreOfficeAdapter adapter;
@Autowired
AIOTransformRegistry transformRegistry;
@PostConstruct
private void init() throws Exception
{
adapter = new LibreOfficeAdapter(execPath);
}
@Override
// Used by the super class to mock the javaExecutor, a different implementation is required here
protected void setJavaExecutor(AbstractTransformerController controller, LibreOfficeJavaExecutor javaExecutor)
{
ReflectionTestUtils.setField(adapter, "javaExecutor", javaExecutor);
//Need to wire in the mocked adapter into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerTransformMapping();
transformers.replace("libreoffice", adapter);
//Need to wire in the mocked javaExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("libreoffice", javaExecutor);
// No need to set the transform registry to the controller as it is @Autowired in
}

View File

@@ -26,14 +26,7 @@
*/
package org.alfresco.transformer;
import static org.junit.Assert.assertTrue;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.alfresco.transformer.transformers.PdfRendererAdapter;
import org.alfresco.transformer.transformers.Transformer;
import org.alfresco.transformer.executors.Transformer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
@@ -44,6 +37,10 @@ import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import java.util.Map;
import static org.junit.Assert.assertTrue;
@RunWith(SpringRunner.class)
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
@@ -53,27 +50,17 @@ import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilde
*/
public class AIOControllerPdfRendererTest extends AlfrescoPdfRendererControllerTest
{
// All tests contained IN AlfrescoPdfRendererControllerTest
PdfRendererAdapter adapter;
@Autowired
AIOTransformRegistry transformRegistry;
@PostConstruct
private void init() throws Exception
{
adapter = new PdfRendererAdapter(execPath);
}
@Override
protected void setFields()
{
ReflectionTestUtils.setField(commandExecutor, "transformCommand", mockTransformCommand);
ReflectionTestUtils.setField(commandExecutor, "checkCommand", mockCheckCommand);
ReflectionTestUtils.setField(adapter, "pdfExecutor", commandExecutor);
//Need to wire in the mocked adapter into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerTransformMapping();
transformers.replace("pdfrenderer", adapter);
//Need to wire in the mocked commandExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("pdfrenderer", commandExecutor);
}
@Override

View File

@@ -24,33 +24,21 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transformer;
import java.io.File;
import java.util.Map;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
public class LibreOfficeAdapter implements Transformer
/**
* Metadata integration tests in the Misc T-Engine, but run from the AIO T-Engine.
*
* @author adavis
*/
@RunWith(Parameterized.class)
public class AIOMiscMetadataExtractsIT extends MiscMetadataExtractsIT
{
private static String ID = "libreoffice";
private LibreOfficeJavaExecutor javaExecutor;
public LibreOfficeAdapter(String execPath) throws Exception
public AIOMiscMetadataExtractsIT(TestFileInfo testFileInfo)
{
javaExecutor = new LibreOfficeJavaExecutor(execPath);
}
@Override
public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions)
{
javaExecutor.call(sourceFile, targetFile);
}
@Override
public String getTransformerId()
{
return ID;
super(testFileInfo);
}
}

View File

@@ -24,35 +24,21 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transformer;
import java.io.File;
import java.util.Map;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
public class MiscAdapter implements Transformer
/**
* Metadata integration tests in the Tika T-Engine, but run from the AIO T-Engine.
*
* @author adavis
*/
@RunWith(Parameterized.class)
public class AIOTikaMetadataExtractsIT extends TikaMetadataExtractsIT
{
private static final String ID = "misc";
private SelectingTransformer miscSelectingTransformer;
public MiscAdapter()
public AIOTikaMetadataExtractsIT(TestFileInfo testFileInfo)
{
miscSelectingTransformer = new SelectingTransformer();
}
@Override
public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype, Map<String,
String> transformOptions)
{
String transformerName = transformOptions.get(TRANSFORM_NAME_PARAMETER);
miscSelectingTransformer.transform(transformerName, sourceFile, targetFile,
sourceMimetype, targetMimetype, transformOptions);
}
@Override
public String getTransformerId()
{
return ID;
super(testFileInfo);
}
}

View File

@@ -28,27 +28,20 @@ package org.alfresco.transformer;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.client.model.config.TransformOption;
import org.alfresco.transform.client.registry.AbstractTransformRegistry;
import org.alfresco.transform.client.registry.TransformCache;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.transformers.Transformer;
import org.alfresco.transformer.executors.Transformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
/**
* AIOTransformRegistry manages all of the sub transformers registered to it and provides aggregated TransformConfig.
@@ -68,7 +61,7 @@ public class AIOTransformRegistry extends AbstractTransformRegistry
private ObjectMapper jsonObjectMapper = new ObjectMapper();
// Represents the mapping between a transform and a transformer, multiple mappings can point to the same transformer.
private Map<String, Transformer> transformerTransformMapping = new HashMap();
private Map<String, Transformer> transformerEngineMapping = new HashMap();
/**
* The registration will go through all supported sub transformers and map them to the transformer implementation.
@@ -87,11 +80,11 @@ public class AIOTransformRegistry extends AbstractTransformRegistry
for (org.alfresco.transform.client.model.config.Transformer transformerConfig : transformConfig.getTransformers())
{
String transformerName = transformerConfig.getTransformerName();
if (transformerTransformMapping.containsKey(transformerName))
if (transformerEngineMapping.containsKey(transformerName))
{
throw new Exception("Transformer name " + transformerName + " is already registered.");
}
transformerTransformMapping.put(transformerName, transformer);
transformerEngineMapping.put(transformerName, transformer);
log.debug("Registered transformer with name: '{}'.", transformerName);
}
@@ -108,7 +101,7 @@ public class AIOTransformRegistry extends AbstractTransformRegistry
*/
public Transformer getByTransformName(final String transformName)
{
return getTransformerTransformMapping().get(transformName);
return getTransformerEngineMapping().get(transformName);
}
/**
@@ -144,14 +137,14 @@ public class AIOTransformRegistry extends AbstractTransformRegistry
}
}
Map<String, Transformer> getTransformerTransformMapping()
Map<String, Transformer> getTransformerEngineMapping()
{
return transformerTransformMapping;
return transformerEngineMapping;
}
void setTransformerTransformMapping(Map<String, Transformer> transformerTransformMapping)
void setTransformerEngineMapping(Map<String, Transformer> transformerEngineMapping)
{
this.transformerTransformMapping = transformerTransformMapping;
this.transformerEngineMapping = transformerEngineMapping;
}
@Override

View File

@@ -1,120 +0,0 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transformer.util.RequestParamMap.ALLOW_ENLARGEMENT;
import static org.alfresco.transformer.util.RequestParamMap.ALPHA_REMOVE;
import static org.alfresco.transformer.util.RequestParamMap.AUTO_ORIENT;
import static org.alfresco.transformer.util.RequestParamMap.COMMAND_OPTIONS;
import static org.alfresco.transformer.util.RequestParamMap.CROP_GRAVITY;
import static org.alfresco.transformer.util.RequestParamMap.CROP_HEIGHT;
import static org.alfresco.transformer.util.RequestParamMap.CROP_PERCENTAGE;
import static org.alfresco.transformer.util.RequestParamMap.CROP_WIDTH;
import static org.alfresco.transformer.util.RequestParamMap.CROP_X_OFFSET;
import static org.alfresco.transformer.util.RequestParamMap.CROP_Y_OFFSET;
import static org.alfresco.transformer.util.RequestParamMap.END_PAGE;
import static org.alfresco.transformer.util.RequestParamMap.MAINTAIN_ASPECT_RATIO;
import static org.alfresco.transformer.util.RequestParamMap.RESIZE_HEIGHT;
import static org.alfresco.transformer.util.RequestParamMap.RESIZE_PERCENTAGE;
import static org.alfresco.transformer.util.RequestParamMap.RESIZE_WIDTH;
import static org.alfresco.transformer.util.RequestParamMap.START_PAGE;
import static org.alfresco.transformer.util.RequestParamMap.THUMBNAIL;
import static org.alfresco.transformer.util.RequestParamMap.TIMEOUT;
import static org.alfresco.transformer.util.Util.stringToInteger;
import static org.alfresco.transformer.util.Util.stringToLong;
import java.io.File;
import java.util.Map;
import org.alfresco.transformer.ImageMagickOptionsBuilder;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
/**
 * {@link Transformer} implementation that delegates to an {@link ImageMagickCommandExecutor}.
 * <p>
 * The string-valued transform options are converted into an ImageMagick option string with
 * {@link ImageMagickOptionsBuilder}, a page range is derived from the start/end page options,
 * and the command executor is then run.
 */
public class ImageMagickAdapter implements Transformer
{
    /** Identifier returned by {@link #getTransformerId()}. */
    private static final String ID = "imagemagick";

    // Deliberately not final: tests replace this field reflectively with a mocked executor.
    private ImageMagickCommandExecutor commandExecutor;

    /**
     * Creates the adapter and its underlying command executor.
     * The five arguments are passed straight through to {@link ImageMagickCommandExecutor}.
     *
     * @throws Exception if the command executor cannot be created
     */
    public ImageMagickAdapter(String exe, String dyn, String root, String coder, String config) throws Exception
    {
        commandExecutor = new ImageMagickCommandExecutor(exe, dyn, root, coder, config);
    }

    /**
     * Builds the ImageMagick options from {@code transformOptions} and runs the command executor.
     * The mimetype arguments are not used by this implementation.
     *
     * @param sourceFile       file to transform
     * @param targetFile       file to write the result to
     * @param sourceMimetype   unused
     * @param targetMimetype   unused
     * @param transformOptions option name to string value map; absent keys are passed to the builder as {@code null}
     */
    @Override
    public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions)
    {
        final String options = ImageMagickOptionsBuilder
            .builder()
            .withStartPage(transformOptions.get(START_PAGE))
            .withEndPage(transformOptions.get(END_PAGE))
            .withAlphaRemove(transformOptions.get(ALPHA_REMOVE))
            .withAutoOrient(transformOptions.get(AUTO_ORIENT))
            .withCropGravity(transformOptions.get(CROP_GRAVITY))
            .withCropWidth(transformOptions.get(CROP_WIDTH))
            .withCropHeight(transformOptions.get(CROP_HEIGHT))
            .withCropPercentage(transformOptions.get(CROP_PERCENTAGE))
            .withCropXOffset(transformOptions.get(CROP_X_OFFSET))
            .withCropYOffset(transformOptions.get(CROP_Y_OFFSET))
            .withThumbnail(transformOptions.get(THUMBNAIL))
            .withResizeWidth(transformOptions.get(RESIZE_WIDTH))
            .withResizeHeight(transformOptions.get(RESIZE_HEIGHT))
            .withResizePercentage(transformOptions.get(RESIZE_PERCENTAGE))
            .withAllowEnlargement(transformOptions.get(ALLOW_ENLARGEMENT))
            .withMaintainAspectRatio(transformOptions.get(MAINTAIN_ASPECT_RATIO))
            .withCommandOptions(transformOptions.get(COMMAND_OPTIONS))
            .build();

        String pageRange = calculatePageRange(
            stringToInteger(transformOptions.get(START_PAGE)),
            stringToInteger(transformOptions.get(END_PAGE))
        );

        // NOTE(review): presumably stringToLong yields null when no timeout option is set - confirm in Util.
        Long timeout = stringToLong(transformOptions.get(TIMEOUT));

        commandExecutor.run(options, sourceFile, pageRange, targetFile, timeout);
    }

    @Override
    public String getTransformerId()
    {
        return ID;
    }

    /**
     * Formats the page selector built from the start and end pages.
     * Copied from ImageMagickController.
     *
     * @return {@code ""} when both are {@code null}; {@code "[n]"} when only one page is given or
     *         both are equal; {@code "[start-end]"} otherwise
     */
    private static String calculatePageRange(Integer startPage, Integer endPage)
    {
        if (startPage == null)
        {
            return endPage == null ? "" : "[" + endPage + ']';
        }
        if (endPage == null || startPage.equals(endPage))
        {
            return "[" + startPage + ']';
        }
        return "[" + startPage + '-' + endPage + ']';
    }
}

View File

@@ -1,79 +0,0 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transformer.util.RequestParamMap.ALLOW_PDF_ENLARGEMENT;
import static org.alfresco.transformer.util.RequestParamMap.HEIGHT_REQUEST_PARAM;
import static org.alfresco.transformer.util.RequestParamMap.MAINTAIN_PDF_ASPECT_RATIO;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_REQUEST_PARAM;
import static org.alfresco.transformer.util.RequestParamMap.TIMEOUT;
import static org.alfresco.transformer.util.RequestParamMap.WIDTH_REQUEST_PARAM;
import static org.alfresco.transformer.util.Util.stringToLong;
import java.io.File;
import java.util.Map;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.PdfRendererOptionsBuilder;
/**
 * {@link Transformer} implementation that delegates to a {@link PdfRendererCommandExecutor}.
 * <p>
 * The string-valued transform options are converted into a command option string with
 * {@link PdfRendererOptionsBuilder} before the executor is run.
 */
public class PdfRendererAdapter implements Transformer
{
    /** Identifier returned by {@link #getTransformerId()}. */
    private static final String ID = "pdfrenderer";

    // Deliberately not final: tests replace this field reflectively with a mocked executor.
    private PdfRendererCommandExecutor pdfExecutor;

    /**
     * Creates the adapter and its underlying command executor.
     *
     * @param execPath passed straight through to {@link PdfRendererCommandExecutor}
     * @throws Exception if the command executor cannot be created
     */
    public PdfRendererAdapter(String execPath) throws Exception
    {
        pdfExecutor = new PdfRendererCommandExecutor(execPath);
    }

    /**
     * Builds the renderer options from {@code transformOptions} and runs the command executor.
     * The mimetype arguments are not used by this implementation.
     *
     * @param sourceFile       file to transform
     * @param targetFile       file to write the result to
     * @param sourceMimetype   unused
     * @param targetMimetype   unused
     * @param transformOptions option name to string value map; absent keys are passed to the builder as {@code null}
     */
    @Override
    public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions)
    {
        final String options = PdfRendererOptionsBuilder
            .builder()
            .withPage(transformOptions.get(PAGE_REQUEST_PARAM))
            .withWidth(transformOptions.get(WIDTH_REQUEST_PARAM))
            .withHeight(transformOptions.get(HEIGHT_REQUEST_PARAM))
            .withAllowPdfEnlargement(transformOptions.get(ALLOW_PDF_ENLARGEMENT))
            .withMaintainPdfAspectRatio(transformOptions.get(MAINTAIN_PDF_ASPECT_RATIO))
            .build();

        // NOTE(review): presumably stringToLong yields null when no timeout option is set - confirm in Util.
        Long timeout = stringToLong(transformOptions.get(TIMEOUT));

        pdfExecutor.run(options, sourceFile, targetFile, timeout);
    }

    @Override
    public String getTransformerId()
    {
        return ID;
    }
}

View File

@@ -1,73 +0,0 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import java.io.File;
import java.util.Map;
import static java.lang.Boolean.parseBoolean;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
/**
 * {@link Transformer} implementation that forwards transform requests to a {@link TikaJavaExecutor}.
 */
public class TikaAdapter implements Transformer
{
    private static final String ID = "tika";

    private TikaJavaExecutor tikaJavaExecutor;

    /**
     * Creates the adapter and the executor it delegates to.
     *
     * @throws Exception if the {@link TikaJavaExecutor} cannot be created
     */
    public TikaAdapter() throws Exception
    {
        tikaJavaExecutor = new TikaJavaExecutor();
    }

    /**
     * Decodes the Tika related entries of {@code transformOptions} into executor arguments and
     * calls the executor.
     *
     * The two boolean options default to {@code false} and are passed as {@code null} when not
     * set; the target encoding defaults to UTF-8.
     *
     * @param sourceFile       source file passed to the executor
     * @param targetFile       target file passed to the executor
     * @param sourceMimetype   not used by this adapter
     * @param targetMimetype   appended to the target mimetype argument
     * @param transformOptions option names mapped to their string values
     * @throws Exception propagated from the executor call
     */
    @Override
    public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions) throws Exception
    {
        final String transformName = transformOptions.get(TRANSFORM_NAME_PARAMETER);

        // Flags are only passed on when explicitly enabled; otherwise null is handed over.
        String includeContentsArg = null;
        if (parseBoolean(transformOptions.getOrDefault("includeContents", "false")))
        {
            includeContentsArg = INCLUDE_CONTENTS;
        }

        String notExtractBookmarksTextArg = null;
        if (parseBoolean(transformOptions.getOrDefault("notExtractBookmarksText", "false")))
        {
            notExtractBookmarksTextArg = NOT_EXTRACT_BOOKMARKS_TEXT;
        }

        final String encoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
        final String targetMimetypeArg = TARGET_MIMETYPE + targetMimetype;
        final String targetEncodingArg = TARGET_ENCODING + encoding;

        tikaJavaExecutor.call(sourceFile, targetFile, transformName,
            includeContentsArg,
            notExtractBookmarksTextArg,
            targetMimetypeArg, targetEncodingArg);
    }

    /**
     * @return the id of this transformer, {@code "tika"}
     */
    @Override
    public String getTransformerId()
    {
        return ID;
    }
}

View File

@@ -1,59 +0,0 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.util.Map;
/**
 * Interface for transformers used with {@link org.alfresco.transformer.AIOTransformRegistry}.
 *
 * An implementation performs a transformation between a source and a target file and
 * identifies itself through {@link #getTransformerId()}.
 */
public interface Transformer
{
    /**
     * Key of an additional entry in the transform options map, used by transformers like
     * {@link TikaAdapter} and {@link MiscAdapter}. The value stored under this key is the name
     * of the transform to perform.
     */
    String TRANSFORM_NAME_PARAMETER = "alfresco.transform-name-parameter";

    /**
     * Implementation of the actual transformation.
     *
     * @param sourceFile       the file to be transformed
     * @param targetFile       the file the result of the transformation is written to
     * @param sourceMimetype   the mimetype of {@code sourceFile}
     * @param targetMimetype   the mimetype requested for {@code targetFile}
     * @param transformOptions option names mapped to their string values; may contain
     *                         {@link #TRANSFORM_NAME_PARAMETER}
     * @throws Exception if the transformation cannot be performed (the concrete exception types
     *                   are up to the implementation)
     */
    void transform(File sourceFile, File targetFile, String sourceMimetype,
        String targetMimetype, Map<String, String> transformOptions) throws Exception;

    /**
     * @return A unique transformer id,
     *
     */
    String getTransformerId();
}

View File

@@ -28,9 +28,9 @@ package org.alfresco.transformer;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transformer.transformers.MiscAdapter;
import org.alfresco.transformer.transformers.TikaAdapter;
import org.alfresco.transformer.transformers.Transformer;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Before;
@@ -43,23 +43,22 @@ import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.alfresco.transformer.transformers.TextToPdfContentTransformer.PAGE_LIMIT;
import static org.alfresco.transformer.transformers.Transformer.TRANSFORM_NAME_PARAMETER;
import static org.junit.Assert.*;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class AIOTransformRegistryTest
{
private static final String SOURCE_MIMETYPE = "text/html";
private static final String TARGET_MIMETYPE = "text/plain";
String SOURCE_ENCODING = "sourceEncoding";
String TARGET_ENCODING = "targetEncoding";
AIOTransformRegistry aioTransformerRegistry = new AIOTransformRegistry();
ObjectMapper objectMapper = new ObjectMapper();
@@ -68,8 +67,8 @@ public class AIOTransformRegistryTest
@Before
public void before() throws Exception
{
aioTransformerRegistry.registerTransformer(new MiscAdapter());
aioTransformerRegistry.registerTransformer(new TikaAdapter());
aioTransformerRegistry.registerTransformer(new SelectingTransformer());
aioTransformerRegistry.registerTransformer(new TikaJavaExecutor());
}
@@ -153,7 +152,7 @@ public class AIOTransformRegistryTest
public void testDuplicateTransformsException() throws Exception
{
// The Misc transformers are already registered
aioTransformerRegistry.registerTransformer(new MiscAdapter());
aioTransformerRegistry.registerTransformer(new SelectingTransformer());
}
// Test copied from Misc (HtmlParserContentTransformerTest) See ATS-712 aioTransformerRegistry - html
@@ -187,7 +186,7 @@ public class AIOTransformRegistryTest
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
parameters.put(TRANSFORM_NAME_PARAMETER, "html");
Transformer transformer = aioTransformerRegistry.getByTransformName("html");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
@@ -201,7 +200,7 @@ public class AIOTransformRegistryTest
parameters = new HashMap<>();
parameters.put(TRANSFORM_NAME_PARAMETER, "html");
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -214,7 +213,7 @@ public class AIOTransformRegistryTest
parameters = new HashMap<>();
parameters.put(TRANSFORM_NAME_PARAMETER, "html");
parameters.put(SOURCE_ENCODING, "UTF-16");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -240,7 +239,7 @@ public class AIOTransformRegistryTest
parameters = new HashMap<>();
parameters.put(TRANSFORM_NAME_PARAMETER, "html");
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -298,7 +297,7 @@ public class AIOTransformRegistryTest
parameters.put(PAGE_LIMIT, pageLimit);
parameters.put(TRANSFORM_NAME_PARAMETER, "textToPdf");
Transformer transformer = aioTransformerRegistry.getByTransformName("textToPdf");
transformer.transform(sourceFile, targetFile, "text/plain", "application/pdf", parameters);
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile);
// Read back in the PDF and check it
PDDocument doc = PDDocument.load(targetFile);

View File

@@ -69,6 +69,23 @@
],
"transformOptions": [
]
},
{
"transformerName": "HtmlMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "text/html", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "RFC822MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -284,6 +284,7 @@
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.keynote", "priority": 120, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/plain"},
@@ -293,6 +294,7 @@
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.numbers", "priority": 120, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/plain"},
@@ -352,6 +354,7 @@
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "priority": 55, "targetMediaType": "text/plain"},
@@ -486,7 +489,12 @@
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"}
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "text/html"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "text/plain"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
@@ -503,6 +511,464 @@
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "DWGMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "MailMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "MP3MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "OfficeMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/msword", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.visio2013", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msoffice-embedded; format=ole10_native", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msworks-spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-mspublisher", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msoffice", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/sldworks", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "OpenDocumentMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.graphics-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.formula", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.database", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-web", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.chart", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.formula", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.presentation-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-master", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "PdfBoxMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "PoiMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.drawing", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-xpsdocument", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.drawing.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "model/vnd.dwfx+xps", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.stencil", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.stencil.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "TikaAudioMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "video/x-m4v", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-oggflac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mp4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/vorbis", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/3gpp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-flac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/3gpp2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/quicktime", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/mp4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "TikaAutoMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-htmlhelp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/atom+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/midi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/aaigrid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bag", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-quattro-pro; version=9", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ibooks+zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/wave", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-midi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-daala", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/matlab-mat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/aiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/jaxa-pal-sar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-pcraster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/arg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-kro", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-hdf5-image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/big-gif", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/zlib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cosar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ntv2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.sun.xml.writer", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gmt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ida", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-groovy", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/sar-ceos", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/acad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.adobe.photoshop", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sharedlib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-m4a", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/webp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wap.xhtml+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-aiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-spreadsheetml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-airsar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pcidsk", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java-pack200", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-fujibas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-zmap", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/rtf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xz", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg; codecs=speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-l1b", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsbg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sdat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-coredump", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msaccess", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dods", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/png", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-outlook-pst", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bsb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cpio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dbf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-los-las", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/autocad_dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.workspace.3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.workspace.4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-bpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "gzip/document", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-java", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-brotli", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/elas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-jb2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cappi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/epub+zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ace2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sas-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-mff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-srp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogguvs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "drawing/dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-doq2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-acad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-kml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-autocad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-mff2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-snodas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/terragen", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-wcs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-c++src", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/timestamped-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/tiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/msexcel", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-asp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-envi-hdr", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/iso19139+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ecrg-toc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/aig", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-wav", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/jdem", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-webp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-arj", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lzma", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java-vm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/envisat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-doq1", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/vnd.wave", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ppi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ilwis", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gunzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-icon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/svg+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ms-owner", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-grib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/ms-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/fits", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-mpeg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bzip2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/tsv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-fictionbook+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-p-aux", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-font-ttf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-xcf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-ms-bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/wmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/eir", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-matlab-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/deflate64", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/wav", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rs2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tsx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lcp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-mbtiles", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-oggpcm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-epsilon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msgn", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/csv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-dimap", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.microsoft.icon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-envi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word2006ml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-font-adobe-metric", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rst", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vrt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ctg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-e00-grid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-ogg-flac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-psd", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/rss", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/sdts-raster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/oxps", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/leveller", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ingr", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/sgi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pnm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/raster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-ogg-pcm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg; codecs=opus", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/fits", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-r", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/gif", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/java-vm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mspowerpoint", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-http", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/applefile", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/rtf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/adrg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-rgb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ngs-geoid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-map", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ceos", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xpm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ers", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-yuv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isis2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-nwt-grd", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isis3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-nwt-grc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/daala", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-blx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-dirac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ndf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.wap.wbmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/theora", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/kate", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pkcs7-mime", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/fit", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ctable2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-executable", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isatab", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/grass-ascii-grid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/plain", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzipped", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gxf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lan", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xyz", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-jbig2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/nitf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mbox", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/chm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-fast", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-deflate", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-grib2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-ozi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pds", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.iwork", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-usgs-dem", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/dif+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/geotiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsag", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-snappy", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-theora", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ntf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=6.x", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pkcs7-signature", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=5.1", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=5.0", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-arj-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/geotopic", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-java-source", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/basic", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pcisdk", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rik", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/opus", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/jp2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gtx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-object", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-wordml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-wmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rpf-toc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-srtmhgt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-generic-bin", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/vnd.iptc.anpa", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msmetafile", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-wms", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-oggrgb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/xcf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/photoshop", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lz4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-7z-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-oggyuv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msdownload", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/jpeg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/icns", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-geo-pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-uvs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-flv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-zip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-unix-dump", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-coasp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dipex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-til", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gs7bg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-unix-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-elf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/dted", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rasterlite", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-mp4a", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gzip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-chm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -26,10 +26,7 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,7 +40,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,32 +26,17 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.util.Util.stringToInteger;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.Map;
import javax.annotation.PostConstruct;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.PostConstruct;
import java.io.File;
import java.util.Collections;
import java.util.Map;
/**
* Controller for the Docker based ImageMagick transformer.
@@ -125,135 +110,22 @@ public class ImageMagickController extends AbstractTransformerController
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
commandExecutor.run("", sourceFile, "", targetFile, null);
transform(null, null, null, Collections.emptyMap(), sourceFile, targetFile);
}
};
}
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam(value = "timeout", required = false) Long timeout,
@RequestParam(value = "testDelay", required = false) Long testDelay,
@RequestParam(value = "startPage", required = false) Integer startPage,
@RequestParam(value = "endPage", required = false) Integer endPage,
@RequestParam(value = "alphaRemove", required = false) Boolean alphaRemove,
@RequestParam(value = "autoOrient", required = false) Boolean autoOrient,
@RequestParam(value = "cropGravity", required = false) String cropGravity,
@RequestParam(value = "cropWidth", required = false) Integer cropWidth,
@RequestParam(value = "cropHeight", required = false) Integer cropHeight,
@RequestParam(value = "cropPercentage", required = false) Boolean cropPercentage,
@RequestParam(value = "cropXOffset", required = false) Integer cropXOffset,
@RequestParam(value = "cropYOffset", required = false) Integer cropYOffset,
@RequestParam(value = "thumbnail", required = false) Boolean thumbnail,
@RequestParam(value = "resizeWidth", required = false) Integer resizeWidth,
@RequestParam(value = "resizeHeight", required = false) Integer resizeHeight,
@RequestParam(value = "resizePercentage", required = false) Boolean resizePercentage,
@RequestParam(value = "allowEnlargement", required = false) Boolean allowEnlargement,
@RequestParam(value = "maintainAspectRatio", required = false) Boolean maintainAspectRatio,
// The commandOptions parameter is supported in ACS 6.0.1 because there may be
// custom renditions that use it. However the Transform service should
// not support it as it provides the option to specify arbitrary command
// options or even the option to run something else on the command line.
// All Transform service options should be checked as is done for the other
// request parameters. Setting this option in the rendition's
// ImageTransformationOptions object is being deprecated for the point where
// The Transform service is being used for all transforms. In the case of
// ACS 6.0, this is relatively safe as it requires an AMP to be installed
// which supplies the commandOptions.
@RequestParam(value = "commandOptions", required = false) String commandOptions)
@Override
protected String getTransformerName(final File sourceFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> transformOptions)
{
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(),
targetExtension);
getProbeTestTransform().incrementTransformerCount();
File sourceFile = createSourceFile(request, sourceMultipartFile);
File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
final String options = ImageMagickOptionsBuilder
.builder()
.withStartPage(startPage)
.withEndPage(endPage)
.withAlphaRemove(alphaRemove)
.withAutoOrient(autoOrient)
.withCropGravity(cropGravity)
.withCropWidth(cropWidth)
.withCropHeight(cropHeight)
.withCropPercentage(cropPercentage)
.withCropXOffset(cropXOffset)
.withCropYOffset(cropYOffset)
.withThumbnail(thumbnail)
.withResizeWidth(resizeWidth)
.withResizeHeight(resizeHeight)
.withResizePercentage(resizePercentage)
.withAllowEnlargement(allowEnlargement)
.withMaintainAspectRatio(maintainAspectRatio)
.withCommandOptions(commandOptions)
.build();
String pageRange = calculatePageRange(startPage, endPage);
commandExecutor.run(options, sourceFile, pageRange, targetFile,
timeout);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
return null; // does not matter what value is returned, as it is not used because there is only one.
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
final String options = ImageMagickOptionsBuilder
.builder()
.withStartPage(transformOptions.get("startPage"))
.withEndPage(transformOptions.get("endPage"))
.withAlphaRemove(transformOptions.get("alphaRemove"))
.withAutoOrient(transformOptions.get("autoOrient"))
.withCropGravity(transformOptions.get("cropGravity"))
.withCropWidth(transformOptions.get("cropWidth"))
.withCropHeight(transformOptions.get("cropHeight"))
.withCropPercentage(transformOptions.get("cropPercentage"))
.withCropXOffset(transformOptions.get("cropXOffset"))
.withCropYOffset(transformOptions.get("cropYOffset"))
.withThumbnail(transformOptions.get("thumbnail"))
.withResizeWidth(transformOptions.get("resizeWidth"))
.withResizeHeight(transformOptions.get("resizeHeight"))
.withResizePercentage(transformOptions.get("resizePercentage"))
.withAllowEnlargement(transformOptions.get("allowEnlargement"))
.withMaintainAspectRatio(transformOptions.get("maintainAspectRatio"))
.build();
final String pageRange = calculatePageRange(
stringToInteger(transformOptions.get("startPage")),
stringToInteger(transformOptions.get("endPage")));
commandExecutor.run(options, sourceFile, pageRange, targetFile,
timeout);
}
private static String calculatePageRange(Integer startPage, Integer endPage)
{
return startPage == null
? endPage == null
? ""
: "[" + endPage + ']'
: endPage == null || startPage.equals(endPage)
? "[" + startPage + ']'
: "[" + startPage + '-' + endPage + ']';
commandExecutor.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,36 +26,6 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.executors.RuntimeExec.ExecutionResult;
import static org.alfresco.transformer.util.MimetypeMap.PREFIX_IMAGE;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.when;
import static org.springframework.http.HttpHeaders.ACCEPT;
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.CREATED;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
import static org.springframework.http.MediaType.IMAGE_PNG_VALUE;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
@@ -80,6 +50,35 @@ import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import static org.alfresco.transformer.executors.RuntimeExec.ExecutionResult;
import static org.alfresco.transformer.util.MimetypeMap.PREFIX_IMAGE;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.when;
import static org.springframework.http.HttpHeaders.ACCEPT;
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.CREATED;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
import static org.springframework.http.MediaType.IMAGE_PNG_VALUE;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import static org.springframework.util.StringUtils.getFilenameExtension;
/**
* Test the ImageMagickController without a server.
@@ -228,7 +227,7 @@ public class ImageMagickControllerTest extends AbstractTransformerControllerTest
{
for (String value : new String[]{"North", "NorthEast", "East", "SouthEast", "South", "SouthWest", "West", "NorthWest", "Center"})
{
expectedOptions = "-gravity " + value + " +repage";
expectedOptions = "-auto-orient " + "-gravity " + value + " +repage";
mockMvc
.perform(MockMvcRequestBuilders
.multipart("/transform")
@@ -338,7 +337,7 @@ public class ImageMagickControllerTest extends AbstractTransformerControllerTest
public void deprecatedCommandOptionsTest() throws Exception
{
// Example of why the commandOptions parameter is a bad idea.
expectedOptions = "( horrible command / ); -resize 321x654>";
expectedOptions = "( horrible command / ); -auto-orient -resize 321x654";
mockMvc
.perform(MockMvcRequestBuilders
.multipart("/transform")

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,16 +26,15 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.util.Util.stringToBoolean;
import static org.alfresco.transformer.util.Util.stringToInteger;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import com.google.common.collect.ImmutableList;
import org.alfresco.transform.exceptions.TransformException;
import java.util.List;
import java.util.StringJoiner;
import org.alfresco.transform.exceptions.TransformException;
import com.google.common.collect.ImmutableList;
import static org.alfresco.transformer.util.Util.stringToBoolean;
import static org.alfresco.transformer.util.Util.stringToInteger;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
/**
* ImageMagick options builder.
@@ -107,7 +106,7 @@ public final class ImageMagickOptionsBuilder
public ImageMagickOptionsBuilder withAutoOrient(final Boolean autoOrient)
{
this.autoOrient = autoOrient;
this.autoOrient = autoOrient == null ? true : autoOrient;
return this;
}
@@ -223,7 +222,7 @@ public final class ImageMagickOptionsBuilder
public ImageMagickOptionsBuilder withAllowEnlargement(final Boolean allowEnlargement)
{
this.allowEnlargement = allowEnlargement;
this.allowEnlargement = allowEnlargement == null ? true : allowEnlargement;
return this;
}

View File

@@ -26,15 +26,42 @@
*/
package org.alfresco.transformer.executors;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.ImageMagickOptionsBuilder;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transformer.util.RequestParamMap.ALLOW_ENLARGEMENT;
import static org.alfresco.transformer.util.RequestParamMap.ALPHA_REMOVE;
import static org.alfresco.transformer.util.RequestParamMap.AUTO_ORIENT;
import static org.alfresco.transformer.util.RequestParamMap.COMMAND_OPTIONS;
import static org.alfresco.transformer.util.RequestParamMap.CROP_GRAVITY;
import static org.alfresco.transformer.util.RequestParamMap.CROP_HEIGHT;
import static org.alfresco.transformer.util.RequestParamMap.CROP_PERCENTAGE;
import static org.alfresco.transformer.util.RequestParamMap.CROP_WIDTH;
import static org.alfresco.transformer.util.RequestParamMap.CROP_X_OFFSET;
import static org.alfresco.transformer.util.RequestParamMap.CROP_Y_OFFSET;
import static org.alfresco.transformer.util.RequestParamMap.END_PAGE;
import static org.alfresco.transformer.util.RequestParamMap.MAINTAIN_ASPECT_RATIO;
import static org.alfresco.transformer.util.RequestParamMap.RESIZE_HEIGHT;
import static org.alfresco.transformer.util.RequestParamMap.RESIZE_PERCENTAGE;
import static org.alfresco.transformer.util.RequestParamMap.RESIZE_WIDTH;
import static org.alfresco.transformer.util.RequestParamMap.START_PAGE;
import static org.alfresco.transformer.util.RequestParamMap.THUMBNAIL;
import static org.alfresco.transformer.util.RequestParamMap.TIMEOUT;
import static org.alfresco.transformer.util.Util.stringToInteger;
import static org.alfresco.transformer.util.Util.stringToLong;
/**
* CommandExecutor implementation for running ImageMagick transformations. It runs the
* transformation logic as a separate Shell process.
*/
public class ImageMagickCommandExecutor extends AbstractCommandExecutor
{
private static final String ID = "imagemagick";
private final String ROOT;
private final String DYN;
private final String EXE;
@@ -65,6 +92,12 @@ public class ImageMagickCommandExecutor extends AbstractCommandExecutor
super.checkCommand = createCheckCommand();
}
/**
* Returns the identifier of this executor.
*
* @return the value of the {@code ID} constant
*/
@Override
public String getTransformerId()
{
return ID;
}
public static final String LICENCE = "This transformer uses ImageMagick from ImageMagick Studio LLC. See the license at http://www.imagemagick.org/script/license.php or in /ImageMagick-license.txt";
@Override
@@ -111,4 +144,51 @@ public class ImageMagickCommandExecutor extends AbstractCommandExecutor
runtimeExec.setCommandsAndArguments(commandsAndArguments);
return runtimeExec;
}
/**
 * Converts the supplied transform options into an ImageMagick option string and a page range,
 * then runs the command against the source file.
 *
 * @param transformName    not used by this method
 * @param sourceMimetype   not used by this method
 * @param targetMimetype   not used by this method
 * @param transformOptions option values keyed by the {@code RequestParamMap} names read below
 * @param sourceFile       file passed to {@code run} as the source
 * @param targetFile       file passed to {@code run} as the target
 * @throws TransformException declared by the overridden method
 */
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
    Map<String, String> transformOptions,
    File sourceFile, File targetFile) throws TransformException
{
    // The page limits are needed twice: by the options builder and for the page range suffix.
    final String startPageOption = transformOptions.get(START_PAGE);
    final String endPageOption = transformOptions.get(END_PAGE);

    final String commandOptions = ImageMagickOptionsBuilder
        .builder()
        .withStartPage(startPageOption)
        .withEndPage(endPageOption)
        .withAlphaRemove(transformOptions.get(ALPHA_REMOVE))
        .withAutoOrient(transformOptions.get(AUTO_ORIENT))
        .withCropGravity(transformOptions.get(CROP_GRAVITY))
        .withCropWidth(transformOptions.get(CROP_WIDTH))
        .withCropHeight(transformOptions.get(CROP_HEIGHT))
        .withCropPercentage(transformOptions.get(CROP_PERCENTAGE))
        .withCropXOffset(transformOptions.get(CROP_X_OFFSET))
        .withCropYOffset(transformOptions.get(CROP_Y_OFFSET))
        .withThumbnail(transformOptions.get(THUMBNAIL))
        .withResizeWidth(transformOptions.get(RESIZE_WIDTH))
        .withResizeHeight(transformOptions.get(RESIZE_HEIGHT))
        .withResizePercentage(transformOptions.get(RESIZE_PERCENTAGE))
        .withAllowEnlargement(transformOptions.get(ALLOW_ENLARGEMENT))
        .withMaintainAspectRatio(transformOptions.get(MAINTAIN_ASPECT_RATIO))
        .withCommandOptions(transformOptions.get(COMMAND_OPTIONS))
        .build();

    final String sourcePages = calculatePageRange(
        stringToInteger(startPageOption),
        stringToInteger(endPageOption));

    Long timeout = stringToLong(transformOptions.get(TIMEOUT));

    run(commandOptions, sourceFile, sourcePages, targetFile, timeout);
}
/**
 * Builds the bracketed page selector derived from the start and end pages.
 *
 * @param startPage first page, or {@code null} if not specified
 * @param endPage   last page, or {@code null} if not specified
 * @return {@code ""} when neither page is given, {@code "[p]"} when only one page is given or
 *         both are equal, otherwise {@code "[start-end]"}
 */
private static String calculatePageRange(Integer startPage, Integer endPage)
{
    if (startPage == null)
    {
        if (endPage == null)
        {
            return "";
        }
        return "[" + endPage + ']';
    }

    // A missing end page, or one equal to the start page, selects a single page.
    if (endPage == null || startPage.equals(endPage))
    {
        return "[" + startPage + ']';
    }

    return "[" + startPage + '-' + endPage + ']';
}
}

View File

@@ -26,10 +26,7 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,7 +40,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
@@ -60,6 +59,9 @@ public class Application
return registry -> registry.config().commonTags("containerName", containerName);
}
// To run the LibreOffice T-Engine from the command line on a Mac, you generally need to
// install LibreOffice and add: -Dtransform.core.libreoffice.path=/Applications/LibreOffice.app/Contents/
// to the start up command.
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,37 +26,21 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.Map;
import javax.annotation.PostConstruct;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.PostConstruct;
import java.io.File;
import java.util.Collections;
import java.util.Map;
/**
* Controller for the Docker based LibreOffice transformer.
*
*
* Status Codes:
*
* 200 Success
@@ -112,44 +96,23 @@ public class LibreOfficeController extends AbstractTransformerController
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
transform(null, null, null, Collections.emptyMap(), sourceFile, targetFile);
javaExecutor.call(sourceFile, targetFile);
}
};
}
//todo: the "timeout" request parameter is ignored; the timeout is preset at JodConverter creation
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam(value = "timeout", required = false) Long timeout,
@RequestParam(value = "testDelay", required = false) Long testDelay)
@Override
protected String getTransformerName(final File sourceFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> transformOptions)
{
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(),
targetExtension);
getProbeTestTransform().incrementTransformerCount();
File sourceFile = createSourceFile(request, sourceMultipartFile);
File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
javaExecutor.call(sourceFile, targetFile);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
return null; // does not matter what value is returned, as it is not used because there is only one.
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
javaExecutor.call(sourceFile, targetFile);
javaExecutor.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -0,0 +1 @@
#logging.level.org.alfresco.transformer.LibreOfficeController=debug

View File

@@ -6,6 +6,8 @@
<form method="POST" enctype="multipart/form-data" action="/transform">
<table>
<tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
<tr><td><div style="text-align:right">sourceMimetype *</div></td><td><input type="text" name="sourceMimetype" value="" /></td></tr>
<tr><td><div style="text-align:right">targetMimetype</div></td><td><input type="text" name="targetMimetype" value="" /></td></tr>
<tr><td><div style="text-align:right">targetExtension *</div></td><td><input type="text" name="targetExtension" value="" /></td></tr>
<tr><td><div style="text-align:right">timeout</div></td><td><input type="text" name="timeout" value="" /></td></tr>
<tr><td><div style="text-align:right">testDelay</div></td><td><input type="text" name="testDelay" value="" /></td></tr>

View File

@@ -0,0 +1,211 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
import com.sun.star.beans.PropertyValue;
import com.sun.star.beans.UnknownPropertyException;
import com.sun.star.beans.XPropertySet;
import com.sun.star.document.XDocumentInfoSupplier;
import com.sun.star.frame.XComponentLoader;
import com.sun.star.io.IOException;
import com.sun.star.lang.IllegalArgumentException;
import com.sun.star.lang.WrappedTargetException;
import com.sun.star.lang.XComponent;
import com.sun.star.task.ErrorCodeIOException;
import com.sun.star.util.CloseVetoException;
import com.sun.star.util.XCloseable;
import com.sun.star.util.XRefreshable;
import org.artofsolving.jodconverter.office.OfficeContext;
import org.artofsolving.jodconverter.office.OfficeException;
import org.artofsolving.jodconverter.office.OfficeTask;
import java.io.File;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import static org.artofsolving.jodconverter.office.OfficeUtils.SERVICE_DESKTOP;
import static org.artofsolving.jodconverter.office.OfficeUtils.cast;
import static org.artofsolving.jodconverter.office.OfficeUtils.toUrl;
/**
* JodConverter {@link OfficeTask} that loads a document and extracts values from Open Office
* documents into the following:
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>description:</b> -- cm:description
* </pre>
* Once {@link #execute(OfficeContext)} has completed, {@link #getMetadata()} returns a map holding
* the keys {@code author}, {@code title} and {@code description}. A value is {@code null} when
* the document does not provide the corresponding property.
*
* @author Neil McErlean
* @author adavis
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
*/
@Deprecated
public class LibreOfficeExtractMetadataTask implements OfficeTask
{
/*
* These keys are used by Alfresco to map properties into a content model and do need to
* have lower-case initial letters.
*/
private static final String KEY_AUTHOR = "author";
private static final String KEY_TITLE = "title";
private static final String KEY_DESCRIPTION = "description";
// The document the metadata is read from.
private File inputFile;
// Result of the extraction; an empty map until execute() replaces it.
private Map<String, Serializable> metadata = new HashMap<String, Serializable>();
/**
* Creates a task for the given document.
*
* @param inputFile the document to extract metadata from
*/
public LibreOfficeExtractMetadataTask(File inputFile)
{
this.inputFile = inputFile;
}
/**
* Returns the extracted values.
*
* @return the map filled in by {@link #execute(OfficeContext)}; empty if the task has not
*         produced a result yet
*/
public Map<String, Serializable> getMetadata()
{
return metadata;
}
/**
* Loads the input file as a hidden, read-only document, reads its Author, Subject and Title
* properties into the metadata map and finally closes the document again.
*
* @param context the office context used to obtain the desktop service
* @throws OfficeException if the input file does not exist, the document could not be loaded,
*         or any other exception occurred (wrapped as the cause)
*/
public void execute(OfficeContext context)
{
XComponent document = null;
try
{
if (!inputFile.exists())
{
throw new OfficeException("input document not found");
}
XComponentLoader loader = cast(XComponentLoader.class, context
.getService(SERVICE_DESKTOP));
// Need to set the Hidden property to ensure that OOo GUI does not appear.
PropertyValue hiddenOOo = new PropertyValue();
hiddenOOo.Name = "Hidden";
hiddenOOo.Value = Boolean.TRUE;
// The document is only read, so it is opened read-only.
PropertyValue readOnly = new PropertyValue();
readOnly.Name = "ReadOnly";
readOnly.Value = Boolean.TRUE;
try
{
// TODO The following call fails. Not debugged why as it appears this extractor is not used any more.
document = loader.loadComponentFromURL(toUrl(inputFile), "_blank", 0,
new PropertyValue[]{hiddenOOo, readOnly});
}
catch (IllegalArgumentException illegalArgumentException)
{
throw new OfficeException("could not load document: "
+ inputFile.getName(), illegalArgumentException);
}
catch (ErrorCodeIOException errorCodeIOException)
{
// Include the office error code in the message.
throw new OfficeException("could not load document: "
+ inputFile.getName() + "; errorCode: "
+ errorCodeIOException.ErrCode, errorCodeIOException);
}
catch (IOException ioException)
{
throw new OfficeException("could not load document: "
+ inputFile.getName(), ioException);
}
if (document == null)
{
throw new OfficeException("could not load document: "
+ inputFile.getName());
}
// Refresh the document first if it supports refreshing.
XRefreshable refreshable = cast(XRefreshable.class, document);
if (refreshable != null)
{
refreshable.refresh();
}
XDocumentInfoSupplier docInfoSupplier = cast(XDocumentInfoSupplier.class, document);
XPropertySet propSet = cast(XPropertySet.class, docInfoSupplier.getDocumentInfo());
// The strings below are property names as used by OOo. They need upper-case
// initial letters.
Object author = getPropertyValueIfAvailable(propSet, "Author");
// Note: the description is taken from the document's Subject property.
Object description = getPropertyValueIfAvailable(propSet, "Subject");
Object title = getPropertyValueIfAvailable(propSet, "Title");
// All three keys are always present; the value is null if the property was not available.
metadata = new HashMap<String, Serializable>(3);
metadata.put(KEY_AUTHOR, author == null ? null : author.toString());
metadata.put(KEY_DESCRIPTION, description == null ? null : description.toString());
metadata.put(KEY_TITLE, title == null ? null : title.toString());
}
catch (OfficeException officeException)
{
// Already the right exception type: rethrow unchanged rather than wrapping it below.
throw officeException;
}
catch (Exception exception)
{
throw new OfficeException("conversion failed", exception);
}
finally
{
// Always release a loaded document: close it if it is closeable, otherwise dispose it.
if (document != null)
{
XCloseable closeable = cast(XCloseable.class, document);
if (closeable != null)
{
try
{
closeable.close(true);
}
catch (CloseVetoException closeVetoException)
{
// whoever raised the veto should close the document
}
}
else
{
document.dispose();
}
}
}
}
/**
* OOo throws exceptions if we ask for properties that aren't there, so we'll tread carefully.
*
* @param propSet the property set to read from
* @param propertyName property name as used by the OOo API.
* @return the property value, or {@code null} if the property set has no property of that name
* @throws UnknownPropertyException
* @throws WrappedTargetException
*/
private Object getPropertyValueIfAvailable(XPropertySet propSet, String propertyName)
throws UnknownPropertyException, WrappedTargetException
{
if (propSet.getPropertySetInfo().hasPropertyByName(propertyName))
{
return propSet.getPropertyValue(propertyName);
}
else
{
return null;
}
}
}

View File

@@ -26,12 +26,8 @@
*/
package org.alfresco.transformer.executors;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import java.io.File;
import java.io.IOException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.sun.star.task.ErrorCodeIOException;
import org.alfresco.transform.exceptions.TransformException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
@@ -42,7 +38,13 @@ import org.artofsolving.jodconverter.office.OfficeManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sun.star.task.ErrorCodeIOException;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.Map;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
/**
* JavaExecutor implementation for running LibreOffice transformations. It loads the
@@ -50,6 +52,8 @@ import com.sun.star.task.ErrorCodeIOException;
*/
public class LibreOfficeJavaExecutor implements JavaExecutor
{
// Identifier returned by getTransformerId(); it never changes, so it is declared final.
private static final String ID = "libreoffice";
private static final Logger logger = LoggerFactory.getLogger(LibreOfficeJavaExecutor.class);
private static final int JODCONVERTER_TRANSFORMATION_ERROR_CODE = 3088;
@@ -58,7 +62,9 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
public static final String LICENCE = "This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt";
private JodConverter jodconverter;
private final JodConverter jodconverter;
private final ObjectMapper jsonObjectMapper = new ObjectMapper();
public LibreOfficeJavaExecutor(String path)
{
@@ -89,6 +95,19 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
return jodconverter;
}
/**
* Returns the identifier of this executor.
*
* @return the value of the {@code ID} field
*/
@Override
public String getTransformerId()
{
return ID;
}
/**
* Transforms the source file into the target file by delegating to
* {@link #call(File, File, String...)}. The transform name, mimetypes and transform options
* are not used by this method.
*
* @param sourceFile the file to transform
* @param targetFile the file the result is written to
*/
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile)
{
call(sourceFile, targetFile);
}
@Override
public void call(File sourceFile, File targetFile, String... args)
{
@@ -147,7 +166,7 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
PDPage pdfPage = new PDPage();
try (PDDocument pdfDoc = new PDDocument();
PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, pdfPage))
PDPageContentStream ignore = new PDPageContentStream(pdfDoc, pdfPage))
{
// Even though we want an empty PDF, some libs (e.g. PDFRenderer) object to PDFs
// that have literally nothing in them, so we'll put a content stream in it.
@@ -162,4 +181,48 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
"Error creating empty PDF file", iox);
}
}
/**
 * Runs a {@link LibreOfficeExtractMetadataTask} on the source file through the JodConverter
 * office manager and writes the resulting metadata map into the target file.
 *
 * @param sourceFile the document to read the metadata from
 * @param targetFile the file the extracted metadata is written to
 * @throws TransformException with a BAD_REQUEST status if the office task fails
 *
 * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1.
 * This code exists in case there are custom implementations, that need to be converted to T-Engines.
 * It is simply a copy and paste from the content repository and has received limited testing.
 */
@Override
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
    Map<String, String> transformOptions,
    File sourceFile, File targetFile)
{
    // Obtained outside the try block so that only failures of the task itself are wrapped.
    final OfficeManager manager = jodconverter.getOfficeManager();
    final LibreOfficeExtractMetadataTask task = new LibreOfficeExtractMetadataTask(sourceFile);

    try
    {
        manager.execute(task);
    }
    catch (OfficeException officeException)
    {
        throw new TransformException(BAD_REQUEST.value(),
            "LibreOffice metadata extract failed: \n" +
                " from file: " + sourceFile, officeException);
    }

    final Map<String, Serializable> extracted = task.getMetadata();
    if (logger.isDebugEnabled())
    {
        for (Map.Entry<String, Serializable> property : extracted.entrySet())
        {
            logger.debug(property.getKey()+"="+property.getValue());
        }
    }

    writeMetadataIntoTargetFile(targetFile, extracted);
}
/**
 * Writes the metadata map into the target file using the JSON object mapper.
 *
 * @param targetFile the file to write to
 * @param results    the metadata to serialise
 * @throws TransformException with an INTERNAL_SERVER_ERROR status if writing fails
 */
private void writeMetadataIntoTargetFile(File targetFile, Map<String, Serializable> results)
{
    try
    {
        jsonObjectMapper.writeValue(targetFile, results);
    }
    catch (IOException writeFailure)
    {
        final int status = INTERNAL_SERVER_ERROR.value();
        throw new TransformException(status, "Failed to write metadata to targetFile", writeFailure);
    }
}
}

View File

@@ -26,6 +26,7 @@
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -39,8 +40,6 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,34 +26,20 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.transformers.HtmlParserContentTransformer.SOURCE_ENCODING;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.transformers.HtmlParserContentTransformer.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
@Controller
public class MiscController extends AbstractTransformerController
@@ -88,71 +74,16 @@ public class MiscController extends AbstractTransformerController
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform("html", sourceFile, targetFile, MIMETYPE_HTML,
MIMETYPE_TEXT_PLAIN, parameters);
transform("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
if (logger.isDebugEnabled())
{
logger.debug(
"Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
}
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
transformer.transform(transform, sourceFile, targetFile, sourceMimetype, targetMimetype,
transformOptions);
}
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam("targetMimetype") String targetMimetype,
@RequestParam(value = "targetEncoding", required = false) String targetEncoding,
@RequestParam("sourceMimetype") String sourceMimetype,
@RequestParam(value = "sourceEncoding", required = false) String sourceEncoding,
@RequestParam(value = "pageLimit", required = false) String pageLimit,
@RequestParam(value = "testDelay", required = false) Long testDelay)
{
if (logger.isDebugEnabled())
{
logger.debug(
"Processing request with: sourceMimetype '{}', sourceEncoding '{}', " +
"targetMimetype '{}', targetExtension '{}', targetEncoding '{}', pageLimit '{}'",
sourceMimetype, sourceEncoding, targetMimetype, targetExtension, targetEncoding,
pageLimit);
}
final String targetFilename = createTargetFileName(
sourceMultipartFile.getOriginalFilename(), targetExtension);
getProbeTestTransform().incrementTransformerCount();
final File sourceFile = createSourceFile(request, sourceMultipartFile);
final File targetFile = createTargetFile(request, targetFilename);
final Map<String, String> transformOptions = createTransformOptions(
"sourceEncoding", sourceEncoding,
"targetEncoding", targetEncoding,
"pageLimit", pageLimit);
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
transformer.transform(transform, sourceFile, targetFile, sourceMimetype, targetMimetype,
transformOptions);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -0,0 +1,72 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.List;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_RFC822;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_XHTML;
import static org.alfresco.transformer.TestFileInfo.testFile;
/**
 * Parameterized metadata extract integration tests for the Misc T-Engine. Each
 * {@link TestFileInfo} returned by {@link #engineTransformations()} is handed to the
 * superclass constructor.
 *
 * @author adavis
 */
@RunWith(Parameterized.class)
public class MiscMetadataExtractsIT extends AbstractMetadataExtractsIT
{
    /**
     * @param testFileInfo the source file this test instance is run against
     */
    public MiscMetadataExtractsIT(TestFileInfo testFileInfo)
    {
        super(testFileInfo);
    }

    /**
     * @return the test files, one per parameterized run
     */
    @Parameterized.Parameters
    public static List<TestFileInfo> engineTransformations()
    {
        final Stream<TestFileInfo> testFiles = Stream.of(
            // HtmlMetadataExtractor
            testFile(MIMETYPE_HTML, "html", "quick.html"),
            // the ".alf" suffix avoids the license header check on xhtml
            testFile(MIMETYPE_XHTML, "xhtml", "quick.xhtml.alf"),

            // RFC822MetadataExtractor
            testFile(MIMETYPE_RFC822, "eml", "quick.eml"),

            // Special test cases from the repo tests
            // ======================================
            testFile(MIMETYPE_RFC822, "eml", "quick.spanish.eml"),
            testFile(MIMETYPE_HTML, "html", "quick.japanese.html"));

        return testFiles.collect(toList());
    }
}

View File

@@ -69,6 +69,23 @@
],
"transformOptions": [
]
},
{
"transformerName": "HtmlMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "text/html", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "RFC822MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -1,5 +1,5 @@
From: Nevin Nollop <nevin.nollop@alfresco.com>
To: Nevin Nollop <nevin.nollop@alfresco.com>
To: Nevin Nollop <nevin.nollop@gmail.com>
Cc: Nevin Nollop <nevinn@alfresco.com>
Message-ID: <20040604122322.GV1905@phoenix.home>
Date: Fri, 4 Jun 2004 14:23:22 +0200

View File

@@ -0,0 +1,15 @@
{
"{http://www.alfresco.org/model/content/1.0}addressee" : "Nevin Nollop <nevin.nollop@gmail.com>",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/content/1.0}addressees" : "Nevin Nollop <nevinn@alfresco.com>",
"{http://www.alfresco.org/model/imap/1.0}dateSent" : 1086351802000,
"{http://www.alfresco.org/model/imap/1.0}messageTo" : "Nevin Nollop <nevin.nollop@gmail.com>",
"{http://www.alfresco.org/model/imap/1.0}messageId" : "<20040604122322.GV1905@phoenix.home>",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageCc" : "Nevin Nollop <nevinn@alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}sentdate" : 1086351802000,
"{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageFrom" : "Nevin Nollop <nevin.nollop@alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}originator" : "Nevin Nollop <nevin.nollop@alfresco.com>"
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,12 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
<title><EFBFBD>m<EFBFBD>F<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʂ<EFBFBD><EFBFBD>Y<EFBFBD>t<EFBFBD><EFBFBD><EFBFBD>܂<EFBFBD><EFBFBD>̂ŁA<EFBFBD>m<EFBFBD>F<EFBFBD><EFBFBD><EFBFBD>Ă<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD></title>
</head>
<body>
</body>
</html>

View File

@@ -0,0 +1,3 @@
{
"{http://www.alfresco.org/model/content/1.0}title" : "確認した結果を添付しますので、確認してください"
}

View File

@@ -0,0 +1,16 @@
{
"{http://www.alfresco.org/model/imap/1.0}dateReceived" : "Thu, 16 Aug 2012 08:13:29 -0700 (PDT)",
"{http://www.alfresco.org/model/content/1.0}addressee" : "jane.doe@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/content/1.0}addressees" : null,
"{http://www.alfresco.org/model/imap/1.0}dateSent" : 1345130009000,
"{http://www.alfresco.org/model/imap/1.0}messageTo" : "jane.doe@alfresco.com",
"{http://www.alfresco.org/model/imap/1.0}messageId" : "<CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageCc" : null,
"{http://www.alfresco.org/model/content/1.0}sentdate" : 1345130009000,
"{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageFrom" : "john.doe@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}originator" : "john.doe@alfresco.com"
}

View File

@@ -0,0 +1,17 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=windows-1252"/>
<title>The quick brown fox jumps over the lazy dog</title>
<meta name="author" content="Nevin Nollop"/>
<meta name="keywords" content="Pangram, fox, dog"/>
<meta name="description" content="Gym class featuring a brown fox and lazy dog"/>
</head>
<body lang="EN-US">
The quick brown fox jumps over the lazy dog
</body>
</html>

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,203 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005-2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.transformers.SelectableTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.swing.text.ChangedCharSetException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
/**
* Metadata extractor for HTML and XHTML.
*
* Configuration: (see HtmlMetadataExtractor_metadata_extract.properties and misc_engine_config.json)
*
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>description:</b> -- cm:description
* </pre>
*
* Based on HtmlMetadataExtracter from the content repository.
*
* @author Jesper Steen Møller
* @author Derek Hulley
* @author adavis
*/
public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(HtmlMetadataExtractor.class);

    private static final String KEY_AUTHOR = "author";
    private static final String KEY_TITLE = "title";
    private static final String KEY_DESCRIPTION = "description";

    /** Marker that precedes the charset name inside a Content-Type style value. */
    private static final String CHARSET_MARKER = "charset=";

    public HtmlMetadataExtractor()
    {
        super(logger);
    }

    /**
     * Extracts the raw metadata from {@code sourceFile}, then maps it and writes it to {@code targetFile}.
     *
     * @param sourceMimetype   mimetype of the source file
     * @param targetMimetype   not used by this implementation
     * @param transformOptions passed through to the raw extraction
     * @param sourceFile       HTML or XHTML file to read
     * @param targetFile       file the mapped metadata is written to
     * @throws Exception if the source cannot be read or parsed, or the target cannot be written
     */
    @Override
    public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
                                File sourceFile, File targetFile) throws Exception
    {
        Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
        mapMetadataAndWrite(targetFile, metadata);
    }

    /**
     * Parses {@code sourceFile} and collects the title (from the {@code <title>} element inside
     * {@code <head>}), the author and the description (both from {@code <meta>} tags).
     * <p>
     * The file is first read as UTF-8. If the parser reports a different charset through a
     * {@link ChangedCharSetException}, everything collected so far is discarded and the file is parsed
     * again with the reported charset.
     *
     * @param sourceMimetype   not used by this implementation
     * @param transformOptions not used by this implementation
     * @param sourceFile       HTML or XHTML file to read
     * @return the raw properties keyed by "title", "author" and "description"
     * @throws Exception if the file cannot be read, or the reported charset is not supported
     */
    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
                                                     File sourceFile) throws Exception
    {
        final Map<String, Serializable> rawProperties = new HashMap<>();

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
        {
            // Non-null only while the parser is between <title> and </title> inside <head>.
            StringBuilder title = null;
            boolean inHead = false;

            @Override
            public void handleText(char[] data, int pos)
            {
                if (title != null)
                {
                    title.append(data);
                }
            }

            @Override
            public void handleComment(char[] data, int pos)
            {
                // Perhaps sniff for Office 9+ metadata in here?
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
            {
                if (HTML.Tag.HEAD.equals(t))
                {
                    inHead = true;
                }
                else if (HTML.Tag.TITLE.equals(t) && inHead)
                {
                    title = new StringBuilder();
                }
                else
                {
                    // Any other start tag (e.g. a <meta> reported as a start tag) is treated as a simple tag.
                    handleSimpleTag(t, a, pos);
                }
            }

            @Override
            public void handleEndTag(HTML.Tag t, int pos)
            {
                if (HTML.Tag.HEAD.equals(t))
                {
                    inHead = false;
                }
                else if (HTML.Tag.TITLE.equals(t) && title != null)
                {
                    putRawValue(KEY_TITLE, title.toString(), rawProperties);
                    title = null;
                }
            }

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
            {
                if (HTML.Tag.META.equals(t))
                {
                    Object nameO = a.getAttribute(HTML.Attribute.NAME);
                    Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
                    if (nameO == null || valueO == null)
                    {
                        return;
                    }

                    String name = nameO.toString();

                    if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
                        || name.equalsIgnoreCase("dc.creator"))
                    {
                        putRawValue(KEY_AUTHOR, valueO.toString(), rawProperties);
                    }
                    else if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
                    {
                        putRawValue(KEY_DESCRIPTION, valueO.toString(), rawProperties);
                    }
                }
            }

            @Override
            public void handleError(String errorMsg, int pos)
            {
                // Parse errors are deliberately ignored: whatever could be extracted is returned.
            }
        };

        String charsetGuess = "UTF-8";
        int tries = 0;
        while (tries < 3)
        {
            rawProperties.clear();
            // Both the stream and the reader are closed before any retry.
            try (InputStream cis = new FileInputStream(sourceFile);
                 Reader r = new InputStreamReader(cis, charsetGuess))
            {
                HTMLEditorKit.Parser parser = new ParserDelegator();
                // Charset changes are only honoured on the first attempt; retries ignore them.
                parser.parse(r, callback, tries > 0);
                break;
            }
            catch (ChangedCharSetException ccse)
            {
                tries++;
                charsetGuess = charsetFromSpec(ccse.getCharSetSpec());
            }
        }

        return rawProperties;
    }

    /**
     * Picks the charset name out of the value reported by the parser, for example
     * {@code "text/html; charset=Shift_JIS"} gives {@code "Shift_JIS"}. A value that starts with
     * {@code "charset="} is handled too. When the marker is absent the whole value is taken as the name.
     */
    private static String charsetFromSpec(String charSetSpec)
    {
        int begin = charSetSpec.indexOf(CHARSET_MARKER);
        String charsetName = begin >= 0
            ? charSetSpec.substring(begin + CHARSET_MARKER.length())
            : charSetSpec;
        // Surrounding whitespace is never part of a valid charset name.
        return charsetName.trim();
    }
}

View File

@@ -0,0 +1,196 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005-2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.transformers.SelectableTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.Header;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMessage.RecipientType;
import javax.mail.internet.MimeUtility;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Metadata extractor for RFC822 mime emails.
 *
 * Configuration: (see RFC822MetadataExtractor_metadata_extract.properties and misc_engine_config.json)
 *
 * <pre>
 * <b>messageFrom:</b> -- imap:messageFrom, cm:originator
 * <b>messageTo:</b> -- imap:messageTo
 * <b>messageCc:</b> -- imap:messageCc
 * <b>messageSubject:</b> -- imap:messageSubject, cm:title, cm:description, cm:subjectline
 * <b>messageSent:</b> -- imap:dateSent, cm:sentdate
 * <b>messageReceived:</b> -- imap:dateReceived
 * <b>All {@link Header#getName() header names}:</b>
 * <b>Thread-Index:</b> -- imap:threadIndex
 * <b>Message-ID:</b> -- imap:messageId
 * </pre>
 *
 * @author Derek Hulley
 * @author adavis
 */
public class RFC822MetadataExtractor extends AbstractMetadataExtractor implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(RFC822MetadataExtractor.class);

    protected static final String KEY_MESSAGE_FROM = "messageFrom";
    protected static final String KEY_MESSAGE_TO = "messageTo";
    protected static final String KEY_MESSAGE_CC = "messageCc";
    protected static final String KEY_MESSAGE_SUBJECT = "messageSubject";
    protected static final String KEY_MESSAGE_SENT = "messageSent";
    protected static final String KEY_MESSAGE_RECEIVED = "messageReceived";

    public RFC822MetadataExtractor()
    {
        super(logger);
    }

    /**
     * Extracts the raw metadata from {@code sourceFile}, then maps it and writes it to {@code targetFile}.
     *
     * @param sourceMimetype   mimetype of the source file
     * @param targetMimetype   not used by this implementation
     * @param transformOptions passed through to the raw extraction
     * @param sourceFile       RFC822 message to read
     * @param targetFile       file the mapped metadata is written to
     * @throws Exception if the source cannot be read or parsed, or the target cannot be written
     */
    @Override
    public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
                                File sourceFile, File targetFile) throws Exception
    {
        Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
        mapMetadataAndWrite(targetFile, metadata);
    }

    /**
     * Reads {@code sourceFile} as a {@link MimeMessage} and collects the from/to/cc addresses, the sent
     * and received dates, the subject, and every header whose name is a key of the extract mapping.
     *
     * @param sourceMimetype   not used by this implementation
     * @param transformOptions not used by this implementation
     * @param sourceFile       RFC822 message to read
     * @return the raw properties keyed by the {@code KEY_MESSAGE_*} constants and by header name
     * @throws Exception if the file cannot be read or the message cannot be parsed
     */
    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
                                                     File sourceFile) throws Exception
    {
        final Map<String, Serializable> rawProperties = new HashMap<>();

        try (InputStream is = new FileInputStream(sourceFile))
        {
            MimeMessage mimeMessage = new MimeMessage(null, is);

            /*
             * Extract RFC822 values that do not map directly to headers and need to be decoded,
             * or those special fields that require some code to extract data.
             */
            putRawValue(KEY_MESSAGE_FROM,
                decodeMimeText(InternetAddress.toString(mimeMessage.getFrom())), rawProperties);
            putRawValue(KEY_MESSAGE_TO,
                decodeMimeText(InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO))), rawProperties);
            putRawValue(KEY_MESSAGE_CC,
                decodeMimeText(InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC))), rawProperties);

            putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);

            /*
             * Received field from RFC 822
             *
             * "Received" ":" ; one per relay
             * ["from" domain] ; sending host
             * ["by" domain] ; receiving host
             * ["via" atom] ; physical path
             * ("with" atom) ; link/mail protocol
             * ["id" msg-id] ; receiver msg id
             * ["for" addr-spec] ; initial form
             * ";" date-time ; time received
             */
            Date rxDate = mimeMessage.getReceivedDate();
            if (rxDate != null)
            {
                // The email implementation extracted the received date for us.
                putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
            }
            else
            {
                // The email implementation did not parse the received date for us:
                // take the text after the last ';' of the first Received header.
                String[] rx = mimeMessage.getHeader("received");
                if (rx != null && rx.length > 0)
                {
                    String lastReceived = MimeUtility.unfold(rx[0]);
                    int x = lastReceived.lastIndexOf(';');
                    if (x > 0)
                    {
                        String dateStr = lastReceived.substring(x + 1).trim();
                        putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
                    }
                }
            }

            String[] subj = mimeMessage.getHeader("Subject");
            if (subj != null && subj.length > 0)
            {
                String decodedSubject = subj[0];
                try
                {
                    decodedSubject = MimeUtility.decodeText(decodedSubject);
                }
                catch (UnsupportedEncodingException e)
                {
                    // Fall back to the undecoded subject rather than failing the whole extract.
                    logger.warn(e.toString());
                }
                putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
            }

            /*
             * Extract values from all header fields, including extension fields "X-"
             */
            Set<String> keys = getExtractMapping().keySet();
            @SuppressWarnings("unchecked")
            Enumeration<Header> headers = mimeMessage.getAllHeaders();
            while (headers.hasMoreElements())
            {
                Header header = headers.nextElement();
                if (keys.contains(header.getName()))
                {
                    putRawValue(header.getName(), decodeMimeText(header.getValue()), rawProperties);
                }
            }
        }

        return rawProperties;
    }

    /**
     * Decodes MIME encoded-words in a header value. A {@code null} value (header not present) is
     * returned as {@code null}.
     */
    private static String decodeMimeText(String value) throws UnsupportedEncodingException
    {
        return value != null ? MimeUtility.decodeText(value) : null;
    }
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,7 +26,11 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.File;
@@ -37,12 +41,7 @@ import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableList;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
/**
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
@@ -74,8 +73,8 @@ public class AppleIWorksContentTransformer implements SelectableTransformer
// (53 x 41) preview-micro.jpg
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters)
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile)
{
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
sourceMimetype, targetMimetype);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,10 +26,15 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
@@ -43,15 +48,9 @@ import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
@@ -74,8 +73,8 @@ public class EMLTransformer implements SelectableTransformer
private static final String DEFAULT_ENCODING = "UTF-8";
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
logger.debug("Performing RFC822 to text transform.");
// Use try with resource

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,6 +26,12 @@
*/
package org.alfresco.transformer.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,12 +42,6 @@ import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Content transformer which wraps the HTML Parser library for
* parsing HTML content.
@@ -75,8 +75,8 @@ public class HtmlParserContentTransformer implements SelectableTransformer
HtmlParserContentTransformer.class);
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,13 +26,6 @@
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
@@ -41,6 +34,13 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
/**
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
* This transformer will only work for OOXML files where thumbnailing was enabled,
@@ -59,8 +59,8 @@ public class OOXMLThumbnailContentTransformer implements SelectableTransformer
OOXMLThumbnailContentTransformer.class);
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
if (logger.isDebugEnabled())
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -39,14 +39,18 @@ public interface SelectableTransformer
String SOURCE_ENCODING = "sourceEncoding";
String TARGET_ENCODING = "targetEncoding";
/**
* Implementation of the actual transformation.
*
* @param sourceFile
* @param targetFile
* @param parameters
* @throws Exception
*/
void transform(File sourceFile, File targetFile, String sourceMimetype,
String targetMimetype, Map<String, String> parameters) throws Exception;
/**
 * Transforms the source file into the target file.
 * The default implementation does nothing.
 *
 * @param sourceMimetype mimetype of the source file
 * @param targetMimetype mimetype of the target file
 * @param parameters     transform parameters
 * @param sourceFile     file to transform from
 * @param targetFile     file to transform to
 * @throws Exception declared so that implementations may report any failure
 */
default void transform(String sourceMimetype, String targetMimetype, Map<String, String> parameters,
File sourceFile, File targetFile) throws Exception
{
}
/**
 * Extracts metadata from the source file.
 * The default implementation does nothing.
 *
 * @param sourceMimetype   mimetype of the source file
 * @param targetMimetype   mimetype of the target file
 * @param transformOptions transform options
 * @param sourceFile       file to extract metadata from
 * @param targetFile       presumably the file the extracted metadata is written to - confirm against implementations
 * @throws Exception declared so that implementations may report any failure
 */
default void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
/**
 * Embed-metadata operation.
 * The default implementation does nothing.
 * NOTE(review): presumably writes a copy of the source with metadata embedded to the target file - confirm
 * against an implementation.
 *
 * @param sourceMimetype   mimetype of the source file
 * @param targetMimetype   mimetype of the target file
 * @param transformOptions transform options
 * @param sourceFile       source file
 * @param targetFile       target file
 * @throws Exception declared so that implementations may report any failure
 */
default void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
}

View File

@@ -26,19 +26,17 @@
*/
package org.alfresco.transformer.transformers;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
import java.io.File;
import java.util.Map;
import java.util.StringJoiner;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.logging.LogEntry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableMap;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
* The SelectingTransformer selects a registered {@link SelectableTransformer}
@@ -46,9 +44,9 @@ import com.google.common.collect.ImmutableMap;
*
* @author eknizat
*/
public class SelectingTransformer
public class SelectingTransformer implements Transformer
{
private static final Logger logger = LoggerFactory.getLogger(SelectingTransformer.class);
private static final String ID = "misc";
public static final String LICENCE =
"This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
@@ -63,57 +61,45 @@ public class SelectingTransformer
.put("textToPdf", new TextToPdfContentTransformer())
.put("rfc822", new EMLTransformer())
.put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
.put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
.put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
.build();
/**
* Performs a transform using a transformer selected based on the provided sourceMimetype and targetMimetype
*
* @param transform the name of the transformer
* @param sourceFile File to transform from
* @param targetFile File to transform to
* @param sourceMimetype Mimetype of the source file
* @throws TransformException if there was a problem internally
*/
public void transform(String transform, File sourceFile, File targetFile, String sourceMimetype,
String targetMimetype, Map<String, String> parameters) throws TransformException
@Override
public String getTransformerId()
{
try
{
final SelectableTransformer transformer = transformers.get(transform);
logOptions(sourceFile, targetFile, parameters);
transformer.transform(sourceFile, targetFile, sourceMimetype, targetMimetype,
parameters);
}
catch (IllegalArgumentException e)
{
throw new TransformException(BAD_REQUEST.value(), getMessage(e));
}
catch (Exception e)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), getMessage(e));
}
if (!targetFile.exists())
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Transformer failed to create an output file. Target file does not exist.");
}
if (sourceFile.length() > 0 && targetFile.length() == 0)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Transformer failed to create an output file. Target file is empty but source file was not empty.");
}
return ID;
}
private static String getMessage(Exception e)
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
return e.getMessage() == null || e.getMessage().isEmpty() ? e.getClass().getSimpleName() : e.getMessage();
final SelectableTransformer transformer = transformers.get(transformName);
logOptions(sourceFile, targetFile, transformOptions);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
/**
 * Logs the options, then asks the {@link SelectableTransformer} registered under
 * {@code transformName} to extract metadata from {@code sourceFile}.
 *
 * @param transformName    name the transformer is registered under
 * @param sourceMimetype   mimetype of the source file
 * @param targetMimetype   mimetype of the target file
 * @param transformOptions transform options, also used for the log entry
 * @param sourceFile       file to extract metadata from
 * @param targetFile       file handed to the selected transformer as its target
 * @throws Exception whatever the selected transformer throws
 */
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
                            Map<String, String> transformOptions,
                            File sourceFile, File targetFile) throws Exception
{
    logOptions(sourceFile, targetFile, transformOptions);
    transformers.get(transformName)
        .extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
{
StringJoiner sj = new StringJoiner(" ");
parameters.forEach((k, v) -> sj.add(
"--" + k + "=" + v)); // keeping the existing style used in other T-Engines
parameters.forEach((k, v) ->
{
if (!TRANSFORM_NAME_PARAMETER.equals(k))
{
sj.add("--" + k + "=" + v);
}
}); // keeping the existing style used in other T-Engines
sj.add(getExtension(sourceFile));
sj.add(getExtension(targetFile));
LogEntry.setOptions(sj.toString());

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,6 +26,9 @@
*/
package org.alfresco.transformer.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
@@ -39,9 +42,6 @@ import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Converts any textual format to plain text.
* <p>
@@ -58,7 +58,7 @@ import org.apache.commons.logging.LogFactory;
public class StringExtractingContentTransformer implements SelectableTransformer
{
private static final Log logger = LogFactory.getLog(StringExtractingContentTransformer.class);
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
/**
* Text to text conversions are done directly using the content reader and writer string
@@ -69,8 +69,8 @@ public class StringExtractingContentTransformer implements SelectableTransformer
* be unformatted but valid.
*/
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String targetEncoding = parameters.get(TARGET_ENCODING);
@@ -126,11 +126,11 @@ public class StringExtractingContentTransformer implements SelectableTransformer
{
if (charReader != null)
{
try { charReader.close(); } catch (Throwable e) { logger.error(e); }
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
}
if (charWriter != null)
{
try { charWriter.close(); } catch (Throwable e) { logger.error(e); }
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
}
}
// done

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,6 +26,15 @@
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
@@ -40,14 +49,6 @@ import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
@@ -62,7 +63,7 @@ public class TextToPdfContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
public static final String PAGE_LIMIT = "pageLimit";
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
private final PagedTextToPDF transformer;
@@ -98,8 +99,8 @@ public class TextToPdfContentTransformer implements SelectableTransformer
}
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String stringPageLimit = parameters.get(PAGE_LIMIT);

View File

@@ -0,0 +1,12 @@
#
# HtmlMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -0,0 +1,22 @@
#
# RFC822MetadataExtractor - default mapping
#
# Namespaces
namespace.prefix.imap=http://www.alfresco.org/model/imap/1.0
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
# Default values that don't exactly match a header name
messageFrom=imap:messageFrom, cm:originator
messageTo=imap:messageTo, cm:addressee
messageCc=imap:messageCc, cm:addressees
messageSubject=imap:messageSubject, cm:title, cm:description, cm:subjectline
messageSent=imap:dateSent, cm:sentdate
messageReceived=imap:dateReceived
#Add here any values you want to extract.
# Use the header name as the key. The RHS is a list of the destination properties.
Thread-Index=imap:threadIndex
Message-ID=imap:messageId

View File

@@ -69,6 +69,23 @@
],
"transformOptions": [
]
},
{
"transformerName": "HtmlMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "text/html", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "RFC822MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,8 +26,7 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.SOURCE_ENCODING;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,7 +35,8 @@ import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.SOURCE_ENCODING;
import static org.junit.Assert.assertEquals;
public class HtmlParserContentTransformerTest
{
@@ -81,7 +81,7 @@ public class HtmlParserContentTransformerTest
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
@@ -94,7 +94,7 @@ public class HtmlParserContentTransformerTest
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -106,7 +106,7 @@ public class HtmlParserContentTransformerTest
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-16");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -131,7 +131,7 @@ public class HtmlParserContentTransformerTest
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,8 +26,10 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transformer.transformers.TextToPdfContentTransformer.PAGE_LIMIT;
import static org.junit.Assert.assertEquals;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,10 +38,8 @@ import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Before;
import org.junit.Test;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
import static org.junit.Assert.assertEquals;
public class TextToPdfContentTransformerTest
{
@@ -109,7 +109,7 @@ public class TextToPdfContentTransformerTest
// Transform to PDF
Map<String, String> parameters = new HashMap<>();
parameters.put(PAGE_LIMIT, pageLimit);
transformer.transform(sourceFile, targetFile, "text/plain", "application/pdf", parameters);
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile);
// Read back in the PDF and check it
PDDocument doc = PDDocument.load(targetFile);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,31 +26,17 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.Map;
import javax.annotation.PostConstruct;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.PostConstruct;
import java.io.File;
import java.util.Collections;
import java.util.Map;
/**
* Controller for the Docker based alfresco-pdf-renderer transformer.
@@ -111,68 +97,22 @@ public class AlfrescoPdfRendererController extends AbstractTransformerController
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
commandExecutor.run("", sourceFile, targetFile, null);
transform(null, null, null, Collections.emptyMap(), sourceFile, targetFile);
}
};
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected String getTransformerName(final File sourceFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> transformOptions)
{
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
final String options = PdfRendererOptionsBuilder
.builder()
.withPage(transformOptions.get("page"))
.withWidth(transformOptions.get("width"))
.withHeight(transformOptions.get("height"))
.withAllowPdfEnlargement(transformOptions.get("allowPdfEnlargement"))
.withMaintainPdfAspectRatio(transformOptions.get("maintainPdfAspectRatio"))
.build();
commandExecutor.run(options, sourceFile, targetFile, timeout);
return null; // does not matter what value is returned, as it is not used because there is only one.
}
@Deprecated
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam(value = "timeout", required = false) Long timeout,
@RequestParam(value = "testDelay", required = false) Long testDelay,
@RequestParam(value = "page", required = false) Integer page,
@RequestParam(value = "width", required = false) Integer width,
@RequestParam(value = "height", required = false) Integer height,
@RequestParam(value = "allowPdfEnlargement", required = false) Boolean allowPdfEnlargement,
@RequestParam(value = "maintainPdfAspectRatio", required = false) Boolean maintainPdfAspectRatio)
@Override
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(),
targetExtension);
getProbeTestTransform().incrementTransformerCount();
File sourceFile = createSourceFile(request, sourceMultipartFile);
File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
final String options = PdfRendererOptionsBuilder
.builder()
.withPage(page)
.withWidth(width)
.withHeight(height)
.withAllowPdfEnlargement(allowPdfEnlargement)
.withMaintainPdfAspectRatio(maintainPdfAspectRatio)
.build();
commandExecutor.run(options, sourceFile, targetFile, timeout);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
commandExecutor.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -26,10 +26,7 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,7 +40,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})

View File

@@ -26,15 +26,29 @@
*/
package org.alfresco.transformer.executors;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.PdfRendererOptionsBuilder;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transformer.util.RequestParamMap.ALLOW_PDF_ENLARGEMENT;
import static org.alfresco.transformer.util.RequestParamMap.HEIGHT_REQUEST_PARAM;
import static org.alfresco.transformer.util.RequestParamMap.MAINTAIN_PDF_ASPECT_RATIO;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_REQUEST_PARAM;
import static org.alfresco.transformer.util.RequestParamMap.TIMEOUT;
import static org.alfresco.transformer.util.RequestParamMap.WIDTH_REQUEST_PARAM;
import static org.alfresco.transformer.util.Util.stringToLong;
/**
* CommandExecutor implementation for running PDF Renderer transformations. It runs the
* transformation logic as a separate Shell process.
*/
public class PdfRendererCommandExecutor extends AbstractCommandExecutor
{
// Identifier returned by getTransformerId(). It is a constant, so it is declared final
// to stop it being reassigned as shared mutable static state.
private static final String ID = "pdfrenderer";
public static final String LICENCE = "This transformer uses alfresco-pdf-renderer which uses the PDFium library from Google Inc. See the license at https://pdfium.googlesource.com/pdfium/+/master/LICENSE or in /pdfium.txt";
private final String EXE;
@@ -50,6 +64,12 @@ public class PdfRendererCommandExecutor extends AbstractCommandExecutor
super.checkCommand = createCheckCommand();
}
/**
 * Returns the identifier of this executor.
 *
 * @return the value of the {@code ID} constant ({@code "pdfrenderer"})
 */
@Override
public String getTransformerId()
{
return ID;
}
@Override
protected RuntimeExec createTransformCommand()
{
@@ -77,4 +97,23 @@ public class PdfRendererCommandExecutor extends AbstractCommandExecutor
runtimeExec.setCommandsAndArguments(commandsAndArguments);
return runtimeExec;
}
/**
 * Runs the PDF renderer command for one source file.
 * <p>
 * The page, width, height, enlargement and aspect ratio values are looked up in
 * {@code transformOptions} and handed to {@link PdfRendererOptionsBuilder} to produce the
 * options string. The timeout is looked up in the same map and converted with
 * {@code stringToLong} before the command is run.
 *
 * @param transformName    not read by this implementation
 * @param sourceMimetype   not read by this implementation
 * @param targetMimetype   not read by this implementation
 * @param transformOptions request options keyed by the {@code RequestParamMap} names
 * @param sourceFile       file passed to {@code run} as the source
 * @param targetFile       file passed to {@code run} as the target
 * @throws TransformException declared by the overridden method
 */
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
                      Map<String, String> transformOptions,
                      File sourceFile, File targetFile) throws TransformException
{
    // Collect the raw option values first so the builder chain below reads cleanly.
    final String page = transformOptions.get(PAGE_REQUEST_PARAM);
    final String width = transformOptions.get(WIDTH_REQUEST_PARAM);
    final String height = transformOptions.get(HEIGHT_REQUEST_PARAM);
    final String allowPdfEnlargement = transformOptions.get(ALLOW_PDF_ENLARGEMENT);
    final String maintainPdfAspectRatio = transformOptions.get(MAINTAIN_PDF_ASPECT_RATIO);

    final String commandOptions = PdfRendererOptionsBuilder
        .builder()
        .withPage(page)
        .withWidth(width)
        .withHeight(height)
        .withAllowPdfEnlargement(allowPdfEnlargement)
        .withMaintainPdfAspectRatio(maintainPdfAspectRatio)
        .build();

    // The timeout is converted only after the options string has been built.
    run(commandOptions, sourceFile, targetFile, stringToLong(transformOptions.get(TIMEOUT)));
}
}

View File

@@ -26,10 +26,7 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,7 +40,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,37 +26,20 @@
*/
package org.alfresco.transformer;
import static java.lang.Boolean.parseBoolean;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.util.Collections;
import java.util.Map;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PDF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
* Controller for the Docker based Tika transformers.
@@ -109,81 +92,16 @@ public class TikaController extends AbstractTransformerController
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
javaExecutor.call(sourceFile, targetFile, PDF_BOX,
TARGET_MIMETYPE + MIMETYPE_TEXT_PLAIN, TARGET_ENCODING + "UTF-8");
transform(PDF_BOX, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, Collections.emptyMap(), sourceFile, targetFile);
}
};
}
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") final MultipartFile sourceMultipartFile,
@RequestParam("sourceMimetype") final String sourceMimetype,
@RequestParam("targetExtension") final String targetExtension,
@RequestParam("targetMimetype") final String targetMimetype,
@RequestParam(value = "targetEncoding", required = false, defaultValue = "UTF-8") final String targetEncoding,
@RequestParam(value = "timeout", required = false) final Long timeout,
@RequestParam(value = "testDelay", required = false) final Long testDelay,
@RequestParam(value = "includeContents", required = false) final Boolean includeContents,
@RequestParam(value = "notExtractBookmarksText", required = false) final Boolean notExtractBookmarksText)
{
final String targetFilename = createTargetFileName(
sourceMultipartFile.getOriginalFilename(), targetExtension);
getProbeTestTransform().incrementTransformerCount();
final File sourceFile = createSourceFile(request, sourceMultipartFile);
final File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
// TODO Consider streaming the request and response rather than using temporary files
// https://www.logicbig.com/tutorials/spring-framework/spring-web-mvc/streaming-response-body.html
final Map<String, String> transformOptions = createTransformOptions(
"includeContents", includeContents,
"notExtractBookmarksText", notExtractBookmarksText,
"targetEncoding", targetEncoding);
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
javaExecutor.call(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
final boolean includeContents = parseBoolean(
transformOptions.getOrDefault("includeContents", "false"));
final boolean notExtractBookmarksText = parseBoolean(
transformOptions.getOrDefault("notExtractBookmarksText", "false"));
final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
javaExecutor.call(sourceFile, targetFile, transform,
includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
javaExecutor.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -26,6 +26,34 @@
*/
package org.alfresco.transformer;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
@@ -59,6 +87,8 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
import static org.alfresco.transformer.util.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transformer.util.RequestParamMap.NOT_EXTRACT_BOOKMARK_TEXT;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -79,34 +109,6 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
/**
* Test the TikaController without a server.
* Super class includes tests for the AbstractTransformerController.
@@ -245,7 +247,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
? mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension)
: mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension, "includeContents", includeContents.toString());
"targetExtension", this.targetExtension, INCLUDE_CONTENTS, includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
@@ -528,7 +530,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
mockMvc.perform(
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param(
"notExtractBookmarksText", "true"))
NOT_EXTRACT_BOOKMARK_TEXT, "true"))
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension));

View File

@@ -0,0 +1,533 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.List;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_APP_DWG;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transformer.TestFileInfo.testFile;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_AUDIO_MP4;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_EXCEL;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_BMP;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_GIF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IWORK_KEYNOTE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IWORK_NUMBERS;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IWORK_PAGES;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_MP3;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PDF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PPT;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_3GP;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_3GP2;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_FLV;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_MP4;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_QUICKTIME;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VISIO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VORBIS;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
/**
 * Parameterized metadata extraction integration tests for the Tika T-Engine.
 *
 * This class only supplies the list of test files (source mimetype, extension and file name);
 * it declares no test methods of its own, so the test logic comes from
 * {@link AbstractMetadataExtractsIT}.
 *
 * @author adavis
 */
@RunWith(Parameterized.class)
public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
{
    /**
     * Creates one test instance per entry returned by {@link #engineTransformations()}.
     *
     * @param testFileInfo the test file this instance is run against, passed straight to the superclass.
     */
    public TikaMetadataExtractsIT(TestFileInfo testFileInfo)
    {
        super(testFileInfo);
    }

    /**
     * Supplies the parameters for the {@link Parameterized} runner.
     *
     * @return the test files to run, grouped below by the extractor they belong to.
     */
    @Parameterized.Parameters
    public static List<TestFileInfo> engineTransformations()
    {
        // These are the files tested in the content repository. The extractors support many more
        // mimetypes than are listed here.
        // A commented out line marks a case the repository code tries to test but gives up on, because
        // either there is no quick file or the target extension has not been registered.
        final Stream<TestFileInfo> quickFiles = Stream.of(

            // DWGMetadataExtractor
            testFile(MIMETYPE_APP_DWG, "dwg", "quick2010CustomProps.dwg"),

            // MailMetadataExtractor
            testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),

            // MP3MetadataExtractor
            testFile(MIMETYPE_MP3, "mp3", "quick.mp3"),

            // OfficeMetadataExtractor
            testFile(MIMETYPE_WORD, "doc", "quick.doc"),
            //testFile("application/x-tika-msoffice-embedded; format=ole10_native", "", ""),
            testFile(MIMETYPE_VISIO, "vsd", "quick.vsd"),
            //testFile("application/vnd.ms-project", "mpp", ""),
            //testFile("application/x-tika-msworks-spreadsheet", "", ""),
            //testFile("application/x-mspublisher", "", ""),
            testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
            //testFile("application/x-tika-msoffice", "", ""),
            //testFile(MIMETYPE_VISIO_2013, "vsdx", ""),
            //testFile("application/sldworks", "", ""),
            //testFile(MIMETYPE_ENCRYPTED_OFFICE, "", ""),
            testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),

            // OpenDocumentMetadataExtractor
            //testFile("application/x-vnd.oasis.opendocument.presentation", "", ""),
            //testFile(MIMETYPE_OPENDOCUMENT_CHART, "odc", ""),
            //testFile(MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE, "", ""),
            //testFile("application/x-vnd.oasis.opendocument.text-web", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.image", "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
            //testFile(MIMETYPE_OPENDOCUMENT_TEXT_WEB, "oth", ""),
            //testFile("application/x-vnd.oasis.opendocument.spreadsheet-template", "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE, "ots", "quick.ots"),
            testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
            //testFile("application/x-vnd.oasis.opendocument.graphics-template", "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
            testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET, "ods", "quick.ods"),
            //testFile("application/x-vnd.oasis.opendocument.chart", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.spreadsheet", "", ""),
            //testFile(MIMETYPE_OPENDOCUMENT_IMAGE, "odi", ""),
            //testFile("application/x-vnd.oasis.opendocument.text", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.text-template", "", ""),
            //testFile("application/vnd.oasis.opendocument.formula-template", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.formula", "", ""),
            //testFile("application/vnd.oasis.opendocument.image-template", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.image-template", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.presentation-template", "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE, "otp", "quick.otp"),
            testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
            //testFile(MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE, "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
            //testFile("application/vnd.oasis.opendocument.chart-template", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.chart-template", "", ""),
            //testFile("application/x-vnd.oasis.opendocument.formula-template", "", ""),
            //testFile(MIMETYPE_OPENDOCUMENT_DATABASE, "odb", ""),
            //testFile("application/x-vnd.oasis.opendocument.text-master", "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION, "odp", "quick.odp"),
            //testFile(MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE, "", ""),
            //testFile("application/x-vnd.oasis.opendocument.graphics", "", ""),
            testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
            //testFile(MIMETYPE_OPENDOCUMENT_TEXT_MASTER, "odm", ""),

            // PdfBoxMetadataExtractor
            testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
            //testFile(MIMETYPE_APPLICATION_ILLUSTRATOR, "ai", ""),

            // PoiMetadataExtractor
            //testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE_MACRO, "potm", ""),
            //testFile(MIMETYPE_OPENXML_SPREADSHEET_ADDIN_MACRO, "xlam", ""),
            //testFile(MIMETYPE_OPENXML_WORD_TEMPLATE, "dotx", ""),
            //testFile(MIMETYPE_OPENXML_SPREADSHEET_BINARY_MACRO, "xlsb", ""),
            testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE_MACRO, "sldm", ""),
            //testFile("application/vnd.ms-visio.drawing", "", ""),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO, "ppsm", ""),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_MACRO, "pptm", ""),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE, "sldx", ""),
            //testFile(MIMETYPE_OPENXML_SPREADSHEET_MACRO, "xlsm", ""),
            //testFile(MIMETYPE_OPENXML_WORD_TEMPLATE_MACRO, "dotm", ""),
            //testFile(MIMETYPE_OPENXML_WORDPROCESSING_MACRO, "docm", ""),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_ADDIN, "ppam", ""),
            //testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE, "xltx", ""),
            //testFile("application/vnd.ms-xpsdocument", "", ""),
            //testFile("application/vnd.ms-visio.drawing.macroenabled.12", "", ""),
            //testFile("application/vnd.ms-visio.template.macroenabled.12", "", ""),
            //testFile("model/vnd.dwfx+xps", "", ""),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE, "potx", ""),
            testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
            testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
            //testFile("application/vnd.ms-visio.stencil", "", ""),
            //testFile("application/vnd.ms-visio.template", "", ""),
            //testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW, "ppsx", ""),
            //testFile("application/vnd.ms-visio.stencil.macroenabled.12", "", ""),
            //testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO, "xltm", ""),

            // TikaAudioMetadataExtractor
            testFile("video/x-m4v", "m4v", "quick.m4v"),
            //testFile("audio/x-oggflac", "", ""),
            //testFile("application/mp4", "", ""),
            testFile(MIMETYPE_VORBIS, "ogg", "quick.ogg"),
            testFile(MIMETYPE_VIDEO_3GP, "3gp", "quick.3gp"),
            //testFile(MIMETYPE_FLAC, "flac", ""),
            testFile(MIMETYPE_VIDEO_3GP2, "3g2", "quick.3g2"),
            testFile(MIMETYPE_VIDEO_QUICKTIME, "mov", "quick.mov"),
            testFile(MIMETYPE_AUDIO_MP4, "m4a", "quick.m4a"),
            testFile(MIMETYPE_VIDEO_MP4, "mp4", "quick.mp4"),

            // TikaAutoMetadataExtractor

            // Some of the <source>_metadata.json files below hold null values for author and title.
            // That is not new behaviour - the content repository does the same - it simply was not tested.
            //
            // Nulls are expected for: txt, xml, zip, tar
            //
            // Nulls are unexpected for: quick.key, quick.numbers and quick.pages.
            //
            // quick.bmp, quick.gif, quick.png, quick.3g2, quick.3gp, quick.flv, quick.m4v, quick.mov & quick.mp4
            // have one or more values but nulls too. That may be correct, a bug, or just down to the
            // example quick file rather than a fault in the extractor.
            //testFile("application/vnd.ms-htmlhelp", "", ""),
            //testFile(MIMETYPE_ATOM, "", ""),
            //testFile("audio/midi", "", ""),
            //testFile("application/aaigrid", "", ""),
            //testFile("application/x-bag", "", ""),
            testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
            //testFile("application/x-quattro-pro; version=9", "", ""),
            //testFile("application/x-ibooks+zip", "", ""),
            //testFile("audio/wave", "", ""),
            //testFile("application/x-midi", "", ""),
            testFile(MIMETYPE_XML, "xml", "quick.xml"),
            //testFile(MIMETYPE_RSS, "rss", ""),
            //testFile("application/x-netcdf", "cdf", ""),
            //testFile("video/x-daala", "", ""),
            //testFile("application/matlab-mat", "", ""),
            //testFile("audio/aiff", "", ""),
            //testFile("application/jaxa-pal-sar", "", ""),
            //testFile("image/x-pcraster", "", ""),
            //testFile("image/arg", "", ""),
            //testFile("application/x-kro", "", ""),
            //testFile("image/x-hdf5-image", "", ""),
            //testFile("audio/speex", "", ""),
            //testFile("image/big-gif", "", ""),
            //testFile("application/zlib", "", ""),
            //testFile("application/x-cosar", "", ""),
            //testFile("application/x-ntv2", "", ""),
            //testFile("application/x-archive", "", ""),
            //testFile("application/java-archive", "jar", ""),
            //testFile("application/x-vnd.sun.xml.writer", "", ""),
            //testFile("application/x-gmt", "", ""),
            //testFile("application/x-xml", "", ""),
            //testFile("application/gzip-compressed", "", ""),
            //testFile("image/ida", "", ""),
            //testFile("text/x-groovy", "", ""),
            //testFile("image/x-emf", "", ""),
            //testFile("application/x-rar", "", ""),
            //testFile("image/sar-ceos", "", ""),
            //testFile("application/acad", "", ""),
            testFile(MIMETYPE_ZIP, "zip", "quick.zip"),
            //testFile(MIMETYPE_IMAGE_PSD, "psd", ""),
            //testFile("application/x-sharedlib", "", ""),
            //testFile("audio/x-m4a", "", ""),
            //testFile("image/webp", "", ""),
            //testFile("application/vnd.wap.xhtml+xml", "", ""),
            //testFile("audio/x-aiff", "aiff", ""),
            //testFile("application/vnd.ms-spreadsheetml", "", ""),
            //testFile("image/x-airsar", "", ""),
            //testFile("application/x-pcidsk", "", ""),
            //testFile("application/x-java-pack200", "", ""),
            //testFile("image/x-fujibas", "", ""),
            //testFile("application/x-zmap", "", ""),
            //testFile("image/x-bmp", "", ""),
            //testFile("image/bpg", "", ""),
            //testFile(MIMETYPE_RTF, "rtf", ""),
            //testFile("application/x-xz", "", ""),
            //testFile("application/x-speex", "", ""),
            //testFile("audio/ogg; codecs=speex", "", ""),
            //testFile("application/x-l1b", "", ""),
            //testFile("application/x-gsbg", "", ""),
            //testFile("application/x-sdat", "", ""),
            //testFile("application/vnd.ms-visio", "", ""),
            //testFile("application/x-coredump", "", ""),
            //testFile("application/x-msaccess", "", ""),
            //testFile("application/x-dods", "", ""),
            testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
            //testFile("application/vnd.ms-outlook-pst", "", ""),
            //testFile("image/bsb", "", ""),
            //testFile("application/x-cpio", "cpio", ""),
            //testFile("audio/ogg", "oga", ""),
            testFile("application/x-tar", "tar", "quick.tar"),
            //testFile("application/x-dbf", "", ""),
            //testFile("video/x-ogm", "", ""),
            //testFile("application/x-los-las", "", ""),
            //testFile("application/autocad_dwg", "", ""),
            //testFile("application/vnd.ms-excel.workspace.3", "", ""),
            //testFile("application/vnd.ms-excel.workspace.4", "", ""),
            //testFile("image/x-bpg", "", ""),
            //testFile("gzip/document", "", ""),
            //testFile("text/x-java", "", ""),
            //testFile("application/x-brotli", "", ""),
            //testFile("application/elas", "", ""),
            //testFile("image/x-jb2", "", ""),
            //testFile("application/x-cappi", "", ""),
            //testFile("application/epub+zip", "", ""),
            //testFile("application/x-ace2", "", ""),
            //testFile("application/x-sas-data", "", ""),
            //testFile("application/x-hdf", "hdf", ""),
            //testFile("image/x-mff", "", ""),
            //testFile("image/x-srp", "", ""),
            testFile(MIMETYPE_IMAGE_BMP, "bmp", "quick.bmp"),
            //testFile("video/x-ogguvs", "", ""),
            //testFile("drawing/dwg", "", ""),
            //testFile("application/x-doq2", "", ""),
            //testFile("application/x-acad", "", ""),
            //testFile("application/x-kml", "", ""),
            //testFile("application/x-autocad", "", ""),
            //testFile("image/x-mff2", "", ""),
            //testFile("application/x-snodas", "", ""),
            //testFile("application/terragen", "", ""),
            //testFile("application/x-wcs", "", ""),
            //testFile("text/x-c++src", "", ""),
            //testFile("application/timestamped-data", "", ""),
            testFile(MIMETYPE_IMAGE_TIFF, "tiff", "quick.tiff"),
            //testFile("application/msexcel", "", ""),
            //testFile("application/x-asp", "", ""),
            //testFile("application/x-rar-compressed", "rar", ""),
            //testFile("application/x-envi-hdr", "", ""),
            //testFile("text/iso19139+xml", "", ""),
            //testFile("application/vnd.ms-tnef", "", ""),
            //testFile("application/x-ecrg-toc", "", ""),
            //testFile("application/aig", "", ""),
            //testFile("audio/x-wav", "wav", ""),
            //testFile("image/emf", "", ""),
            //testFile("application/x-bzip", "", ""),
            //testFile("application/jdem", "", ""),
            //testFile("application/x-webp", "", ""),
            //testFile("application/x-arj", "", ""),
            //testFile("application/x-lzma", "", ""),
            //testFile("application/x-java-vm", "", ""),
            //testFile("image/envisat", "", ""),
            //testFile("application/x-doq1", "", ""),
            //testFile("audio/vnd.wave", "", ""),
            //testFile("application/x-ppi", "", ""),
            //testFile("image/ilwis", "", ""),
            //testFile("application/x-gunzip", "", ""),
            //testFile("image/x-icon", "", ""),
            //testFile("application/ogg", "ogx", ""),
            //testFile(MIMETYPE_IMAGE_SVG, "svg", ""),
            //testFile("application/x-ms-owner", "", ""),
            //testFile("application/x-grib", "", ""),
            //testFile("application/ms-tnef", "", ""),
            //testFile("image/fits", "", ""),
            //testFile("audio/x-mpeg", "", ""),
            //testFile("application/x-bzip2", "", ""),
            //testFile("text/tsv", "", ""),
            //testFile("application/x-fictionbook+xml", "", ""),
            //testFile("application/x-p-aux", "", ""),
            //testFile("application/x-font-ttf", "", ""),
            //testFile("image/x-xcf", "", ""),
            //testFile("image/x-ms-bmp", "", ""),
            //testFile("image/wmf", "", ""),
            //testFile("image/eir", "", ""),
            //testFile("application/x-matlab-data", "", ""),
            //testFile("application/deflate64", "", ""),
            //testFile("audio/wav", "", ""),
            //testFile("application/x-rs2", "", ""),
            //testFile("application/vnd.ms-word", "", ""),
            //testFile("application/x-tsx", "", ""),
            //testFile("application/x-lcp", "", ""),
            //testFile("application/x-mbtiles", "", ""),
            //testFile("audio/x-oggpcm", "", ""),
            //testFile("application/x-epsilon", "", ""),
            //testFile("application/x-msgn", "", ""),
            //testFile(MIMETYPE_TEXT_CSV, "csv", ""),
            //testFile("image/x-dimap", "", ""),
            //testFile("image/vnd.microsoft.icon", "", ""),
            //testFile("application/x-envi", "", ""),
            //testFile("application/x-dwg", "", ""),
            testFile(MIMETYPE_IWORK_NUMBERS, "numbers", "quick.numbers"),
            //testFile("application/vnd.ms-word2006ml", "", ""),
            //testFile("application/x-bt", "", ""),
            //testFile("application/x-font-adobe-metric", "", ""),
            //testFile("application/x-rst", "", ""),
            //testFile("application/vrt", "", ""),
            //testFile("application/x-ctg", "", ""),
            //testFile("application/x-e00-grid", "", ""),
            //testFile("audio/x-ogg-flac", "", ""),
            //testFile("application/x-compress", "z", ""),
            //testFile("image/x-psd", "", ""),
            //testFile("text/rss", "", ""),
            //testFile("application/sdts-raster", "", ""),
            //testFile("application/oxps", "", ""),
            //testFile("application/leveller", "", ""),
            //testFile("application/x-ingr", "", ""),
            //testFile("image/sgi", "", ""),
            //testFile("application/x-pnm", "", ""),
            //testFile("image/raster", "", ""),
            //testFile("audio/x-ogg-pcm", "", ""),
            //testFile("audio/ogg; codecs=opus", "", ""),
            //testFile("application/fits", "", ""),
            //testFile("application/x-r", "", ""),
            testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
            //testFile("application/java-vm", "", ""),
            //testFile("application/mspowerpoint", "", ""),
            //testFile("application/x-http", "", ""),
            //testFile("application/x-rmf", "", ""),
            //testFile("application/x-ogg", "", ""),
            //testFile("video/ogg", "ogv", "quick.ogv"),
            //testFile(MIMETYPE_APPLEFILE, "", ""),
            //testFile("text/rtf", "", ""),
            //testFile("image/adrg", "", ""),
            //testFile("video/x-ogg-rgb", "", ""),
            //testFile("application/x-ngs-geoid", "", ""),
            //testFile("application/x-map", "", ""),
            //testFile("image/ceos", "", ""),
            //testFile("application/xpm", "", ""),
            //testFile("application/x-ers", "", ""),
            //testFile("video/x-ogg-yuv", "", ""),
            //testFile("application/x-isis2", "", ""),
            //testFile("application/x-nwt-grd", "", ""),
            //testFile("application/x-isis3", "", ""),
            //testFile("application/x-nwt-grc", "", ""),
            //testFile("video/daala", "", ""),
            //testFile("application/x-blx", "", ""),
            //testFile("application/x-tnef", "", ""),
            //testFile("video/x-dirac", "", ""),
            //testFile("application/x-ndf", "", ""),
            //testFile("image/vnd.wap.wbmp", "", ""),
            //testFile("video/theora", "", ""),
            //testFile("application/kate", "", ""),
            //testFile("application/pkcs7-mime", "", ""),
            //testFile("image/fit", "", ""),
            //testFile("application/x-ctable2", "", ""),
            //testFile("application/x-executable", "", ""),
            //testFile("application/x-isatab", "", ""),
            //testFile("application/grass-ascii-grid", "", ""),
            testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick.txt"),
            //testFile("application/gzipped", "", ""),
            //testFile("application/x-gxf", "", ""),
            //testFile("application/x-cpg", "", ""),
            //testFile("application/x-lan", "", ""),
            //testFile("application/x-xyz", "", ""),
            testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
            //testFile("image/x-jbig2", "", ""),
            //testFile("image/nitf", "", ""),
            //testFile("application/mbox", "", ""),
            //testFile("application/chm", "", ""),
            //testFile("application/x-fast", "", ""),
            //testFile("application/x-gsc", "", ""),
            //testFile("application/x-deflate", "", ""),
            //testFile("application/x-grib2", "", ""),
            //testFile("image/x-ozi", "", ""),
            //testFile("application/x-pds", "", ""),
            //testFile("application/vnd.apple.iwork", "", ""),
            //testFile("application/x-usgs-dem", "", ""),
            //testFile("application/vnd.ms-excel.sheet.2", "", ""),
            //testFile("application/vnd.ms-excel.sheet.3", "", ""),
            //testFile("application/dif+xml", "", ""),
            //testFile("application/vnd.ms-excel.sheet.4", "", ""),
            //testFile("application/x-java", "", ""),
            //testFile("image/geotiff", "", ""),
            //testFile("application/x-gsag", "", ""),
            //testFile("application/x-snappy", "", ""),
            //testFile("video/x-theora", "", ""),
            //testFile("image/ntf", "", ""),
            //testFile("application/x-pdf", "", ""),
            //testFile("application/xml", "", ""),
            //testFile("application/vnd.wordperfect; version=6.x", "", ""),
            //testFile("application/pkcs7-signature", "", ""),
            //testFile("application/vnd.wordperfect; version=5.1", "", ""),
            //testFile("application/vnd.wordperfect; version=5.0", "", ""),
            //testFile("application/x-arj-compressed", "", ""),
            //testFile("application/geotopic", "", ""),
            //testFile("text/x-java-source", "java", ""),
            //testFile("audio/basic", "au", ""),
            //testFile("application/pcisdk", "", ""),
            //testFile("application/x-rik", "", ""),
            //testFile("audio/opus", "", ""),
            //testFile(MIMETYPE_IMAGE_JP2, "jp2", ""),
            //testFile("application/x-gtx", "", ""),
            //testFile("application/x-object", "", ""),
            //testFile("application/vnd.ms-wordml", "", ""),
            //testFile("image/x-wmf", "", ""),
            //testFile("application/x-rpf-toc", "", ""),
            //testFile("application/x-srtmhgt", "", ""),
            //testFile("application/x-generic-bin", "", ""),
            //testFile("text/vnd.iptc.anpa", "", ""),
            //testFile("application/x-msmetafile", "", ""),
            //testFile("application/x-wms", "", ""),
            //testFile("video/x-oggrgb", "", ""),
            //testFile("image/xcf", "", ""),
            //testFile("application/photoshop", "", ""),
            //testFile("application/x-lz4", "", ""),
            //testFile("application/x-7z-compressed", "", ""),
            //testFile("application/gff", "", ""),
            //testFile("video/x-oggyuv", "", ""),
            //testFile("application/x-msdownload", "", ""),
            testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
            //testFile("image/icns", "", ""),
            //testFile("application/x-emf", "", ""),
            //testFile("application/x-geo-pdf", "", ""),
            //testFile("video/x-ogg-uvs", "", ""),
            testFile(MIMETYPE_VIDEO_FLV, "flv", "quick.flv"),
            //testFile("application/x-zip-compressed", "", ""),
            //testFile("application/gzip", "", ""),
            //testFile("application/x-tika-unix-dump", "", ""),
            //testFile("application/x-coasp", "", ""),
            //testFile("application/x-dipex", "", ""),
            //testFile("application/x-til", "", ""),
            //testFile("application/x-gzip", "gzip", ""),
            //testFile("application/x-gs7bg", "", ""),
            //testFile("application/x-unix-archive", "", ""),
            //testFile("application/x-elf", "", ""),
            //testFile("application/dted", "", ""),
            //testFile("application/x-rasterlite", "", ""),
            //testFile("audio/x-mp4a", "", ""),
            //testFile("application/x-gzip-compressed", "", ""),
            //testFile("application/x-chm", "", ""),
            //testFile("image/hfa", "", ""),

            // Special test cases from the repo tests
            // ======================================

            // Test for MNT-577: Alfresco is running 100% CPU for over 10 minutes while extracting metadata for
            // Word office document
            // testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "problemFootnotes2.docx")

            // Test MNT-15219: Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may
            // cause OutOfMemory in Tika. Note - doesn't use extractFromMimetype
            testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "dmsu1332-reproduced.xlsx")
        );

        return quickFiles.collect(toList());
    }
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}created" : "2016-03-29T21:01:55Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Udintsev, Anton (external - Project)",
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}created" : "2011-05-17T13:34:11Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "test file cs5"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,7 @@
{
"{http://www.alfresco.org/model/content/1.0}modified" : "2005-09-20T17:25:00Z",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2005-05-26T12:57:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2010-01-06T17:32:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "M4A",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "MP3",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "90000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}addressee" : "mark.rogers@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}description" : "This is a quick test",
"{http://www.alfresco.org/model/content/1.0}addressees" : [ "mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com" ],
"{http://www.alfresco.org/model/content/1.0}sentdate" : "2013-01-18T13:44:20Z",
"{http://www.alfresco.org/model/content/1.0}subjectline" : "This is a quick test",
"{http://www.alfresco.org/model/content/1.0}author" : "Mark Rogers",
"{http://www.alfresco.org/model/content/1.0}originator" : "Mark Rogers"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1138362922000,
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1138362371000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 845336008000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

Some files were not shown because too many files have changed in this diff Show More