From 82ba97f666db36ed2b30b523f4a34cf02e099ec9 Mon Sep 17 00:00:00 2001 From: Erik Knizat Date: Wed, 1 Apr 2020 17:41:16 +0100 Subject: [PATCH] Define interface and the aio transformer --- .../alfresco-transform-aio/pom.xml | 59 ++ .../transformers/AbstractTransformer.java | 92 ++++ .../transformers/AllInOneTransformer.java | 135 +++++ .../transformer/transformers/MiscAdapter.java | 59 ++ .../transformer/transformers/TikaAdapter.java | 73 +++ .../transformer/transformers/Transformer.java | 77 +++ .../transformers/AllInOneTransformerTest.java | 247 +++++++++ .../main/resources/misc_engine_config.json | 80 +++ .../main/resources/tika_engine_config.json | 508 ++++++++++++++++++ pom.xml | 1 + 10 files changed, 1331 insertions(+) create mode 100644 alfresco-transform-aio/alfresco-transform-aio/pom.xml create mode 100644 alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AbstractTransformer.java create mode 100644 alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AllInOneTransformer.java create mode 100644 alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/MiscAdapter.java create mode 100644 alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/TikaAdapter.java create mode 100644 alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/Transformer.java create mode 100644 alfresco-transform-aio/alfresco-transform-aio/src/test/java/org/alfresco/transformer/transformers/AllInOneTransformerTest.java create mode 100644 alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json create mode 100644 alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json diff --git a/alfresco-transform-aio/alfresco-transform-aio/pom.xml b/alfresco-transform-aio/alfresco-transform-aio/pom.xml new file mode 100644 index 00000000..141d172b --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/pom.xml @@ -0,0 +1,59 @@ + + + 4.0.0 + alfresco-transform-aio + Alfresco All In One Transformer + jar + + + alfresco-transform-core + org.alfresco + 2.2.0-SNAPSHOT + ../../pom.xml + + + + + org.alfresco + alfresco-transformer-base + ${project.version} + + + org.alfresco + alfresco-transform-misc + ${project.version} + + + org.alfresco + alfresco-transform-tika + ${project.version} + + + + junit + junit + 4.13 + test + + + + + + + + org.codehaus.mojo + license-maven-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.apache.maven.plugins + maven-failsafe-plugin + + + + \ No newline at end of file diff --git a/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AbstractTransformer.java b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AbstractTransformer.java new file mode 100644 index 00000000..d7252245 --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AbstractTransformer.java @@ -0,0 +1,92 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.transformers; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.json.JsonMapper; +import org.alfresco.transform.client.model.config.TransformConfig; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; + +import static java.nio.charset.StandardCharsets.UTF_8; + +public abstract class AbstractTransformer implements Transformer +{ + private static final String TRANSFORMER_CONFIG_SUFFIX = "_engine_config.json"; + private ObjectMapper jsonObjectMapper; + TransformConfig transformConfig; + + public AbstractTransformer() throws Exception + { + jsonObjectMapper = new JsonMapper(); + transformConfig = loadTransformConfig(); + } + + public void setObjectMapper(ObjectMapper objectMapper) + { + this.jsonObjectMapper = objectMapper; + } + + /** + * Used to search for a engine configuration file. + * + * @return A unique prefix which is used to load an <prefix> _engine_config.json + */ + abstract String getTransformerConfigPrefix(); + + @Override + public TransformConfig getTransformConfig() + { + return transformConfig; + } + + /* + * TODO - Override default config name by a configurable location defined by a property + */ + private TransformConfig loadTransformConfig() throws Exception + { + String configFileName = getTransformerConfigPrefix() + TRANSFORMER_CONFIG_SUFFIX; + + if (getClass().getClassLoader().getResource(configFileName) == null) + { + throw new Exception("Configuration '" + configFileName + "' does not exist on the classpath."); + } + + try (InputStream is = getClass().getClassLoader().getResourceAsStream(configFileName); + Reader reader = new InputStreamReader(is, UTF_8)) + { + return jsonObjectMapper.readValue(reader, TransformConfig.class); + } + catch (IOException e) + { + throw new Exception("Could not read '" + configFileName + "' from the classpath.", e); + } + } +} diff --git a/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AllInOneTransformer.java b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AllInOneTransformer.java new file mode 100644 index 00000000..cfc791d3 --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/AllInOneTransformer.java @@ -0,0 +1,135 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.transformers; + +import org.alfresco.transform.client.model.config.TransformConfig; +import org.alfresco.transform.client.model.config.TransformOption; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Top level transformer managing multiple sub transformers. + * + * @author eknizat + */ +public class AllInOneTransformer implements Transformer +{ + + private static final Logger logger = LoggerFactory.getLogger(AllInOneTransformer.class); + + /** + * Represents the mapping between a transform and a transformer, multiple mappings can point to the same transformer. + */ + private Map transformerTransformMapping = new HashMap(); + + public AllInOneTransformer() + { + // TODO - use observer style registration? + try + { + this.registerTransformer(new MiscAdapter()); + this.registerTransformer(new TikaAdapter()); + } catch (Exception e) + { + // rethrow as runtime exception, nothing else cane be done + throw new RuntimeException("Failed to register all transformers.", e); + } + } + + /** + * The registration will go through all supported sub transformers and map them to the transformer implementation. + * + * @param transformer The transformer implementation, + * this could be a transformer managing multiple sub transformers. + * @throws Exception Exception is thrown if a mapping for a transformer name already exists. + */ + public void registerTransformer(Transformer transformer) throws Exception + { + for (org.alfresco.transform.client.model.config.Transformer transformerConfig + : transformer.getTransformConfig().getTransformers()) + { + String transformerName = transformerConfig.getTransformerName(); + if (transformerTransformMapping.containsKey(transformerName)) + { + throw new Exception("Transformer name " + transformerName + " is already registered."); + } + + transformerTransformMapping.put(transformerName, transformer); + logger.debug("Registered transformer with name: '" + transformerName + "' "); + } + } + + @Override + public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype, + Map transformOptions) throws Exception + { + String transformName = transformOptions.get(TRANSFORM_NAME_PARAMETER); + Transformer transformer = transformerTransformMapping.get(transformName); + + if(transformer == null) + { + throw new Exception("No transformer mapping for : transform:" + transformName + " sourceMimetype:" + + sourceMimetype + " targetMimetype:" + targetMimetype); + } + + if (logger.isDebugEnabled()) + { + logger.debug("Performing transform '" + transformName +"' using "+ transformer.getClass().getSimpleName()); + } + transformer.transform(sourceFile, targetFile, sourceMimetype, targetMimetype, transformOptions); + } + + @Override + public TransformConfig getTransformConfig() + { + + // Merge the config for all sub transformers + List transformerConfigs = new LinkedList<>(); + Map> transformOptions = new HashMap<>(); + { + for (Transformer transformer: transformerTransformMapping.values().stream().distinct().collect(Collectors.toList()) ) + { + transformerConfigs.addAll(transformer.getTransformConfig().getTransformers()); + transformOptions.putAll(transformer.getTransformConfig().getTransformOptions()); + } + } + + TransformConfig allInOneConfig = new TransformConfig(); + allInOneConfig.setTransformers(transformerConfigs); + allInOneConfig.setTransformOptions(transformOptions); + + return allInOneConfig; + } +} diff --git a/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/MiscAdapter.java b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/MiscAdapter.java new file mode 100644 index 00000000..375e11d0 --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/MiscAdapter.java @@ -0,0 +1,59 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.transformers; + +import java.io.File; +import java.util.Map; + +public class MiscAdapter extends AbstractTransformer +{ + private SelectingTransformer miscSelectingTransformer; + + + public MiscAdapter() throws Exception + { + super(); + miscSelectingTransformer = new SelectingTransformer(); + } + + @Override + public String getTransformerConfigPrefix() + { + return "misc"; + } + + @Override + public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype, Map transformOptions) throws Exception + { + String transformerName = transformOptions.get(TRANSFORM_NAME_PARAMETER); + miscSelectingTransformer.transform(transformerName, sourceFile, targetFile, + sourceMimetype, targetMimetype, transformOptions); + + } +} + diff --git a/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/TikaAdapter.java b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/TikaAdapter.java new file mode 100644 index 00000000..327159e8 --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/TikaAdapter.java @@ -0,0 +1,73 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.transformers; + +import org.alfresco.transformer.executors.TikaJavaExecutor; + +import java.io.File; +import java.util.Map; + +import static java.lang.Boolean.parseBoolean; +import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS; +import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT; +import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING; +import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE; + +public class TikaAdapter extends AbstractTransformer +{ + private static final String CONFIG_PREFIX = "tika"; + private TikaJavaExecutor tikaJavaExecutor; + + public TikaAdapter() throws Exception + { + super(); + tikaJavaExecutor = new TikaJavaExecutor(); + } + + @Override + String getTransformerConfigPrefix() + { + return CONFIG_PREFIX; + } + + @Override + public void transform(File sourceFile, File targetFile, String sourceMimetype, String targetMimetype, Map transformOptions) throws Exception + { + final String transform = transformOptions.get(TRANSFORM_NAME_PARAMETER); + + final boolean includeContents = parseBoolean( + transformOptions.getOrDefault("includeContents", "false")); + final boolean notExtractBookmarksText = parseBoolean( + transformOptions.getOrDefault("notExtractBookmarksText", "false")); + final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8"); + + tikaJavaExecutor.call(sourceFile, targetFile, transform, + includeContents ? INCLUDE_CONTENTS : null, + notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null, + TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding); + } +} diff --git a/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/Transformer.java b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/Transformer.java new file mode 100644 index 00000000..0233f2fc --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/src/main/java/org/alfresco/transformer/transformers/Transformer.java @@ -0,0 +1,77 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.transformers; + + +import org.alfresco.transform.client.model.config.TransformConfig; + +import java.io.File; +import java.util.Map; + + +/** + * Interface for transformers which can perform transformations and specify their own supported configuration. + * + * TODO - This could be implemented by each individual Transform engine in its own module + * and used by controllers for simplicity and clarity. Controllers could be made generic + * + * @author eknizat + */ +public interface Transformer +{ + /** + * Controllers pass this as an additional parameter.. + */ + String TRANSFORM_NAME_PARAMETER = "alfresco.transform-name-parameter"; + + /** + * Implementation of the actual transformation. + * + * + * TODO - Do we really need the sourceMimetype and targetMimetype as separate arguments? + * they could be passed in parameters with predefined keys like TRANSFORM_NAME_PARAMETER + * + * @param sourceFile + * @param targetFile + * @param transformOptions + * @throws Exception + */ + void transform(File sourceFile, File targetFile, String sourceMimetype, + String targetMimetype, Map transformOptions) throws Exception; + + + /** + * @return Supported config for the transformer implementation. + * + * TODO - maybe this does not have to be part of the common transform interface? + * + */ + TransformConfig getTransformConfig(); + +} + + diff --git a/alfresco-transform-aio/alfresco-transform-aio/src/test/java/org/alfresco/transformer/transformers/AllInOneTransformerTest.java b/alfresco-transform-aio/alfresco-transform-aio/src/test/java/org/alfresco/transformer/transformers/AllInOneTransformerTest.java new file mode 100644 index 00000000..db5ed1b6 --- /dev/null +++ b/alfresco-transform-aio/alfresco-transform-aio/src/test/java/org/alfresco/transformer/transformers/AllInOneTransformerTest.java @@ -0,0 +1,247 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.transformers; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; +import org.junit.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.StringWriter; +import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; + +import static org.alfresco.transformer.transformers.TextToPdfContentTransformer.PAGE_LIMIT; +import static org.alfresco.transformer.transformers.Transformer.TRANSFORM_NAME_PARAMETER; +import static org.junit.Assert.*; + +public class AllInOneTransformerTest +{ + private static final String SOURCE_MIMETYPE = "text/html"; + private static final String TARGET_MIMETYPE = "text/plain"; + String SOURCE_ENCODING = "sourceEncoding"; + String TARGET_ENCODING = "targetEncoding"; + + AllInOneTransformer transformer = new AllInOneTransformer(); + + private void writeToFile(File file, String content, String encoding) throws Exception + { + try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding)) + { + ow.append(content); + } + } + + private String readFromFile(File file, final String encoding) throws Exception + { + return new String(Files.readAllBytes(file.toPath()), encoding); + } + + @Test + public void TestConfigAggregation() + { + transformer.getTransformConfig().getTransformers().forEach(t -> {System.out.println(t); System.out.println(" **");}); + + + // check all transformers are there + // check all options are there + + } + + /// Test copied from Misc transformer - html + @Test + public void testMiscHtml() throws Exception + { + final String NEWLINE = System.getProperty("line.separator"); + final String TITLE = "Testing!"; + final String TEXT_P1 = "This is some text in English"; + final String TEXT_P2 = "This is more text in English"; + final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol"; + String partA = "" + TITLE + "" + NEWLINE; + String partB = "

" + TEXT_P1 + "

" + NEWLINE + + "

" + TEXT_P2 + "

" + NEWLINE + + "

" + TEXT_P3 + "

" + NEWLINE; + String partC = ""; + final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE; + + File tmpS = null; + File tmpD = null; + + try + { + // Content set to ISO 8859-1 + tmpS = File.createTempFile("AlfrescoTestSource_", ".html"); + writeToFile(tmpS, partA + partB + partC, "ISO-8859-1"); + + tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt"); + + Map parameters = new HashMap<>(); + parameters.put(SOURCE_ENCODING, "ISO-8859-1"); + parameters.put(TRANSFORM_NAME_PARAMETER, "html"); + transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters); + + assertEquals(expected, readFromFile(tmpD, "UTF-8")); + tmpS.delete(); + tmpD.delete(); + + // Content set to UTF-8 + tmpS = File.createTempFile("AlfrescoTestSource_", ".html"); + writeToFile(tmpS, partA + partB + partC, "UTF-8"); + + tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt"); + parameters = new HashMap<>(); + parameters.put(TRANSFORM_NAME_PARAMETER, "html"); + parameters.put(SOURCE_ENCODING, "UTF-8"); + transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters); + assertEquals(expected, readFromFile(tmpD, "UTF-8")); + tmpS.delete(); + tmpD.delete(); + + // Content set to UTF-16 + tmpS = File.createTempFile("AlfrescoTestSource_", ".html"); + writeToFile(tmpS, partA + partB + partC, "UTF-16"); + + tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt"); + parameters = new HashMap<>(); + parameters.put(TRANSFORM_NAME_PARAMETER, "html"); + parameters.put(SOURCE_ENCODING, "UTF-16"); + transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters); + assertEquals(expected, readFromFile(tmpD, "UTF-8")); + tmpS.delete(); + tmpD.delete(); + + // Note - since HTML Parser 2.0 META tags specifying the + // document encoding will ONLY be respected if the original + // content type was set to ISO-8859-1. + // + // This means there is now only one test which we can perform + // to ensure that this now-limited overriding of the encoding + // takes effect. + + // Content set to ISO 8859-1, meta set to UTF-8 + tmpS = File.createTempFile("AlfrescoTestSource_", ".html"); + String str = partA + + "" + + partB + partC; + + writeToFile(tmpS, str, "UTF-8"); + + tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt"); + + parameters = new HashMap<>(); + parameters.put(TRANSFORM_NAME_PARAMETER, "html"); + parameters.put(SOURCE_ENCODING, "ISO-8859-1"); + transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters); + assertEquals(expected, readFromFile(tmpD, "UTF-8")); + tmpS.delete(); + tmpD.delete(); + + // Note - we can't test UTF-16 with only a meta encoding, + // because without that the parser won't know about the + // 2 byte format so won't be able to identify the meta tag + } + finally + { + if (tmpS != null && tmpS.exists()) tmpS.delete(); + if (tmpD != null && tmpD.exists()) tmpD.delete(); + } + } + + /// Test copied from Misc transformer - pdf + @Test + public void testMiscPdf() throws Exception + { + transformTextAndCheckPageLength(-1); + } + + private void transformTextAndCheckPageLength(int pageLimit) throws Exception + { + int pageLength = 32; + int lines = (pageLength + 10) * ((pageLimit > 0) ? pageLimit : 1); + StringBuilder sb = new StringBuilder(); + String checkText = null; + int cutoff = pageLimit * pageLength; + for (int i = 1; i <= lines; i++) + { + sb.append(i); + sb.append(" I must not talk in class or feed my homework to my cat.\n"); + if (i == cutoff) + checkText = sb.toString(); + } + sb.append("\nBart\n"); + String text = sb.toString(); + checkText = (checkText == null) ? clean(text) : clean(checkText); + transformTextAndCheck(text, "UTF-8", checkText, String.valueOf(pageLimit)); + } + + private void transformTextAndCheck(String text, String encoding, String checkText, + String pageLimit) throws Exception + { + // Get a reader for the text + File sourceFile = File.createTempFile("AlfrescoTestSource_", ".txt"); + writeToFile(sourceFile, text, encoding); + + // And a temp writer + File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf"); + + // Transform to PDF + Map parameters = new HashMap<>(); + parameters.put(TRANSFORM_NAME_PARAMETER, "textToPdf"); + parameters.put(PAGE_LIMIT, pageLimit); + transformer.transform(sourceFile, targetFile, "text/plain", "application/pdf", parameters); + + // Read back in the PDF and check it + PDDocument doc = PDDocument.load(targetFile); + PDFTextStripper textStripper = new PDFTextStripper(); + StringWriter textWriter = new StringWriter(); + textStripper.writeText(doc, textWriter); + doc.close(); + + String roundTrip = clean(textWriter.toString()); + + assertEquals( + "Incorrect text in PDF when starting from text in " + encoding, + checkText, roundTrip + ); + + sourceFile.delete(); + targetFile.delete(); + } + + private String clean(String text) + { + text = text.replaceAll("\\s+\\r", ""); + text = text.replaceAll("\\s+\\n", ""); + text = text.replaceAll("\\r", ""); + text = text.replaceAll("\\n", ""); + return text; + } + +} \ No newline at end of file diff --git a/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json new file mode 100644 index 00000000..5948332f --- /dev/null +++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json @@ -0,0 +1,80 @@ +{ + "transformOptions": { + "textToPdfOptions": [ + {"value": {"name": "pageLimit"}}, + {"value": {"name": "sourceEncoding"}} + ], + "stringOptions": [ + {"value": {"name": "sourceEncoding"}}, + {"value": {"name": "targetEncoding"}} + ], + "htmlOptions": [ + {"value": {"name": "sourceEncoding"}} + ] + }, + "transformers": [ + { + "transformerName": "html", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "text/html", "targetMediaType": "text/plain"} + ], + "transformOptions": [ + "htmlOptions" + ] + }, + { + "transformerName": "string", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "text/plain", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/mediawiki", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/css", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/csv", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/xml", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/html", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/richtext", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/sgml", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/tab-separated-values", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/x-setext", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/x-java-source", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/x-jsp", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/x-markdown", "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/calendar", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-javascript", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/dita+xml", "targetMediaType": "text/plain"} + ], + "transformOptions": [ + "stringOptions" + ] + }, + { + "transformerName": "appleIWorks", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "image/jpeg"}, + {"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "image/jpeg"}, + {"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "image/jpeg"} + ], + "transformOptions": [ + ] + }, + { + "transformerName": "textToPdf", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "application/pdf"}, + {"sourceMediaType": "text/csv", "targetMediaType": "application/pdf"}, + {"sourceMediaType": "application/dita+xml", "targetMediaType": "application/pdf"}, + {"sourceMediaType": "text/xml", "targetMediaType": "application/pdf"} + ], + "transformOptions": [ + "textToPdfOptions" + ] + }, + { + "transformerName": "rfc822", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "message/rfc822", "targetMediaType": "text/plain"} + ], + "transformOptions": [ + ] + } + ] +} \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json new file mode 100644 index 00000000..ddf79787 --- /dev/null +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json @@ -0,0 +1,508 @@ +{ + "transformOptions": { + "tikaOptions": [ + {"value": {"name": "targetEncoding"}} + ], + "archiveOptions": [ + {"value": {"name": "includeContents"}}, + {"value": {"name": "targetEncoding"}} + ], + "pdfboxOptions": [ + {"value": {"name": "notExtractBookmarksText"}}, + {"value": {"name": "targetEncoding"}} + ] + }, + "transformers": [ + { + "transformerName": "Archive", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/x-cpio", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-cpio", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-cpio", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-cpio", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/java-archive", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/java-archive", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/java-archive", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/java-archive", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/x-tar", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-tar", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-tar", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-tar", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/zip", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/zip", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/zip", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/zip", "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "archiveOptions" + ] + }, + { + "transformerName": "OutlookMsg", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "tikaOptions" + ] + }, + { + "transformerName": "PdfBox", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/pdf", "targetMediaType": "text/csv"}, + {"sourceMediaType": "application/pdf", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/pdf", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/pdf", "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "pdfboxOptions" + ] + }, + { + "transformerName": "Office", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "tikaOptions" + ] + }, + { + "transformerName": "Poi", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/csv"}, + {"sourceMediaType": "application/vnd.ms-excel", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/csv"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 65, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "tikaOptions" + ] + }, + { + "transformerName": "OOXML", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 60, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "tikaOptions" + ] + }, + { + "transformerName": "TikaAuto", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-netcdf", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document" , "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/x-gzip", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-gzip", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-gzip", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-gzip", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/x-hdf", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-hdf", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-hdf", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-hdf", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "text/html", "targetMediaType": "text/html"}, + {"sourceMediaType": "text/html", "priority": 60, "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/html", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "text/html", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "text/x-java-source", "targetMediaType": "text/html"}, + {"sourceMediaType": "text/x-java-source", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/x-java-source", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "text/x-java-source", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.text", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/ogg", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/ogg", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/ogg", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/ogg", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/rss+xml", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/rss+xml", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/rss+xml", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/rss+xml", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/rtf", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/rtf", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/rtf", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/rtf", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.sun.xml.writer", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/xml"}, + + {"sourceMediaType": "text/xml", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "text/xml", "priority": 55, "targetMediaType": "text/plain"}, + {"sourceMediaType": "text/xml", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "text/xml", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.visio", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/xhtml+xml", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "text/xml"}, + + {"sourceMediaType": "application/x-compress", "targetMediaType": "text/html"}, + {"sourceMediaType": "application/x-compress", "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/x-compress", "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "tikaOptions" + ] + }, + { + "transformerName": "TextMining", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "text/html"}, + {"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "text/plain"}, + {"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "application/xhtml+xml"}, + {"sourceMediaType": "application/msword", "targetMediaType": "text/xml"} + ], + "transformOptions": [ + "tikaOptions" + ] + } + ] +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 59dda0b1..d997a0c6 100644 --- a/pom.xml +++ b/pom.xml @@ -42,6 +42,7 @@ alfresco-transform-imagemagick/alfresco-transform-imagemagick-boot alfresco-transform-misc/alfresco-transform-misc alfresco-transform-misc/alfresco-transform-misc-boot + alfresco-transform-aio/alfresco-transform-aio