ATS-671: Split engines into fat & skinny modules (ATS-674) (#192)

Each transform engine project has been separated into 2 modules so that an executable and non-executable jar can be created. 
Modules have been renamed such that *docker* has been removed from the artifactIds and project names.

Co-authored-by: Erik Knizat <erik.knizat@alfresco.com>
Co-authored-by: David Edwards <david.edwards@alfresco.com>
This commit is contained in:
eknizat
2020-03-27 13:45:15 +00:00
committed by GitHub
parent 46b2e6df5b
commit 3bed6930bf
215 changed files with 539 additions and 157 deletions

View File

@@ -0,0 +1 @@
target/docker/

View File

@@ -0,0 +1,34 @@
# Image provides a container in which to run Tika transformations for Alfresco Content Services.
# Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0.
FROM alfresco/alfresco-base-java:11.0.1-openjdk-centos-7-72b88c6f1f4c
ENV APACHE_LICENSE_FILE=https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt
ENV JAVA_OPTS=""
# Set default user information
ARG GROUPNAME=Alfresco
ARG GROUPID=1000
ARG TIKAUSERNAME=tika
ARG USERID=33004
COPY target/${env.project_artifactId}-${env.project_version}.jar /usr/bin
RUN ln /usr/bin/${env.project_artifactId}-${env.project_version}.jar /usr/bin/${env.project_artifactId}.jar && \
curl -s -S $APACHE_LICENSE_FILE -o Apache\ 2.0.txt && \
yum clean all
ADD target/generated-resources/licenses /licenses
ADD target/generated-resources/licenses.xml /licenses/
ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/
RUN groupadd -g ${GROUPID} ${GROUPNAME} && \
useradd -u ${USERID} -G ${GROUPNAME} ${TIKAUSERNAME} && \
chgrp -R ${GROUPNAME} /usr/bin/${env.project_artifactId}.jar
EXPOSE 8090
USER ${TIKAUSERNAME}
ENTRYPOINT java $JAVA_OPTS -jar /usr/bin/${env.project_artifactId}.jar

View File

@@ -0,0 +1,360 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>alfresco-transform-tika-boot</artifactId>
<name>Alfresco Tika Transformer Spring Boot</name>
<packaging>jar</packaging>
<parent>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transform-core</artifactId>
<version>2.2.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<properties>
<image.name>alfresco/alfresco-tika</image.name>
<image.registry>quay.io</image.registry>
<env.project_artifactId>${project.artifactId}</env.project_artifactId>
</properties>
<dependencies>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transform-tika</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>com.vaadin.external.google</groupId>
<artifactId>android-json</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.dom4j</groupId>
<artifactId>dom4j</artifactId>
</dependency>
<!-- Tika -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.21-20190624-alfresco-patched</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.21-20190624-alfresco-patched</version>
<exclusions>
<exclusion>
<groupId>com.tdunning</groupId>
<artifactId>json</artifactId>
</exclusion>
<exclusion>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
</exclusion>
<exclusion>
<groupId>org.bouncycastle</groupId>
<artifactId>bcmail-jdk15on</artifactId>
</exclusion>
<!-- TODO ATS-534 check transformations not affected by this missing quartz lib -->
<exclusion>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- for Apache Tika Parsers - eg. encrypted PDF -->
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
<version>1.64</version>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcmail-jdk15on</artifactId>
<version>1.64</version>
</dependency>
<!-- Apache POI -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<!-- Apache PDFBox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>license-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>docker-it-setup</id>
<!-- raises an ActiveMq container for the Integration Tests -->
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
<configuration>
<images>
<image>
<alias>activemq</alias>
<name>alfresco/alfresco-activemq:5.15.8</name>
<run>
<hostname>activemq</hostname>
<ports>
<port>8161:8161</port>
<port>5672:5672</port>
<port>61616:61616</port>
</ports>
<wait>
<log>Apache ActiveMQ 5.15.8 .* started</log>
<time>20000</time>
<kill>500</kill>
<shutdown>100</shutdown>
<exec>
<preStop>kill 1</preStop>
<preStop>kill -9 1</preStop>
</exec>
</wait>
</run>
</image>
<image>
<alias>tika</alias>
<name>${image.name}:${image.tag}</name>
<run>
<ports>
<port>8090:8090</port>
</ports>
<wait>
<http>
<url>http://localhost:8090/transform/config</url>
<method>GET</method>
<status>200...299</status>
</http>
<time>300000</time>
<kill>500</kill>
<shutdown>100</shutdown>
<exec>
<preStop>kill 1</preStop>
<preStop>kill -9 1</preStop>
</exec>
</wait>
</run>
</image>
</images>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>local</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<configuration>
<images>
<image>
<name>${image.name}:${image.tag}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
<buildOptions>
<squash>true</squash>
</buildOptions>
</build>
</image>
</images>
</configuration>
<executions>
<execution>
<id>build-image</id>
<phase>package</phase>
<goals>
<goal>build</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>internal</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<configuration>
<images>
<!-- QuayIO image -->
<image>
<name>${image.name}:${image.tag}</name>
<registry>${image.registry}</registry>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
<buildOptions>
<squash>true</squash>
</buildOptions>
</build>
</image>
<!-- DockerHub image -->
<image>
<name>${image.name}:${image.tag}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
<buildOptions>
<squash>true</squash>
</buildOptions>
</build>
</image>
</images>
</configuration>
<executions>
<execution>
<id>build-image</id>
<phase>package</phase>
<goals>
<goal>build</goal>
</goals>
</execution>
<execution>
<id>push-image</id>
<phase>install</phase>
<goals>
<goal>push</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>release</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<configuration combine.self="override">
<images>
<!-- QuayIO image -->
<image>
<name>${image.name}:${project.version}</name>
<registry>${image.registry}</registry>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
<buildOptions>
<squash>true</squash>
</buildOptions>
</build>
</image>
<!-- DockerHub image -->
<image>
<name>${image.name}:${project.version}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
<buildOptions>
<squash>true</squash>
</buildOptions>
</build>
</image>
</images>
</configuration>
<executions>
<execution>
<id>build-push-image</id>
<phase>deploy</phase>
<goals>
<goal>build</goal>
<goal>push</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@@ -0,0 +1,46 @@
# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
#-------------------------------------------------------------------------------
# Already used licenses in project :
# - (MIT-style) netCDF C library license
# - Apache 2.0
# - Apache License 2.0
# - Apache License v2.0
# - Apache License, Version 2.0
# - Apache License, version 2.0
# - Apache Software License - Version 2.0
# - BSD
# - BSD 3-clause New License
# - BSD License
# - Bouncy Castle Licence
# - CDDL + GPLv2 with classpath exception
# - CDDL, v1.0
# - EPL 2.0
# - Eclipse Public License - v 1.0
# - Eclipse Public License, Version 1.0
# - GNU Lesser General Public License
# - GNU Lesser General Public License, Version 2.1
# - GPL2 w/ CPE
# - LGPL, v2.1 or later
# - LGPL, version 2.1
# - MIT License
# - MIT License (MIT)
# - MIT license
# - Mozilla Public License 1.1 (MPL 1.1)
# - New BSD license
# - OGC copyright
# - Public
# - Public Domain
# - Public Domain, per Creative Commons CC0
# - Similar to Apache License but with the acknowledgment clause removed
# - Specification License
# - The Apache License, Version 2.0
# - The Apache Software License, Version 2.0
# - The BSD License
# - The MIT License
# - UnRar License
#-------------------------------------------------------------------------------
# Please fill the missing licenses for dependencies :
#
#
#Mon Aug 19 18:06:38 EEST 2019
net.jcip--jcip-annotations--1.0=Public

View File

@@ -0,0 +1,77 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
private static final Logger logger = LoggerFactory.getLogger(Application.class);
@Value("${container.name}")
private String containerName;
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
}
@EventListener(ApplicationReadyEvent.class)
public void startup()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
logger.info("Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info("Starting application components... Done");
}
}

View File

@@ -0,0 +1,189 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static java.lang.Boolean.parseBoolean;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
/**
* Controller for the Docker based Tika transformers.
*
* Status Codes:
*
* 200 Success
* 400 Bad Request: Invalid target mimetype <mimetype>
* 400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)
* 400 Bad Request: Request parameter <name> is of the wrong type
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
* 400 Bad Request: The source filename was not supplied
* 500 Internal Server Error: (no message with low level IO problems)
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
* 500 Internal Server Error: Transformer version check exit code was not 0
* 500 Internal Server Error: Transformer version check failed to create any output
* 500 Internal Server Error: Could not read the target file
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
* 500 Internal Server Error: Filename encoding error
* 507 Insufficient Storage: Failed to store the source file
*/
@Controller
public class TikaController extends AbstractTransformerController
{
private static final Logger logger = LoggerFactory.getLogger(TikaController.class);
private TikaJavaExecutor javaExecutor = new TikaJavaExecutor();
@Override
public String getTransformerName()
{
return "Tika";
}
@Override
public String version()
{
return "Tika available";
}
@Override
public ProbeTestTransform getProbeTestTransform()
{
// See the Javadoc on this method and Probes.md for the choice of these values.
// the livenessPercentage is a little large as Tika does tend to suffer from slow transforms that class with a gc.
return new ProbeTestTransform(this, "quick.pdf", "quick.txt",
60, 16, 400, 10240, 60 * 30 + 1, 60 * 15 + 20)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
javaExecutor.call(sourceFile, targetFile, PDF_BOX,
TARGET_MIMETYPE + MIMETYPE_TEXT_PLAIN, TARGET_ENCODING + "UTF-8");
}
};
}
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") final MultipartFile sourceMultipartFile,
@RequestParam("sourceMimetype") final String sourceMimetype,
@RequestParam("targetExtension") final String targetExtension,
@RequestParam("targetMimetype") final String targetMimetype,
@RequestParam(value = "targetEncoding", required = false, defaultValue = "UTF-8") final String targetEncoding,
@RequestParam(value = "timeout", required = false) final Long timeout,
@RequestParam(value = "testDelay", required = false) final Long testDelay,
@RequestParam(value = "includeContents", required = false) final Boolean includeContents,
@RequestParam(value = "notExtractBookmarksText", required = false) final Boolean notExtractBookmarksText)
{
final String targetFilename = createTargetFileName(
sourceMultipartFile.getOriginalFilename(), targetExtension);
getProbeTestTransform().incrementTransformerCount();
final File sourceFile = createSourceFile(request, sourceMultipartFile);
final File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
// TODO Consider streaming the request and response rather than using temporary files
// https://www.logicbig.com/tutorials/spring-framework/spring-web-mvc/streaming-response-body.html
final Map<String, String> transformOptions = createTransformOptions(
"includeContents", includeContents,
"notExtractBookmarksText", notExtractBookmarksText,
"targetEncoding", targetEncoding);
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
javaExecutor.call(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
{
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
final boolean includeContents = parseBoolean(
transformOptions.getOrDefault("includeContents", "false"));
final boolean notExtractBookmarksText = parseBoolean(
transformOptions.getOrDefault("notExtractBookmarksText", "false"));
final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
javaExecutor.call(sourceFile, targetFile, transform,
includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
}
}

View File

@@ -0,0 +1,2 @@
queue:
engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:org.alfresco.transform.engine.tika.acs}

View File

@@ -0,0 +1,508 @@
{
"transformOptions": {
"tikaOptions": [
{"value": {"name": "targetEncoding"}}
],
"archiveOptions": [
{"value": {"name": "includeContents"}},
{"value": {"name": "targetEncoding"}}
],
"pdfboxOptions": [
{"value": {"name": "notExtractBookmarksText"}},
{"value": {"name": "targetEncoding"}}
]
},
"transformers": [
{
"transformerName": "Archive",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/x-cpio", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-cpio", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-cpio", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-cpio", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "text/html"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/zip", "targetMediaType": "text/html"},
{"sourceMediaType": "application/zip", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/zip", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/zip", "targetMediaType": "text/xml"}
],
"transformOptions": [
"archiveOptions"
]
},
{
"transformerName": "OutlookMsg",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "PdfBox",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "text/csv"},
{"sourceMediaType": "application/pdf", "targetMediaType": "text/html"},
{"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/pdf", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/pdf", "targetMediaType": "text/xml"}
],
"transformOptions": [
"pdfboxOptions"
]
},
{
"transformerName": "Office",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "Poi",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/csv"},
{"sourceMediaType": "application/vnd.ms-excel", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/csv"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 65, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "OOXML",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 60, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "TikaAuto",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document" , "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "text/xml"},
{"sourceMediaType": "text/html", "targetMediaType": "text/html"},
{"sourceMediaType": "text/html", "priority": 60, "targetMediaType": "text/plain"},
{"sourceMediaType": "text/html", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "text/html", "targetMediaType": "text/xml"},
{"sourceMediaType": "text/x-java-source", "targetMediaType": "text/html"},
{"sourceMediaType": "text/x-java-source", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "text/x-java-source", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "text/x-java-source", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/ogg", "targetMediaType": "text/html"},
{"sourceMediaType": "application/ogg", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/ogg", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/ogg", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "text/html"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/rtf", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/rtf", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/rtf", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/rtf", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "text/xml"},
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/xml"},
{"sourceMediaType": "text/xml", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "text/xml", "priority": 55, "targetMediaType": "text/plain"},
{"sourceMediaType": "text/xml", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "text/xml", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/html"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "TextMining",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "text/html"},
{"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/msword", "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
]
}
]
}

View File

@@ -0,0 +1,27 @@
<html xmlns:th="http://www.thymeleaf.org">
<body>
<div>
<h2>Tika Test Transformations</h2>
<form method="POST" enctype="multipart/form-data" action="/transform">
<table>
<tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
<tr><td><div style="text-align:right">sourceMimetype *</div></td><td><input type="text" name="sourceMimetype" value="application/msword" /></td></tr>
<tr><td><div style="text-align:right">targetExtension *</div></td><td><input type="text" name="targetExtension" value="txt" /></td></tr>
<tr><td><div style="text-align:right">targetMimetype *</div></td><td><input type="text" name="targetMimetype" value="text/plain" /></td></tr>
<tr><td><div style="text-align:right">targetEncoding *</div></td><td><input type="text" name="targetEncoding" value="UTF-8" /></td></tr>
<tr><td><div style="text-align:right">includeContents (archive) *</div></td><td><input type="checkbox" name="includeContents" value="true" /></td></tr>
<tr><td><div style="text-align:right">timeout</div></td><td><input type="text" name="timeout" value="" /></td></tr>
<tr><td><div style="text-align:right">testDelay</div></td><td><input type="text" name="testDelay" value="" /></td></tr>
<tr><td><div style="text-align:right">notExtractBookmarksText</div></td><td><input type="checkbox" name="notExtractBookmarksText" value="true" /></td></tr>
<tr><td></td><td><input type="submit" value="Transform" /></td></tr>
</table>
</form>
</div>
<div>
<a href="/log">Log entries</a>
</div>
</body>
</html>

View File

@@ -0,0 +1,594 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
import static org.alfresco.transformer.executors.Tika.DOC;
import static org.alfresco.transformer.executors.Tika.DOCX;
import static org.alfresco.transformer.executors.Tika.HTML;
import static org.alfresco.transformer.executors.Tika.MSG;
import static org.alfresco.transformer.executors.Tika.OUTLOOK_MSG;
import static org.alfresco.transformer.executors.Tika.PDF;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.POI;
import static org.alfresco.transformer.executors.Tika.POI_OFFICE;
import static org.alfresco.transformer.executors.Tika.POI_OO_XML;
import static org.alfresco.transformer.executors.Tika.PPTX;
import static org.alfresco.transformer.executors.Tika.TEXT_MINING;
import static org.alfresco.transformer.executors.Tika.TIKA_AUTO;
import static org.alfresco.transformer.executors.Tika.TXT;
import static org.alfresco.transformer.executors.Tika.XHTML;
import static org.alfresco.transformer.executors.Tika.XML;
import static org.alfresco.transformer.executors.Tika.XSLX;
import static org.alfresco.transformer.executors.Tika.ZIP;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_HTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PDF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_CSV;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.when;
import static org.springframework.http.HttpHeaders.ACCEPT;
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
import static org.springframework.http.HttpStatus.CREATED;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
import static org.springframework.http.MediaType.APPLICATION_PDF_VALUE;
import static org.springframework.http.MediaType.TEXT_PLAIN_VALUE;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.mock.mockito.SpyBean;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
/**
* Test the TikaController without a server.
* Super class includes tests for the AbstractTransformerController.
*/
@RunWith(SpringRunner.class)
@WebMvcTest(TikaController.class)
public class TikaControllerTest extends AbstractTransformerControllerTest
{
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
"\n" +
"The quick brown fox jumps over the lazy dogs";
private static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
@Mock
private RuntimeExec.ExecutionResult mockExecutionResult;
@Mock
private RuntimeExec mockTransformCommand;
@Mock
private RuntimeExec mockCheckCommand;
@SpyBean
private TikaController controller;
private String targetEncoding = "UTF-8";
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
@Before
public void before()
{
sourceExtension = "pdf";
targetExtension = "txt";
}
@Override
protected void mockTransformCommand(String sourceExtension,
String targetExtension, String sourceMimetype,
boolean readTargetFileBytes) throws IOException
{
this.sourceExtension = sourceExtension;
this.targetExtension = targetExtension;
this.sourceMimetype = sourceMimetype;
expectedOptions = null;
expectedSourceSuffix = null;
expectedSourceFileBytes = readTestFile(sourceExtension);
expectedTargetFileBytes = readTargetFileBytes ? readTestFile(targetExtension) : null;
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype,
expectedSourceFileBytes);
when(mockTransformCommand.execute(any(), anyLong())).thenAnswer(
(Answer<RuntimeExec.ExecutionResult>) invocation -> {
Map<String, String> actualProperties = invocation.getArgument(0);
assertEquals("There should be 3 properties", 3, actualProperties.size());
String actualOptions = actualProperties.get("options");
String actualSource = actualProperties.get("source");
String actualTarget = actualProperties.get("target");
String actualTargetExtension = getFilenameExtension(actualTarget);
assertNotNull(actualSource);
assertNotNull(actualTarget);
if (expectedSourceSuffix != null)
{
assertTrue(
"The source file \"" + actualSource + "\" should have ended in \"" + expectedSourceSuffix + "\"",
actualSource.endsWith(expectedSourceSuffix));
actualSource = actualSource.substring(0,
actualSource.length() - expectedSourceSuffix.length());
}
assertNotNull(actualOptions);
if (expectedOptions != null)
{
assertEquals("expectedOptions", expectedOptions, actualOptions);
}
Long actualTimeout = invocation.getArgument(1);
assertNotNull(actualTimeout);
if (expectedTimeout != null)
{
assertEquals("expectedTimeout", expectedTimeout, actualTimeout);
}
// Copy a test file into the target file location if it exists
int i = actualTarget.lastIndexOf('_');
if (i >= 0)
{
String testFilename = actualTarget.substring(i + 1);
File testFile = getTestFile(testFilename, false);
File targetFile = new File(actualTarget);
generateTargetFileFromResourceFile(actualTargetExtension, testFile,
targetFile);
}
// Check the supplied source file has not been changed.
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
assertArrayEquals("Source file is not the same", expectedSourceFileBytes,
actualSourceFileBytes);
return mockExecutionResult;
});
when(mockExecutionResult.getExitValue()).thenReturn(0);
when(mockExecutionResult.getStdErr()).thenReturn("STDERROR");
when(mockExecutionResult.getStdOut()).thenReturn("STDOUT");
}
@Override
protected AbstractTransformerController getController()
{
return controller;
}
private void transform(String transform, String sourceExtension, String targetExtension,
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
{
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
this.targetMimetype = targetMimetype;
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
MockHttpServletRequestBuilder requestBuilder = includeContents == null
? mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension)
: mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension, "includeContents", includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + this.targetExtension)).
andReturn();
String content = result.getResponse().getContentAsString();
assertTrue("The content did not include \"" + expectedContentContains,
content.contains(expectedContentContains));
}
@Override
// Add extra required parameters to the request.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
return super.mockMvcRequest(url, sourceFile, params)
.param("targetEncoding", targetEncoding)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
}
@Test
@Override
public void simpleTransformTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.simpleTransformTest();
}
@Test
@Override
public void testDelayTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.testDelayTest();
}
@Test
@Override
public void noTargetFileTest()
{
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
}
// --- Super class tests (need modified setup) ---
@Test
@Override
public void dotDotSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.dotDotSourceFilenameTest();
}
@Test
@Override
public void noExtensionSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.noExtensionSourceFilenameTest();
}
@Test
@Override
public void badSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.badSourceFilenameTest();
}
@Test
@Override
public void blankSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.blankSourceFilenameTest();
}
@Test
@Override
public void noTargetExtensionTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.noTargetExtensionTest();
}
@Test
@Override
public void calculateMaxTime() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.calculateMaxTime();
}
// --- General Tika tests ---
@Test
public void badEncodingTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
targetEncoding = "rubbish";
mockMvc.perform(
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
}
// --- Archive ---
@Test
public void zipToTextArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n");
}
@Test
public void zipToTextIncludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
"quick.html\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog\n" +
"\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog" +
"\n" +
"\n");
}
@Test
public void zipToTextExcludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
false, "\n" +
"folder/subfolder/quick.jpg\n" +
"\n" +
"\n" +
"quick.doc\n" +
"\n" +
"\n" +
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"quick.txt\n" +
"\n" +
"\n" +
"quick.xml\n" +
"\n");
}
// --- OutlookMsg ---
@Test
public void msgToTxtOutlookMsgTest() throws Exception
{
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
// --- PdfBox ---
@Test
public void pdfToTxtPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pdfToCsvPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
}
@Test
public void pdfToXmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
@Test
public void pdfToXhtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
EXPECTED_XHTML_CONTENT_CONTAINS);
}
@Test
public void pdfToHtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
// --- Office ---
@Test
public void msgToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
@Test
public void docToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- Poi ---
@Test
public void xslxToCsvPoiTest() throws Exception
{
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
EXPECTED_CSV_CONTENT_CONTAINS);
}
// --- OOXML ---
@Test
public void docxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pptxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TikaAuto ---
@Test
public void ppxtToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void doctToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TextMining ---
@Test
public void docToTxtTextMiningTest() throws Exception
{
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pdfToTxtExtractBookmarksTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
mockMvc.perform(
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param(
"notExtractBookmarksText", "true"))
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension));
}
@Override
protected void updateTransformRequestWithSpecificOptions(TransformRequest transformRequest)
{
transformRequest.setSourceExtension(sourceExtension);
transformRequest.setTargetExtension(targetExtension);
transformRequest.setSourceMediaType(APPLICATION_PDF_VALUE);
transformRequest.setTargetMediaType(TEXT_PLAIN_VALUE);
transformRequest.getTransformRequestOptions().put("targetEncoding", "UTF-8");
}
@Test
public void testPojoTransform() throws Exception
{
// Files
String sourceFileRef = UUID.randomUUID().toString();
File sourceFile = getTestFile("quick." + sourceExtension, true);
String targetFileRef = UUID.randomUUID().toString();
// Transformation Request POJO
TransformRequest transformRequest = new TransformRequest();
transformRequest.setRequestId("1");
transformRequest.setSchema(1);
transformRequest.setClientData("Alfresco Digital Business Platform");
transformRequest.setTransformRequestOptions(new HashMap<>());
transformRequest.setSourceReference(sourceFileRef);
transformRequest.setSourceExtension(sourceExtension);
transformRequest.setSourceSize(sourceFile.length());
transformRequest.setTargetExtension(targetExtension);
transformRequest.setSourceMediaType(sourceMimetype);
// HTTP Request
HttpHeaders headers = new HttpHeaders();
headers.set(CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
sourceFile), headers, OK);
when(alfrescoSharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
when(alfrescoSharedFileStoreClient.saveFile(any()))
.thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
when(mockExecutionResult.getExitValue()).thenReturn(0);
// Update the Transformation Request with any specific params before sending it
updateTransformRequestWithSpecificOptions(transformRequest);
// Serialize and call the transformer
String tr = objectMapper.writeValueAsString(transformRequest);
String transformationReplyAsString = mockMvc
.perform(MockMvcRequestBuilders
.post("/transform")
.header(ACCEPT, APPLICATION_JSON_VALUE)
.header(CONTENT_TYPE, APPLICATION_JSON_VALUE)
.content(tr))
.andExpect(status().is(CREATED.value()))
.andReturn().getResponse().getContentAsString();
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
TransformReply.class);
// Assert the reply
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
assertEquals(transformRequest.getClientData(), transformReply.getClientData());
assertEquals(transformRequest.getSchema(), transformReply.getSchema());
}
}

View File

@@ -0,0 +1,76 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
import org.springframework.core.io.ClassPathResource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.util.LinkedMultiValueMap;
/**
* Tests TikaController with a server test harness.
*/
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
public class TikaHttpRequestTest extends AbstractHttpRequestTest
{
@Override
protected String getTransformerName()
{
return "Tika";
}
@Override
protected String getSourceExtension()
{
return "pdf";
}
// Override method as Tika requires sourceMimetype
// If not provided then sourceMimetype request parameter error will be thrown.
@Override
protected void assertTransformError(boolean addFile, String errorMessage)
{
LinkedMultiValueMap<String, Object> parameters = new LinkedMultiValueMap<>();
if (addFile)
{
parameters.add("file", new ClassPathResource("quick." + getSourceExtension()));
}
parameters.add("sourceMimetype", "application/pdf");
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MULTIPART_FORM_DATA);
HttpEntity<LinkedMultiValueMap<String, Object>> entity = new HttpEntity<>(parameters,
headers);
super.sendTranformationRequest(entity, errorMessage);
}
}

View File

@@ -0,0 +1,62 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
/**
* @author Lucian Tuca
* created on 15/01/2019
*/
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
properties = {"activemq.url=nio://localhost:61616"})
public class TikaQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
@Override
protected TransformRequest buildRequest()
{
return TransformRequest
.builder()
.withRequestId(UUID.randomUUID().toString())
.withSourceMediaType(MIMETYPE_OPENXML_WORDPROCESSING)
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
.withTargetExtension("txt")
.withSchema(1)
.withClientData("ACS")
.withSourceReference(UUID.randomUUID().toString())
.withSourceSize(32L).build();
}
}

View File

@@ -0,0 +1,174 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static java.text.MessageFormat.format;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toSet;
import static org.alfresco.transformer.EngineClient.sendTRequest;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.springframework.http.HttpStatus.OK;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Triple;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import com.google.common.collect.ImmutableMap;
/**
* @author Cezar Leahu
*/
@RunWith(Parameterized.class)
public class TikaTransformationIT
{
private static final Logger logger = LoggerFactory.getLogger(TikaTransformationIT.class);
private static final String ENGINE_URL = "http://localhost:8090";
private static final Map<String, String> extensionMimetype = ImmutableMap.of(
"html", "text/html",
"txt", "text/plain",
"xhtml", "application/xhtml+xml",
"xml", "text/xml");
private final String sourceFile;
private final String targetExtension;
private final String targetMimetype;
private final String sourceMimetype;
public TikaTransformationIT(final Triple<String, String, String> entry)
{
sourceFile = entry.getLeft();
targetExtension = entry.getMiddle();
targetMimetype = extensionMimetype.get(entry.getMiddle());
sourceMimetype = entry.getRight();
}
// TODO unit tests for the following file types (for which is difficult to find file samples):
// *.ogx (application/ogg)
// *.cpio (application/x-cpio)
// *.cdf (application/x-netcdf)
// *.hdf (application/x-hdf)
@Parameterized.Parameters
public static Set<Triple<String, String, String>> engineTransformations()
{
return Stream
.of(
allTargets("quick.doc", "application/msword"),
allTargets("quick.docx",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
allTargets("quick.html", "text/html"),
allTargets("quick.jar", "application/java-archive"),
allTargets("quick.java", "text/x-java-source"),
Stream.of(
Triple.of("quick.key", "html", "application/vnd.apple.keynote"),
// Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
// Triple.of("quick.key", "txt", "TikaAuto"),
Triple.of("quick.key", "xhtml", "application/vnd.apple.keynote"),
Triple.of("quick.key", "xml", "application/vnd.apple.keynote")
),
allTargets("quick.msg", "application/vnd.ms-outlook"),
Stream.of(
Triple.of("quick.numbers", "html", "application/vnd.apple.numbers"),
// Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
// Triple.of("quick.numbers", "txt", "TikaAuto"),
Triple.of("quick.numbers", "xhtml", "application/vnd.apple.numbers"),
Triple.of("quick.numbers", "xml", "application/vnd.apple.numbers")
),
allTargets("quick.odp", "application/vnd.oasis.opendocument.presentation"),
allTargets("quick.ods", "application/vnd.oasis.opendocument.spreadsheet"),
allTargets("quick.odt", "application/vnd.oasis.opendocument.text"),
allTargets("quick.otp", "application/vnd.oasis.opendocument.presentation-template"),
allTargets("quick.ots", "application/vnd.oasis.opendocument.spreadsheet-template"),
allTargets("quick.ott", "application/vnd.oasis.opendocument.text-template"),
Stream.of(
Triple.of("quick.pages", "html", "application/vnd.apple.pages"),
// Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
// Triple.of("quick.pages", "txt", "TikaAuto"),
Triple.of("quick.pages", "xhtml", "application/vnd.apple.pages"),
Triple.of("quick.pages", "xml", "application/vnd.apple.pages")
),
allTargets("quick.pdf", "application/pdf"),
allTargets("quick.ppt", "application/vnd.ms-powerpoint"),
allTargets("quick.pptx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"),
allTargets("quick.sxw", "application/vnd.sun.xml.writer"),
allTargets("quick.txt", "text/plain"),
allTargets("quick.vsd", "application/vnd.visio"),
allTargets("quick.xls", "application/vnd.ms-excel"),
allTargets("quick.xslx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
allTargets("quick.zip", "application/zip"),
allTargets("quick.tar", "application/x-tar"),
allTargets("sample.rtf", "application/rtf"),
allTargets("quick.xml", "text/xml"),
allTargets("sample.xhtml.txt", "application/xhtml+xml"),
allTargets("sample.rss", "application/rss+xml"),
//allTargets("quick.rar", "application/x-rar-compressed"),
allTargets("quick.tar.gz", "application/x-gzip"))
.flatMap(identity())
.collect(toSet());
}
@Test
public void testTransformation()
{
final String descriptor = format("Transform ({0} -> {1}, {2}, sourceMimetype={3})",
sourceFile, targetMimetype, targetExtension, sourceMimetype);
try
{
final ResponseEntity<Resource> response = sendTRequest(ENGINE_URL, sourceFile, null,
targetMimetype, targetExtension, ImmutableMap.of(
"targetEncoding", "UTF-8",
"sourceMimetype", sourceMimetype));
assertEquals(descriptor, OK, response.getStatusCode());
}
catch (Exception e)
{
fail(descriptor + " exception: " + e.getMessage());
}
}
private static Stream<Triple<String, String, String>> allTargets(final String sourceFile,
final String sourceMimetype)
{
return extensionMimetype
.keySet()
.stream()
.map(k -> Triple.of(sourceFile, k, sourceMimetype));
}
}

View File

@@ -0,0 +1,22 @@
{
"transformOptions": {
"engineXOptions": [
{"value": {"name": "page"}},
{"value": {"name": "width"}},
{"group": {"transformOptions": [
{"value": {"name": "cropGravity"}}
]}}
]
},
"transformers": [
{
"transformerName": "engineX",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
],
"transformOptions": [
"engineXOptions"
]
}
]
}

View File

@@ -0,0 +1,10 @@
{
"transformOptions": {},
"transformers": [
{
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
]
}
]
}

View File

@@ -0,0 +1,10 @@
{
"transformers": [
{
"transformerName": "engineX",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
]
}
]
}

View File

@@ -0,0 +1,26 @@
{
"transformOptions": {
"engineXOptions": [
{"value": {"name": "page"}},
{"value": {"name": "page"}},
{"value": {"name": "width"}},
{"group": {"transformOptions": [
{"value": {"name": "cropGravity"}}
]}}
]
},
"transformers": [
{
"transformerName": "engineX",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" },
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" },
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
],
"transformOptions": [
"engineXOptions",
"engineXOptions"
]
}
]
}

View File

@@ -0,0 +1,17 @@
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
<title>The quick brown fox jumps over the lazy dog</title>
<meta name="author" content="Nevin Nollop">
<meta name="keywords" content="Pangram, fox, dog">
<meta name="description" content="Gym class featuring a brown fox and lazy dog">
</head>
<body lang=EN-US>
The quick brown fox jumps over the lazy dog
</body>
</html>

View File

@@ -0,0 +1,31 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
public class quick
{
}

View File

@@ -0,0 +1,8 @@
The quick brown fox jumps over the lazy dog
Blank Page

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<document>
<text>The quick brown fox jumps over the lazy dog</text>
</document>

View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>W3Schools Home Page</title>
<link>https://www.w3schools.com</link>
<description>Free web building tutorials</description>
<item>
<title>RSS Tutorial</title>
<link>https://www.w3schools.com/xml/xml_rss.asp</link>
<description>New RSS tutorial on W3Schools</description>
</item>
<item>
<title>XML Tutorial</title>
<link>https://www.w3schools.com/xml</link>
<description>New XML tutorial on W3Schools</description>
</item>
</channel>
</rss>

View File

@@ -0,0 +1,214 @@
{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff31507\deff0\stshfdbch31506\stshfloch31506\stshfhich31506\stshfbi31507\deflang1033\deflangfe1033\themelang1048\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}
{\f37\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0302020204030204}Calibri Light;}
{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}
{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f42\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f43\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
{\f45\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f46\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f47\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f48\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
{\f49\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f50\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f412\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\f413\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}
{\f415\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\f416\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\f417\fbidi \fswiss\fcharset177\fprq2 Calibri (Hebrew);}{\f418\fbidi \fswiss\fcharset178\fprq2 Calibri (Arabic);}
{\f419\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\f420\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
{\fhimajor\f31528\fbidi \fswiss\fcharset238\fprq2 Calibri Light CE;}{\fhimajor\f31529\fbidi \fswiss\fcharset204\fprq2 Calibri Light Cyr;}{\fhimajor\f31531\fbidi \fswiss\fcharset161\fprq2 Calibri Light Greek;}
{\fhimajor\f31532\fbidi \fswiss\fcharset162\fprq2 Calibri Light Tur;}{\fhimajor\f31533\fbidi \fswiss\fcharset177\fprq2 Calibri Light (Hebrew);}{\fhimajor\f31534\fbidi \fswiss\fcharset178\fprq2 Calibri Light (Arabic);}
{\fhimajor\f31535\fbidi \fswiss\fcharset186\fprq2 Calibri Light Baltic;}{\fhimajor\f31536\fbidi \fswiss\fcharset163\fprq2 Calibri Light (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}
{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}
{\fhiminor\f31573\fbidi \fswiss\fcharset177\fprq2 Calibri (Hebrew);}{\fhiminor\f31574\fbidi \fswiss\fcharset178\fprq2 Calibri (Arabic);}{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}
{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}
{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;
\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\red0\green0\blue0;\red0\green0\blue0;}{\*\defchp \f31506\fs24 }{\*\defpap
\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs24\alang1025 \ltrch\fcs0
\f31506\fs24\lang1048\langfe1033\cgrid\langnp1048\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\*
\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs24\alang1025 \ltrch\fcs0 \f31506\fs24\lang1048\langfe1033\cgrid\langnp1048\langfenp1033 \snext11 \ssemihidden \sunhideused Normal Table;}}
{\*\rsidtbl \rsid2693434\rsid4215609\rsid7808163\rsid16662808}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\author Cezar Leahu}{\operator Cezar Leahu}
{\creatim\yr2019\mo8\dy29\hr15\min41}{\revtim\yr2019\mo8\dy29\hr15\min43}{\version2}{\edmins2}{\nofpages1}{\nofwords17}{\nofchars102}{\nofcharsws118}{\vern2821}}{\*\userprops {\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_Enabled}\proptype30
{\staticval true}{\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_SetDate}\proptype30{\staticval 2019-08-29T12:41:57+0200}{\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_Method}\proptype30{\staticval Standard}{\propname MSIP_Label_ffb
520d8-df98-444b-9f20-0dd9d08cf98c_Name}\proptype30{\staticval ffb520d8-df98-444b-9f20-0dd9d08cf98c}{\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_SiteId}\proptype30{\staticval 65bc0b3b-7ca2-488c-ba9c-b1bebdd49af6}{\propname MSIP_Label_ffb520d8
-df98-444b-9f20-0dd9d08cf98c_ActionId}\proptype30{\staticval 6097ae90-22f7-448a-b9b7-0000b0413133}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}}
\paperw11900\paperh16840\margl1417\margr1417\margt1417\margb1417\gutter0\ltrsect
\widowctrl\ftnbj\aenddoc\trackmoves0\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen
\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1417\dgvorigin1417\dghshow1\dgvshow1
\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct
\asianbrkrule\rsidroot4215609\newtblstyruls\nogrowautofit\usenormstyforlist\noindnmbrts\felnbrelev\nocxsptable\indrlsweleven\noafcnsttbl\afelev\utinl\hwelev\spltpgpar\notcvasp\notbrkcnstfrctbl\notvatxbx\krnprsnet\cachedcolbal \nouicompat \fet0
{\*\wgrffmtfilter 2450}\nofeaturethrottle1\ilfomacatclnup0\ltrpar \sectd \ltrsect\linex0\headery708\footery708\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sectrsid2693434\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}
{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}
{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9
\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid16662808 \rtlch\fcs1 \af31507\afs24\alang1025 \ltrch\fcs0
\f31506\fs24\lang1048\langfe1033\cgrid\langnp1048\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid16662808
\par
\par
\par
\par
\par The quick brown fox jumps over the lazy dog
\par
\par
\par
\par
\par
\par
\par
\par The quick brown fox jumps over the lazy dog
\par
\par
\par
\par }\pard \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4996987
\par }{\*\themedata 504b030414000600080000002100e9de0fbfff0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb4ec3301045f748fc83e52d4a
9cb2400825e982c78ec7a27cc0c8992416c9d8b2a755fbf74cd25442a820166c2cd933f79e3be372bd1f07b5c3989ca74aaff2422b24eb1b475da5df374fd9ad
5689811a183c61a50f98f4babebc2837878049899a52a57be670674cb23d8e90721f90a4d2fa3802cb35762680fd800ecd7551dc18eb899138e3c943d7e503b6
b01d583deee5f99824e290b4ba3f364eac4a430883b3c092d4eca8f946c916422ecab927f52ea42b89a1cd59c254f919b0e85e6535d135a8de20f20b8c12c3b0
0c895fcf6720192de6bf3b9e89ecdbd6596cbcdd8eb28e7c365ecc4ec1ff1460f53fe813d3cc7f5b7f020000ffff0300504b030414000600080000002100a5d6
a7e7c0000000360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4f
c7060abb0884a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b6309512
0f88d94fbc52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462
a1a82fe353bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f746865
6d652f7468656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b
4b0d592c9c070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b
4757e8d3f729e245eb2b260a0238fd010000ffff0300504b030414000600080000002100b6f4679893070000c9200000160000007468656d652f7468656d652f
7468656d65312e786d6cec59cd8b1bc915bf07f23f347d97f5d5ad8fc1f2a24fcfda33b6b164873dd648a5eef2547789aad28cc56208de532e81c026e49085bd
ed21842cecc22eb9e48f31d8249b3f22afaa5bdd5552c99e191c3061463074977eefd5afde7bf5de53d5ddcf5e26d4bbc05c1096f6fcfa9d9aefe174ce16248d
7afeb3d9a4d2f13d2151ba4094a5b8e76fb0f03fbbf7eb5fdd454732c609f6403e1547a8e7c752ae8eaa5531876124eeb0154ee1bb25e30992f0caa3ea82a34b
d09bd06aa3566b55134452df4b51026a1f2f97648ebd9952e9dfdb2a1f53784da5500373caa74a35b6243476715e5708b11143cabd0b447b3eccb3609733fc52
fa1e4542c2173dbfa6fffceabdbb5574940b517940d6909be8bf5c2e17589c37f49c3c3a2b260d823068f50bfd1a40e53e6edc1eb7c6ad429f06a0f91c569a71
b175b61bc320c71aa0ecd1a17bd41e35eb16ded0dfdce3dc0fd5c7c26b50a63fd8c34f2643b0a285d7a00c1feee1c3417730b2f56b50866fede1dbb5fe28685b
fa3528a6243ddf43d7c25673b85d6d0159327aec8477c360d26ee4ca4b144443115d6a8a254be5a1584bd00bc6270050408a24493db959e1259a43140f112567
9c7827248a21f056286502866b8ddaa4d684ffea13e827ed5174849121ad780113b137a4f87862cec94af6fc07a0d537206f7ffef9cdeb1fdfbcfee9cd575fbd
79fdf77c6eadca923b466964cafdf2dd1ffef3cd6fbd7ffff0ed2f5fff319b7a172f4cfcbbbffdeedd3ffef93ef5b0e2d2146ffff4fdbb1fbf7ffbe7dfffebaf
5f3bb4f7393a33e1339260e13dc297de5396c0021dfcf119bf9ec42c46c494e8a791402952b338f48f656ca11f6d10450edc00db767cce21d5b880f7d72f2cc2
d398af2571687c182716f094313a60dc6985876a2ec3ccb3751ab927e76b13f714a10bd7dc43945a5e1eaf579063894be530c616cd2714a5124538c5d253dfb1
738c1dabfb8210cbaea764ce99604be97d41bc01224e93ccc899154da5d03149c02f1b1741f0b7659bd3e7de8051d7aa47f8c246c2de40d4417e86a965c6fb68
2d51e252394309350d7e8264ec2239ddf0b9891b0b099e8e3065de78818570c93ce6b05ec3e90f21cdb8dd7e4a37898de4929cbb749e20c64ce4889d0f6394ac
5cd829496313fbb938871045de13265df05366ef10f50e7e40e941773f27d872f787b3c133c8b026a53240d4376beef0e57dccacf89d6ee8126157aae9f3c44a
b17d4e9cd131584756689f604cd1255a60ec3dfbdcc160c05696cd4bd20f62c82ac7d815580f901dabea3dc5027a25d5dcece7c91322ac909de2881de073bad9
493c1b9426881fd2fc08bc6eda7c0ca52e7105c0633a3f37818f08f480102f4ea33c16a0c308ee835a9fc4c82a60ea5db8e375c32dff5d658fc1be7c61d1b8c2
be04197c6d1948eca6cc7b6d3343d49aa00c9819822ec3956e41c4727f29a28aab165b3be596f6a62ddd00dd91d5f42424fd6007b4d3fb84ffbbde073a8cb77f
f9c6b10f3e4ebfe3566c25ab6b763a8792c9f14e7f7308b7dbd50c195f904fbfa919a175fa04431dd9cf58b73dcd6d4fe3ffdff73487f6f36d2773a8dfb8ed64
7ce8306e3b99fc70e5e3743265f3027d8d3af0c80e7af4b14f72f0d46749289dca0dc527421ffc08f83db398c0a092d3279eb838055cc5f0a8ca1c4c60e1228e
b48cc799fc0d91f134462b381daafb4a492472d591f0564cc0a1911e76ea5678ba4e4ed9223becacd7d5c16656590592e5782d2cc6e1a04a66e856bb3cc02bd4
6bb6913e68dd1250b2d721614c6693683a48b4b783ca48fa58178ce620a157f65158741d2c3a4afdd6557b2c805ae115f8c1edc1cff49e1f06200242701e07cd
f942f92973f5d6bbda991fd3d3878c69450034d8db08283ddd555c0f2e4fad2e0bb52b78da2261849b4d425b46377822869fc17974aad1abd0b8aeafbba54b2d
7aca147a3e08ad9246bbf33e1637f535c8ede6069a9a9982a6de65cf6f35430899395af5fc251c1ac363b282d811ea3717a211dcbccc25cf36fc4d32cb8a0b39
4222ce0cae934e960d122231f728497abe5a7ee1069aea1ca2b9d51b90103e59725d482b9f1a3970baed64bc5ce2b934dd6e8c284b67af90e1b35ce1fc568bdf
1cac24d91adc3d8d1797de195df3a708422c6cd795011744c0dd413db3e682c0655891c8caf8db294c79da356fa3740c65e388ae62945714339967709dca0b3a
faadb081f196af190c6a98242f8467912ab0a651ad6a5a548d8cc3c1aafb6121653923699635d3ca2aaa6abab39835c3b60cecd8f26645de60b53531e434b3c2
67a97b37e576b7b96ea74f28aa0418bcb09fa3ea5ea12018d4cac92c6a8af17e1a56393b1fb56bc776811fa07695226164fdd656ed8edd8a1ae19c0e066f54f9
416e376a6168b9ed2bb5a5f5adb979b1cdce5e40f2184197bba6526857c2c92e47d0104d754f92a50dd8222f65be35e0c95b73d2f3bfac85fd60d80887955a27
1c57826650ab74c27eb3d20fc3667d1cd66ba341e31514161927f530bbb19fc00506dde4f7f67a7cefee3ed9ded1dc99b3a4caf4dd7c5513d777f7f5c6e1bb7b
8f40d2f9b2d598749bdd41abd26df627956034e854bac3d6a0326a0ddba3c9681876ba9357be77a1c141bf390c5ae34ea5551f0e2b41aba6e877ba9576d068f4
8376bf330efaaff23606569ea58fdc16605ecdebde7f010000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d65
2f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d36
3f2451eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e
3198720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d985
0528a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100e9de0fbfff0000001c020000130000000000000000000000
0000000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b00000000000000000000
000000300100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c0000000000000000000000000019020000
7468656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d0014000600080000002100b6f4679893070000c92000001600000000000000
000000000000d60200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027000000
000000000000000000009d0a00007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000980b00000000}
{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d
617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169
6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363
656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e}
{\*\latentstyles\lsdstimax375\lsdlockeddef0\lsdsemihiddendef0\lsdunhideuseddef0\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4;
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 1;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 5;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 9;
\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 1;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 2;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 3;
\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 4;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 5;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 6;
\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 7;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 8;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Indent;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 header;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footer;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index heading;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of figures;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope return;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation reference;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 line number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 page number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote text;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of authorities;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 macro;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toa heading;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 3;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 3;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 3;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 5;\lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Closing;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Signature;\lsdsemihidden1 \lsdunhideused1 \lsdpriority1 \lsdlocked0 Default Paragraph Font;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 4;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Message Header;\lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Salutation;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Date;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Note Heading;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 3;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Block Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 FollowedHyperlink;\lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;
\lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Document Map;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Plain Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 E-mail Signature;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Top of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Bottom of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal (Web);\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Acronym;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Cite;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Code;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Definition;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Keyboard;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Preformatted;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Sample;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Typewriter;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Variable;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Table;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation subject;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 No List;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 1;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 2;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 2;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 3;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 2;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 6;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 2;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 6;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 2;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Contemporary;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Elegant;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Professional;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Subtle 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Subtle 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 2;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Balloon Text;\lsdpriority39 \lsdlocked0 Table Grid;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Theme;\lsdsemihidden1 \lsdlocked0 Placeholder Text;
\lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdpriority60 \lsdlocked0 Light Shading;\lsdpriority61 \lsdlocked0 Light List;\lsdpriority62 \lsdlocked0 Light Grid;\lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdpriority64 \lsdlocked0 Medium Shading 2;
\lsdpriority65 \lsdlocked0 Medium List 1;\lsdpriority66 \lsdlocked0 Medium List 2;\lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdpriority68 \lsdlocked0 Medium Grid 2;\lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdpriority70 \lsdlocked0 Dark List;
\lsdpriority71 \lsdlocked0 Colorful Shading;\lsdpriority72 \lsdlocked0 Colorful List;\lsdpriority73 \lsdlocked0 Colorful Grid;\lsdpriority60 \lsdlocked0 Light Shading Accent 1;\lsdpriority61 \lsdlocked0 Light List Accent 1;
\lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdsemihidden1 \lsdlocked0 Revision;
\lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;
\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;\lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;\lsdpriority72 \lsdlocked0 Colorful List Accent 1;
\lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdpriority60 \lsdlocked0 Light Shading Accent 2;\lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2;
\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;
\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdpriority70 \lsdlocked0 Dark List Accent 2;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;
\lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdpriority61 \lsdlocked0 Light List Accent 3;\lsdpriority62 \lsdlocked0 Light Grid Accent 3;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;
\lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;
\lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;\lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;\lsdpriority60 \lsdlocked0 Light Shading Accent 4;
\lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdpriority62 \lsdlocked0 Light Grid Accent 4;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;
\lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdpriority70 \lsdlocked0 Dark List Accent 4;
\lsdpriority71 \lsdlocked0 Colorful Shading Accent 4;\lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdpriority60 \lsdlocked0 Light Shading Accent 5;\lsdpriority61 \lsdlocked0 Light List Accent 5;
\lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 5;
\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;\lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;
\lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdpriority62 \lsdlocked0 Light Grid Accent 6;
\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;
\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;
\lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis;
\lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdsemihidden1 \lsdunhideused1 \lsdpriority37 \lsdlocked0 Bibliography;
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;\lsdpriority41 \lsdlocked0 Plain Table 1;\lsdpriority42 \lsdlocked0 Plain Table 2;\lsdpriority43 \lsdlocked0 Plain Table 3;\lsdpriority44 \lsdlocked0 Plain Table 4;
\lsdpriority45 \lsdlocked0 Plain Table 5;\lsdpriority40 \lsdlocked0 Grid Table Light;\lsdpriority46 \lsdlocked0 Grid Table 1 Light;\lsdpriority47 \lsdlocked0 Grid Table 2;\lsdpriority48 \lsdlocked0 Grid Table 3;\lsdpriority49 \lsdlocked0 Grid Table 4;
\lsdpriority50 \lsdlocked0 Grid Table 5 Dark;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 1;
\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 1;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 1;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 1;
\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 1;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 2;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 2;
\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 2;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 2;
\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 3;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 3;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 3;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 3;
\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 3;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 4;
\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 4;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 4;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 4;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 4;
\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 4;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 5;
\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 5;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 5;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 5;
\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 5;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 6;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 6;
\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 6;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 6;
\lsdpriority46 \lsdlocked0 List Table 1 Light;\lsdpriority47 \lsdlocked0 List Table 2;\lsdpriority48 \lsdlocked0 List Table 3;\lsdpriority49 \lsdlocked0 List Table 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark;
\lsdpriority51 \lsdlocked0 List Table 6 Colorful;\lsdpriority52 \lsdlocked0 List Table 7 Colorful;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 List Table 2 Accent 1;\lsdpriority48 \lsdlocked0 List Table 3 Accent 1;
\lsdpriority49 \lsdlocked0 List Table 4 Accent 1;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 1;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 1;
\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 List Table 2 Accent 2;\lsdpriority48 \lsdlocked0 List Table 3 Accent 2;\lsdpriority49 \lsdlocked0 List Table 4 Accent 2;
\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 2;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 3;
\lsdpriority47 \lsdlocked0 List Table 2 Accent 3;\lsdpriority48 \lsdlocked0 List Table 3 Accent 3;\lsdpriority49 \lsdlocked0 List Table 4 Accent 3;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 3;
\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 4;\lsdpriority47 \lsdlocked0 List Table 2 Accent 4;
\lsdpriority48 \lsdlocked0 List Table 3 Accent 4;\lsdpriority49 \lsdlocked0 List Table 4 Accent 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 4;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 4;
\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 List Table 2 Accent 5;\lsdpriority48 \lsdlocked0 List Table 3 Accent 5;
\lsdpriority49 \lsdlocked0 List Table 4 Accent 5;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 5;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 5;
\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 List Table 2 Accent 6;\lsdpriority48 \lsdlocked0 List Table 3 Accent 6;\lsdpriority49 \lsdlocked0 List Table 4 Accent 6;
\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Mention;
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Smart Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Hashtag;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Unresolved Mention;}}{\*\datastore }}

View File

@@ -0,0 +1,9 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>The quick brown fox</title>
</head>
<body>
The quick brown fox jumps over the lazy dog
</body>
</html>

View File

@@ -0,0 +1,5 @@
### Licenses
* Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the
[Apache 2.0.txt](https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt)
file placed in the root directory of the docker image.

View File

@@ -0,0 +1,112 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>alfresco-transform-tika</artifactId>
<name>Alfresco Tika Transformer</name>
<packaging>jar</packaging>
<parent>
<artifactId>alfresco-transform-core</artifactId>
<groupId>org.alfresco</groupId>
<version>2.2.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<dependencies>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Tika -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.21-20190624-alfresco-patched</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.21-20190624-alfresco-patched</version>
<exclusions>
<exclusion>
<groupId>com.tdunning</groupId>
<artifactId>json</artifactId>
</exclusion>
<exclusion>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
</exclusion>
<exclusion>
<groupId>org.bouncycastle</groupId>
<artifactId>bcmail-jdk15on</artifactId>
</exclusion>
<!-- TODO ATS-534 check transformations not affected by this missing quartz lib -->
<exclusion>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- for Apache Tika Parsers - eg. encrypted PDF -->
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
<version>1.64</version>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcmail-jdk15on</artifactId>
<version>1.64</version>
</dependency>
<!-- Apache POI -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<!-- Apache PDFBox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>license-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,52 @@
# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
#-------------------------------------------------------------------------------
# Already used licenses in project :
# - (MIT-style) netCDF C library license
# - 3-Clause BSD License
# - Apache 2.0
# - Apache License 2.0
# - Apache License v2
# - Apache License v2.0
# - Apache License, Version 2.0
# - Apache License, version 2.0
# - Apache Software License - Version 2.0
# - BSD
# - BSD 3-clause License w/nuclear disclaimer
# - BSD 3-clause New License
# - BSD License
# - BSD-2-Clause
# - Bouncy Castle Licence
# - CDDL + GPLv2 with classpath exception
# - CDDL, v1.0
# - CDDL/GPLv2+CE
# - EDL 1.0
# - EPL 2.0
# - Eclipse Distribution License - v 1.0
# - Eclipse Public License - v 1.0
# - Eclipse Public License 2.0
# - GNU General Public License, version 2 with the GNU Classpath Exception
# - GNU Lesser General Public License
# - GPL2 w/ CPE
# - LGPL, v2.1 or later
# - LGPL, version 2.1
# - MIT License
# - MIT License (MIT)
# - Mozilla Public License 1.1 (MPL 1.1)
# - OGC copyright
# - Public Domain
# - Public Domain, per Creative Commons CC0
# - Similar to Apache License but with the acknowledgment clause removed
# - The Apache License, Version 2.0
# - The Apache Software License, Version 2.0
# - The BSD License
# - The MIT License
# - The SAX License
# - The W3C License
# - UnRar License
# - lgpl
#-------------------------------------------------------------------------------
# Please fill the missing licenses for dependencies :
#
#
#Thu Mar 26 11:14:47 GMT 2020
net.jcip--jcip-annotations--1.0=

View File

@@ -0,0 +1,859 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_HTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_CSV;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.parser.pkg.PackageParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import com.google.common.collect.ImmutableList;
/**
* Stripped down command line Tika transformers. Not actually run as a separate process, but the code fits the patten
* used by transformers that do.
* <pre>
*
* Archive 0 ms
* 1) cpio html [100] unlimited
* 2) cpio txt [50] unlimited
* 3) cpio xhtml [100] unlimited
* 4) cpio xml [100] unlimited
* 5) jar html [100] unlimited
* 6) jar txt [50] unlimited
* 7) jar xhtml [100] unlimited
* 8) jar xml [100] unlimited
* 9) tar html [100] unlimited
* 10) tar txt [50] unlimited
* 11) tar xhtml [100] unlimited
* 12) tar xml [100] unlimited
* 13) zip html [100] unlimited
* 14) zip txt [50] unlimited
* 15) zip xhtml [100] unlimited
* 16) zip xml [100] unlimited
* PdfBox 0 ms
* 1) pdf html [110] unlimited
* 2) pdf txt [50] 25 MB
* 3) pdf xhtml [110] unlimited
* 4) pdf xml [110] unlimited
* OutlookMsg 0 ms
* 1) msg html [125] unlimited
* 2) msg txt [125] unlimited
* 3) msg xhtml [125] unlimited
* 4) msg xml [125] unlimited
* PdfBox 0 ms
* 1) pdf html [110] unlimited
* 2) pdf txt [50] 25 MB
* 3) pdf xhtml [110] unlimited
* 4) pdf xml [110] unlimited
* Office 0 ms
* 1) doc html [130] unlimited
* 2) doc txt [130] unlimited
* 3) doc xhtml [130] unlimited
* 4) doc xml [130] unlimited
* 5) mpp html [130] unlimited
* 6) mpp txt [130] unlimited
* 7) mpp xhtml [130] unlimited
* 8) mpp xml [130] unlimited
* 9) msg html [130] unlimited
* 10) msg txt [130] unlimited
* 11) msg xhtml [130] unlimited
* 12) msg xml [130] unlimited
* 13) ppt html [130] unlimited
* 14) ppt txt [130] unlimited
* 15) ppt xhtml [130] unlimited
* 16) ppt xml [130] unlimited
* 17) vsd html [130] unlimited
* 18) vsd txt [130] unlimited
* 19) vsd xhtml [130] unlimited
* 20) vsd xml [130] unlimited
* Poi 0 ms
* 1) xls csv [130] unlimited
* 2) xls html [130] unlimited
* 3) xls txt [130] unlimited
* 4) xls xhtml [130] unlimited
* 5) xls xml [130] unlimited
* 6) xlsx csv [130] unlimited
* 7) xlsx html [130] unlimited
* 8) xlsx txt [130] unlimited
* 9) xlsx xhtml [130] unlimited
* 10) xlsx xml [130] unlimited
* OOXML 0 ms
* 1) docm html [130] unlimited
* 2) docm txt [130] unlimited
* 3) docm xhtml [130] unlimited
* 4) docm xml [130] unlimited
* 5) docx html [130] unlimited
* 6) docx txt [130] unlimited
* 7) docx xhtml [130] unlimited
* 8) docx xml [130] unlimited
* 9) dotm html [130] unlimited
* 10) dotm txt [130] unlimited
* 11) dotm xhtml [130] unlimited
* 12) dotm xml [130] unlimited
* 13) dotx html [130] unlimited
* 14) dotx txt [130] unlimited
* 15) dotx xhtml [130] unlimited
* 16) dotx xml [130] unlimited
* 17) potm html [130] unlimited
* 18) potm txt [130] unlimited
* 19) potm xhtml [130] unlimited
* 20) potm xml [130] unlimited
* 21) potx html [130] unlimited
* 22) potx txt [130] unlimited
* 23) potx xhtml [130] unlimited
* 24) potx xml [130] unlimited
* 25) ppam html [130] unlimited
* 26) ppam txt [130] unlimited
* 27) ppam xhtml [130] unlimited
* 28) ppam xml [130] unlimited
* 29) ppsm html [130] unlimited
* 30) ppsm txt [130] unlimited
* 31) ppsm xhtml [130] unlimited
* 32) ppsm xml [130] unlimited
* 33) ppsx html [130] unlimited
* 34) ppsx txt [130] unlimited
* 35) ppsx xhtml [130] unlimited
* 36) ppsx xml [130] unlimited
* 37) pptm html [130] unlimited
* 38) pptm txt [130] unlimited
* 39) pptm xhtml [130] unlimited
* 40) pptm xml [130] unlimited
* 41) pptx html [130] unlimited
* 42) pptx txt [130] unlimited
* 43) pptx xhtml [130] unlimited
* 44) pptx xml [130] unlimited
* 45) sldm html [130] unlimited
* 46) sldm txt [130] unlimited
* 47) sldm xhtml [130] unlimited
* 48) sldm xml [130] unlimited
* 49) sldx html [130] unlimited
* 50) sldx txt [130] unlimited
* 51) sldx xhtml [130] unlimited
* 52) sldx xml [130] unlimited
* 53) xlam html [130] unlimited
* 54) xlam txt [130] unlimited
* 55) xlam xhtml [130] unlimited
* 56) xlam xml [130] unlimited
* 57) xlsb html [130] unlimited
* 58) xlsb txt [130] unlimited
* 59) xlsb xhtml [130] unlimited
* 60) xlsb xml [130] unlimited
* 61) xlsm html [130] unlimited
* 62) xlsm txt [130] unlimited
* 63) xlsm xhtml [130] unlimited
* 64) xlsm xml [130] unlimited
* 65) xlsx html [130] unlimited
* 66) xlsx txt [130] unlimited
* 67) xlsx xhtml [130] unlimited
* 68) xlsx xml [130] unlimited
* 69) xltm html [130] unlimited
* 70) xltm txt [130] unlimited
* 71) xltm xhtml [130] unlimited
* 72) xltm xml [130] unlimited
* 73) xltx html [130] unlimited
* 74) xltx txt [130] unlimited
* 75) xltx xhtml [130] unlimited
* 76) xltx xml [130] unlimited
* TikaAuto 0 ms
* 1) cdf html [120] unlimited
* 2) cdf txt [120] unlimited
* 3) cdf xhtml [120] unlimited
* 4) cdf xml [120] unlimited
* 5) cpio html [120] unlimited
* 6) cpio txt [120] unlimited
* 7) cpio xhtml [120] unlimited
* 8) cpio xml [120] unlimited
* 9) doc html [120] unlimited
* 10) doc txt [120] unlimited
* 11) doc xhtml [120] unlimited
* 12) doc xml [120] unlimited
* 13) docm html [120] unlimited
* 14) docm txt [120] unlimited
* 15) docm xhtml [120] unlimited
* 16) docm xml [120] unlimited
* 17) docx html [120] unlimited
* 18) docx txt [120] unlimited
* 19) docx xhtml [120] unlimited
* 20) docx xml [120] unlimited
* 21) dotm html [120] unlimited
* 22) dotm txt [120] unlimited
* 23) dotm xhtml [120] unlimited
* 24) dotm xml [120] unlimited
* 25) dotx html [120] unlimited
* 26) dotx txt [120] unlimited
* 27) dotx xhtml [120] unlimited
* 28) dotx xml [120] unlimited
* 29) gzip html [120] unlimited
* 30) gzip txt [120] unlimited
* 31) gzip xhtml [120] unlimited
* 32) gzip xml [120] unlimited
* 33) hdf html [120] unlimited
* 34) hdf txt [120] unlimited
* 35) hdf xhtml [120] unlimited
* 36) hdf xml [120] unlimited
* 37) html html [120] unlimited
* 38) html txt [120] unlimited
* 39) html xhtml [120] unlimited
* 40) html xml [120] unlimited
* 41) jar html [120] unlimited
* 42) jar txt [120] unlimited
* 43) jar xhtml [120] unlimited
* 44) jar xml [120] unlimited
* 45) java html [120] unlimited
* 46) java txt [120] unlimited
* 47) java xhtml [120] unlimited
* 48) java xml [120] unlimited
* 49) key html [120] unlimited
* 50) key txt [120] unlimited
* 51) key xhtml [120] unlimited
* 52) key xml [120] unlimited
* 53) mpp html [120] unlimited
* 54) mpp txt [120] unlimited
* 55) mpp xhtml [120] unlimited
* 56) mpp xml [120] unlimited
* 57) numbers html [120] unlimited
* 58) numbers txt [120] unlimited
* 59) numbers xhtml [120] unlimited
* 60) numbers xml [120] unlimited
* 61) odc html [120] unlimited
* 62) odc txt [120] unlimited
* 63) odc xhtml [120] unlimited
* 64) odc xml [120] unlimited
* 65) odi html [120] unlimited
* 66) odi txt [120] unlimited
* 67) odi xhtml [120] unlimited
* 68) odi xml [120] unlimited
* 69) odm html [120] unlimited
* 70) odm txt [120] unlimited
* 71) odm xhtml [120] unlimited
* 72) odm xml [120] unlimited
* 73) odp html [120] unlimited
* 74) odp txt [120] unlimited
* 75) odp xhtml [120] unlimited
* 76) odp xml [120] unlimited
* 77) ods html [120] unlimited
* 78) ods txt [120] unlimited
* 79) ods xhtml [120] unlimited
* 80) ods xml [120] unlimited
* 81) odt html [120] unlimited
* 82) odt txt [120] unlimited
* 83) odt xhtml [120] unlimited
* 84) odt xml [120] unlimited
* 85) ogx html [120] unlimited
* 86) ogx txt [120] unlimited
* 87) ogx xhtml [120] unlimited
* 88) ogx xml [120] unlimited
* 89) oth html [120] unlimited
* 90) oth txt [120] unlimited
* 91) oth xhtml [120] unlimited
* 92) oth xml [120] unlimited
* 93) otp html [120] unlimited
* 94) otp txt [120] unlimited
* 95) otp xhtml [120] unlimited
* 96) otp xml [120] unlimited
* 97) ots html [120] unlimited
* 98) ots txt [120] unlimited
* 99) ots xhtml [120] unlimited
* 100) ots xml [120] unlimited
* 101) ott html [120] unlimited
* 102) ott txt [120] unlimited
* 103) ott xhtml [120] unlimited
* 104) ott xml [120] unlimited
* 105) pages html [120] unlimited
* 106) pages txt [120] unlimited
* 107) pages xhtml [120] unlimited
* 108) pages xml [120] unlimited
* 109) pdf html [120] unlimited
* 110) pdf txt [120] 25 MB
* 111) pdf xhtml [120] unlimited
* 112) pdf xml [120] unlimited
* 113) potm html [120] unlimited
* 114) potm txt [120] unlimited
* 115) potm xhtml [120] unlimited
* 116) potm xml [120] unlimited
* 117) potx html [120] unlimited
* 118) potx txt [120] unlimited
* 119) potx xhtml [120] unlimited
* 120) potx xml [120] unlimited
* 121) ppam html [120] unlimited
* 122) ppam txt [120] unlimited
* 123) ppam xhtml [120] unlimited
* 124) ppam xml [120] unlimited
* 125) ppsm html [120] unlimited
* 126) ppsm txt [120] unlimited
* 127) ppsm xhtml [120] unlimited
* 128) ppsm xml [120] unlimited
* 129) ppsx html [120] unlimited
* 130) ppsx txt [120] unlimited
* 131) ppsx xhtml [120] unlimited
* 132) ppsx xml [120] unlimited
* 133) ppt html [120] unlimited
* 134) ppt txt [120] unlimited
* 135) ppt xhtml [120] unlimited
* 136) ppt xml [120] unlimited
* 137) pptm html [120] unlimited
* 138) pptm txt [120] unlimited
* 139) pptm xhtml [120] unlimited
* 140) pptm xml [120] unlimited
* 141) pptx html [120] unlimited
* 142) pptx txt [120] unlimited
* 143) pptx xhtml [120] unlimited
* 144) pptx xml [120] unlimited
* 145) rar html [120] unlimited
* 146) rar txt [120] unlimited
* 147) rar xhtml [120] unlimited
* 148) rar xml [120] unlimited
* 149) rss html [120] unlimited
* 150) rss txt [120] unlimited
* 151) rss xhtml [120] unlimited
* 152) rss xml [120] unlimited
* 153) rtf html [120] unlimited
* 154) rtf txt [120] unlimited
* 155) rtf xhtml [120] unlimited
* 156) rtf xml [120] unlimited
* 157) sldm html [120] unlimited
* 158) sldm txt [120] unlimited
* 159) sldm xhtml [120] unlimited
* 160) sldm xml [120] unlimited
* 161) sldx html [120] unlimited
* 162) sldx txt [120] unlimited
* 163) sldx xhtml [120] unlimited
* 164) sldx xml [120] unlimited
* 165) sxw html [120] unlimited
* 166) sxw txt [120] unlimited
* 167) sxw xhtml [120] unlimited
* 168) sxw xml [120] unlimited
* 169) txt html [120] unlimited
* 170) txt txt [120] unlimited
* 171) txt xhtml [120] unlimited
* 172) txt xml [120] unlimited
* 173) vsd html [120] unlimited
* 174) vsd txt [120] unlimited
* 175) vsd xhtml [120] unlimited
* 176) vsd xml [120] unlimited
* 177) xhtml html [120] unlimited
* 178) xhtml txt [120] unlimited
* 179) xhtml xhtml [120] unlimited
* 180) xhtml xml [120] unlimited
* 181) xlam html [120] unlimited
* 182) xlam txt [120] unlimited
* 183) xlam xhtml [120] unlimited
* 184) xlam xml [120] unlimited
* 185) xls html [120] unlimited
* 186) xls txt [120] unlimited
* 187) xls xhtml [120] unlimited
* 188) xls xml [120] unlimited
* 189) xlsb html [120] unlimited
* 190) xlsb txt [120] unlimited
* 191) xlsb xhtml [120] unlimited
* 192) xlsb xml [120] unlimited
* 193) xlsm html [120] unlimited
* 194) xlsm txt [120] unlimited
* 195) xlsm xhtml [120] unlimited
* 196) xlsm xml [120] unlimited
* 197) xlsx html [120] unlimited
* 198) xlsx txt [120] unlimited
* 199) xlsx xhtml [120] unlimited
* 200) xlsx xml [120] unlimited
* 201) xltm html [120] unlimited
* 202) xltm txt [120] unlimited
* 203) xltm xhtml [120] unlimited
* 204) xltm xml [120] unlimited
* 205) xltx html [120] unlimited
* 206) xltx txt [120] unlimited
* 207) xltx xhtml [120] unlimited
* 208) xltx xml [120] unlimited
* 209) xml html [120] unlimited
* 210) xml txt [120] unlimited
* 211) xml xhtml [120] unlimited
* 212) xml xml [120] unlimited
* 213) z html [120] unlimited
* 214) z txt [120] unlimited
* 215) z xhtml [120] unlimited
* 216) z xml [120] unlimited
* TextMining 0 ms
* 1) doc html [130] unlimited
* 2) doc txt [50] unlimited
* 3) doc xhtml [130] unlimited
* 4) doc xml [130] unlimited
* </pre>
*/
public class Tika
{
public static final String ARCHIVE = "Archive";
public static final String OUTLOOK_MSG = "OutlookMsg";
public static final String PDF_BOX = "PdfBox";
public static final String POI_OFFICE = "Office";
public static final String POI = "Poi";
public static final String POI_OO_XML = "OOXML";
public static final String TIKA_AUTO = "TikaAuto";
public static final String TEXT_MINING = "TextMining";
public static final List<String> TRANSFORM_NAMES = ImmutableList.of(
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
public static final String TARGET_MIMETYPE = "--targetMimetype=";
public static final String TARGET_ENCODING = "--targetEncoding=";
public static final String INCLUDE_CONTENTS = "--includeContents";
public static final String NOT_EXTRACT_BOOKMARKS_TEXT = "--notExtractBookmarksText";
public static final String CSV = "csv";
public static final String DOC = "doc";
public static final String DOCX = "docx";
public static final String HTML = "html";
public static final String MSG = "msg";
public static final String PDF = "pdf";
public static final String PPTX = "pptx";
public static final String TXT = "txt";
public static final String XHTML = "xhtml";
public static final String XSLX = "xslx";
public static final String XML = "xml";
public static final String ZIP = "zip";
private final Parser packageParser = new PackageParser();
private final Parser pdfParser = new PDFParser();
private final Parser officeParser = new OfficeParser();
private final Parser autoDetectParser;
private final Parser ooXmlParser = new OOXMLParser();
private final Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
private final PDFParserConfig pdfParserConfig = new PDFParserConfig();
private final DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
{
private final List<String> disabledMediaTypes = ImmutableList.of(MIMETYPE_IMAGE_JPEG,
MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
@Override
public boolean select(Metadata metadata)
{
String contentType = metadata.get(Metadata.CONTENT_TYPE);
if (contentType == null || contentType.equals("") || disabledMediaTypes == null)
{
return true;
}
return !disabledMediaTypes.contains(contentType);
}
};
public Tika() throws TikaException, IOException, SAXException
{
ClassLoader classLoader = getClass().getClassLoader();
URL tikaConfigXml = classLoader.getResource("tika-config.xml");
TikaConfig tikaConfig = new TikaConfig(tikaConfigXml);
autoDetectParser = new AutoDetectParser(tikaConfig);
}
// Method included for developer testing
public static void main(String[] args)
{
long start = System.currentTimeMillis();
try
{
new Tika().transform(args);
}
catch (IllegalArgumentException e)
{
System.err.println("ERROR " + e.getMessage());
System.exit(-1);
}
catch (IllegalStateException | TikaException | IOException | SAXException e)
{
System.err.println("ERROR " + e.getMessage());
e.printStackTrace();
System.exit(-2);
}
System.out.println("Finished in " + (System.currentTimeMillis() - start) + "ms");
}
// Extracts parameters form args
public void transform(String[] args)
{
String transform = null;
String targetMimetype = null;
String targetEncoding = null;
String sourceFilename = null;
String targetFilename = null;
Boolean includeContents = null;
Boolean notExtractBookmarksText = null;
for (String arg : args)
{
if (arg.startsWith("--"))
{
if (INCLUDE_CONTENTS.startsWith(arg))
{
getValue(arg, false, includeContents, INCLUDE_CONTENTS);
includeContents = true;
}
else if (arg.startsWith(TARGET_ENCODING))
{
targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
}
else if (arg.startsWith(TARGET_MIMETYPE))
{
targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
}
else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT))
{
getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
notExtractBookmarksText = true;
}
else
{
throw new IllegalArgumentException("Unexpected argument " + arg);
}
}
else
{
if (transform == null)
{
transform = arg;
}
else if (sourceFilename == null)
{
sourceFilename = arg;
}
else if (targetFilename == null)
{
targetFilename = arg;
}
else
{
throw new IllegalArgumentException("Unexpected argument " + arg);
}
}
}
if (targetFilename == null)
{
throw new IllegalArgumentException("Missing arguments");
}
includeContents = includeContents == null ? false : includeContents;
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
transform(transform, includeContents, notExtractBookmarksText, sourceFilename,
targetFilename, targetMimetype, targetEncoding);
}
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
{
if (value != null)
{
throw new IllegalArgumentException("Duplicate " + optionName);
}
String stringValue = arg.substring(optionName.length()).trim();
if (!valueExpected && stringValue.length() > 0)
{
throw new IllegalArgumentException("Unexpected value with " + optionName);
}
if (valueExpected && stringValue.length() == 0)
{
throw new IllegalArgumentException("Expected value with " + optionName);
}
return stringValue;
}
// Adds transform specific values such as parser and documentSelector.
private void transform(String transform, Boolean includeContents,
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
{
Parser parser = null;
DocumentSelector documentSelector = null;
switch (transform)
{
case ARCHIVE:
parser = packageParser;
break;
case OUTLOOK_MSG:
case POI_OFFICE:
case TEXT_MINING:
parser = officeParser;
break;
case PDF_BOX:
parser = pdfParser;
documentSelector = pdfBoxEmbededDocumentSelector;
break;
case POI:
parser = tikaOfficeDetectParser;
break;
case POI_OO_XML:
parser = ooXmlParser;
break;
case TIKA_AUTO:
parser = autoDetectParser;
break;
}
transform(parser, documentSelector, includeContents, notExtractBookmarksText,
sourceFilename, targetFilename, targetMimetype, targetEncoding);
}
private void transform(Parser parser, DocumentSelector documentSelector,
Boolean includeContents,
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
{
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
OutputStream os = new FileOutputStream(targetFilename);
Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
{
Metadata metadata = new Metadata();
ParseContext context = buildParseContext(documentSelector, includeContents,
notExtractBookmarksText);
ContentHandler handler = getContentHandler(targetMimetype, ow);
parser.parse(is, handler, metadata, context);
}
catch (SAXException | TikaException | IOException e)
{
throw new IllegalStateException(e.getMessage(), e);
}
}
private ContentHandler getContentHandler(String targetMimetype, Writer output)
{
try
{
ContentHandler handler;
if (MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
{
handler = new BodyContentHandler(output);
}
else
{
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
TransformerHandler transformerHandler;
transformerHandler = factory.newTransformerHandler();
transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
transformerHandler.setResult(new StreamResult(output));
handler = transformerHandler;
if (MIMETYPE_HTML.equals(targetMimetype))
{
transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, HTML);
return new ExpandedTitleContentHandler(transformerHandler);
}
else if (MIMETYPE_XHTML.equals(targetMimetype) ||
MIMETYPE_XML.equals(targetMimetype))
{
transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, XML);
}
else if (MIMETYPE_TEXT_CSV.equals(targetMimetype))
{
handler = new CsvContentHandler(output);
}
else
{
throw new IllegalArgumentException("Invalid target mimetype " + targetMimetype);
}
}
return handler;
}
catch (TransformerConfigurationException e)
{
throw new IllegalStateException(e.getMessage(), e);
}
}
/**
* A wrapper around the normal Tika BodyContentHandler for CSV rather encoding than tab separated.
*/
protected static class CsvContentHandler extends BodyContentHandler
{
private static final char[] comma = new char[]{','};
private static final Pattern all_nums = Pattern.compile("[\\d\\.\\-\\+]+");
private boolean inCell = false;
private boolean needsComma = false;
protected CsvContentHandler(Writer output)
{
super(output);
}
@Override
public void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException
{
if (length == 1 && ch[0] == '\t')
{
// Ignore tabs, as they mess up the CSV output
}
else
{
super.ignorableWhitespace(ch, start, length);
}
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException
{
if (inCell)
{
StringBuffer t = new StringBuffer(new String(ch, start, length));
// Quote if not all numbers
if (all_nums.matcher(t).matches())
{
super.characters(ch, start, length);
}
else
{
for (int i = t.length() - 1; i >= 0; i--)
{
if (t.charAt(i) == '\"')
{
// Double up double quotes
t.insert(i, '\"');
i--;
}
}
t.insert(0, '\"');
t.append('\"');
char[] c = t.toString().toCharArray();
super.characters(c, 0, c.length);
}
}
else
{
super.characters(ch, start, length);
}
}
@Override
public void startElement(String uri, String localName, String name,
Attributes atts) throws SAXException
{
if (localName.equals("td"))
{
inCell = true;
if (needsComma)
{
super.characters(comma, 0, 1);
needsComma = true;
}
}
else
{
super.startElement(uri, localName, name, atts);
}
}
@Override
public void endElement(String uri, String localName, String name)
throws SAXException
{
if (localName.equals("td"))
{
needsComma = true;
inCell = false;
}
else
{
if (localName.equals("tr"))
{
needsComma = false;
}
super.endElement(uri, localName, name);
}
}
}
private ParseContext buildParseContext(DocumentSelector documentSelector,
Boolean includeContents, Boolean notExtractBookmarksText)
{
ParseContext context = new ParseContext();
if (documentSelector != null)
{
context.set(DocumentSelector.class, documentSelector);
}
if (notExtractBookmarksText.equals(true))
{
pdfParserConfig.setExtractBookmarksText(false);
// pdfParserConfig is set to override default settings
context.set(PDFParserConfig.class, pdfParserConfig);
}
// If Archive transform
if (includeContents != null)
{
context.set(Parser.class, includeContents ? autoDetectParser : new EmptyParser());
}
return context;
}
}

View File

@@ -0,0 +1,128 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringJoiner;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.logging.LogEntry;
import org.apache.tika.exception.TikaException;
import org.xml.sax.SAXException;
/**
* JavaExecutor implementation for running TIKA transformations. It loads the
* transformation logic in the same JVM (check {@link Tika}).
*/
public class TikaJavaExecutor implements JavaExecutor
{
private final Tika tika;
public TikaJavaExecutor()
{
try
{
tika = new Tika();
}
catch (SAXException | IOException | TikaException e)
{
throw new RuntimeException("Unable to instantiate Tika: " + e.getMessage());
}
}
@Override
public void call(File sourceFile, File targetFile, String... args)
throws TransformException
{
args = buildArgs(sourceFile, targetFile, args);
try
{
tika.transform(args);
}
catch (IllegalArgumentException e)
{
throw new TransformException(BAD_REQUEST.value(), getMessage(e));
}
catch (Exception e)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), getMessage(e));
}
if (!targetFile.exists() || targetFile.length() == 0)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Transformer failed to create an output file");
}
}
private static String getMessage(Exception e)
{
return e.getMessage() == null ? e.getClass().getSimpleName() : e.getMessage();
}
private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
{
ArrayList<String> methodArgs = new ArrayList<>(args.length + 2);
StringJoiner sj = new StringJoiner(" ");
for (String arg : args)
{
addArg(methodArgs, sj, arg);
}
addFileArg(methodArgs, sj, sourceFile);
addFileArg(methodArgs, sj, targetFile);
LogEntry.setOptions(sj.toString());
return methodArgs.toArray(new String[0]);
}
private static void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
{
if (arg != null)
{
sj.add(arg);
methodArgs.add(arg);
}
}
private static void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
{
if (arg != null)
{
String path = arg.getAbsolutePath();
int i = path.lastIndexOf('.');
String ext = i == -1 ? "???" : path.substring(i + 1);
sj.add(ext);
methodArgs.add(path);
}
}
}

View File

@@ -0,0 +1,120 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
///////// THIS FILE WAS A COPY OF THE CODE IN alfresco-repository /////////////
/**
* <a href="http://tika.apache.org/Apache Tika">Apache Tika</a> assumes that
* you either know exactly what your content is, or that
* you'll leave it to auto-detection.
* Within Alfresco, we usually do know. However, from time
* to time, we don't know if we have one of the old or one
* of the new office files (eg .xls and .xlsx).
* This class allows automatically selects the appropriate
* old (OLE2) or new (OOXML) Tika parser as required.
*
* @author Nick Burch
*/
public class TikaOfficeDetectParser implements Parser
{
private final Parser ole2Parser = new OfficeParser();
private final Parser ooxmlParser = new OOXMLParser();
public Set<MediaType> getSupportedTypes(ParseContext parseContext)
{
Set<MediaType> types = new HashSet<>();
types.addAll(ole2Parser.getSupportedTypes(parseContext));
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
return types;
}
public void parse(InputStream stream,
ContentHandler handler, Metadata metadata,
ParseContext parseContext) throws IOException, SAXException,
TikaException
{
byte[] initial4 = new byte[4];
InputStream wrapped;
// Preserve TikaInputStreams as TikaInputStreams as they require less memory to process
if (stream.markSupported())
{
stream.mark(initial4.length);
IOUtils.readFully(stream, initial4);
stream.reset();
wrapped = stream;
}
else
{
PushbackInputStream inp = new PushbackInputStream(stream, 4);
IOUtils.readFully(inp, initial4);
inp.unread(initial4);
wrapped = inp;
}
// Which is it?
if (initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
{
ooxmlParser.parse(wrapped, handler, metadata, parseContext);
}
else
{
ole2Parser.parse(wrapped, handler, metadata, parseContext);
}
}
/**
* @deprecated This method will be removed in Apache Tika 1.0.
*/
public void parse(InputStream stream,
ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException
{
parse(stream, handler, metadata, new ParseContext());
}
}

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<properties>
<!-- This property, when set, will hide the start up warnings of tika for libraries are missing. -->
<!-- See https://issues.apache.org/jira/browse/TIKA-2490 -->
<service-loader initializableProblemHandler="ignore"/>
</properties>