mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-10-08 14:51:18 +00:00
ATS-671: Split engines into fat & skinny modules (ATS-674) (#192)
Each transform engine project has been separated into 2 modules so that an executable and non-executable jar can be created. Modules have been renamed such that *docker* has been removed from the artifactIds and project names. Co-authored-by: Erik Knizat <erik.knizat@alfresco.com> Co-authored-by: David Edwards <david.edwards@alfresco.com>
This commit is contained in:
@@ -0,0 +1 @@
|
||||
target/docker/
|
@@ -0,0 +1,34 @@
|
||||
# Image provides a container in which to run Tika transformations for Alfresco Content Services.

# Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0.

FROM alfresco/alfresco-base-java:11.0.1-openjdk-centos-7-72b88c6f1f4c

# NOTE(review): this is a github.com "blob" URL, so curl presumably stores the HTML page
# rather than the raw license text - confirm and consider switching to the raw URL.
ENV APACHE_LICENSE_FILE=https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt
# Extra JVM arguments; expanded by the shell-form ENTRYPOINT at the bottom of this file.
ENV JAVA_OPTS=""

# Set default user information
ARG GROUPNAME=Alfresco
ARG GROUPID=1000
ARG TIKAUSERNAME=tika
ARG USERID=33004

# The ${env.project_*} placeholders are not Docker variables; presumably they are filled in
# from the Maven properties of the same name when the image is built - TODO confirm.
COPY target/${env.project_artifactId}-${env.project_version}.jar /usr/bin

# Give the jar a version-independent name (hard link) and fetch the Apache license text.
RUN ln /usr/bin/${env.project_artifactId}-${env.project_version}.jar /usr/bin/${env.project_artifactId}.jar && \
    curl -s -S $APACHE_LICENSE_FILE -o Apache\ 2.0.txt && \
    yum clean all

# COPY instead of ADD: these are plain local files/directories, so none of ADD's extra
# behaviour (remote URLs, automatic archive extraction) is wanted here.
COPY target/generated-resources/licenses /licenses
COPY target/generated-resources/licenses.xml /licenses/
COPY target/generated-sources/license/THIRD-PARTY.txt /licenses/

# Create the unprivileged group/user and hand the jar's group ownership to that group.
RUN groupadd -g ${GROUPID} ${GROUPNAME} && \
    useradd -u ${USERID} -G ${GROUPNAME} ${TIKAUSERNAME} && \
    chgrp -R ${GROUPNAME} /usr/bin/${env.project_artifactId}.jar

EXPOSE 8090

USER ${TIKAUSERNAME}

# Shell form on purpose: $JAVA_OPTS has to be expanded by a shell.
ENTRYPOINT java $JAVA_OPTS -jar /usr/bin/${env.project_artifactId}.jar
|
360
alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml
Normal file
360
alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml
Normal file
@@ -0,0 +1,360 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>alfresco-transform-tika-boot</artifactId>
|
||||
<name>Alfresco Tika Transformer Spring Boot</name>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<parent>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-transform-core</artifactId>
|
||||
<version>2.2.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
<image.name>alfresco/alfresco-tika</image.name>
|
||||
<image.registry>quay.io</image.registry>
|
||||
<env.project_artifactId>${project.artifactId}</env.project_artifactId>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-transformer-base</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-transformer-base</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-transform-tika</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-thymeleaf</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.vaadin.external.google</groupId>
|
||||
<artifactId>android-json</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.dom4j</groupId>
|
||||
<artifactId>dom4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Tika -->
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-core</artifactId>
|
||||
<version>1.21-20190624-alfresco-patched</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-parsers</artifactId>
|
||||
<version>1.21-20190624-alfresco-patched</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.tdunning</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcprov-jdk15on</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcmail-jdk15on</artifactId>
|
||||
</exclusion>
|
||||
<!-- TODO ATS-534 check transformations not affected by this missing quartz lib -->
|
||||
<exclusion>
|
||||
<groupId>org.quartz-scheduler</groupId>
|
||||
<artifactId>quartz</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- for Apache Tika Parsers - eg. encrypted PDF -->
|
||||
<dependency>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcprov-jdk15on</artifactId>
|
||||
<version>1.64</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcmail-jdk15on</artifactId>
|
||||
<version>1.64</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache POI -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>${dependency.poi.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>${dependency.poi.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>${dependency.poi.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache PDFBox -->
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>repackage</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-failsafe-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>license-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>docker-it-setup</id>
|
||||
<!-- Starts the ActiveMQ and Tika containers used by the integration tests -->
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<images>
|
||||
<image>
|
||||
<alias>activemq</alias>
|
||||
<name>alfresco/alfresco-activemq:5.15.8</name>
|
||||
<run>
|
||||
<hostname>activemq</hostname>
|
||||
<ports>
|
||||
<port>8161:8161</port>
|
||||
<port>5672:5672</port>
|
||||
<port>61616:61616</port>
|
||||
</ports>
|
||||
<wait>
|
||||
<log>Apache ActiveMQ 5.15.8 .* started</log>
|
||||
<time>20000</time>
|
||||
<kill>500</kill>
|
||||
<shutdown>100</shutdown>
|
||||
<exec>
|
||||
<preStop>kill 1</preStop>
|
||||
<preStop>kill -9 1</preStop>
|
||||
</exec>
|
||||
</wait>
|
||||
</run>
|
||||
</image>
|
||||
|
||||
<image>
|
||||
<alias>tika</alias>
|
||||
<name>${image.name}:${image.tag}</name>
|
||||
<run>
|
||||
<ports>
|
||||
<port>8090:8090</port>
|
||||
</ports>
|
||||
<wait>
|
||||
<http>
|
||||
<url>http://localhost:8090/transform/config</url>
|
||||
<method>GET</method>
|
||||
<status>200...299</status>
|
||||
</http>
|
||||
<time>300000</time>
|
||||
<kill>500</kill>
|
||||
<shutdown>100</shutdown>
|
||||
<exec>
|
||||
<preStop>kill 1</preStop>
|
||||
<preStop>kill -9 1</preStop>
|
||||
</exec>
|
||||
</wait>
|
||||
</run>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>local</id>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>fabric8-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<images>
|
||||
<image>
|
||||
<name>${image.name}:${image.tag}</name>
|
||||
<build>
|
||||
<dockerFileDir>${project.basedir}/</dockerFileDir>
|
||||
<buildOptions>
|
||||
<squash>true</squash>
|
||||
</buildOptions>
|
||||
</build>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>build-image</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>build</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>internal</id>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>fabric8-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<images>
|
||||
<!-- QuayIO image -->
|
||||
<image>
|
||||
<name>${image.name}:${image.tag}</name>
|
||||
<registry>${image.registry}</registry>
|
||||
<build>
|
||||
<dockerFileDir>${project.basedir}/</dockerFileDir>
|
||||
<buildOptions>
|
||||
<squash>true</squash>
|
||||
</buildOptions>
|
||||
</build>
|
||||
</image>
|
||||
<!-- DockerHub image -->
|
||||
<image>
|
||||
<name>${image.name}:${image.tag}</name>
|
||||
<build>
|
||||
<dockerFileDir>${project.basedir}/</dockerFileDir>
|
||||
<buildOptions>
|
||||
<squash>true</squash>
|
||||
</buildOptions>
|
||||
</build>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>build-image</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>build</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>push-image</id>
|
||||
<phase>install</phase>
|
||||
<goals>
|
||||
<goal>push</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>release</id>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>fabric8-maven-plugin</artifactId>
|
||||
<configuration combine.self="override">
|
||||
<images>
|
||||
<!-- QuayIO image -->
|
||||
<image>
|
||||
<name>${image.name}:${project.version}</name>
|
||||
<registry>${image.registry}</registry>
|
||||
<build>
|
||||
<dockerFileDir>${project.basedir}/</dockerFileDir>
|
||||
<buildOptions>
|
||||
<squash>true</squash>
|
||||
</buildOptions>
|
||||
</build>
|
||||
</image>
|
||||
<!-- DockerHub image -->
|
||||
<image>
|
||||
<name>${image.name}:${project.version}</name>
|
||||
<build>
|
||||
<dockerFileDir>${project.basedir}/</dockerFileDir>
|
||||
<buildOptions>
|
||||
<squash>true</squash>
|
||||
</buildOptions>
|
||||
</build>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>build-push-image</id>
|
||||
<phase>deploy</phase>
|
||||
<goals>
|
||||
<goal>build</goal>
|
||||
<goal>push</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
@@ -0,0 +1,46 @@
|
||||
# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
|
||||
#-------------------------------------------------------------------------------
|
||||
# Already used licenses in project :
|
||||
# - (MIT-style) netCDF C library license
|
||||
# - Apache 2.0
|
||||
# - Apache License 2.0
|
||||
# - Apache License v2.0
|
||||
# - Apache License, Version 2.0
|
||||
# - Apache License, version 2.0
|
||||
# - Apache Software License - Version 2.0
|
||||
# - BSD
|
||||
# - BSD 3-clause New License
|
||||
# - BSD License
|
||||
# - Bouncy Castle Licence
|
||||
# - CDDL + GPLv2 with classpath exception
|
||||
# - CDDL, v1.0
|
||||
# - EPL 2.0
|
||||
# - Eclipse Public License - v 1.0
|
||||
# - Eclipse Public License, Version 1.0
|
||||
# - GNU Lesser General Public License
|
||||
# - GNU Lesser General Public License, Version 2.1
|
||||
# - GPL2 w/ CPE
|
||||
# - LGPL, v2.1 or later
|
||||
# - LGPL, version 2.1
|
||||
# - MIT License
|
||||
# - MIT License (MIT)
|
||||
# - MIT license
|
||||
# - Mozilla Public License 1.1 (MPL 1.1)
|
||||
# - New BSD license
|
||||
# - OGC copyright
|
||||
# - Public
|
||||
# - Public Domain
|
||||
# - Public Domain, per Creative Commons CC0
|
||||
# - Similar to Apache License but with the acknowledgment clause removed
|
||||
# - Specification License
|
||||
# - The Apache License, Version 2.0
|
||||
# - The Apache Software License, Version 2.0
|
||||
# - The BSD License
|
||||
# - The MIT License
|
||||
# - UnRar License
|
||||
#-------------------------------------------------------------------------------
|
||||
# Please fill the missing licenses for dependencies :
|
||||
#
|
||||
#
|
||||
#Mon Aug 19 18:06:38 EEST 2019
|
||||
net.jcip--jcip-annotations--1.0=Public
|
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.event.EventListener;
|
||||
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
|
||||
public class Application
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(Application.class);
|
||||
|
||||
@Value("${container.name}")
|
||||
private String containerName;
|
||||
|
||||
@Bean
|
||||
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
|
||||
{
|
||||
return registry -> registry.config().commonTags("containerName", containerName);
|
||||
}
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
public void startup()
|
||||
{
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
|
||||
logger.info("Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
|
||||
logger.info("Starting application components... Done");
|
||||
}
|
||||
}
|
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static java.lang.Boolean.parseBoolean;
|
||||
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
|
||||
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
|
||||
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
|
||||
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
|
||||
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
|
||||
import static org.alfresco.transformer.fs.FileManager.createAttachment;
|
||||
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
|
||||
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
|
||||
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.alfresco.transformer.executors.TikaJavaExecutor;
|
||||
import org.alfresco.transformer.logging.LogEntry;
|
||||
import org.alfresco.transformer.probes.ProbeTestTransform;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
|
||||
* Controller for the Docker based Tika transformers.
|
||||
*
|
||||
* Status Codes:
|
||||
*
|
||||
* 200 Success
|
||||
* 400 Bad Request: Invalid target mimetype <mimetype>
|
||||
* 400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)
|
||||
* 400 Bad Request: Request parameter <name> is of the wrong type
|
||||
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
|
||||
* 400 Bad Request: The source filename was not supplied
|
||||
* 500 Internal Server Error: (no message with low level IO problems)
|
||||
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
|
||||
* 500 Internal Server Error: Transformer version check exit code was not 0
|
||||
* 500 Internal Server Error: Transformer version check failed to create any output
|
||||
* 500 Internal Server Error: Could not read the target file
|
||||
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
|
||||
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
|
||||
* 500 Internal Server Error: Filename encoding error
|
||||
* 507 Insufficient Storage: Failed to store the source file
|
||||
*/
|
||||
@Controller
|
||||
public class TikaController extends AbstractTransformerController
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(TikaController.class);
|
||||
|
||||
private TikaJavaExecutor javaExecutor = new TikaJavaExecutor();
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "Tika";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String version()
|
||||
{
|
||||
return "Tika available";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProbeTestTransform getProbeTestTransform()
|
||||
{
|
||||
// See the Javadoc on this method and Probes.md for the choice of these values.
|
||||
// the livenessPercentage is a little large as Tika does tend to suffer from slow transforms that class with a gc.
|
||||
return new ProbeTestTransform(this, "quick.pdf", "quick.txt",
|
||||
60, 16, 400, 10240, 60 * 30 + 1, 60 * 15 + 20)
|
||||
{
|
||||
@Override
|
||||
protected void executeTransformCommand(File sourceFile, File targetFile)
|
||||
{
|
||||
javaExecutor.call(sourceFile, targetFile, PDF_BOX,
|
||||
TARGET_MIMETYPE + MIMETYPE_TEXT_PLAIN, TARGET_ENCODING + "UTF-8");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
|
||||
public ResponseEntity<Resource> transform(HttpServletRequest request,
|
||||
@RequestParam("file") final MultipartFile sourceMultipartFile,
|
||||
@RequestParam("sourceMimetype") final String sourceMimetype,
|
||||
@RequestParam("targetExtension") final String targetExtension,
|
||||
@RequestParam("targetMimetype") final String targetMimetype,
|
||||
@RequestParam(value = "targetEncoding", required = false, defaultValue = "UTF-8") final String targetEncoding,
|
||||
|
||||
@RequestParam(value = "timeout", required = false) final Long timeout,
|
||||
@RequestParam(value = "testDelay", required = false) final Long testDelay,
|
||||
|
||||
@RequestParam(value = "includeContents", required = false) final Boolean includeContents,
|
||||
@RequestParam(value = "notExtractBookmarksText", required = false) final Boolean notExtractBookmarksText)
|
||||
{
|
||||
final String targetFilename = createTargetFileName(
|
||||
sourceMultipartFile.getOriginalFilename(), targetExtension);
|
||||
|
||||
getProbeTestTransform().incrementTransformerCount();
|
||||
|
||||
final File sourceFile = createSourceFile(request, sourceMultipartFile);
|
||||
final File targetFile = createTargetFile(request, targetFilename);
|
||||
// Both files are deleted by TransformInterceptor.afterCompletion
|
||||
|
||||
// TODO Consider streaming the request and response rather than using temporary files
|
||||
// https://www.logicbig.com/tutorials/spring-framework/spring-web-mvc/streaming-response-body.html
|
||||
|
||||
final Map<String, String> transformOptions = createTransformOptions(
|
||||
"includeContents", includeContents,
|
||||
"notExtractBookmarksText", notExtractBookmarksText,
|
||||
"targetEncoding", targetEncoding);
|
||||
|
||||
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
|
||||
transformOptions);
|
||||
|
||||
javaExecutor.call(sourceFile, targetFile, transform,
|
||||
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
|
||||
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
|
||||
|
||||
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
|
||||
|
||||
LogEntry.setTargetSize(targetFile.length());
|
||||
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
|
||||
time += LogEntry.addDelay(testDelay);
|
||||
getProbeTestTransform().recordTransformTime(time);
|
||||
|
||||
return body;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processTransform(final File sourceFile, final File targetFile,
|
||||
final String sourceMimetype, final String targetMimetype,
|
||||
final Map<String, String> transformOptions, final Long timeout)
|
||||
{
|
||||
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
|
||||
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
|
||||
|
||||
final boolean includeContents = parseBoolean(
|
||||
transformOptions.getOrDefault("includeContents", "false"));
|
||||
final boolean notExtractBookmarksText = parseBoolean(
|
||||
transformOptions.getOrDefault("notExtractBookmarksText", "false"));
|
||||
final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
|
||||
|
||||
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
|
||||
transformOptions);
|
||||
|
||||
javaExecutor.call(sourceFile, targetFile, transform,
|
||||
includeContents ? INCLUDE_CONTENTS : null,
|
||||
notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
|
||||
}
|
||||
}
|
@@ -0,0 +1,2 @@
|
||||
queue:
|
||||
engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:org.alfresco.transform.engine.tika.acs}
|
@@ -0,0 +1,508 @@
|
||||
{
|
||||
"transformOptions": {
|
||||
"tikaOptions": [
|
||||
{"value": {"name": "targetEncoding"}}
|
||||
],
|
||||
"archiveOptions": [
|
||||
{"value": {"name": "includeContents"}},
|
||||
{"value": {"name": "targetEncoding"}}
|
||||
],
|
||||
"pdfboxOptions": [
|
||||
{"value": {"name": "notExtractBookmarksText"}},
|
||||
{"value": {"name": "targetEncoding"}}
|
||||
]
|
||||
},
|
||||
"transformers": [
|
||||
{
|
||||
"transformerName": "Archive",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/x-cpio", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-cpio", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-cpio", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-cpio", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/java-archive", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/java-archive", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/java-archive", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/java-archive", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/x-tar", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-tar", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-tar", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-tar", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/zip", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/zip", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/zip", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/zip", "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"archiveOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "OutlookMsg",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"tikaOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "PdfBox",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "text/csv"},
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"pdfboxOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "Office",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/msword", "priority": 60, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-project", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-outlook", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.visio", "priority": 55, "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"tikaOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "Poi",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/csv"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/csv"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 65, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"tikaOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "OOXML",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 60, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"tikaOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "TikaAuto",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-cpio", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/java-archive", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/msword", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document" , "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/x-gzip", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-gzip", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-gzip", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-gzip", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/x-hdf", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-hdf", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-hdf", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-hdf", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "text/html", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "text/html", "priority": 60, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "text/html", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "text/html", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "text/x-java-source", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "text/x-java-source", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "text/x-java-source", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "text/x-java-source", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/ogg", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/ogg", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/ogg", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/ogg", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/rss+xml", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/rss+xml", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/rss+xml", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/rss+xml", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/rtf", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/rtf", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/rtf", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/rtf", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.sun.xml.writer", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "text/plain", "priority": 55, "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "text/xml", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "text/xml", "priority": 55, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "text/xml", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "text/xml", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "priority": 55, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "text/xml"},
|
||||
|
||||
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/x-compress", "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"tikaOptions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"transformerName": "TextMining",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "text/html"},
|
||||
{"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "text/plain"},
|
||||
{"sourceMediaType": "application/msword", "priority": 65, "targetMediaType": "application/xhtml+xml"},
|
||||
{"sourceMediaType": "application/msword", "targetMediaType": "text/xml"}
|
||||
],
|
||||
"transformOptions": [
|
||||
"tikaOptions"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
Binary file not shown.
@@ -0,0 +1,27 @@
|
||||
<html xmlns:th="http://www.thymeleaf.org">
|
||||
<body>
|
||||
|
||||
<div>
|
||||
<h2>Tika Test Transformations</h2>
|
||||
<form method="POST" enctype="multipart/form-data" action="/transform">
|
||||
<table>
|
||||
<tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
|
||||
<tr><td><div style="text-align:right">sourceMimetype *</div></td><td><input type="text" name="sourceMimetype" value="application/msword" /></td></tr>
|
||||
<tr><td><div style="text-align:right">targetExtension *</div></td><td><input type="text" name="targetExtension" value="txt" /></td></tr>
|
||||
<tr><td><div style="text-align:right">targetMimetype *</div></td><td><input type="text" name="targetMimetype" value="text/plain" /></td></tr>
|
||||
<tr><td><div style="text-align:right">targetEncoding *</div></td><td><input type="text" name="targetEncoding" value="UTF-8" /></td></tr>
|
||||
<tr><td><div style="text-align:right">includeContents (archive) *</div></td><td><input type="checkbox" name="includeContents" value="true" /></td></tr>
|
||||
<tr><td><div style="text-align:right">timeout</div></td><td><input type="text" name="timeout" value="" /></td></tr>
|
||||
<tr><td><div style="text-align:right">testDelay</div></td><td><input type="text" name="testDelay" value="" /></td></tr>
|
||||
<tr><td><div style="text-align:right">notExtractBookmarksText</div></td><td><input type="checkbox" name="notExtractBookmarksText" value="true" /></td></tr>
|
||||
<tr><td></td><td><input type="submit" value="Transform" /></td></tr>
|
||||
</table>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<a href="/log">Log entries</a>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -0,0 +1,594 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static java.nio.file.Files.readAllBytes;
|
||||
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
|
||||
import static org.alfresco.transformer.executors.Tika.CSV;
|
||||
import static org.alfresco.transformer.executors.Tika.DOC;
|
||||
import static org.alfresco.transformer.executors.Tika.DOCX;
|
||||
import static org.alfresco.transformer.executors.Tika.HTML;
|
||||
import static org.alfresco.transformer.executors.Tika.MSG;
|
||||
import static org.alfresco.transformer.executors.Tika.OUTLOOK_MSG;
|
||||
import static org.alfresco.transformer.executors.Tika.PDF;
|
||||
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
|
||||
import static org.alfresco.transformer.executors.Tika.POI;
|
||||
import static org.alfresco.transformer.executors.Tika.POI_OFFICE;
|
||||
import static org.alfresco.transformer.executors.Tika.POI_OO_XML;
|
||||
import static org.alfresco.transformer.executors.Tika.PPTX;
|
||||
import static org.alfresco.transformer.executors.Tika.TEXT_MINING;
|
||||
import static org.alfresco.transformer.executors.Tika.TIKA_AUTO;
|
||||
import static org.alfresco.transformer.executors.Tika.TXT;
|
||||
import static org.alfresco.transformer.executors.Tika.XHTML;
|
||||
import static org.alfresco.transformer.executors.Tika.XML;
|
||||
import static org.alfresco.transformer.executors.Tika.XSLX;
|
||||
import static org.alfresco.transformer.executors.Tika.ZIP;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_HTML;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OUTLOOK_MSG;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PDF;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_CSV;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyLong;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.http.HttpHeaders.ACCEPT;
|
||||
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
|
||||
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
|
||||
import static org.springframework.http.HttpStatus.CREATED;
|
||||
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
|
||||
import static org.springframework.http.MediaType.APPLICATION_PDF_VALUE;
|
||||
import static org.springframework.http.MediaType.TEXT_PLAIN_VALUE;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||
import static org.springframework.util.StringUtils.getFilenameExtension;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.alfresco.transform.client.model.TransformReply;
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.alfresco.transformer.executors.RuntimeExec;
|
||||
import org.alfresco.transformer.executors.TikaJavaExecutor;
|
||||
import org.alfresco.transformer.model.FileRefEntity;
|
||||
import org.alfresco.transformer.model.FileRefResponse;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.stubbing.Answer;
|
||||
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
|
||||
import org.springframework.boot.test.mock.mockito.SpyBean;
|
||||
import org.springframework.core.io.FileSystemResource;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
import org.springframework.test.web.servlet.MvcResult;
|
||||
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
||||
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
|
||||
|
||||
/**
|
||||
* Test the TikaController without a server.
|
||||
* Super class includes tests for the AbstractTransformerController.
|
||||
*/
|
||||
@RunWith(SpringRunner.class)
|
||||
@WebMvcTest(TikaController.class)
|
||||
public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
{
|
||||
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
|
||||
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
|
||||
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
|
||||
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dogs";
|
||||
private static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
|
||||
|
||||
@Mock
|
||||
private RuntimeExec.ExecutionResult mockExecutionResult;
|
||||
|
||||
@Mock
|
||||
private RuntimeExec mockTransformCommand;
|
||||
|
||||
@Mock
|
||||
private RuntimeExec mockCheckCommand;
|
||||
|
||||
@SpyBean
|
||||
private TikaController controller;
|
||||
|
||||
private String targetEncoding = "UTF-8";
|
||||
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
@Before
|
||||
public void before()
|
||||
{
|
||||
sourceExtension = "pdf";
|
||||
targetExtension = "txt";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mockTransformCommand(String sourceExtension,
|
||||
String targetExtension, String sourceMimetype,
|
||||
boolean readTargetFileBytes) throws IOException
|
||||
{
|
||||
this.sourceExtension = sourceExtension;
|
||||
this.targetExtension = targetExtension;
|
||||
this.sourceMimetype = sourceMimetype;
|
||||
|
||||
expectedOptions = null;
|
||||
expectedSourceSuffix = null;
|
||||
expectedSourceFileBytes = readTestFile(sourceExtension);
|
||||
expectedTargetFileBytes = readTargetFileBytes ? readTestFile(targetExtension) : null;
|
||||
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype,
|
||||
expectedSourceFileBytes);
|
||||
|
||||
when(mockTransformCommand.execute(any(), anyLong())).thenAnswer(
|
||||
(Answer<RuntimeExec.ExecutionResult>) invocation -> {
|
||||
Map<String, String> actualProperties = invocation.getArgument(0);
|
||||
assertEquals("There should be 3 properties", 3, actualProperties.size());
|
||||
|
||||
String actualOptions = actualProperties.get("options");
|
||||
String actualSource = actualProperties.get("source");
|
||||
String actualTarget = actualProperties.get("target");
|
||||
String actualTargetExtension = getFilenameExtension(actualTarget);
|
||||
|
||||
assertNotNull(actualSource);
|
||||
assertNotNull(actualTarget);
|
||||
if (expectedSourceSuffix != null)
|
||||
{
|
||||
assertTrue(
|
||||
"The source file \"" + actualSource + "\" should have ended in \"" + expectedSourceSuffix + "\"",
|
||||
actualSource.endsWith(expectedSourceSuffix));
|
||||
actualSource = actualSource.substring(0,
|
||||
actualSource.length() - expectedSourceSuffix.length());
|
||||
}
|
||||
|
||||
assertNotNull(actualOptions);
|
||||
if (expectedOptions != null)
|
||||
{
|
||||
assertEquals("expectedOptions", expectedOptions, actualOptions);
|
||||
}
|
||||
|
||||
Long actualTimeout = invocation.getArgument(1);
|
||||
assertNotNull(actualTimeout);
|
||||
if (expectedTimeout != null)
|
||||
{
|
||||
assertEquals("expectedTimeout", expectedTimeout, actualTimeout);
|
||||
}
|
||||
|
||||
// Copy a test file into the target file location if it exists
|
||||
int i = actualTarget.lastIndexOf('_');
|
||||
if (i >= 0)
|
||||
{
|
||||
String testFilename = actualTarget.substring(i + 1);
|
||||
File testFile = getTestFile(testFilename, false);
|
||||
File targetFile = new File(actualTarget);
|
||||
generateTargetFileFromResourceFile(actualTargetExtension, testFile,
|
||||
targetFile);
|
||||
}
|
||||
|
||||
// Check the supplied source file has not been changed.
|
||||
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
|
||||
assertArrayEquals("Source file is not the same", expectedSourceFileBytes,
|
||||
actualSourceFileBytes);
|
||||
|
||||
return mockExecutionResult;
|
||||
});
|
||||
|
||||
when(mockExecutionResult.getExitValue()).thenReturn(0);
|
||||
when(mockExecutionResult.getStdErr()).thenReturn("STDERROR");
|
||||
when(mockExecutionResult.getStdOut()).thenReturn("STDOUT");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractTransformerController getController()
|
||||
{
|
||||
return controller;
|
||||
}
|
||||
|
||||
private void transform(String transform, String sourceExtension, String targetExtension,
|
||||
String sourceMimetype, String targetMimetype,
|
||||
Boolean includeContents, String expectedContentContains) throws Exception
|
||||
{
|
||||
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
|
||||
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
|
||||
this.targetMimetype = targetMimetype;
|
||||
|
||||
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
|
||||
MockHttpServletRequestBuilder requestBuilder = includeContents == null
|
||||
? mockMvcRequest("/transform", sourceFile,
|
||||
"targetExtension", this.targetExtension)
|
||||
: mockMvcRequest("/transform", sourceFile,
|
||||
"targetExtension", this.targetExtension, "includeContents", includeContents.toString());
|
||||
MvcResult result = mockMvc.perform(requestBuilder)
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition",
|
||||
"attachment; filename*= UTF-8''quick." + this.targetExtension)).
|
||||
andReturn();
|
||||
String content = result.getResponse().getContentAsString();
|
||||
assertTrue("The content did not include \"" + expectedContentContains,
|
||||
content.contains(expectedContentContains));
|
||||
}
|
||||
|
||||
@Override
|
||||
// Add extra required parameters to the request.
|
||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
|
||||
String... params)
|
||||
{
|
||||
return super.mockMvcRequest(url, sourceFile, params)
|
||||
.param("targetEncoding", targetEncoding)
|
||||
.param("targetMimetype", targetMimetype)
|
||||
.param("sourceMimetype", sourceMimetype);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void simpleTransformTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.simpleTransformTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void testDelayTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.testDelayTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void noTargetFileTest()
|
||||
{
|
||||
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
|
||||
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
|
||||
}
|
||||
|
||||
// --- Super class tests (need modified setup) ---
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void dotDotSourceFilenameTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.dotDotSourceFilenameTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void noExtensionSourceFilenameTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.noExtensionSourceFilenameTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void badSourceFilenameTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.badSourceFilenameTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void blankSourceFilenameTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.blankSourceFilenameTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void noTargetExtensionTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.noTargetExtensionTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void calculateMaxTime() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
super.calculateMaxTime();
|
||||
}
|
||||
|
||||
// --- General Tika tests ---
|
||||
|
||||
@Test
|
||||
public void badEncodingTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
targetEncoding = "rubbish";
|
||||
mockMvc.perform(
|
||||
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
|
||||
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
|
||||
}
|
||||
|
||||
// --- Archive ---
|
||||
|
||||
@Test
|
||||
public void zipToTextArchiveTest() throws Exception
|
||||
{
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void zipToTextIncludeArchiveTest() throws Exception
|
||||
{
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dog\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dog" +
|
||||
"\n" +
|
||||
"\n");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void zipToTextExcludeArchiveTest() throws Exception
|
||||
{
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
|
||||
false, "\n" +
|
||||
"folder/subfolder/quick.jpg\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.doc\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.txt\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.xml\n" +
|
||||
"\n");
|
||||
}
|
||||
|
||||
// --- OutlookMsg ---
|
||||
|
||||
@Test
|
||||
public void msgToTxtOutlookMsgTest() throws Exception
|
||||
{
|
||||
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_MSG_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- PdfBox ---
|
||||
|
||||
@Test
|
||||
public void pdfToTxtPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToCsvPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToXmlPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
|
||||
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToXhtmlPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
|
||||
EXPECTED_XHTML_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToHtmlPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
|
||||
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||
}
|
||||
|
||||
// --- Office ---
|
||||
|
||||
@Test
|
||||
public void msgToTxtOfficeTest() throws Exception
|
||||
{
|
||||
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_MSG_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void docToTxtOfficeTest() throws Exception
|
||||
{
|
||||
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- Poi ---
|
||||
|
||||
@Test
|
||||
public void xslxToCsvPoiTest() throws Exception
|
||||
{
|
||||
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
|
||||
EXPECTED_CSV_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- OOXML ---
|
||||
|
||||
@Test
|
||||
public void docxToTxtOoXmlTest() throws Exception
|
||||
{
|
||||
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pptxToTxtOoXmlTest() throws Exception
|
||||
{
|
||||
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- TikaAuto ---
|
||||
|
||||
@Test
|
||||
public void ppxtToTxtTikaAutoTest() throws Exception
|
||||
{
|
||||
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void doctToTxtTikaAutoTest() throws Exception
|
||||
{
|
||||
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- TextMining ---
|
||||
|
||||
@Test
|
||||
public void docToTxtTextMiningTest() throws Exception
|
||||
{
|
||||
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToTxtExtractBookmarksTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
mockMvc.perform(
|
||||
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param(
|
||||
"notExtractBookmarksText", "true"))
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition",
|
||||
"attachment; filename*= UTF-8''quick." + targetExtension));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void updateTransformRequestWithSpecificOptions(TransformRequest transformRequest)
|
||||
{
|
||||
transformRequest.setSourceExtension(sourceExtension);
|
||||
transformRequest.setTargetExtension(targetExtension);
|
||||
transformRequest.setSourceMediaType(APPLICATION_PDF_VALUE);
|
||||
transformRequest.setTargetMediaType(TEXT_PLAIN_VALUE);
|
||||
transformRequest.getTransformRequestOptions().put("targetEncoding", "UTF-8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPojoTransform() throws Exception
|
||||
{
|
||||
// Files
|
||||
String sourceFileRef = UUID.randomUUID().toString();
|
||||
File sourceFile = getTestFile("quick." + sourceExtension, true);
|
||||
String targetFileRef = UUID.randomUUID().toString();
|
||||
|
||||
// Transformation Request POJO
|
||||
TransformRequest transformRequest = new TransformRequest();
|
||||
transformRequest.setRequestId("1");
|
||||
transformRequest.setSchema(1);
|
||||
transformRequest.setClientData("Alfresco Digital Business Platform");
|
||||
transformRequest.setTransformRequestOptions(new HashMap<>());
|
||||
transformRequest.setSourceReference(sourceFileRef);
|
||||
transformRequest.setSourceExtension(sourceExtension);
|
||||
transformRequest.setSourceSize(sourceFile.length());
|
||||
transformRequest.setTargetExtension(targetExtension);
|
||||
transformRequest.setSourceMediaType(sourceMimetype);
|
||||
|
||||
// HTTP Request
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.set(CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
|
||||
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
|
||||
sourceFile), headers, OK);
|
||||
|
||||
when(alfrescoSharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
|
||||
when(alfrescoSharedFileStoreClient.saveFile(any()))
|
||||
.thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
|
||||
when(mockExecutionResult.getExitValue()).thenReturn(0);
|
||||
|
||||
// Update the Transformation Request with any specific params before sending it
|
||||
updateTransformRequestWithSpecificOptions(transformRequest);
|
||||
|
||||
// Serialize and call the transformer
|
||||
String tr = objectMapper.writeValueAsString(transformRequest);
|
||||
String transformationReplyAsString = mockMvc
|
||||
.perform(MockMvcRequestBuilders
|
||||
.post("/transform")
|
||||
.header(ACCEPT, APPLICATION_JSON_VALUE)
|
||||
.header(CONTENT_TYPE, APPLICATION_JSON_VALUE)
|
||||
.content(tr))
|
||||
.andExpect(status().is(CREATED.value()))
|
||||
.andReturn().getResponse().getContentAsString();
|
||||
|
||||
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
|
||||
TransformReply.class);
|
||||
|
||||
// Assert the reply
|
||||
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
|
||||
assertEquals(transformRequest.getClientData(), transformReply.getClientData());
|
||||
assertEquals(transformRequest.getSchema(), transformReply.getSchema());
|
||||
}
|
||||
}
|
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA;
|
||||
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
|
||||
/**
|
||||
* Tests TikaController with a server test harness.
|
||||
*/
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
|
||||
public class TikaHttpRequestTest extends AbstractHttpRequestTest
|
||||
{
|
||||
@Override
|
||||
protected String getTransformerName()
|
||||
{
|
||||
return "Tika";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getSourceExtension()
|
||||
{
|
||||
return "pdf";
|
||||
}
|
||||
|
||||
// Override method as Tika requires sourceMimetype
|
||||
// If not provided then sourceMimetype request parameter error will be thrown.
|
||||
@Override
|
||||
protected void assertTransformError(boolean addFile, String errorMessage)
|
||||
{
|
||||
LinkedMultiValueMap<String, Object> parameters = new LinkedMultiValueMap<>();
|
||||
if (addFile)
|
||||
{
|
||||
parameters.add("file", new ClassPathResource("quick." + getSourceExtension()));
|
||||
}
|
||||
parameters.add("sourceMimetype", "application/pdf");
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MULTIPART_FORM_DATA);
|
||||
HttpEntity<LinkedMultiValueMap<String, Object>> entity = new HttpEntity<>(parameters,
|
||||
headers);
|
||||
super.sendTranformationRequest(entity, errorMessage);
|
||||
}
|
||||
}
|
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
/**
|
||||
* @author Lucian Tuca
|
||||
* created on 15/01/2019
|
||||
*/
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
|
||||
properties = {"activemq.url=nio://localhost:61616"})
|
||||
public class TikaQueueTransformServiceIT extends AbstractQueueTransformServiceIT
|
||||
{
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest
|
||||
.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_OPENXML_WORDPROCESSING)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
.withTargetExtension("txt")
|
||||
.withSchema(1)
|
||||
.withClientData("ACS")
|
||||
.withSourceReference(UUID.randomUUID().toString())
|
||||
.withSourceSize(32L).build();
|
||||
}
|
||||
}
|
@@ -0,0 +1,174 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static java.text.MessageFormat.format;
|
||||
import static java.util.function.Function.identity;
|
||||
import static java.util.stream.Collectors.toSet;
|
||||
import static org.alfresco.transformer.EngineClient.sendTRequest;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Triple;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
|
||||
/**
|
||||
* @author Cezar Leahu
|
||||
*/
|
||||
@RunWith(Parameterized.class)
|
||||
public class TikaTransformationIT
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(TikaTransformationIT.class);
|
||||
private static final String ENGINE_URL = "http://localhost:8090";
|
||||
private static final Map<String, String> extensionMimetype = ImmutableMap.of(
|
||||
"html", "text/html",
|
||||
"txt", "text/plain",
|
||||
"xhtml", "application/xhtml+xml",
|
||||
"xml", "text/xml");
|
||||
|
||||
private final String sourceFile;
|
||||
private final String targetExtension;
|
||||
private final String targetMimetype;
|
||||
private final String sourceMimetype;
|
||||
|
||||
public TikaTransformationIT(final Triple<String, String, String> entry)
|
||||
{
|
||||
sourceFile = entry.getLeft();
|
||||
targetExtension = entry.getMiddle();
|
||||
targetMimetype = extensionMimetype.get(entry.getMiddle());
|
||||
sourceMimetype = entry.getRight();
|
||||
}
|
||||
|
||||
// TODO unit tests for the following file types (for which is difficult to find file samples):
|
||||
// *.ogx (application/ogg)
|
||||
// *.cpio (application/x-cpio)
|
||||
// *.cdf (application/x-netcdf)
|
||||
// *.hdf (application/x-hdf)
|
||||
|
||||
@Parameterized.Parameters
|
||||
public static Set<Triple<String, String, String>> engineTransformations()
|
||||
{
|
||||
return Stream
|
||||
.of(
|
||||
allTargets("quick.doc", "application/msword"),
|
||||
allTargets("quick.docx",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
|
||||
allTargets("quick.html", "text/html"),
|
||||
allTargets("quick.jar", "application/java-archive"),
|
||||
allTargets("quick.java", "text/x-java-source"),
|
||||
Stream.of(
|
||||
Triple.of("quick.key", "html", "application/vnd.apple.keynote"),
|
||||
// Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
|
||||
// Triple.of("quick.key", "txt", "TikaAuto"),
|
||||
Triple.of("quick.key", "xhtml", "application/vnd.apple.keynote"),
|
||||
Triple.of("quick.key", "xml", "application/vnd.apple.keynote")
|
||||
),
|
||||
allTargets("quick.msg", "application/vnd.ms-outlook"),
|
||||
Stream.of(
|
||||
Triple.of("quick.numbers", "html", "application/vnd.apple.numbers"),
|
||||
// Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
|
||||
// Triple.of("quick.numbers", "txt", "TikaAuto"),
|
||||
Triple.of("quick.numbers", "xhtml", "application/vnd.apple.numbers"),
|
||||
Triple.of("quick.numbers", "xml", "application/vnd.apple.numbers")
|
||||
),
|
||||
allTargets("quick.odp", "application/vnd.oasis.opendocument.presentation"),
|
||||
allTargets("quick.ods", "application/vnd.oasis.opendocument.spreadsheet"),
|
||||
allTargets("quick.odt", "application/vnd.oasis.opendocument.text"),
|
||||
allTargets("quick.otp", "application/vnd.oasis.opendocument.presentation-template"),
|
||||
allTargets("quick.ots", "application/vnd.oasis.opendocument.spreadsheet-template"),
|
||||
allTargets("quick.ott", "application/vnd.oasis.opendocument.text-template"),
|
||||
Stream.of(
|
||||
Triple.of("quick.pages", "html", "application/vnd.apple.pages"),
|
||||
// Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
|
||||
// Triple.of("quick.pages", "txt", "TikaAuto"),
|
||||
Triple.of("quick.pages", "xhtml", "application/vnd.apple.pages"),
|
||||
Triple.of("quick.pages", "xml", "application/vnd.apple.pages")
|
||||
),
|
||||
allTargets("quick.pdf", "application/pdf"),
|
||||
allTargets("quick.ppt", "application/vnd.ms-powerpoint"),
|
||||
allTargets("quick.pptx",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation"),
|
||||
allTargets("quick.sxw", "application/vnd.sun.xml.writer"),
|
||||
allTargets("quick.txt", "text/plain"),
|
||||
allTargets("quick.vsd", "application/vnd.visio"),
|
||||
allTargets("quick.xls", "application/vnd.ms-excel"),
|
||||
allTargets("quick.xslx",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
|
||||
allTargets("quick.zip", "application/zip"),
|
||||
allTargets("quick.tar", "application/x-tar"),
|
||||
allTargets("sample.rtf", "application/rtf"),
|
||||
allTargets("quick.xml", "text/xml"),
|
||||
allTargets("sample.xhtml.txt", "application/xhtml+xml"),
|
||||
allTargets("sample.rss", "application/rss+xml"),
|
||||
//allTargets("quick.rar", "application/x-rar-compressed"),
|
||||
allTargets("quick.tar.gz", "application/x-gzip"))
|
||||
.flatMap(identity())
|
||||
.collect(toSet());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTransformation()
|
||||
{
|
||||
final String descriptor = format("Transform ({0} -> {1}, {2}, sourceMimetype={3})",
|
||||
sourceFile, targetMimetype, targetExtension, sourceMimetype);
|
||||
|
||||
try
|
||||
{
|
||||
final ResponseEntity<Resource> response = sendTRequest(ENGINE_URL, sourceFile, null,
|
||||
targetMimetype, targetExtension, ImmutableMap.of(
|
||||
"targetEncoding", "UTF-8",
|
||||
"sourceMimetype", sourceMimetype));
|
||||
assertEquals(descriptor, OK, response.getStatusCode());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
fail(descriptor + " exception: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private static Stream<Triple<String, String, String>> allTargets(final String sourceFile,
|
||||
final String sourceMimetype)
|
||||
{
|
||||
return extensionMimetype
|
||||
.keySet()
|
||||
.stream()
|
||||
.map(k -> Triple.of(sourceFile, k, sourceMimetype));
|
||||
}
|
||||
}
|
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"transformOptions": {
|
||||
"engineXOptions": [
|
||||
{"value": {"name": "page"}},
|
||||
{"value": {"name": "width"}},
|
||||
{"group": {"transformOptions": [
|
||||
{"value": {"name": "cropGravity"}}
|
||||
]}}
|
||||
]
|
||||
},
|
||||
"transformers": [
|
||||
{
|
||||
"transformerName": "engineX",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
|
||||
],
|
||||
"transformOptions": [
|
||||
"engineXOptions"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"transformOptions": {},
|
||||
"transformers": [
|
||||
{
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"transformers": [
|
||||
{
|
||||
"transformerName": "engineX",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"transformOptions": {
|
||||
"engineXOptions": [
|
||||
{"value": {"name": "page"}},
|
||||
{"value": {"name": "page"}},
|
||||
{"value": {"name": "width"}},
|
||||
{"group": {"transformOptions": [
|
||||
{"value": {"name": "cropGravity"}}
|
||||
]}}
|
||||
]
|
||||
},
|
||||
"transformers": [
|
||||
{
|
||||
"transformerName": "engineX",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" },
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" },
|
||||
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
|
||||
],
|
||||
"transformOptions": [
|
||||
"engineXOptions",
|
||||
"engineXOptions"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,17 @@
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
|
||||
<title>The quick brown fox jumps over the lazy dog</title>
|
||||
<meta name="author" content="Nevin Nollop">
|
||||
<meta name="keywords" content="Pangram, fox, dog">
|
||||
<meta name="description" content="Gym class featuring a brown fox and lazy dog">
|
||||
</head>
|
||||
|
||||
<body lang=EN-US>
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
Binary file not shown.
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
|
||||
public class quick
|
||||
{
|
||||
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
|
||||
|
||||
|
||||
|
||||
Blank Page
|
||||
|
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
|
||||
<document>
|
||||
<text>The quick brown fox jumps over the lazy dog</text>
|
||||
</document>
|
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<rss version="2.0">
|
||||
|
||||
<channel>
|
||||
<title>W3Schools Home Page</title>
|
||||
<link>https://www.w3schools.com</link>
|
||||
<description>Free web building tutorials</description>
|
||||
<item>
|
||||
<title>RSS Tutorial</title>
|
||||
<link>https://www.w3schools.com/xml/xml_rss.asp</link>
|
||||
<description>New RSS tutorial on W3Schools</description>
|
||||
</item>
|
||||
<item>
|
||||
<title>XML Tutorial</title>
|
||||
<link>https://www.w3schools.com/xml</link>
|
||||
<description>New XML tutorial on W3Schools</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
@@ -0,0 +1,214 @@
|
||||
{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff31507\deff0\stshfdbch31506\stshfloch31506\stshfhich31506\stshfbi31507\deflang1033\deflangfe1033\themelang1048\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}
|
||||
{\f37\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
|
||||
{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0302020204030204}Calibri Light;}
|
||||
{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
|
||||
{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}
|
||||
{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f42\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f43\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
|
||||
{\f45\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f46\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f47\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f48\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
|
||||
{\f49\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f50\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f412\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\f413\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}
|
||||
{\f415\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\f416\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\f417\fbidi \fswiss\fcharset177\fprq2 Calibri (Hebrew);}{\f418\fbidi \fswiss\fcharset178\fprq2 Calibri (Arabic);}
|
||||
{\f419\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\f420\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
|
||||
{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
|
||||
{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
|
||||
{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
|
||||
{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
|
||||
{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
|
||||
{\fhimajor\f31528\fbidi \fswiss\fcharset238\fprq2 Calibri Light CE;}{\fhimajor\f31529\fbidi \fswiss\fcharset204\fprq2 Calibri Light Cyr;}{\fhimajor\f31531\fbidi \fswiss\fcharset161\fprq2 Calibri Light Greek;}
|
||||
{\fhimajor\f31532\fbidi \fswiss\fcharset162\fprq2 Calibri Light Tur;}{\fhimajor\f31533\fbidi \fswiss\fcharset177\fprq2 Calibri Light (Hebrew);}{\fhimajor\f31534\fbidi \fswiss\fcharset178\fprq2 Calibri Light (Arabic);}
|
||||
{\fhimajor\f31535\fbidi \fswiss\fcharset186\fprq2 Calibri Light Baltic;}{\fhimajor\f31536\fbidi \fswiss\fcharset163\fprq2 Calibri Light (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
|
||||
{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
|
||||
{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
|
||||
{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
|
||||
{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
|
||||
{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
|
||||
{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
|
||||
{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
|
||||
{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}
|
||||
{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}
|
||||
{\fhiminor\f31573\fbidi \fswiss\fcharset177\fprq2 Calibri (Hebrew);}{\fhiminor\f31574\fbidi \fswiss\fcharset178\fprq2 Calibri (Arabic);}{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}
|
||||
{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
|
||||
{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
|
||||
{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}
|
||||
{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;
|
||||
\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\red0\green0\blue0;\red0\green0\blue0;}{\*\defchp \f31506\fs24 }{\*\defpap
|
||||
\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs24\alang1025 \ltrch\fcs0
|
||||
\f31506\fs24\lang1048\langfe1033\cgrid\langnp1048\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\*
|
||||
\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
|
||||
\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs24\alang1025 \ltrch\fcs0 \f31506\fs24\lang1048\langfe1033\cgrid\langnp1048\langfenp1033 \snext11 \ssemihidden \sunhideused Normal Table;}}
|
||||
{\*\rsidtbl \rsid2693434\rsid4215609\rsid7808163\rsid16662808}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\author Cezar Leahu}{\operator Cezar Leahu}
|
||||
{\creatim\yr2019\mo8\dy29\hr15\min41}{\revtim\yr2019\mo8\dy29\hr15\min43}{\version2}{\edmins2}{\nofpages1}{\nofwords17}{\nofchars102}{\nofcharsws118}{\vern2821}}{\*\userprops {\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_Enabled}\proptype30
|
||||
{\staticval true}{\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_SetDate}\proptype30{\staticval 2019-08-29T12:41:57+0200}{\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_Method}\proptype30{\staticval Standard}{\propname MSIP_Label_ffb
|
||||
520d8-df98-444b-9f20-0dd9d08cf98c_Name}\proptype30{\staticval ffb520d8-df98-444b-9f20-0dd9d08cf98c}{\propname MSIP_Label_ffb520d8-df98-444b-9f20-0dd9d08cf98c_SiteId}\proptype30{\staticval 65bc0b3b-7ca2-488c-ba9c-b1bebdd49af6}{\propname MSIP_Label_ffb520d8
|
||||
-df98-444b-9f20-0dd9d08cf98c_ActionId}\proptype30{\staticval 6097ae90-22f7-448a-b9b7-0000b0413133}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}}
|
||||
\paperw11900\paperh16840\margl1417\margr1417\margt1417\margb1417\gutter0\ltrsect
|
||||
\widowctrl\ftnbj\aenddoc\trackmoves0\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen
|
||||
\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1417\dgvorigin1417\dghshow1\dgvshow1
|
||||
\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct
|
||||
\asianbrkrule\rsidroot4215609\newtblstyruls\nogrowautofit\usenormstyforlist\noindnmbrts\felnbrelev\nocxsptable\indrlsweleven\noafcnsttbl\afelev\utinl\hwelev\spltpgpar\notcvasp\notbrkcnstfrctbl\notvatxbx\krnprsnet\cachedcolbal \nouicompat \fet0
|
||||
{\*\wgrffmtfilter 2450}\nofeaturethrottle1\ilfomacatclnup0\ltrpar \sectd \ltrsect\linex0\headery708\footery708\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sectrsid2693434\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}
|
||||
{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}
|
||||
{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9
|
||||
\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid16662808 \rtlch\fcs1 \af31507\afs24\alang1025 \ltrch\fcs0
|
||||
\f31506\fs24\lang1048\langfe1033\cgrid\langnp1048\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid16662808
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par The quick brown fox jumps over the lazy dog
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par The quick brown fox jumps over the lazy dog
|
||||
\par
|
||||
\par
|
||||
\par
|
||||
\par }\pard \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4996987
|
||||
\par }{\*\themedata 504b030414000600080000002100e9de0fbfff0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb4ec3301045f748fc83e52d4a
|
||||
9cb2400825e982c78ec7a27cc0c8992416c9d8b2a755fbf74cd25442a820166c2cd933f79e3be372bd1f07b5c3989ca74aaff2422b24eb1b475da5df374fd9ad
|
||||
5689811a183c61a50f98f4babebc2837878049899a52a57be670674cb23d8e90721f90a4d2fa3802cb35762680fd800ecd7551dc18eb899138e3c943d7e503b6
|
||||
b01d583deee5f99824e290b4ba3f364eac4a430883b3c092d4eca8f946c916422ecab927f52ea42b89a1cd59c254f919b0e85e6535d135a8de20f20b8c12c3b0
|
||||
0c895fcf6720192de6bf3b9e89ecdbd6596cbcdd8eb28e7c365ecc4ec1ff1460f53fe813d3cc7f5b7f020000ffff0300504b030414000600080000002100a5d6
|
||||
a7e7c0000000360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4f
|
||||
c7060abb0884a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b6309512
|
||||
0f88d94fbc52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462
|
||||
a1a82fe353bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f746865
|
||||
6d652f7468656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b
|
||||
4b0d592c9c070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b
|
||||
4757e8d3f729e245eb2b260a0238fd010000ffff0300504b030414000600080000002100b6f4679893070000c9200000160000007468656d652f7468656d652f
|
||||
7468656d65312e786d6cec59cd8b1bc915bf07f23f347d97f5d5ad8fc1f2a24fcfda33b6b164873dd648a5eef2547789aad28cc56208de532e81c026e49085bd
|
||||
ed21842cecc22eb9e48f31d8249b3f22afaa5bdd5552c99e191c3061463074977eefd5afde7bf5de53d5ddcf5e26d4bbc05c1096f6fcfa9d9aefe174ce16248d
|
||||
7afeb3d9a4d2f13d2151ba4094a5b8e76fb0f03fbbf7eb5fdd454732c609f6403e1547a8e7c752ae8eaa5531876124eeb0154ee1bb25e30992f0caa3ea82a34b
|
||||
d09bd06aa3566b55134452df4b51026a1f2f97648ebd9952e9dfdb2a1f53784da5500373caa74a35b6243476715e5708b11143cabd0b447b3eccb3609733fc52
|
||||
fa1e4542c2173dbfa6fffceabdbb5574940b517940d6909be8bf5c2e17589c37f49c3c3a2b260d823068f50bfd1a40e53e6edc1eb7c6ad429f06a0f91c569a71
|
||||
b175b61bc320c71aa0ecd1a17bd41e35eb16ded0dfdce3dc0fd5c7c26b50a63fd8c34f2643b0a285d7a00c1feee1c3417730b2f56b50866fede1dbb5fe28685b
|
||||
fa3528a6243ddf43d7c25673b85d6d0159327aec8477c360d26ee4ca4b144443115d6a8a254be5a1584bd00bc6270050408a24493db959e1259a43140f112567
|
||||
9c7827248a21f056286502866b8ddaa4d684ffea13e827ed5174849121ad780113b137a4f87862cec94af6fc07a0d537206f7ffef9cdeb1fdfbcfee9cd575fbd
|
||||
79fdf77c6eadca923b466964cafdf2dd1ffef3cd6fbd7ffff0ed2f5fff319b7a172f4cfcbbbffdeedd3ffef93ef5b0e2d2146ffff4fdbb1fbf7ffbe7dfffebaf
|
||||
5f3bb4f7393a33e1339260e13dc297de5396c0021dfcf119bf9ec42c46c494e8a791402952b338f48f656ca11f6d10450edc00db767cce21d5b880f7d72f2cc2
|
||||
d398af2571687c182716f094313a60dc6985876a2ec3ccb3751ab927e76b13f714a10bd7dc43945a5e1eaf579063894be530c616cd2714a5124538c5d253dfb1
|
||||
738c1dabfb8210cbaea764ce99604be97d41bc01224e93ccc899154da5d03149c02f1b1741f0b7659bd3e7de8051d7aa47f8c246c2de40d4417e86a965c6fb68
|
||||
2d51e252394309350d7e8264ec2239ddf0b9891b0b099e8e3065de78818570c93ce6b05ec3e90f21cdb8dd7e4a37898de4929cbb749e20c64ce4889d0f6394ac
|
||||
5cd829496313fbb938871045de13265df05366ef10f50e7e40e941773f27d872f787b3c133c8b026a53240d4376beef0e57dccacf89d6ee8126157aae9f3c44a
|
||||
b17d4e9cd131584756689f604cd1255a60ec3dfbdcc160c05696cd4bd20f62c82ac7d815580f901dabea3dc5027a25d5dcece7c91322ac909de2881de073bad9
|
||||
493c1b9426881fd2fc08bc6eda7c0ca52e7105c0633a3f37818f08f480102f4ea33c16a0c308ee835a9fc4c82a60ea5db8e375c32dff5d658fc1be7c61d1b8c2
|
||||
be04197c6d1948eca6cc7b6d3343d49aa00c9819822ec3956e41c4727f29a28aab165b3be596f6a62ddd00dd91d5f42424fd6007b4d3fb84ffbbde073a8cb77f
|
||||
f9c6b10f3e4ebfe3566c25ab6b763a8792c9f14e7f7308b7dbd50c195f904fbfa919a175fa04431dd9cf58b73dcd6d4fe3ffdff73487f6f36d2773a8dfb8ed64
|
||||
7ce8306e3b99fc70e5e3743265f3027d8d3af0c80e7af4b14f72f0d46749289dca0dc527421ffc08f83db398c0a092d3279eb838055cc5f0a8ca1c4c60e1228e
|
||||
b48cc799fc0d91f134462b381daafb4a492472d591f0564cc0a1911e76ea5678ba4e4ed9223becacd7d5c16656590592e5782d2cc6e1a04a66e856bb3cc02bd4
|
||||
6bb6913e68dd1250b2d721614c6693683a48b4b783ca48fa58178ce620a157f65158741d2c3a4afdd6557b2c805ae115f8c1edc1cff49e1f06200242701e07cd
|
||||
f942f92973f5d6bbda991fd3d3878c69450034d8db08283ddd555c0f2e4fad2e0bb52b78da2261849b4d425b46377822869fc17974aad1abd0b8aeafbba54b2d
|
||||
7aca147a3e08ad9246bbf33e1637f535c8ede6069a9a9982a6de65cf6f35430899395af5fc251c1ac363b282d811ea3717a211dcbccc25cf36fc4d32cb8a0b39
|
||||
4222ce0cae934e960d122231f728497abe5a7ee1069aea1ca2b9d51b90103e59725d482b9f1a3970baed64bc5ce2b934dd6e8c284b67af90e1b35ce1fc568bdf
|
||||
1cac24d91adc3d8d1797de195df3a708422c6cd795011744c0dd413db3e682c0655891c8caf8db294c79da356fa3740c65e388ae62945714339967709dca0b3a
|
||||
faadb081f196af190c6a98242f8467912ab0a651ad6a5a548d8cc3c1aafb6121653923699635d3ca2aaa6abab39835c3b60cecd8f26645de60b53531e434b3c2
|
||||
67a97b37e576b7b96ea74f28aa0418bcb09fa3ea5ea12018d4cac92c6a8af17e1a56393b1fb56bc776811fa07695226164fdd656ed8edd8a1ae19c0e066f54f9
|
||||
416e376a6168b9ed2bb5a5f5adb979b1cdce5e40f2184197bba6526857c2c92e47d0104d754f92a50dd8222f65be35e0c95b73d2f3bfac85fd60d80887955a27
|
||||
1c57826650ab74c27eb3d20fc3667d1cd66ba341e31514161927f530bbb19fc00506dde4f7f67a7cefee3ed9ded1dc99b3a4caf4dd7c5513d777f7f5c6e1bb7b
|
||||
8f40d2f9b2d598749bdd41abd26df627956034e854bac3d6a0326a0ddba3c9681876ba9357be77a1c141bf390c5ae34ea5551f0e2b41aba6e877ba9576d068f4
|
||||
8376bf330efaaff23606569ea58fdc16605ecdebde7f010000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d65
|
||||
2f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d36
|
||||
3f2451eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e
|
||||
3198720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d985
|
||||
0528a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100e9de0fbfff0000001c020000130000000000000000000000
|
||||
0000000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b00000000000000000000
|
||||
000000300100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c0000000000000000000000000019020000
|
||||
7468656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d0014000600080000002100b6f4679893070000c92000001600000000000000
|
||||
000000000000d60200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027000000
|
||||
000000000000000000009d0a00007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000980b00000000}
|
||||
{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d
|
||||
617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169
|
||||
6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363
|
||||
656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e}
|
||||
{\*\latentstyles\lsdstimax375\lsdlockeddef0\lsdsemihiddendef0\lsdunhideuseddef0\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 1;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 5;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 9;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 1;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 2;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 3;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 4;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 5;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 6;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 7;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 8;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Indent;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 header;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footer;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index heading;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of figures;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope return;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation reference;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 line number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 page number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote text;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of authorities;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 macro;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toa heading;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 3;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 3;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 3;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 5;\lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Closing;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Signature;\lsdsemihidden1 \lsdunhideused1 \lsdpriority1 \lsdlocked0 Default Paragraph Font;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 4;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Message Header;\lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Salutation;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Date;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Note Heading;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 3;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Block Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 FollowedHyperlink;\lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;
|
||||
\lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Document Map;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Plain Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 E-mail Signature;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Top of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Bottom of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal (Web);\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Acronym;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Cite;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Code;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Definition;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Keyboard;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Preformatted;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Sample;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Typewriter;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Variable;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Table;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation subject;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 No List;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 1;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 2;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 2;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 3;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 2;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 6;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 2;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 6;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 2;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Contemporary;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Elegant;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Professional;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Subtle 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Subtle 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 2;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Balloon Text;\lsdpriority39 \lsdlocked0 Table Grid;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Theme;\lsdsemihidden1 \lsdlocked0 Placeholder Text;
|
||||
\lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdpriority60 \lsdlocked0 Light Shading;\lsdpriority61 \lsdlocked0 Light List;\lsdpriority62 \lsdlocked0 Light Grid;\lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdpriority64 \lsdlocked0 Medium Shading 2;
|
||||
\lsdpriority65 \lsdlocked0 Medium List 1;\lsdpriority66 \lsdlocked0 Medium List 2;\lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdpriority68 \lsdlocked0 Medium Grid 2;\lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdpriority70 \lsdlocked0 Dark List;
|
||||
\lsdpriority71 \lsdlocked0 Colorful Shading;\lsdpriority72 \lsdlocked0 Colorful List;\lsdpriority73 \lsdlocked0 Colorful Grid;\lsdpriority60 \lsdlocked0 Light Shading Accent 1;\lsdpriority61 \lsdlocked0 Light List Accent 1;
|
||||
\lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdsemihidden1 \lsdlocked0 Revision;
|
||||
\lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;
|
||||
\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;\lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;\lsdpriority72 \lsdlocked0 Colorful List Accent 1;
|
||||
\lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdpriority60 \lsdlocked0 Light Shading Accent 2;\lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2;
|
||||
\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;
|
||||
\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdpriority70 \lsdlocked0 Dark List Accent 2;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;
|
||||
\lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdpriority61 \lsdlocked0 Light List Accent 3;\lsdpriority62 \lsdlocked0 Light Grid Accent 3;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;
|
||||
\lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;
|
||||
\lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;\lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;\lsdpriority60 \lsdlocked0 Light Shading Accent 4;
|
||||
\lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdpriority62 \lsdlocked0 Light Grid Accent 4;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;
|
||||
\lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdpriority70 \lsdlocked0 Dark List Accent 4;
|
||||
\lsdpriority71 \lsdlocked0 Colorful Shading Accent 4;\lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdpriority60 \lsdlocked0 Light Shading Accent 5;\lsdpriority61 \lsdlocked0 Light List Accent 5;
|
||||
\lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 5;
|
||||
\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;\lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;
|
||||
\lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdpriority62 \lsdlocked0 Light Grid Accent 6;
|
||||
\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;
|
||||
\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;
|
||||
\lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis;
|
||||
\lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdsemihidden1 \lsdunhideused1 \lsdpriority37 \lsdlocked0 Bibliography;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;\lsdpriority41 \lsdlocked0 Plain Table 1;\lsdpriority42 \lsdlocked0 Plain Table 2;\lsdpriority43 \lsdlocked0 Plain Table 3;\lsdpriority44 \lsdlocked0 Plain Table 4;
|
||||
\lsdpriority45 \lsdlocked0 Plain Table 5;\lsdpriority40 \lsdlocked0 Grid Table Light;\lsdpriority46 \lsdlocked0 Grid Table 1 Light;\lsdpriority47 \lsdlocked0 Grid Table 2;\lsdpriority48 \lsdlocked0 Grid Table 3;\lsdpriority49 \lsdlocked0 Grid Table 4;
|
||||
\lsdpriority50 \lsdlocked0 Grid Table 5 Dark;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 1;
|
||||
\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 1;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 1;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 1;
|
||||
\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 1;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 2;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 2;
|
||||
\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 2;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 2;
|
||||
\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 3;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 3;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 3;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 3;
|
||||
\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 3;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 4;
|
||||
\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 4;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 4;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 4;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 4;
|
||||
\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 4;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 5;
|
||||
\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 5;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 5;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 5;
|
||||
\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 5;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 6;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 6;
|
||||
\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 6;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 6;
|
||||
\lsdpriority46 \lsdlocked0 List Table 1 Light;\lsdpriority47 \lsdlocked0 List Table 2;\lsdpriority48 \lsdlocked0 List Table 3;\lsdpriority49 \lsdlocked0 List Table 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark;
|
||||
\lsdpriority51 \lsdlocked0 List Table 6 Colorful;\lsdpriority52 \lsdlocked0 List Table 7 Colorful;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 List Table 2 Accent 1;\lsdpriority48 \lsdlocked0 List Table 3 Accent 1;
|
||||
\lsdpriority49 \lsdlocked0 List Table 4 Accent 1;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 1;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 1;
|
||||
\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 List Table 2 Accent 2;\lsdpriority48 \lsdlocked0 List Table 3 Accent 2;\lsdpriority49 \lsdlocked0 List Table 4 Accent 2;
|
||||
\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 2;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 3;
|
||||
\lsdpriority47 \lsdlocked0 List Table 2 Accent 3;\lsdpriority48 \lsdlocked0 List Table 3 Accent 3;\lsdpriority49 \lsdlocked0 List Table 4 Accent 3;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 3;
|
||||
\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 4;\lsdpriority47 \lsdlocked0 List Table 2 Accent 4;
|
||||
\lsdpriority48 \lsdlocked0 List Table 3 Accent 4;\lsdpriority49 \lsdlocked0 List Table 4 Accent 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 4;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 4;
|
||||
\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 List Table 2 Accent 5;\lsdpriority48 \lsdlocked0 List Table 3 Accent 5;
|
||||
\lsdpriority49 \lsdlocked0 List Table 4 Accent 5;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 5;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 5;
|
||||
\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 List Table 2 Accent 6;\lsdpriority48 \lsdlocked0 List Table 3 Accent 6;\lsdpriority49 \lsdlocked0 List Table 4 Accent 6;
|
||||
\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Mention;
|
||||
\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Smart Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Hashtag;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Unresolved Mention;}}{\*\datastore }}
|
@@ -0,0 +1,9 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>The quick brown fox</title>
|
||||
</head>
|
||||
<body>
|
||||
The quick brown fox jumps over the lazy dog
|
||||
</body>
|
||||
</html>
|
@@ -0,0 +1,5 @@
|
||||
### Licenses
|
||||
|
||||
* Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the
|
||||
[Apache 2.0.txt](https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt)
|
||||
file placed in the root directory of the docker image.
|
112
alfresco-transform-tika/alfresco-transform-tika/pom.xml
Normal file
112
alfresco-transform-tika/alfresco-transform-tika/pom.xml
Normal file
@@ -0,0 +1,112 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>alfresco-transform-tika</artifactId>
|
||||
<name>Alfresco Tika Transformer</name>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<parent>
|
||||
<artifactId>alfresco-transform-core</artifactId>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<version>2.2.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-transformer-base</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Tika -->
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-core</artifactId>
|
||||
<version>1.21-20190624-alfresco-patched</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-parsers</artifactId>
|
||||
<version>1.21-20190624-alfresco-patched</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.tdunning</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcprov-jdk15on</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcmail-jdk15on</artifactId>
|
||||
</exclusion>
|
||||
<!-- TODO ATS-534 check transformations not affected by this missing quartz lib -->
|
||||
<exclusion>
|
||||
<groupId>org.quartz-scheduler</groupId>
|
||||
<artifactId>quartz</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- for Apache Tika Parsers - eg. encrypted PDF -->
|
||||
<dependency>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcprov-jdk15on</artifactId>
|
||||
<version>1.64</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
<artifactId>bcmail-jdk15on</artifactId>
|
||||
<version>1.64</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache POI -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>${dependency.poi.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>${dependency.poi.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>${dependency.poi.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache PDFBox -->
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-failsafe-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>license-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
@@ -0,0 +1,52 @@
|
||||
# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
|
||||
#-------------------------------------------------------------------------------
|
||||
# Already used licenses in project :
|
||||
# - (MIT-style) netCDF C library license
|
||||
# - 3-Clause BSD License
|
||||
# - Apache 2.0
|
||||
# - Apache License 2.0
|
||||
# - Apache License v2
|
||||
# - Apache License v2.0
|
||||
# - Apache License, Version 2.0
|
||||
# - Apache License, version 2.0
|
||||
# - Apache Software License - Version 2.0
|
||||
# - BSD
|
||||
# - BSD 3-clause License w/nuclear disclaimer
|
||||
# - BSD 3-clause New License
|
||||
# - BSD License
|
||||
# - BSD-2-Clause
|
||||
# - Bouncy Castle Licence
|
||||
# - CDDL + GPLv2 with classpath exception
|
||||
# - CDDL, v1.0
|
||||
# - CDDL/GPLv2+CE
|
||||
# - EDL 1.0
|
||||
# - EPL 2.0
|
||||
# - Eclipse Distribution License - v 1.0
|
||||
# - Eclipse Public License - v 1.0
|
||||
# - Eclipse Public License 2.0
|
||||
# - GNU General Public License, version 2 with the GNU Classpath Exception
|
||||
# - GNU Lesser General Public License
|
||||
# - GPL2 w/ CPE
|
||||
# - LGPL, v2.1 or later
|
||||
# - LGPL, version 2.1
|
||||
# - MIT License
|
||||
# - MIT License (MIT)
|
||||
# - Mozilla Public License 1.1 (MPL 1.1)
|
||||
# - OGC copyright
|
||||
# - Public Domain
|
||||
# - Public Domain, per Creative Commons CC0
|
||||
# - Similar to Apache License but with the acknowledgment clause removed
|
||||
# - The Apache License, Version 2.0
|
||||
# - The Apache Software License, Version 2.0
|
||||
# - The BSD License
|
||||
# - The MIT License
|
||||
# - The SAX License
|
||||
# - The W3C License
|
||||
# - UnRar License
|
||||
# - lgpl
|
||||
#-------------------------------------------------------------------------------
|
||||
# Please fill the missing licenses for dependencies :
|
||||
#
|
||||
#
|
||||
#Thu Mar 26 11:14:47 GMT 2020
|
||||
net.jcip--jcip-annotations--1.0=
|
@@ -0,0 +1,859 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.executors;
|
||||
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_HTML;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_JPEG;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_PNG;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_TIFF;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_CSV;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
|
||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.TransformerConfigurationException;
|
||||
import javax.xml.transform.sax.SAXTransformerFactory;
|
||||
import javax.xml.transform.sax.TransformerHandler;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.extractor.DocumentSelector;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.EmptyParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.microsoft.OfficeParser;
|
||||
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
|
||||
import org.apache.tika.parser.pdf.PDFParser;
|
||||
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||
import org.apache.tika.parser.pkg.PackageParser;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.apache.tika.sax.ExpandedTitleContentHandler;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
/**
|
||||
* Stripped down command line Tika transformers. Not actually run as a separate process, but the code fits the pattern
|
||||
* used by transformers that do.
|
||||
* <pre>
|
||||
*
|
||||
* Archive 0 ms
|
||||
* 1) cpio html [100] unlimited
|
||||
* 2) cpio txt [50] unlimited
|
||||
* 3) cpio xhtml [100] unlimited
|
||||
* 4) cpio xml [100] unlimited
|
||||
* 5) jar html [100] unlimited
|
||||
* 6) jar txt [50] unlimited
|
||||
* 7) jar xhtml [100] unlimited
|
||||
* 8) jar xml [100] unlimited
|
||||
* 9) tar html [100] unlimited
|
||||
* 10) tar txt [50] unlimited
|
||||
* 11) tar xhtml [100] unlimited
|
||||
* 12) tar xml [100] unlimited
|
||||
* 13) zip html [100] unlimited
|
||||
* 14) zip txt [50] unlimited
|
||||
* 15) zip xhtml [100] unlimited
|
||||
* 16) zip xml [100] unlimited
|
||||
* PdfBox 0 ms
|
||||
* 1) pdf html [110] unlimited
|
||||
* 2) pdf txt [50] 25 MB
|
||||
* 3) pdf xhtml [110] unlimited
|
||||
* 4) pdf xml [110] unlimited
|
||||
* OutlookMsg 0 ms
|
||||
* 1) msg html [125] unlimited
|
||||
* 2) msg txt [125] unlimited
|
||||
* 3) msg xhtml [125] unlimited
|
||||
* 4) msg xml [125] unlimited
|
||||
* PdfBox 0 ms
|
||||
* 1) pdf html [110] unlimited
|
||||
* 2) pdf txt [50] 25 MB
|
||||
* 3) pdf xhtml [110] unlimited
|
||||
* 4) pdf xml [110] unlimited
|
||||
* Office 0 ms
|
||||
* 1) doc html [130] unlimited
|
||||
* 2) doc txt [130] unlimited
|
||||
* 3) doc xhtml [130] unlimited
|
||||
* 4) doc xml [130] unlimited
|
||||
* 5) mpp html [130] unlimited
|
||||
* 6) mpp txt [130] unlimited
|
||||
* 7) mpp xhtml [130] unlimited
|
||||
* 8) mpp xml [130] unlimited
|
||||
* 9) msg html [130] unlimited
|
||||
* 10) msg txt [130] unlimited
|
||||
* 11) msg xhtml [130] unlimited
|
||||
* 12) msg xml [130] unlimited
|
||||
* 13) ppt html [130] unlimited
|
||||
* 14) ppt txt [130] unlimited
|
||||
* 15) ppt xhtml [130] unlimited
|
||||
* 16) ppt xml [130] unlimited
|
||||
* 17) vsd html [130] unlimited
|
||||
* 18) vsd txt [130] unlimited
|
||||
* 19) vsd xhtml [130] unlimited
|
||||
* 20) vsd xml [130] unlimited
|
||||
* Poi 0 ms
|
||||
* 1) xls csv [130] unlimited
|
||||
* 2) xls html [130] unlimited
|
||||
* 3) xls txt [130] unlimited
|
||||
* 4) xls xhtml [130] unlimited
|
||||
* 5) xls xml [130] unlimited
|
||||
* 6) xlsx csv [130] unlimited
|
||||
* 7) xlsx html [130] unlimited
|
||||
* 8) xlsx txt [130] unlimited
|
||||
* 9) xlsx xhtml [130] unlimited
|
||||
* 10) xlsx xml [130] unlimited
|
||||
* OOXML 0 ms
|
||||
* 1) docm html [130] unlimited
|
||||
* 2) docm txt [130] unlimited
|
||||
* 3) docm xhtml [130] unlimited
|
||||
* 4) docm xml [130] unlimited
|
||||
* 5) docx html [130] unlimited
|
||||
* 6) docx txt [130] unlimited
|
||||
* 7) docx xhtml [130] unlimited
|
||||
* 8) docx xml [130] unlimited
|
||||
* 9) dotm html [130] unlimited
|
||||
* 10) dotm txt [130] unlimited
|
||||
* 11) dotm xhtml [130] unlimited
|
||||
* 12) dotm xml [130] unlimited
|
||||
* 13) dotx html [130] unlimited
|
||||
* 14) dotx txt [130] unlimited
|
||||
* 15) dotx xhtml [130] unlimited
|
||||
* 16) dotx xml [130] unlimited
|
||||
* 17) potm html [130] unlimited
|
||||
* 18) potm txt [130] unlimited
|
||||
* 19) potm xhtml [130] unlimited
|
||||
* 20) potm xml [130] unlimited
|
||||
* 21) potx html [130] unlimited
|
||||
* 22) potx txt [130] unlimited
|
||||
* 23) potx xhtml [130] unlimited
|
||||
* 24) potx xml [130] unlimited
|
||||
* 25) ppam html [130] unlimited
|
||||
* 26) ppam txt [130] unlimited
|
||||
* 27) ppam xhtml [130] unlimited
|
||||
* 28) ppam xml [130] unlimited
|
||||
* 29) ppsm html [130] unlimited
|
||||
* 30) ppsm txt [130] unlimited
|
||||
* 31) ppsm xhtml [130] unlimited
|
||||
* 32) ppsm xml [130] unlimited
|
||||
* 33) ppsx html [130] unlimited
|
||||
* 34) ppsx txt [130] unlimited
|
||||
* 35) ppsx xhtml [130] unlimited
|
||||
* 36) ppsx xml [130] unlimited
|
||||
* 37) pptm html [130] unlimited
|
||||
* 38) pptm txt [130] unlimited
|
||||
* 39) pptm xhtml [130] unlimited
|
||||
* 40) pptm xml [130] unlimited
|
||||
* 41) pptx html [130] unlimited
|
||||
* 42) pptx txt [130] unlimited
|
||||
* 43) pptx xhtml [130] unlimited
|
||||
* 44) pptx xml [130] unlimited
|
||||
* 45) sldm html [130] unlimited
|
||||
* 46) sldm txt [130] unlimited
|
||||
* 47) sldm xhtml [130] unlimited
|
||||
* 48) sldm xml [130] unlimited
|
||||
* 49) sldx html [130] unlimited
|
||||
* 50) sldx txt [130] unlimited
|
||||
* 51) sldx xhtml [130] unlimited
|
||||
* 52) sldx xml [130] unlimited
|
||||
* 53) xlam html [130] unlimited
|
||||
* 54) xlam txt [130] unlimited
|
||||
* 55) xlam xhtml [130] unlimited
|
||||
* 56) xlam xml [130] unlimited
|
||||
* 57) xlsb html [130] unlimited
|
||||
* 58) xlsb txt [130] unlimited
|
||||
* 59) xlsb xhtml [130] unlimited
|
||||
* 60) xlsb xml [130] unlimited
|
||||
* 61) xlsm html [130] unlimited
|
||||
* 62) xlsm txt [130] unlimited
|
||||
* 63) xlsm xhtml [130] unlimited
|
||||
* 64) xlsm xml [130] unlimited
|
||||
* 65) xlsx html [130] unlimited
|
||||
* 66) xlsx txt [130] unlimited
|
||||
* 67) xlsx xhtml [130] unlimited
|
||||
* 68) xlsx xml [130] unlimited
|
||||
* 69) xltm html [130] unlimited
|
||||
* 70) xltm txt [130] unlimited
|
||||
* 71) xltm xhtml [130] unlimited
|
||||
* 72) xltm xml [130] unlimited
|
||||
* 73) xltx html [130] unlimited
|
||||
* 74) xltx txt [130] unlimited
|
||||
* 75) xltx xhtml [130] unlimited
|
||||
* 76) xltx xml [130] unlimited
|
||||
* TikaAuto 0 ms
|
||||
* 1) cdf html [120] unlimited
|
||||
* 2) cdf txt [120] unlimited
|
||||
* 3) cdf xhtml [120] unlimited
|
||||
* 4) cdf xml [120] unlimited
|
||||
* 5) cpio html [120] unlimited
|
||||
* 6) cpio txt [120] unlimited
|
||||
* 7) cpio xhtml [120] unlimited
|
||||
* 8) cpio xml [120] unlimited
|
||||
* 9) doc html [120] unlimited
|
||||
* 10) doc txt [120] unlimited
|
||||
* 11) doc xhtml [120] unlimited
|
||||
* 12) doc xml [120] unlimited
|
||||
* 13) docm html [120] unlimited
|
||||
* 14) docm txt [120] unlimited
|
||||
* 15) docm xhtml [120] unlimited
|
||||
* 16) docm xml [120] unlimited
|
||||
* 17) docx html [120] unlimited
|
||||
* 18) docx txt [120] unlimited
|
||||
* 19) docx xhtml [120] unlimited
|
||||
* 20) docx xml [120] unlimited
|
||||
* 21) dotm html [120] unlimited
|
||||
* 22) dotm txt [120] unlimited
|
||||
* 23) dotm xhtml [120] unlimited
|
||||
* 24) dotm xml [120] unlimited
|
||||
* 25) dotx html [120] unlimited
|
||||
* 26) dotx txt [120] unlimited
|
||||
* 27) dotx xhtml [120] unlimited
|
||||
* 28) dotx xml [120] unlimited
|
||||
* 29) gzip html [120] unlimited
|
||||
* 30) gzip txt [120] unlimited
|
||||
* 31) gzip xhtml [120] unlimited
|
||||
* 32) gzip xml [120] unlimited
|
||||
* 33) hdf html [120] unlimited
|
||||
* 34) hdf txt [120] unlimited
|
||||
* 35) hdf xhtml [120] unlimited
|
||||
* 36) hdf xml [120] unlimited
|
||||
* 37) html html [120] unlimited
|
||||
* 38) html txt [120] unlimited
|
||||
* 39) html xhtml [120] unlimited
|
||||
* 40) html xml [120] unlimited
|
||||
* 41) jar html [120] unlimited
|
||||
* 42) jar txt [120] unlimited
|
||||
* 43) jar xhtml [120] unlimited
|
||||
* 44) jar xml [120] unlimited
|
||||
* 45) java html [120] unlimited
|
||||
* 46) java txt [120] unlimited
|
||||
* 47) java xhtml [120] unlimited
|
||||
* 48) java xml [120] unlimited
|
||||
* 49) key html [120] unlimited
|
||||
* 50) key txt [120] unlimited
|
||||
* 51) key xhtml [120] unlimited
|
||||
* 52) key xml [120] unlimited
|
||||
* 53) mpp html [120] unlimited
|
||||
* 54) mpp txt [120] unlimited
|
||||
* 55) mpp xhtml [120] unlimited
|
||||
* 56) mpp xml [120] unlimited
|
||||
* 57) numbers html [120] unlimited
|
||||
* 58) numbers txt [120] unlimited
|
||||
* 59) numbers xhtml [120] unlimited
|
||||
* 60) numbers xml [120] unlimited
|
||||
* 61) odc html [120] unlimited
|
||||
* 62) odc txt [120] unlimited
|
||||
* 63) odc xhtml [120] unlimited
|
||||
* 64) odc xml [120] unlimited
|
||||
* 65) odi html [120] unlimited
|
||||
* 66) odi txt [120] unlimited
|
||||
* 67) odi xhtml [120] unlimited
|
||||
* 68) odi xml [120] unlimited
|
||||
* 69) odm html [120] unlimited
|
||||
* 70) odm txt [120] unlimited
|
||||
* 71) odm xhtml [120] unlimited
|
||||
* 72) odm xml [120] unlimited
|
||||
* 73) odp html [120] unlimited
|
||||
* 74) odp txt [120] unlimited
|
||||
* 75) odp xhtml [120] unlimited
|
||||
* 76) odp xml [120] unlimited
|
||||
* 77) ods html [120] unlimited
|
||||
* 78) ods txt [120] unlimited
|
||||
* 79) ods xhtml [120] unlimited
|
||||
* 80) ods xml [120] unlimited
|
||||
* 81) odt html [120] unlimited
|
||||
* 82) odt txt [120] unlimited
|
||||
* 83) odt xhtml [120] unlimited
|
||||
* 84) odt xml [120] unlimited
|
||||
* 85) ogx html [120] unlimited
|
||||
* 86) ogx txt [120] unlimited
|
||||
* 87) ogx xhtml [120] unlimited
|
||||
* 88) ogx xml [120] unlimited
|
||||
* 89) oth html [120] unlimited
|
||||
* 90) oth txt [120] unlimited
|
||||
* 91) oth xhtml [120] unlimited
|
||||
* 92) oth xml [120] unlimited
|
||||
* 93) otp html [120] unlimited
|
||||
* 94) otp txt [120] unlimited
|
||||
* 95) otp xhtml [120] unlimited
|
||||
* 96) otp xml [120] unlimited
|
||||
* 97) ots html [120] unlimited
|
||||
* 98) ots txt [120] unlimited
|
||||
* 99) ots xhtml [120] unlimited
|
||||
* 100) ots xml [120] unlimited
|
||||
* 101) ott html [120] unlimited
|
||||
* 102) ott txt [120] unlimited
|
||||
* 103) ott xhtml [120] unlimited
|
||||
* 104) ott xml [120] unlimited
|
||||
* 105) pages html [120] unlimited
|
||||
* 106) pages txt [120] unlimited
|
||||
* 107) pages xhtml [120] unlimited
|
||||
* 108) pages xml [120] unlimited
|
||||
* 109) pdf html [120] unlimited
|
||||
* 110) pdf txt [120] 25 MB
|
||||
* 111) pdf xhtml [120] unlimited
|
||||
* 112) pdf xml [120] unlimited
|
||||
* 113) potm html [120] unlimited
|
||||
* 114) potm txt [120] unlimited
|
||||
* 115) potm xhtml [120] unlimited
|
||||
* 116) potm xml [120] unlimited
|
||||
* 117) potx html [120] unlimited
|
||||
* 118) potx txt [120] unlimited
|
||||
* 119) potx xhtml [120] unlimited
|
||||
* 120) potx xml [120] unlimited
|
||||
* 121) ppam html [120] unlimited
|
||||
* 122) ppam txt [120] unlimited
|
||||
* 123) ppam xhtml [120] unlimited
|
||||
* 124) ppam xml [120] unlimited
|
||||
* 125) ppsm html [120] unlimited
|
||||
* 126) ppsm txt [120] unlimited
|
||||
* 127) ppsm xhtml [120] unlimited
|
||||
* 128) ppsm xml [120] unlimited
|
||||
* 129) ppsx html [120] unlimited
|
||||
* 130) ppsx txt [120] unlimited
|
||||
* 131) ppsx xhtml [120] unlimited
|
||||
* 132) ppsx xml [120] unlimited
|
||||
* 133) ppt html [120] unlimited
|
||||
* 134) ppt txt [120] unlimited
|
||||
* 135) ppt xhtml [120] unlimited
|
||||
* 136) ppt xml [120] unlimited
|
||||
* 137) pptm html [120] unlimited
|
||||
* 138) pptm txt [120] unlimited
|
||||
* 139) pptm xhtml [120] unlimited
|
||||
* 140) pptm xml [120] unlimited
|
||||
* 141) pptx html [120] unlimited
|
||||
* 142) pptx txt [120] unlimited
|
||||
* 143) pptx xhtml [120] unlimited
|
||||
* 144) pptx xml [120] unlimited
|
||||
* 145) rar html [120] unlimited
|
||||
* 146) rar txt [120] unlimited
|
||||
* 147) rar xhtml [120] unlimited
|
||||
* 148) rar xml [120] unlimited
|
||||
* 149) rss html [120] unlimited
|
||||
* 150) rss txt [120] unlimited
|
||||
* 151) rss xhtml [120] unlimited
|
||||
* 152) rss xml [120] unlimited
|
||||
* 153) rtf html [120] unlimited
|
||||
* 154) rtf txt [120] unlimited
|
||||
* 155) rtf xhtml [120] unlimited
|
||||
* 156) rtf xml [120] unlimited
|
||||
* 157) sldm html [120] unlimited
|
||||
* 158) sldm txt [120] unlimited
|
||||
* 159) sldm xhtml [120] unlimited
|
||||
* 160) sldm xml [120] unlimited
|
||||
* 161) sldx html [120] unlimited
|
||||
* 162) sldx txt [120] unlimited
|
||||
* 163) sldx xhtml [120] unlimited
|
||||
* 164) sldx xml [120] unlimited
|
||||
* 165) sxw html [120] unlimited
|
||||
* 166) sxw txt [120] unlimited
|
||||
* 167) sxw xhtml [120] unlimited
|
||||
* 168) sxw xml [120] unlimited
|
||||
* 169) txt html [120] unlimited
|
||||
* 170) txt txt [120] unlimited
|
||||
* 171) txt xhtml [120] unlimited
|
||||
* 172) txt xml [120] unlimited
|
||||
* 173) vsd html [120] unlimited
|
||||
* 174) vsd txt [120] unlimited
|
||||
* 175) vsd xhtml [120] unlimited
|
||||
* 176) vsd xml [120] unlimited
|
||||
* 177) xhtml html [120] unlimited
|
||||
* 178) xhtml txt [120] unlimited
|
||||
* 179) xhtml xhtml [120] unlimited
|
||||
* 180) xhtml xml [120] unlimited
|
||||
* 181) xlam html [120] unlimited
|
||||
* 182) xlam txt [120] unlimited
|
||||
* 183) xlam xhtml [120] unlimited
|
||||
* 184) xlam xml [120] unlimited
|
||||
* 185) xls html [120] unlimited
|
||||
* 186) xls txt [120] unlimited
|
||||
* 187) xls xhtml [120] unlimited
|
||||
* 188) xls xml [120] unlimited
|
||||
* 189) xlsb html [120] unlimited
|
||||
* 190) xlsb txt [120] unlimited
|
||||
* 191) xlsb xhtml [120] unlimited
|
||||
* 192) xlsb xml [120] unlimited
|
||||
* 193) xlsm html [120] unlimited
|
||||
* 194) xlsm txt [120] unlimited
|
||||
* 195) xlsm xhtml [120] unlimited
|
||||
* 196) xlsm xml [120] unlimited
|
||||
* 197) xlsx html [120] unlimited
|
||||
* 198) xlsx txt [120] unlimited
|
||||
* 199) xlsx xhtml [120] unlimited
|
||||
* 200) xlsx xml [120] unlimited
|
||||
* 201) xltm html [120] unlimited
|
||||
* 202) xltm txt [120] unlimited
|
||||
* 203) xltm xhtml [120] unlimited
|
||||
* 204) xltm xml [120] unlimited
|
||||
* 205) xltx html [120] unlimited
|
||||
* 206) xltx txt [120] unlimited
|
||||
* 207) xltx xhtml [120] unlimited
|
||||
* 208) xltx xml [120] unlimited
|
||||
* 209) xml html [120] unlimited
|
||||
* 210) xml txt [120] unlimited
|
||||
* 211) xml xhtml [120] unlimited
|
||||
* 212) xml xml [120] unlimited
|
||||
* 213) z html [120] unlimited
|
||||
* 214) z txt [120] unlimited
|
||||
* 215) z xhtml [120] unlimited
|
||||
* 216) z xml [120] unlimited
|
||||
* TextMining 0 ms
|
||||
* 1) doc html [130] unlimited
|
||||
* 2) doc txt [50] unlimited
|
||||
* 3) doc xhtml [130] unlimited
|
||||
* 4) doc xml [130] unlimited
|
||||
* </pre>
|
||||
*/
|
||||
public class Tika
|
||||
{
|
||||
public static final String ARCHIVE = "Archive";
|
||||
public static final String OUTLOOK_MSG = "OutlookMsg";
|
||||
public static final String PDF_BOX = "PdfBox";
|
||||
public static final String POI_OFFICE = "Office";
|
||||
public static final String POI = "Poi";
|
||||
public static final String POI_OO_XML = "OOXML";
|
||||
public static final String TIKA_AUTO = "TikaAuto";
|
||||
public static final String TEXT_MINING = "TextMining";
|
||||
|
||||
public static final List<String> TRANSFORM_NAMES = ImmutableList.of(
|
||||
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
|
||||
|
||||
public static final String TARGET_MIMETYPE = "--targetMimetype=";
|
||||
public static final String TARGET_ENCODING = "--targetEncoding=";
|
||||
public static final String INCLUDE_CONTENTS = "--includeContents";
|
||||
public static final String NOT_EXTRACT_BOOKMARKS_TEXT = "--notExtractBookmarksText";
|
||||
|
||||
public static final String CSV = "csv";
|
||||
public static final String DOC = "doc";
|
||||
public static final String DOCX = "docx";
|
||||
public static final String HTML = "html";
|
||||
public static final String MSG = "msg";
|
||||
public static final String PDF = "pdf";
|
||||
public static final String PPTX = "pptx";
|
||||
public static final String TXT = "txt";
|
||||
public static final String XHTML = "xhtml";
|
||||
public static final String XSLX = "xslx";
|
||||
public static final String XML = "xml";
|
||||
public static final String ZIP = "zip";
|
||||
|
||||
private final Parser packageParser = new PackageParser();
|
||||
private final Parser pdfParser = new PDFParser();
|
||||
private final Parser officeParser = new OfficeParser();
|
||||
private final Parser autoDetectParser;
|
||||
private final Parser ooXmlParser = new OOXMLParser();
|
||||
private final Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
|
||||
private final PDFParserConfig pdfParserConfig = new PDFParserConfig();
|
||||
|
||||
private final DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
|
||||
{
|
||||
private final List<String> disabledMediaTypes = ImmutableList.of(MIMETYPE_IMAGE_JPEG,
|
||||
MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
|
||||
|
||||
@Override
|
||||
public boolean select(Metadata metadata)
|
||||
{
|
||||
String contentType = metadata.get(Metadata.CONTENT_TYPE);
|
||||
if (contentType == null || contentType.equals("") || disabledMediaTypes == null)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return !disabledMediaTypes.contains(contentType);
|
||||
}
|
||||
};
|
||||
|
||||
public Tika() throws TikaException, IOException, SAXException
|
||||
{
|
||||
ClassLoader classLoader = getClass().getClassLoader();
|
||||
URL tikaConfigXml = classLoader.getResource("tika-config.xml");
|
||||
TikaConfig tikaConfig = new TikaConfig(tikaConfigXml);
|
||||
autoDetectParser = new AutoDetectParser(tikaConfig);
|
||||
}
|
||||
|
||||
// Method included for developer testing
|
||||
public static void main(String[] args)
|
||||
{
|
||||
long start = System.currentTimeMillis();
|
||||
try
|
||||
{
|
||||
new Tika().transform(args);
|
||||
}
|
||||
catch (IllegalArgumentException e)
|
||||
{
|
||||
System.err.println("ERROR " + e.getMessage());
|
||||
System.exit(-1);
|
||||
}
|
||||
catch (IllegalStateException | TikaException | IOException | SAXException e)
|
||||
{
|
||||
System.err.println("ERROR " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
System.exit(-2);
|
||||
}
|
||||
System.out.println("Finished in " + (System.currentTimeMillis() - start) + "ms");
|
||||
}
|
||||
|
||||
// Extracts parameters form args
|
||||
public void transform(String[] args)
|
||||
{
|
||||
String transform = null;
|
||||
String targetMimetype = null;
|
||||
String targetEncoding = null;
|
||||
String sourceFilename = null;
|
||||
String targetFilename = null;
|
||||
Boolean includeContents = null;
|
||||
Boolean notExtractBookmarksText = null;
|
||||
|
||||
for (String arg : args)
|
||||
{
|
||||
if (arg.startsWith("--"))
|
||||
{
|
||||
if (INCLUDE_CONTENTS.startsWith(arg))
|
||||
{
|
||||
getValue(arg, false, includeContents, INCLUDE_CONTENTS);
|
||||
includeContents = true;
|
||||
}
|
||||
else if (arg.startsWith(TARGET_ENCODING))
|
||||
{
|
||||
targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
|
||||
}
|
||||
else if (arg.startsWith(TARGET_MIMETYPE))
|
||||
{
|
||||
targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
|
||||
}
|
||||
else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT))
|
||||
{
|
||||
getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
|
||||
notExtractBookmarksText = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (transform == null)
|
||||
{
|
||||
transform = arg;
|
||||
}
|
||||
else if (sourceFilename == null)
|
||||
{
|
||||
sourceFilename = arg;
|
||||
}
|
||||
else if (targetFilename == null)
|
||||
{
|
||||
targetFilename = arg;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (targetFilename == null)
|
||||
{
|
||||
throw new IllegalArgumentException("Missing arguments");
|
||||
}
|
||||
includeContents = includeContents == null ? false : includeContents;
|
||||
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
|
||||
|
||||
transform(transform, includeContents, notExtractBookmarksText, sourceFilename,
|
||||
targetFilename, targetMimetype, targetEncoding);
|
||||
}
|
||||
|
||||
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
|
||||
{
|
||||
if (value != null)
|
||||
{
|
||||
throw new IllegalArgumentException("Duplicate " + optionName);
|
||||
}
|
||||
String stringValue = arg.substring(optionName.length()).trim();
|
||||
if (!valueExpected && stringValue.length() > 0)
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected value with " + optionName);
|
||||
}
|
||||
if (valueExpected && stringValue.length() == 0)
|
||||
{
|
||||
throw new IllegalArgumentException("Expected value with " + optionName);
|
||||
}
|
||||
return stringValue;
|
||||
}
|
||||
|
||||
// Adds transform specific values such as parser and documentSelector.
|
||||
private void transform(String transform, Boolean includeContents,
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
{
|
||||
Parser parser = null;
|
||||
DocumentSelector documentSelector = null;
|
||||
|
||||
switch (transform)
|
||||
{
|
||||
case ARCHIVE:
|
||||
parser = packageParser;
|
||||
break;
|
||||
case OUTLOOK_MSG:
|
||||
case POI_OFFICE:
|
||||
case TEXT_MINING:
|
||||
parser = officeParser;
|
||||
break;
|
||||
case PDF_BOX:
|
||||
parser = pdfParser;
|
||||
documentSelector = pdfBoxEmbededDocumentSelector;
|
||||
break;
|
||||
case POI:
|
||||
parser = tikaOfficeDetectParser;
|
||||
break;
|
||||
case POI_OO_XML:
|
||||
parser = ooXmlParser;
|
||||
break;
|
||||
case TIKA_AUTO:
|
||||
parser = autoDetectParser;
|
||||
break;
|
||||
}
|
||||
|
||||
transform(parser, documentSelector, includeContents, notExtractBookmarksText,
|
||||
sourceFilename, targetFilename, targetMimetype, targetEncoding);
|
||||
}
|
||||
|
||||
private void transform(Parser parser, DocumentSelector documentSelector,
|
||||
Boolean includeContents,
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
{
|
||||
|
||||
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
|
||||
OutputStream os = new FileOutputStream(targetFilename);
|
||||
Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
|
||||
{
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = buildParseContext(documentSelector, includeContents,
|
||||
notExtractBookmarksText);
|
||||
ContentHandler handler = getContentHandler(targetMimetype, ow);
|
||||
|
||||
parser.parse(is, handler, metadata, context);
|
||||
}
|
||||
catch (SAXException | TikaException | IOException e)
|
||||
{
|
||||
throw new IllegalStateException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private ContentHandler getContentHandler(String targetMimetype, Writer output)
|
||||
{
|
||||
try
|
||||
{
|
||||
ContentHandler handler;
|
||||
if (MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
|
||||
{
|
||||
handler = new BodyContentHandler(output);
|
||||
}
|
||||
else
|
||||
{
|
||||
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
|
||||
TransformerHandler transformerHandler;
|
||||
transformerHandler = factory.newTransformerHandler();
|
||||
transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
|
||||
transformerHandler.setResult(new StreamResult(output));
|
||||
handler = transformerHandler;
|
||||
|
||||
if (MIMETYPE_HTML.equals(targetMimetype))
|
||||
{
|
||||
transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, HTML);
|
||||
return new ExpandedTitleContentHandler(transformerHandler);
|
||||
}
|
||||
else if (MIMETYPE_XHTML.equals(targetMimetype) ||
|
||||
MIMETYPE_XML.equals(targetMimetype))
|
||||
{
|
||||
transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, XML);
|
||||
}
|
||||
else if (MIMETYPE_TEXT_CSV.equals(targetMimetype))
|
||||
{
|
||||
handler = new CsvContentHandler(output);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Invalid target mimetype " + targetMimetype);
|
||||
}
|
||||
}
|
||||
return handler;
|
||||
}
|
||||
catch (TransformerConfigurationException e)
|
||||
{
|
||||
throw new IllegalStateException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper around the normal Tika BodyContentHandler for CSV rather encoding than tab separated.
|
||||
*/
|
||||
protected static class CsvContentHandler extends BodyContentHandler
|
||||
{
|
||||
private static final char[] comma = new char[]{','};
|
||||
private static final Pattern all_nums = Pattern.compile("[\\d\\.\\-\\+]+");
|
||||
|
||||
private boolean inCell = false;
|
||||
private boolean needsComma = false;
|
||||
|
||||
protected CsvContentHandler(Writer output)
|
||||
{
|
||||
super(output);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ignorableWhitespace(char[] ch, int start, int length)
|
||||
throws SAXException
|
||||
{
|
||||
if (length == 1 && ch[0] == '\t')
|
||||
{
|
||||
// Ignore tabs, as they mess up the CSV output
|
||||
}
|
||||
else
|
||||
{
|
||||
super.ignorableWhitespace(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void characters(char[] ch, int start, int length)
|
||||
throws SAXException
|
||||
{
|
||||
if (inCell)
|
||||
{
|
||||
StringBuffer t = new StringBuffer(new String(ch, start, length));
|
||||
|
||||
// Quote if not all numbers
|
||||
if (all_nums.matcher(t).matches())
|
||||
{
|
||||
super.characters(ch, start, length);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = t.length() - 1; i >= 0; i--)
|
||||
{
|
||||
if (t.charAt(i) == '\"')
|
||||
{
|
||||
// Double up double quotes
|
||||
t.insert(i, '\"');
|
||||
i--;
|
||||
}
|
||||
}
|
||||
t.insert(0, '\"');
|
||||
t.append('\"');
|
||||
char[] c = t.toString().toCharArray();
|
||||
super.characters(c, 0, c.length);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
super.characters(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String name,
|
||||
Attributes atts) throws SAXException
|
||||
{
|
||||
if (localName.equals("td"))
|
||||
{
|
||||
inCell = true;
|
||||
if (needsComma)
|
||||
{
|
||||
super.characters(comma, 0, 1);
|
||||
needsComma = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
super.startElement(uri, localName, name, atts);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String uri, String localName, String name)
|
||||
throws SAXException
|
||||
{
|
||||
if (localName.equals("td"))
|
||||
{
|
||||
needsComma = true;
|
||||
inCell = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (localName.equals("tr"))
|
||||
{
|
||||
needsComma = false;
|
||||
}
|
||||
super.endElement(uri, localName, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ParseContext buildParseContext(DocumentSelector documentSelector,
|
||||
Boolean includeContents, Boolean notExtractBookmarksText)
|
||||
{
|
||||
ParseContext context = new ParseContext();
|
||||
|
||||
if (documentSelector != null)
|
||||
{
|
||||
context.set(DocumentSelector.class, documentSelector);
|
||||
}
|
||||
|
||||
if (notExtractBookmarksText.equals(true))
|
||||
{
|
||||
pdfParserConfig.setExtractBookmarksText(false);
|
||||
// pdfParserConfig is set to override default settings
|
||||
context.set(PDFParserConfig.class, pdfParserConfig);
|
||||
}
|
||||
|
||||
// If Archive transform
|
||||
if (includeContents != null)
|
||||
{
|
||||
context.set(Parser.class, includeContents ? autoDetectParser : new EmptyParser());
|
||||
}
|
||||
|
||||
return context;
|
||||
}
|
||||
}
|
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.executors;
|
||||
|
||||
import static org.springframework.http.HttpStatus.BAD_REQUEST;
|
||||
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
import org.alfresco.transform.exceptions.TransformException;
|
||||
import org.alfresco.transformer.logging.LogEntry;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* JavaExecutor implementation for running TIKA transformations. It loads the
|
||||
* transformation logic in the same JVM (check {@link Tika}).
|
||||
*/
|
||||
public class TikaJavaExecutor implements JavaExecutor
|
||||
{
|
||||
private final Tika tika;
|
||||
|
||||
public TikaJavaExecutor()
|
||||
{
|
||||
try
|
||||
{
|
||||
tika = new Tika();
|
||||
}
|
||||
catch (SAXException | IOException | TikaException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to instantiate Tika: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void call(File sourceFile, File targetFile, String... args)
|
||||
throws TransformException
|
||||
{
|
||||
args = buildArgs(sourceFile, targetFile, args);
|
||||
try
|
||||
{
|
||||
tika.transform(args);
|
||||
}
|
||||
catch (IllegalArgumentException e)
|
||||
{
|
||||
throw new TransformException(BAD_REQUEST.value(), getMessage(e));
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new TransformException(INTERNAL_SERVER_ERROR.value(), getMessage(e));
|
||||
}
|
||||
if (!targetFile.exists() || targetFile.length() == 0)
|
||||
{
|
||||
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
|
||||
"Transformer failed to create an output file");
|
||||
}
|
||||
}
|
||||
|
||||
private static String getMessage(Exception e)
|
||||
{
|
||||
return e.getMessage() == null ? e.getClass().getSimpleName() : e.getMessage();
|
||||
}
|
||||
|
||||
private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
|
||||
{
|
||||
ArrayList<String> methodArgs = new ArrayList<>(args.length + 2);
|
||||
StringJoiner sj = new StringJoiner(" ");
|
||||
for (String arg : args)
|
||||
{
|
||||
addArg(methodArgs, sj, arg);
|
||||
}
|
||||
|
||||
addFileArg(methodArgs, sj, sourceFile);
|
||||
addFileArg(methodArgs, sj, targetFile);
|
||||
|
||||
LogEntry.setOptions(sj.toString());
|
||||
|
||||
return methodArgs.toArray(new String[0]);
|
||||
}
|
||||
|
||||
private static void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
|
||||
{
|
||||
if (arg != null)
|
||||
{
|
||||
sj.add(arg);
|
||||
methodArgs.add(arg);
|
||||
}
|
||||
}
|
||||
|
||||
private static void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
|
||||
{
|
||||
if (arg != null)
|
||||
{
|
||||
String path = arg.getAbsolutePath();
|
||||
int i = path.lastIndexOf('.');
|
||||
String ext = i == -1 ? "???" : path.substring(i + 1);
|
||||
sj.add(ext);
|
||||
methodArgs.add(path);
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.executors;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.microsoft.OfficeParser;
|
||||
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
///////// THIS FILE WAS A COPY OF THE CODE IN alfresco-repository /////////////
|
||||
|
||||
/**
|
||||
* <a href="http://tika.apache.org/Apache Tika">Apache Tika</a> assumes that
|
||||
* you either know exactly what your content is, or that
|
||||
* you'll leave it to auto-detection.
|
||||
* Within Alfresco, we usually do know. However, from time
|
||||
* to time, we don't know if we have one of the old or one
|
||||
* of the new office files (eg .xls and .xlsx).
|
||||
* This class allows automatically selects the appropriate
|
||||
* old (OLE2) or new (OOXML) Tika parser as required.
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class TikaOfficeDetectParser implements Parser
|
||||
{
|
||||
private final Parser ole2Parser = new OfficeParser();
|
||||
private final Parser ooxmlParser = new OOXMLParser();
|
||||
|
||||
public Set<MediaType> getSupportedTypes(ParseContext parseContext)
|
||||
{
|
||||
Set<MediaType> types = new HashSet<>();
|
||||
types.addAll(ole2Parser.getSupportedTypes(parseContext));
|
||||
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
|
||||
return types;
|
||||
}
|
||||
|
||||
public void parse(InputStream stream,
|
||||
ContentHandler handler, Metadata metadata,
|
||||
ParseContext parseContext) throws IOException, SAXException,
|
||||
TikaException
|
||||
{
|
||||
byte[] initial4 = new byte[4];
|
||||
InputStream wrapped;
|
||||
// Preserve TikaInputStreams as TikaInputStreams as they require less memory to process
|
||||
if (stream.markSupported())
|
||||
{
|
||||
stream.mark(initial4.length);
|
||||
IOUtils.readFully(stream, initial4);
|
||||
stream.reset();
|
||||
wrapped = stream;
|
||||
}
|
||||
else
|
||||
{
|
||||
PushbackInputStream inp = new PushbackInputStream(stream, 4);
|
||||
IOUtils.readFully(inp, initial4);
|
||||
inp.unread(initial4);
|
||||
wrapped = inp;
|
||||
}
|
||||
|
||||
// Which is it?
|
||||
if (initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
|
||||
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
|
||||
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
|
||||
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
|
||||
{
|
||||
ooxmlParser.parse(wrapped, handler, metadata, parseContext);
|
||||
}
|
||||
else
|
||||
{
|
||||
ole2Parser.parse(wrapped, handler, metadata, parseContext);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated This method will be removed in Apache Tika 1.0.
|
||||
*/
|
||||
public void parse(InputStream stream,
|
||||
ContentHandler handler, Metadata metadata)
|
||||
throws IOException, SAXException, TikaException
|
||||
{
|
||||
parse(stream, handler, metadata, new ParseContext());
|
||||
}
|
||||
}
|
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<properties>
|
||||
<!-- When set, this property hides the warnings Tika logs at start-up about missing optional libraries. -->
|
||||
<!-- See https://issues.apache.org/jira/browse/TIKA-2490 -->
|
||||
<service-loader initializableProblemHandler="ignore"/>
|
||||
</properties>
|
Reference in New Issue
Block a user