REPO-3425 Transformers: Tika based transformers

This commit is contained in:
Alan Davis 2018-06-28 13:25:01 +01:00
parent c9ced17097
commit 82c5e3e96a
31 changed files with 1997 additions and 55 deletions

View File

@ -3,7 +3,7 @@
# The container is only intended to be used with the Alfresco Enterprise edition which is covered by https://www.alfresco.com/legal/agreements and https://www.alfresco.com/terms-use.
# alfresco-pdf-renderer uses the PDFium library from Google Inc. See the license at https://pdfium.googlesource.com/pdfium/+/master/LICENSE or in /pdfium.txt.
FROM quay.io/alfresco/alfresco-base-java:9
FROM alfresco/alfresco-base-java:8
ENV ALFRESCO_PDF_RENDERER_LIB_RPM_URL=https://nexus.alfresco.com/nexus/service/local/repositories/releases/content/org/alfresco/alfresco-pdf-renderer/1.1/alfresco-pdf-renderer-1.1-linux.tgz
ENV PDFIUM_LICENSE_FILE=https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/pdfium.txt

View File

@ -155,16 +155,4 @@ public class AlfrescoPdfRendererController extends AbstractTransformerController
return createAttachment(targetFilename, targetFile, testDelay);
}
/**
 * Records the options on the {@link LogEntry}, packs the options and the absolute source and target
 * paths into a property map, and hands that map to the map-based {@code executeTransformCommand} overload.
 *
 * @param options    option string, stored under the "options" key
 * @param sourceFile file whose absolute path is stored under the "source" key
 * @param targetFile file whose absolute path is stored under the "target" key; also passed on as is
 * @param timeout    passed through unchanged to the overload
 */
private void executeTransformCommand(String options, File sourceFile, File targetFile, @RequestParam(value = "timeout", required = false) Long timeout)
{
    LogEntry.setOptions(options);

    final String sourcePath = sourceFile.getAbsolutePath();
    final String targetPath = targetFile.getAbsolutePath();

    Map<String, String> commandProperties = new HashMap<String, String>(5);
    commandProperties.put("options", options);
    commandProperties.put("source", sourcePath);
    commandProperties.put("target", targetPath);

    executeTransformCommand(commandProperties, targetFile, timeout);
}
}

View File

@ -51,7 +51,7 @@ public class AlfrescoPdfRendererControllerTest extends AbstractTransformerContro
@Before
public void before() throws IOException
{
super.mockTransformCommand(controller, "pdf", "png", "application/pdf");
super.mockTransformCommand(controller, "pdf", "png", "application/pdf", true);
}
@Test

View File

@ -3,7 +3,7 @@
# The container is only intended to be used with the Alfresco Enterprise edition which is covered by https://www.alfresco.com/legal/agreements and https://www.alfresco.com/terms-use.
# ImageMagick is from ImageMagick Studio LLC. See the license at http://www.imagemagick.org/script/license.php or in /ImageMagick-license.txt.
FROM quay.io/alfresco/alfresco-base-java:9
FROM alfresco/alfresco-base-java:8
ENV IMAGEMAGICK_RPM_URL=https://nexus.alfresco.com/nexus/service/local/repositories/thirdparty/content/org/imagemagick/imagemagick-distribution/7.0.7-27/imagemagick-distribution-7.0.7-27-linux.rpm
ENV IMAGEMAGICK_LIB_RPM_URL=https://nexus.alfresco.com/nexus/service/local/repositories/thirdparty/content/org/imagemagick/imagemagick-distribution/7.0.7-27/imagemagick-distribution-7.0.7-27-libs-linux.rpm

View File

@ -51,7 +51,7 @@ public class ImageMagickControllerTest extends AbstractTransformerControllerTest
@Before
public void before() throws IOException
{
super.mockTransformCommand(controller, "jpg", "png", "image/jpg");
super.mockTransformCommand(controller, "jpg", "png", "image/jpg", true);
}
@Test

View File

@ -3,7 +3,7 @@
# The container is only intended to be used with the Alfresco Enterprise edition which is covered by https://www.alfresco.com/legal/agreements and https://www.alfresco.com/terms-use.
# LibreOffice is from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt.
FROM quay.io/alfresco/alfresco-base-java:9
FROM alfresco/alfresco-base-java:8
ENV LIBREOFFICE_RPM_URL=https://nexus.alfresco.com/nexus/service/local/repositories/thirdparty/content/org/libreoffice/libreoffice-dist/5.4.6/libreoffice-dist-5.4.6-linux.gz
ENV LIBREOFFICE_LICENSE_FILE=https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/libreoffice.txt

View File

@ -0,0 +1 @@
target/docker/

View File

@ -0,0 +1,20 @@
# Image provides a container in which to run Tika transformations for Alfresco Enterprise Content Services.
# The container is only intended to be used with the Alfresco Enterprise edition which is covered by https://www.alfresco.com/legal/agreements and https://www.alfresco.com/terms-use.
# Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0.

FROM alfresco/alfresco-base-java:8

# Location of the Apache 2.0 license text that is downloaded into the image below.
ENV APACHE_LICENSE_FILE=https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt

# NOTE(review): ${env.project_version} is not a Docker variable; presumably the Maven docker plugin
# substitutes it before the build runs - confirm against the plugin configuration.
COPY target/alfresco-docker-tika-${env.project_version}.jar /usr/bin

# Give the jar a version-independent name for the ENTRYPOINT, fetch the license file, and remove
# wget again in the same layer so it does not remain in the image.
RUN ln /usr/bin/alfresco-docker-tika-${env.project_version}.jar /usr/bin/alfresco-docker-tika.jar && \
    yum install -y wget && \
    wget "$APACHE_LICENSE_FILE" && \
    yum remove -y wget && \
    yum clean all

EXPOSE 8090

# Exec form: the JVM runs as PID 1 and receives SIGTERM from `docker stop` directly,
# instead of being a child of `/bin/sh -c` as with the shell form.
ENTRYPOINT ["java", "-jar", "/usr/bin/alfresco-docker-tika.jar"]

View File

@ -0,0 +1,7 @@
### Licenses
* The code in the alfresco-docker-tika project is only intended to be used with the Alfresco Enterprise
Content Repository which is covered by [https://www.alfresco.com/legal/agreements](https://www.alfresco.com/legal/agreements) and [https://www.alfresco.com/terms-use](https://www.alfresco.com/terms-use)
* Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the
[Apache 2.0.txt](https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt)
file placed in the root directory of the docker image.

View File

@ -0,0 +1,268 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>alfresco-docker-tika</artifactId>
<name>Alfresco Docker Tika</name>
<packaging>jar</packaging>
<parent>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-docker-transformers</artifactId>
<version>1.2-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<properties>
<image.name>alfresco/alfresco-tika</image.name>
<image.registry>quay.io</image.registry>
<dependency.poi.version>3.17</dependency.poi.version>
</properties>
<dependencies>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-core</artifactId>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-data-model</artifactId>
</dependency>
<!-- slf4j-api and its log4j binding are kept on the same release: SLF4J advises against mixing versions. -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>1.7.25</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.25</version>
</dependency>
<!-- Tika -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.17-20180201-alfresco-patched</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.17-20180201-alfresco-patched</version>
<exclusions>
<exclusion>
<groupId>com.tdunning</groupId>
<artifactId>json</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Apache POI -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${dependency.poi.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>${dependency.spring-boot.version}</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<version>${dependency.fabric8.version}</version>
<configuration>
<images>
<image>
<name>${image.name}:${image.tag}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
</build>
</image>
</images>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>enterpriseDocker</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<version>${dependency.fabric8.version}</version>
<executions>
<execution>
<id>build-image</id>
<phase>install</phase>
<goals>
<goal>build</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>internal</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<version>${dependency.fabric8.version}</version>
<configuration>
<images>
<image>
<name>${image.registry}/${image.name}:${image.tag}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
</build>
</image>
</images>
</configuration>
<executions>
<execution>
<id>build-push-image</id>
<phase>install</phase>
<goals>
<goal>build</goal>
<goal>push</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>master</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<version>${dependency.fabric8.version}</version>
<configuration>
<images>
<image>
<name>${image.registry}/${image.name}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
</build>
</image>
<image>
<name>${image.name}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
</build>
</image>
</images>
</configuration>
<executions>
<execution>
<id>build-push-image</id>
<phase>install</phase>
<goals>
<goal>build</goal>
<goal>push</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>release</id>
<build>
<plugins>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>fabric8-maven-plugin</artifactId>
<version>${dependency.fabric8.version}</version>
<configuration>
<images>
<image>
<name>${image.name}:${project.version}</name>
<registry>${image.registry}</registry>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
</build>
</image>
<image>
<name>${image.name}:${project.version}</name>
<build>
<dockerFileDir>${project.basedir}/</dockerFileDir>
</build>
</image>
</images>
</configuration>
<executions>
<execution>
<id>build-push-image</id>
<phase>deploy</phase>
<goals>
<goal>build</goal>
<goal>push</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,27 @@
/*
* #%L
* Alfresco Enterprise Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* License rights for this program may be obtained from Alfresco Software, Ltd.
* pursuant to a written agreement and any use of this program without such an
* agreement is prohibited.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
/**
 * Spring Boot entry point for the transformer application.
 *
 * {@code @SpringBootApplication} already enables auto-configuration, so the
 * {@link DataSourceAutoConfiguration} exclusion is declared on it directly rather than
 * through a second {@code @EnableAutoConfiguration} annotation.
 */
@SpringBootApplication(exclude={DataSourceAutoConfiguration.class})
public class Application
{
    /**
     * Starts the Spring application context.
     *
     * @param args command line arguments, passed straight to {@link SpringApplication#run}
     */
    public static void main(String[] args)
    {
        SpringApplication.run(Application.class, args);
    }
}

View File

@ -0,0 +1,801 @@
/*
* #%L
* Alfresco Enterprise Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* License rights for this program may be obtained from Alfresco Software, Ltd.
* pursuant to a written agreement and any use of this program without such an
* agreement is prohibited.
* #L%
*/
package org.alfresco.transformer;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.pkg.PackageParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import static org.alfresco.repo.content.MimetypeMap.*;
/**
* Stripped down command line Tika transformers. Not actually run as a separate process, but the code fits the pattern
* used by transformers that do.
* <pre>
*
* Archive 0 ms
* 1) cpio html [100] unlimited
* 2) cpio txt [50] unlimited
* 3) cpio xhtml [100] unlimited
* 4) cpio xml [100] unlimited
* 5) jar html [100] unlimited
* 6) jar txt [50] unlimited
* 7) jar xhtml [100] unlimited
* 8) jar xml [100] unlimited
* 9) tar html [100] unlimited
* 10) tar txt [50] unlimited
* 11) tar xhtml [100] unlimited
* 12) tar xml [100] unlimited
* 13) zip html [100] unlimited
* 14) zip txt [50] unlimited
* 15) zip xhtml [100] unlimited
* 16) zip xml [100] unlimited
* PdfBox 0 ms
* 1) pdf html [110] unlimited
* 2) pdf txt [50] 25 MB
* 3) pdf xhtml [110] unlimited
* 4) pdf xml [110] unlimited
* OutlookMsg 0 ms
* 1) msg html [125] unlimited
* 2) msg txt [125] unlimited
* 3) msg xhtml [125] unlimited
* 4) msg xml [125] unlimited
* PdfBox 0 ms
* 1) pdf html [110] unlimited
* 2) pdf txt [50] 25 MB
* 3) pdf xhtml [110] unlimited
* 4) pdf xml [110] unlimited
* Office 0 ms
* 1) doc html [130] unlimited
* 2) doc txt [130] unlimited
* 3) doc xhtml [130] unlimited
* 4) doc xml [130] unlimited
* 5) mpp html [130] unlimited
* 6) mpp txt [130] unlimited
* 7) mpp xhtml [130] unlimited
* 8) mpp xml [130] unlimited
* 9) msg html [130] unlimited
* 10) msg txt [130] unlimited
* 11) msg xhtml [130] unlimited
* 12) msg xml [130] unlimited
* 13) ppt html [130] unlimited
* 14) ppt txt [130] unlimited
* 15) ppt xhtml [130] unlimited
* 16) ppt xml [130] unlimited
* 17) vsd html [130] unlimited
* 18) vsd txt [130] unlimited
* 19) vsd xhtml [130] unlimited
* 20) vsd xml [130] unlimited
* Poi 0 ms
* 1) xls csv [130] unlimited
* 2) xls html [130] unlimited
* 3) xls txt [130] unlimited
* 4) xls xhtml [130] unlimited
* 5) xls xml [130] unlimited
* 6) xlsx csv [130] unlimited
* 7) xlsx html [130] unlimited
* 8) xlsx txt [130] unlimited
* 9) xlsx xhtml [130] unlimited
* 10) xlsx xml [130] unlimited
* OOXML 0 ms
* 1) docm html [130] unlimited
* 2) docm txt [130] unlimited
* 3) docm xhtml [130] unlimited
* 4) docm xml [130] unlimited
* 5) docx html [130] unlimited
* 6) docx txt [130] unlimited
* 7) docx xhtml [130] unlimited
* 8) docx xml [130] unlimited
* 9) dotm html [130] unlimited
* 10) dotm txt [130] unlimited
* 11) dotm xhtml [130] unlimited
* 12) dotm xml [130] unlimited
* 13) dotx html [130] unlimited
* 14) dotx txt [130] unlimited
* 15) dotx xhtml [130] unlimited
* 16) dotx xml [130] unlimited
* 17) potm html [130] unlimited
* 18) potm txt [130] unlimited
* 19) potm xhtml [130] unlimited
* 20) potm xml [130] unlimited
* 21) potx html [130] unlimited
* 22) potx txt [130] unlimited
* 23) potx xhtml [130] unlimited
* 24) potx xml [130] unlimited
* 25) ppam html [130] unlimited
* 26) ppam txt [130] unlimited
* 27) ppam xhtml [130] unlimited
* 28) ppam xml [130] unlimited
* 29) ppsm html [130] unlimited
* 30) ppsm txt [130] unlimited
* 31) ppsm xhtml [130] unlimited
* 32) ppsm xml [130] unlimited
* 33) ppsx html [130] unlimited
* 34) ppsx txt [130] unlimited
* 35) ppsx xhtml [130] unlimited
* 36) ppsx xml [130] unlimited
* 37) pptm html [130] unlimited
* 38) pptm txt [130] unlimited
* 39) pptm xhtml [130] unlimited
* 40) pptm xml [130] unlimited
* 41) pptx html [130] unlimited
* 42) pptx txt [130] unlimited
* 43) pptx xhtml [130] unlimited
* 44) pptx xml [130] unlimited
* 45) sldm html [130] unlimited
* 46) sldm txt [130] unlimited
* 47) sldm xhtml [130] unlimited
* 48) sldm xml [130] unlimited
* 49) sldx html [130] unlimited
* 50) sldx txt [130] unlimited
* 51) sldx xhtml [130] unlimited
* 52) sldx xml [130] unlimited
* 53) xlam html [130] unlimited
* 54) xlam txt [130] unlimited
* 55) xlam xhtml [130] unlimited
* 56) xlam xml [130] unlimited
* 57) xlsb html [130] unlimited
* 58) xlsb txt [130] unlimited
* 59) xlsb xhtml [130] unlimited
* 60) xlsb xml [130] unlimited
* 61) xlsm html [130] unlimited
* 62) xlsm txt [130] unlimited
* 63) xlsm xhtml [130] unlimited
* 64) xlsm xml [130] unlimited
* 65) xlsx html [130] unlimited
* 66) xlsx txt [130] unlimited
* 67) xlsx xhtml [130] unlimited
* 68) xlsx xml [130] unlimited
* 69) xltm html [130] unlimited
* 70) xltm txt [130] unlimited
* 71) xltm xhtml [130] unlimited
* 72) xltm xml [130] unlimited
* 73) xltx html [130] unlimited
* 74) xltx txt [130] unlimited
* 75) xltx xhtml [130] unlimited
* 76) xltx xml [130] unlimited
* TikaAuto 0 ms
* 1) cdf html [120] unlimited
* 2) cdf txt [120] unlimited
* 3) cdf xhtml [120] unlimited
* 4) cdf xml [120] unlimited
* 5) cpio html [120] unlimited
* 6) cpio txt [120] unlimited
* 7) cpio xhtml [120] unlimited
* 8) cpio xml [120] unlimited
* 9) doc html [120] unlimited
* 10) doc txt [120] unlimited
* 11) doc xhtml [120] unlimited
* 12) doc xml [120] unlimited
* 13) docm html [120] unlimited
* 14) docm txt [120] unlimited
* 15) docm xhtml [120] unlimited
* 16) docm xml [120] unlimited
* 17) docx html [120] unlimited
* 18) docx txt [120] unlimited
* 19) docx xhtml [120] unlimited
* 20) docx xml [120] unlimited
* 21) dotm html [120] unlimited
* 22) dotm txt [120] unlimited
* 23) dotm xhtml [120] unlimited
* 24) dotm xml [120] unlimited
* 25) dotx html [120] unlimited
* 26) dotx txt [120] unlimited
* 27) dotx xhtml [120] unlimited
* 28) dotx xml [120] unlimited
* 29) gzip html [120] unlimited
* 30) gzip txt [120] unlimited
* 31) gzip xhtml [120] unlimited
* 32) gzip xml [120] unlimited
* 33) hdf html [120] unlimited
* 34) hdf txt [120] unlimited
* 35) hdf xhtml [120] unlimited
* 36) hdf xml [120] unlimited
* 37) html html [120] unlimited
* 38) html txt [120] unlimited
* 39) html xhtml [120] unlimited
* 40) html xml [120] unlimited
* 41) jar html [120] unlimited
* 42) jar txt [120] unlimited
* 43) jar xhtml [120] unlimited
* 44) jar xml [120] unlimited
* 45) java html [120] unlimited
* 46) java txt [120] unlimited
* 47) java xhtml [120] unlimited
* 48) java xml [120] unlimited
* 49) key html [120] unlimited
* 50) key txt [120] unlimited
* 51) key xhtml [120] unlimited
* 52) key xml [120] unlimited
* 53) mpp html [120] unlimited
* 54) mpp txt [120] unlimited
* 55) mpp xhtml [120] unlimited
* 56) mpp xml [120] unlimited
* 57) numbers html [120] unlimited
* 58) numbers txt [120] unlimited
* 59) numbers xhtml [120] unlimited
* 60) numbers xml [120] unlimited
* 61) odc html [120] unlimited
* 62) odc txt [120] unlimited
* 63) odc xhtml [120] unlimited
* 64) odc xml [120] unlimited
* 65) odi html [120] unlimited
* 66) odi txt [120] unlimited
* 67) odi xhtml [120] unlimited
* 68) odi xml [120] unlimited
* 69) odm html [120] unlimited
* 70) odm txt [120] unlimited
* 71) odm xhtml [120] unlimited
* 72) odm xml [120] unlimited
* 73) odp html [120] unlimited
* 74) odp txt [120] unlimited
* 75) odp xhtml [120] unlimited
* 76) odp xml [120] unlimited
* 77) ods html [120] unlimited
* 78) ods txt [120] unlimited
* 79) ods xhtml [120] unlimited
* 80) ods xml [120] unlimited
* 81) odt html [120] unlimited
* 82) odt txt [120] unlimited
* 83) odt xhtml [120] unlimited
* 84) odt xml [120] unlimited
* 85) ogx html [120] unlimited
* 86) ogx txt [120] unlimited
* 87) ogx xhtml [120] unlimited
* 88) ogx xml [120] unlimited
* 89) oth html [120] unlimited
* 90) oth txt [120] unlimited
* 91) oth xhtml [120] unlimited
* 92) oth xml [120] unlimited
* 93) otp html [120] unlimited
* 94) otp txt [120] unlimited
* 95) otp xhtml [120] unlimited
* 96) otp xml [120] unlimited
* 97) ots html [120] unlimited
* 98) ots txt [120] unlimited
* 99) ots xhtml [120] unlimited
* 100) ots xml [120] unlimited
* 101) ott html [120] unlimited
* 102) ott txt [120] unlimited
* 103) ott xhtml [120] unlimited
* 104) ott xml [120] unlimited
* 105) pages html [120] unlimited
* 106) pages txt [120] unlimited
* 107) pages xhtml [120] unlimited
* 108) pages xml [120] unlimited
* 109) pdf html [120] unlimited
* 110) pdf txt [120] 25 MB
* 111) pdf xhtml [120] unlimited
* 112) pdf xml [120] unlimited
* 113) potm html [120] unlimited
* 114) potm txt [120] unlimited
* 115) potm xhtml [120] unlimited
* 116) potm xml [120] unlimited
* 117) potx html [120] unlimited
* 118) potx txt [120] unlimited
* 119) potx xhtml [120] unlimited
* 120) potx xml [120] unlimited
* 121) ppam html [120] unlimited
* 122) ppam txt [120] unlimited
* 123) ppam xhtml [120] unlimited
* 124) ppam xml [120] unlimited
* 125) ppsm html [120] unlimited
* 126) ppsm txt [120] unlimited
* 127) ppsm xhtml [120] unlimited
* 128) ppsm xml [120] unlimited
* 129) ppsx html [120] unlimited
* 130) ppsx txt [120] unlimited
* 131) ppsx xhtml [120] unlimited
* 132) ppsx xml [120] unlimited
* 133) ppt html [120] unlimited
* 134) ppt txt [120] unlimited
* 135) ppt xhtml [120] unlimited
* 136) ppt xml [120] unlimited
* 137) pptm html [120] unlimited
* 138) pptm txt [120] unlimited
* 139) pptm xhtml [120] unlimited
* 140) pptm xml [120] unlimited
* 141) pptx html [120] unlimited
* 142) pptx txt [120] unlimited
* 143) pptx xhtml [120] unlimited
* 144) pptx xml [120] unlimited
* 145) rar html [120] unlimited
* 146) rar txt [120] unlimited
* 147) rar xhtml [120] unlimited
* 148) rar xml [120] unlimited
* 149) rss html [120] unlimited
* 150) rss txt [120] unlimited
* 151) rss xhtml [120] unlimited
* 152) rss xml [120] unlimited
* 153) rtf html [120] unlimited
* 154) rtf txt [120] unlimited
* 155) rtf xhtml [120] unlimited
* 156) rtf xml [120] unlimited
* 157) sldm html [120] unlimited
* 158) sldm txt [120] unlimited
* 159) sldm xhtml [120] unlimited
* 160) sldm xml [120] unlimited
* 161) sldx html [120] unlimited
* 162) sldx txt [120] unlimited
* 163) sldx xhtml [120] unlimited
* 164) sldx xml [120] unlimited
* 165) sxw html [120] unlimited
* 166) sxw txt [120] unlimited
* 167) sxw xhtml [120] unlimited
* 168) sxw xml [120] unlimited
* 169) txt html [120] unlimited
* 170) txt txt [120] unlimited
* 171) txt xhtml [120] unlimited
* 172) txt xml [120] unlimited
* 173) vsd html [120] unlimited
* 174) vsd txt [120] unlimited
* 175) vsd xhtml [120] unlimited
* 176) vsd xml [120] unlimited
* 177) xhtml html [120] unlimited
* 178) xhtml txt [120] unlimited
* 179) xhtml xhtml [120] unlimited
* 180) xhtml xml [120] unlimited
* 181) xlam html [120] unlimited
* 182) xlam txt [120] unlimited
* 183) xlam xhtml [120] unlimited
* 184) xlam xml [120] unlimited
* 185) xls html [120] unlimited
* 186) xls txt [120] unlimited
* 187) xls xhtml [120] unlimited
* 188) xls xml [120] unlimited
* 189) xlsb html [120] unlimited
* 190) xlsb txt [120] unlimited
* 191) xlsb xhtml [120] unlimited
* 192) xlsb xml [120] unlimited
* 193) xlsm html [120] unlimited
* 194) xlsm txt [120] unlimited
* 195) xlsm xhtml [120] unlimited
* 196) xlsm xml [120] unlimited
* 197) xlsx html [120] unlimited
* 198) xlsx txt [120] unlimited
* 199) xlsx xhtml [120] unlimited
* 200) xlsx xml [120] unlimited
* 201) xltm html [120] unlimited
* 202) xltm txt [120] unlimited
* 203) xltm xhtml [120] unlimited
* 204) xltm xml [120] unlimited
* 205) xltx html [120] unlimited
* 206) xltx txt [120] unlimited
* 207) xltx xhtml [120] unlimited
* 208) xltx xml [120] unlimited
* 209) xml html [120] unlimited
* 210) xml txt [120] unlimited
* 211) xml xhtml [120] unlimited
* 212) xml xml [120] unlimited
* 213) z html [120] unlimited
* 214) z txt [120] unlimited
* 215) z xhtml [120] unlimited
* 216) z xml [120] unlimited
* TextMining 0 ms
* 1) doc html [130] unlimited
* 2) doc txt [50] unlimited
* 3) doc xhtml [130] unlimited
* 4) doc xml [130] unlimited
* </pre>
*/
public class Tika
{
// Transform names. One of these is expected as the first positional argument and selects the
// parser in the switch inside transform(String, Boolean, ...).
public static final String ARCHIVE = "Archive";
public static final String OUTLOOK_MSG = "OutlookMsg";
public static final String PDF_BOX = "PdfBox";
public static final String POI_OFFICE = "Office";
public static final String POI = "Poi";
public static final String POI_OO_XML = "OOXML";
public static final String TIKA_AUTO = "TikaAuto";
public static final String TEXT_MINING = "TextMining";
// All of the transform names above, in declaration order.
public static final List<String> TRANSFORM_NAMES = Arrays.asList(
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
// Option prefixes recognised by transform(String[]). The first two carry a value after the '=',
// INCLUDE_CONTENTS is a flag without a value.
public static final String TARGET_MIMETYPE = "--targetMimetype=";
public static final String TARGET_ENCODING = "--targetEncoding=";
public static final String INCLUDE_CONTENTS = "--includeContents";
// Short type names. HTML and XML are used as the output method in getContentHandler; the others
// are not referenced in the visible code and are presumably file extensions for callers - confirm.
public static final String CSV = "csv";
public static final String DOC = "doc";
public static final String DOCX = "docx";
public static final String HTML = "html";
public static final String MSG = "msg";
public static final String PDF = "pdf";
public static final String PPTX = "pptx";
public static final String TXT = "txt";
public static final String XHTML = "xhtml";
// NOTE(review): name and value look like a misspelling of "xlsx" - confirm against callers before changing.
public static final String XSLX = "xslx";
public static final String XML = "xml";
public static final String ZIP = "zip";
// Parsers, one per group of transform names (see the switch in transform(String, Boolean, ...)).
private Parser packageParser = new PackageParser(); // ARCHIVE
private Parser pdfParser = new PDFParser(); // PDF_BOX
private Parser officeParser = new OfficeParser(); // OUTLOOK_MSG, POI_OFFICE and TEXT_MINING
private Parser autoDetectParser; // TIKA_AUTO; created in the constructor from tika-config.xml
private Parser ooXmlParser = new OOXMLParser(); // POI_OO_XML
private Parser tikaOfficeDetectParser = new TikaOfficeDetectParser(); // POI
/**
 * Document selector used with the PdfBox transform. It accepts every document except those whose
 * content type metadata is JPEG, TIFF or PNG; documents without a content type are accepted.
 * NOTE(review): presumably applied to documents embedded in the PDF via buildParseContext - confirm.
 */
private DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
{
    // Initialised once and never reassigned, so no null check is needed in select().
    private final List<String> disabledMediaTypes =
            Arrays.asList(MIMETYPE_IMAGE_JPEG, MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);

    @Override
    public boolean select(Metadata metadata)
    {
        String contentType = metadata.get(Metadata.CONTENT_TYPE);
        if (contentType == null || contentType.equals(""))
        {
            // Unknown type: nothing to filter on, so let it through.
            return true;
        }
        return !disabledMediaTypes.contains(contentType);
    }
};
/**
 * Creates the auto-detect parser from the {@code tika-config.xml} resource looked up through
 * this class's class loader. The other parsers are created by their field initialisers.
 *
 * @throws IllegalStateException if {@code tika-config.xml} cannot be found by the class loader
 * @throws TikaException declared by the {@link TikaConfig} constructor
 * @throws IOException declared by the {@link TikaConfig} constructor
 * @throws SAXException declared by the {@link TikaConfig} constructor
 */
public Tika() throws TikaException, IOException, SAXException
{
    ClassLoader classLoader = getClass().getClassLoader();
    URL tikaConfigXml = classLoader.getResource("tika-config.xml");
    if (tikaConfigXml == null)
    {
        // getResource returns null when the resource is missing; fail with a clear message
        // rather than handing a null URL to TikaConfig.
        throw new IllegalStateException("Unable to find tika-config.xml on the classpath");
    }
    TikaConfig tikaConfig = new TikaConfig(tikaConfigXml);
    autoDetectParser = new AutoDetectParser(tikaConfig);
}
/**
 * Entry point included for developer testing. Runs a single transform described by the command
 * line arguments and prints the elapsed time on success.
 *
 * Exits with -1 when the arguments are rejected (message only), and with -2 when the transform
 * itself fails (message plus stack trace).
 *
 * @param args see {@link #transform(String[])}
 */
public static void main(String[] args)
{
    final long startedAt = System.currentTimeMillis();
    try
    {
        Tika tika = new Tika();
        tika.transform(args);
    }
    catch (IllegalArgumentException badArguments)
    {
        System.err.println("ERROR "+badArguments.getMessage());
        System.exit(-1);
    }
    catch (IllegalStateException | TikaException | IOException | SAXException failure)
    {
        System.err.println("ERROR "+failure.getMessage());
        failure.printStackTrace();
        System.exit(-2);
    }
    long elapsed = System.currentTimeMillis() - startedAt;
    System.out.println("Finished in "+elapsed+"ms");
}
/**
 * Extracts the parameters from {@code args} and runs the transform.
 *
 * Arguments starting with {@code --} are options: {@code --includeContents} (a flag),
 * {@code --targetEncoding=<value>} and {@code --targetMimetype=<value>}. Each may be given once.
 * The remaining arguments are positional, in this order: transform name, source filename,
 * target filename.
 *
 * @param args the command line style arguments
 * @throws IllegalArgumentException if an option is unknown, duplicated, has an unexpected or
 *         missing value, if there are too many positional arguments, or if the target filename
 *         (and so any earlier positional argument) is missing
 */
public void transform(String[] args)
{
    String transform = null;
    String targetMimetype = null;
    String targetEncoding = null;
    String sourceFilename = null;
    String targetFilename = null;
    Boolean includeContents = null;

    for (String arg: args)
    {
        if (arg.startsWith("--"))
        {
            // The argument must start with the option name, not the other way round: the reversed
            // test accepted any prefix of the option (e.g. "--"), which then failed in
            // getValue's substring call, and rejected "--includeContents=x" as unknown.
            if (arg.startsWith(INCLUDE_CONTENTS))
            {
                // Called only for its checks (duplicate option, unexpected value).
                getValue(arg, false, includeContents, INCLUDE_CONTENTS);
                includeContents = true;
            }
            else if (arg.startsWith(TARGET_ENCODING))
            {
                targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
            }
            else if (arg.startsWith(TARGET_MIMETYPE))
            {
                targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
            }
            else
            {
                throw new IllegalArgumentException("Unexpected argument "+arg);
            }
        }
        else
        {
            // Positional arguments fill transform, source and target in that order.
            if (transform == null)
            {
                transform = arg;
            }
            else if (sourceFilename == null)
            {
                sourceFilename = arg;
            }
            else if (targetFilename == null)
            {
                targetFilename = arg;
            }
            else
            {
                throw new IllegalArgumentException("Unexpected argument "+arg);
            }
        }
    }

    // The target is the last positional argument, so if it is set the others are too.
    if (targetFilename == null)
    {
        throw new IllegalArgumentException("Missing arguments");
    }
    includeContents = includeContents == null ? false : includeContents;

    transform(transform, includeContents, sourceFilename, targetFilename, targetMimetype, targetEncoding);
}
/**
 * Validates one option argument and returns the text that follows the option name.
 *
 * @param arg           the full argument, which begins with {@code optionName}
 * @param valueExpected whether text must follow the option name ({@code true}) or must not ({@code false})
 * @param value         the value already recorded for this option, or {@code null} if not seen yet
 * @param optionName    the option name, used to find the value and in error messages
 * @return the trimmed text after the option name (empty when no value is expected)
 * @throws IllegalArgumentException if the option was already seen, or a value is present or
 *         absent contrary to {@code valueExpected}
 */
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
{
    if (value != null)
    {
        throw new IllegalArgumentException("Duplicate "+optionName);
    }

    final String stringValue = arg.substring(optionName.length()).trim();
    final boolean valuePresent = stringValue.length() > 0;

    if (valueExpected)
    {
        if (!valuePresent)
        {
            throw new IllegalArgumentException("Expected value with "+optionName);
        }
    }
    else if (valuePresent)
    {
        throw new IllegalArgumentException("Unexpected value with "+optionName);
    }
    return stringValue;
}
/**
 * Adds the transform specific values (parser and, for PdfBox, a document selector) and then
 * delegates to the parser based overload.
 *
 * @param transform       one of the transform names in {@link #TRANSFORM_NAMES}
 * @param includeContents passed through to the overload
 * @param sourceFilename  passed through to the overload
 * @param targetFilename  passed through to the overload
 * @param targetMimetype  passed through to the overload
 * @param targetEncoding  passed through to the overload
 * @throws IllegalArgumentException if {@code transform} is not a known transform name
 */
private void transform(String transform, Boolean includeContents,
                       String sourceFilename,
                       String targetFilename, String targetMimetype, String targetEncoding)
{
    Parser parser = null;
    DocumentSelector documentSelector = null;

    switch(transform)
    {
        case ARCHIVE:
            parser = packageParser;
            break;
        case OUTLOOK_MSG:
        case POI_OFFICE:
        case TEXT_MINING:
            parser = officeParser;
            break;
        case PDF_BOX:
            parser = pdfParser;
            documentSelector = pdfBoxEmbededDocumentSelector;
            break;
        case POI:
            parser = tikaOfficeDetectParser;
            break;
        case POI_OO_XML:
            parser = ooXmlParser;
            break;
        case TIKA_AUTO:
            parser = autoDetectParser;
            break;
        default:
            // Without this the parser stayed null and the failure only showed up as a
            // NullPointerException after the target file had already been opened for writing.
            throw new IllegalArgumentException("Unknown transform "+transform);
    }

    transform(parser, documentSelector, includeContents, sourceFilename, targetFilename, targetMimetype, targetEncoding);
}
/**
 * Parses the source file with the given parser and writes the result to the target file.
 *
 * @param parser           the Tika parser to run
 * @param documentSelector passed to {@code buildParseContext}; may be {@code null}
 * @param includeContents  passed to {@code buildParseContext}
 * @param sourceFilename   path of the file to read
 * @param targetFilename   path of the file to write
 * @param targetMimetype   selects the content handler, see {@code getContentHandler}
 * @param targetEncoding   charset name used for the target writer
 * @throws IllegalStateException wrapping any {@link SAXException}, {@link TikaException} or
 *         {@link IOException}, including one raised while closing the streams
 */
private void transform(Parser parser, DocumentSelector documentSelector, Boolean includeContents,
                       String sourceFilename,
                       String targetFilename, String targetMimetype, String targetEncoding)
{
    // Resources are closed in reverse declaration order, so the writer is flushed and closed
    // before the stream underneath it. The output stream is declared on its own so that it is
    // still closed if creating the writer fails (e.g. unsupported encoding).
    try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
         OutputStream os = new FileOutputStream(targetFilename);
         Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
    {
        Metadata metadata = new Metadata();
        ParseContext context = buildParseContext(documentSelector, includeContents);
        ContentHandler handler = getContentHandler(targetMimetype, ow);

        parser.parse(is, handler, metadata, context);
    }
    catch (SAXException | TikaException | IOException e)
    {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
/**
 * Creates the SAX content handler that writes the parsed content in the target format.
 * <ul>
 * <li>plain text: a {@code BodyContentHandler}</li>
 * <li>CSV: a {@code CsvContentHandler}</li>
 * <li>HTML: a transformer handler with the HTML output method, wrapped in an {@code ExpandedTitleContentHandler}</li>
 * <li>XHTML and XML: a transformer handler with the XML output method</li>
 * </ul>
 *
 * @param targetMimetype the mimetype of the output
 * @param output         writer that receives the output
 * @return the content handler to pass to the parser
 * @throws IllegalArgumentException if the target mimetype is not one of those listed above
 * @throws IllegalStateException    if the transformer handler cannot be configured
 */
protected ContentHandler getContentHandler(String targetMimetype, Writer output)
{
    if (MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
    {
        return new BodyContentHandler(output);
    }
    if (MIMETYPE_TEXT_CSV.equals(targetMimetype))
    {
        return new CsvContentHandler(output);
    }

    boolean html = MIMETYPE_HTML.equals(targetMimetype);
    boolean xml = MIMETYPE_XHTML.equals(targetMimetype) ||
                  MIMETYPE_XML.equals(targetMimetype);
    if (!html && !xml)
    {
        throw new IllegalArgumentException("Invalid target mimetype " + targetMimetype);
    }

    // Only the HTML/XHTML/XML outputs need a transformer handler, so it is no longer built
    // (and bound to the writer) for CSV output or for an invalid mimetype.
    try
    {
        SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance();
        TransformerHandler transformerHandler = factory.newTransformerHandler();
        transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
        transformerHandler.setResult(new StreamResult(output));
        if (html)
        {
            transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, HTML);
            return new ExpandedTitleContentHandler(transformerHandler);
        }
        transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, XML);
        return transformerHandler;
    }
    catch (TransformerConfigurationException e)
    {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
/**
 * A wrapper around the normal Tika BodyContentHandler that writes table cells as comma
 * separated values rather than tab separated text.
 * <p>
 * Text received inside a {@code td} element is written as is when it consists only of digits,
 * '.', '-' and '+'; anything else is wrapped in double quotes with embedded double quotes doubled.
 * NOTE: each {@code characters} call is quoted on its own, so a cell delivered in several
 * calls produces several quoted fragments.
 */
protected static class CsvContentHandler extends BodyContentHandler {
    private static final char[] comma = new char[]{ ',' };
    private static final Pattern all_nums = Pattern.compile("[\\d\\.\\-\\+]+");

    /** True while between the start and end of a {@code td} element. */
    private boolean inCell = false;
    /** True once a cell has ended in the current row, so the next cell needs a leading comma. */
    private boolean needsComma = false;

    protected CsvContentHandler(Writer output) {
        super(output);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        // Ignore single tabs, as they mess up the CSV output.
        // The character to inspect is at ch[start]; ch[0] is only correct when start is 0.
        if (length == 1 && ch[start] == '\t') {
            return;
        }
        super.ignorableWhitespace(ch, start, length);
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (!inCell) {
            super.characters(ch, start, length);
            return;
        }

        String text = new String(ch, start, length);
        if (all_nums.matcher(text).matches()) {
            // Numbers are written unquoted
            super.characters(ch, start, length);
            return;
        }

        // Double up every double quote, then wrap the value in quotes. The previous backwards
        // loop decremented its index twice after an insert and so never looked at the character
        // before a quote: in a run of consecutive quotes only every other one was doubled.
        String quoted = "\"" + text.replace("\"", "\"\"") + "\"";
        char[] c = quoted.toCharArray();
        super.characters(c, 0, c.length);
    }

    @Override
    public void startElement(String uri, String localName, String name,
            Attributes atts) throws SAXException {
        if (localName.equals("td")) {
            inCell = true;
            if (needsComma) {
                // needsComma stays set: it is only cleared at the end of the row.
                super.characters(comma, 0, 1);
            }
        } else {
            super.startElement(uri, localName, name, atts);
        }
    }

    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if (localName.equals("td")) {
            needsComma = true;
            inCell = false;
        } else {
            if (localName.equals("tr")) {
                // New row: its first cell must not be preceded by a comma
                needsComma = false;
            }
            super.endElement(uri, localName, name);
        }
    }
}
/**
 * Creates the Tika parse context for a transform.
 *
 * @param documentSelector registered in the context when not {@code null}
 * @param includeContents  when {@code null} no embedded parser is registered; when true the
 *                         auto detect parser is registered, otherwise an {@code EmptyParser}
 * @return the populated parse context
 */
protected ParseContext buildParseContext(DocumentSelector documentSelector, Boolean includeContents)
{
    final ParseContext parseContext = new ParseContext();

    if (documentSelector != null)
    {
        parseContext.set(DocumentSelector.class, documentSelector);
    }

    // pdfParserConfig is never set in the original repo code, so code removed here.

    // Nothing more to do unless this is an Archive transform
    if (includeContents == null)
    {
        return parseContext;
    }

    Parser embeddedParser;
    if (includeContents)
    {
        embeddedParser = autoDetectParser;
    }
    else
    {
        embeddedParser = new EmptyParser();
    }
    parseContext.set(Parser.class, embeddedParser);
    return parseContext;
}
}

View File

@ -0,0 +1,137 @@
/*
* #%L
* Alfresco Enterprise Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* License rights for this program may be obtained from Alfresco Software, Ltd.
* pursuant to a written agreement and any use of this program without such an
* agreement is prohibited.
* #L%
*/
package org.alfresco.transformer;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.xml.sax.SAXException;
import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.IOException;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.Tika.*;
/**
 * Controller for the Docker based Tika transformers.
 *
 * Status Codes:
 *
 * 200 Success
 * 400 Bad Request: Invalid target mimetype &lt;mimetype>
 * 400 Bad Request: Invalid transform value
 * 400 Bad Request: Request parameter &lt;name> is missing (missing mandatory parameter)
 * 400 Bad Request: Request parameter &lt;name> is of the wrong type
 * 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
 * 400 Bad Request: The source filename was not supplied
 * 500 Internal Server Error: (no message with low level IO problems)
 * 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
 * 500 Internal Server Error: Transformer version check exit code was not 0
 * 500 Internal Server Error: Transformer version check failed to create any output
 * 500 Internal Server Error: Could not read the target file
 * 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
 * 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
 * 500 Internal Server Error: Filename encoding error
 * 507 Insufficient Storage: Failed to store the source file
 */
@Controller
public class TikaController extends AbstractTransformerController
{
// Performs the transforms inside this JVM; callTransform(String...) hands its arguments to it.
private Tika tika;
/**
 * Sets up the logger, logs the license banner and creates the Tika instance used for
 * all transforms.
 */
@Autowired
public TikaController() throws TikaException, IOException, SAXException
{
logger = LogFactory.getLog(TikaController.class);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logEnterpriseLicenseMessage();
logger.info("Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
tika = new Tika();
}
/**
 * @return the fixed transformer name "Tika"
 */
@Override
protected String getTransformerName()
{
return "Tika";
}
/**
 * Runs the transform in this JVM by passing the arguments straight to the Tika instance.
 *
 * @param args the transform arguments
 */
@Override
public void callTransform(String... args)
{
tika.transform(args);
}
/**
 * @return a fixed string; no external command is queried for a version
 */
@Override
protected String version()
{
return "Tika available";
}
/**
 * Creates the probe test transform: quick.pdf to quick.txt, run through the PdfBox transform
 * with a plain text target mimetype and UTF-8 target encoding.
 */
@Override
protected ProbeTestTransform getProbeTestTransform()
{
// See the Javadoc on this method and Probes.md for the choice of these values.
// the livenessPercentage is a little large as Tika does tend to suffer from slow transforms that clash with a gc.
return new ProbeTestTransform(this, "quick.pdf", "quick.txt",
60, 16, 400, 10240, 60*30+1, 60*15+20)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
TikaController.this.callTransform(sourceFile, targetFile, PDF_BOX,
TARGET_MIMETYPE+MIMETYPE_TEXT_PLAIN, TARGET_ENCODING+"UTF-8");
}
};
}
/**
 * Handles POST /transform: checks the transform name, stores the uploaded file, runs the
 * transform and returns the target file as an attachment.
 *
 * @param request             the current request, used when creating the source and target files
 * @param sourceMultipartFile the uploaded source file (request parameter "file")
 * @param targetExtension     extension used to build the target filename
 * @param targetMimetype      passed to the transform prefixed with TARGET_MIMETYPE
 * @param targetEncoding      passed to the transform prefixed with TARGET_ENCODING
 * @param timeout             optional; accepted but not used by this method
 * @param testDelay           optional; passed on to createAttachment
 * @param transform           must be contained in TRANSFORM_NAMES
 * @param includeContents     optional; INCLUDE_CONTENTS is only passed to the transform when this is true
 * @return the response holding the target file
 * @throws TransformException with status 400 if the transform value is not in TRANSFORM_NAMES
 */
@PostMapping("/transform")
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam("targetMimetype") String targetMimetype,
@RequestParam("targetEncoding") String targetEncoding,
@RequestParam(value = "timeout", required = false) Long timeout,
@RequestParam(value = "testDelay", required = false) Long testDelay,
@RequestParam(value = "transform") String transform,
@RequestParam(value="includeContents", required = false) Boolean includeContents)
{
// Reject unknown transform names before any temporary file is created.
if (!TRANSFORM_NAMES.contains(transform))
{
throw new TransformException(400, "Invalid transform value");
}
String targetFilename = createTargetFileName(sourceMultipartFile, targetExtension);
File sourceFile = createSourceFile(request, sourceMultipartFile);
File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
// TODO Consider streaming the request and response rather than using temporary files
// https://www.logicbig.com/tutorials/spring-framework/spring-web-mvc/streaming-response-body.html
// A null argument stands for "option not supplied".
callTransform(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
TARGET_MIMETYPE+targetMimetype, TARGET_ENCODING+targetEncoding);
return createAttachment(targetFilename, targetFile, testDelay);
}
}

View File

@ -0,0 +1,117 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
///////// THIS FILE IS A COPY OF THE CODE IN alfresco-repository /////////////
/**
 * <a href="http://tika.apache.org/">Apache Tika</a> assumes that
 * you either know exactly what your content is, or that
 * you'll leave it to auto-detection.
 * Within Alfresco, we usually do know. However, from time
 * to time, we don't know if we have one of the old or one
 * of the new office files (eg .xls and .xlsx).
 * This class automatically selects the appropriate
 * old (OLE2) or new (OOXML) Tika parser as required, by
 * looking at the first four bytes of the content.
 *
 * @author Nick Burch
 */
public class TikaOfficeDetectParser implements Parser {
    private Parser ole2Parser = new OfficeParser();
    private Parser ooxmlParser = new OOXMLParser();

    /**
     * @return the union of the types supported by the OLE2 and the OOXML parser
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        Set<MediaType> types = new HashSet<MediaType>();
        types.addAll(ole2Parser.getSupportedTypes(parseContext));
        types.addAll(ooxmlParser.getSupportedTypes(parseContext));
        return types;
    }

    /**
     * Peeks at the first four bytes of the stream and hands the (unconsumed) stream to the
     * OOXML parser when they match the OOXML file header, and to the OLE2 parser otherwise.
     */
    public void parse(InputStream stream,
            ContentHandler handler, Metadata metadata,
            ParseContext parseContext) throws IOException, SAXException,
            TikaException
    {
        byte[] initial4 = new byte[4];
        InputStream wrapped;
        // Preserve TikaInputStreams as TikaInputStreams as they require less memory to process
        if (stream.markSupported())
        {
            stream.mark(initial4.length);
            IOUtils.readFully(stream, initial4);
            stream.reset();
            wrapped = stream;
        }
        else
        {
            PushbackInputStream inp = new PushbackInputStream(stream, initial4.length);
            int bytesRead = IOUtils.readFully(inp, initial4);
            // Only push back what was actually read. Pushing back the whole buffer after a
            // short read would add zero bytes that were never in the content.
            if (bytesRead > 0)
            {
                inp.unread(initial4, 0, bytesRead);
            }
            wrapped = inp;
        }

        // Which is it?
        if (initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
            initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
            initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
            initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
        {
            ooxmlParser.parse(wrapped, handler, metadata, parseContext);
        }
        else
        {
            ole2Parser.parse(wrapped, handler, metadata, parseContext);
        }
    }

    /**
     * @deprecated This method will be removed in Apache Tika 1.0.
     */
    public void parse(InputStream stream,
            ContentHandler handler, Metadata metadata)
            throws IOException, SAXException, TikaException
    {
        parse(stream, handler, metadata, new ParseContext());
    }
}

Binary file not shown.

View File

@ -0,0 +1,39 @@
<html xmlns:th="http://www.thymeleaf.org">
  <body>
    <div>
      <h2>Tika Test Transformations</h2>
      <!-- Manual test form for POST /transform. Fields marked * are mandatory request parameters.
           The UNSET, BADVALUE and MIXED CASE options are there to exercise the transform name check. -->
      <form method="POST" enctype="multipart/form-data" action="/transform">
        <table>
          <tr><td><div style="text-align:right">transform *</div></td><td><select name="transform">
            <option value="Archive">Archive</option>
            <option value="OutlookMsg">OutlookMsg</option>
            <option selected="selected" value="PdfBox">PdfBox</option>
            <option value="Office">Office</option>
            <option value="Poi">Poi</option>
            <option value="OOXML">OOXML</option>
            <option value="TikaAuto">TikaAuto</option>
            <option value="TextMining">TextMining</option>
            <option value="UNSET"></option>
            <option value="BADVALUE">BADVALUE</option>
            <option value="MIXED CASE TikaAuto">TikaAuto</option>
          </select></td></tr>
          <tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
          <tr><td><div style="text-align:right">targetExtension *</div></td><td><input type="text" name="targetExtension" value="txt" /></td></tr>
          <tr><td><div style="text-align:right">targetMimetype *</div></td><td><input type="text" name="targetMimetype" value="text/plain" /></td></tr>
          <tr><td><div style="text-align:right">targetEncoding *</div></td><td><input type="text" name="targetEncoding" value="UTF-8" /></td></tr>
          <tr><td><div style="text-align:right">includeContents (archive)</div></td><td><input type="checkbox" name="includeContents" value="true" /></td></tr>
          <tr><td><div style="text-align:right">timeout</div></td><td><input type="text" name="timeout" value="" /></td></tr>
          <tr><td><div style="text-align:right">testDelay</div></td><td><input type="text" name="testDelay" value="" /></td></tr>
          <tr><td></td><td><input type="submit" value="Transform" /></td></tr>
        </table>
      </form>
    </div>
    <div>
      <a href="/log">Log entries</a>
    </div>
  </body>
</html>

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<properties>
<!-- This property, when set, will hide the start-up warnings Tika logs when libraries are missing. -->
<!-- See https://issues.apache.org/jira/browse/TIKA-2490 -->
<service-loader initializableProblemHandler="ignore"/>
</properties>

View File

@ -0,0 +1,344 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.mock.mockito.SpyBean;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import static org.alfresco.repo.content.MimetypeMap.*;
import static org.alfresco.transformer.Tika.*;
import static org.springframework.test.util.AssertionErrors.assertTrue;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
/**
* Test the TikaController without a server.
* Super class includes tests for the AbstractTransformerController.
*/
@RunWith(SpringRunner.class)
@WebMvcTest(TikaController.class)
public class TikaControllerTest extends AbstractTransformerControllerTest
{
public static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
public static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
public static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
"\n" +
"The quick brown fox jumps over the lazy dogs";
public static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
@SpyBean
private TikaController controller;
String transform = PDF_BOX;
String targetEncoding = "UTF-8";
String targetMimetype = MIMETYPE_TEXT_PLAIN;
private void transform(String transform, String sourceExtension, String targetExtension,
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
{
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
super.mockTransformCommand(controller, sourceExtension, targetExtension, sourceMimetype, false);
this.transform = transform;
this.targetMimetype = targetMimetype;
System.out.println("Test "+transform+" "+ sourceExtension +" to "+targetExtension);
MockHttpServletRequestBuilder requestBuilder = includeContents == null
? mockMvcRequest("/transform", sourceFile, "targetExtension", this.targetExtension)
: mockMvcRequest("/transform", sourceFile, "targetExtension", this.targetExtension, "includeContents", includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(200))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick." + this.targetExtension)).
andReturn();
String content = result.getResponse().getContentAsString();
assertTrue("The content did not include \""+expectedContentContains, content.contains(expectedContentContains));
}
@Override
// Add extra required parameters to the request.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
{
return super.mockMvcRequest(url, sourceFile, params)
.param("transform", transform)
.param("targetEncoding", targetEncoding)
.param("targetMimetype", targetMimetype);
}
@Test
@Override
public void simpleTransformTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.simpleTransformTest();
}
@Test
@Override
public void testDelayTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.testDelayTest();
}
@Test
@Override
public void badExitCodeTest() throws Exception
{
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
// It is the mock that returns a non zero exit code.
}
@Test
@Override
public void noTargetFileTest() throws Exception
{
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
}
// --- Super class tests (need modified setup) ---
@Test
@Override
public void dotDotSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.dotDotSourceFilenameTest();
}
@Test
@Override
public void noExtensionSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.noExtensionSourceFilenameTest();
}
@Test
@Override
public void badSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.badSourceFilenameTest();
}
@Test
@Override
public void blankSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.blankSourceFilenameTest();
}
@Test
@Override
public void noTargetExtensionTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.noTargetExtensionTest();
}
@Test
@Override
public void calculateMaxTime() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
super.calculateMaxTime();
}
// --- General Tika tests ---
@Test
public void badEncodingTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
targetEncoding = "rubbish";
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(500));
}
// --- Archive ---
@Test
public void zipToTextArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,false,
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n");
}
@Test
public void zipToTextIncludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,true,
"quick.html\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog\n" +
"\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog" +
"\n" +
"\n");
}
@Test
public void zipToTextExcludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
false, "\n" +
"folder/subfolder/quick.jpg\n" +
"\n" +
"\n" +
"quick.doc\n" +
"\n" +
"\n" +
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"quick.txt\n" +
"\n" +
"\n" +
"quick.xml\n" +
"\n");
}
// --- OutlookMsg ---
@Test
public void msgToTxtOutlookMsgTest() throws Exception
{
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null, EXPECTED_MSG_CONTENT_CONTAINS);
}
// --- PdfBox ---
@Test
public void pdfToTxtPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pdfToCsvPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null, EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
}
@Test
public void pdfToXmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null, EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
@Test
public void pdfToXhtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null, EXPECTED_XHTML_CONTENT_CONTAINS);
}
@Test
public void pdfToHtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null, EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
// --- Office ---
@Test
public void msgToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null, EXPECTED_MSG_CONTENT_CONTAINS);
}
@Test
public void docToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- Poi ---
@Test
public void xslxToCsvPoiTest() throws Exception
{
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null, EXPECTED_CSV_CONTENT_CONTAINS);
}
// --- OOXML ---
@Test
public void docxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pptxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TikaAuto ---
@Test
public void ppxtToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void doctToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TextMining ---
@Test
public void docToTxtTextMiningTest() throws Exception
{
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
}
}

View File

@ -0,0 +1,51 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
import org.springframework.test.context.junit4.SpringRunner;
/**
 * Tests TikaController with a server test harness.
 * The tests themselves are inherited; this class only supplies the transformer name and the
 * source extension they use.
 */
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
public class TikaHttpRequestTest extends AbstractHttpRequestTest
{
    @Override
    protected String getTransformerName()
    {
        return "Tika";
    }

    @Override
    protected String getSourceExtension()
    {
        return "pdf";
    }
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,6 @@
The quick brown fox jumps over the lazy dog
Blank Page

Binary file not shown.

Binary file not shown.

View File

@ -51,8 +51,7 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Collection;
import java.util.Map;
import java.util.*;
/**
* <p>Abstract Controller, provides structure and helper methods to sub-class transformer controllers.</p>
@ -376,6 +375,87 @@ public abstract class AbstractTransformerController
}
}
/**
 * Runs an in-JVM transform of the source file into the target file and checks that it
 * produced some output.
 *
 * @param sourceFile the file to transform
 * @param targetFile the file the transform should create
 * @param args       transform specific arguments; {@code null} entries are dropped by buildArgs
 * @throws TransformException with status 400 if the transform throws an IllegalArgumentException,
 *         or with status 500 for any other exception or when the target file is missing or empty
 */
public void callTransform(File sourceFile, File targetFile, String... args) throws TransformException
{
    final String[] transformArgs = buildArgs(sourceFile, targetFile, args);

    try
    {
        callTransform(transformArgs);
    }
    catch (IllegalArgumentException badRequest)
    {
        throw new TransformException(400, getMessage(badRequest));
    }
    catch (Exception failure)
    {
        throw new TransformException(500, getMessage(failure));
    }

    final boolean producedOutput = targetFile.exists() && targetFile.length() != 0;
    if (!producedOutput)
    {
        throw new TransformException(500, "Transformer failed to create an output file");
    }
}
/**
 * @return the exception's message, or the simple name of its class when it has no message
 */
private String getMessage(Exception e)
{
    final String message = e.getMessage();
    if (message != null)
    {
        return message;
    }
    return e.getClass().getSimpleName();
}
/**
 * Hook for transforms that run inside this JVM. This default implementation does nothing.
 *
 * @param args the transform arguments
 */
protected void callTransform(String[] args)
{
// Overridden when the transform is done in the JVM rather than in an external command.
}
/**
 * Builds the argument array for an in-JVM transform: the non-null transform arguments
 * followed by the absolute paths of the source and target files. A space separated summary
 * (with only the file extensions in place of the paths) is recorded via LogEntry.setOptions.
 *
 * @param sourceFile the source file; skipped when {@code null}
 * @param targetFile the target file; skipped when {@code null}
 * @param args       transform specific arguments; {@code null} entries are skipped
 * @return the arguments to pass to callTransform
 */
protected String[] buildArgs(File sourceFile, File targetFile, String[] args)
{
    final StringJoiner loggedOptions = new StringJoiner(" ");
    final ArrayList<String> collected = new ArrayList<>(args.length+2);

    for (int index = 0; index < args.length; index++)
    {
        addArg(collected, loggedOptions, args[index]);
    }
    addFileArg(collected, loggedOptions, sourceFile);
    addFileArg(collected, loggedOptions, targetFile);

    LogEntry.setOptions(loggedOptions.toString());

    final String[] result = new String[collected.size()];
    return collected.toArray(result);
}
/**
 * Appends a non-null argument to both the logged summary and the argument list;
 * a {@code null} argument is ignored.
 */
private void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
{
    if (arg == null)
    {
        return;
    }
    sj.add(arg);
    methodArgs.add(arg);
}
/**
 * Appends a file argument: its absolute path goes into the argument list, while only its
 * extension ("???" when the file name has none) goes into the logged summary.
 * A {@code null} file is ignored.
 */
private void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
{
    if (arg != null)
    {
        // Take the extension from the file name, not from the whole path: a dot in a
        // directory name used to be mistaken for the start of the extension.
        String name = arg.getName();
        int i = name.lastIndexOf('.');
        String ext = i == -1 ? "???" : name.substring(i+1);
        sj.add(ext);
        methodArgs.add(arg.getAbsolutePath());
    }
}
/**
 * Records the options in the log entry and runs the external transform command with the
 * "options", "source" and "target" properties set.
 *
 * @param options    the command options
 * @param sourceFile the source file; its absolute path becomes the "source" property
 * @param targetFile the target file; its absolute path becomes the "target" property
 * @param timeout    passed through to the properties based overload
 */
protected void executeTransformCommand(String options, File sourceFile, File targetFile, Long timeout)
{
    LogEntry.setOptions(options);

    final String sourcePath = sourceFile.getAbsolutePath();
    final String targetPath = targetFile.getAbsolutePath();

    final Map<String, String> commandProperties = new HashMap<>(5);
    commandProperties.put("options", options);
    commandProperties.put("source", sourcePath);
    commandProperties.put("target", targetPath);

    executeTransformCommand(commandProperties, targetFile, timeout);
}
public void executeTransformCommand(Map<String, String> properties, File targetFile, Long timeout)
{
timeout = timeout != null && timeout > 0 ? timeout : 0;

View File

@ -6,4 +6,5 @@ server.port = 8090
logging.level.org.alfresco.transformer.LibreOfficeController=debug
logging.level.org.alfresco.transformer.JodConverterSharedInstance=debug
logging.level.org.alfresco.transformer.AlfrescoPdfRendererController=debug
logging.level.org.alfresco.transformer.ImageMagickController=debug
logging.level.org.alfresco.transformer.ImageMagickController=debug
logging.level.org.alfresco.transformer.TikaController=debug

View File

@ -33,12 +33,10 @@ import org.mockito.stubbing.Answer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.*;
import java.net.URL;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
@ -83,7 +81,9 @@ public abstract class AbstractTransformerControllerTest
protected AbstractTransformerController controller;
// Called by sub class
public void mockTransformCommand(AbstractTransformerController controller, String sourceExtension, String targetExtension, String sourceMimetype) throws IOException
public void mockTransformCommand(AbstractTransformerController controller, String sourceExtension,
String targetExtension, String sourceMimetype,
boolean readTargetFileBytes) throws IOException
{
this.controller = controller;
this.sourceExtension = sourceExtension;
@ -92,8 +92,8 @@ public abstract class AbstractTransformerControllerTest
expectedOptions = null;
expectedSourceSuffix = null;
expectedSourceFileBytes = Files.readAllBytes(getTestFile("quick."+sourceExtension, true).toPath());
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick."+targetExtension, true).toPath());
expectedSourceFileBytes = readTestFile(sourceExtension);
expectedTargetFileBytes = readTargetFileBytes ? readTestFile(targetExtension) : null;
sourceFile = new MockMultipartFile("file", "quick."+sourceExtension, sourceMimetype, expectedSourceFileBytes);
controller.setTransformCommand(mockTransformCommand);
@ -159,6 +159,11 @@ public abstract class AbstractTransformerControllerTest
when(mockExecutionResult.getStdOut()).thenReturn("STDOUT");
}
/**
 * Reads the whole of the required test resource "quick.&lt;extension&gt;".
 *
 * @param extension the extension of the test file, without the dot
 * @return the bytes of the test file
 */
protected byte[] readTestFile(String extension) throws IOException
{
    final File testFile = getTestFile("quick."+extension, true);
    return Files.readAllBytes(testFile.toPath());
}
protected File getTestFile(String testFilename, boolean required) throws IOException
{
ClassLoader classLoader = getClass().getClassLoader();
@ -170,12 +175,26 @@ public abstract class AbstractTransformerControllerTest
return testFileUrl == null ? null : new File(testFileUrl.getFile());
}
/**
 * Builds a multipart file upload request for the given URL, attaching the source file
 * and any additional request parameters.
 *
 * @param url the URL the upload request is sent to (e.g. "/transform")
 * @param sourceFile the multipart file to attach to the request
 * @param params request parameters as alternating name, value pairs
 * @return the request builder, ready to be passed to {@code mockMvc.perform(...)}
 * @throws IllegalArgumentException if {@code params} does not hold an even number of entries
 */
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
{
    // Check the arguments before doing any work, so a bad call fails without side effects.
    if (params.length % 2 != 0)
    {
        throw new IllegalArgumentException("each param should have a name and value.");
    }

    // Use the supplied url rather than a hard coded "/transform", so the parameter is honoured.
    MockHttpServletRequestBuilder builder = MockMvcRequestBuilders.fileUpload(url).file(sourceFile);

    // params is a flat list: [name0, value0, name1, value1, ...]
    for (int i=0; i<params.length; i+=2)
    {
        builder = builder.param(params[i], params[i+1]);
    }
    return builder;
}
@Test
public void simpleTransformTest() throws Exception
{
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", targetExtension))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(200))
.andExpect(content().bytes(expectedTargetFileBytes))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick."+targetExtension));
@ -185,10 +204,7 @@ public abstract class AbstractTransformerControllerTest
public void testDelayTest() throws Exception
{
long start = System.currentTimeMillis();
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", targetExtension)
.param("testDelay", "400"))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension, "testDelay", "400"))
.andExpect(status().is(200))
.andExpect(content().bytes(expectedTargetFileBytes))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick."+targetExtension));
@ -201,9 +217,7 @@ public abstract class AbstractTransformerControllerTest
@Test
public void noTargetFileTest() throws Exception
{
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", "xxx"))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", "xxx"))
.andExpect(status().is(500));
}
@ -212,9 +226,7 @@ public abstract class AbstractTransformerControllerTest
{
when(mockExecutionResult.getExitValue()).thenReturn(1);
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", "xxx"))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", "xxx"))
.andExpect(status().is(400))
.andExpect(status().reason(containsString("Transformer exit code was not 0: \nSTDERR")));
}
@ -225,9 +237,7 @@ public abstract class AbstractTransformerControllerTest
{
sourceFile = new MockMultipartFile("file", "../quick."+sourceExtension, sourceMimetype, expectedSourceFileBytes);
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", targetExtension))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(200))
.andExpect(content().bytes(expectedTargetFileBytes))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick."+targetExtension));
@ -239,9 +249,7 @@ public abstract class AbstractTransformerControllerTest
{
sourceFile = new MockMultipartFile("file", "../quick", sourceMimetype, expectedSourceFileBytes);
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", targetExtension))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(200))
.andExpect(content().bytes(expectedTargetFileBytes))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick."+targetExtension));
@ -253,9 +261,7 @@ public abstract class AbstractTransformerControllerTest
{
sourceFile = new MockMultipartFile("file", "abc/", sourceMimetype, expectedSourceFileBytes);
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", targetExtension))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(400))
.andExpect(status().reason(containsString("The source filename was not supplied")));
}
@ -265,9 +271,7 @@ public abstract class AbstractTransformerControllerTest
{
sourceFile = new MockMultipartFile("file", "", sourceMimetype, expectedSourceFileBytes);
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile)
.param("targetExtension", targetExtension))
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(400))
.andExpect(status().reason(containsString("The source filename was not supplied")));
}
@ -275,8 +279,7 @@ public abstract class AbstractTransformerControllerTest
@Test
public void noTargetExtensionTest() throws Exception
{
mockMvc.perform(MockMvcRequestBuilders.fileUpload("/transform")
.file(sourceFile))
mockMvc.perform(mockMvcRequest("/transform", sourceFile))
.andExpect(status().is(400))
.andExpect(status().reason(containsString("Request parameter targetExtension is missing")));
}

View File

@ -19,7 +19,8 @@
<dependency.pdfbox.version>2.0.8</dependency.pdfbox.version>
<dependency.fabric8.version>3.5.37</dependency.fabric8.version>
<dependency.spring-boot.version>1.5.12.RELEASE</dependency.spring-boot.version>
<dependency.alfresco-core.version>7.2</dependency.alfresco-core.version>
<dependency.alfresco-core.version>7.3</dependency.alfresco-core.version>
<dependency.alfresco-data-model.version>8.8</dependency.alfresco-data-model.version>
<dependency.alfresco-jodconverter-core.version>3.0.1.1</dependency.alfresco-jodconverter-core.version>
<dependency.ch-qos-logback.version>1.2.3</dependency.ch-qos-logback.version>
<env.project_version>${project.version}</env.project_version>
@ -27,6 +28,7 @@
<modules>
<module>alfresco-transformer-base</module>
<module>alfresco-docker-tika</module>
<module>alfresco-docker-alfresco-pdf-renderer</module>
<module>alfresco-docker-imagemagick</module>
<module>alfresco-docker-libreoffice</module>
@ -68,6 +70,11 @@
<artifactId>alfresco-core</artifactId>
<version>${dependency.alfresco-core.version}</version>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-data-model</artifactId>
<version>${dependency.alfresco-data-model.version}</version>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-jodconverter-core</artifactId>

39
scripts/testImages.sh Executable file
View File

@ -0,0 +1,39 @@
#!/bin/bash
# For each transform project, check that the live probe in its docker image works.
#
# The transformer names are derived from the alfresco-docker-* entries in the
# current directory, so run this script from the directory that contains them.
#
# The image tag is read from the "tag" environment variable. It falls back to
# "latest", because an empty tag would give the invalid image reference "<repo>:".

set -e

tag="${tag:-latest}"

WAIT_INTERVAL=1   # seconds to sleep between two probes
TIMEOUT=30        # seconds to wait before giving up on a container

docker images
echo

transformers=$(ls | grep alfresco-docker- | sed 's/alfresco-docker-\(.*\)/\1/')
for transformer in $transformers
do
  echo
  echo "=== $transformer ==="

  repo=$(docker images | awk '{print $1}' | grep "$transformer" | sort -u)
  if [ -z "$repo" ]; then
    # Fail with a clear message rather than an obscure "invalid reference" from docker.
    echo "No docker image found for $transformer" >&2
    exit 1
  fi

  echo docker run --rm -d -p 8090:8090 --name "$transformer" "$repo:$tag"
  docker run --rm -d -p 8090:8090 --name "$transformer" "$repo:$tag" >/dev/null

  COUNTER=0
  started=true
  t0=$(date +%s)
  echo -n "Waiting for $transformer to start "

  # Test curl's exit status directly. A probe that succeeds on the very last
  # attempt still counts as started, as the outcome is tracked in "started"
  # rather than inferred from COUNTER afterwards.
  until curl --output /dev/null --silent --fail http://localhost:8090/live; do
    # -ge (not -eq) so the loop also ends when WAIT_INTERVAL does not divide TIMEOUT.
    if [ "$COUNTER" -ge "$TIMEOUT" ]; then
      started=false
      break
    fi
    printf '.'
    sleep "$WAIT_INTERVAL"
    COUNTER=$((COUNTER + WAIT_INTERVAL))
  done

  t1=$(date +%s)
  delta=$((t1 - t0))

  # Always stop the container (started with --rm) before reporting the result.
  docker stop "$transformer" > /dev/null

  if [ "$started" = true ]; then
    echo " started in $delta seconds"
  else
    echo " did not start after $delta seconds"
    exit 1
  fi
done
echo