Save point: [skip ci]

* pick up alfresco-t-engine-base in 5 base t-engines
* Switch over to using new base
* Moved files in 5 base t-engines so we can remove the -boot package in the next round of changes
This commit is contained in:
alandavis
2022-07-01 17:14:01 +01:00
parent 2e05eb71fb
commit c44ff5016a
373 changed files with 967 additions and 2119 deletions

View File

@@ -31,41 +31,38 @@ import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.alfresco.transform.common.TransformConfigResourceReader;
import org.alfresco.transform.config.TransformConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.Collections;
import static org.alfresco.transform.base.logging.StandardMessages.COMMUNITY_LICENCE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
@Component
public class TikaTransformEngine implements TransformEngine
{
private static final String LICENCE =
"This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n" +
"This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt";
@Autowired
private TransformConfigResourceReader transformConfigResourceReader;
@Value("${transform.core.config.location:classpath:engine_config.json}")
private String engineConfigLocation;
@Override
public String getTransformEngineName()
{
return "0001-Tika";
return "0010-Tika";
}
@Override
public String getStartupMessage() {
return LICENCE;
public String getStartupMessage()
{
return COMMUNITY_LICENCE +
"This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n" +
"This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt";
}
@Override
public TransformConfig getTransformConfig()
{
return transformConfigResourceReader.read(engineConfigLocation);
return transformConfigResourceReader.read("classpath:tika_engine_config.json");
}
@Override

View File

@@ -27,8 +27,8 @@
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.common.TransformException;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.alfresco.transform.common.TransformException;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.DublinCore;
@@ -69,8 +69,6 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common mappings.
@@ -101,17 +99,9 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
private final DateTimeFormatter tikaUTCDateFormater;
private final DateTimeFormatter tikaDateFormater;
public static enum Type
{
EXTRACTOR, EMBEDDER
}
private final Type type;
public AbstractTikaMetadataExtractor(Type type, Logger logger)
{
super(logger);
this.type = type;
super(type, logger);
// TODO Once TIKA-451 is fixed this list will get nicer
DateTimeParser[] parsersUTC = {
@@ -130,26 +120,6 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
tikaDateFormater = new DateTimeFormatterBuilder().append(null, parsers).toFormatter();
}
@Override
public String getTransformerName() {
return getClass().getSimpleName();
}
@Override
public void transform(String sourceMimetype, String sourceEncoding, InputStream inputStream,
String targetMimetype, String targetEncoding, OutputStream outputStream,
Map<String, String> transformOptions) throws Exception
{
if (type == EXTRACTOR)
{
extractMetadata(sourceMimetype, transformOptions, sourceEncoding, inputStream, targetEncoding, outputStream);
}
else
{
embedMetadata(sourceMimetype, transformOptions, sourceEncoding, inputStream, targetEncoding, outputStream);
}
}
/**
* Version which also tries the ISO-8601 formats (in order..),
* and similar formats, which Tika makes use of

View File

@@ -37,7 +37,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
* {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor.

View File

@@ -26,13 +26,6 @@
*/
package org.alfresco.transform.tika.metadataExtractors;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.alfresco.transform.tika.parsers.ExifToolParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
@@ -41,7 +34,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
@Component
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor

View File

@@ -38,8 +38,6 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* MP3 file metadata extractor.
*

View File

@@ -38,7 +38,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
* Outlook MAPI format email metadata extractor.

View File

@@ -38,7 +38,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
* Office file format metadata extractor.

View File

@@ -26,9 +26,6 @@
*/
package org.alfresco.transform.tika.metadataExtractors;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
@@ -51,6 +48,9 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
/**
* {@code "application/vnd.oasis.opendocument..."} and {@code "application/x-vnd.oasis.opendocument..."} metadata extractor.
*

View File

@@ -35,7 +35,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
* Metadata extractor for the PDF documents.

View File

@@ -45,7 +45,7 @@ import java.util.Collections;
import java.util.Set;
import java.util.StringJoiner;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
* POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI.

View File

@@ -43,8 +43,8 @@ import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* A Metadata Extractor which makes use of the Apache Tika Audio Parsers to extract metadata from media files.

View File

@@ -38,9 +38,9 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* A Metadata Extractor which makes use of the Apache Tika auto-detection to select the best parser to extract the

View File

@@ -28,7 +28,7 @@ package org.alfresco.transform.tika.transformers;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.logging.LogEntry;
import org.alfresco.transform.base.util.RequestParamMap;
import org.alfresco.transform.common.RequestParamMap;
import org.alfresco.transform.common.TransformException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.parser.Parser;

View File

@@ -3,8 +3,6 @@ queue:
transform:
core:
version: @project.version@
config:
location: classpath:tika_engine_config.json
tika:
pdfBox:
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}

View File

@@ -26,28 +26,40 @@
*/
package org.alfresco.transform.tika;
import org.alfresco.transform.base.AbstractTransformerControllerTest;
import org.alfresco.transform.base.TransformController;
import org.alfresco.transform.base.executors.RuntimeExec;
import org.alfresco.transform.base.model.FileRefEntity;
import org.alfresco.transform.base.model.FileRefResponse;
import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import javax.servlet.http.HttpServletRequest;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.UUID;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
import static org.alfresco.transform.tika.transformers.Tika.ARCHIVE;
import static org.alfresco.transform.tika.transformers.Tika.CSV;
import static org.alfresco.transform.tika.transformers.Tika.DOC;
import static org.alfresco.transform.tika.transformers.Tika.DOCX;
import static org.alfresco.transform.tika.transformers.Tika.HTML;
import static org.alfresco.transform.tika.transformers.Tika.MSG;
import static org.alfresco.transform.tika.transformers.Tika.OUTLOOK_MSG;
import static org.alfresco.transform.tika.transformers.Tika.PDF;
import static org.alfresco.transform.tika.transformers.Tika.PDF_BOX;
import static org.alfresco.transform.tika.transformers.Tika.POI;
import static org.alfresco.transform.tika.transformers.Tika.OFFICE;
import static org.alfresco.transform.tika.transformers.Tika.OOXML;
import static org.alfresco.transform.tika.transformers.Tika.PPTX;
import static org.alfresco.transform.tika.transformers.Tika.TEXT_MINING;
import static org.alfresco.transform.tika.transformers.Tika.TIKA_AUTO;
import static org.alfresco.transform.tika.transformers.Tika.TXT;
import static org.alfresco.transform.tika.transformers.Tika.XHTML;
import static org.alfresco.transform.tika.transformers.Tika.XML;
import static org.alfresco.transform.tika.transformers.Tika.XSLX;
import static org.alfresco.transform.tika.transformers.Tika.ZIP;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_METADATA_EMBED;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_PRESENTATION;
@@ -61,8 +73,29 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_WORD;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XHTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_ZIP;
import static org.alfresco.transform.base.util.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transform.base.util.RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
import static org.alfresco.transform.common.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transform.common.RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transform.tika.transformers.Tika.ARCHIVE;
import static org.alfresco.transform.tika.transformers.Tika.CSV;
import static org.alfresco.transform.tika.transformers.Tika.DOC;
import static org.alfresco.transform.tika.transformers.Tika.DOCX;
import static org.alfresco.transform.tika.transformers.Tika.HTML;
import static org.alfresco.transform.tika.transformers.Tika.MSG;
import static org.alfresco.transform.tika.transformers.Tika.OFFICE;
import static org.alfresco.transform.tika.transformers.Tika.OOXML;
import static org.alfresco.transform.tika.transformers.Tika.OUTLOOK_MSG;
import static org.alfresco.transform.tika.transformers.Tika.PDF;
import static org.alfresco.transform.tika.transformers.Tika.PDF_BOX;
import static org.alfresco.transform.tika.transformers.Tika.POI;
import static org.alfresco.transform.tika.transformers.Tika.PPTX;
import static org.alfresco.transform.tika.transformers.Tika.TEXT_MINING;
import static org.alfresco.transform.tika.transformers.Tika.TIKA_AUTO;
import static org.alfresco.transform.tika.transformers.Tika.TXT;
import static org.alfresco.transform.tika.transformers.Tika.XHTML;
import static org.alfresco.transform.tika.transformers.Tika.XML;
import static org.alfresco.transform.tika.transformers.Tika.XSLX;
import static org.alfresco.transform.tika.transformers.Tika.ZIP;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -83,40 +116,6 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.UUID;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transform.base.AbstractTransformerControllerTest;
import org.alfresco.transform.base.TransformController;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.base.executors.RuntimeExec;
import org.alfresco.transform.base.model.FileRefEntity;
import org.alfresco.transform.base.model.FileRefResponse;
import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
/**
* Test the TikaController without a server.
* Super class includes tests for the AbstractTransformerController.
@@ -201,14 +200,14 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
assertNotNull(actualOptions);
if (expectedOptions != null)
{
assertEquals(expectedOptions, actualOptions, "expectedOptions");
Assertions.assertEquals(expectedOptions, actualOptions, "expectedOptions");
}
Long actualTimeout = invocation.getArgument(1);
assertNotNull(actualTimeout);
if (expectedTimeout != null)
{
assertEquals(expectedTimeout, actualTimeout, "expectedTimeout");
Assertions.assertEquals(expectedTimeout, actualTimeout, "expectedTimeout");
}
// Copy a test file into the target file location if it exists
@@ -224,7 +223,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
// Check the supplied source file has not been changed.
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
assertArrayEquals(expectedSourceFileBytes, actualSourceFileBytes,
Assertions.assertArrayEquals(expectedSourceFileBytes, actualSourceFileBytes,
"Source file is not the same");
return mockExecutionResult;
@@ -250,8 +249,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
: mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
"targetExtension", this.targetExtension, INCLUDE_CONTENTS, includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
.andExpect(MockMvcResultMatchers.status().is(OK.value()))
.andExpect(MockMvcResultMatchers.header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + this.targetExtension)).
andReturn();
String content = result.getResponse().getContentAsString();
@@ -366,7 +365,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
targetEncoding = "rubbish";
mockMvc.perform(
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
.andExpect(MockMvcResultMatchers.status().is(INTERNAL_SERVER_ERROR.value()));
}
// --- Archive ---
@@ -557,8 +556,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
"sourceMimetype", MIMETYPE_OPENXML_SPREADSHEET);
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
.andExpect(MockMvcResultMatchers.status().is(OK.value()))
.andExpect(MockMvcResultMatchers.header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension)).
andReturn();
@@ -582,8 +581,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
mockMvc.perform(
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension).param(
NOT_EXTRACT_BOOKMARKS_TEXT, "true"))
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
.andExpect(MockMvcResultMatchers.status().is(OK.value()))
.andExpect(MockMvcResultMatchers.header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension));
}
@@ -629,7 +628,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
.header(ACCEPT, APPLICATION_JSON_VALUE)
.header(CONTENT_TYPE, APPLICATION_JSON_VALUE)
.content(tr))
.andExpect(status().is(CREATED.value()))
.andExpect(MockMvcResultMatchers.status().is(CREATED.value()))
.andReturn().getResponse().getContentAsString();
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,

View File

@@ -106,40 +106,40 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
return Stream.of(
//IPTCMetadataExtractor
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-EXT.jpg"),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-multi-creator.jpg"),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "testJPEG_IPTC_EXT.jpg"),
testFile(MIMETYPE_IMAGE_GIF, "gif", "quickIPTC.gif"),
testFile(MIMETYPE_IMAGE_PNG, "png", "quickIPTC.png"),
testFile(MIMETYPE_IMAGE_RAW_ARW, "arw", "20140614_163822_Photogrpahy_Class.ARW"),
testFile(MIMETYPE_IMAGE_RAW_CR2, "cr2", "20141227_134519_Palace.CR2"),
testFile(MIMETYPE_IMAGE_RAW_RW2, "rw2", "20140629_145035_Flower.RW2"),
testFile(MIMETYPE_IMAGE_RAW_NEF, "nef", "20150408_074941_Bush.NEF"),
testFile(MIMETYPE_IMAGE_RAW_RAF, "raf", "20160502_190928_London_Underground.RAF"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-EXT.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-multi-creator.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "testJPEG_IPTC_EXT.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_GIF, "gif", "quickIPTC.gif"),
TestFileInfo.testFile(MIMETYPE_IMAGE_PNG, "png", "quickIPTC.png"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_ARW, "arw", "20140614_163822_Photogrpahy_Class.ARW"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_CR2, "cr2", "20141227_134519_Palace.CR2"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_RW2, "rw2", "20140629_145035_Flower.RW2"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_NEF, "nef", "20150408_074941_Bush.NEF"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_RAF, "raf", "20160502_190928_London_Underground.RAF"),
// DWGMetadataExtractor
testFile(MIMETYPE_APP_DWG, "dwg", "quick2010CustomProps.dwg"),
TestFileInfo.testFile(MIMETYPE_APP_DWG, "dwg", "quick2010CustomProps.dwg"),
// MailMetadataExtractor
testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),
TestFileInfo.testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),
// MP3MetadataExtractor
testFile(MIMETYPE_MP3, "mp3", "quick.mp3"),
TestFileInfo.testFile(MIMETYPE_MP3, "mp3", "quick.mp3"),
// OfficeMetadataExtractor
testFile(MIMETYPE_WORD, "doc", "quick.doc"),
TestFileInfo.testFile(MIMETYPE_WORD, "doc", "quick.doc"),
//testFile("application/x-tika-msoffice-embedded; format=ole10_native", "", ""),
testFile(MIMETYPE_VISIO, "vsd", "quick.vsd"),
TestFileInfo.testFile(MIMETYPE_VISIO, "vsd", "quick.vsd"),
//testFile("application/vnd.ms-project", "mpp", ""),
//testFile("application/x-tika-msworks-spreadsheet", "", ""),
//testFile("application/x-mspublisher", "", ""),
testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
TestFileInfo.testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
//testFile("application/x-tika-msoffice", "", ""),
//testFile(MIMETYPE_VISIO_2013, "vsdx", ""),
//testFile("application/sldworks", "", ""),
//testFile(MIMETYPE_ENCRYPTED_OFFICE, "", ""),
testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),
TestFileInfo.testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),
// OpenDocumentMetadataExtractor
//testFile("application/x-vnd.oasis.opendocument.presentation", "", ""),
@@ -147,14 +147,14 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile(MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE, "", ""),
//testFile("application/x-vnd.oasis.opendocument.text-web", "", ""),
//testFile("application/x-vnd.oasis.opendocument.image", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
//testFile(MIMETYPE_OPENDOCUMENT_TEXT_WEB, "oth", ""),
//testFile("application/x-vnd.oasis.opendocument.spreadsheet-template", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE, "ots", "quick.ots"),
testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE, "ots", "quick.ots"),
TestFileInfo.testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
//testFile("application/x-vnd.oasis.opendocument.graphics-template", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET, "ods", "quick.ods"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET, "ods", "quick.ods"),
//testFile("application/x-vnd.oasis.opendocument.chart", "", ""),
//testFile("application/x-vnd.oasis.opendocument.spreadsheet", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_IMAGE, "odi", ""),
@@ -165,23 +165,23 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/vnd.oasis.opendocument.image-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.image-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.presentation-template", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE, "otp", "quick.otp"),
testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE, "otp", "quick.otp"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
//testFile(MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE, "", ""),
testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
//testFile("application/vnd.oasis.opendocument.chart-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.chart-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.formula-template", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_DATABASE, "odb", ""),
//testFile("application/x-vnd.oasis.opendocument.text-master", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION, "odp", "quick.odp"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION, "odp", "quick.odp"),
//testFile(MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE, "", ""),
//testFile("application/x-vnd.oasis.opendocument.graphics", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
//testFile(MIMETYPE_OPENDOCUMENT_TEXT_MASTER, "odm", ""),
// PdfBoxMetadataExtractor
testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
TestFileInfo.testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
//testFile(MIMETYPE_APPLICATION_ILLUSTRATOR, "ai", ""),
// PoiMetadataExtractor
@@ -189,7 +189,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile(MIMETYPE_OPENXML_SPREADSHEET_ADDIN_MACRO, "xlam", ""),
//testFile(MIMETYPE_OPENXML_WORD_TEMPLATE, "dotx", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_BINARY_MACRO, "xlsb", ""),
testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
TestFileInfo.testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE_MACRO, "sldm", ""),
//testFile("application/vnd.ms-visio.drawing", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO, "ppsm", ""),
@@ -205,8 +205,8 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/vnd.ms-visio.template.macroenabled.12", "", ""),
//testFile("model/vnd.dwfx+xps", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE, "potx", ""),
testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
TestFileInfo.testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
TestFileInfo.testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
//testFile("application/vnd.ms-visio.stencil", "", ""),
//testFile("application/vnd.ms-visio.template", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW, "ppsx", ""),
@@ -214,16 +214,16 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO, "xltm", ""),
// TikaAudioMetadataExtractor
testFile("video/x-m4v", "m4v", "quick.m4v"),
TestFileInfo.testFile("video/x-m4v", "m4v", "quick.m4v"),
//testFile("audio/x-oggflac", "", ""),
//testFile("application/mp4", "", ""),
testFile(MIMETYPE_VORBIS, "ogg", "quick.ogg"),
testFile(MIMETYPE_VIDEO_3GP, "3gp", "quick.3gp"),
TestFileInfo.testFile(MIMETYPE_VORBIS, "ogg", "quick.ogg"),
TestFileInfo.testFile(MIMETYPE_VIDEO_3GP, "3gp", "quick.3gp"),
//testFile(MIMETYPE_FLAC, "flac", ""),
testFile(MIMETYPE_VIDEO_3GP2, "3g2", "quick.3g2"),
testFile(MIMETYPE_VIDEO_QUICKTIME, "mov", "quick.mov"),
testFile(MIMETYPE_AUDIO_MP4, "m4a", "quick.m4a"),
testFile(MIMETYPE_VIDEO_MP4, "mp4", "quick.mp4"),
TestFileInfo.testFile(MIMETYPE_VIDEO_3GP2, "3g2", "quick.3g2"),
TestFileInfo.testFile(MIMETYPE_VIDEO_QUICKTIME, "mov", "quick.mov"),
TestFileInfo.testFile(MIMETYPE_AUDIO_MP4, "m4a", "quick.m4a"),
TestFileInfo.testFile(MIMETYPE_VIDEO_MP4, "mp4", "quick.mp4"),
// TikaAutoMetadataExtractor
@@ -243,12 +243,12 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("audio/midi", "", ""),
//testFile("application/aaigrid", "", ""),
//testFile("application/x-bag", "", ""),
testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
TestFileInfo.testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
//testFile("application/x-quattro-pro; version=9", "", ""),
//testFile("application/x-ibooks+zip", "", ""),
//testFile("audio/wave", "", ""),
//testFile("application/x-midi", "", ""),
testFile(MIMETYPE_XML, "xml", "quick.xml"),
TestFileInfo.testFile(MIMETYPE_XML, "xml", "quick.xml"),
//testFile(MIMETYPE_RSS, "rss", ""),
//testFile("application/x-netcdf", "cdf", ""),
//testFile("video/x-daala", "", ""),
@@ -276,7 +276,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/x-rar", "", ""),
//testFile("image/sar-ceos", "", ""),
//testFile("application/acad", "", ""),
testFile(MIMETYPE_ZIP, "zip", "quick.zip"),
TestFileInfo.testFile(MIMETYPE_ZIP, "zip", "quick.zip"),
//testFile(MIMETYPE_IMAGE_PSD, "psd", ""),
//testFile("application/x-sharedlib", "", ""),
//testFile("audio/x-m4a", "", ""),
@@ -302,12 +302,12 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/x-coredump", "", ""),
//testFile("application/x-msaccess", "", ""),
//testFile("application/x-dods", "", ""),
testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
TestFileInfo.testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
//testFile("application/vnd.ms-outlook-pst", "", ""),
//testFile("image/bsb", "", ""),
//testFile("application/x-cpio", "cpio", ""),
//testFile("audio/ogg", "oga", ""),
testFile("application/x-tar", "tar", "quick.tar"),
TestFileInfo.testFile("application/x-tar", "tar", "quick.tar"),
//testFile("application/x-dbf", "", ""),
//testFile("video/x-ogm", "", ""),
//testFile("application/x-los-las", "", ""),
@@ -327,7 +327,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/x-hdf", "hdf", ""),
//testFile("image/x-mff", "", ""),
//testFile("image/x-srp", "", ""),
testFile(MIMETYPE_IMAGE_BMP, "bmp", "quick.bmp"),
TestFileInfo.testFile(MIMETYPE_IMAGE_BMP, "bmp", "quick.bmp"),
//testFile("video/x-ogguvs", "", ""),
//testFile("drawing/dwg", "", ""),
//testFile("application/x-doq2", "", ""),
@@ -340,7 +340,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/x-wcs", "", ""),
//testFile("text/x-c++src", "", ""),
//testFile("application/timestamped-data", "", ""),
testFile(MIMETYPE_IMAGE_TIFF, "tiff", "quick.tiff"),
TestFileInfo.testFile(MIMETYPE_IMAGE_TIFF, "tiff", "quick.tiff"),
//testFile("application/msexcel", "", ""),
//testFile("application/x-asp", "", ""),
//testFile("application/x-rar-compressed", "rar", ""),
@@ -396,7 +396,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("image/vnd.microsoft.icon", "", ""),
//testFile("application/x-envi", "", ""),
//testFile("application/x-dwg", "", ""),
testFile(MIMETYPE_IWORK_NUMBERS, "numbers", "quick.numbers"),
TestFileInfo.testFile(MIMETYPE_IWORK_NUMBERS, "numbers", "quick.numbers"),
//testFile("application/vnd.ms-word2006ml", "", ""),
//testFile("application/x-bt", "", ""),
//testFile("application/x-font-adobe-metric", "", ""),
@@ -419,7 +419,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("audio/ogg; codecs=opus", "", ""),
//testFile("application/fits", "", ""),
//testFile("application/x-r", "", ""),
testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
TestFileInfo.testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
//testFile("application/java-vm", "", ""),
//testFile("application/mspowerpoint", "", ""),
//testFile("application/x-http", "", ""),
@@ -454,13 +454,13 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/x-executable", "", ""),
//testFile("application/x-isatab", "", ""),
//testFile("application/grass-ascii-grid", "", ""),
testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick.txt"),
TestFileInfo.testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick.txt"),
//testFile("application/gzipped", "", ""),
//testFile("application/x-gxf", "", ""),
//testFile("application/x-cpg", "", ""),
//testFile("application/x-lan", "", ""),
//testFile("application/x-xyz", "", ""),
testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
TestFileInfo.testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
//testFile("image/x-jbig2", "", ""),
//testFile("image/nitf", "", ""),
//testFile("application/mbox", "", ""),
@@ -519,7 +519,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
//testFile("application/x-emf", "", ""),
//testFile("application/x-geo-pdf", "", ""),
//testFile("video/x-ogg-uvs", "", ""),
testFile(MIMETYPE_VIDEO_FLV, "flv", "quick.flv"),
TestFileInfo.testFile(MIMETYPE_VIDEO_FLV, "flv", "quick.flv"),
//testFile("application/x-zip-compressed", "", ""),
//testFile("application/gzip", "", ""),
//testFile("application/x-tika-unix-dump", "", ""),
@@ -546,7 +546,7 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
// Test MNT-15219 Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may
// cause OutOfMemory in Tika Note - doesn't use extractFromMimetype
testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "dmsu1332-reproduced.xlsx")
TestFileInfo.testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "dmsu1332-reproduced.xlsx")
);
}
@@ -565,13 +565,13 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
// - the replacement TikaCoreProperties.SUBJECT raw metadata changed into a multi value
// The following test files were the ones that failed.
return Stream.of(
testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
testFile(MIMETYPE_PDF, "pdf", "quick.pdf")
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
TestFileInfo.testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
TestFileInfo.testFile(MIMETYPE_PDF, "pdf", "quick.pdf")
);
}
}

View File

@@ -84,7 +84,7 @@ public class TikaTransformationIT
sourceFile, sourceMimetype, targetMimetype, targetExtension);
try
{
final ResponseEntity<Resource> response = sendTRequest(ENGINE_URL, sourceFile, null,
final ResponseEntity<Resource> response = EngineClient.sendTRequest(ENGINE_URL, sourceFile, null,
targetMimetype, targetExtension, ImmutableMap.of(
"targetEncoding", "UTF-8",
"sourceMimetype", sourceMimetype));

Some files were not shown because too many files have changed in this diff Show More