REPO-4334 Move metadata extraction into T-Engines (#247)

* Metadata extract code added to T-Engines
* Required a refactor of duplicate code to avoid 3x more duplication:
        - try/catch blocks used to return exit codes
        - calls to java libraries or commands to external processes
        - building of transform options in controllers, adaptors
* integration tests based on current extracts performed in the repo
* Included extract code for LibreOffice, and embed code, even though not used out of the box any more. There may well be custom extracts using them that move to T-Engines.
* removal of unused imports
* minor autoOrient / allowEnlargement bug fixes that were not included in Paddington on the T-Engine side.
This commit is contained in:
Alan Davis
2020-06-11 20:20:22 +01:00
committed by GitHub
parent ca394440bb
commit 06109dee75
158 changed files with 10288 additions and 1454 deletions

View File

@@ -26,10 +26,7 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
import java.util.Arrays;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,7 +40,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,37 +26,20 @@
*/
package org.alfresco.transformer;
import static java.lang.Boolean.parseBoolean;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import java.io.File;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.util.Collections;
import java.util.Map;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PDF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
* Controller for the Docker based Tika transformers.
@@ -109,81 +92,16 @@ public class TikaController extends AbstractTransformerController
// Probe-test hook: runs a PDF to plain-text transform of sourceFile into targetFile.
// NOTE(review): this hunk is rendered without +/- markers. The javaExecutor.call(...) statement
// looks like the removed line and the transform(...) statement like its replacement - confirm
// against the commit.
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
javaExecutor.call(sourceFile, targetFile, PDF_BOX,
TARGET_MIMETYPE + MIMETYPE_TEXT_PLAIN, TARGET_ENCODING + "UTF-8");
// NOTE(review): Collections.emptyMap() is immutable. If this call resolves to the controller's
// transform(String, String, String, Map, File, File), which calls transformOptions.put(...),
// the put will throw UnsupportedOperationException. Passing a mutable map (e.g. new HashMap<>())
// would avoid that - verify.
transform(PDF_BOX, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, Collections.emptyMap(), sourceFile, targetFile);
}
};
}
/**
 * Handles a multipart POST to "/transform": stores the uploaded file, runs the selected
 * transformer via javaExecutor and returns the target file as an attachment.
 *
 * NOTE(review): judging by the hunk header (81 lines before, 16 after) this endpoint appears
 * to be removed by the commit - confirm against the commit.
 *
 * @param request the servlet request, passed to createSourceFile / createTargetFile
 * @param sourceMultipartFile the uploaded "file" part; its original filename is used to derive
 *        the target filename
 * @param sourceMimetype the "sourceMimetype" request parameter, used to select the transformer
 * @param targetExtension the "targetExtension" request parameter, used to build the target filename
 * @param targetMimetype the "targetMimetype" request parameter, used to select the transformer and
 *        passed on to javaExecutor
 * @param targetEncoding the optional "targetEncoding" request parameter, defaulting to "UTF-8"
 * @param timeout the optional "timeout" request parameter; not referenced in this method body
 * @param testDelay the optional "testDelay" request parameter, passed to LogEntry.addDelay
 * @param includeContents the optional "includeContents" request parameter; null is treated as false
 * @param notExtractBookmarksText the optional "notExtractBookmarksText" request parameter; null is
 *        treated as false
 * @return the response produced by createAttachment for the target file
 */
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") final MultipartFile sourceMultipartFile,
@RequestParam("sourceMimetype") final String sourceMimetype,
@RequestParam("targetExtension") final String targetExtension,
@RequestParam("targetMimetype") final String targetMimetype,
@RequestParam(value = "targetEncoding", required = false, defaultValue = "UTF-8") final String targetEncoding,
@RequestParam(value = "timeout", required = false) final Long timeout,
@RequestParam(value = "testDelay", required = false) final Long testDelay,
@RequestParam(value = "includeContents", required = false) final Boolean includeContents,
@RequestParam(value = "notExtractBookmarksText", required = false) final Boolean notExtractBookmarksText)
{
final String targetFilename = createTargetFileName(
sourceMultipartFile.getOriginalFilename(), targetExtension);
// Count this request before any work is done.
getProbeTestTransform().incrementTransformerCount();
final File sourceFile = createSourceFile(request, sourceMultipartFile);
final File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
// TODO Consider streaming the request and response rather than using temporary files
// https://www.logicbig.com/tutorials/spring-framework/spring-web-mvc/streaming-response-body.html
// The options map is only used to pick the transformer name; the executor below receives
// the individual values as separate arguments.
final Map<String, String> transformOptions = createTransformOptions(
"includeContents", includeContents,
"notExtractBookmarksText", notExtractBookmarksText,
"targetEncoding", targetEncoding);
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
// Optional flags are passed as null when unset or false.
javaExecutor.call(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
// The recorded transform time includes whatever LogEntry.addDelay returns for testDelay.
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
}
// NOTE(review): this hunk is rendered without +/- markers, so two versions are interleaved.
// The processTransform(...) signature and the statements from logger.debug(...) through
// javaExecutor.call(...) look like the removed implementation; the transform(...) signature and
// the final two statements look like the added one - confirm against the commit.
//
// As far as the added lines show, transform(...) records transformName in transformOptions under
// TRANSFORM_NAME_PARAMETER and then delegates to javaExecutor.transform(...).
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
// Missing options fall back to "false" / "UTF-8".
final boolean includeContents = parseBoolean(
transformOptions.getOrDefault("includeContents", "false"));
final boolean notExtractBookmarksText = parseBoolean(
transformOptions.getOrDefault("notExtractBookmarksText", "false"));
final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
javaExecutor.call(sourceFile, targetFile, transform,
includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
// NOTE(review): this mutates the caller's map. executeTransformCommand passes
// Collections.emptyMap(), which is immutable, so put(...) would throw
// UnsupportedOperationException there. Copying into a new HashMap before the put would make
// this safe for any caller - verify.
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
javaExecutor.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -26,6 +26,34 @@
*/
package org.alfresco.transformer;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
@@ -59,6 +87,8 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
import static org.alfresco.transformer.util.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transformer.util.RequestParamMap.NOT_EXTRACT_BOOKMARK_TEXT;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -79,34 +109,6 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
/**
* Test the TikaController without a server.
* Super class includes tests for the AbstractTransformerController.
@@ -245,7 +247,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
? mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension)
: mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension, "includeContents", includeContents.toString());
"targetExtension", this.targetExtension, INCLUDE_CONTENTS, includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
@@ -528,7 +530,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
mockMvc.perform(
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param(
"notExtractBookmarksText", "true"))
NOT_EXTRACT_BOOKMARK_TEXT, "true"))
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension));

View File

@@ -0,0 +1,533 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.List;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_APP_DWG;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transformer.TestFileInfo.testFile;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_AUDIO_MP4;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_EXCEL;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_BMP;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_GIF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IWORK_KEYNOTE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IWORK_NUMBERS;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IWORK_PAGES;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_MP3;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PDF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_PPT;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_3GP;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_3GP2;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_FLV;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_MP4;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VIDEO_QUICKTIME;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VISIO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_VORBIS;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
/**
* Metadata integration tests in the Tika T-Engine.
*
* @author adavis
*/
@RunWith(Parameterized.class)
public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
{
/**
 * Creates the test instance for a single parameterized test file.
 *
 * @param testFileInfo one of the entries returned by {@code engineTransformations()}; passed
 *        straight to the superclass constructor
 */
public TikaMetadataExtractsIT(TestFileInfo testFileInfo)
{
super(testFileInfo);
}
@Parameterized.Parameters
public static List<TestFileInfo> engineTransformations()
{
// The following files are the ones tested in the content repository.
// There are many more mimetypes supported by these extractors.
// Where a line has been commented out, the repository code tries to test it but stops because there is
// either no quick file or the target extension has not been registered.
return Stream.of(
// DWGMetadataExtractor
testFile(MIMETYPE_APP_DWG, "dwg", "quick2010CustomProps.dwg"),
// MailMetadataExtractor
testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),
// MP3MetadataExtractor
testFile(MIMETYPE_MP3, "mp3", "quick.mp3"),
// OfficeMetadataExtractor
testFile(MIMETYPE_WORD, "doc", "quick.doc"),
//testFile("application/x-tika-msoffice-embedded; format=ole10_native", "", ""),
testFile(MIMETYPE_VISIO, "vsd", "quick.vsd"),
//testFile("application/vnd.ms-project", "mpp", ""),
//testFile("application/x-tika-msworks-spreadsheet", "", ""),
//testFile("application/x-mspublisher", "", ""),
testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
//testFile("application/x-tika-msoffice", "", ""),
//testFile(MIMETYPE_VISIO_2013, "vsdx", ""),
//testFile("application/sldworks", "", ""),
//testFile(MIMETYPE_ENCRYPTED_OFFICE, "", ""),
testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),
// OpenDocumentMetadataExtractor
//testFile("application/x-vnd.oasis.opendocument.presentation", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_CHART, "odc", ""),
//testFile(MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE, "", ""),
//testFile("application/x-vnd.oasis.opendocument.text-web", "", ""),
//testFile("application/x-vnd.oasis.opendocument.image", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
//testFile(MIMETYPE_OPENDOCUMENT_TEXT_WEB, "oth", ""),
//testFile("application/x-vnd.oasis.opendocument.spreadsheet-template", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE, "ots", "quick.ots"),
testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
//testFile("application/x-vnd.oasis.opendocument.graphics-template", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET, "ods", "quick.ods"),
//testFile("application/x-vnd.oasis.opendocument.chart", "", ""),
//testFile("application/x-vnd.oasis.opendocument.spreadsheet", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_IMAGE, "odi", ""),
//testFile("application/x-vnd.oasis.opendocument.text", "", ""),
//testFile("application/x-vnd.oasis.opendocument.text-template", "", ""),
//testFile("application/vnd.oasis.opendocument.formula-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.formula", "", ""),
//testFile("application/vnd.oasis.opendocument.image-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.image-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.presentation-template", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE, "otp", "quick.otp"),
testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
//testFile(MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE, "", ""),
testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
//testFile("application/vnd.oasis.opendocument.chart-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.chart-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.formula-template", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_DATABASE, "odb", ""),
//testFile("application/x-vnd.oasis.opendocument.text-master", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION, "odp", "quick.odp"),
//testFile(MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE, "", ""),
//testFile("application/x-vnd.oasis.opendocument.graphics", "", ""),
testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
//testFile(MIMETYPE_OPENDOCUMENT_TEXT_MASTER, "odm", ""),
// PdfBoxMetadataExtractor
testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
//testFile(MIMETYPE_APPLICATION_ILLUSTRATOR, "ai", ""),
// PoiMetadataExtractor
//testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE_MACRO, "potm", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_ADDIN_MACRO, "xlam", ""),
//testFile(MIMETYPE_OPENXML_WORD_TEMPLATE, "dotx", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_BINARY_MACRO, "xlsb", ""),
testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE_MACRO, "sldm", ""),
//testFile("application/vnd.ms-visio.drawing", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO, "ppsm", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_MACRO, "pptm", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE, "sldx", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_MACRO, "xlsm", ""),
//testFile(MIMETYPE_OPENXML_WORD_TEMPLATE_MACRO, "dotm", ""),
//testFile(MIMETYPE_OPENXML_WORDPROCESSING_MACRO, "docm", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_ADDIN, "ppam", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE, "xltx", ""),
//testFile("application/vnd.ms-xpsdocument", "", ""),
//testFile("application/vnd.ms-visio.drawing.macroenabled.12", "", ""),
//testFile("application/vnd.ms-visio.template.macroenabled.12", "", ""),
//testFile("model/vnd.dwfx+xps", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE, "potx", ""),
testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
//testFile("application/vnd.ms-visio.stencil", "", ""),
//testFile("application/vnd.ms-visio.template", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW, "ppsx", ""),
//testFile("application/vnd.ms-visio.stencil.macroenabled.12", "", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO, "xltm", ""),
// TikaAudioMetadataExtractor
testFile("video/x-m4v", "m4v", "quick.m4v"),
//testFile("audio/x-oggflac", "", ""),
//testFile("application/mp4", "", ""),
testFile(MIMETYPE_VORBIS, "ogg", "quick.ogg"),
testFile(MIMETYPE_VIDEO_3GP, "3gp", "quick.3gp"),
//testFile(MIMETYPE_FLAC, "flac", ""),
testFile(MIMETYPE_VIDEO_3GP2, "3g2", "quick.3g2"),
testFile(MIMETYPE_VIDEO_QUICKTIME, "mov", "quick.mov"),
testFile(MIMETYPE_AUDIO_MP4, "m4a", "quick.m4a"),
testFile(MIMETYPE_VIDEO_MP4, "mp4", "quick.mp4"),
// TikaAutoMetadataExtractor
// The following <source>_metadata.json files contain null values against author and title.
// This is not new and will be the case in the content repository, but was not tested.
//
// The expected ones are: txt, xml, zip, tar
//
// The unexpected ones are: quick.key, quick.numbers and quick.pages.
//
// quick.bmp, quick.gif, quick.png, quick.3g2, quick.3gp, quick.flv, quick.m4v, quick.mov & quick.mp4
// contain one or more values, but also include nulls. Again this may be correct, a bug or just the
// example quick file rather than a problem with the extractor.
//testFile("application/vnd.ms-htmlhelp", "", ""),
//testFile(MIMETYPE_ATOM, "", ""),
//testFile("audio/midi", "", ""),
//testFile("application/aaigrid", "", ""),
//testFile("application/x-bag", "", ""),
testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
//testFile("application/x-quattro-pro; version=9", "", ""),
//testFile("application/x-ibooks+zip", "", ""),
//testFile("audio/wave", "", ""),
//testFile("application/x-midi", "", ""),
testFile(MIMETYPE_XML, "xml", "quick.xml"),
//testFile(MIMETYPE_RSS, "rss", ""),
//testFile("application/x-netcdf", "cdf", ""),
//testFile("video/x-daala", "", ""),
//testFile("application/matlab-mat", "", ""),
//testFile("audio/aiff", "", ""),
//testFile("application/jaxa-pal-sar", "", ""),
//testFile("image/x-pcraster", "", ""),
//testFile("image/arg", "", ""),
//testFile("application/x-kro", "", ""),
//testFile("image/x-hdf5-image", "", ""),
//testFile("audio/speex", "", ""),
//testFile("image/big-gif", "", ""),
//testFile("application/zlib", "", ""),
//testFile("application/x-cosar", "", ""),
//testFile("application/x-ntv2", "", ""),
//testFile("application/x-archive", "", ""),
//testFile("application/java-archive", "jar", ""),
//testFile("application/x-vnd.sun.xml.writer", "", ""),
//testFile("application/x-gmt", "", ""),
//testFile("application/x-xml", "", ""),
//testFile("application/gzip-compressed", "", ""),
//testFile("image/ida", "", ""),
//testFile("text/x-groovy", "", ""),
//testFile("image/x-emf", "", ""),
//testFile("application/x-rar", "", ""),
//testFile("image/sar-ceos", "", ""),
//testFile("application/acad", "", ""),
testFile(MIMETYPE_ZIP, "zip", "quick.zip"),
//testFile(MIMETYPE_IMAGE_PSD, "psd", ""),
//testFile("application/x-sharedlib", "", ""),
//testFile("audio/x-m4a", "", ""),
//testFile("image/webp", "", ""),
//testFile("application/vnd.wap.xhtml+xml", "", ""),
//testFile("audio/x-aiff", "aiff", ""),
//testFile("application/vnd.ms-spreadsheetml", "", ""),
//testFile("image/x-airsar", "", ""),
//testFile("application/x-pcidsk", "", ""),
//testFile("application/x-java-pack200", "", ""),
//testFile("image/x-fujibas", "", ""),
//testFile("application/x-zmap", "", ""),
//testFile("image/x-bmp", "", ""),
//testFile("image/bpg", "", ""),
//testFile(MIMETYPE_RTF, "rtf", ""),
//testFile("application/x-xz", "", ""),
//testFile("application/x-speex", "", ""),
//testFile("audio/ogg; codecs=speex", "", ""),
//testFile("application/x-l1b", "", ""),
//testFile("application/x-gsbg", "", ""),
//testFile("application/x-sdat", "", ""),
//testFile("application/vnd.ms-visio", "", ""),
//testFile("application/x-coredump", "", ""),
//testFile("application/x-msaccess", "", ""),
//testFile("application/x-dods", "", ""),
testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
//testFile("application/vnd.ms-outlook-pst", "", ""),
//testFile("image/bsb", "", ""),
//testFile("application/x-cpio", "cpio", ""),
//testFile("audio/ogg", "oga", ""),
testFile("application/x-tar", "tar", "quick.tar"),
//testFile("application/x-dbf", "", ""),
//testFile("video/x-ogm", "", ""),
//testFile("application/x-los-las", "", ""),
//testFile("application/autocad_dwg", "", ""),
//testFile("application/vnd.ms-excel.workspace.3", "", ""),
//testFile("application/vnd.ms-excel.workspace.4", "", ""),
//testFile("image/x-bpg", "", ""),
//testFile("gzip/document", "", ""),
//testFile("text/x-java", "", ""),
//testFile("application/x-brotli", "", ""),
//testFile("application/elas", "", ""),
//testFile("image/x-jb2", "", ""),
//testFile("application/x-cappi", "", ""),
//testFile("application/epub+zip", "", ""),
//testFile("application/x-ace2", "", ""),
//testFile("application/x-sas-data", "", ""),
//testFile("application/x-hdf", "hdf", ""),
//testFile("image/x-mff", "", ""),
//testFile("image/x-srp", "", ""),
testFile(MIMETYPE_IMAGE_BMP, "bmp", "quick.bmp"),
//testFile("video/x-ogguvs", "", ""),
//testFile("drawing/dwg", "", ""),
//testFile("application/x-doq2", "", ""),
//testFile("application/x-acad", "", ""),
//testFile("application/x-kml", "", ""),
//testFile("application/x-autocad", "", ""),
//testFile("image/x-mff2", "", ""),
//testFile("application/x-snodas", "", ""),
//testFile("application/terragen", "", ""),
//testFile("application/x-wcs", "", ""),
//testFile("text/x-c++src", "", ""),
//testFile("application/timestamped-data", "", ""),
testFile(MIMETYPE_IMAGE_TIFF, "tiff", "quick.tiff"),
//testFile("application/msexcel", "", ""),
//testFile("application/x-asp", "", ""),
//testFile("application/x-rar-compressed", "rar", ""),
//testFile("application/x-envi-hdr", "", ""),
//testFile("text/iso19139+xml", "", ""),
//testFile("application/vnd.ms-tnef", "", ""),
//testFile("application/x-ecrg-toc", "", ""),
//testFile("application/aig", "", ""),
//testFile("audio/x-wav", "wav", ""),
//testFile("image/emf", "", ""),
//testFile("application/x-bzip", "", ""),
//testFile("application/jdem", "", ""),
//testFile("application/x-webp", "", ""),
//testFile("application/x-arj", "", ""),
//testFile("application/x-lzma", "", ""),
//testFile("application/x-java-vm", "", ""),
//testFile("image/envisat", "", ""),
//testFile("application/x-doq1", "", ""),
//testFile("audio/vnd.wave", "", ""),
//testFile("application/x-ppi", "", ""),
//testFile("image/ilwis", "", ""),
//testFile("application/x-gunzip", "", ""),
//testFile("image/x-icon", "", ""),
//testFile("application/ogg", "ogx", ""),
//testFile(MIMETYPE_IMAGE_SVG, "svg", ""),
//testFile("application/x-ms-owner", "", ""),
//testFile("application/x-grib", "", ""),
//testFile("application/ms-tnef", "", ""),
//testFile("image/fits", "", ""),
//testFile("audio/x-mpeg", "", ""),
//testFile("application/x-bzip2", "", ""),
//testFile("text/tsv", "", ""),
//testFile("application/x-fictionbook+xml", "", ""),
//testFile("application/x-p-aux", "", ""),
//testFile("application/x-font-ttf", "", ""),
//testFile("image/x-xcf", "", ""),
//testFile("image/x-ms-bmp", "", ""),
//testFile("image/wmf", "", ""),
//testFile("image/eir", "", ""),
//testFile("application/x-matlab-data", "", ""),
//testFile("application/deflate64", "", ""),
//testFile("audio/wav", "", ""),
//testFile("application/x-rs2", "", ""),
//testFile("application/vnd.ms-word", "", ""),
//testFile("application/x-tsx", "", ""),
//testFile("application/x-lcp", "", ""),
//testFile("application/x-mbtiles", "", ""),
//testFile("audio/x-oggpcm", "", ""),
//testFile("application/x-epsilon", "", ""),
//testFile("application/x-msgn", "", ""),
//testFile(MIMETYPE_TEXT_CSV, "csv", ""),
//testFile("image/x-dimap", "", ""),
//testFile("image/vnd.microsoft.icon", "", ""),
//testFile("application/x-envi", "", ""),
//testFile("application/x-dwg", "", ""),
testFile(MIMETYPE_IWORK_NUMBERS, "numbers", "quick.numbers"),
//testFile("application/vnd.ms-word2006ml", "", ""),
//testFile("application/x-bt", "", ""),
//testFile("application/x-font-adobe-metric", "", ""),
//testFile("application/x-rst", "", ""),
//testFile("application/vrt", "", ""),
//testFile("application/x-ctg", "", ""),
//testFile("application/x-e00-grid", "", ""),
//testFile("audio/x-ogg-flac", "", ""),
//testFile("application/x-compress", "z", ""),
//testFile("image/x-psd", "", ""),
//testFile("text/rss", "", ""),
//testFile("application/sdts-raster", "", ""),
//testFile("application/oxps", "", ""),
//testFile("application/leveller", "", ""),
//testFile("application/x-ingr", "", ""),
//testFile("image/sgi", "", ""),
//testFile("application/x-pnm", "", ""),
//testFile("image/raster", "", ""),
//testFile("audio/x-ogg-pcm", "", ""),
//testFile("audio/ogg; codecs=opus", "", ""),
//testFile("application/fits", "", ""),
//testFile("application/x-r", "", ""),
testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
//testFile("application/java-vm", "", ""),
//testFile("application/mspowerpoint", "", ""),
//testFile("application/x-http", "", ""),
//testFile("application/x-rmf", "", ""),
//testFile("application/x-ogg", "", ""),
//testFile("video/ogg", "ogv", "quick.ogv"),
//testFile(MIMETYPE_APPLEFILE, "", ""),
//testFile("text/rtf", "", ""),
//testFile("image/adrg", "", ""),
//testFile("video/x-ogg-rgb", "", ""),
//testFile("application/x-ngs-geoid", "", ""),
//testFile("application/x-map", "", ""),
//testFile("image/ceos", "", ""),
//testFile("application/xpm", "", ""),
//testFile("application/x-ers", "", ""),
//testFile("video/x-ogg-yuv", "", ""),
//testFile("application/x-isis2", "", ""),
//testFile("application/x-nwt-grd", "", ""),
//testFile("application/x-isis3", "", ""),
//testFile("application/x-nwt-grc", "", ""),
//testFile("video/daala", "", ""),
//testFile("application/x-blx", "", ""),
//testFile("application/x-tnef", "", ""),
//testFile("video/x-dirac", "", ""),
//testFile("application/x-ndf", "", ""),
//testFile("image/vnd.wap.wbmp", "", ""),
//testFile("video/theora", "", ""),
//testFile("application/kate", "", ""),
//testFile("application/pkcs7-mime", "", ""),
//testFile("image/fit", "", ""),
//testFile("application/x-ctable2", "", ""),
//testFile("application/x-executable", "", ""),
//testFile("application/x-isatab", "", ""),
//testFile("application/grass-ascii-grid", "", ""),
testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick.txt"),
//testFile("application/gzipped", "", ""),
//testFile("application/x-gxf", "", ""),
//testFile("application/x-cpg", "", ""),
//testFile("application/x-lan", "", ""),
//testFile("application/x-xyz", "", ""),
testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
//testFile("image/x-jbig2", "", ""),
//testFile("image/nitf", "", ""),
//testFile("application/mbox", "", ""),
//testFile("application/chm", "", ""),
//testFile("application/x-fast", "", ""),
//testFile("application/x-gsc", "", ""),
//testFile("application/x-deflate", "", ""),
//testFile("application/x-grib2", "", ""),
//testFile("image/x-ozi", "", ""),
//testFile("application/x-pds", "", ""),
//testFile("application/vnd.apple.iwork", "", ""),
//testFile("application/x-usgs-dem", "", ""),
//testFile("application/vnd.ms-excel.sheet.2", "", ""),
//testFile("application/vnd.ms-excel.sheet.3", "", ""),
//testFile("application/dif+xml", "", ""),
//testFile("application/vnd.ms-excel.sheet.4", "", ""),
//testFile("application/x-java", "", ""),
//testFile("image/geotiff", "", ""),
//testFile("application/x-gsag", "", ""),
//testFile("application/x-snappy", "", ""),
//testFile("video/x-theora", "", ""),
//testFile("image/ntf", "", ""),
//testFile("application/x-pdf", "", ""),
//testFile("application/xml", "", ""),
//testFile("application/vnd.wordperfect; version=6.x", "", ""),
//testFile("application/pkcs7-signature", "", ""),
//testFile("application/vnd.wordperfect; version=5.1", "", ""),
//testFile("application/vnd.wordperfect; version=5.0", "", ""),
//testFile("application/x-arj-compressed", "", ""),
//testFile("application/geotopic", "", ""),
//testFile("text/x-java-source", "java", ""),
//testFile("audio/basic", "au", ""),
//testFile("application/pcisdk", "", ""),
//testFile("application/x-rik", "", ""),
//testFile("audio/opus", "", ""),
//testFile(MIMETYPE_IMAGE_JP2, "jp2", ""),
//testFile("application/x-gtx", "", ""),
//testFile("application/x-object", "", ""),
//testFile("application/vnd.ms-wordml", "", ""),
//testFile("image/x-wmf", "", ""),
//testFile("application/x-rpf-toc", "", ""),
//testFile("application/x-srtmhgt", "", ""),
//testFile("application/x-generic-bin", "", ""),
//testFile("text/vnd.iptc.anpa", "", ""),
//testFile("application/x-msmetafile", "", ""),
//testFile("application/x-wms", "", ""),
//testFile("video/x-oggrgb", "", ""),
//testFile("image/xcf", "", ""),
//testFile("application/photoshop", "", ""),
//testFile("application/x-lz4", "", ""),
//testFile("application/x-7z-compressed", "", ""),
//testFile("application/gff", "", ""),
//testFile("video/x-oggyuv", "", ""),
//testFile("application/x-msdownload", "", ""),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
//testFile("image/icns", "", ""),
//testFile("application/x-emf", "", ""),
//testFile("application/x-geo-pdf", "", ""),
//testFile("video/x-ogg-uvs", "", ""),
testFile(MIMETYPE_VIDEO_FLV, "flv", "quick.flv"),
//testFile("application/x-zip-compressed", "", ""),
//testFile("application/gzip", "", ""),
//testFile("application/x-tika-unix-dump", "", ""),
//testFile("application/x-coasp", "", ""),
//testFile("application/x-dipex", "", ""),
//testFile("application/x-til", "", ""),
//testFile("application/x-gzip", "gzip", ""),
//testFile("application/x-gs7bg", "", ""),
//testFile("application/x-unix-archive", "", ""),
//testFile("application/x-elf", "", ""),
//testFile("application/dted", "", ""),
//testFile("application/x-rasterlite", "", ""),
//testFile("audio/x-mp4a", "", ""),
//testFile("application/x-gzip-compressed", "", ""),
//testFile("application/x-chm", "", ""),
//testFile("image/hfa", "", ""),
// Special test cases from the repo tests
// ======================================
// Test for MNT-577: Alfresco runs at 100% CPU for over 10 minutes while extracting metadata from
// a Word office document
// testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "problemFootnotes2.docx")
// Test for MNT-15219: Excel (.xlsx) files containing XML parts (shapes/drawings) with multi-byte
// characters may cause OutOfMemory in Tika. Note: doesn't use extractFromMimetype
testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "dmsu1332-reproduced.xlsx")
).collect(toList());
}
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}created" : "2016-03-29T21:01:55Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Udintsev, Anton (external - Project)",
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}created" : "2011-05-17T13:34:11Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "test file cs5"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,7 @@
{
"{http://www.alfresco.org/model/content/1.0}modified" : "2005-09-20T17:25:00Z",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2005-05-26T12:57:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2010-01-06T17:32:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "M4A",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "MP3",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,8 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "90000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}addressee" : "mark.rogers@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}description" : "This is a quick test",
"{http://www.alfresco.org/model/content/1.0}addressees" : [ "mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com" ],
"{http://www.alfresco.org/model/content/1.0}sentdate" : "2013-01-18T13:44:20Z",
"{http://www.alfresco.org/model/content/1.0}subjectline" : "This is a quick test",
"{http://www.alfresco.org/model/content/1.0}author" : "Mark Rogers",
"{http://www.alfresco.org/model/content/1.0}originator" : "Mark Rogers"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1138362922000,
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1138362371000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 845336008000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1126049640000,
"{http://www.alfresco.org/model/content/1.0}author" : "Jesper Steen Møller",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "Vorbis",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1138362371000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1179313846000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 845336008000,
"{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1126049640000,
"{http://www.alfresco.org/model/content/1.0}author" : "Jesper Steen Møller",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2005-05-26T19:52:58Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,7 @@
{
"{http://www.alfresco.org/model/content/1.0}modified" : "2005-09-20T18:23:41Z",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "1601-01-01T00:00:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "1601-01-01T00:00:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : 1126049640000,
"{http://www.alfresco.org/model/content/1.0}author" : "Jesper Steen Møller",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,11 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "584",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "50.0",
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "413",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/exif/1.0}xResolution" : "50.0",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,7 @@
{
"{http://www.alfresco.org/model/content/1.0}modified" : "2012-09-07T10:36:57Z",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,7 @@
{
"{http://www.alfresco.org/model/content/1.0}modified" : "2005-09-20T18:22:32Z",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "1996-10-14T23:33:28Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "1996-10-14T23:33:28Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

View File

@@ -284,6 +284,7 @@
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.keynote", "priority": 120, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "text/plain"},
@@ -293,6 +294,7 @@
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.numbers", "priority": 120, "targetMediaType": "text/plain"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "text/plain"},
@@ -352,6 +354,7 @@
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/html"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/xml"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/pdf", "priority": 55, "targetMediaType": "text/html"},
{"sourceMediaType": "application/pdf", "maxSourceSizeBytes": 26214400, "priority": 55, "targetMediaType": "text/plain"},
@@ -486,7 +489,12 @@
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/html"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/plain"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"}
{"sourceMediaType": "application/x-compress", "targetMediaType": "text/xml"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "text/html"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "text/plain"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "application/xhtml+xml"},
{"sourceMediaType": "text/csv", "priority": 120, "targetMediaType": "text/xml"}
],
"transformOptions": [
"tikaOptions"
@@ -503,6 +511,464 @@
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "DWGMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "MailMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "MP3MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "OfficeMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/msword", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.visio2013", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msoffice-embedded; format=ole10_native", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msworks-spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-mspublisher", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msoffice", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/sldworks", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "OpenDocumentMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.graphics-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.formula", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.database", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-web", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.chart", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.formula", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.presentation-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-master", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "PdfBoxMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "PoiMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.drawing", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-xpsdocument", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.drawing.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "model/vnd.dwfx+xps", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.stencil", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.stencil.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "TikaAudioMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "video/x-m4v", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-oggflac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mp4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/vorbis", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/3gpp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-flac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/3gpp2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/quicktime", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/mp4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "TikaAutoMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-htmlhelp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/atom+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/midi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/aaigrid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bag", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-quattro-pro; version=9", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ibooks+zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/wave", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-midi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-daala", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/matlab-mat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/aiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/jaxa-pal-sar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-pcraster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/arg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-kro", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-hdf5-image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/big-gif", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/zlib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cosar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ntv2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.sun.xml.writer", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gmt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ida", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-groovy", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/sar-ceos", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/acad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.adobe.photoshop", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sharedlib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-m4a", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/webp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wap.xhtml+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-aiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-spreadsheetml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-airsar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pcidsk", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java-pack200", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-fujibas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-zmap", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/rtf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xz", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg; codecs=speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-l1b", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsbg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sdat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-coredump", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msaccess", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dods", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/png", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-outlook-pst", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bsb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cpio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dbf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-los-las", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/autocad_dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.workspace.3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.workspace.4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-bpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "gzip/document", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-java", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-brotli", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/elas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-jb2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cappi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/epub+zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ace2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sas-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-mff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-srp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogguvs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "drawing/dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-doq2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-acad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-kml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-autocad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-mff2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-snodas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/terragen", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-wcs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-c++src", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/timestamped-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/tiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/msexcel", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-asp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-envi-hdr", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/iso19139+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ecrg-toc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/aig", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-wav", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/jdem", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-webp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-arj", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lzma", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java-vm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/envisat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-doq1", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/vnd.wave", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ppi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ilwis", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gunzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-icon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/svg+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ms-owner", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-grib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/ms-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/fits", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-mpeg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bzip2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/tsv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-fictionbook+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-p-aux", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-font-ttf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-xcf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-ms-bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/wmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/eir", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-matlab-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/deflate64", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/wav", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rs2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tsx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lcp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-mbtiles", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-oggpcm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-epsilon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msgn", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/csv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-dimap", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.microsoft.icon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-envi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word2006ml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-font-adobe-metric", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rst", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vrt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ctg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-e00-grid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-ogg-flac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-psd", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/rss", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/sdts-raster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/oxps", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/leveller", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ingr", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/sgi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pnm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/raster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-ogg-pcm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg; codecs=opus", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/fits", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-r", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/gif", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/java-vm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mspowerpoint", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-http", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/applefile", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/rtf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/adrg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-rgb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ngs-geoid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-map", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ceos", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xpm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ers", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-yuv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isis2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-nwt-grd", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isis3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-nwt-grc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/daala", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-blx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-dirac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ndf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.wap.wbmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/theora", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/kate", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pkcs7-mime", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/fit", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ctable2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-executable", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isatab", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/grass-ascii-grid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/plain", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzipped", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gxf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lan", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xyz", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-jbig2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/nitf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mbox", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/chm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-fast", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-deflate", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-grib2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-ozi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pds", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.iwork", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-usgs-dem", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/dif+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/geotiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsag", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-snappy", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-theora", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ntf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=6.x", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pkcs7-signature", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=5.1", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=5.0", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-arj-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/geotopic", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-java-source", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/basic", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pcisdk", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rik", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/opus", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/jp2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gtx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-object", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-wordml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-wmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rpf-toc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-srtmhgt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-generic-bin", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/vnd.iptc.anpa", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msmetafile", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-wms", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-oggrgb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/xcf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/photoshop", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lz4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-7z-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-oggyuv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msdownload", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/jpeg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/icns", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-geo-pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-uvs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-flv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-zip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-unix-dump", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-coasp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dipex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-til", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gs7bg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-unix-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-elf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/dted", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rasterlite", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-mp4a", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gzip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-chm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,34 +26,7 @@
*/
package org.alfresco.transformer.executors;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_HTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_CSV;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import com.google.common.collect.ImmutableList;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.DocumentSelector;
@@ -69,11 +42,37 @@ import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.parser.pkg.PackageParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.slf4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import com.google.common.collect.ImmutableList;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.util.List;
import java.util.regex.Pattern;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_HTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_CSV;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
/**
* Stripped down command line Tika transformers. Not actually run as a separate process, but the code fits the pattern
@@ -485,7 +484,7 @@ public class Tika
private final Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
private final PDFParserConfig pdfParserConfig = new PDFParserConfig();
private final DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
public static final DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
{
private final List<String> disabledMediaTypes = ImmutableList.of(MIMETYPE_IMAGE_JPEG,
MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
@@ -504,12 +503,30 @@ public class Tika
public Tika() throws TikaException, IOException, SAXException
{
ClassLoader classLoader = getClass().getClassLoader();
URL tikaConfigXml = classLoader.getResource("tika-config.xml");
TikaConfig tikaConfig = new TikaConfig(tikaConfigXml);
TikaConfig tikaConfig = readTikaConfig();
autoDetectParser = new AutoDetectParser(tikaConfig);
}
/**
 * Loads the Tika configuration, logging any failure instead of propagating it.
 *
 * @param logger receives an error entry (with the exception) if the configuration cannot be read
 * @return the loaded configuration, or {@code null} if reading it failed
 */
public static TikaConfig readTikaConfig(Logger logger)
{
    TikaConfig tikaConfig = null;
    try
    {
        tikaConfig = readTikaConfig();
    }
    catch (Exception e)
    {
        logger.error("Failed to read tika-config.xml", e);
    }
    return tikaConfig;
}
/**
 * Loads {@code tika-config.xml} from the classpath of the {@link Tika} class.
 *
 * @return the configuration built from the resource
 * @throws IOException if the resource is not on the classpath, or if reading it fails
 * @throws TikaException if Tika rejects the configuration
 * @throws SAXException if the configuration is not well-formed XML
 */
private static TikaConfig readTikaConfig() throws TikaException, IOException, SAXException
{
    URL tikaConfigXml = Tika.class.getClassLoader().getResource("tika-config.xml");
    if (tikaConfigXml == null)
    {
        // getResource signals "not found" with null; fail with a clear message rather than
        // letting the null URL be dereferenced further down.
        throw new IOException("tika-config.xml was not found on the classpath");
    }
    return new TikaConfig(tikaConfigXml);
}
// Method included for developer testing
public static void main(String[] args)
{

View File

@@ -26,18 +26,35 @@
*/
package org.alfresco.transformer.executors;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.AbstractTikaMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.DWGMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.MP3MetadataExtractor;
import org.alfresco.transformer.metadataExtractors.MailMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.OfficeMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.OpenDocumentMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.PdfBoxMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.PoiMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.TikaAudioMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.TikaAutoMetadataExtractor;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.tika.exception.TikaException;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Map;
import java.util.StringJoiner;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.logging.LogEntry;
import org.apache.tika.exception.TikaException;
import org.xml.sax.SAXException;
import static java.lang.Boolean.parseBoolean;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.alfresco.transformer.util.RequestParamMap.NOT_EXTRACT_BOOKMARK_TEXT;
/**
* JavaExecutor implementation for running TIKA transformations. It loads the
@@ -45,9 +62,26 @@ import org.xml.sax.SAXException;
*/
public class TikaJavaExecutor implements JavaExecutor
{
private static final String ID = "tika";
public static final String LICENCE = "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt";
private final Tika tika;
private final Map<String, AbstractTikaMetadataExtractor> metadataExtractor = ImmutableMap
.<String, AbstractTikaMetadataExtractor>builder()
.put("DWGMetadataExtractor", new DWGMetadataExtractor())
.put("MailMetadataExtractor", new MailMetadataExtractor())
.put("MP3MetadataExtractor", new MP3MetadataExtractor())
.put("OfficeMetadataExtractor", new OfficeMetadataExtractor())
.put("OpenDocumentMetadataExtractor", new OpenDocumentMetadataExtractor())
.put("PdfBoxMetadataExtractor", new PdfBoxMetadataExtractor())
.put("PoiMetadataExtractor", new PoiMetadataExtractor())
.put("TikaAudioMetadataExtractor", new TikaAudioMetadataExtractor())
.put("TikaAutoMetadataExtractor", new TikaAutoMetadataExtractor())
.build();
private final Map<String, AbstractTikaMetadataExtractor> metadataEmbedder = ImmutableMap
.<String, AbstractTikaMetadataExtractor>builder()
.build();
public TikaJavaExecutor()
{
@@ -62,32 +96,33 @@ public class TikaJavaExecutor implements JavaExecutor
}
@Override
public void call(File sourceFile, File targetFile, String... args)
throws TransformException
public String getTransformerId()
{
args = buildArgs(sourceFile, targetFile, args);
try
{
tika.transform(args);
}
catch (IllegalArgumentException e)
{
throw new TransformException(BAD_REQUEST.value(), getMessage(e));
}
catch (Exception e)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), getMessage(e));
}
if (!targetFile.exists() || targetFile.length() == 0)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Transformer failed to create an output file");
}
return ID;
}
private static String getMessage(Exception e)
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
throws Exception
{
return e.getMessage() == null ? e.getClass().getSimpleName() : e.getMessage();
final boolean includeContents = parseBoolean(
transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false"));
final boolean notExtractBookmarksText = parseBoolean(
transformOptions.getOrDefault(NOT_EXTRACT_BOOKMARK_TEXT, "false"));
final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
call(sourceFile, targetFile, transformName,
includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
}
/**
* Runs a Tika transform for the given files.
* The supplied arguments are first passed through {@code buildArgs} together with the source and
* target files, and the resulting argument array is handed to {@code tika.transform}.
*
* @param sourceFile the file to transform
* @param targetFile the file the transform writes to
* @param args additional transform arguments
* @throws Exception whatever {@code buildArgs} or {@code tika.transform} throws; no translation is done here
*/
@Override
public void call(File sourceFile, File targetFile, String... args) throws Exception
{
args = buildArgs(sourceFile, targetFile, args);
tika.transform(args);
}
private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
@@ -127,4 +162,28 @@ public class TikaJavaExecutor implements JavaExecutor
methodArgs.add(path);
}
}
/**
 * Extracts metadata from {@code sourceFile} with the extractor registered under
 * {@code transformName}, then maps the result and writes it to {@code targetFile}.
 *
 * @param transformName key of the extractor in the {@code metadataExtractor} map
 * @param sourceMimetype mimetype of the source, passed on to the extractor
 * @param targetMimetype not used by this method
 * @param transformOptions options passed on to the extractor
 * @param sourceFile the file to read metadata from
 * @param targetFile the file the mapped metadata is written to
 * @throws IllegalArgumentException if no extractor is registered under {@code transformName}
 * @throws Exception anything thrown by the extractor while extracting or writing
 */
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions, File sourceFile, File targetFile)
        throws Exception
{
    AbstractTikaMetadataExtractor extractor = this.metadataExtractor.get(transformName);
    if (extractor == null)
    {
        // An unknown name is a caller error; report it explicitly instead of failing with a
        // NullPointerException on the next line.
        throw new IllegalArgumentException("No metadata extractor is registered for transform: " + transformName);
    }
    Map<String, Serializable> metadata = extractor.extractMetadata(sourceMimetype, transformOptions, sourceFile);
    extractor.mapMetadataAndWrite(targetFile, metadata);
}
/**
 * Embeds metadata into a copy of {@code sourceFile} using the embedder registered under
 * {@code transformName}.
 *
 * @param transformName key of the embedder in the {@code metadataEmbedder} map
 * @param sourceMimetype mimetype of the source, passed on to the embedder
 * @param targetMimetype mimetype of the target, passed on to the embedder
 * @param transformOptions options passed on to the embedder
 * @param sourceFile the file to read
 * @param targetFile the file to write
 * @throws IllegalArgumentException if no embedder is registered under {@code transformName}
 * @throws Exception anything thrown by the embedder
 * @deprecated The content repository's TikaPoweredMetadataExtracter provides no non test implementations.
 * This code exists in case there are custom implementations, that need to be converted to T-Engines.
 * It is simply a copy and paste from the content repository and has received limited testing.
 */
@Override
@SuppressWarnings("deprecation" )
public void embedMetadata(String transformName, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions, File sourceFile, File targetFile)
        throws Exception
{
    AbstractTikaMetadataExtractor embedder = this.metadataEmbedder.get(transformName);
    if (embedder == null)
    {
        // The embedder map may have no entry for this name; report that explicitly rather
        // than dereferencing null.
        throw new IllegalArgumentException("No metadata embedder is registered for transform: " + transformName);
    }
    embedder.embedMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -0,0 +1,474 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import org.joda.time.format.DateTimeParser;
import org.slf4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
* The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common mappings.
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>subject:</b> -- cm:description
* <b>created:</b> -- cm:created
* <b>comments:</b>
* </pre>
*
* @author Nick Burch
* @author adavis
*/
public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor
{
protected static final String KEY_AUTHOR = "author";
protected static final String KEY_TITLE = "title";
protected static final String KEY_SUBJECT = "subject";
protected static final String KEY_CREATED = "created";
protected static final String KEY_DESCRIPTION = "description";
protected static final String KEY_COMMENTS = "comments";
protected static final String KEY_TAGS = "dc:subject";
private static final String METADATA_SEPARATOR = ",";
private final DateTimeFormatter tikaUTCDateFormater;
private final DateTimeFormatter tikaDateFormater;
/**
 * Builds the two composite date formatters used by {@link #makeDate(String)}: one for patterns
 * that carry a UTC marker or numeric offset (fixed to UTC), and one for the remaining patterns.
 *
 * @param logger passed to the superclass constructor
 */
public AbstractTikaMetadataExtractor(Logger logger)
{
    super(logger);

    // TODO Once TIKA-451 is fixed this list will get nicer
    DateTimeParser[] parsersUTC = {
        DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").getParser(),
        DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZ").getParser()
    };
    DateTimeParser[] parsers = {
        DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss").getParser(),
        DateTimeFormat.forPattern("yyyy-MM-dd").getParser(),
        DateTimeFormat.forPattern("yyyy/MM/dd HH:mm:ss").getParser(),
        DateTimeFormat.forPattern("yyyy/MM/dd").getParser(),
        // 24-hour "HH": the pattern has no am/pm marker, so the 12-hour "hh" form would
        // reject hours above 12 and misread 12:xx as 00:xx.
        DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss zzz yyyy").getParser()
    };

    tikaUTCDateFormater = new DateTimeFormatterBuilder().append(null, parsersUTC).toFormatter().withZone(DateTimeZone.UTC);
    tikaDateFormater = new DateTimeFormatterBuilder().append(null, parsers).toFormatter();
}
/**
 * Converts a Tika date string to a {@link java.util.Date} where one of the known formats
 * (ISO-8601 and similar forms which Tika makes use of) matches.
 *
 * @param dateStr the date text reported by Tika
 * @return the parsed date, or {@code dateStr} itself when none of the formats match
 */
protected Serializable makeDate(String dateStr)
{
    // Order matters: the UTC formatter first, then the general one, each tried as
    // constructed and then again with Locale.US.
    DateTimeFormatter[] attempts = {
        this.tikaUTCDateFormater,
        this.tikaUTCDateFormater.withLocale(Locale.US),
        this.tikaDateFormater,
        this.tikaDateFormater.withLocale(Locale.US)
    };

    for (DateTimeFormatter attempt : attempts)
    {
        try
        {
            return attempt.parseDateTime(dateStr).toDate();
        }
        catch (IllegalArgumentException ignored)
        {
            // Not this format - move on to the next candidate.
        }
    }

    // Fall back to the normal ones: We just return the String as AbstractMappingMetadataExtracter
    // convertSystemPropertyValues in the repo will do the conversion that was previously done here.
    return dateStr;
}
/**
* Returns the correct Tika Parser to process the document.
* If you don't know which you want, use {@link TikaAutoMetadataExtractor}
* which makes use of the Tika auto-detection.
*
* @return the parser that {@code extractMetadata} uses to parse the source file
*/
protected abstract Parser getParser();
/**
* Returns the Tika Embedder to modify
* the document.
* This default implementation returns {@code null}, in which case {@code embedMetadata}
* returns without writing anything. Subclasses that support embedding override it.
*
* @return the Tika embedder, or {@code null} if embedding is not supported
*/
protected Embedder getEmbedder()
{
// TODO make this an abstract method once more extracters support embedding
return null;
}
/**
* Do we care about the contents of the
* extracted header, or nothing at all?
* When this returns {@code true}, {@code extractMetadata} captures the tags found in the XHTML
* head and passes them to {@code extractSpecific} as the {@code headers} map; otherwise all
* content is ignored and {@code headers} is {@code null}.
*
* @return {@code false} in this default implementation
*/
protected boolean needHeaderContents()
{
return false;
}
/**
* Allows implementation specific mappings to be done.
* Called at the end of {@code extractMetadata}; whatever is returned here becomes the result
* of the extraction. This default implementation returns {@code properties} unchanged.
*
* @param metadata the Tika metadata produced by the parse
* @param properties the raw properties collected so far
* @param headers tags captured from the XHTML head, or {@code null} when
* {@code needHeaderContents()} returned {@code false}
* @return the properties to return from {@code extractMetadata}
*/
protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers)
{
return properties;
}
/**
* Gets the document selector, used for determining whether to parse embedded resources,
* null by default so parse all.
*
* @param metadata the Tika metadata object that will be used for the parse
* @param targetMimeType NOTE(review): despite the name, {@code buildParseContext} passes the
* source mimetype here - confirm the intended meaning before relying on it
* @return the selector to register in the parse context, or {@code null} for none
*/
protected DocumentSelector getDocumentSelector(Metadata metadata, String targetMimeType)
{
return null;
}
/**
 * Creates the ParseContext for a parse, registering the document selector supplied by
 * {@link #getDocumentSelector(Metadata, String)} when there is one.
 *
 * @param metadata the Tika metadata object that will be used for the parse
 * @param sourceMimeType mimetype of the source, passed on to {@code getDocumentSelector}
 * @return a new ParseContext
 */
private ParseContext buildParseContext(Metadata metadata, String sourceMimeType)
{
    final ParseContext parseContext = new ParseContext();
    final DocumentSelector documentSelector = getDocumentSelector(metadata, sourceMimeType);
    if (documentSelector == null)
    {
        return parseContext;
    }
    parseContext.set(DocumentSelector.class, documentSelector);
    return parseContext;
}
/**
 * Parses {@code sourceFile} with the parser from {@link #getParser()} and collects the raw
 * metadata: every Tika key as-is, plus the common keys (author, title, comments, tags,
 * description, subject, created), and finally whatever {@code extractSpecific} adds or changes.
 *
 * @param sourceMimetype mimetype of the source, set as the content type hint for the parse
 * @param transformOptions not used by this method
 * @param sourceFile the file to parse
 * @return the raw properties as returned by {@code extractSpecific}
 * @throws Exception if the file cannot be read or the parse fails
 */
@Override
@SuppressWarnings( "deprecation" )
public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
        File sourceFile) throws Exception
{
    Map<String, Serializable> rawProperties = new HashMap<>();

    try (InputStream is = new FileInputStream(sourceFile))
    {
        Parser parser = getParser();

        Metadata metadata = new Metadata();
        metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
        ParseContext context = buildParseContext(metadata, sourceMimetype);

        // Either capture the XHTML head tags or discard all content.
        final Map<String, String> headers;
        final ContentHandler handler;
        if (!needHeaderContents())
        {
            headers = null;
            handler = new NullContentHandler();
        }
        else
        {
            MapCaptureContentHandler headerCapture = new MapCaptureContentHandler();
            headers = headerCapture.tags;
            handler = new HeadContentHandler(headerCapture);
        }

        parser.parse(is, handler, metadata, context);

        // Copy every Tika key across first so that any of them can be mapped onto a
        // custom content model.
        for (String tikaKey : metadata.names())
        {
            // TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
            putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties);
        }

        // Then the common Tika keys under the common Alfresco keys, so existing mapping
        // properties files keep working unchanged.
        putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties);
        putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties);
        putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties);

        // Tags
        putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);

        // Subject and description are not populated consistently: when only one of them is
        // present it is used for both keys; when neither is present nothing is stored.
        String subject = getMetadataValue(metadata, Metadata.SUBJECT);
        String description = getMetadataValue(metadata, Metadata.DESCRIPTION);
        if (subject != null || description != null)
        {
            putRawValue(KEY_DESCRIPTION, description != null ? description : subject, rawProperties);
            putRawValue(KEY_SUBJECT, subject != null ? subject : description, rawProperties);
        }

        // Creation date, falling back to the general date.
        String created = metadata.get(Metadata.CREATION_DATE);
        if (created == null)
        {
            created = metadata.get(Metadata.DATE);
        }
        if (created != null)
        {
            putRawValue(KEY_CREATED, created, rawProperties);
        }

        // Let a specific instance (eg OfficeMetadataExtractor) map the Tika keys onto its
        // existing namespace so that older properties files continue to map correctly.
        rawProperties = extractSpecific(metadata, rawProperties, headers);
    }

    return rawProperties;
}
/**
 * Copies {@code sourceFile} to {@code targetFile} with the metadata from the transform options
 * embedded, using the embedder from {@link #getEmbedder()}. Does nothing when there is no embedder.
 *
 * @param sourceMimetype not used by this method
 * @param targetMimetype not used by this method
 * @param transformOptions source of the metadata to embed, read via {@code getMetadata}
 * @param sourceFile the file to read
 * @param targetFile the file to write
 * @throws Exception if reading, writing or embedding fails
 * @deprecated The content repository's TikaPoweredMetadataExtracter provides no non test implementations.
 * This code exists in case there are custom implementations, that need to be converted to T-Engines.
 * It is simply a copy and paste from the content repository and has received limited testing.
 */
@Override
public void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
        File sourceFile, File targetFile) throws Exception
{
    final Embedder embedder = getEmbedder();
    if (embedder != null)
    {
        Metadata metadataToEmbed = new Metadata();
        for (Map.Entry<String, String> entry : getMetadata(transformOptions).entrySet())
        {
            metadataToEmbed.add(entry.getKey(), entry.getValue());
        }

        try (InputStream inputStream = new FileInputStream(sourceFile);
             OutputStream outputStream = new FileOutputStream(targetFile))
        {
            embedder.embed(metadataToEmbed, inputStream, outputStream, null);
        }
    }
}
/**
 * Collects the distinct, trimmed parts of every value stored under {@code key}, splitting each
 * value on {@code METADATA_SEPARATOR} and keeping first-seen order.
 *
 * @param metadata the Tika metadata to read
 * @param key the metadata key
 * @return {@code null} when there are no parts, a {@code String} when there is exactly one,
 *         otherwise a {@code String[]}
 */
private Serializable getMetadataValues(Metadata metadata, String key)
{
    // A set drops duplicates; the linked variant keeps the original order.
    Set<String> distinctParts = new LinkedHashSet<String>();
    for (String rawValue : metadata.getValues(key))
    {
        for (String part : rawValue.split(METADATA_SEPARATOR))
        {
            distinctParts.add(part.trim());
        }
    }

    switch (distinctParts.size())
    {
        case 0:
            return null;
        case 1:
            return distinctParts.iterator().next();
        default:
            return distinctParts.toArray(new String[0]);
    }
}
/**
 * Reads the value stored under {@code key} as a single string.
 *
 * @param metadata the Tika metadata to read
 * @param key the metadata key
 * @return for a multi-valued key, the distinct values in first-seen order joined with
 *         {@code ", "}; otherwise whatever {@code metadata.get(key)} returns
 */
private String getMetadataValue(Metadata metadata, String key)
{
    if (!metadata.isMultiValued(key))
    {
        return metadata.get(key);
    }

    // A linked set removes duplicates while keeping the order of the values.
    Set<String> distinctValues = new LinkedHashSet<>(Arrays.asList(metadata.getValues(key)));
    return String.join(", ", distinctValues);
}
/**
* This content handler will capture entries from within
* the header of the Tika content XHTML, but ignore the
* rest.
*/
protected static class HeadContentHandler extends ContentHandlerDecorator
{
/**
* XHTML XPath parser.
*/
private static final XPathParser PARSER =
new XPathParser("xhtml", XHTMLContentHandler.XHTML);
/**
* The XPath matcher used to select the descendants of the XHTML head element.
*/
private static final Matcher MATCHER =
PARSER.parse("/xhtml:html/xhtml:head/descendant:node()");
/**
* Creates a content handler that passes the XHTML head events selected by
* {@code MATCHER} to the given underlying content handler.
*
* @param handler content handler that receives the matching events
*/
protected HeadContentHandler(ContentHandler handler)
{
super(new MatchingContentHandler(handler, MATCHER));
}
}
/**
 * This content handler will grab all tags and attributes,
 * and record the textual content of the last seen one
 * of them.
 * Attribute values are stored under the attribute's qualified name and element text under the
 * element's qualified name; later entries overwrite earlier ones with the same name.
 * Normally only used with {@link HeadContentHandler}
 */
protected static class MapCaptureContentHandler implements ContentHandler
{
    /** Captured attribute values and element text, keyed by qualified name. */
    protected Map<String, String> tags = new HashMap<>();

    // Text seen since the most recent startElement; null once an endElement has been handled.
    private StringBuilder text;

    @Override
    public void characters(char[] ch, int start, int len)
    {
        if (text != null)
        {
            text.append(ch, start, len);
        }
    }

    @Override
    public void endElement(String namespace, String localname, String qname)
    {
        // Only non-empty text is recorded.
        if (text != null && text.length() > 0)
        {
            tags.put(qname, text.toString());
        }
        text = null;
    }

    @Override
    public void startElement(String namespace, String localname, String qname, Attributes attrs)
    {
        for (int i = 0; i < attrs.getLength(); i++)
        {
            tags.put(attrs.getQName(i), attrs.getValue(i));
        }
        // A new element discards any text collected for an enclosing element.
        text = new StringBuilder();
    }

    @Override
    public void endDocument() {}

    @Override
    public void endPrefixMapping(String paramString) {}

    @Override
    public void ignorableWhitespace(char[] paramArrayOfChar, int paramInt1, int paramInt2) {}

    @Override
    public void processingInstruction(String paramString1, String paramString2) {}

    @Override
    public void setDocumentLocator(Locator paramLocator) {}

    @Override
    public void skippedEntity(String paramString) {}

    @Override
    public void startDocument() {}

    @Override
    public void startPrefixMapping(String paramString1, String paramString2) {}
}
/**
 * A {@link ContentHandler} whose callbacks all do nothing.
 * Used when only the metadata is wanted and the content of the file is of no interest.
 */
protected static class NullContentHandler implements ContentHandler
{
    @Override
    public void setDocumentLocator(Locator locator) {}

    @Override
    public void startDocument() {}

    @Override
    public void endDocument() {}

    @Override
    public void startPrefixMapping(String prefix, String uri) {}

    @Override
    public void endPrefixMapping(String prefix) {}

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) {}

    @Override
    public void endElement(String uri, String localName, String qName) {}

    @Override
    public void characters(char[] ch, int start, int length) {}

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) {}

    @Override
    public void processingInstruction(String target, String data) {}

    @Override
    public void skippedEntity(String name) {}
}
}

View File

@@ -0,0 +1,82 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.dwg.DWGParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Map;
/**
 * {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor,
 * backed by Tika's {@link DWGParser}.
 *
 * Configuration: (see DWGMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * On top of the values handled by {@link AbstractTikaMetadataExtractor}, this
 * class adds two raw values:
 * <pre>
 *   <b>keyword:</b>     -- Tika {@code Metadata.KEYWORDS}
 *   <b>lastAuthor:</b>  -- Tika {@code Metadata.LAST_AUTHOR}
 * </pre>
 *
 * @author Nick Burch
 * @author adavis
 */
public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(DWGMetadataExtractor.class);

    private static final String KEY_KEYWORD = "keyword";
    private static final String KEY_LAST_AUTHOR = "lastAuthor";

    public DWGMetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link DWGParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new DWGParser();
    }

    /**
     * Adds the DWG specific {@code keyword} and {@code lastAuthor} values.
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @SuppressWarnings("deprecation")
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        String keywords = metadata.get(Metadata.KEYWORDS);
        putRawValue(KEY_KEYWORD, keywords, properties);

        String lastAuthor = metadata.get(Metadata.LAST_AUTHOR);
        putRawValue(KEY_LAST_AUTHOR, lastAuthor, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,112 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp3.Mp3Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Map;
/**
 * MP3 file metadata extractor, backed by Tika's {@link Mp3Parser}.
 *
 * Configuration: (see MP3MetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * Everything {@link TikaAudioMetadataExtractor} produces is produced here too.
 * In addition the following legacy keys are emitted, so that mappings written
 * before the audio model existed keep working:
 * <pre>
 *   <b>albumTitle:</b>    -- xmpDM album
 *   <b>songTitle:</b>     -- title
 *   <b>artist:</b>        -- xmpDM artist
 *   <b>comment:</b>       -- xmpDM log comment
 *   <b>trackNumber:</b>   -- xmpDM track number
 *   <b>genre:</b>         -- xmpDM genre
 *   <b>yearReleased:</b>  -- xmpDM release date
 *   <b>composer:</b>      -- xmpDM composer
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Nick Burch
 * @author adavis
 */
public class MP3MetadataExtractor extends TikaAudioMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(MP3MetadataExtractor.class);

    // Legacy (pre audio model) property keys
    private static final String KEY_SONG_TITLE = "songTitle";
    private static final String KEY_ALBUM_TITLE = "albumTitle";
    private static final String KEY_ARTIST = "artist";
    private static final String KEY_COMMENT = "comment";
    private static final String KEY_YEAR_RELEASED = "yearReleased";
    private static final String KEY_TRACK_NUMBER = "trackNumber";
    private static final String KEY_GENRE = "genre";
    private static final String KEY_COMPOSER = "composer";

    public MP3MetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link Mp3Parser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new Mp3Parser();
    }

    /**
     * Runs the generic audio extraction and then adds the legacy MP3 keys.
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    passed through to the superclass
     * @return the {@code properties} map that was passed in
     */
    @SuppressWarnings("deprecation")
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        // Standard audio mappings first
        super.extractSpecific(metadata, properties, headers);

        // Compatibility keys, only of use to installations that still carry
        // mapping properties predating the audio model
        String album = metadata.get(XMPDM.ALBUM);
        putRawValue(KEY_ALBUM_TITLE, album, properties);

        String title = metadata.get(Metadata.TITLE);
        putRawValue(KEY_SONG_TITLE, title, properties);

        String artist = metadata.get(XMPDM.ARTIST);
        putRawValue(KEY_ARTIST, artist, properties);

        String comment = metadata.get(XMPDM.LOG_COMMENT);
        putRawValue(KEY_COMMENT, comment, properties);

        String trackNumber = metadata.get(XMPDM.TRACK_NUMBER);
        putRawValue(KEY_TRACK_NUMBER, trackNumber, properties);

        String genre = metadata.get(XMPDM.GENRE);
        putRawValue(KEY_GENRE, genre, properties);

        String released = metadata.get(XMPDM.RELEASE_DATE);
        putRawValue(KEY_YEAR_RELEASED, released, properties);

        String composer = metadata.get(XMPDM.COMPOSER);
        putRawValue(KEY_COMPOSER, composer, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,108 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Map;
/**
 * Outlook MAPI format email metadata extractor.
 *
 * Configuration: (see MailMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>sentDate:</b>     -- cm:sentdate
 *   <b>originator:</b>   -- cm:originator, cm:author
 *   <b>addressee:</b>    -- cm:addressee
 *   <b>addressees:</b>   -- cm:addressees
 *   <b>subjectLine:</b>  -- cm:subjectline, cm:description
 *   <b>toNames:</b>      --
 *   <b>ccNames:</b>      --
 *   <b>bccNames:</b>     --
 * </pre>
 *
 * TIKA note - to/cc/bcc go into the html part, not the metadata.
 * Also, email addresses not included as yet.
 *
 * @author Kevin Roast
 * @author adavis
 */
public class MailMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(MailMetadataExtractor.class);

    private static final String KEY_SENT_DATE = "sentDate";
    private static final String KEY_ORIGINATOR = "originator";
    private static final String KEY_ADDRESSEE = "addressee";
    private static final String KEY_ADDRESSEES = "addressees";
    private static final String KEY_SUBJECT = "subjectLine";
    private static final String KEY_TO_NAMES = "toNames";
    private static final String KEY_CC_NAMES = "ccNames";
    private static final String KEY_BCC_NAMES = "bccNames";

    public MailMetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link OfficeParser}, which handles Outlook files as well
     *         as Word, Excel etc
     */
    @Override
    protected Parser getParser()
    {
        return new OfficeParser();
    }

    /**
     * Adds the email specific values (originator, subject, sent date and the
     * various recipient lists).
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @SuppressWarnings("deprecation")
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        // Single valued fields
        String author = metadata.get(Metadata.AUTHOR);
        putRawValue(KEY_ORIGINATOR, author, properties);

        String title = metadata.get(Metadata.TITLE);
        putRawValue(KEY_SUBJECT, title, properties);

        String subject = metadata.get(Metadata.SUBJECT);
        putRawValue(KEY_DESCRIPTION, subject, properties);

        String lastSaved = metadata.get(Metadata.LAST_SAVED);
        putRawValue(KEY_SENT_DATE, lastSaved, properties);

        // The addressee field holds the TO value only, never cc/bcc
        String to = metadata.get(Metadata.MESSAGE_TO);
        putRawValue(KEY_ADDRESSEE, to, properties);

        // To, CC and BCC each get a multi-valued field of their own
        String[] toNames = metadata.getValues(Metadata.MESSAGE_TO);
        putRawValue(KEY_TO_NAMES, toNames, properties);

        String[] ccNames = metadata.getValues(Metadata.MESSAGE_CC);
        putRawValue(KEY_CC_NAMES, ccNames, properties);

        String[] bccNames = metadata.getValues(Metadata.MESSAGE_BCC);
        putRawValue(KEY_BCC_NAMES, bccNames, properties);

        // The addressees field holds every recipient address (to/cc/bcc)
        String[] recipients = metadata.getValues(Metadata.MESSAGE_RECIPIENT_ADDRESS);
        putRawValue(KEY_ADDRESSEES, recipients, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,113 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Map;
/**
 * Office file format metadata extractor, backed by Tika's {@link OfficeParser}.
 *
 * Configuration: (see OfficeMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * On top of the values handled by {@link AbstractTikaMetadataExtractor}, this
 * class adds the following raw values:
 * <pre>
 *   <b>createDateTime:</b>
 *   <b>lastSaveDateTime:</b>
 *   <b>editTime:</b>
 *   <b>format:</b>
 *   <b>keywords:</b>
 *   <b>lastAuthor:</b>
 *   <b>lastPrinted:</b>
 *   <b>pageCount:</b>
 *   <b>paragraphCount:</b>
 *   <b>wordCount:</b>
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Derek Hulley
 * @author Nick Burch
 * @author adavis
 */
public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(OfficeMetadataExtractor.class);

    public static final String KEY_CREATE_DATETIME = "createDateTime";
    public static final String KEY_LAST_SAVE_DATETIME = "lastSaveDateTime";
    public static final String KEY_EDIT_TIME = "editTime";
    public static final String KEY_FORMAT = "format";
    public static final String KEY_KEYWORDS = "keywords";
    public static final String KEY_LAST_AUTHOR = "lastAuthor";
    public static final String KEY_LAST_PRINTED = "lastPrinted";
    public static final String KEY_PAGE_COUNT = "pageCount";
    public static final String KEY_PARAGRAPH_COUNT = "paragraphCount";
    public static final String KEY_WORD_COUNT = "wordCount";

    public OfficeMetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link OfficeParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OfficeParser();
    }

    /**
     * Adds the Office specific values (dates, edit time, format, keywords,
     * last author and the page/paragraph/word counts).
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @SuppressWarnings("deprecation")
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        // Dates and timings
        String created = metadata.get(Metadata.CREATION_DATE);
        putRawValue(KEY_CREATE_DATETIME, created, properties);

        String lastSaved = metadata.get(Metadata.LAST_SAVED);
        putRawValue(KEY_LAST_SAVE_DATETIME, lastSaved, properties);

        String editTime = metadata.get(Metadata.EDIT_TIME);
        putRawValue(KEY_EDIT_TIME, editTime, properties);

        // Descriptive values
        String format = metadata.get(Metadata.FORMAT);
        putRawValue(KEY_FORMAT, format, properties);

        String keywords = metadata.get(Metadata.KEYWORDS);
        putRawValue(KEY_KEYWORDS, keywords, properties);

        String lastAuthor = metadata.get(Metadata.LAST_AUTHOR);
        putRawValue(KEY_LAST_AUTHOR, lastAuthor, properties);

        String lastPrinted = metadata.get(Metadata.LAST_PRINTED);
        putRawValue(KEY_LAST_PRINTED, lastPrinted, properties);

        // Note: the OS version and thumbnail are not extracted.

        // Document statistics
        String pageCount = metadata.get(Metadata.PAGE_COUNT);
        putRawValue(KEY_PAGE_COUNT, pageCount, properties);

        String paragraphCount = metadata.get(Metadata.PARAGRAPH_COUNT);
        putRawValue(KEY_PARAGRAPH_COUNT, paragraphCount, properties);

        String wordCount = metadata.get(Metadata.WORD_COUNT);
        putRawValue(KEY_WORD_COUNT, wordCount, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,137 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Date;
import java.util.Map;
import java.util.Set;
/**
 * {@code "application/vnd.oasis.opendocument..."} metadata extractor, backed by
 * Tika's {@link OpenDocumentParser}.
 *
 * Configuration: (see OpenDocumentMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * On top of the values handled by {@link AbstractTikaMetadataExtractor}, this
 * class adds the following raw values:
 * <pre>
 *   <b>creationDate:</b>    -- parsed to a {@link Date} when possible
 *   <b>creator:</b>
 *   <b>date:</b>            -- parsed to a {@link Date} when possible
 *   <b>description:</b>
 *   <b>generator:</b>
 *   <b>initialCreator:</b>
 *   <b>keyword:</b>
 *   <b>language:</b>
 *   <b>User properties</b>  -- for every key of the extract mapping that has a
 *                              {@code custom:<key>} value in the Tika metadata
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Antti Jokipii
 * @author Derek Hulley
 * @author adavis
 */
public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(OpenDocumentMetadataExtractor.class);

    private static final String KEY_CREATION_DATE = "creationDate";
    private static final String KEY_CREATOR = "creator";
    private static final String KEY_DATE = "date";
    private static final String KEY_GENERATOR = "generator";
    private static final String KEY_INITIAL_CREATOR = "initialCreator";
    private static final String KEY_KEYWORD = "keyword";
    private static final String KEY_LANGUAGE = "language";

    // Prefix under which user-defined properties are looked up in the Tika metadata
    private static final String CUSTOM_PREFIX = "custom:";

    private static final DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss");

    public OpenDocumentMetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link OpenDocumentParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OpenDocumentParser();
    }

    /**
     * Adds the OpenDocument specific values and any user-defined properties
     * named by the extract mapping.
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @SuppressWarnings("deprecation")
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String, String> headers)
    {
        putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
        putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
        putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
        putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties);
        putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
        putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
        putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
        putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties);

        // Handle user-defined properties dynamically: every mapped key is
        // looked up (once) with the custom prefix and copied across if present
        Map<String, Set<String>> mapping = super.getExtractMapping();
        for (String key : mapping.keySet())
        {
            String customValue = metadata.get(CUSTOM_PREFIX + key);
            if (customValue != null)
            {
                putRawValue(key, customValue, properties);
            }
        }

        return properties;
    }

    /**
     * Parses a {@code yyyy-MM-dd'T'HH:mm:ss} date string on a best effort basis.
     *
     * @param dateString the value to parse; may be {@code null} or empty
     * @return the parsed date, or {@code null} if the value is missing or does
     *         not match the expected pattern
     */
    private Date getDateOrNull(String dateString)
    {
        if (dateString == null || dateString.length() == 0)
        {
            return null;
        }

        try
        {
            return dateFormatter.parseDateTime(dateString).toDate();
        }
        catch (IllegalArgumentException e)
        {
            // Deliberately best effort: an unparseable date is treated as absent,
            // but leave a trace so the dropped value can be diagnosed.
            logger.debug("Ignoring unparseable date '{}': {}", dateString, e.getMessage());
            return null;
        }
    }
}

View File

@@ -0,0 +1,75 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.executors.Tika;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Metadata extractor for PDF documents, backed by Tika's {@link PDFParser}.
 *
 * Configuration: (see PdfBoxMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>   -- cm:author
 *   <b>title:</b>    -- cm:title
 *   <b>subject:</b>  -- cm:description
 *   <b>created:</b>  -- cm:created
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Jesper Steen Møller
 * @author Derek Hulley
 * @author adavis
 */
public class PdfBoxMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(PdfBoxMetadataExtractor.class);

    public PdfBoxMetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link PDFParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new PDFParser();
    }

    /**
     * Supplies the selector applied to documents embedded in the PDF.
     *
     * @param metadata       not used; the same selector is returned regardless
     * @param targetMimeType not used; the same selector is returned regardless
     * @return the shared {@code Tika.pdfBoxEmbededDocumentSelector}
     */
    @Override
    protected DocumentSelector getDocumentSelector(Metadata metadata, String targetMimeType)
    {
        return Tika.pdfBoxEmbededDocumentSelector;
    }
}

View File

@@ -0,0 +1,68 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * POI-based metadata extractor for Office 07 documents, backed by Tika's
 * {@link OOXMLParser}. See http://poi.apache.org/ for information on POI.
 *
 * Configuration: (see PoiMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>               -- cm:author
 *   <b>title:</b>                -- cm:title
 *   <b>subject:</b>              -- cm:description
 *   <b>created:</b>              -- cm:created
 *   <b>Any custom property:</b>  -- [not mapped]
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Nick Burch
 * @author Neil McErlean
 * @author Dmitry Velichkevich
 * @author adavis
 */
public class PoiMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(PoiMetadataExtractor.class);

    public PoiMetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new {@link OOXMLParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OOXMLParser();
    }
}

View File

@@ -0,0 +1,175 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp4.MP4Parser;
import org.gagravarr.tika.FlacParser;
import org.gagravarr.tika.VorbisParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import static org.alfresco.transformer.executors.Tika.readTikaConfig;
/**
 * A Metadata Extractor which makes use of the Apache Tika Audio Parsers to extract metadata from media files.
 * For backwards compatibility reasons, this doesn't handle the MP3 format, which has its own dedicated extractor
 * in {@link MP3MetadataExtractor}
 *
 * Configuration: (see TikaAudioMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>             -- cm:author
 *   <b>title:</b>              -- cm:title
 *   <b>created:</b>            -- cm:created
 *   <b>xmpDM:artist</b>        -- audio:artist
 *   <b>xmpDM:composer</b>      -- audio:composer
 *   <b>xmpDM:engineer</b>      -- audio:engineer
 *   <b>xmpDM:genre</b>         -- audio:genre
 *   <b>xmpDM:trackNumber</b>   -- audio:trackNumber
 *   <b>xmpDM:releaseDate</b>   -- audio:releaseDate
 * </pre>
 *
 * @author Nick Burch
 * @author adavis
 */
public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(TikaAudioMetadataExtractor.class);

    // The audio parsers offered to the composite parser built in getParser()
    private static final Parser[] parsers = new Parser[] {
        new VorbisParser(),
        new FlacParser(),
        new MP4Parser()
    };

    protected final TikaConfig tikaConfig;

    public TikaAudioMetadataExtractor()
    {
        this(logger);
    }

    /**
     * @param logger the logger handed to the superclass and used when reading
     *               the Tika configuration
     */
    public TikaAudioMetadataExtractor(Logger logger)
    {
        super(logger);
        tikaConfig = readTikaConfig(logger);
    }

    /**
     * @return a new {@link CompositeParser} over the Vorbis, Flac and MP4 parsers
     */
    @Override
    protected Parser getParser()
    {
        return new CompositeParser(tikaConfig.getMediaTypeRegistry(), parsers);
    }

    /**
     * Handles the special cases on top of the default Tika -> Alfresco mapping:
     * a generated description, and a release date which is stored both as the
     * created value and under the xmpDM release date name.
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        String description = generateDescription(metadata);
        putRawValue(KEY_DESCRIPTION, description, properties);

        Serializable releaseDate = generateReleaseDate(metadata);
        putRawValue(KEY_CREATED, releaseDate, properties);
        putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties);

        return properties;
    }

    /**
     * Generates the release date.
     *
     * @param metadata the metadata extracted from the file
     * @return {@code null} when there is no release date; midnight on the 1st of
     *         January (default calendar) when the value is a bare four digit
     *         year; otherwise whatever {@code makeDate} returns for the value
     */
    private Serializable generateReleaseDate(Metadata metadata)
    {
        String date = metadata.get(XMPDM.RELEASE_DATE);
        if (date == null || date.isEmpty())
        {
            return null;
        }

        boolean yearOnly = date.matches("\\d\\d\\d\\d");
        if (!yearOnly)
        {
            // A regular date
            return makeDate(date);
        }

        // Only a year was given, so expand it to the start of that year
        Calendar startOfYear = Calendar.getInstance();
        startOfYear.set(Integer.parseInt(date), Calendar.JANUARY, 1, 0, 0, 0);
        startOfYear.set(Calendar.MILLISECOND, 0);
        return startOfYear.getTime();
    }

    /**
     * Generate the description, of the form {@code title - album (artist)}
     * where the album and artist parts are each optional.
     *
     * @param metadata the metadata extracted from the file
     * @return the description, or an empty string when there is no title
     */
    @SuppressWarnings("deprecation")
    private String generateDescription(Metadata metadata)
    {
        String title = metadata.get(Metadata.TITLE);
        if (title == null)
        {
            return "";
        }

        StringBuilder description = new StringBuilder(title);

        String album = metadata.get(XMPDM.ALBUM);
        if (album != null)
        {
            description.append(" - ").append(album);
        }

        String artist = metadata.get(XMPDM.ARTIST);
        if (artist != null)
        {
            description.append(" (").append(artist).append(")");
        }

        return description.toString();
    }
}

View File

@@ -0,0 +1,144 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.util.MimetypeMap;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transformer.executors.Tika.readTikaConfig;
/**
 * A Metadata Extractor which makes use of the Apache Tika auto-detection to select the best parser to extract the
 * metadata from a document. This will be used for all files which Tika can handle, but where no other more explicit
 * extractor is defined.
 *
 * Configuration: (see TikaAutoMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>     -- cm:author
 *   <b>title:</b>      -- cm:title
 *   <b>subject:</b>    -- cm:description
 *   <b>created:</b>    -- cm:created
 *   <b>comments:</b>
 *   <b>geo:lat:</b>    -- cm:latitude
 *   <b>geo:long:</b>   -- cm:longitude
 * </pre>
 *
 * @author Nick Burch
 * @author adavis
 */
public class TikaAutoMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(TikaAutoMetadataExtractor.class);

    private static final String EXIF_IMAGE_HEIGHT_TAG = "Exif Image Height";
    private static final String EXIF_IMAGE_WIDTH_TAG = "Exif Image Width";
    private static final String JPEG_IMAGE_HEIGHT_TAG = "Image Height";
    private static final String JPEG_IMAGE_WIDTH_TAG = "Image Width";
    private static final String COMPRESSION_TAG = "Compression";

    protected final TikaConfig tikaConfig;

    public TikaAutoMetadataExtractor()
    {
        super(logger);
        tikaConfig = readTikaConfig(logger);
    }

    /**
     * Does auto-detection to select the best Tika Parser.
     *
     * @return a new {@link AutoDetectParser} built from the Tika configuration
     */
    @Override
    protected Parser getParser()
    {
        return new AutoDetectParser(tikaConfig);
    }

    /**
     * For JPEG content that carries the "Exif Image Width", "Exif Image Height"
     * and "Compression" tags, overrides the image size values: the tiff length
     * and width receive the leading digits of the exif height and width, and
     * the "Image Height" / "Image Width" values receive the exif values as is.
     * Any other content is left untouched.
     *
     * @param metadata   the Tika metadata to read from
     * @param properties the raw properties being built up; also the return value
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        boolean jpeg = MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE));
        if (!jpeg)
        {
            return properties;
        }

        // Only images that have exif information are adjusted
        String exifWidth = metadata.get(EXIF_IMAGE_WIDTH_TAG);
        String exifHeight = metadata.get(EXIF_IMAGE_HEIGHT_TAG);
        if (exifWidth == null || exifHeight == null || metadata.get(COMPRESSION_TAG) == null)
        {
            return properties;
        }

        // Replace the size properties that will end up on the node
        putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(exifHeight), properties);
        putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(exifWidth), properties);
        putRawValue(JPEG_IMAGE_HEIGHT_TAG, exifHeight, properties);
        putRawValue(JPEG_IMAGE_WIDTH_TAG, exifWidth, properties);

        return properties;
    }

    /**
     * Exif size values carry the string "pixels" after the number, so only the
     * leading run of digits is kept; scanning stops at the first non digit
     * character.
     *
     * @param sizeText string text
     * @return the leading digits of {@code sizeText}, possibly empty
     */
    private String extractSize(String sizeText)
    {
        int end = 0;
        while (end < sizeText.length() && Character.isDigit(sizeText.charAt(end)))
        {
            end++;
        }
        return sizeText.substring(0, end);
    }
}

View File

@@ -0,0 +1,12 @@
#
# DWGMetadataExtractor - default mapping
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -0,0 +1,30 @@
#
# MP3MetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -0,0 +1,14 @@
#
# MailMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
sentDate=cm:sentdate
originator=cm:originator, cm:author
addressee=cm:addressee
addressees=cm:addressees
subjectLine=cm:subjectline, cm:description

View File

@@ -0,0 +1,14 @@
#
# OfficeMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
createDateTime=cm:created
lastSaveDateTime=cm:modified

View File

@@ -0,0 +1,21 @@
#
# OpenDocumentMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title

View File

@@ -0,0 +1,13 @@
#
# PdfBoxMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
created=cm:created

View File

@@ -0,0 +1,13 @@
#
# PoiMetadataExtractor - default mapping
#
# author: Neil McErlean
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created

View File

@@ -0,0 +1,34 @@
#
# TikaAudioMetadataExtractor - audio mapping
#
# This is used to map from the Tika audio metadata onto your
# content model. This will be used for any Audio content
# for which an explicit extractor isn't defined
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -0,0 +1,52 @@
#
# TikaAutoMetadataExtractor - default mapping
#
# This is used to map from the Tika and standard namespaces
# onto your content model. This will be used for any
# content for which an explicit extractor isn't defined,
# by using Tika's auto-selection facilities.
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -511,6 +511,464 @@
"transformOptions": [
"tikaOptions"
]
},
{
"transformerName": "DWGMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-dwg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "MailMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-outlook", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "MP3MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "audio/mpeg", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "OfficeMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/msword", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.visio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.visio2013", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msoffice-embedded; format=ole10_native", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-project", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msworks-spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-mspublisher", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-msoffice", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/sldworks", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-ooxml-protected", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "OpenDocumentMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.oasis.opendocument.text", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.graphics-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.presentation-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.formula", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-master", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.text-web", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.oasis.opendocument.database", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-web", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.spreadsheet-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.sun.xml.writer", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.chart", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.spreadsheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.formula", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.image-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.presentation-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.formula-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.text-master", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "applicationvnd.oasis.opendocument.chart-template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.oasis.opendocument.graphics", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "PdfBoxMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/illustrator", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "PoiMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.drawing", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-xpsdocument", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.drawing.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "model/vnd.dwfx+xps", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.stencil", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.template", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio.stencil.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "TikaAudioMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "video/x-m4v", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-oggflac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mp4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/vorbis", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/3gpp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-flac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/3gpp2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/quicktime", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/mp4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/mp4", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "TikaAutoMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.ms-htmlhelp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/atom+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/midi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/aaigrid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bag", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.keynote", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-quattro-pro; version=9", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ibooks+zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/wave", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-midi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/rss+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-netcdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-daala", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/matlab-mat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/aiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/jaxa-pal-sar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-pcraster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/arg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-kro", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-hdf5-image", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/big-gif", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/zlib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cosar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ntv2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/java-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-vnd.sun.xml.writer", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gmt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ida", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-groovy", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/sar-ceos", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/acad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.adobe.photoshop", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sharedlib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-m4a", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/webp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wap.xhtml+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-aiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-spreadsheetml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-airsar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pcidsk", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java-pack200", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-fujibas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-zmap", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/rtf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xz", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg; codecs=speex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-l1b", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsbg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sdat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-visio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-coredump", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msaccess", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dods", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/png", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-outlook-pst", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bsb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cpio", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tar", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dbf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-los-las", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/autocad_dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.workspace.3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.workspace.4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-bpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "gzip/document", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-java", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-brotli", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/elas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-jb2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cappi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/epub+zip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ace2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-sas-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-hdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-mff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-srp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogguvs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "drawing/dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-doq2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-acad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-kml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-autocad", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-mff2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-snodas", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/terragen", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-wcs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-c++src", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/timestamped-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/tiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/msexcel", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-asp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rar-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-envi-hdr", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/iso19139+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ecrg-toc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/aig", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-wav", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/jdem", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-webp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-arj", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lzma", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java-vm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/envisat", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-doq1", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/vnd.wave", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ppi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ilwis", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gunzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-icon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/svg+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ms-owner", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-grib", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/ms-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/fits", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-mpeg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bzip2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/tsv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-fictionbook+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-p-aux", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-font-ttf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-xcf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-ms-bmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/wmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/eir", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-matlab-data", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/deflate64", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/wav", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rs2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tsx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lcp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-mbtiles", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-oggpcm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-epsilon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msgn", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/csv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-dimap", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.microsoft.icon", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-envi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dwg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.numbers", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-word2006ml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-bt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-font-adobe-metric", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rst", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vrt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ctg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-e00-grid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-ogg-flac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-compress", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-psd", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/rss", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/sdts-raster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/oxps", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/leveller", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ingr", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/sgi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pnm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/raster", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-ogg-pcm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/ogg; codecs=opus", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/fits", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-r", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/gif", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/java-vm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mspowerpoint", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-http", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/ogg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/applefile", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/rtf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/adrg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-rgb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ngs-geoid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-map", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ceos", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xpm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ers", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-yuv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isis2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-nwt-grd", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isis3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-nwt-grc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/daala", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-blx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tnef", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-dirac", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ndf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/vnd.wap.wbmp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/theora", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/kate", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pkcs7-mime", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/fit", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-ctable2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-executable", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-isatab", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/grass-ascii-grid", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/plain", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzipped", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gxf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-cpg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lan", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-xyz", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.pages", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-jbig2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/nitf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/mbox", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/chm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-fast", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-deflate", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-grib2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-ozi", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pds", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.apple.iwork", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-usgs-dem", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.3", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/dif+xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-java", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/geotiff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gsag", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-snappy", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-theora", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/ntf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=6.x", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pkcs7-signature", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=5.1", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.wordperfect; version=5.0", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-arj-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/geotopic", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/x-java-source", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/basic", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/pcisdk", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rik", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/opus", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/jp2", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gtx", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-object", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/vnd.ms-wordml", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/x-wmf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rpf-toc", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-srtmhgt", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-generic-bin", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "text/vnd.iptc.anpa", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msmetafile", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-wms", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-oggrgb", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/xcf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/photoshop", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-lz4", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-7z-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gff", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-oggyuv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-msdownload", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/jpeg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/icns", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-emf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-geo-pdf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-ogg-uvs", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "video/x-flv", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-zip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/gzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-tika-unix-dump", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-coasp", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-dipex", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-til", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gzip", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gs7bg", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-unix-archive", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-elf", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/dted", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-rasterlite", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "audio/x-mp4a", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-gzip-compressed", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/x-chm", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/hfa", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}