initial commit
This commit is contained in:
commit
e9f53ab5f7
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
# Eclipse
|
||||
.project
|
||||
.classpath
|
||||
.settings
|
||||
|
||||
# Maven
|
||||
target
|
||||
pom.xml.versionsBackup
|
||||
|
103
pom.xml
Normal file
103
pom.xml
Normal file
@ -0,0 +1,103 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.inteligr8.alfresco</groupId>
|
||||
<artifactId>pdfmeta-tengine</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>pdfmeta Alfresco T-Engine</name>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
|
||||
<ats.version>2.3.6</ats.version>
|
||||
<spring-boot.version>2.3.5.RELEASE</spring-boot.version>
|
||||
|
||||
<docker.image.registry>docker.inteligr8.com</docker.image.registry>
|
||||
<docker.image.name>inteligr8/${project.artifactId}</docker.image.name>
|
||||
<docker.image.tag>${project.version}</docker.image.tag>
|
||||
<ate.app.className>com.inteligr8.alfresco.pdfmeta.Application</ate.app.className>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.inteligr8.ootbee</groupId>
|
||||
<artifactId>beedk-ate-springboot</artifactId>
|
||||
<version>[1.0.0,2.0.0)</version>
|
||||
<type>pom</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-transform-tika</artifactId>
|
||||
<version>${ats.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.inteligr8.ootbee</groupId>
|
||||
<artifactId>beedk-ate-springboot-test</artifactId>
|
||||
<version>[1.0.0,2.0.0)</version>
|
||||
<type>pom</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.13.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.repaint.maven</groupId>
|
||||
<artifactId>tiles-maven-plugin</artifactId>
|
||||
<version>2.26</version>
|
||||
<extensions>true</extensions>
|
||||
<configuration>
|
||||
<tiles>
|
||||
<!-- Documentation: https://bitbucket.org/inteligr8/ootbee-beedk/src/stable/beedk-ate-springboot-tile -->
|
||||
<tile>com.inteligr8.ootbee:beedk-ate-springboot-tile:[1.0.0,2.0.0)</tile>
|
||||
<!-- Documentation: https://bitbucket.org/inteligr8/ootbee-beedk/src/stable/beedk-ate-docker-tile -->
|
||||
<tile>com.inteligr8.ootbee:beedk-ate-docker-tile:[1.0.0,2.0.0)</tile>
|
||||
|
||||
<!-- Enables rapid application development support -->
|
||||
<tile>com.inteligr8.ootbee:beedk-ate-self-rad-tile:[1.0.0,2.0.0)</tile>
|
||||
</tiles>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<repositories>
    <!--
        Inteligr8 repositories are addressed over HTTPS: Maven 3.8.1+ blocks plain
        http:// repositories by default, and unencrypted artifact downloads can be
        tampered with in transit.
        NOTE(review): confirm repos.inteligr8.com serves these paths over TLS.
    -->
    <repository>
        <id>inteligr8-releases</id>
        <url>https://repos.inteligr8.com/nexus/repository/inteligr8-public</url>
        <snapshots>
            <enabled>false</enabled>
        </snapshots>
    </repository>
    <repository>
        <id>inteligr8-snapshots</id>
        <url>https://repos.inteligr8.com/nexus/repository/inteligr8-snapshots</url>
        <releases>
            <enabled>false</enabled>
        </releases>
    </repository>
    <repository>
        <id>alfresco-public</id>
        <url>https://artifacts.alfresco.com/nexus/content/groups/public</url>
    </repository>
</repositories>
|
||||
|
||||
<pluginRepositories>
    <!--
        Addressed over HTTPS: Maven 3.8.1+ blocks plain http:// repositories by default.
        NOTE(review): confirm repos.inteligr8.com serves this path over TLS.
    -->
    <pluginRepository>
        <id>inteligr8-releases</id>
        <url>https://repos.inteligr8.com/nexus/repository/inteligr8-public</url>
    </pluginRepository>
</pluginRepositories>
|
||||
</project>
|
28
src/main/docker/Dockerfile
Normal file
28
src/main/docker/Dockerfile
Normal file
@ -0,0 +1,28 @@
|
||||
|
||||
FROM docker.inteligr8.com/inteligr8/ubuntu-jdk:20.04-11

# Build-time arguments: the JAR to package and the identity of the
# unprivileged user/group the engine runs as
ARG JAR_FILE
ARG APPGROUPNAME=alfresco
ARG APPGROUPID=1000
ARG APPUSERNAME=atengine
ARG APPUSERID=33001

# Runtime defaults; JAVA_OPTS may be overridden when the container is started
ENV JAVA_OPTS="-Xmx128m"
# NOTE(review): ${project.artifactId} is not a Docker variable; presumably it is
# substituted by Maven resource filtering before the image is built - confirm
ENV JAR_PATH=/usr/local/bin/${project.artifactId}.jar

COPY ${JAR_FILE} ${JAR_PATH}

# Install your engine's dependencies here
#RUN apt update && \
#    apt -y install {dependency names in APT repository}

# Create the application group and user, then hand the JAR over to them
RUN groupadd -g ${APPGROUPID} ${APPGROUPNAME} && \
    useradd -u ${APPUSERID} -G ${APPGROUPNAME} ${APPUSERNAME} && \
    chown ${APPUSERNAME}:${APPGROUPNAME} ${JAR_PATH}

EXPOSE 8090

USER ${APPUSERNAME}

# Shell form is kept so ${JAVA_OPTS} and ${JAR_PATH} are expanded at start-up;
# "exec" replaces the shell with the JVM so java runs as PID 1 and receives the
# SIGTERM sent by "docker stop" (otherwise the shell swallows it and the
# container is killed after the stop timeout)
ENTRYPOINT exec java ${JAVA_OPTS} -jar ${JAR_PATH}
|
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ImportResource;
|
||||
import org.springframework.context.event.EventListener;
|
||||
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
|
||||
@ImportResource({"classpath*:application-context.xml"})
|
||||
public class Application {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(Application.class);
|
||||
|
||||
@Value("${container.name}")
|
||||
private String containerName;
|
||||
|
||||
@Bean
|
||||
public MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
|
||||
return registry -> registry.config().commonTags("containerName", this.containerName);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
public void startup() {
|
||||
this.logger.info("Starting application components... Done");
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.inteligr8.alfresco.pdfmeta.util.DynamicDiscoveryMap;
|
||||
|
||||
public class AutoPropertyMappingWrapper extends DynamicDiscoveryMap<String, Set<String>> {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(this.getClass());
|
||||
|
||||
public AutoPropertyMappingWrapper(Map<String, Set<String>> map) {
|
||||
super(map);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void discoverExtraEntry(String key) {
|
||||
this.logger.trace("Looking up key as ACS property: {}", key);
|
||||
|
||||
int colon = key != null ? key.indexOf(':') : -1;
|
||||
if (colon < 0) {
|
||||
// key is not formatted like an ACS property
|
||||
this.nullKeys.add(key);
|
||||
} else if (key.indexOf(':', colon+1) < 0) {
|
||||
logger.debug("Adding key/property to mapping: {}", key);
|
||||
this.extMap.put(key, Collections.singleton(key));
|
||||
} else {
|
||||
// 2 colons; not formatted like an ACS property
|
||||
this.nullKeys.add(key);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.alfresco.transformer.executors.Transformer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class PdfMetaTransformer implements Transformer {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(PdfMetaTransformer.class);
|
||||
private final String id = "pdfmeta";
|
||||
|
||||
private PdfTikaMetadataExtractor extractor;
|
||||
|
||||
@PostConstruct
|
||||
public void init() throws Exception {
|
||||
if (this.logger.isDebugEnabled())
|
||||
this.logger.debug("init()");
|
||||
|
||||
this.extractor = new PdfTikaMetadataExtractor();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTransformerId() {
|
||||
return this.id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype, Map<String, String> transformOptions, File sourceFile, File targetFile) throws Exception {
|
||||
this.logger.trace("extractMetadata({}, {}, {}, {}, '{}', '{}')", transformName, sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
|
||||
if (!MediaType.APPLICATION_PDF_VALUE.equals(sourceMimetype))
|
||||
throw new IllegalArgumentException();
|
||||
if (transformOptions != null && !transformOptions.isEmpty())
|
||||
this.logger.debug("Transform options were specified, but they will be ignored: {}", transformOptions);
|
||||
|
||||
this.extractor.extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,207 @@
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.namespace.NamespaceContext;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpression;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import org.dom4j.io.DOMSAXContentHandler;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
/**
|
||||
* This is a custom implementation that extracts more meta-data from a PDF
|
||||
* than the built-in PDF extractor provided OOTB by Alfresco. Namely, it
|
||||
* extracts the bookmarks, in addition to the property and text extraction
|
||||
* provided OOTB.
|
||||
*
|
||||
* @author brian@inteligr8.com
|
||||
*/
|
||||
public class PdfTikaContentHandler extends DOMSAXContentHandler {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(this.getClass());
|
||||
|
||||
private static final XPathExpression xpathExprBodyPageText;
|
||||
private static final XPathExpression xpathExprBodyUl;
|
||||
private static final XPathExpression xpathExprLiText;
|
||||
private static final XPathExpression xpathExprText;
|
||||
|
||||
private final Object parseBookmarkSync = new Object();
|
||||
private List<String> texts;
|
||||
private final Object parseTextSync = new Object();
|
||||
private Map<String, Serializable> bookmarks;
|
||||
|
||||
static {
|
||||
XPath xpath = XPathFactory.newInstance().newXPath();
|
||||
xpath.setNamespaceContext(new TikaNamespaceContext());
|
||||
try {
|
||||
xpathExprBodyPageText = xpath.compile("/ns:html/ns:body/ns:div[@name=\"page\"]/text()");
|
||||
xpathExprBodyUl = xpath.compile("/ns:html/ns:body/ns:ul");
|
||||
xpathExprLiText = xpath.compile("ns:li/text()");
|
||||
xpathExprText = xpath.compile("text()");
|
||||
} catch (XPathExpressionException xpee) {
|
||||
throw new ExceptionInInitializerError(xpee);
|
||||
}
|
||||
}
|
||||
|
||||
public String getTextByPage(int page) {
|
||||
this.parseText();
|
||||
return this.texts.get(page-1);
|
||||
}
|
||||
|
||||
public List<String> getTextPerPages() {
|
||||
this.parseText();
|
||||
return this.texts;
|
||||
}
|
||||
|
||||
private void parseText() {
|
||||
synchronized (this.parseTextSync) {
|
||||
if (this.texts == null) {
|
||||
try {
|
||||
this.texts = this.parseBodyForTexts(this.getDocument());
|
||||
} catch (XPathExpressionException xpee) {
|
||||
throw new IllegalStateException(xpee);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String, Serializable> getBookmarks() {
|
||||
synchronized (this.parseBookmarkSync) {
|
||||
if (this.bookmarks == null) {
|
||||
try {
|
||||
this.bookmarks = this.parseBodyForBookmarks(this.getDocument());
|
||||
} catch (XPathExpressionException xpee) {
|
||||
throw new IllegalStateException(xpee);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return this.bookmarks;
|
||||
}
|
||||
|
||||
private List<String> parseBodyForTexts(Document document) throws XPathExpressionException {
|
||||
NodeList pageTexts = (NodeList)xpathExprBodyPageText.evaluate(document, XPathConstants.NODESET);
|
||||
if (pageTexts == null || pageTexts.getLength() == 0)
|
||||
return null;
|
||||
return this.parseTexts(pageTexts);
|
||||
}
|
||||
|
||||
private List<String> parseTexts(NodeList pageTexts) throws XPathExpressionException {
|
||||
List<String> texts = new LinkedList<>();
|
||||
|
||||
for (int n = 0; n < pageTexts.getLength(); n++) {
|
||||
Node node = pageTexts.item(n);
|
||||
texts.add(node.getTextContent());
|
||||
}
|
||||
|
||||
return texts;
|
||||
}
|
||||
|
||||
private Map<String, Serializable> parseBodyForBookmarks(Document document) throws XPathExpressionException {
|
||||
this.logger.debug("Extracting bookmarks from the XML embedded in a PDF");
|
||||
Element element = (Element)xpathExprBodyUl.evaluate(document, XPathConstants.NODE);
|
||||
if (element == null)
|
||||
return null;
|
||||
return this.parseBookmarks(element);
|
||||
}
|
||||
|
||||
private Map<String, Serializable> parseBookmarks(Element ulElement) throws XPathExpressionException {
|
||||
Map<String, Serializable> bookmarks = new LinkedHashMap<>();
|
||||
|
||||
Element lastBookmarkKey = null;
|
||||
Element lastBookmarkValues = null;
|
||||
|
||||
NodeList nodes = ulElement.getChildNodes();
|
||||
this.logger.debug("Found {} XML nodes; filtering down to just bookmarks ...", nodes.getLength());
|
||||
|
||||
for (int n = 0; n < nodes.getLength(); n++) {
|
||||
Node node = nodes.item(n);
|
||||
if (node.getNodeType() == Node.ELEMENT_NODE) {
|
||||
if (node.getLocalName().equals("li")) {
|
||||
lastBookmarkKey = (Element)node;
|
||||
} else if (node.getLocalName().equals("ul")) {
|
||||
lastBookmarkValues = (Element)node;
|
||||
}
|
||||
|
||||
if (lastBookmarkKey != null && lastBookmarkValues != null) {
|
||||
this.parseBookmark(lastBookmarkKey, lastBookmarkValues, bookmarks);
|
||||
lastBookmarkKey = null;
|
||||
lastBookmarkValues = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bookmarks;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void parseBookmark(Element liElement, Element ulElement, Map<String, Serializable> bookmarks)
|
||||
throws XPathExpressionException {
|
||||
String bookmarkKey = (String)xpathExprText.evaluate(liElement, XPathConstants.STRING);
|
||||
|
||||
Serializable bookmarkValue = bookmarks.get(bookmarkKey);
|
||||
|
||||
NodeList nodes = (NodeList)xpathExprLiText.evaluate(ulElement, XPathConstants.NODESET);
|
||||
|
||||
for (int n = 0; n < nodes.getLength(); n++) {
|
||||
Node node = nodes.item(n);
|
||||
this.logger.trace("Found bookmark value: {} => {}", bookmarkKey, node.getNodeValue());
|
||||
|
||||
if (bookmarkValue == null) {
|
||||
bookmarks.put(bookmarkKey, node.getNodeValue());
|
||||
} else if (bookmarkValue instanceof List) {
|
||||
((List<Serializable>)bookmarkValue).add(node.getNodeValue());
|
||||
} else {
|
||||
LinkedList<Serializable> bookmarkValues = new LinkedList<>();
|
||||
bookmarkValues.add(bookmarkValue);
|
||||
bookmarkValues.add(node.getNodeValue());
|
||||
bookmarks.put(bookmarkKey, bookmarkValues);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static class TikaNamespaceContext implements NamespaceContext {
|
||||
|
||||
private Map<String, String> prefix2uri = new HashMap<>();
|
||||
private Map<String, String> uri2prefix = new HashMap<>();
|
||||
|
||||
public TikaNamespaceContext() {
|
||||
this.prefix2uri.put("ns", "http://www.w3.org/1999/xhtml");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNamespaceURI(String prefix) {
|
||||
return this.prefix2uri.get(prefix);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrefix(String namespaceURI) {
|
||||
return this.uri2prefix.get(namespaceURI);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> getPrefixes(String namespaceURI) {
|
||||
return Arrays.asList(this.uri2prefix.get(namespaceURI)).iterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,174 @@
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.transformer.metadataExtractors.PdfBoxMetadataExtractor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.tika.extractor.DocumentSelector;
|
||||
import org.apache.tika.metadata.DublinCore;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Office;
|
||||
import org.apache.tika.metadata.PDF;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.apache.tika.metadata.TikaCoreProperties;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class PdfTikaMetadataExtractor extends PdfBoxMetadataExtractor {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(this.getClass());
|
||||
|
||||
protected PdfTikaContentHandler newContentHandler() {
|
||||
return new PdfTikaContentHandler();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata, Map<String, Set<String>> extractMapping) throws IOException {
|
||||
super.mapMetadataAndWrite(targetFile, metadata, new AutoPropertyMappingWrapper(extractMapping));
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a copy of the AbstractTikaMetadataExtractor from ATS v2.3.6.
|
||||
* {@link https://github.com/Alfresco/alfresco-transform-core/blob/2.3.6/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java}
|
||||
*/
|
||||
@Override
|
||||
public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions, File sourceFile) throws Exception {
|
||||
this.logger.trace("extractRaw({}, '{}')", sourceMimetype, sourceFile);
|
||||
|
||||
Map<String, Serializable> rawProperties = new HashMap<>();
|
||||
|
||||
InputStream istream = new FileInputStream(sourceFile);
|
||||
try {
|
||||
Parser parser = this.getParser();
|
||||
|
||||
Metadata metadata = new Metadata();
|
||||
metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
|
||||
|
||||
ParseContext context = this.buildParseContext(metadata, sourceMimetype);
|
||||
|
||||
PdfTikaContentHandler handler = this.newContentHandler();
|
||||
|
||||
this.logger.debug("Parsing {}b PDF using Apache Tika: {}", sourceFile.length(), sourceFile);
|
||||
parser.parse(istream, handler, metadata, context);
|
||||
this.logger.trace("Parsed {}b PDF using Apache Tika: {}", sourceFile.length(), sourceFile);
|
||||
this.logger.debug("Parsed PDF has meta-data: {}", Arrays.asList(metadata.names()));
|
||||
|
||||
this.processMetadata(metadata, rawProperties);
|
||||
|
||||
// this is the processing of that major difference
|
||||
this.processHandler(handler, rawProperties);
|
||||
if (this.logger.isTraceEnabled()) {
|
||||
this.logger.trace("Parsed PDF has properties: {}", rawProperties);
|
||||
} else if (this.logger.isTraceEnabled()) {
|
||||
this.logger.trace("Parsed PDF has properties: {}", rawProperties.keySet());
|
||||
}
|
||||
} finally {
|
||||
istream.close();
|
||||
}
|
||||
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a copy of the AbstractTikaMetadataExtractor from ATS v2.3.6.
|
||||
* {@link https://github.com/Alfresco/alfresco-transform-core/blob/2.3.6/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java}
|
||||
*/
|
||||
private ParseContext buildParseContext(Metadata metadata, String sourceMimeType) {
|
||||
ParseContext context = new ParseContext();
|
||||
DocumentSelector selector = this.getDocumentSelector(metadata, sourceMimeType);
|
||||
if (selector != null)
|
||||
context.set(DocumentSelector.class, selector);
|
||||
return context;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a copy of the AbstractTikaMetadataExtractor from ATS v2.3.6.
|
||||
* {@link https://github.com/Alfresco/alfresco-transform-core/blob/2.3.6/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java}
|
||||
*
|
||||
* One slight difference is we are now using the Property object instead of
|
||||
* the depreciated String object.
|
||||
*/
|
||||
protected Map<String, Serializable> processMetadata(Metadata metadata, Map<String, Serializable> rawProperties) {
|
||||
for (String tikaKey : metadata.names())
|
||||
this.putRawValue(tikaKey, this.getMetadataValue(metadata, tikaKey), rawProperties);
|
||||
|
||||
this.putRawValue(KEY_TITLE, this.getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties);
|
||||
this.putRawValue(KEY_COMMENTS, this.getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties);
|
||||
this.putRawValue(KEY_TAGS, this.getMetadataValue(metadata, KEY_TAGS), rawProperties);
|
||||
this.putRawValue(KEY_SUBJECT, this.getMetadataValue(metadata, PDF.DOC_INFO_SUBJECT), rawProperties);
|
||||
this.putRawValue(KEY_DESCRIPTION, this.getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION), rawProperties);
|
||||
this.putRawValue(KEY_CREATED, this.getMetadataValue(metadata, Property.composite(PDF.DOC_INFO_CREATED, new Property[] {DublinCore.CREATED, Office.CREATION_DATE, TikaCoreProperties.METADATA_DATE})), rawProperties);
|
||||
this.putRawValue(KEY_AUTHOR, this.getMetadataValue(metadata, Property.composite(PDF.DOC_INFO_CREATOR, new Property[] {DublinCore.CREATOR, Office.AUTHOR})), rawProperties);
|
||||
|
||||
rawProperties = extractSpecific(metadata, rawProperties, null);
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
protected void processHandler(PdfTikaContentHandler handler, Map<String, Serializable> rawProperties) {
|
||||
// TODO this should be more flexible than just a two-layer name/value pair
|
||||
|
||||
Map<String, Serializable> bookmarks = handler.getBookmarks();
|
||||
if (bookmarks == null) {
|
||||
this.logger.debug("Found no bookmarks in PDF");
|
||||
return;
|
||||
}
|
||||
|
||||
this.logger.debug("Found {} bookmarks in PDF", bookmarks.size());
|
||||
this.logger.trace("Found bookmarks in PDF: {}", bookmarks);
|
||||
|
||||
rawProperties.putAll(bookmarks);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is all effectively copied/translated from Alfresco Community v6.2.
|
||||
*/
|
||||
private String getMetadataValue(Metadata metadata, String key) {
|
||||
if (metadata.isMultiValued(key)) {
|
||||
String[] parts = metadata.getValues(key);
|
||||
return StringUtils.trimToNull(this.getMetadataMultiValue(parts));
|
||||
} else {
|
||||
return StringUtils.trimToNull(metadata.get(key));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is a mirror of the one above, but with a Property instead of
|
||||
* String parameter.
|
||||
*/
|
||||
private String getMetadataValue(Metadata metadata, Property prop) {
|
||||
if (metadata.isMultiValued(prop)) {
|
||||
String[] parts = metadata.getValues(prop);
|
||||
return StringUtils.trimToNull(this.getMetadataMultiValue(parts));
|
||||
} else {
|
||||
return StringUtils.trimToNull(metadata.get(prop));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is all effectively copied/translated from Alfresco Community v6.2.
|
||||
*/
|
||||
private String getMetadataMultiValue(String[] parts) {
|
||||
// use Set to prevent duplicates
|
||||
Set<String> value = new LinkedHashSet<>(parts.length);
|
||||
|
||||
for (int i = 0; i < parts.length; i++)
|
||||
value.add(parts[i]);
|
||||
|
||||
String valueStr = value.toString();
|
||||
|
||||
// remove leading/trailing braces []
|
||||
return valueStr.substring(1, valueStr.length() - 1);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*
|
||||
* Copyright (C) 2020 - 2021 Inteligr8
|
||||
*/
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import static org.alfresco.transformer.util.RequestParamMap.FILE;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_MIMETYPE;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TARGET_EXTENSION;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TARGET_MIMETYPE;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TEST_DELAY;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PROPERTY;
|
||||
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.alfresco.transformer.AbstractTransformerController;
|
||||
import org.alfresco.transformer.probes.ProbeTestTransform;
|
||||
import org.alfresco.transformer.util.MimetypeMap;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.client.HttpClientErrorException;
|
||||
import org.springframework.web.client.HttpServerErrorException;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
|
||||
* Controller for the Spring Boot transformer.
|
||||
*
|
||||
* Status Codes:
|
||||
*
|
||||
* 200 Success
|
||||
* 400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)
|
||||
* 400 Bad Request: Request parameter <name> is of the wrong type
|
||||
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
|
||||
* 400 Bad Request: The source filename was not supplied
|
||||
* 500 Internal Server Error: (no message with low level IO problems)
|
||||
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
|
||||
* 500 Internal Server Error: Transformer version check exit code was not 0
|
||||
* 500 Internal Server Error: Transformer version check failed to create any output
|
||||
* 500 Internal Server Error: Could not read the target file
|
||||
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
|
||||
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
|
||||
* 500 Internal Server Error: Filename encoding error
|
||||
* 507 Insufficient Storage: Failed to store the source file
|
||||
*/
|
||||
@Controller
|
||||
public class TransformerController extends AbstractTransformerController {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(TransformerController.class);
|
||||
private final Pattern fileext = Pattern.compile("\\.([^\\.]+)$");
|
||||
|
||||
@Autowired
|
||||
private PdfMetaTransformer transformer;
|
||||
|
||||
@Value("${transform.pdfmeta.version}")
|
||||
private String version;
|
||||
|
||||
private ProbeTestTransform probe;
|
||||
|
||||
@Override
|
||||
public String getTransformerName() {
|
||||
return "pdfmeta";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String version() {
|
||||
return this.version;
|
||||
}
|
||||
|
||||
@PostConstruct
|
||||
public void initProbe() {
|
||||
this.probe = new ProbeTestTransform(this, "quick.pdf", "quick",
|
||||
7455L, 1024L, 150, 10240L, 60L * 20L + 1L, 60L * 15L - 15L) {
|
||||
@Override
|
||||
protected void executeTransformCommand(File sourceFile, File targetFile) {
|
||||
logger.trace("getProbeTestTransform().executeTransformCommand('{}', '{}')", sourceFile, targetFile);
|
||||
|
||||
// FIXME
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProbeTestTransform getProbeTestTransform() {
|
||||
this.logger.trace("getProbeTestTransform()");
|
||||
return this.probe;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTransformerName(final File sourceFile, final String sourceMimetype, final String targetMimetype, final Map<String, String> transformOptions) {
|
||||
this.logger.trace("getTransformerName('{}', {}, {}, {})", sourceFile, sourceMimetype, targetMimetype, transformOptions);
|
||||
// does not matter what value is returned, as it is not used because there is only one.
|
||||
return this.getTransformerName();
|
||||
}
|
||||
|
||||
/**
|
||||
* This override of simply makes targetExtension optional
|
||||
*/
|
||||
@Override
|
||||
@SuppressWarnings("deprecation")
|
||||
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
|
||||
public ResponseEntity<Resource> transform(HttpServletRequest request,
|
||||
@RequestParam(FILE) MultipartFile sourceMultipartFile,
|
||||
@RequestParam(value = TARGET_EXTENSION, required = false) String targetExtension,
|
||||
@RequestParam(value = SOURCE_MIMETYPE, required = false) String sourceMimetype,
|
||||
@RequestParam(value = TARGET_MIMETYPE, required = false) String targetMimetype,
|
||||
@RequestParam Map<String, String> requestParameters,
|
||||
@RequestParam (value = TEST_DELAY, required = false) Long testDelay,
|
||||
|
||||
// The TRANSFORM_NAME_PROPERTY param allows ACS legacy transformers to specify which transform to use,
|
||||
// It can be removed once legacy transformers are removed from ACS.
|
||||
@RequestParam (value = TRANSFORM_NAME_PROPERTY, required = false) String requestTransformName) {
|
||||
if (targetExtension == null)
|
||||
targetExtension = "json";
|
||||
return super.transform(request, sourceMultipartFile, targetExtension, sourceMimetype, targetMimetype, requestParameters, testDelay, requestTransformName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype, Map<String, String> transformOptions, File sourceFile, File targetFile) {
|
||||
this.logger.trace("transformImpl({}, {}, {}, {}, '{}', '{}')", transformName, sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
|
||||
if (sourceMimetype == null) {
|
||||
Matcher matcher = this.fileext.matcher(sourceFile.getAbsolutePath());
|
||||
sourceMimetype = matcher.find() ? this.ext2mime(matcher.group(1)) : null;
|
||||
}
|
||||
if (targetMimetype == null) {
|
||||
Matcher matcher = this.fileext.matcher(targetFile.getAbsolutePath());
|
||||
targetMimetype = matcher.find() ? this.ext2mime(matcher.group(1)) : MimetypeMap.MIMETYPE_METADATA_EXTRACT;
|
||||
}
|
||||
|
||||
try {
|
||||
if (targetMimetype.equals(MimetypeMap.MIMETYPE_METADATA_EXTRACT)) {
|
||||
this.transformer.extractMetadata(transformName, sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
} else {
|
||||
this.transformer.transform(transformName, sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
} catch (IllegalArgumentException iae) {
|
||||
throw new HttpClientErrorException(HttpStatus.BAD_REQUEST);
|
||||
} catch (Exception e) {
|
||||
throw new HttpServerErrorException(HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
private String ext2mime(String ext) {
|
||||
switch (ext.toLowerCase()) {
|
||||
// add applicable extensions here
|
||||
case "pdf":
|
||||
case "pdfa": return MediaType.APPLICATION_PDF_VALUE;
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,132 @@
|
||||
package com.inteligr8.alfresco.pdfmeta.util;
|
||||
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public abstract class DynamicDiscoveryMap<K, V> implements Map<K, V> {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(this.getClass());
|
||||
|
||||
private final Map<K, V> underlyingMap;
|
||||
protected final Map<K, V> extMap = new HashMap<>();
|
||||
protected final Set<K> nullKeys = new HashSet<>();
|
||||
|
||||
public DynamicDiscoveryMap(Map<K, V> map) {
|
||||
this.underlyingMap = map;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private K castKey(Object key) {
|
||||
return (K)key;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsKey(Object key) {
|
||||
if (this.underlyingMap.containsKey(key) ||
|
||||
this.extMap.containsKey(key))
|
||||
return true;
|
||||
|
||||
if (this.nullKeys.contains(key))
|
||||
return false;
|
||||
|
||||
this.discoverExtraEntry(this.castKey(key));
|
||||
return this.extMap.containsKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsValue(Object value) {
|
||||
return this.underlyingMap.containsValue(value) ||
|
||||
this.extMap.containsValue(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return this.underlyingMap.isEmpty() && this.extMap.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return this.underlyingMap.size() + this.extMap.size();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public Set<K> keySet() {
|
||||
Set<K>[] sets = (Set<K>[])Array.newInstance(this.underlyingMap.keySet().getClass(), 2);
|
||||
sets[0] = this.underlyingMap.keySet();
|
||||
sets[1] = this.extMap.keySet();
|
||||
return new MultiSet<>(sets);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public Set<Entry<K, V>> entrySet() {
|
||||
Set<Entry<K, V>>[] sets = (Set<Entry<K, V>>[])Array.newInstance(this.underlyingMap.entrySet().getClass(), 2);
|
||||
sets[0] = this.underlyingMap.entrySet();
|
||||
sets[1] = this.extMap.entrySet();
|
||||
return new MultiSet<>(sets);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public Collection<V> values() {
|
||||
Collection<V> values = this.underlyingMap.values();
|
||||
Collection<V>[] cs = (Collection<V>[])Array.newInstance(values.getClass(), 2);
|
||||
cs[0] = values;
|
||||
cs[1] = this.extMap.values();
|
||||
return new MultiCollection<>(cs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public V get(Object key) {
|
||||
if (this.underlyingMap.containsKey(key))
|
||||
return this.underlyingMap.get(key);
|
||||
if (!this.extMap.containsKey(key)) {
|
||||
this.logger.debug("Discover possible extra key: {}", key);
|
||||
this.discoverExtraEntry(this.castKey(key));
|
||||
}
|
||||
return this.extMap.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(K key, V value) {
|
||||
return this.underlyingMap.put(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putAll(Map<? extends K, ? extends V> m) {
|
||||
this.underlyingMap.putAll(m);
|
||||
}
|
||||
|
||||
@Override
|
||||
public V remove(Object key) {
|
||||
this.nullKeys.remove(key);
|
||||
V values1 = this.extMap.remove(key);
|
||||
V values2 = this.underlyingMap.remove(key);
|
||||
return values2 != null ? values2 : values1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(Object key, Object value) {
|
||||
boolean b1 = this.underlyingMap.remove(key, value);
|
||||
boolean b2 = this.extMap.remove(key, value);
|
||||
return b1 || b2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
this.underlyingMap.clear();
|
||||
this.extMap.clear();
|
||||
this.nullKeys.clear();
|
||||
}
|
||||
|
||||
protected abstract void discoverExtraEntry(K key);
|
||||
|
||||
}
|
@ -0,0 +1,143 @@
|
||||
package com.inteligr8.alfresco.pdfmeta.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
 * A live view that presents several backing collections as one.
 *
 * Reads span every backing collection in array order. {@link #add(Object)} and
 * {@link #addAll(Collection)} write to the first backing collection only; removals and
 * {@link #clear()} are applied to the backing collections directly.
 */
public class MultiCollection<E> implements Collection<E> {

	private final Collection<E>[] cs;

	/**
	 * @param cs the backing collections, in iteration order; the array is used directly, not copied
	 */
	public MultiCollection(Collection<E>[] cs) {
		this.cs = cs;
	}

	@Override
	public boolean contains(Object o) {
		for (Collection<E> c : this.cs)
			if (c.contains(o))
				return true;
		return false;
	}

	@Override
	public boolean containsAll(Collection<?> c) {
		for (Object e : c)
			if (!this.contains(e))
				return false;
		return true;
	}

	@Override
	public boolean isEmpty() {
		for (Collection<E> c : this.cs)
			if (!c.isEmpty())
				return false;
		return true;
	}

	/**
	 * @return the sum of the sizes of the backing collections
	 */
	@Override
	public int size() {
		int count = 0;
		for (Collection<E> c : this.cs)
			count += c.size();
		return count;
	}

	/**
	 * Iterates over each backing collection once, in array order.
	 */
	@Override
	public Iterator<E> iterator() {
		return new Iterator<E>() {

			// index of the next backing collection to open
			private int arrayIndex = 0;
			// iterator currently being drained; null until the first collection is opened
			private Iterator<E> i = null;
			// iterator that produced the most recent element, so remove() still targets the
			// right collection after hasNext() has moved on to the next one
			private Iterator<E> last = null;

			@Override
			public boolean hasNext() {
				while (this.i == null || !this.i.hasNext()) {
					if (this.arrayIndex >= cs.length)
						return false;
					this.i = cs[this.arrayIndex++].iterator();
				}
				return true;
			}

			@Override
			public E next() {
				// advance across collection boundaries even when the caller skipped hasNext()
				if (!this.hasNext())
					throw new java.util.NoSuchElementException();
				this.last = this.i;
				return this.i.next();
			}

			@Override
			public void remove() {
				if (this.last == null)
					throw new IllegalStateException();
				this.last.remove();
			}
		};
	}

	@Override
	public Object[] toArray() {
		Object[] array = new Object[this.size()];
		int i = 0;
		for (E o : this)
			array[i++] = o;
		return array;
	}

	/**
	 * @throws NullPointerException when {@code a} is null, as the {@link Collection} contract requires
	 */
	@SuppressWarnings("unchecked")
	@Override
	public <T> T[] toArray(T[] a) {
		int size = this.size();
		if (a.length < size)
			a = Arrays.copyOf(a, size);
		else if (a.length > size)
			a[size] = null; // contract: mark the end when the supplied array is larger

		int i = 0;
		for (E o : this)
			a[i++] = (T)o;
		return a;
	}

	/** Adds to the first backing collection. */
	@Override
	public boolean add(E e) {
		return this.cs[0].add(e);
	}

	/** Adds to the first backing collection. */
	@Override
	public boolean addAll(Collection<? extends E> c) {
		return this.cs[0].addAll(c);
	}

	/** Removes one occurrence, from the first backing collection that holds it. */
	@Override
	public boolean remove(Object o) {
		for (Collection<E> c : this.cs)
			if (c.remove(o))
				return true;
		return false;
	}

	@Override
	public boolean removeAll(Collection<?> c) {
		boolean changed = false;
		for (Object e : c)
			changed = this.remove(e) || changed;
		return changed;
	}

	@Override
	public boolean retainAll(Collection<?> c) {
		boolean changed = false;
		for (Collection<E> backing : this.cs)
			changed = backing.retainAll(c) || changed;
		return changed;
	}

	@Override
	public void clear() {
		for (Collection<E> c : this.cs)
			c.clear();
	}

}
|
@ -0,0 +1,11 @@
|
||||
package com.inteligr8.alfresco.pdfmeta.util;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
public class MultiSet<K> extends MultiCollection<K> implements Set<K> {
|
||||
|
||||
public MultiSet(Set<K>[] sets) {
|
||||
super(sets);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
subject=cm:description
|
||||
created=cm:created
|
11
src/main/resources/application-context.xml
Normal file
11
src/main/resources/application-context.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
|
||||
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
|
||||
|
||||
<bean autowire-candidate="true" class="org.alfresco.transformer.clients.AlfrescoSharedFileStoreClient" />
|
||||
<bean autowire-candidate="true" class="org.springframework.web.client.RestTemplate" />
|
||||
<bean autowire-candidate="true" class="org.alfresco.transform.client.model.TransformRequestValidator" />
|
||||
<bean autowire-candidate="true" class="org.alfresco.transformer.TransformRegistryImpl" />
|
||||
|
||||
</beans>
|
13
src/main/resources/application-default.yaml
Normal file
13
src/main/resources/application-default.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
queue:
|
||||
engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:com.inteligr8.alfresco.pdfmeta.acs}
|
||||
|
||||
transform:
|
||||
core:
|
||||
config:
|
||||
location: classpath:this_engine_config.json
|
||||
pdfmeta:
|
||||
version: ${project.version}
|
||||
|
||||
logging:
|
||||
level:
|
||||
com.inteligr8.alfresco.pdfmeta: ${LOG_LEVEL:trace}
|
21
src/main/resources/templates/transformForm.html
Normal file
21
src/main/resources/templates/transformForm.html
Normal file
@ -0,0 +1,21 @@
|
||||
<html xmlns:th="http://www.thymeleaf.org">
|
||||
<body>
|
||||
|
||||
<div>
|
||||
<h2>pdfmeta Test Transformation</h2>
|
||||
<form method="POST" enctype="multipart/form-data" action="/transform">
|
||||
<input type="hidden" name="targetMimetype" value="alfresco-metadata-extract" />
|
||||
<table>
|
||||
<tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
|
||||
<!-- Add a row for each of your transform options -->
|
||||
<tr><td></td><td><input type="submit" value="Transform" /></td></tr>
|
||||
</table>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<a href="/log">Log entries</a>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
10
src/main/resources/this_engine_config.json
Normal file
10
src/main/resources/this_engine_config.json
Normal file
@ -0,0 +1,10 @@
|
||||
{
|
||||
"transformers": [
|
||||
{
|
||||
"transformerName": "pdfmeta",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/pdf", "priority": 5, "targetMediaType": "alfresco-metadata-extract" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
164
src/test/java/com/inteligr8/alfresco/pdfmeta/HttpRequestIT.java
Normal file
164
src/test/java/com/inteligr8/alfresco/pdfmeta/HttpRequestIT.java
Normal file
@ -0,0 +1,164 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.alfresco.transformer.util.MimetypeMap;
|
||||
import org.alfresco.transformer.util.RequestParamMap;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
|
||||
import org.springframework.boot.test.web.client.TestRestTemplate;
|
||||
import org.springframework.boot.web.server.LocalServerPort;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.web.client.HttpStatusCodeException;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
|
||||
public class HttpRequestIT {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(HttpRequestIT.class);
|
||||
|
||||
@LocalServerPort
|
||||
private int port;
|
||||
|
||||
@Autowired
|
||||
private TestRestTemplate restTemplate;
|
||||
|
||||
protected String baseUrl;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
this.baseUrl = "http://localhost:" + this.port;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRootPath() {
|
||||
String result = this.restTemplate.getForObject(this.baseUrl, String.class);
|
||||
if (this.logger.isDebugEnabled())
|
||||
this.logger.debug("testRootPath(): result: " + result);
|
||||
|
||||
Assert.assertNotNull("A result from the HTTP GET was expected", result);
|
||||
Document htmldoc = Jsoup.parse(result);
|
||||
Assert.assertNotNull("An HTML compliant result was expected: " + result.substring(0, 50), htmldoc);
|
||||
|
||||
Elements elements = htmldoc.select("html body h2");
|
||||
Assert.assertFalse("The HTML body is expected to have an h2 element: html: " + htmldoc.toString(), elements.isEmpty());
|
||||
Assert.assertEquals("The HTML body is expected to have just one h2 element", 1, elements.size());
|
||||
Assert.assertEquals("The HTML body header is not what was expected", "pdfmeta Test Transformation", elements.html());
|
||||
|
||||
elements = htmldoc.select("html input");
|
||||
Set<String> inputs = new HashSet<String>();
|
||||
for (Element element : elements)
|
||||
inputs.add(element.attr("name"));
|
||||
Assert.assertTrue("The HTML is expected to have a form input for 'file': " + inputs.toString(), inputs.contains("file"));
|
||||
Assert.assertTrue("The HTML is expected to have a form input for 'targetExtension': " + inputs.toString(), inputs.contains("targetExtension"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLogPath() {
|
||||
String result = this.restTemplate.getForObject(this.baseUrl + "/log", String.class);
|
||||
if (this.logger.isDebugEnabled())
|
||||
this.logger.debug("testLogPath(): result: " + result);
|
||||
|
||||
Assert.assertNotNull("A result from the HTTP GET was expected", result);
|
||||
Document htmldoc = Jsoup.parse(result);
|
||||
Assert.assertNotNull("An HTML compliant result was expected: " + result.substring(0, 50), htmldoc);
|
||||
|
||||
Elements elements = htmldoc.select("html body div h2");
|
||||
Assert.assertFalse("The HTML is expected to have an html/body/div/h2 element: html: " + htmldoc.select("html").toString(), elements.isEmpty());
|
||||
Assert.assertEquals("The HTML is expected to have just one html/body/div/h2 element", 1, elements.size());
|
||||
Assert.assertEquals("The HTML body header is not what was expected", "pdfmeta Log Entries", elements.html());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoPath() {
|
||||
try {
|
||||
ResponseEntity<String> response = this.restTemplate.getForEntity(this.baseUrl + "/doesnotexist", String.class);
|
||||
Assert.assertEquals("An unexpected path must return a 404 error", 404, response.getStatusCodeValue());
|
||||
} catch (HttpStatusCodeException hsce) {
|
||||
Assert.assertEquals("An unexpected path must return a 404 error", 404, hsce.getRawStatusCode());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testServiceGet() {
|
||||
try {
|
||||
ResponseEntity<String> response = this.restTemplate.getForEntity(this.baseUrl + "/transform", String.class);
|
||||
Assert.assertEquals("An unexpected path must return a 405 error", 405, response.getStatusCodeValue());
|
||||
} catch (HttpStatusCodeException hsce) {
|
||||
Assert.assertEquals("An unexpected path must return a 405 error", 405, hsce.getRawStatusCode());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testServiceNoFile() {
|
||||
try {
|
||||
ResponseEntity<String> response = this._testService(null);
|
||||
Assert.assertEquals("An unexpected path must return a 400 error", 400, response.getStatusCodeValue());
|
||||
} catch (HttpStatusCodeException hsce) {
|
||||
Assert.assertEquals("An unexpected path must return a 400 error", 400, hsce.getRawStatusCode());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testServiceQuick() {
|
||||
this._testService("pdf-bookmarked.pdf");
|
||||
}
|
||||
|
||||
protected ResponseEntity<String> _testService(String filename) {
|
||||
LinkedMultiValueMap<String, Object> parameters = new LinkedMultiValueMap<>();
|
||||
if (filename != null)
|
||||
parameters.add(RequestParamMap.FILE, new ClassPathResource(filename));
|
||||
parameters.add(RequestParamMap.TARGET_MIMETYPE, MimetypeMap.MIMETYPE_METADATA_EXTRACT);
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.MULTIPART_FORM_DATA);
|
||||
HttpEntity<LinkedMultiValueMap<String, Object>> entity = new HttpEntity<>(parameters, headers);
|
||||
return this.restTemplate.postForEntity(this.baseUrl + "/transform", entity, String.class);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package com.inteligr8.alfresco.pdfmeta;
|
||||
|
||||
import org.alfresco.transformer.AbstractTransformerController;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@WebMvcTest(controllers = TransformerController.class)
|
||||
public class TransformerControllerTest {
|
||||
|
||||
@Autowired
|
||||
protected AbstractTransformerController controller;
|
||||
|
||||
@Test @Ignore
|
||||
public void test() {
|
||||
|
||||
}
|
||||
}
|
13
src/test/resources/application-default.yaml
Normal file
13
src/test/resources/application-default.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
queue:
|
||||
engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:com.inteligr8.alfresco.pdfmeta.acs}
|
||||
|
||||
transform:
|
||||
core:
|
||||
config:
|
||||
location: classpath:this_engine_config.json
|
||||
pdfmeta:
|
||||
version: ${project.version}
|
||||
|
||||
logging:
|
||||
level:
|
||||
com.inteligr8.alfresco.pdfmeta: ${LOG_LEVEL:trace}
|
BIN
src/test/resources/bookmarked.pdf
Normal file
BIN
src/test/resources/bookmarked.pdf
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user