From 3e54ada31e3b83d9e3343450734d06c2f1f3691a Mon Sep 17 00:00:00 2001 From: Alan Davis Date: Thu, 28 Sep 2017 21:44:41 +0100 Subject: [PATCH] MNT-18275 Change detected mimetype: pdf->ai ps->eps if extension correct --- .../alfresco/repo/content/MimetypeMap.java | 59 ++++++++++++++----- .../repo/content/MimetypeMapTest.java | 54 +++++++++++------ 2 files changed, 80 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/alfresco/repo/content/MimetypeMap.java b/src/main/java/org/alfresco/repo/content/MimetypeMap.java index 3d638599ca..54768df3ea 100644 --- a/src/main/java/org/alfresco/repo/content/MimetypeMap.java +++ b/src/main/java/org/alfresco/repo/content/MimetypeMap.java @@ -25,20 +25,6 @@ */ package org.alfresco.repo.content; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; - import org.alfresco.repo.content.encoding.ContentCharsetFinder; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.FileContentReader; @@ -57,6 +43,20 @@ import org.springframework.extensions.config.ConfigElement; import org.springframework.extensions.config.ConfigLookupContext; import org.springframework.extensions.config.ConfigService; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeMap; + /** * Provides a bidirectional mapping between well-known mimetypes and the * registered file extensions. All mimetypes and extensions are stored and @@ -719,6 +719,7 @@ public class MimetypeMap implements MimetypeService try { type = detector.detect(inp, metadata); + type = typeBasedOnDetectedTypeAndExtension(type, filename); logger.debug(input + " detected by Tika as being " + type.toString()); } catch (Exception e) @@ -743,6 +744,36 @@ public class MimetypeMap implements MimetypeService return type; } + // We have a problem with .ai files, as Tika detects them as .pdf, but if we can use the filename + // we can correct that. Similar problem with .eps and .ps. + private MediaType typeBasedOnDetectedTypeAndExtension(MediaType type, String filename) + { + if (filename != null && type != null) + { + String[] detectedAndPossibleTypes = new String[] + { + MIMETYPE_PDF, MIMETYPE_APPLICATION_ILLUSTRATOR, + MIMETYPE_APPLICATION_PS, MIMETYPE_APPLICATION_EPS + }; + + for (int i=detectedAndPossibleTypes.length-1; i>=0; i-=2) + { + String detectedType = detectedAndPossibleTypes[i-1]; + if (detectedType.equals(type.toString())) + { + String possibleType = detectedAndPossibleTypes[i]; + String extension = getExtension(possibleType); + if (filename.endsWith("."+extension)) + { + type = MediaType.parse(possibleType); + break; + } + } + } + } + return type; + } + /** * Use Apache Tika to check if the mime type of the document really matches * what it claims to be. This is typically used when a transformation or diff --git a/src/test/java/org/alfresco/repo/content/MimetypeMapTest.java b/src/test/java/org/alfresco/repo/content/MimetypeMapTest.java index 9da228117a..338f3e7dda 100644 --- a/src/test/java/org/alfresco/repo/content/MimetypeMapTest.java +++ b/src/test/java/org/alfresco/repo/content/MimetypeMapTest.java @@ -25,20 +25,7 @@ */ package org.alfresco.repo.content; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.channels.FileChannel; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Locale; -import java.util.Map; - +import junit.framework.TestCase; import org.alfresco.service.cmr.repository.ContentData; import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; @@ -52,7 +39,19 @@ import org.springframework.extensions.config.ConfigService; import org.springframework.extensions.config.ConfigSource; import org.springframework.extensions.config.xml.XMLConfigService; -import junit.framework.TestCase; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.Map; /** * @see org.alfresco.repo.content.MimetypeMap @@ -238,6 +237,17 @@ public class MimetypeMapTest extends TestCase assertEquals(MimetypeMap.MIMETYPE_VIDEO_QUICKTIME, mimetypeService.guessMimetype("file.rm", reader)); } + public void testTypeBasedOnDetectedTypeAndExtension() throws Exception + { + ContentReader reader = new DummyContentReader(MimetypeMap.MIMETYPE_PDF, "%PDF\r"); + assertEquals(MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR, mimetypeService.guessMimetype("file.ai", reader.getContentInputStream())); + assertEquals(MimetypeMap.MIMETYPE_PDF, mimetypeService.guessMimetype("file.pdf", reader.getContentInputStream())); + + reader = new DummyContentReader(MimetypeMap.MIMETYPE_APPLICATION_PS, "%!PS"); + assertEquals(MimetypeMap.MIMETYPE_APPLICATION_EPS, mimetypeService.guessMimetype("file.eps", reader.getContentInputStream())); + assertEquals(MimetypeMap.MIMETYPE_APPLICATION_PS, mimetypeService.guessMimetype("file.ps", reader.getContentInputStream())); + } + public void testDuplicates() throws Exception { setConfigService( @@ -288,17 +298,23 @@ public class MimetypeMapTest extends TestCase public static class DummyContentReader implements ContentReader { - - private String mimetype = MimetypeMap.MIMETYPE_HTML; + private String mimetype; + private String content; public DummyContentReader() { - super(); + this(MimetypeMap.MIMETYPE_HTML); } public DummyContentReader(String mimetype) + { + this(mimetype, "@@/Y"); + } + + public DummyContentReader(String mimetype, String content) { this.mimetype = mimetype; + this.content = content; } @Override @@ -340,7 +356,7 @@ public class MimetypeMapTest extends TestCase @Override public InputStream getContentInputStream() throws ContentIOException { - return new ByteArrayInputStream("@@/Y".getBytes(StandardCharsets.UTF_8)); + return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)); } @Override