MNT-18275 Change detected mimetype: pdf->ai ps->eps if extension correct

This commit is contained in:
Alan Davis
2017-09-28 21:44:41 +01:00
parent 900f8f4529
commit 3e54ada31e
2 changed files with 80 additions and 33 deletions

View File

@@ -25,20 +25,6 @@
*/ */
package org.alfresco.repo.content; package org.alfresco.repo.content;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import org.alfresco.repo.content.encoding.ContentCharsetFinder; import org.alfresco.repo.content.encoding.ContentCharsetFinder;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.FileContentReader; import org.alfresco.service.cmr.repository.FileContentReader;
@@ -57,6 +43,20 @@ import org.springframework.extensions.config.ConfigElement;
import org.springframework.extensions.config.ConfigLookupContext; import org.springframework.extensions.config.ConfigLookupContext;
import org.springframework.extensions.config.ConfigService; import org.springframework.extensions.config.ConfigService;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
/** /**
* Provides a bidirectional mapping between well-known mimetypes and the * Provides a bidirectional mapping between well-known mimetypes and the
* registered file extensions. All mimetypes and extensions are stored and * registered file extensions. All mimetypes and extensions are stored and
@@ -719,6 +719,7 @@ public class MimetypeMap implements MimetypeService
try try
{ {
type = detector.detect(inp, metadata); type = detector.detect(inp, metadata);
type = typeBasedOnDetectedTypeAndExtension(type, filename);
logger.debug(input + " detected by Tika as being " + type.toString()); logger.debug(input + " detected by Tika as being " + type.toString());
} }
catch (Exception e) catch (Exception e)
@@ -743,6 +744,36 @@ public class MimetypeMap implements MimetypeService
return type; return type;
} }
/**
 * Refines a content-detected media type using the file name's extension.
 * <p>
 * Tika detects .ai files as PDF and .eps files as PostScript. When the detected type is
 * one of those and the file name ends with the extension that {@code getExtension} returns
 * for the more specific type, the more specific type is returned instead. The extension
 * comparison ignores case, so {@code LOGO.AI} is treated the same as {@code logo.ai}.
 *
 * @param type the media type detected from the content; may be {@code null}
 * @param filename the file name to take the extension from; may be {@code null}
 * @return the more specific media type when the extension matches, otherwise {@code type}
 *         unchanged (including when either argument is {@code null})
 */
private MediaType typeBasedOnDetectedTypeAndExtension(MediaType type, String filename)
{
    if (filename == null || type == null)
    {
        return type;
    }

    // Each row is { type reported by detection, more specific type to prefer on extension match }.
    final String[][] detectedAndPossibleTypes =
    {
        { MIMETYPE_PDF, MIMETYPE_APPLICATION_ILLUSTRATOR },
        { MIMETYPE_APPLICATION_PS, MIMETYPE_APPLICATION_EPS }
    };

    final String detectedType = type.toString();
    for (String[] pair : detectedAndPossibleTypes)
    {
        if (!pair[0].equals(detectedType))
        {
            continue;
        }

        String possibleType = pair[1];
        String suffix = "." + getExtension(possibleType);
        int offset = filename.length() - suffix.length();

        // Case-insensitive endsWith: regionMatches avoids lower-casing with a locale.
        if (offset >= 0 && filename.regionMatches(true, offset, suffix, 0, suffix.length()))
        {
            return MediaType.parse(possibleType);
        }
    }
    return type;
}
/** /**
* Use Apache Tika to check if the mime type of the document really matches * Use Apache Tika to check if the mime type of the document really matches
* what it claims to be. This is typically used when a transformation or * what it claims to be. This is typically used when a transformation or

View File

@@ -25,20 +25,7 @@
*/ */
package org.alfresco.repo.content; package org.alfresco.repo.content;
import java.io.ByteArrayInputStream; import junit.framework.TestCase;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentData; import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
@@ -52,7 +39,19 @@ import org.springframework.extensions.config.ConfigService;
import org.springframework.extensions.config.ConfigSource; import org.springframework.extensions.config.ConfigSource;
import org.springframework.extensions.config.xml.XMLConfigService; import org.springframework.extensions.config.xml.XMLConfigService;
import junit.framework.TestCase; import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/** /**
* @see org.alfresco.repo.content.MimetypeMap * @see org.alfresco.repo.content.MimetypeMap
@@ -238,6 +237,17 @@ public class MimetypeMapTest extends TestCase
assertEquals(MimetypeMap.MIMETYPE_VIDEO_QUICKTIME, mimetypeService.guessMimetype("file.rm", reader)); assertEquals(MimetypeMap.MIMETYPE_VIDEO_QUICKTIME, mimetypeService.guessMimetype("file.rm", reader));
} }
/**
 * Verifies that the guessed mimetype takes the file extension into account when the
 * content alone looks like PDF or PostScript: "file.ai" / "file.eps" must yield the
 * Illustrator / EPS types, while "file.pdf" / "file.ps" keep the PDF / PS types.
 */
public void testTypeBasedOnDetectedTypeAndExtension() throws Exception
{
    // Content starting with a PDF header.
    final ContentReader pdfContent = new DummyContentReader(MimetypeMap.MIMETYPE_PDF, "%PDF\r");
    assertEquals(
            MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR,
            mimetypeService.guessMimetype("file.ai", pdfContent.getContentInputStream()));
    assertEquals(
            MimetypeMap.MIMETYPE_PDF,
            mimetypeService.guessMimetype("file.pdf", pdfContent.getContentInputStream()));

    // Content starting with a PostScript header.
    final ContentReader psContent = new DummyContentReader(MimetypeMap.MIMETYPE_APPLICATION_PS, "%!PS");
    assertEquals(
            MimetypeMap.MIMETYPE_APPLICATION_EPS,
            mimetypeService.guessMimetype("file.eps", psContent.getContentInputStream()));
    assertEquals(
            MimetypeMap.MIMETYPE_APPLICATION_PS,
            mimetypeService.guessMimetype("file.ps", psContent.getContentInputStream()));
}
public void testDuplicates() throws Exception public void testDuplicates() throws Exception
{ {
setConfigService( setConfigService(
@@ -288,17 +298,23 @@ public class MimetypeMapTest extends TestCase
public static class DummyContentReader implements ContentReader public static class DummyContentReader implements ContentReader
{ {
private String mimetype;
private String mimetype = MimetypeMap.MIMETYPE_HTML; private String content;
public DummyContentReader() public DummyContentReader()
{ {
super(); this(MimetypeMap.MIMETYPE_HTML);
} }
public DummyContentReader(String mimetype) public DummyContentReader(String mimetype)
{
this(mimetype, "<X>@@/Y");
}
public DummyContentReader(String mimetype, String content)
{ {
this.mimetype = mimetype; this.mimetype = mimetype;
this.content = content;
} }
@Override @Override
@@ -340,7 +356,7 @@ public class MimetypeMapTest extends TestCase
@Override @Override
public InputStream getContentInputStream() throws ContentIOException public InputStream getContentInputStream() throws ContentIOException
{ {
return new ByteArrayInputStream("<X>@@/Y".getBytes(StandardCharsets.UTF_8)); return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
} }
@Override @Override