mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
MNT-18275 Change detected mimetype: pdf->ai ps->eps if extension correct
This commit is contained in:
@@ -25,20 +25,6 @@
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.alfresco.repo.content.encoding.ContentCharsetFinder;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.FileContentReader;
|
||||
@@ -57,6 +43,20 @@ import org.springframework.extensions.config.ConfigElement;
|
||||
import org.springframework.extensions.config.ConfigLookupContext;
|
||||
import org.springframework.extensions.config.ConfigService;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Provides a bidirectional mapping between well-known mimetypes and the
|
||||
* registered file extensions. All mimetypes and extensions are stored and
|
||||
@@ -719,6 +719,7 @@ public class MimetypeMap implements MimetypeService
|
||||
try
|
||||
{
|
||||
type = detector.detect(inp, metadata);
|
||||
type = typeBasedOnDetectedTypeAndExtension(type, filename);
|
||||
logger.debug(input + " detected by Tika as being " + type.toString());
|
||||
}
|
||||
catch (Exception e)
|
||||
@@ -743,6 +744,36 @@ public class MimetypeMap implements MimetypeService
|
||||
return type;
|
||||
}
|
||||
|
||||
// We have a problem with .ai files, as Tika detects them as .pdf, but if we can use the filename
|
||||
// we can correct that. Similar problem with .eps and .ps.
|
||||
private MediaType typeBasedOnDetectedTypeAndExtension(MediaType type, String filename)
|
||||
{
|
||||
if (filename != null && type != null)
|
||||
{
|
||||
String[] detectedAndPossibleTypes = new String[]
|
||||
{
|
||||
MIMETYPE_PDF, MIMETYPE_APPLICATION_ILLUSTRATOR,
|
||||
MIMETYPE_APPLICATION_PS, MIMETYPE_APPLICATION_EPS
|
||||
};
|
||||
|
||||
for (int i=detectedAndPossibleTypes.length-1; i>=0; i-=2)
|
||||
{
|
||||
String detectedType = detectedAndPossibleTypes[i-1];
|
||||
if (detectedType.equals(type.toString()))
|
||||
{
|
||||
String possibleType = detectedAndPossibleTypes[i];
|
||||
String extension = getExtension(possibleType);
|
||||
if (filename.endsWith("."+extension))
|
||||
{
|
||||
type = MediaType.parse(possibleType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use Apache Tika to check if the mime type of the document really matches
|
||||
* what it claims to be. This is typically used when a transformation or
|
||||
|
@@ -25,20 +25,7 @@
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.ReadableByteChannel;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.alfresco.service.cmr.repository.ContentData;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
@@ -52,7 +39,19 @@ import org.springframework.extensions.config.ConfigService;
|
||||
import org.springframework.extensions.config.ConfigSource;
|
||||
import org.springframework.extensions.config.xml.XMLConfigService;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.ReadableByteChannel;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.MimetypeMap
|
||||
@@ -238,6 +237,17 @@ public class MimetypeMapTest extends TestCase
|
||||
assertEquals(MimetypeMap.MIMETYPE_VIDEO_QUICKTIME, mimetypeService.guessMimetype("file.rm", reader));
|
||||
}
|
||||
|
||||
public void testTypeBasedOnDetectedTypeAndExtension() throws Exception
|
||||
{
|
||||
ContentReader reader = new DummyContentReader(MimetypeMap.MIMETYPE_PDF, "%PDF\r");
|
||||
assertEquals(MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR, mimetypeService.guessMimetype("file.ai", reader.getContentInputStream()));
|
||||
assertEquals(MimetypeMap.MIMETYPE_PDF, mimetypeService.guessMimetype("file.pdf", reader.getContentInputStream()));
|
||||
|
||||
reader = new DummyContentReader(MimetypeMap.MIMETYPE_APPLICATION_PS, "%!PS");
|
||||
assertEquals(MimetypeMap.MIMETYPE_APPLICATION_EPS, mimetypeService.guessMimetype("file.eps", reader.getContentInputStream()));
|
||||
assertEquals(MimetypeMap.MIMETYPE_APPLICATION_PS, mimetypeService.guessMimetype("file.ps", reader.getContentInputStream()));
|
||||
}
|
||||
|
||||
public void testDuplicates() throws Exception
|
||||
{
|
||||
setConfigService(
|
||||
@@ -288,17 +298,23 @@ public class MimetypeMapTest extends TestCase
|
||||
|
||||
public static class DummyContentReader implements ContentReader
|
||||
{
|
||||
|
||||
private String mimetype = MimetypeMap.MIMETYPE_HTML;
|
||||
private String mimetype;
|
||||
private String content;
|
||||
|
||||
public DummyContentReader()
|
||||
{
|
||||
super();
|
||||
this(MimetypeMap.MIMETYPE_HTML);
|
||||
}
|
||||
|
||||
public DummyContentReader(String mimetype)
|
||||
{
|
||||
this(mimetype, "<X>@@/Y");
|
||||
}
|
||||
|
||||
public DummyContentReader(String mimetype, String content)
|
||||
{
|
||||
this.mimetype = mimetype;
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -340,7 +356,7 @@ public class MimetypeMapTest extends TestCase
|
||||
@Override
|
||||
public InputStream getContentInputStream() throws ContentIOException
|
||||
{
|
||||
return new ByteArrayInputStream("<X>@@/Y".getBytes(StandardCharsets.UTF_8));
|
||||
return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
Reference in New Issue
Block a user