mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-08-07 17:49:17 +00:00
MNT-18275 Change detected mimetype: pdf->ai ps->eps if extension correct
This commit is contained in:
@@ -25,20 +25,6 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content;
|
package org.alfresco.repo.content;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.SortedSet;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
import org.alfresco.repo.content.encoding.ContentCharsetFinder;
|
import org.alfresco.repo.content.encoding.ContentCharsetFinder;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
import org.alfresco.service.cmr.repository.FileContentReader;
|
import org.alfresco.service.cmr.repository.FileContentReader;
|
||||||
@@ -57,6 +43,20 @@ import org.springframework.extensions.config.ConfigElement;
|
|||||||
import org.springframework.extensions.config.ConfigLookupContext;
|
import org.springframework.extensions.config.ConfigLookupContext;
|
||||||
import org.springframework.extensions.config.ConfigService;
|
import org.springframework.extensions.config.ConfigService;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides a bidirectional mapping between well-known mimetypes and the
|
* Provides a bidirectional mapping between well-known mimetypes and the
|
||||||
* registered file extensions. All mimetypes and extensions are stored and
|
* registered file extensions. All mimetypes and extensions are stored and
|
||||||
@@ -719,6 +719,7 @@ public class MimetypeMap implements MimetypeService
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
type = detector.detect(inp, metadata);
|
type = detector.detect(inp, metadata);
|
||||||
|
type = typeBasedOnDetectedTypeAndExtension(type, filename);
|
||||||
logger.debug(input + " detected by Tika as being " + type.toString());
|
logger.debug(input + " detected by Tika as being " + type.toString());
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
@@ -743,6 +744,36 @@ public class MimetypeMap implements MimetypeService
|
|||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We have a problem with .ai files, as Tika detects them as .pdf, but if we can use the filename
|
||||||
|
// we can correct that. Similar problem with .eps and .ps.
|
||||||
|
private MediaType typeBasedOnDetectedTypeAndExtension(MediaType type, String filename)
|
||||||
|
{
|
||||||
|
if (filename != null && type != null)
|
||||||
|
{
|
||||||
|
String[] detectedAndPossibleTypes = new String[]
|
||||||
|
{
|
||||||
|
MIMETYPE_PDF, MIMETYPE_APPLICATION_ILLUSTRATOR,
|
||||||
|
MIMETYPE_APPLICATION_PS, MIMETYPE_APPLICATION_EPS
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i=detectedAndPossibleTypes.length-1; i>=0; i-=2)
|
||||||
|
{
|
||||||
|
String detectedType = detectedAndPossibleTypes[i-1];
|
||||||
|
if (detectedType.equals(type.toString()))
|
||||||
|
{
|
||||||
|
String possibleType = detectedAndPossibleTypes[i];
|
||||||
|
String extension = getExtension(possibleType);
|
||||||
|
if (filename.endsWith("."+extension))
|
||||||
|
{
|
||||||
|
type = MediaType.parse(possibleType);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use Apache Tika to check if the mime type of the document really matches
|
* Use Apache Tika to check if the mime type of the document really matches
|
||||||
* what it claims to be. This is typically used when a transformation or
|
* what it claims to be. This is typically used when a transformation or
|
||||||
|
@@ -25,20 +25,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content;
|
package org.alfresco.repo.content;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import junit.framework.TestCase;
|
||||||
import java.io.File;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.nio.channels.ReadableByteChannel;
|
|
||||||
import java.nio.channels.WritableByteChannel;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.alfresco.service.cmr.repository.ContentData;
|
import org.alfresco.service.cmr.repository.ContentData;
|
||||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
@@ -52,7 +39,19 @@ import org.springframework.extensions.config.ConfigService;
|
|||||||
import org.springframework.extensions.config.ConfigSource;
|
import org.springframework.extensions.config.ConfigSource;
|
||||||
import org.springframework.extensions.config.xml.XMLConfigService;
|
import org.springframework.extensions.config.xml.XMLConfigService;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.nio.channels.ReadableByteChannel;
|
||||||
|
import java.nio.channels.WritableByteChannel;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see org.alfresco.repo.content.MimetypeMap
|
* @see org.alfresco.repo.content.MimetypeMap
|
||||||
@@ -238,6 +237,17 @@ public class MimetypeMapTest extends TestCase
|
|||||||
assertEquals(MimetypeMap.MIMETYPE_VIDEO_QUICKTIME, mimetypeService.guessMimetype("file.rm", reader));
|
assertEquals(MimetypeMap.MIMETYPE_VIDEO_QUICKTIME, mimetypeService.guessMimetype("file.rm", reader));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTypeBasedOnDetectedTypeAndExtension() throws Exception
|
||||||
|
{
|
||||||
|
ContentReader reader = new DummyContentReader(MimetypeMap.MIMETYPE_PDF, "%PDF\r");
|
||||||
|
assertEquals(MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR, mimetypeService.guessMimetype("file.ai", reader.getContentInputStream()));
|
||||||
|
assertEquals(MimetypeMap.MIMETYPE_PDF, mimetypeService.guessMimetype("file.pdf", reader.getContentInputStream()));
|
||||||
|
|
||||||
|
reader = new DummyContentReader(MimetypeMap.MIMETYPE_APPLICATION_PS, "%!PS");
|
||||||
|
assertEquals(MimetypeMap.MIMETYPE_APPLICATION_EPS, mimetypeService.guessMimetype("file.eps", reader.getContentInputStream()));
|
||||||
|
assertEquals(MimetypeMap.MIMETYPE_APPLICATION_PS, mimetypeService.guessMimetype("file.ps", reader.getContentInputStream()));
|
||||||
|
}
|
||||||
|
|
||||||
public void testDuplicates() throws Exception
|
public void testDuplicates() throws Exception
|
||||||
{
|
{
|
||||||
setConfigService(
|
setConfigService(
|
||||||
@@ -288,17 +298,23 @@ public class MimetypeMapTest extends TestCase
|
|||||||
|
|
||||||
public static class DummyContentReader implements ContentReader
|
public static class DummyContentReader implements ContentReader
|
||||||
{
|
{
|
||||||
|
private String mimetype;
|
||||||
private String mimetype = MimetypeMap.MIMETYPE_HTML;
|
private String content;
|
||||||
|
|
||||||
public DummyContentReader()
|
public DummyContentReader()
|
||||||
{
|
{
|
||||||
super();
|
this(MimetypeMap.MIMETYPE_HTML);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DummyContentReader(String mimetype)
|
public DummyContentReader(String mimetype)
|
||||||
|
{
|
||||||
|
this(mimetype, "<X>@@/Y");
|
||||||
|
}
|
||||||
|
|
||||||
|
public DummyContentReader(String mimetype, String content)
|
||||||
{
|
{
|
||||||
this.mimetype = mimetype;
|
this.mimetype = mimetype;
|
||||||
|
this.content = content;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -340,7 +356,7 @@ public class MimetypeMapTest extends TestCase
|
|||||||
@Override
|
@Override
|
||||||
public InputStream getContentInputStream() throws ContentIOException
|
public InputStream getContentInputStream() throws ContentIOException
|
||||||
{
|
{
|
||||||
return new ByteArrayInputStream("<X>@@/Y".getBytes(StandardCharsets.UTF_8));
|
return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Reference in New Issue
Block a user