diff --git a/config/quick/quickSizeSample.jpg b/config/quick/quickSizeSample.jpg new file mode 100644 index 0000000000..b06ec18026 Binary files /dev/null and b/config/quick/quickSizeSample.jpg differ diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java index 7f2db26e4f..d3ec28d810 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java @@ -18,11 +18,16 @@ */ package org.alfresco.repo.content.metadata; +import java.io.Serializable; import java.util.ArrayList; +import java.util.Map; +import org.alfresco.repo.content.MimetypeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.tika.config.TikaConfig; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TIFF; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.Parser; @@ -53,6 +58,10 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter protected static Log logger = LogFactory.getLog(TikaAutoMetadataExtracter.class); private static AutoDetectParser parser; private static TikaConfig config; + private static final String EXIF_IMAGE_HEIGHT_TAG = "Exif Image Height"; + private static final String EXIF_IMAGE_WIDTH_TAG = "Exif Image Width"; + private static final String JPEG_IMAGE_HEIGHT_TAG = "Image Height"; + private static final String JPEG_IMAGE_WIDTH_TAG = "Image Width"; public static ArrayList SUPPORTED_MIMETYPES; private static ArrayList buildMimeTypes(TikaConfig tikaConfig) @@ -90,4 +99,37 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter { return parser; } + + /** + * Corrects the image size recorded for JPEG files (see MNT-13920). + * The Exif size tags stored in a file can be wrong, so when a JPEG + * carries both Exif size tags, the raw TIFF.IMAGE_LENGTH and 
+ * TIFF.IMAGE_WIDTH values are replaced with the numbers parsed from + * the "Image Height" and "Image Width" values reported by Tika. + * Nothing is replaced unless those two values are present as well. + * @param metadata the metadata produced by Tika for the content + * @param properties the raw properties; handed to putRawValue to receive the corrected sizes + * @param headers not used by this implementation + * @return the same properties map that was passed in + */ + @Override + protected Map extractSpecific(Metadata metadata, + Map properties, Map headers) + { + + if(MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE))) + { + //only act when the exif size tags and the size values reported by Tika are all present + if(metadata.get(EXIF_IMAGE_WIDTH_TAG) != null && metadata.get(EXIF_IMAGE_HEIGHT_TAG) != null + && metadata.get(JPEG_IMAGE_WIDTH_TAG) != null && metadata.get(JPEG_IMAGE_HEIGHT_TAG) != null) + { + //replace the exif size properties that will be embedded in the node with + //the guessed dimensions from Tika + putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(metadata.get(JPEG_IMAGE_HEIGHT_TAG)), properties); + putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(metadata.get(JPEG_IMAGE_WIDTH_TAG)), properties); + } + } + return properties; + } + } diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index c8425822b3..15b8745ba2 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -530,6 +530,32 @@ public abstract class TikaPoweredMetadataExtracter } } + /** + * Returns the leading run of digits of a size value. + * Exif size values carry a unit after the number (for example + * "1535 pixels"), so reading stops at the first non digit character. + * @param sizeText the raw size text, may be null + * @return the leading digits of sizeText (empty if it does not start + * with a digit), or null if sizeText is null + */ + protected String extractSize(String sizeText) + { + if(sizeText == null) + { + return null; + } + StringBuilder sizeValue = new StringBuilder(); + for(char c : sizeText.toCharArray()) + { + if(!Character.isDigit(c)) + { + break; + } + sizeValue.append(c); + } + return sizeValue.toString(); + } + /** * This content handler will capture entries from within * the header of the Tika content XHTML, but ignore the diff 
--git a/source/test-java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java b/source/test-java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java index 9b51e74051..269016fbd5 100644 --- a/source/test-java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java +++ b/source/test-java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java @@ -250,6 +250,20 @@ public void testImageVideo() throws Throwable { assertEquals("92", p.get("height")); assertEquals("8 8 8", p.get("Data BitsPerSample")); + // Image with wrong tiff:Width property. see MNT-13920 + p = openAndCheck("SizeSample.jpg", "image/jpeg"); + // Check raw EXIF properties + assertEquals("1535 pixels", p.get("Image Width")); + assertEquals("367 pixels", p.get("Image Height")); + + // Map and check + Map propsJPG = new HashMap(); + ContentReader readerJPG = new FileContentReader(open("SizeSample.jpg")); + readerJPG.setMimetype("image/jpeg"); + extracter.extract(readerJPG, propsJPG); + assertEquals(1535, propsJPG.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelXDimension"))); + assertEquals(367, propsJPG.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelYDimension"))); + // Geo tagged image p = openAndCheck("GEO.jpg", "image/jpeg");