MNT-22055: Metadata extraction fails for certain documents when using legacy transformations

(cherry picked from commit 3351862)
This commit is contained in:
Epure Alexandru-Eusebiu
2020-11-20 09:41:50 +02:00
parent 98df0752d8
commit 578d779183

View File

@@ -68,11 +68,10 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
protected static Log logger = LogFactory.getLog(TikaAutoMetadataExtracter.class);
private static AutoDetectParser parser;
private static TikaConfig config;
private static String EXIF_IMAGE_HEIGHT_TAG = "Exif Image Height";
private static String EXIF_IMAGE_WIDTH_TAG = "Exif Image Width";
private static String EXIF_IMAGE_HEIGHT_TAG = "Exif SubIFD:Exif Image Height";
private static String EXIF_IMAGE_WIDTH_TAG = "Exif SubIFD:Exif Image Width";
private static String JPEG_IMAGE_HEIGHT_TAG = "Image Height";
private static String JPEG_IMAGE_WIDTH_TAG = "Image Width";
private static String COMPRESSION_TAG = "Compression";
public static ArrayList<String> SUPPORTED_MIMETYPES;
private static ArrayList<String> buildMimeTypes(TikaConfig tikaConfig)
@@ -127,16 +126,12 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
if(MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE)))
{
//check if the image has exif information
if(metadata.get(EXIF_IMAGE_WIDTH_TAG) != null
&& metadata.get(EXIF_IMAGE_HEIGHT_TAG) != null
&& metadata.get(COMPRESSION_TAG) != null)
if (metadata.get(EXIF_IMAGE_WIDTH_TAG) != null && metadata.get(EXIF_IMAGE_HEIGHT_TAG) != null)
{
//replace the exif size properties that will be embedded in the node with
//the guessed dimensions from Tika
putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(metadata.get(EXIF_IMAGE_HEIGHT_TAG)), properties);
putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(metadata.get(EXIF_IMAGE_WIDTH_TAG)), properties);
putRawValue(JPEG_IMAGE_HEIGHT_TAG, metadata.get(EXIF_IMAGE_HEIGHT_TAG), properties);
putRawValue(JPEG_IMAGE_WIDTH_TAG, metadata.get(EXIF_IMAGE_WIDTH_TAG), properties);
putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(metadata.get(JPEG_IMAGE_HEIGHT_TAG)), properties);
putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(metadata.get(JPEG_IMAGE_WIDTH_TAG)), properties);
}
}
return properties;