Update the Metadata Extractor properties used when Tika processes images, so that the EXIF metadata is mapped onto the new content model properties. Includes tests.

Also tweak the exif model slightly to better match what Tika/XMP has, and add the exif
namespace to the list of namespace prefixes in the extracter properties.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@22251 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-09-03 18:10:49 +00:00
parent bafa459acf
commit fc91409c79
4 changed files with 46 additions and 9 deletions

View File

@@ -1223,7 +1223,7 @@
</property> </property>
<property name="exif:fNumber"> <property name="exif:fNumber">
<title>F Number</title> <title>F Number</title>
<type>d:text</type> <type>d:double</type>
</property> </property>
<property name="exif:flash"> <property name="exif:flash">
<title>Flash Activated</title> <title>Flash Activated</title>
@@ -1231,7 +1231,7 @@
</property> </property>
<property name="exif:focalLength"> <property name="exif:focalLength">
<title>Focal Length</title> <title>Focal Length</title>
<type>d:int</type> <type>d:double</type>
</property> </property>
<property name="exif:isoSpeedRatings"> <property name="exif:isoSpeedRatings">
<title>ISO Speed</title> <title>ISO Speed</title>
@@ -1255,11 +1255,11 @@
</property> </property>
<property name="exif:xResolution"> <property name="exif:xResolution">
<title>Horizontal Resolution</title> <title>Horizontal Resolution</title>
<type>d:int</type> <type>d:double</type>
</property> </property>
<property name="exif:yResolution"> <property name="exif:yResolution">
<title>Vertical Resolution</title> <title>Vertical Resolution</title>
<type>d:int</type> <type>d:double</type>
</property> </property>
<property name="exif:resolutionUnit"> <property name="exif:resolutionUnit">
<title>Resolution Unit</title> <title>Resolution Unit</title>

View File

@@ -87,7 +87,7 @@ public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest
} }
else if (MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET.equals(mimetype)) else if (MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET.equals(mimetype))
{ {
checkFileCreationDate(mimetype, properties, "1996-10-14T23:33:28.000+01:00"); checkFileCreationDate(mimetype, properties, "1996-10-15T00:33:28.000+01:00");
} }
else if (MimetypeMap.MIMETYPE_OPENXML_PRESENTATION.equals(mimetype)) else if (MimetypeMap.MIMETYPE_OPENXML_PRESENTATION.equals(mimetype))
{ {

View File

@@ -10,6 +10,7 @@
# Namespaces # Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
# Mappings # Mappings
author=cm:author author=cm:author
@@ -19,5 +20,19 @@ created=cm:created
geo\:lat=cm:latitude geo\:lat=cm:latitude
geo\:long=cm:longitude geo\:long=cm:longitude
#tiff\:ImageWidth=cm:imageWidth
#tiff\:ImageLength=cm:imageHeight tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal

View File

@@ -34,6 +34,7 @@ import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaType;
@@ -218,16 +219,37 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
assertEquals("68 pixels", p.get("Image Height")); assertEquals("68 pixels", p.get("Image Height"));
assertEquals("8 bits", p.get("Data Precision")); assertEquals("8 bits", p.get("Data Precision"));
assertEquals(QUICK_TITLE, p.get("Comments")); assertEquals(QUICK_TITLE, p.get("Comments"));
// Check namespace'd Tika properties
assertEquals("12.54321", p.get("geo:lat")); assertEquals("12.54321", p.get("geo:lat"));
assertEquals("-54.1234", p.get("geo:long")); assertEquals("-54.1234", p.get("geo:long"));
assertEquals("100", p.get("tiff:ImageWidth"));
assertEquals("68", p.get("tiff:ImageLength"));
assertEquals("Canon", p.get("tiff:Make"));
assertEquals("5.6", p.get("exif:FNumber"));
// Map and check // Map and check
Map<QName, Serializable> properties = new HashMap<QName, Serializable>(); Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
ContentReader reader = new FileContentReader(open("GEO.jpg")); ContentReader reader = new FileContentReader(open("GEO.jpg"));
reader.setMimetype("image/jpeg"); reader.setMimetype("image/jpeg");
extracter.extract(reader, properties); extracter.extract(reader, properties);
assertEquals(12.54321, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","latitude"))); // Check the geo bits
assertEquals(-54.1234, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","longitude"))); assertEquals(12.54321, properties.get(ContentModel.PROP_LATITUDE));
assertEquals(-54.1234, properties.get(ContentModel.PROP_LONGITUDE));
// Check the exif bits
assertEquals(100, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelXDimension")));
assertEquals(68, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelYDimension")));
assertEquals(0.000625, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "exposureTime")));
assertEquals(5.6, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "fNumber")));
assertEquals(false, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "flash")));
assertEquals(194.0, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "focalLength")));
assertEquals("400", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "isoSpeedRatings")));
assertEquals("Canon", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "manufacturer")));
assertEquals("Canon EOS 40D", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "model")));
assertEquals("Adobe Photoshop CS3 Macintosh", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "software")));
assertEquals(null, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "orientation")));
assertEquals(240.0, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "xResolution")));
assertEquals(240.0, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "yResolution")));
assertEquals("Inch", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "resolutionUnit")));
} }
private File open(String fileBase) throws Throwable { private File open(String fileBase) throws Throwable {
String filename = "quick" + fileBase; String filename = "quick" + fileBase;