diff --git a/config/alfresco/model/contentModel.xml b/config/alfresco/model/contentModel.xml
index ac565c07ef..25b036161a 100644
--- a/config/alfresco/model/contentModel.xml
+++ b/config/alfresco/model/contentModel.xml
@@ -1175,6 +1175,20 @@
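+      <aspect name="cm:geographic">
+         <title>Geographic</title>
+         <properties>
+            <property name="cm:latitude">
+               <title>Latitude</title>
+               <type>d:double</type>
+            </property>
+            <property name="cm:longitude">
+               <title>Longitude</title>
+               <type>d:double</type>
+            </property>
+         </properties>
+      </aspect>
+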
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
index 0ee0db9c03..61c9951e25 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -33,12 +33,11 @@ import org.apache.tika.parser.pdf.PDFParser;
* title: -- cm:title
* subject: -- cm:description
* created: -- cm:created
+ * (custom metadata): --
*
*
* Uses Apache Tika
*
- * TODO - Update Tika to handle custom metadata
- *
* @author Jesper Steen Møller
* @author Derek Hulley
*/
diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java
index 021889b730..7dd75d8c54 100644
--- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java
@@ -40,6 +40,8 @@ import org.apache.tika.parser.Parser;
* subject: -- cm:description
* created: -- cm:created
* comments:
+ * geo:lat: -- cm:latitude
+ * geo:long: -- cm:longitude
*
*
* @author Nick Burch
diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties
index b0d67029d8..de6520459b 100644
--- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties
+++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties
@@ -16,3 +16,8 @@ author=cm:author
title=cm:title
description=cm:description
created=cm:created
+
+geo\:lat=cm:latitude
+geo\:long=cm:longitude
+#tiff\:ImageWidth=cm:imageWidth
+#tiff\:ImageLength=cm:imageHeight
\ No newline at end of file
diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java
index bf8ba991e9..8858b9b4a2 100644
--- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java
@@ -210,16 +210,38 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
assertEquals("409", p.get("width"));
assertEquals("92", p.get("height"));
assertEquals("8 8 8", p.get("Data BitsPerSample"));
+
+
+ // Geo tagged image
+ p = openAndCheck("GEO.jpg", "image/jpeg");
+ assertEquals("100 pixels", p.get("Image Width"));
+ assertEquals("68 pixels", p.get("Image Height"));
+ assertEquals("8 bits", p.get("Data Precision"));
+ assertEquals(QUICK_TITLE, p.get("Comments"));
+ assertEquals("12.54321", p.get("geo:lat"));
+ assertEquals("-54.1234", p.get("geo:long"));
+
+ // Map and check
+ Map properties = new HashMap();
+ ContentReader reader = new FileContentReader(open("GEO.jpg"));
+ reader.setMimetype("image/jpeg");
+ extracter.extract(reader, properties);
+ assertEquals(12.54321, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","latitude")));
+ assertEquals(-54.1234, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","longitude")));
}
- private Map openAndCheck(String fileBase, String expMimeType) throws Throwable {
+ /**
+  * Locates the test resource {@code "quick/quick" + fileBase} on the classpath.
+  *
+  * @param fileBase the resource name without its "quick" prefix, e.g. "GEO.jpg"
+  * @return the resource as a file, asserted to exist
+  */
+ private File open(String fileBase) throws Throwable {
String filename = "quick" + fileBase;
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
+ // getResource() returns null for a missing resource; fail clearly rather than with a NullPointerException
+ assertTrue("Test resource not found on classpath: quick/" + filename, url != null);
File file = new File(url.getFile());
-
+ assertTrue(file.exists());
+ return file;
+ }
+ private Map openAndCheck(String fileBase, String expMimeType) throws Throwable {
// Cheat and ask Tika for the mime type!
+ File file = open(fileBase);
AutoDetectParser ap = new AutoDetectParser();
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(Metadata.RESOURCE_NAME_KEY, "quick"+fileBase);
MediaType mt = ap.getDetector().detect(
new BufferedInputStream(new FileInputStream(file)), metadata);
String mimetype = mt.toString();
diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java
index d514fec080..87da227b5f 100644
--- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java
@@ -18,6 +18,7 @@
*/
package org.alfresco.repo.content.metadata;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
@@ -31,9 +32,11 @@ import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
+import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.service.cmr.repository.ContentReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
@@ -109,6 +112,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
{
super(supportedMimeTypes);
+ // TODO Once TIKA-451 is fixed this list will get nicer
this.tikaDateFormats = new DateFormat[] {
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
@@ -116,6 +120,10 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US),
new SimpleDateFormat("yyyy-MM-dd"),
new SimpleDateFormat("yyyy-MM-dd", Locale.US),
+ new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"),
+ new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US),
+ new SimpleDateFormat("yyyy/MM/dd"),
+ new SimpleDateFormat("yyyy/MM/dd", Locale.US),
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"),
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US)
};
@@ -169,6 +177,28 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
return properties;
}
+ /**
+  * Opens the content of the given reader as a stream for Tika to parse.
+  * <p>
+  * There seems to be some sort of issue with some downstream
+  * 3rd party libraries, and input streams that come from
+  * a {@link ContentReader}. This happens most often with
+  * JPEG and Tiff files.
+  * For these cases, the content is read from a local file:
+  * the reader's own file if it is a {@link FileContentReader},
+  * otherwise a temporary copy.
+  * <p>
+  * The temporary copy is removed straight away if it cannot be
+  * written or opened, and is registered for deletion on JVM exit
+  * so that it does not outlive the process.
+  *
+  * @param reader the content to open
+  * @return a stream over the content; this method never closes it
+  * @throws IOException if the temporary file cannot be created or opened
+  */
+ private InputStream getInputStream(ContentReader reader) throws IOException {
+     String mimetype = reader.getMimetype();
+     if(!"image/jpeg".equals(mimetype) && !"image/tiff".equals(mimetype)) {
+         // Other types are read straight from the reader
+         return reader.getContentInputStream();
+     }
+     if(reader instanceof FileContentReader) {
+         // Already backed by a local file, so no copy is needed
+         return TikaInputStream.get( ((FileContentReader)reader).getFile() );
+     }
+
+     File tmpFile = File.createTempFile("tika", ".tmp");
+     // Safety net: nothing in this method removes the copy once the stream is in use
+     tmpFile.deleteOnExit();
+     boolean opened = false;
+     try {
+         reader.getContent(tmpFile);
+         InputStream is = TikaInputStream.get(tmpFile);
+         opened = true;
+         return is;
+     } finally {
+         if(!opened) {
+             // Copying or opening failed: don't leave a partial file behind
+             tmpFile.delete();
+         }
+     }
+ }
+
@Override
protected Map extractRaw(ContentReader reader) throws Throwable
{
@@ -177,7 +207,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
InputStream is = null;
try
{
- is = reader.getContentInputStream();
+ is = getInputStream(reader);
Parser parser = getParser();
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
diff --git a/source/test-resources/quick/quickGEO.jpg b/source/test-resources/quick/quickGEO.jpg
new file mode 100644
index 0000000000..a5609b3bd6
Binary files /dev/null and b/source/test-resources/quick/quickGEO.jpg differ