diff --git a/config/alfresco/model/contentModel.xml b/config/alfresco/model/contentModel.xml index ac565c07ef..25b036161a 100644 --- a/config/alfresco/model/contentModel.xml +++ b/config/alfresco/model/contentModel.xml @@ -1175,6 +1175,20 @@ + + Geographic + + + Latitude + d:double + + + Longitude + d:double + + + + diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java index 0ee0db9c03..61c9951e25 100644 --- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java @@ -33,12 +33,11 @@ import org.apache.tika.parser.pdf.PDFParser; * title: -- cm:title * subject: -- cm:description * created: -- cm:created + * (custom metadata): -- * * * Uses Apache Tika * - * TODO - Update Tika to handle custom metadata - * * @author Jesper Steen Møller * @author Derek Hulley */ diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java index 021889b730..7dd75d8c54 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java @@ -40,6 +40,8 @@ import org.apache.tika.parser.Parser; * subject: -- cm:description * created: -- cm:created * comments: + *

geo:lat: -- cm:latitude + *

geo:long: -- cm:longitude * * * @author Nick Burch diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties index b0d67029d8..de6520459b 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties +++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties @@ -16,3 +16,8 @@ author=cm:author title=cm:title description=cm:description created=cm:created + +geo\:lat=cm:latitude +geo\:long=cm:longitude +#tiff\:ImageWidth=cm:imageWidth +#tiff\:ImageLength=cm:imageHeight \ No newline at end of file diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java index bf8ba991e9..8858b9b4a2 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java @@ -210,16 +210,38 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest assertEquals("409", p.get("width")); assertEquals("92", p.get("height")); assertEquals("8 8 8", p.get("Data BitsPerSample")); + + + // Geo tagged image + p = openAndCheck("GEO.jpg", "image/jpeg"); + assertEquals("100 pixels", p.get("Image Width")); + assertEquals("68 pixels", p.get("Image Height")); + assertEquals("8 bits", p.get("Data Precision")); + assertEquals(QUICK_TITLE, p.get("Comments")); + assertEquals("12.54321", p.get("geo:lat")); + assertEquals("-54.1234", p.get("geo:long")); + + // Map and check + Map properties = new HashMap(); + ContentReader reader = new FileContentReader(open("GEO.jpg")); + reader.setMimetype("image/jpeg"); + extracter.extract(reader, properties); + assertEquals(12.54321, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","latitude"))); + assertEquals(-54.1234, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","longitude"))); } - private Map openAndCheck(String fileBase, String expMimeType) throws Throwable { + private File open(String fileBase) throws Throwable { String filename = "quick" + fileBase; URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename); File file = new File(url.getFile()); - + assertTrue(file.exists()); + return file; + } + private Map openAndCheck(String fileBase, String expMimeType) throws Throwable { // Cheat and ask Tika for the mime type! + File file = open(fileBase); AutoDetectParser ap = new AutoDetectParser(); Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, filename); + metadata.set(Metadata.RESOURCE_NAME_KEY, "quick"+fileBase); MediaType mt = ap.getDetector().detect( new BufferedInputStream(new FileInputStream(file)), metadata); String mimetype = mt.toString(); diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index d514fec080..87da227b5f 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -18,6 +18,7 @@ */ package org.alfresco.repo.content.metadata; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; @@ -31,9 +32,11 @@ import java.util.HashSet; import java.util.Locale; import java.util.Map; +import org.alfresco.repo.content.filestore.FileContentReader; import org.alfresco.service.cmr.repository.ContentReader; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.ParseContext; @@ -109,6 +112,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada { super(supportedMimeTypes); + // TODO Once TIKA-451 is fixed this list will get nicer this.tikaDateFormats = new DateFormat[] { new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"), new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US), @@ -116,6 +120,10 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US), new SimpleDateFormat("yyyy-MM-dd"), new SimpleDateFormat("yyyy-MM-dd", Locale.US), + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"), + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US), + new SimpleDateFormat("yyyy/MM/dd"), + new SimpleDateFormat("yyyy/MM/dd", Locale.US), new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"), new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US) }; @@ -169,6 +177,28 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada return properties; } + /** + * There seems to be some sort of issue with some downstream + * 3rd party libraries, and input streams that come from + * a {@link ContentReader}. This happens most often with + * JPEG and Tiff files. + * For these cases, buffer out to a local file if not + * already there + */ + private InputStream getInputStream(ContentReader reader) throws IOException { + if("image/jpeg".equals(reader.getMimetype()) || + "image/tiff".equals(reader.getMimetype())) { + if(reader instanceof FileContentReader) { + return TikaInputStream.get( ((FileContentReader)reader).getFile() ); + } else { + File tmpFile = File.createTempFile("tika", "tmp"); + reader.getContent(tmpFile); + return TikaInputStream.get(tmpFile); + } + } + return reader.getContentInputStream(); + } + @Override protected Map extractRaw(ContentReader reader) throws Throwable { @@ -177,7 +207,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada InputStream is = null; try { - is = reader.getContentInputStream(); + is = getInputStream(reader); Parser parser = getParser(); Metadata metadata = new Metadata(); ParseContext context = new ParseContext(); diff --git a/source/test-resources/quick/quickGEO.jpg b/source/test-resources/quick/quickGEO.jpg new file mode 100644 index 0000000000..a5609b3bd6 Binary files /dev/null and b/source/test-resources/quick/quickGEO.jpg differ