Add cm:geographic Aspect, which has cm:latitude and cm:longitude, and update the Tika auto parser to map to this (plus tests)

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20925 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-07-02 14:57:58 +00:00
parent 2433636201
commit d2c1cc78e5
7 changed files with 78 additions and 6 deletions

View File

@@ -1175,6 +1175,20 @@
<!-- DEPRECATED (end of) -->
<!-- -->
<!-- Geographic aspect: carries a position as two d:double properties, cm:latitude and cm:longitude. -->
<!-- NOTE(review): values are presumably signed decimal degrees; confirm against the metadata extracter mapping. -->
<aspect name="cm:geographic">
<title>Geographic</title>
<properties>
<property name="cm:latitude">
<title>Latitude</title>
<type>d:double</type>
</property>
<property name="cm:longitude">
<title>Longitude</title>
<type>d:double</type>
</property>
</properties>
</aspect>
</aspects>
</model>

View File

@@ -33,12 +33,11 @@ import org.apache.tika.parser.pdf.PDFParser;
* <b>title:</b> -- cm:title
* <b>subject:</b> -- cm:description
* <b>created:</b> -- cm:created
* <b>(custom metadata):</b> --
* </pre>
*
* Uses Apache Tika
*
* TODO - Update Tika to handle custom metadata
*
* @author Jesper Steen Møller
* @author Derek Hulley
*/

View File

@@ -40,6 +40,8 @@ import org.apache.tika.parser.Parser;
* <b>subject:</b> -- cm:description
* <b>created:</b> -- cm:created
* <b>comments:</b>
* <b>geo:lat:</b> -- cm:latitude
* <b>geo:long:</b> -- cm:longitude
* </pre>
*
* @author Nick Burch

View File

@@ -16,3 +16,8 @@ author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
#tiff\:ImageWidth=cm:imageWidth
#tiff\:ImageLength=cm:imageHeight

View File

@@ -210,16 +210,38 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
assertEquals("409", p.get("width"));
assertEquals("92", p.get("height"));
assertEquals("8 8 8", p.get("Data BitsPerSample"));
// Geo tagged image
p = openAndCheck("GEO.jpg", "image/jpeg");
assertEquals("100 pixels", p.get("Image Width"));
assertEquals("68 pixels", p.get("Image Height"));
assertEquals("8 bits", p.get("Data Precision"));
assertEquals(QUICK_TITLE, p.get("Comments"));
assertEquals("12.54321", p.get("geo:lat"));
assertEquals("-54.1234", p.get("geo:long"));
// Map and check
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
ContentReader reader = new FileContentReader(open("GEO.jpg"));
reader.setMimetype("image/jpeg");
extracter.extract(reader, properties);
assertEquals(12.54321, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","latitude")));
assertEquals(-54.1234, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","longitude")));
}
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
/**
 * Locates one of the "quick" test resources on the classpath and returns it as a file.
 *
 * @param fileBase the resource name without its "quick" prefix, e.g. "GEO.jpg"
 * @return the existing file backing the classpath resource "quick/quick" + fileBase
 * @throws Throwable if the resource URL cannot be converted into a local file
 */
private File open(String fileBase) throws Throwable {
    String filename = "quick" + fileBase;
    URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
    // Fail with a readable message instead of a NullPointerException when the resource is missing
    assertTrue("Test resource not found on classpath: quick/" + filename, url != null);
    // Convert via the URI so escaped characters in the path (e.g. %20 for a space) are decoded;
    // url.getFile() would hand the still-escaped path to File and the exists() check would fail
    File file = new File(url.toURI());
    assertTrue("Test file does not exist: " + file, file.exists());
    return file;
}
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
// Cheat and ask Tika for the mime type!
File file = open(fileBase);
AutoDetectParser ap = new AutoDetectParser();
Metadata metadata = new Metadata();
metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
metadata.set(Metadata.RESOURCE_NAME_KEY, "quick"+fileBase);
MediaType mt = ap.getDetector().detect(
new BufferedInputStream(new FileInputStream(file)), metadata);
String mimetype = mt.toString();

View File

@@ -18,6 +18,7 @@
*/
package org.alfresco.repo.content.metadata;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
@@ -31,9 +32,11 @@ import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.service.cmr.repository.ContentReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
@@ -109,6 +112,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
{
super(supportedMimeTypes);
// TODO Once TIKA-451 is fixed this list will get nicer
this.tikaDateFormats = new DateFormat[] {
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
@@ -116,6 +120,10 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US),
new SimpleDateFormat("yyyy-MM-dd"),
new SimpleDateFormat("yyyy-MM-dd", Locale.US),
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"),
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US),
new SimpleDateFormat("yyyy/MM/dd"),
new SimpleDateFormat("yyyy/MM/dd", Locale.US),
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"),
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US)
};
@@ -169,6 +177,28 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
return properties;
}
/**
 * Opens the stream that will be handed to the parser for the given reader.
 * <p>
 * There seems to be some sort of issue with some downstream
 * 3rd party libraries, and input streams that come from
 * a {@link ContentReader}. This happens most often with
 * JPEG and Tiff files.
 * For these cases, buffer out to a local file if not
 * already there.
 *
 * @param reader the content to open; its mimetype decides whether a file is used
 * @return a {@link TikaInputStream} created from a file for "image/jpeg" and
 *         "image/tiff" content, otherwise the reader's own content input stream
 * @throws IOException if the temporary file or the stream over it cannot be created
 */
private InputStream getInputStream(ContentReader reader) throws IOException {
    String mimetype = reader.getMimetype();
    if (!"image/jpeg".equals(mimetype) && !"image/tiff".equals(mimetype)) {
        // Everything else can be read straight from the reader
        return reader.getContentInputStream();
    }

    if (reader instanceof FileContentReader) {
        // Already on local disk, so no copy is needed
        return TikaInputStream.get(((FileContentReader) reader).getFile());
    }

    File tmpFile = File.createTempFile("tika", "tmp");
    // The returned stream does not own the file, so nothing removes it once parsing
    // has finished. Register it for removal at JVM exit so copies do not pile up
    // in the temp directory indefinitely.
    tmpFile.deleteOnExit();

    boolean handedOver = false;
    try {
        reader.getContent(tmpFile);
        InputStream stream = TikaInputStream.get(tmpFile);
        handedOver = true;
        return stream;
    } finally {
        if (!handedOver) {
            // The copy or the open failed: nobody will read this file, remove it now
            tmpFile.delete();
        }
    }
}
@Override
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
@@ -177,7 +207,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
InputStream is = null;
try
{
is = reader.getContentInputStream();
is = getInputStream(reader);
Parser parser = getParser();
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB