mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Add cm:geographic Aspect, which has cm:latitude and cm:longitude, and update the Tika auto parser to map to this (plus tests)
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20925 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -1175,6 +1175,20 @@
|
||||
<!-- DEPRECATED (end of) -->
|
||||
<!-- -->
|
||||
|
||||
<!-- Geographic aspect: marks a node as having a location, held as two d:double properties (cm:latitude, cm:longitude). NOTE(review): values look like signed decimal degrees - confirm. -->
<aspect name="cm:geographic">
|
||||
<title>Geographic</title>
|
||||
<properties>
|
||||
<property name="cm:latitude">
|
||||
<title>Latitude</title>
|
||||
<type>d:double</type>
|
||||
</property>
|
||||
<property name="cm:longitude">
|
||||
<title>Longitude</title>
|
||||
<type>d:double</type>
|
||||
</property>
|
||||
</properties>
|
||||
</aspect>
|
||||
|
||||
</aspects>
|
||||
|
||||
</model>
|
||||
|
@@ -33,12 +33,11 @@ import org.apache.tika.parser.pdf.PDFParser;
|
||||
* <b>title:</b> -- cm:title
|
||||
* <b>subject:</b> -- cm:description
|
||||
* <b>created:</b> -- cm:created
|
||||
* <b>(custom metadata):</b> --
|
||||
* </pre>
|
||||
*
|
||||
* Uses Apache Tika
|
||||
*
|
||||
* TODO - Update Tika to handle custom metadata
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
|
@@ -40,6 +40,8 @@ import org.apache.tika.parser.Parser;
|
||||
* <b>subject:</b> -- cm:description
|
||||
* <b>created:</b> -- cm:created
|
||||
* <b>comments:</b>
|
||||
* <b>geo:lat:</b> -- cm:latitude
|
||||
* <b>geo:long:</b> -- cm:longitude
|
||||
* </pre>
|
||||
*
|
||||
* @author Nick Burch
|
||||
|
@@ -16,3 +16,8 @@ author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
geo\:lat=cm:latitude
|
||||
geo\:long=cm:longitude
|
||||
#tiff\:ImageWidth=cm:imageWidth
|
||||
#tiff\:ImageLength=cm:imageHeight
|
@@ -210,16 +210,38 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
assertEquals("409", p.get("width"));
|
||||
assertEquals("92", p.get("height"));
|
||||
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
||||
|
||||
|
||||
// Geo tagged image
|
||||
p = openAndCheck("GEO.jpg", "image/jpeg");
|
||||
assertEquals("100 pixels", p.get("Image Width"));
|
||||
assertEquals("68 pixels", p.get("Image Height"));
|
||||
assertEquals("8 bits", p.get("Data Precision"));
|
||||
assertEquals(QUICK_TITLE, p.get("Comments"));
|
||||
assertEquals("12.54321", p.get("geo:lat"));
|
||||
assertEquals("-54.1234", p.get("geo:long"));
|
||||
|
||||
// Map and check
|
||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||
ContentReader reader = new FileContentReader(open("GEO.jpg"));
|
||||
reader.setMimetype("image/jpeg");
|
||||
extracter.extract(reader, properties);
|
||||
assertEquals(12.54321, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","latitude")));
|
||||
assertEquals(-54.1234, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","longitude")));
|
||||
}
|
||||
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
|
||||
private File open(String fileBase) throws Throwable {
|
||||
String filename = "quick" + fileBase;
|
||||
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
|
||||
File file = new File(url.getFile());
|
||||
|
||||
assertTrue(file.exists());
|
||||
return file;
|
||||
}
|
||||
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
|
||||
// Cheat and ask Tika for the mime type!
|
||||
File file = open(fileBase);
|
||||
AutoDetectParser ap = new AutoDetectParser();
|
||||
Metadata metadata = new Metadata();
|
||||
metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
|
||||
metadata.set(Metadata.RESOURCE_NAME_KEY, "quick"+fileBase);
|
||||
MediaType mt = ap.getDetector().detect(
|
||||
new BufferedInputStream(new FileInputStream(file)), metadata);
|
||||
String mimetype = mt.toString();
|
||||
|
@@ -18,6 +18,7 @@
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
@@ -31,9 +32,11 @@ import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.tika.io.TikaInputStream;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
@@ -109,6 +112,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
{
|
||||
super(supportedMimeTypes);
|
||||
|
||||
// TODO Once TIKA-451 is fixed this list will get nicer
|
||||
this.tikaDateFormats = new DateFormat[] {
|
||||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"),
|
||||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
|
||||
@@ -116,6 +120,10 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US),
|
||||
new SimpleDateFormat("yyyy-MM-dd"),
|
||||
new SimpleDateFormat("yyyy-MM-dd", Locale.US),
|
||||
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"),
|
||||
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US),
|
||||
new SimpleDateFormat("yyyy/MM/dd"),
|
||||
new SimpleDateFormat("yyyy/MM/dd", Locale.US),
|
||||
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"),
|
||||
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US)
|
||||
};
|
||||
@@ -169,6 +177,28 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
return properties;
|
||||
}
|
||||
|
||||
/**
|
||||
* There seems to be some sort of issue with some downstream
|
||||
* 3rd party libraries, and input streams that come from
|
||||
* a {@link ContentReader}. This happens most often with
|
||||
* JPEG and Tiff files.
|
||||
* For these cases, buffer out to a local file if not
|
||||
* already there
|
||||
*/
|
||||
private InputStream getInputStream(ContentReader reader) throws IOException {
|
||||
if("image/jpeg".equals(reader.getMimetype()) ||
|
||||
"image/tiff".equals(reader.getMimetype())) {
|
||||
if(reader instanceof FileContentReader) {
|
||||
return TikaInputStream.get( ((FileContentReader)reader).getFile() );
|
||||
} else {
|
||||
File tmpFile = File.createTempFile("tika", "tmp");
|
||||
reader.getContent(tmpFile);
|
||||
return TikaInputStream.get(tmpFile);
|
||||
}
|
||||
}
|
||||
return reader.getContentInputStream();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
@@ -177,7 +207,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
is = getInputStream(reader);
|
||||
Parser parser = getParser();
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = new ParseContext();
|
||||
|
BIN
source/test-resources/quick/quickGEO.jpg
Normal file
BIN
source/test-resources/quick/quickGEO.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
Reference in New Issue
Block a user