mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Add cm:geographic Aspect, which has cm:latitude and cm:longitude, and update the Tika auto parser to map to this (plus tests)
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20925 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -1175,6 +1175,20 @@
|
|||||||
<!-- DEPRECATED (end of) -->
|
<!-- DEPRECATED (end of) -->
|
||||||
<!-- -->
|
<!-- -->
|
||||||
|
|
||||||
|
<aspect name="cm:geographic">
|
||||||
|
<title>Geographic</title>
|
||||||
|
<properties>
|
||||||
|
<property name="cm:latitude">
|
||||||
|
<title>Latitude</title>
|
||||||
|
<type>d:double</type>
|
||||||
|
</property>
|
||||||
|
<property name="cm:longitude">
|
||||||
|
<title>Longitude</title>
|
||||||
|
<type>d:double</type>
|
||||||
|
</property>
|
||||||
|
</properties>
|
||||||
|
</aspect>
|
||||||
|
|
||||||
</aspects>
|
</aspects>
|
||||||
|
|
||||||
</model>
|
</model>
|
||||||
|
@@ -33,12 +33,11 @@ import org.apache.tika.parser.pdf.PDFParser;
|
|||||||
* <b>title:</b> -- cm:title
|
* <b>title:</b> -- cm:title
|
||||||
* <b>subject:</b> -- cm:description
|
* <b>subject:</b> -- cm:description
|
||||||
* <b>created:</b> -- cm:created
|
* <b>created:</b> -- cm:created
|
||||||
|
* <b>(custom metadata):</b> --
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* Uses Apache Tika
|
* Uses Apache Tika
|
||||||
*
|
*
|
||||||
* TODO - Update Tika to handle custom metadata
|
|
||||||
*
|
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
* @author Derek Hulley
|
* @author Derek Hulley
|
||||||
*/
|
*/
|
||||||
|
@@ -40,6 +40,8 @@ import org.apache.tika.parser.Parser;
|
|||||||
* <b>subject:</b> -- cm:description
|
* <b>subject:</b> -- cm:description
|
||||||
* <b>created:</b> -- cm:created
|
* <b>created:</b> -- cm:created
|
||||||
* <b>comments:</b>
|
* <b>comments:</b>
|
||||||
|
* <b>geo:lat:</b> -- cm:latitude
|
||||||
|
* <b>geo:long:</b> -- cm:longitude
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
|
@@ -16,3 +16,8 @@ author=cm:author
|
|||||||
title=cm:title
|
title=cm:title
|
||||||
description=cm:description
|
description=cm:description
|
||||||
created=cm:created
|
created=cm:created
|
||||||
|
|
||||||
|
geo\:lat=cm:latitude
|
||||||
|
geo\:long=cm:longitude
|
||||||
|
#tiff\:ImageWidth=cm:imageWidth
|
||||||
|
#tiff\:ImageLength=cm:imageHeight
|
@@ -210,16 +210,38 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
assertEquals("409", p.get("width"));
|
assertEquals("409", p.get("width"));
|
||||||
assertEquals("92", p.get("height"));
|
assertEquals("92", p.get("height"));
|
||||||
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
||||||
|
|
||||||
|
|
||||||
|
// Geo tagged image
|
||||||
|
p = openAndCheck("GEO.jpg", "image/jpeg");
|
||||||
|
assertEquals("100 pixels", p.get("Image Width"));
|
||||||
|
assertEquals("68 pixels", p.get("Image Height"));
|
||||||
|
assertEquals("8 bits", p.get("Data Precision"));
|
||||||
|
assertEquals(QUICK_TITLE, p.get("Comments"));
|
||||||
|
assertEquals("12.54321", p.get("geo:lat"));
|
||||||
|
assertEquals("-54.1234", p.get("geo:long"));
|
||||||
|
|
||||||
|
// Map and check
|
||||||
|
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||||
|
ContentReader reader = new FileContentReader(open("GEO.jpg"));
|
||||||
|
reader.setMimetype("image/jpeg");
|
||||||
|
extracter.extract(reader, properties);
|
||||||
|
assertEquals(12.54321, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","latitude")));
|
||||||
|
assertEquals(-54.1234, properties.get(QName.createQName("http://www.alfresco.org/model/content/1.0","longitude")));
|
||||||
}
|
}
|
||||||
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
|
private File open(String fileBase) throws Throwable {
|
||||||
String filename = "quick" + fileBase;
|
String filename = "quick" + fileBase;
|
||||||
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
|
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
|
||||||
File file = new File(url.getFile());
|
File file = new File(url.getFile());
|
||||||
|
assertTrue(file.exists());
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
|
||||||
// Cheat and ask Tika for the mime type!
|
// Cheat and ask Tika for the mime type!
|
||||||
|
File file = open(fileBase);
|
||||||
AutoDetectParser ap = new AutoDetectParser();
|
AutoDetectParser ap = new AutoDetectParser();
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
|
metadata.set(Metadata.RESOURCE_NAME_KEY, "quick"+fileBase);
|
||||||
MediaType mt = ap.getDetector().detect(
|
MediaType mt = ap.getDetector().detect(
|
||||||
new BufferedInputStream(new FileInputStream(file)), metadata);
|
new BufferedInputStream(new FileInputStream(file)), metadata);
|
||||||
String mimetype = mt.toString();
|
String mimetype = mt.toString();
|
||||||
|
@@ -18,6 +18,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content.metadata;
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
@@ -31,9 +32,11 @@ import java.util.HashSet;
|
|||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.tika.io.TikaInputStream;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
@@ -109,6 +112,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
{
|
{
|
||||||
super(supportedMimeTypes);
|
super(supportedMimeTypes);
|
||||||
|
|
||||||
|
// TODO Once TIKA-451 is fixed this list will get nicer
|
||||||
this.tikaDateFormats = new DateFormat[] {
|
this.tikaDateFormats = new DateFormat[] {
|
||||||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"),
|
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"),
|
||||||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
|
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
|
||||||
@@ -116,6 +120,10 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US),
|
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US),
|
||||||
new SimpleDateFormat("yyyy-MM-dd"),
|
new SimpleDateFormat("yyyy-MM-dd"),
|
||||||
new SimpleDateFormat("yyyy-MM-dd", Locale.US),
|
new SimpleDateFormat("yyyy-MM-dd", Locale.US),
|
||||||
|
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"),
|
||||||
|
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US),
|
||||||
|
new SimpleDateFormat("yyyy/MM/dd"),
|
||||||
|
new SimpleDateFormat("yyyy/MM/dd", Locale.US),
|
||||||
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"),
|
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"),
|
||||||
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US)
|
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US)
|
||||||
};
|
};
|
||||||
@@ -169,6 +177,28 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
return properties;
|
return properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* There seems to be some sort of issue with some downstream
|
||||||
|
* 3rd party libraries, and input streams that come from
|
||||||
|
* a {@link ContentReader}. This happens most often with
|
||||||
|
* JPEG and Tiff files.
|
||||||
|
* For these cases, buffer out to a local file if not
|
||||||
|
* already there
|
||||||
|
*/
|
||||||
|
private InputStream getInputStream(ContentReader reader) throws IOException {
|
||||||
|
if("image/jpeg".equals(reader.getMimetype()) ||
|
||||||
|
"image/tiff".equals(reader.getMimetype())) {
|
||||||
|
if(reader instanceof FileContentReader) {
|
||||||
|
return TikaInputStream.get( ((FileContentReader)reader).getFile() );
|
||||||
|
} else {
|
||||||
|
File tmpFile = File.createTempFile("tika", "tmp");
|
||||||
|
reader.getContent(tmpFile);
|
||||||
|
return TikaInputStream.get(tmpFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return reader.getContentInputStream();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||||
{
|
{
|
||||||
@@ -177,7 +207,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
is = reader.getContentInputStream();
|
is = getInputStream(reader);
|
||||||
Parser parser = getParser();
|
Parser parser = getParser();
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
ParseContext context = new ParseContext();
|
ParseContext context = new ParseContext();
|
||||||
|
BIN
source/test-resources/quick/quickGEO.jpg
Normal file
BIN
source/test-resources/quick/quickGEO.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
Reference in New Issue
Block a user