Upgrade Apache Tika to the latest snapshot so that we get better date processing in metadata and more EXIF-based metadata

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@22250 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-09-03 17:21:23 +00:00
parent 7c79fcea58
commit bafa459acf

View File

@@ -31,6 +31,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.service.cmr.repository.ContentReader;
@@ -114,6 +115,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
// TODO Once TIKA-451 is fixed this list will get nicer
this.tikaDateFormats = new DateFormat[] {
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"),
@@ -127,6 +130,18 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy"),
new SimpleDateFormat("EEE MMM dd hh:mm:ss zzz yyyy", Locale.US)
};
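// Patterns ending in a quoted 'Z' treat it as a literal character rather than a
// timezone field, so those formats must be pinned to UTC explicitly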
for(DateFormat df : this.tikaDateFormats)
{
if(df instanceof SimpleDateFormat)
{
SimpleDateFormat sdf = (SimpleDateFormat)df;
if(sdf.toPattern().endsWith("'Z'"))
{
sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
}
}
}
}
/**