diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml index f055ac04b1..9bec1b8025 100644 --- a/config/alfresco/content-services-context.xml +++ b/config/alfresco/content-services-context.xml @@ -224,7 +224,6 @@ - @@ -235,6 +234,12 @@ + + + + + + diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index 2438ef8631..02d40a0527 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -662,7 +662,16 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // Ask Tika to detect the document, and report back on if // the current mime type is plausible String typeErrorMessage = null; - String differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader()); + String differentType = null; + if(mimetypeService != null) + { + differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader()); + } + else + { + logger.info("Unable to verify mimetype of " + reader.getReader() + + " as no MimetypeService available to " + getClass().getName()); + } if(differentType != null) { typeErrorMessage = "\n" + diff --git a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java index 1cb213f75e..5368a76fc9 100644 --- a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java @@ -31,38 +31,35 @@ import org.apache.tika.parser.mp3.Mp3Parser; /** * Extracts the following values from MP3 files: *
- *   songTitle:              --      {music}songTitle, cm:title
- *   albumTitle:             --      {music}albumTitle
- *   artist:                 --      {music}artist, cm:author
+ *   songTitle:              --      cm:title
+ *   albumTitle:             --      audio:album
+ *   artist:                 --      audio:artist, cm:author
  *   description:            --      cm:description
- *   comment:                --      {music}comment
- *   yearReleased:           --      {music}yearReleased
- *   trackNumber:            --      {music}trackNumber
- *   genre:                  --      {music}genre
- *   composer:               --      {music}composer
- *   lyrics:                 --      {music}lyrics
+ *   comment:                --      
+ *   yearReleased:           --      audio:releaseDate
+ *   trackNumber:            --      audio:trackNumber
+ *   genre:                  --      audio:genre
+ *   composer:               --      audio:composer
+ *   lyrics:                 --      
  * 
* - * TODO Get hold of a mp3 file with some lyrics in it, so we - * can contribute the patch to Tika + * Note - XMPDM metadata keys are also emitted, in common with + * the other Tika powered extracters * * Uses Apache Tika * * @author Nick Burch - * @author Roy Wetherall */ -public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter +public class MP3MetadataExtracter extends TikaAudioMetadataExtracter { private static final String KEY_SONG_TITLE = "songTitle"; private static final String KEY_ALBUM_TITLE = "albumTitle"; private static final String KEY_ARTIST = "artist"; - private static final String KEY_DESCRIPTION = "description"; private static final String KEY_COMMENT = "comment"; private static final String KEY_YEAR_RELEASED = "yearReleased"; private static final String KEY_TRACK_NUMBER = "trackNumber"; private static final String KEY_GENRE = "genre"; private static final String KEY_COMPOSER = "composer"; - private static final String KEY_LYRICS = "lyrics"; public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( new String[] { MimetypeMap.MIMETYPE_MP3 }, @@ -82,6 +79,12 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) { + // Do the normal Audio mappings + super.extractSpecific(metadata, properties, headers); + + // Now do the compatibility ones + // We only need these for people who had pre-existing mapping + // properties from before the proper audio model was added putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties); putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties); putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties); @@ -90,41 +93,8 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter putRawValue(KEY_GENRE, metadata.get(XMPDM.GENRE), properties); putRawValue(KEY_YEAR_RELEASED, metadata.get(XMPDM.RELEASE_DATE), properties); putRawValue(KEY_COMPOSER, metadata.get(XMPDM.COMPOSER), properties); - // TODO lyrics - //putRawValue(KEY_LYRICS, getLyrics(), properties); - - putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties); - + + // All done return properties; } - - /** - * Generate the description - * - * @param props the properties extracted from the file - * @return the description - */ - private String generateDescription(Metadata metadata) - { - StringBuilder result = new StringBuilder(); - if (metadata.get(Metadata.TITLE) != null) - { - result.append(metadata.get(Metadata.TITLE)); - if (metadata.get(XMPDM.ALBUM) != null) - { - result - .append(" - ") - .append(metadata.get(XMPDM.ALBUM)); - } - if (metadata.get(XMPDM.ARTIST) != null) - { - result - .append(" (") - .append(metadata.get(XMPDM.ARTIST)) - .append(")"); - } - } - - return result.toString(); - } } diff --git a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties index 89d14b3867..eba36d7d57 100644 --- a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties +++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties @@ -5,8 +5,26 @@ # Namespaces namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 +namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 -# Mappings -songTitle=cm:title -artist=cm:author -description=cm:description \ No newline at end of file +# Core mappings +author=cm:author +title=cm:title +description=cm:description +created=cm:created + +# Audio descriptive mappings +xmpDM\:album=audio:album +xmpDM\:artist=audio:artist +xmpDM\:composer=audio:composer +xmpDM\:engineer=audio:engineer +xmpDM\:genre=audio:genre +xmpDM\:trackNumber=audio:trackNumber +xmpDM\:releaseDate=audio:releaseDate +#xmpDM:logComment + +# Audio specific mappings +xmpDM\:audioSampleRate=audio:sampleRate +xmpDM\:audioSampleType=audio:sampleType +xmpDM\:audioChannelType=audio:channelType +xmpDM\:audioCompressor=audio:compressor diff --git a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java index 39863b42f0..2262426abb 100644 --- a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java @@ -29,17 +29,15 @@ import org.alfresco.service.namespace.QName; /** * Test for the MP3 metadata extraction from id3 tags. */ -public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest +public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest { private MP3MetadataExtracter extracter; - private static final String ARTIST = "Hauskaz"; - + @Override public void setUp() throws Exception { super.setUp(); - extracter = new MP3MetadataExtracter(); - extracter.setDictionaryService(dictionaryService); + extracter = (MP3MetadataExtracter)ctx.getBean("extracter.MP3"); extracter.register(); } @@ -64,6 +62,10 @@ public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest { testExtractFromMimetype(MimetypeMap.MIMETYPE_MP3); } + @Override + public void testOggExtraction() throws Exception {} + @Override + public void testFlacExtraction() throws Exception {} /** * We don't have quite the usual metadata. Tests the descriptions one. @@ -93,23 +95,6 @@ public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest * Tests for various MP3 specific bits of metadata */ public void testFileSpecificMetadata(String mimetype, Map properties) { - // Pending ALF-6170 for proper music namespace -// QName songTitle = QName.createQName("music","songTitle"); -// assertEquals( -// "Property " + songTitle + " not found for mimetype " + mimetype, -// QUICK_TITLE, -// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(songTitle))); -// -// QName songArtist = QName.createQName("music","artist"); -// assertEquals( -// "Property " + songArtist + " not found for mimetype " + mimetype, -// ARTIST, -// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(songArtist))); - - // Description is a composite - check the artist part - assertContains( - "Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype, - ARTIST, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); + super.testFileSpecificMetadata(mimetype, properties); } } diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java new file mode 100644 index 0000000000..ca2d7ec8b4 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2005-2010 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.repo.content.metadata; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.Map; + +import org.alfresco.repo.content.MimetypeMap; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.XMPDM; +import org.apache.tika.parser.CompositeParser; +import org.apache.tika.parser.Parser; +import org.gagravarr.tika.FlacParser; +import org.gagravarr.tika.VorbisParser; + +/** + * A Metadata Extractor which makes use of the Apache + * Tika Audio Parsers to extract metadata from your + * media files. + * For backwards compatibility reasons, this doesn't + * handle the MP3 format, which has its own dedicated + * extractor in {@link MP3MetadataExtracter} + + *
+ *   author:                 --      cm:author
+ *   title:                  --      cm:title
+ *   created:                --      cm:created
+ *   xmpDM:artist            --      audio:artist
+ *   xmpDM:composer          --      audio:composer
+ *   xmpDM:engineer          --      audio:engineer
+ *   xmpDM:genre             --      audio:genre
+ *   xmpDM:trackNumber       --      audio:trackNumber
+ *   xmpDM:releaseDate       --      audio:releaseDate
+ * 
+ * + * @author Nick Burch + */ +public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter +{ + protected static final String KEY_LYRICS = "lyrics"; + + private static Parser[] parsers = new Parser[] { + new VorbisParser(), + new FlacParser() + }; + public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( + new String[] { MimetypeMap.MIMETYPE_VORBIS, MimetypeMap.MIMETYPE_FLAC }, + parsers + ); + + protected TikaConfig tikaConfig; + public void setTikaConfig(TikaConfig tikaConfig) + { + this.tikaConfig = tikaConfig; + } + + public TikaAudioMetadataExtracter() + { + this(SUPPORTED_MIMETYPES); + } + public TikaAudioMetadataExtracter(ArrayList supportedMimeTypes) + { + super(supportedMimeTypes); + } + + @Override + protected Parser getParser() { + return new CompositeParser( + tikaConfig.getMediaTypeRegistry(), parsers + ); + } + + @Override + protected Map extractSpecific(Metadata metadata, + Map properties, Map headers) { + // Most things can go with the default Tika -> Alfresco Mapping + // Handle the few special cases here + + // The description is special + putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties); + + // The release date can be fiddly + Date releaseDate = generateReleaseDate(metadata); + putRawValue(KEY_CREATED, releaseDate, properties); + putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties); + + // TODO Get the Lyrics from the content + //putRawValue(KEY_LYRICS, getLyrics(), properties); + + // All done + return properties; + } + + /** + * Generates the release date + */ + private Date generateReleaseDate(Metadata metadata) + { + String date = metadata.get(XMPDM.RELEASE_DATE); + if(date == null || date.length() == 0) + { + return null; + } + + // Is it just a year? + if(date.matches("\\d\\d\\d\\d")) + { + // Just a year, we need a full date + // Go for the 1st of the 1st + Calendar c = Calendar.getInstance(); + c.set( + Integer.parseInt(date), Calendar.JANUARY, 1, + 0, 0, 0 + ); + c.set(Calendar.MILLISECOND, 0); + return c.getTime(); + } + + // Treat as a normal date + return makeDate(date); + } + + /** + * Generate the description + * + * @param props the properties extracted from the file + * @return the description + */ + private String generateDescription(Metadata metadata) + { + StringBuilder result = new StringBuilder(); + if (metadata.get(Metadata.TITLE) != null) + { + result.append(metadata.get(Metadata.TITLE)); + if (metadata.get(XMPDM.ALBUM) != null) + { + result + .append(" - ") + .append(metadata.get(XMPDM.ALBUM)); + } + if (metadata.get(XMPDM.ARTIST) != null) + { + result + .append(" (") + .append(metadata.get(XMPDM.ARTIST)) + .append(")"); + } + } + + return result.toString(); + } +} diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.properties new file mode 100644 index 0000000000..542a71ce8f --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.properties @@ -0,0 +1,34 @@ +# +# TikaAudioMetadataExtracter - audio mapping +# +# This is used to map from the Tika audio metadata onto your +# content model. This will be used for any Audio content +# for which an explicit extractor isn't defined +# +# author: Nick Burch + +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 +namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 + +# Core mappings +author=cm:author +title=cm:title +description=cm:description +created=cm:created + +# Audio descriptive mappings +xmpDM\:album=audio:album +xmpDM\:artist=audio:artist +xmpDM\:composer=audio:composer +xmpDM\:engineer=audio:engineer +xmpDM\:genre=audio:genre +xmpDM\:trackNumber=audio:trackNumber +xmpDM\:releaseDate=audio:releaseDate +#xmpDM:logComment + +# Audio specific mappings +xmpDM\:audioSampleRate=audio:sampleRate +xmpDM\:audioSampleType=audio:sampleType +xmpDM\:audioChannelType=audio:channelType +xmpDM\:audioCompressor=audio:compressor diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java new file mode 100644 index 0000000000..5b41104138 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2005 Jesper Steen Møller + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.repo.content.metadata; + +import java.io.Serializable; +import java.util.Map; + +import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; +import org.alfresco.service.namespace.NamespaceService; +import org.alfresco.service.namespace.QName; + +/** + * Test for the audio metadata extraction. + */ +public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTest +{ + private TikaAudioMetadataExtracter extracter; + private static final String ARTIST = "Hauskaz"; + private static final String ALBUM = "About a dog and a fox"; + private static final String GENRE = "Foxtrot"; + + @Override + public void setUp() throws Exception + { + super.setUp(); + extracter = (TikaAudioMetadataExtracter)ctx.getBean("extracter.Audio"); + extracter.register(); + } + + /** + * @return Returns the same transformer regardless - it is allowed + */ + protected MetadataExtracter getExtracter() + { + return extracter; + } + + public void testSupports() throws Exception + { + for (String mimetype : TikaAudioMetadataExtracter.SUPPORTED_MIMETYPES) + { + boolean supports = extracter.isSupported(mimetype); + assertTrue("Mimetype should be supported: " + mimetype, supports); + } + } + + public void testOggExtraction() throws Exception + { + testExtractFromMimetype(MimetypeMap.MIMETYPE_VORBIS); + } + public void testFlacExtraction() throws Exception + { + testExtractFromMimetype(MimetypeMap.MIMETYPE_FLAC); + } + + /** + * We don't have quite the usual metadata. Tests the descriptions one. + * Other tests in {@link #testFileSpecificMetadata(String, Map)} + */ + protected void testCommonMetadata(String mimetype, Map properties) { + // Title is as normal + assertEquals( + "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, + QUICK_TITLE, + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE))); + // Has Author, not Creator, and is different + assertEquals( + "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, + "Hauskaz", + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); + + // Description is a composite + assertContains( + "Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + QUICK_TITLE + " for mimetype " + mimetype, + QUICK_TITLE, + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); + // Check rest of it later + } + + /** + * Tests for various Audio specific bits of metadata + */ + public void testFileSpecificMetadata(String mimetype, Map properties) { + QName album = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "album"); + assertEquals( + "Property " + album + " not found for mimetype " + mimetype, + ALBUM, + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(album))); + + QName artist = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "artist"); + assertEquals( + "Property " + artist + " not found for mimetype " + mimetype, + ARTIST, + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(artist))); + + QName genre = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "genre"); + assertEquals( + "Property " + genre + " not found for mimetype " + mimetype, + GENRE, + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(genre))); + + QName releaseDate = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "releaseDate"); + assertEquals( + "Property " + releaseDate + " not found for mimetype " + mimetype, + "2009-01-01T00:00:00.000Z", + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(releaseDate))); + + QName channels = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "channelType"); + assertEquals( + "Property " + channels + " not found for mimetype " + mimetype, + "Stereo", + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(channels))); + + + // Description is a composite - check the artist part + assertContains( + "Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype, + ARTIST, + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); + } +} diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties index 68cc261647..6982bb96d9 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties +++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.properties @@ -11,6 +11,7 @@ # Namespaces namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0 +namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 # Mappings author=cm:author @@ -35,4 +36,17 @@ exif\:ExposureTime=exif:exposureTime exif\:FNumber=exif:fNumber exif\:FocalLength=exif:focalLength exif\:IsoSpeedRatings=exif:isoSpeedRatings -exif\:DateTimeOriginal=exif:dateTimeOriginal \ No newline at end of file +exif\:DateTimeOriginal=exif:dateTimeOriginal + +xmpDM\:album=audio:album +xmpDM\:artist=audio:artist +xmpDM\:composer=audio:composer +xmpDM\:engineer=audio:engineer +xmpDM\:genre=audio:genre +xmpDM\:trackNumber=audio:trackNumber +xmpDM\:releaseDate=audio:releaseDate +#xmpDM:logComment +xmpDM\:audioSampleRate=audio:sampleRate +xmpDM\:audioSampleType=audio:sampleType +xmpDM\:audioChannelType=audio:channelType +xmpDM\:audioCompressor=audio:compressor diff --git a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java index 846fa21ea9..d679f807a6 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java @@ -124,7 +124,6 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest //"2010.dwg", // Not auto-detected properly yet ".pdf", ".odt", - ".ogg" }; for (String fileBase : testFiles) diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index cd0a59f253..bfb88aabeb 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -88,18 +88,21 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada * Builds up a list of supported mime types by merging an explicit * list with any that Tika also claims to support */ - protected static ArrayList buildSupportedMimetypes(String[] explicitTypes, Parser tikaParser) { + protected static ArrayList buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers) { ArrayList types = new ArrayList(); for(String type : explicitTypes) { if(!types.contains(type)) { types.add(type); } } - if(tikaParser != null) { - for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) { - String type = mt.toString(); - if(!types.contains(type)) { - types.add(type); + if(tikaParsers != null) { + for(Parser tikaParser : tikaParsers) + { + for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) { + String type = mt.toString(); + if(!types.contains(type)) { + types.add(type); + } } } } @@ -225,9 +228,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada { is = getInputStream(reader); Parser parser = getParser(); - Metadata metadata = new Metadata(); ParseContext context = new ParseContext(); + Metadata metadata = new Metadata(); + metadata.add(Metadata.CONTENT_TYPE, reader.getMimetype()); + ContentHandler handler; Map headers = null; if(needHeaderContents()) {