mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Update the MP3 extractor to output audio keys (related to ALF-6170), and refactor the audio extractors to share more common code. Also expands the audio extractor tests to share common code, and test more metadata. (Needed for devcon demo)
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@31013 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -224,7 +224,6 @@
|
||||
<bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.Mail" class="org.alfresco.repo.content.metadata.MailMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.Html" class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.MP3" class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.OpenDocument" class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.DWG" class="org.alfresco.repo.content.metadata.DWGMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.RFC822" class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter" parent="baseMetadataExtracter" >
|
||||
@@ -235,6 +234,12 @@
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
<bean id="extracter.MP3" class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter">
|
||||
<property name="tikaConfig" ref="tikaConfig"/>
|
||||
</bean>
|
||||
<bean id="extracter.Audio" class="org.alfresco.repo.content.metadata.TikaAudioMetadataExtracter" parent="baseMetadataExtracter">
|
||||
<property name="tikaConfig" ref="tikaConfig"/>
|
||||
</bean>
|
||||
<bean id="extracter.OpenOffice" class="org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracter" parent="baseMetadataExtracter">
|
||||
<property name="worker">
|
||||
<ref bean="extracter.worker.OpenOffice" />
|
||||
|
@@ -662,7 +662,16 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
// Ask Tika to detect the document, and report back on if
|
||||
// the current mime type is plausible
|
||||
String typeErrorMessage = null;
|
||||
String differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader());
|
||||
String differentType = null;
|
||||
if(mimetypeService != null)
|
||||
{
|
||||
differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader());
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.info("Unable to verify mimetype of " + reader.getReader() +
|
||||
" as no MimetypeService available to " + getClass().getName());
|
||||
}
|
||||
if(differentType != null)
|
||||
{
|
||||
typeErrorMessage = "\n" +
|
||||
|
@@ -31,38 +31,35 @@ import org.apache.tika.parser.mp3.Mp3Parser;
|
||||
/**
|
||||
* Extracts the following values from MP3 files:
|
||||
* <pre>
|
||||
* <b>songTitle:</b> -- {music}songTitle, cm:title
|
||||
* <b>albumTitle:</b> -- {music}albumTitle
|
||||
* <b>artist:</b> -- {music}artist, cm:author
|
||||
* <b>songTitle:</b> -- cm:title
|
||||
* <b>albumTitle:</b> -- audio:album
|
||||
* <b>artist:</b> -- audio:artist, cm:author
|
||||
* <b>description:</b> -- cm:description
|
||||
* <b>comment:</b> -- {music}comment
|
||||
* <b>yearReleased:</b> -- {music}yearReleased
|
||||
* <b>trackNumber:</b> -- {music}trackNumber
|
||||
* <b>genre:</b> -- {music}genre
|
||||
* <b>composer:</b> -- {music}composer
|
||||
* <b>lyrics:</b> -- {music}lyrics
|
||||
* <b>comment:</b> --
|
||||
* <b>yearReleased:</b> -- audio:releaseDate
|
||||
* <b>trackNumber:</b> -- audio:trackNumber
|
||||
* <b>genre:</b> -- audio:genre
|
||||
* <b>composer:</b> -- audio:composer
|
||||
* <b>lyrics:</b> --
|
||||
* </pre>
|
||||
*
|
||||
* TODO Get hold of a mp3 file with some lyrics in it, so we
|
||||
* can contribute the patch to Tika
|
||||
* Note - XMPDM metadata keys are also emitted, in common with
|
||||
* the other Tika powered extracters
|
||||
*
|
||||
* Uses Apache Tika
|
||||
*
|
||||
* @author Nick Burch
|
||||
* @author Roy Wetherall
|
||||
*/
|
||||
public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
||||
public class MP3MetadataExtracter extends TikaAudioMetadataExtracter
|
||||
{
|
||||
private static final String KEY_SONG_TITLE = "songTitle";
|
||||
private static final String KEY_ALBUM_TITLE = "albumTitle";
|
||||
private static final String KEY_ARTIST = "artist";
|
||||
private static final String KEY_DESCRIPTION = "description";
|
||||
private static final String KEY_COMMENT = "comment";
|
||||
private static final String KEY_YEAR_RELEASED = "yearReleased";
|
||||
private static final String KEY_TRACK_NUMBER = "trackNumber";
|
||||
private static final String KEY_GENRE = "genre";
|
||||
private static final String KEY_COMPOSER = "composer";
|
||||
private static final String KEY_LYRICS = "lyrics";
|
||||
|
||||
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||
new String[] { MimetypeMap.MIMETYPE_MP3 },
|
||||
@@ -82,6 +79,12 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
||||
@Override
|
||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||
// Do the normal Audio mappings
|
||||
super.extractSpecific(metadata, properties, headers);
|
||||
|
||||
// Now do the compatibility ones
|
||||
// We only need these for people who had pre-existing mapping
|
||||
// properties from before the proper audio model was added
|
||||
putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties);
|
||||
putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties);
|
||||
putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties);
|
||||
@@ -90,41 +93,8 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
||||
putRawValue(KEY_GENRE, metadata.get(XMPDM.GENRE), properties);
|
||||
putRawValue(KEY_YEAR_RELEASED, metadata.get(XMPDM.RELEASE_DATE), properties);
|
||||
putRawValue(KEY_COMPOSER, metadata.get(XMPDM.COMPOSER), properties);
|
||||
// TODO lyrics
|
||||
//putRawValue(KEY_LYRICS, getLyrics(), properties);
|
||||
|
||||
putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties);
|
||||
|
||||
// All done
|
||||
return properties;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the description
|
||||
*
|
||||
* @param props the properties extracted from the file
|
||||
* @return the description
|
||||
*/
|
||||
private String generateDescription(Metadata metadata)
|
||||
{
|
||||
StringBuilder result = new StringBuilder();
|
||||
if (metadata.get(Metadata.TITLE) != null)
|
||||
{
|
||||
result.append(metadata.get(Metadata.TITLE));
|
||||
if (metadata.get(XMPDM.ALBUM) != null)
|
||||
{
|
||||
result
|
||||
.append(" - ")
|
||||
.append(metadata.get(XMPDM.ALBUM));
|
||||
}
|
||||
if (metadata.get(XMPDM.ARTIST) != null)
|
||||
{
|
||||
result
|
||||
.append(" (")
|
||||
.append(metadata.get(XMPDM.ARTIST))
|
||||
.append(")");
|
||||
}
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
}
|
||||
|
@@ -5,8 +5,26 @@
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||
|
||||
# Mappings
|
||||
songTitle=cm:title
|
||||
artist=cm:author
|
||||
# Core mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
# Audio descriptive mappings
|
||||
xmpDM\:album=audio:album
|
||||
xmpDM\:artist=audio:artist
|
||||
xmpDM\:composer=audio:composer
|
||||
xmpDM\:engineer=audio:engineer
|
||||
xmpDM\:genre=audio:genre
|
||||
xmpDM\:trackNumber=audio:trackNumber
|
||||
xmpDM\:releaseDate=audio:releaseDate
|
||||
#xmpDM:logComment
|
||||
|
||||
# Audio specific mappings
|
||||
xmpDM\:audioSampleRate=audio:sampleRate
|
||||
xmpDM\:audioSampleType=audio:sampleType
|
||||
xmpDM\:audioChannelType=audio:channelType
|
||||
xmpDM\:audioCompressor=audio:compressor
|
||||
|
@@ -29,17 +29,15 @@ import org.alfresco.service.namespace.QName;
|
||||
/**
|
||||
* Test for the MP3 metadata extraction from id3 tags.
|
||||
*/
|
||||
public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
|
||||
{
|
||||
private MP3MetadataExtracter extracter;
|
||||
private static final String ARTIST = "Hauskaz";
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new MP3MetadataExtracter();
|
||||
extracter.setDictionaryService(dictionaryService);
|
||||
extracter = (MP3MetadataExtracter)ctx.getBean("extracter.MP3");
|
||||
extracter.register();
|
||||
}
|
||||
|
||||
@@ -64,6 +62,10 @@ public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_MP3);
|
||||
}
|
||||
@Override
|
||||
public void testOggExtraction() throws Exception {}
|
||||
@Override
|
||||
public void testFlacExtraction() throws Exception {}
|
||||
|
||||
/**
|
||||
* We don't have quite the usual metadata. Tests the descriptions one.
|
||||
@@ -93,23 +95,6 @@ public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
* Tests for various MP3 specific bits of metadata
|
||||
*/
|
||||
public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) {
|
||||
// Pending ALF-6170 for proper music namespace
|
||||
// QName songTitle = QName.createQName("music","songTitle");
|
||||
// assertEquals(
|
||||
// "Property " + songTitle + " not found for mimetype " + mimetype,
|
||||
// QUICK_TITLE,
|
||||
// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(songTitle)));
|
||||
//
|
||||
// QName songArtist = QName.createQName("music","artist");
|
||||
// assertEquals(
|
||||
// "Property " + songArtist + " not found for mimetype " + mimetype,
|
||||
// ARTIST,
|
||||
// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(songArtist)));
|
||||
|
||||
// Description is a composite - check the artist part
|
||||
assertContains(
|
||||
"Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype,
|
||||
ARTIST,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION)));
|
||||
super.testFileSpecificMetadata(mimetype, properties);
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.XMPDM;
|
||||
import org.apache.tika.parser.CompositeParser;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.gagravarr.tika.FlacParser;
|
||||
import org.gagravarr.tika.VorbisParser;
|
||||
|
||||
/**
|
||||
* A Metadata Extractor which makes use of the Apache
|
||||
* Tika Audio Parsers to extract metadata from your
|
||||
* media files.
|
||||
* For backwards compatibility reasons, this doesn't
|
||||
* handle the MP3 format, which has its own dedicated
|
||||
* extractor in {@link MP3MetadataExtracter}
|
||||
|
||||
* <pre>
|
||||
* <b>author:</b> -- cm:author
|
||||
* <b>title:</b> -- cm:title
|
||||
* <b>created:</b> -- cm:created
|
||||
* <b>xmpDM:artist</b> -- audio:artist
|
||||
* <b>xmpDM:composer</b> -- audio:composer
|
||||
* <b>xmpDM:engineer</b> -- audio:engineer
|
||||
* <b>xmpDM:genre</b> -- audio:genre
|
||||
* <b>xmpDM:trackNumber</b> -- audio:trackNumber
|
||||
* <b>xmpDM:releaseDate</b> -- audio:releaseDate
|
||||
* </pre>
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
|
||||
{
|
||||
protected static final String KEY_LYRICS = "lyrics";
|
||||
|
||||
private static Parser[] parsers = new Parser[] {
|
||||
new VorbisParser(),
|
||||
new FlacParser()
|
||||
};
|
||||
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||
new String[] { MimetypeMap.MIMETYPE_VORBIS, MimetypeMap.MIMETYPE_FLAC },
|
||||
parsers
|
||||
);
|
||||
|
||||
protected TikaConfig tikaConfig;
|
||||
public void setTikaConfig(TikaConfig tikaConfig)
|
||||
{
|
||||
this.tikaConfig = tikaConfig;
|
||||
}
|
||||
|
||||
public TikaAudioMetadataExtracter()
|
||||
{
|
||||
this(SUPPORTED_MIMETYPES);
|
||||
}
|
||||
public TikaAudioMetadataExtracter(ArrayList<String> supportedMimeTypes)
|
||||
{
|
||||
super(supportedMimeTypes);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Parser getParser() {
|
||||
return new CompositeParser(
|
||||
tikaConfig.getMediaTypeRegistry(), parsers
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||
// Most things can go with the default Tika -> Alfresco Mapping
|
||||
// Handle the few special cases here
|
||||
|
||||
// The description is special
|
||||
putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties);
|
||||
|
||||
// The release date can be fiddly
|
||||
Date releaseDate = generateReleaseDate(metadata);
|
||||
putRawValue(KEY_CREATED, releaseDate, properties);
|
||||
putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties);
|
||||
|
||||
// TODO Get the Lyrics from the content
|
||||
//putRawValue(KEY_LYRICS, getLyrics(), properties);
|
||||
|
||||
// All done
|
||||
return properties;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the release date
|
||||
*/
|
||||
private Date generateReleaseDate(Metadata metadata)
|
||||
{
|
||||
String date = metadata.get(XMPDM.RELEASE_DATE);
|
||||
if(date == null || date.length() == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Is it just a year?
|
||||
if(date.matches("\\d\\d\\d\\d"))
|
||||
{
|
||||
// Just a year, we need a full date
|
||||
// Go for the 1st of the 1st
|
||||
Calendar c = Calendar.getInstance();
|
||||
c.set(
|
||||
Integer.parseInt(date), Calendar.JANUARY, 1,
|
||||
0, 0, 0
|
||||
);
|
||||
c.set(Calendar.MILLISECOND, 0);
|
||||
return c.getTime();
|
||||
}
|
||||
|
||||
// Treat as a normal date
|
||||
return makeDate(date);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the description
|
||||
*
|
||||
* @param props the properties extracted from the file
|
||||
* @return the description
|
||||
*/
|
||||
private String generateDescription(Metadata metadata)
|
||||
{
|
||||
StringBuilder result = new StringBuilder();
|
||||
if (metadata.get(Metadata.TITLE) != null)
|
||||
{
|
||||
result.append(metadata.get(Metadata.TITLE));
|
||||
if (metadata.get(XMPDM.ALBUM) != null)
|
||||
{
|
||||
result
|
||||
.append(" - ")
|
||||
.append(metadata.get(XMPDM.ALBUM));
|
||||
}
|
||||
if (metadata.get(XMPDM.ARTIST) != null)
|
||||
{
|
||||
result
|
||||
.append(" (")
|
||||
.append(metadata.get(XMPDM.ARTIST))
|
||||
.append(")");
|
||||
}
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
}
|
@@ -0,0 +1,34 @@
|
||||
#
|
||||
# TikaAudioMetadataExtracter - audio mapping
|
||||
#
|
||||
# This is used to map from the Tika audio metadata onto your
|
||||
# content model. This will be used for any Audio content
|
||||
# for which an explicit extractor isn't defined
|
||||
#
|
||||
# author: Nick Burch
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||
|
||||
# Core mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
# Audio descriptive mappings
|
||||
xmpDM\:album=audio:album
|
||||
xmpDM\:artist=audio:artist
|
||||
xmpDM\:composer=audio:composer
|
||||
xmpDM\:engineer=audio:engineer
|
||||
xmpDM\:genre=audio:genre
|
||||
xmpDM\:trackNumber=audio:trackNumber
|
||||
xmpDM\:releaseDate=audio:releaseDate
|
||||
#xmpDM:logComment
|
||||
|
||||
# Audio specific mappings
|
||||
xmpDM\:audioSampleRate=audio:sampleRate
|
||||
xmpDM\:audioSampleType=audio:sampleType
|
||||
xmpDM\:audioChannelType=audio:channelType
|
||||
xmpDM\:audioCompressor=audio:compressor
|
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||
import org.alfresco.service.namespace.NamespaceService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
* Test for the audio metadata extraction.
|
||||
*/
|
||||
public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private TikaAudioMetadataExtracter extracter;
|
||||
private static final String ARTIST = "Hauskaz";
|
||||
private static final String ALBUM = "About a dog and a fox";
|
||||
private static final String GENRE = "Foxtrot";
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = (TikaAudioMetadataExtracter)ctx.getBean("extracter.Audio");
|
||||
extracter.register();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
public void testSupports() throws Exception
|
||||
{
|
||||
for (String mimetype : TikaAudioMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||
{
|
||||
boolean supports = extracter.isSupported(mimetype);
|
||||
assertTrue("Mimetype should be supported: " + mimetype, supports);
|
||||
}
|
||||
}
|
||||
|
||||
public void testOggExtraction() throws Exception
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_VORBIS);
|
||||
}
|
||||
public void testFlacExtraction() throws Exception
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_FLAC);
|
||||
}
|
||||
|
||||
/**
|
||||
* We don't have quite the usual metadata. Tests the descriptions one.
|
||||
* Other tests in {@link #testFileSpecificMetadata(String, Map)}
|
||||
*/
|
||||
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) {
|
||||
// Title is as normal
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
|
||||
QUICK_TITLE,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE)));
|
||||
// Has Author, not Creator, and is different
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,
|
||||
"Hauskaz",
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR)));
|
||||
|
||||
// Description is a composite
|
||||
assertContains(
|
||||
"Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + QUICK_TITLE + " for mimetype " + mimetype,
|
||||
QUICK_TITLE,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION)));
|
||||
// Check rest of it later
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for various Audio specific bits of metadata
|
||||
*/
|
||||
public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) {
|
||||
QName album = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "album");
|
||||
assertEquals(
|
||||
"Property " + album + " not found for mimetype " + mimetype,
|
||||
ALBUM,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(album)));
|
||||
|
||||
QName artist = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "artist");
|
||||
assertEquals(
|
||||
"Property " + artist + " not found for mimetype " + mimetype,
|
||||
ARTIST,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(artist)));
|
||||
|
||||
QName genre = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "genre");
|
||||
assertEquals(
|
||||
"Property " + genre + " not found for mimetype " + mimetype,
|
||||
GENRE,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(genre)));
|
||||
|
||||
QName releaseDate = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "releaseDate");
|
||||
assertEquals(
|
||||
"Property " + releaseDate + " not found for mimetype " + mimetype,
|
||||
"2009-01-01T00:00:00.000Z",
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(releaseDate)));
|
||||
|
||||
QName channels = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "channelType");
|
||||
assertEquals(
|
||||
"Property " + channels + " not found for mimetype " + mimetype,
|
||||
"Stereo",
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(channels)));
|
||||
|
||||
|
||||
// Description is a composite - check the artist part
|
||||
assertContains(
|
||||
"Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype,
|
||||
ARTIST,
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION)));
|
||||
}
|
||||
}
|
@@ -11,6 +11,7 @@
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
|
||||
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
@@ -36,3 +37,16 @@ exif\:FNumber=exif:fNumber
|
||||
exif\:FocalLength=exif:focalLength
|
||||
exif\:IsoSpeedRatings=exif:isoSpeedRatings
|
||||
exif\:DateTimeOriginal=exif:dateTimeOriginal
|
||||
|
||||
xmpDM\:album=audio:album
|
||||
xmpDM\:artist=audio:artist
|
||||
xmpDM\:composer=audio:composer
|
||||
xmpDM\:engineer=audio:engineer
|
||||
xmpDM\:genre=audio:genre
|
||||
xmpDM\:trackNumber=audio:trackNumber
|
||||
xmpDM\:releaseDate=audio:releaseDate
|
||||
#xmpDM:logComment
|
||||
xmpDM\:audioSampleRate=audio:sampleRate
|
||||
xmpDM\:audioSampleType=audio:sampleType
|
||||
xmpDM\:audioChannelType=audio:channelType
|
||||
xmpDM\:audioCompressor=audio:compressor
|
||||
|
@@ -124,7 +124,6 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
//"2010.dwg", // Not auto-detected properly yet
|
||||
".pdf",
|
||||
".odt",
|
||||
".ogg"
|
||||
};
|
||||
|
||||
for (String fileBase : testFiles)
|
||||
|
@@ -88,18 +88,21 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
* Builds up a list of supported mime types by merging an explicit
|
||||
* list with any that Tika also claims to support
|
||||
*/
|
||||
protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser tikaParser) {
|
||||
protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers) {
|
||||
ArrayList<String> types = new ArrayList<String>();
|
||||
for(String type : explicitTypes) {
|
||||
if(!types.contains(type)) {
|
||||
types.add(type);
|
||||
}
|
||||
}
|
||||
if(tikaParser != null) {
|
||||
for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) {
|
||||
String type = mt.toString();
|
||||
if(!types.contains(type)) {
|
||||
types.add(type);
|
||||
if(tikaParsers != null) {
|
||||
for(Parser tikaParser : tikaParsers)
|
||||
{
|
||||
for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) {
|
||||
String type = mt.toString();
|
||||
if(!types.contains(type)) {
|
||||
types.add(type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -225,9 +228,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
{
|
||||
is = getInputStream(reader);
|
||||
Parser parser = getParser();
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = new ParseContext();
|
||||
|
||||
Metadata metadata = new Metadata();
|
||||
metadata.add(Metadata.CONTENT_TYPE, reader.getMimetype());
|
||||
|
||||
ContentHandler handler;
|
||||
Map<String,String> headers = null;
|
||||
if(needHeaderContents()) {
|
||||
|
Reference in New Issue
Block a user