Merged HEAD-BUG-FIX (4.3/Cloud) to HEAD (4.3/Cloud)

68539: Merged V4.2-BUG-FIX (4.2.3) to HEAD-BUG-FIX (4.3/Cloud)
      68468: MNT-11350: Upgrade of Tika to 1.6-SNAPSHOT
         - Disabled parsing of embedded images in metadata extraction as well


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@70421 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Alan Davis
2014-05-16 16:38:52 +00:00
parent da9ce86dfe
commit 09ac1c52ff

View File

@@ -260,13 +260,26 @@
</property> </property>
</bean> </bean>
<!-- Shared document selector bean. It is referenced through the documentSelector
     property of both extracter.PDFBox and transformer.PdfBox, so the two beans use
     a single definition of the disabled media types (JPEG, TIFF and PNG images).
     NOTE(review): presumably embedded documents of these types are skipped while
     parsing; confirm against MediaTypeDisablingDocumentSelector. -->
<bean id="pdfBoxEmbededDocumentSelector"
class="org.alfresco.repo.content.metadata.MediaTypeDisablingDocumentSelector">
<property name="disabledMediaTypes">
<list>
<value>image/jpeg</value>
<value>image/tiff</value>
<value>image/png</value>
</list>
</property>
</bean>
<!-- Content Metadata Extractors --> <!-- Content Metadata Extractors -->
<!-- The last one listed for any mimetype will be used if available --> <!-- The last one listed for any mimetype will be used if available -->
<!-- As such, the Tika auto-detect fallback should be listed first --> <!-- As such, the Tika auto-detect fallback should be listed first -->
<bean id="extracter.TikaAuto" class="org.alfresco.repo.content.metadata.TikaAutoMetadataExtracter" parent="baseMetadataExtracter"> <bean id="extracter.TikaAuto" class="org.alfresco.repo.content.metadata.TikaAutoMetadataExtracter" parent="baseMetadataExtracter">
<constructor-arg><ref bean="tikaConfig"/></constructor-arg> <constructor-arg><ref bean="tikaConfig"/></constructor-arg>
</bean> </bean>
<bean id="extracter.PDFBox" class="org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter" parent="baseMetadataExtracter" /> <bean id="extracter.PDFBox" class="org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter" parent="baseMetadataExtracter">
<property name="documentSelector" ref="pdfBoxEmbededDocumentSelector" />
</bean>
<bean id="extracter.Poi" class="org.alfresco.repo.content.metadata.PoiMetadataExtracter" parent="baseMetadataExtracter" /> <bean id="extracter.Poi" class="org.alfresco.repo.content.metadata.PoiMetadataExtracter" parent="baseMetadataExtracter" />
<bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" /> <bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" />
<bean id="extracter.Mail" class="org.alfresco.repo.content.metadata.MailMetadataExtracter" parent="baseMetadataExtracter" /> <bean id="extracter.Mail" class="org.alfresco.repo.content.metadata.MailMetadataExtracter" parent="baseMetadataExtracter" />
@@ -462,17 +475,7 @@
<bean id="transformer.PdfBox" <bean id="transformer.PdfBox"
class="org.alfresco.repo.content.transform.PdfBoxContentTransformer" class="org.alfresco.repo.content.transform.PdfBoxContentTransformer"
parent="baseContentTransformer" > parent="baseContentTransformer" >
<property name="documentSelector"> <property name="documentSelector" ref="pdfBoxEmbededDocumentSelector" />
<bean class="org.alfresco.repo.content.metadata.MediaTypeDisablingDocumentSelector">
<property name="disabledMediaTypes">
<list>
<value>image/jpeg</value>
<value>image/tiff</value>
<value>image/png</value>
</list>
</property>
</bean>
</property>
</bean> </bean>
<bean id="transformer.PdfBox.TextToPdf" <bean id="transformer.PdfBox.TextToPdf"