Files
alfresco-community-repo/config/alfresco/linkvalidation-service-context.xml
Jon Cox c5d97bfebe Generalized handling of URI schemas to be ignored during link validation.
These are configurable in linkvalidation-service-context.xml
Also ensured the list of links per file is sorted & unique.

 Here's a list of known protocols:

 http://www.iana.org/assignments/uri-schemes.html

      aaa aaas acap afs cap cid crid data dav dict dns dtn fax file
      ftp go gopher h323 http https iax2 icap im imap info ipp iris
      iris.beep iris.lwz iris.xpc iris.xpcs ldap mailserver mailto
      mid modem msrp msrps mtqp mupdate news nfs nntp opaquelocktoken
      pop pres prospero rtsp service shttp sip sips snmp soap.beep
      soap.beeps tag tel telnet tftp thismessage tip tn3270 tv urn
      vemmi wais xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s

 For now, all these URI schemes get a free pass except http & https. 
 Any URI not qualified by one of these protocol/scheme designators 
 is presumed to be broken.  It would be nice to validate ftp links
 for real, but that won't happen for a while.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6189 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2007-07-07 02:58:22 +00:00

305 lines
12 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING/DTD BEAN//EN"
"http://www.springframework.org/dtd/spring-beans.dtd">
<beans>
<bean id="linkValidationServiceExcludeExtensionMatcher"
class="org.alfresco.repo.avm.util.FileExtensionNameMatcher">
<property name="extensions">
<list>
<value>.o</value>
<value>.bak</value>
<value>.tmp</value>
<value>.swp</value>
<value>~</value>
</list>
</property>
</bean>
<bean id="hrefBearingRequestPathNameMatcher"
class="org.alfresco.repo.avm.util.HrefBearingRequestPathNameMatcher">
<property name="extensions">
<list>
<!--
See also: http://filext.com/alphalist.php
and: HrefBearingRequestPathNameMatcher
Note: HrefBearingRequestPathNameMatcher does not require
a leading '.' (unlike FileExtensionNameMatcher),
nor does it suffer from performance problems when
the number of extensions is large. However, it
will not tolerate extensions with internal "." chars;
everything up to and including the final "." will
simply be ignored. Matching is not case sensitive.
-->
<value></value> <!-- null extension for dirs -->
<value>ahtml</value>
<value>ahtm</value>
<value>asphtml</value>
<value>asp</value>
<value>axs</value>
<value>bhtml</value>
<value>css</value>
<value>dci</value>
<value>dht</value>
<value>dochtml</value>
<value>docmhtml</value>
<value>ehtml</value>
<value>ephtml</value>
<value>fhtml</value>
<value>fphtml</value>
<value>hhtml</value>
<value>ht3</value>
<value>htc</value>
<value>htmls</value>
<value>html</value>
<value>htm</value>
<value>ihtml</value>
<value>jcs</value>
<value>jhtml</value>
<value>jhtm</value>
<value>jsp</value>
<value>log</value>
<value>mdhtml</value>
<value>mhtml</value>
<value>mhtm</value>
<value>mht</value>
<value>mml</value>
<value>php2</value>
<value>php3</value>
<value>php4</value>
<value>php5</value>
<value>php6</value>
<value>php7</value>
<value>php8</value>
<value>php9</value>
<value>php</value>
<value>phtml</value>
<value>phtml</value>
<value>phtm</value>
<value>pht</value>
<value>pl</value>
<value>ppthtml</value>
<value>pptm</value>
<value>pt</value>
<value>pubhtml</value>
<value>pubmhtml</value>
<value>rbx</value>
<value>rhtml</value>
<value>rmh</value>
<value>s1h</value>
<value>shtml3</value>
<value>shtml</value>
<value>shtm</value>
<value>sht</value>
<value>ssi</value>
<value>stml</value>
<value>stm</value>
<value>thtml</value>
<value>txt</value>
<value>whtek</value>
<value>xhtml</value>
<value>xhtml</value>
<value>xhtm</value>
<value>xhtm</value>
<value>xht</value>
<value>xlshtml</value>
<value>xlshtm</value>
<value>xlsmhtml</value>
<value>xml</value>
<value>xtml</value>
<value>ybhtm</value>
<!-- Add others here, if you'd like! -->
</list>
</property>
</bean>
<!--
The following URI schema types will not be validated,
and will not show up as "broken" during link checking.
If you want to exclude some URI schema (aka "protocol")
from link checking, add it to this list.
The reason that link checking doesn't simply just skip
anything that isn't http/https is that sometimes people
make typos such as "httpss://...", which is clearly a
nonsense protocol. Thus, the set of protocols that
are not checked is made explicit.
See also: http://tools.ietf.org/html/rfc3986
-->
<bean id="linkValidationServiceExcludeUriSchemeNameMatcher"
class="org.alfresco.repo.avm.util.UriSchemeNameMatcher">
<property name="extensions">
<list>
<value>aaa</value>
<value>aaas</value>
<value>acap</value>
<value>afs</value>
<value>cap</value>
<value>cid</value>
<value>crid</value>
<value>data</value>
<value>dav</value>
<value>dict</value>
<value>dns</value>
<value>dtn</value>
<value>fax</value>
<value>file</value>
<value>ftp</value>
<value>go</value>
<value>gopher</value>
<value>h323</value>
<!-- <value>http</value> NOT EXCLUDED -->
<!-- <value>https</value> NOT EXCLUDED -->
<value>iax2</value>
<value>icap</value>
<value>im</value>
<value>imap</value>
<value>info</value>
<value>ipp</value>
<value>iris</value>
<value>iris.beep</value>
<value>iris.lwz</value>
<value>iris.xpc</value>
<value>iris.xpcs</value>
<value>ldap</value>
<value>mailserver</value>
<value>mailto</value>
<value>mid</value>
<value>modem</value>
<value>msrp</value>
<value>msrps</value>
<value>mtqp</value>
<value>mupdate</value>
<value>news</value>
<value>nfs</value>
<value>nntp</value>
<value>opaquelocktoken</value>
<value>pop</value>
<value>pres</value>
<value>prospero</value>
<value>rtsp</value>
<value>service</value>
<value>shttp</value>
<value>sip</value>
<value>sips</value>
<value>snmp</value>
<value>soap.beep</value>
<value>soap.beeps</value>
<value>tag</value>
<value>tel</value>
<value>telnet</value>
<value>tftp</value>
<value>thismessage</value>
<value>tip</value>
<value>tn3270</value>
<value>tv</value>
<value>urn</value>
<value>vemmi</value>
<value>wais</value>
<value>xmlrpc.beep</value>
<value>xmlrpc.beeps</value>
<value>xmpp</value>
<value>z39.50r</value>
<value>z39.50s</value>
<!-- Add others here, if you'd like! -->
</list>
</property>
</bean>
<bean id="linkValidationService"
class="org.alfresco.linkvalidation.LinkValidationServiceImpl"
lazy-init="true">
<property name="attributeService">
<ref bean="AttributeService"/>
</property>
<property name="avmRemote">
<ref bean="avmRemote"/>
</property>
<property name="virtServerRegistry">
<ref bean="VirtServerRegistry"/>
</property>
<property name="AVMSyncService">
<ref bean="AVMSyncService"/>
</property>
<property name="excludePathMatcher">
<ref bean="linkValidationServiceExcludeExtensionMatcher"/>
</property>
<property name="hrefBearingRequestPathMatcher">
<ref bean="hrefBearingRequestPathNameMatcher"/>
</property>
<property name="excludeUriMatcher">
<ref bean="linkValidationServiceExcludeUriSchemeNameMatcher"/>
</property>
<property name="retryingTransactionHelper">
<ref bean="retryingTransactionHelper"/>
</property>
<property name="createVersionTxnListener">
<ref bean="createVersionTxnListener"/>
</property>
<property name="purgeVersionTxnListener">
<ref bean="purgeVersionTxnListener"/>
</property>
<property name="purgeStoreTxnListener">
<ref bean="purgeStoreTxnListener"/>
</property>
<!-- Poll interval to check getLatestSnapshotID (in milliseconds) -->
<property name="pollInterval" value="5000"/>
<!-- Timeouts (in milliseconds) -->
<property name="localConnectTimeout" value="10000"/>
<property name="remoteConnectTimeout" value="10000"/>
<property name="localReadTimeout" value="30000"/>
<property name="remoteReadTimeout" value="30000"/>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<!-- Advanced Option: -->
<!-- -->
<!-- JSSE Security parameters (for validating HTTPS links) -->
<!-- -->
<!-- NOTE: -->
<!-- -->
<!-- When the following jsse properties below are commented -->
<!-- out the LinkValidationService uses the system's default -->
<!-- cacert file and password. Typically, there's no need to -->
<!-- change these values, but if you'd like to do so, you can. -->
<!-- -->
<!-- The most common case where you'd use a custom trust store -->
<!-- and password is when you want to validate https links for -->
<!-- which the name on the site's cert does not match the host -->
<!-- you're contacting, is invalid, and/or is not trusted. -->
<!-- If you do nothing, such links will appear "broken". -->
<!-- One option would be to modify your default cert at: -->
<!-- -->
<!-- Unix: ${JAVA_HOME}/lib/security/cacerts -->
<!-- Windows: ${JAVA_HOME}\lib\security\cacerts -->
<!-- -->
<!-- However, this is often not particularly desirable. -->
<!-- By allowing you to point at a custom cacert/password, -->
<!-- the configuration options below allow you to validate -->
<!-- what would be otherwise untrusted "broken" https links -->
<!-- without disturbing the system's default cacert file. -->
<!-- -->
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<!-- <property name="jsseTrustStoreFile" value="...???..."/> -->
<!-- <property name="jsseTrustStorePassword" value="...???..."/> -->
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<!-- Low-level parameters that typical users should never modify! -->
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<property name="purgeAllValidationDataOnBootstrap" value="false"/>
</bean>
</beans>