Generalized handling of URI schemes to be ignored during link validation.

These are configurable in linkvalidation-service-context.xml
Also ensured the list of links per file is sorted & unique.

 Here's a list of known protocols:

 http://www.iana.org/assignments/uri-schemes.html

      aaa aaas acap afs cap cid crid data dav dict dns dtn fax file
      ftp go gopher h323 http https iax2 icap im imap info ipp iris
      iris.beep iris.lwz iris.xpc iris.xpcs ldap mailserver mailto
      mid modem msrp msrps mtqp mupdate news nfs nntp opaquelocktoken
      pop pres prospero rtsp service shttp sip sips snmp soap.beep
      soap.beeps tag tel telnet tftp thismessage tip tn3270 tv urn
      vemmi wais xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s

 For now, all these URI schemes get a free pass except http & https. 
 Any URI not qualified by one of these protocol/scheme designators 
 is presumed to be broken.  It would be nice to validate ftp links
 for real, but that won't happen for a while.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6189 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jon Cox
2007-07-07 02:58:22 +00:00
parent 4457c5fb1d
commit c5d97bfebe
2 changed files with 306 additions and 1 deletions

View File

@@ -118,6 +118,103 @@
</property>
</bean>
<!--
The following URI schemes will not be validated,
and will not show up as "broken" during link checking.
If you want to exclude a URI scheme (aka "protocol")
from link checking, add it to this list.
The reason that link checking doesn't simply skip
anything that isn't http/https is that sometimes people
make typos such as "httpss://...", which is clearly a
nonsense protocol. Thus, the set of protocols that
are not checked is made explicit.
See also: http://tools.ietf.org/html/rfc3986
-->
<bean id="linkValidationServiceExcludeUriSchemeNameMatcher"
      class="org.alfresco.repo.avm.util.UriSchemeNameMatcher">

    <!--
        One <value> per URI scheme name that is listed for exclusion.
        Entries are kept in alphabetical order.
        NOTE(review): the property is called "extensions" even though the
        entries are scheme names; confirm against UriSchemeNameMatcher
        before renaming anything.
    -->
    <property name="extensions">
        <list>
            <value>aaa</value>
            <value>aaas</value>
            <value>acap</value>
            <value>afs</value>
            <value>cap</value>
            <value>cid</value>
            <value>crid</value>
            <value>data</value>
            <value>dav</value>
            <value>dict</value>
            <value>dns</value>
            <value>dtn</value>
            <value>fax</value>
            <value>file</value>
            <value>ftp</value>
            <value>go</value>
            <value>gopher</value>
            <value>h323</value>

            <!-- "http" and "https" are intentionally absent from this list: NOT EXCLUDED -->

            <value>iax2</value>
            <value>icap</value>
            <value>im</value>
            <value>imap</value>
            <value>info</value>
            <value>ipp</value>
            <value>iris</value>
            <value>iris.beep</value>
            <value>iris.lwz</value>
            <value>iris.xpc</value>
            <value>iris.xpcs</value>
            <value>ldap</value>
            <value>mailserver</value>
            <value>mailto</value>
            <value>mid</value>
            <value>modem</value>
            <value>msrp</value>
            <value>msrps</value>
            <value>mtqp</value>
            <value>mupdate</value>
            <value>news</value>
            <value>nfs</value>
            <value>nntp</value>
            <value>opaquelocktoken</value>
            <value>pop</value>
            <value>pres</value>
            <value>prospero</value>
            <value>rtsp</value>
            <value>service</value>
            <value>shttp</value>
            <value>sip</value>
            <value>sips</value>
            <value>snmp</value>
            <value>soap.beep</value>
            <value>soap.beeps</value>
            <value>tag</value>
            <value>tel</value>
            <value>telnet</value>
            <value>tftp</value>
            <value>thismessage</value>
            <value>tip</value>
            <value>tn3270</value>
            <value>tv</value>
            <value>urn</value>
            <value>vemmi</value>
            <value>wais</value>
            <value>xmlrpc.beep</value>
            <value>xmlrpc.beeps</value>
            <value>xmpp</value>
            <value>z39.50r</value>
            <value>z39.50s</value>

            <!-- Append further scheme names here as needed. -->
        </list>
    </property>
</bean>
<bean id="linkValidationService"
class="org.alfresco.linkvalidation.LinkValidationServiceImpl"
lazy-init="true">
@@ -133,12 +230,15 @@
<property name="AVMSyncService">
<ref bean="AVMSyncService"/>
</property>
<property name="excludeMatcher">
<property name="excludePathMatcher">
<ref bean="linkValidationServiceExcludeExtensionMatcher"/>
</property>
<property name="hrefBearingRequestPathMatcher">
<ref bean="hrefBearingRequestPathNameMatcher"/>
</property>
<property name="excludeUriMatcher">
<ref bean="linkValidationServiceExcludeUriSchemeNameMatcher"/>
</property>
<property name="retryingTransactionHelper">
<ref bean="retryingTransactionHelper"/>
</property>