mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-06-30 18:15:39 +00:00
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@18931 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
202 lines
6.3 KiB
Java
202 lines
6.3 KiB
Java
/*-----------------------------------------------------------------------------
|
|
* Copyright 2007-2010 Alfresco Software Limited.
|
|
*
|
|
* This file is part of Alfresco
|
|
*
|
|
* Alfresco is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Alfresco is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
*
|
|
* Author Jon Cox <jcox@alfresco.com>
|
|
* File UriSchemeNameMatcher.java
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
package org.alfresco.repo.avm.util;
|
|
|
|
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;

import org.alfresco.util.NameMatcher;
|
|
|
|
/**
|
|
* A NameMatcher that matches an incoming URL against a list of schemes
|
|
* (less formally known as "protocols"), case insensitively.
|
|
* The formal spec for parsing URIs is RFC-3986
|
|
* <p>
|
|
* Perhaps someday, it might be worthwhile to create a specific
|
|
* parser for each registered scheme-specific part, and validate
|
|
* that; for now, we'll just be more lax, and assume the URI
|
|
* is always scheme-qualified. This matcher will look no further
|
|
* than the leading colon, and declare "no match" otherwise.
|
|
* The discussion below explains why.
|
|
* <p>
|
|
* See RFC 3986 (http://tools.ietf.org/html/rfc3986):
|
|
* <pre>
|
|
* The following regex parses URIs:
|
|
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
|
|
*
|
|
* Given the following URI:
|
|
* http://www.ics.uci.edu/pub/ietf/uri/#Related
|
|
*
|
|
* The captured subexpressions are:
|
|
*
|
|
* $1 = http:
|
|
* $2 = http
|
|
* $3 = //www.ics.uci.edu
|
|
* $4 = www.ics.uci.edu
|
|
* $5 = /pub/ietf/uri/
|
|
* $6 = &lt;undefined&gt;
|
|
* $7 = &lt;undefined&gt;
|
|
* $8 = #Related
|
|
* $9 = Related
|
|
*
|
|
* NOTE:
|
|
* A URI can be non-scheme qualified because $1 is optional. Therefore,
|
|
* the following are all examples of valid non-scheme qualified URIs:
|
|
*
|
|
* ""
|
|
* "moo@cow.com"
|
|
* "moo@cow.com?wow"
|
|
* "moo@cow.com?wow#zow"
|
|
* "moo@cow.com#zow"
|
|
* "/"
|
|
* "/moo/cow"
|
|
* "/moo/cow?wow"
|
|
* "/moo/cow?wow#zow"
|
|
* "/moo/cow#zow"
|
|
* "//moo/cow"
|
|
* "//moo.com/cow"
|
|
* "//moo.com/cow/"
|
|
* "//moo.com/cow?wow"
|
|
* "//moo.com/cow?wow#zow"
|
|
* "//moo.com/cow#zow"
|
|
* "//moo.com:8080/cow"
|
|
* "//moo.com:/cow"
|
|
* "//moo.com:8080/cow?wow"
|
|
* "//moo.com:8080/cow?wow#zow"
|
|
* "//moo.com:8080/cow#zow"
|
|
* "///moo/cow"
|
|
* "///moo/cow?wow"
|
|
* "///moo/cow?wow#zow"
|
|
* "///moo/cow#zow"
|
|
*
|
|
* And so forth...
|
|
*
|
|
* </pre>
|
|
*
|
|
* Thus the business end of things as far as scheme matching goes is $2.
|
|
* Most schemes will have a $3 that starts with '//', but not all.
|
|
* Specifically, the following have no network path ('//') segment,
|
|
* or aren't required to (source: http://en.wikipedia.org/wiki/URI_scheme):
|
|
* <pre>
|
|
*
|
|
* cid data dns fax go h323 iax2 mailto mid news pres sip
|
|
* sips tel urn xmpp about aim callto feed magnet msnim
|
|
* psyc skype sms stream xfire ymsgr
|
|
*
|
|
* </pre>
|
|
*
|
|
* Visually the parts are as follows:
|
|
* <pre>
|
|
*
|
|
* foo://example.com:10042/over/there?name=ferret#nose
|
|
* \_/ \_______________/\_________/ \_________/ \__/
|
|
* | | | | |
|
|
* scheme authority path query fragment
|
|
* | _____________________|__
|
|
* / \ / \
|
|
* urn:example:animal:ferret:nose
|
|
*
|
|
* </pre>
|
|
*
|
|
* This is useful for classifying URLs for things like whether or not
|
|
* they're supported by an application.
|
|
*
|
|
* For example, the LinkValidationService supports http, and https,
|
|
* is willing to ignore certain well-formed URLs, but treats URLs
|
|
* with unknown and unsupported protocols as broken. Concretely,
|
|
* we'd like to avoid treating something like the following one
|
|
* as being non-broken even though you can't apply GET or HEAD
|
|
* to it.
|
|
*
|
|
* <pre>
|
|
* &lt;a href="mailto:alice@example.com"&gt;Email&lt;/a&gt;
|
|
* </pre>
|
|
*
|
|
* As of June 2007, IANA had over 70 registered and provisional protocols
|
|
* listed at http://www.iana.org/assignments/uri-schemes.html but sometimes
|
|
* people create their own too (e.g.: cvs). Here's the official list:
|
|
* <pre>
|
|
*
|
|
* aaa aaas acap afs cap cid crid data dav dict dns dtn fax file
|
|
* ftp go gopher h323 http https iax2 icap im imap info ipp iris
|
|
* iris.beep iris.lwz iris.xpc iris.xpcs ldap mailserver mailto
|
|
* mid modem msrp msrps mtqp mupdate news nfs nntp opaquelocktoken
|
|
* pop pres prospero rtsp service shttp sip sips snmp soap.beep
|
|
* soap.beeps tag tel telnet tftp thismessage tip tn3270 tv urn
|
|
* vemmi wais xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s
|
|
* </pre>
|
|
*
|
|
*/
|
|
public class UriSchemeNameMatcher implements NameMatcher, Serializable
|
|
{
|
|
/**
|
|
* The extensions to match.
|
|
*/
|
|
HashMap<String,String> scheme_;
|
|
|
|
/**
|
|
* Default constructor.
|
|
*/
|
|
public UriSchemeNameMatcher()
|
|
{
|
|
scheme_ = new HashMap<String,String>();
|
|
}
|
|
|
|
/**
|
|
* Set the protocols case insensitively (cannonicalized to lower-case).
|
|
*
|
|
* @param protocols
|
|
*/
|
|
public void setExtensions(List<String> protocols)
|
|
{
|
|
for (String protocol : protocols)
|
|
{
|
|
scheme_.put( protocol.toLowerCase(), null );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns true if the URL's protocol is in the of
|
|
* being matched. Everything up to but not including
|
|
* the intial colon is
|
|
*/
|
|
public boolean matches(String uri)
|
|
{
|
|
if ( uri == null ) { return false; }
|
|
|
|
int colon_index = uri.indexOf(':');
|
|
|
|
if ( colon_index >= 0)
|
|
{
|
|
String proto =
|
|
uri.substring(0, colon_index).toLowerCase();
|
|
|
|
return scheme_.containsKey( proto );
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
|