mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Extended Path parsing ...
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2111 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -29,8 +29,7 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||
/**
|
||||
* @author andyh
|
||||
*
|
||||
* TODO To change the template for this generated type comment go to Window -
|
||||
* Preferences - Java - Code Style - Code Templates
|
||||
* TODO To change the template for this generated type comment go to Window - Preferences - Java - Code Style - Code Templates
|
||||
*/
|
||||
public class PathTokenFilter extends Tokenizer
|
||||
{
|
||||
@@ -54,6 +53,8 @@ public class PathTokenFilter extends Tokenizer
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE = "PATH_ELEMENT_NAMESPACE";
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX = "PATH_ELEMENT_NAMESPACE_PREFIX";
|
||||
|
||||
char pathSeparator;
|
||||
|
||||
String separatorTokenText;
|
||||
@@ -68,6 +69,8 @@ public class PathTokenFilter extends Tokenizer
|
||||
|
||||
int nsEndDelimiterLength;
|
||||
|
||||
char nsPrefixDelimiter = ':';
|
||||
|
||||
LinkedList<Token> tokens = new LinkedList<Token>();
|
||||
|
||||
Iterator<Token> it = null;
|
||||
@@ -130,11 +133,12 @@ public class PathTokenFilter extends Tokenizer
|
||||
{
|
||||
String text = t.termText();
|
||||
|
||||
if((text.length() == 0) || text.equals(pathSeparator))
|
||||
if (text.length() == 0)
|
||||
{
|
||||
break;
|
||||
continue; // Skip if we find // or /; or ;; etc
|
||||
}
|
||||
|
||||
|
||||
if (text.charAt(text.length() - 1) == pathSeparator)
|
||||
{
|
||||
text = text.substring(0, text.length() - 1);
|
||||
@@ -144,11 +148,19 @@ public class PathTokenFilter extends Tokenizer
|
||||
}
|
||||
|
||||
int split = -1;
|
||||
boolean isPrefix = false;
|
||||
|
||||
if ((text.length() > 0) && (text.charAt(0) == nsStartDelimiter))
|
||||
{
|
||||
split = text.indexOf(nsEndDelimiter);
|
||||
}
|
||||
|
||||
if (split == -1)
|
||||
{
|
||||
split = text.indexOf(nsPrefixDelimiter);
|
||||
isPrefix = true;
|
||||
}
|
||||
|
||||
if (split == -1)
|
||||
{
|
||||
namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(),
|
||||
@@ -158,10 +170,21 @@ public class PathTokenFilter extends Tokenizer
|
||||
}
|
||||
else
|
||||
{
|
||||
namespaceToken = new Token(text.substring(nsStartDelimiterLength, (split + nsEndDelimiterLength - 1)),
|
||||
t.startOffset(), t.startOffset() + split, TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
|
||||
nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset() + split
|
||||
+ nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
|
||||
if (isPrefix)
|
||||
{
|
||||
namespaceToken = new Token(text.substring(0, split), t.startOffset(), t.startOffset() + split,
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX);
|
||||
nameToken = new Token(text.substring(split + 1), t.startOffset()
|
||||
+ split + 1, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
|
||||
}
|
||||
else
|
||||
{
|
||||
namespaceToken = new Token(text.substring(nsStartDelimiterLength,
|
||||
(split + nsEndDelimiterLength - 1)), t.startOffset(), t.startOffset() + split,
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
|
||||
nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset()
|
||||
+ split + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
namespaceToken.setPositionIncrement(1);
|
||||
@@ -190,7 +213,6 @@ public class PathTokenFilter extends Tokenizer
|
||||
|
||||
pathSplitToken = null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
String countString = nf.format(lengthCounter);
|
||||
@@ -238,6 +260,12 @@ public class PathTokenFilter extends Tokenizer
|
||||
{
|
||||
return new Token(buffer.toString(), start, readerPosition - 1, "QNAME");
|
||||
}
|
||||
else if (!inNameSpace && (c == ';'))
|
||||
{
|
||||
buffer.append(c);
|
||||
return new Token(buffer.toString(), start, readerPosition , "LASTQNAME");
|
||||
}
|
||||
|
||||
buffer.append(c);
|
||||
}
|
||||
readerPosition = -1;
|
||||
@@ -250,6 +278,5 @@ public class PathTokenFilter extends Tokenizer
|
||||
throw new IllegalStateException("QName terminated incorrectly: " + buffer.toString());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Alfresco, Inc.
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
public class PathTokenFilterTest extends TestCase
|
||||
{
|
||||
|
||||
public PathTokenFilterTest()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
public PathTokenFilterTest(String arg0)
|
||||
{
|
||||
super(arg0);
|
||||
}
|
||||
|
||||
|
||||
public void testFullPath() throws IOException
|
||||
{
|
||||
tokenise("{uri1}one", new String[]{"uri1", "one"});
|
||||
tokenise("/{uri1}one", new String[]{"uri1", "one"});
|
||||
tokenise("{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("/{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
tokenise("/{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
try
|
||||
{
|
||||
tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
}
|
||||
catch(IllegalStateException ise)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testPrefixPath() throws IOException
|
||||
{
|
||||
tokenise("uri1:one", new String[]{"uri1", "one"});
|
||||
tokenise("/uri1:one", new String[]{"uri1", "one"});
|
||||
tokenise("uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("/uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
tokenise("/uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
try
|
||||
{
|
||||
tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
}
|
||||
catch(IllegalStateException ise)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testMixedPath() throws IOException
|
||||
{
|
||||
|
||||
tokenise("{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("/{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
tokenise("/uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
try
|
||||
{
|
||||
tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
}
|
||||
catch(IllegalStateException ise)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void tokenise(String path, String[] tokens) throws IOException
|
||||
{
|
||||
StringReader reader = new StringReader(path);
|
||||
TokenStream ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
|
||||
PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
|
||||
PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
|
||||
Token t;
|
||||
int i = 0;
|
||||
while( (t = ts.next()) != null)
|
||||
{
|
||||
if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE))
|
||||
{
|
||||
assert(i % 2 == 0);
|
||||
assertEquals(t.termText(), tokens[i++]);
|
||||
}
|
||||
else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX))
|
||||
{
|
||||
assert(i % 2 == 0);
|
||||
assertEquals(t.termText(), tokens[i++]);
|
||||
}
|
||||
else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME))
|
||||
{
|
||||
assert(i % 2 == 1);
|
||||
assertEquals(t.termText(), tokens[i++]);
|
||||
}
|
||||
}
|
||||
if(i != tokens.length)
|
||||
{
|
||||
fail("Invalid number of tokens, found "+i+" and expected "+tokens.length);
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user