diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java
index ad19ffe6c1..0d5663d2ba 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java
@@ -29,8 +29,7 @@ import org.apache.lucene.analysis.Tokenizer;
 /**
  * @author andyh
  * 
- * TODO To change the template for this generated type comment go to Window -
- * Preferences - Java - Code Style - Code Templates
+ * TODO To change the template for this generated type comment go to Window - Preferences - Java - Code Style - Code Templates
  */
 public class PathTokenFilter extends Tokenizer
 {
@@ -53,6 +52,8 @@ public class PathTokenFilter extends Tokenizer
     public final static String TOKEN_TYPE_PATH_ELEMENT_NAME = "PATH_ELEMENT_NAME";
 
     public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE = "PATH_ELEMENT_NAMESPACE";
+
+    public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX = "PATH_ELEMENT_NAMESPACE_PREFIX";
 
     char pathSeparator;
 
@@ -68,6 +69,8 @@ public class PathTokenFilter extends Tokenizer
     int nsEndDelimiterLength;
 
+    char nsPrefixDelimiter = ':';
+
     LinkedList tokens = new LinkedList();
 
     Iterator it = null;
 
@@ -129,13 +132,14 @@ public class PathTokenFilter extends Tokenizer
 
         while ((t = nextToken()) != null)
         {
             String text = t.termText();
-            
-            if((text.length() == 0) || text.equals(pathSeparator))
+
+            if (text.length() == 0)
             {
-                break;
+                continue;   // Skip if we find // or /; or ;; etc
             }
+
-            if (text.charAt(text.length()-1) == pathSeparator)
+            if (text.charAt(text.length() - 1) == pathSeparator)
             {
                 text = text.substring(0, text.length() - 1);
                 pathSplitToken = new Token(separatorTokenText, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_SEP);
@@ -144,11 +148,19 @@ public class PathTokenFilter extends Tokenizer
             }
 
             int split = -1;
+            boolean isPrefix = false;
 
             if ((text.length() > 0) && (text.charAt(0) == nsStartDelimiter))
             {
                 split = text.indexOf(nsEndDelimiter);
             }
+
+            if (split == -1)
+            {
+                split = text.indexOf(nsPrefixDelimiter);
+                isPrefix = true;
+            }
+
             if (split == -1)
             {
                 namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(),
@@ -158,10 +170,21 @@ public class PathTokenFilter extends Tokenizer
             }
             else
             {
-                namespaceToken = new Token(text.substring(nsStartDelimiterLength, (split + nsEndDelimiterLength - 1)),
-                        t.startOffset(), t.startOffset() + split, TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
-                nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset() + split
-                        + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
+                if (isPrefix)
+                {
+                    namespaceToken = new Token(text.substring(0, split), t.startOffset(), t.startOffset() + split,
+                            TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX);
+                    nameToken = new Token(text.substring(split + 1), t.startOffset()
+                            + split + 1, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
+                }
+                else
+                {
+                    namespaceToken = new Token(text.substring(nsStartDelimiterLength,
+                            (split + nsEndDelimiterLength - 1)), t.startOffset(), t.startOffset() + split,
+                            TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
+                    nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset()
+                            + split + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
+                }
             }
 
             namespaceToken.setPositionIncrement(1);
@@ -190,7 +213,6 @@ public class PathTokenFilter extends Tokenizer
                 pathSplitToken = null;
             }
 
-            
         }
 
         String countString = nf.format(lengthCounter);
@@ -208,12 +230,12 @@ public class PathTokenFilter extends Tokenizer
         it = tokens.iterator();
     }
 
-    
+
     int readerPosition = 0;
-    
+
     private Token nextToken() throws IOException
     {
-        if(readerPosition == -1)
+        if (readerPosition == -1)
         {
             return null;
         }
@@ -222,34 +244,39 @@ public class PathTokenFilter extends Tokenizer
         int start = readerPosition;
         int current;
         char c;
-        while((current = input.read()) != -1)
+        while ((current = input.read()) != -1)
         {
-            c = (char)current;
-            readerPosition++;
-            if(c == nsStartDelimiter)
-            {
-                inNameSpace = true;
-            }
-            else if(c == nsEndDelimiter)
-            {
-                inNameSpace = false;
-            }
-            else if(!inNameSpace && (c == '/'))
-            {
-                return new Token(buffer.toString(), start, readerPosition-1, "QNAME");
-            }
-            buffer.append(c);
-        }
+            c = (char) current;
+            readerPosition++;
+            if (c == nsStartDelimiter)
+            {
+                inNameSpace = true;
+            }
+            else if (c == nsEndDelimiter)
+            {
+                inNameSpace = false;
+            }
+            else if (!inNameSpace && (c == '/'))
+            {
+                return new Token(buffer.toString(), start, readerPosition - 1, "QNAME");
+            }
+            else if (!inNameSpace && (c == ';'))
+            {
+                buffer.append(c);
+                return new Token(buffer.toString(), start, readerPosition , "LASTQNAME");
+            }
+
+            buffer.append(c);
+        }
         readerPosition = -1;
-        if(!inNameSpace)
+        if (!inNameSpace)
         {
-            return new Token(buffer.toString(), start, readerPosition-1, "QNAME");
+            return new Token(buffer.toString(), start, readerPosition - 1, "QNAME");
         }
         else
         {
-            throw new IllegalStateException("QName terminated incorrectly: "+buffer.toString());
+            throw new IllegalStateException("QName terminated incorrectly: " + buffer.toString());
         }
-        
-        
+
     }
 }
\ No newline at end of file
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java
new file mode 100644
index 0000000000..27c64246d1
--- /dev/null
+++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005 Alfresco, Inc.
+ *
+ * Licensed under the Mozilla Public License version 1.1
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.search.impl.lucene.analysis;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+import junit.framework.TestCase;
+
+public class PathTokenFilterTest extends TestCase
+{
+
+    public PathTokenFilterTest()
+    {
+        super();
+    }
+
+    public PathTokenFilterTest(String arg0)
+    {
+        super(arg0);
+    }
+
+
+    public void testFullPath() throws IOException
+    {
+        tokenise("{uri1}one", new String[]{"uri1", "one"});
+        tokenise("/{uri1}one", new String[]{"uri1", "one"});
+        tokenise("{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("/{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        tokenise("/{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        try
+        {
+            tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        }
+        catch(IllegalStateException ise)
+        {
+
+        }
+
+    }
+
+
+    public void testPrefixPath() throws IOException
+    {
+        tokenise("uri1:one", new String[]{"uri1", "one"});
+        tokenise("/uri1:one", new String[]{"uri1", "one"});
+        tokenise("uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("/uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        tokenise("/uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        try
+        {
+            tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        }
+        catch(IllegalStateException ise)
+        {
+
+        }
+
+    }
+
+
+    public void testMixedPath() throws IOException
+    {
+
+        tokenise("{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("/{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        tokenise("/uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        try
+        {
+            tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        }
+        catch(IllegalStateException ise)
+        {
+
+        }
+
+    }
+
+
+    private void tokenise(String path, String[] tokens) throws IOException
+    {
+        StringReader reader = new StringReader(path);
+        TokenStream ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
+                PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
+                PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
+        Token t;
+        int i = 0;
+        while( (t = ts.next()) != null)
+        {
+            if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE))
+            {
+                assert(i % 2 == 0);
+                assertEquals(t.termText(), tokens[i++]);
+            }
+            else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX))
+            {
+                assert(i % 2 == 0);
+                assertEquals(t.termText(), tokens[i++]);
+            }
+            else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME))
+            {
+                assert(i % 2 == 1);
+                assertEquals(t.termText(), tokens[i++]);
+            }
+        }
+        if(i != tokens.length)
+        {
+            fail("Invalid number of tokens, found "+i+" and expected "+tokens.length);
+        }
+    }
+}