Extended Path parsing ...

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2111 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-07-24 17:32:48 +00:00 · 2006-01-13 16:28:17 +00:00
parent 985138446c
commit cd72520330
2 changed files with 194 additions and 36 deletions
--- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java
@@ -29,8 +29,7 @@ import org.apache.lucene.analysis.Tokenizer;
 /**
 * @author andyh
 * 
- * TODO To change the template for this generated type comment go to Window -
- * Preferences - Java - Code Style - Code Templates
+ * TODO To change the template for this generated type comment go to Window - Preferences - Java - Code Style - Code Templates
 */
 public class PathTokenFilter extends Tokenizer
 {
@@ -54,6 +53,8 @@ public class PathTokenFilter extends Tokenizer

    public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE = "PATH_ELEMENT_NAMESPACE";
    
+    public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX = "PATH_ELEMENT_NAMESPACE_PREFIX";
+
    char pathSeparator;

    String separatorTokenText;
@@ -68,6 +69,8 @@ public class PathTokenFilter extends Tokenizer

    int nsEndDelimiterLength;

+    char nsPrefixDelimiter = ':';
+
    LinkedList<Token> tokens = new LinkedList<Token>();

    Iterator<Token> it = null;
@@ -130,11 +133,12 @@ public class PathTokenFilter extends Tokenizer
        {
            String text = t.termText();

-            if((text.length() == 0) || text.equals(pathSeparator))
+            if (text.length() == 0)
            {
-                break;
+                continue; // Skip empty elements, e.g. those produced by "//", "/;" or ";;"
            }

+            
            if (text.charAt(text.length() - 1) == pathSeparator)
            {
                text = text.substring(0, text.length() - 1);
@@ -144,11 +148,19 @@ public class PathTokenFilter extends Tokenizer
            }

            int split = -1;
+            boolean isPrefix = false;

            if ((text.length() > 0) && (text.charAt(0) == nsStartDelimiter))
            {
                split = text.indexOf(nsEndDelimiter);
            }
+
+            if (split == -1)
+            {
+                split = text.indexOf(nsPrefixDelimiter);
+                isPrefix = true;
+            }
+
            if (split == -1)
            {
                namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(),
@@ -158,10 +170,21 @@ public class PathTokenFilter extends Tokenizer
            }
            else
            {
-                namespaceToken = new Token(text.substring(nsStartDelimiterLength, (split + nsEndDelimiterLength - 1)),
-                        t.startOffset(), t.startOffset() + split, TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
-                nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset() + split
-                        + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
+                if (isPrefix)
+                {
+                    namespaceToken = new Token(text.substring(0, split), t.startOffset(), t.startOffset() + split,
+                            TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX);
+                    nameToken = new Token(text.substring(split + 1), t.startOffset()
+                            + split + 1, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
+                }
+                else
+                {
+                    namespaceToken = new Token(text.substring(nsStartDelimiterLength,
+                            (split + nsEndDelimiterLength - 1)), t.startOffset(), t.startOffset() + split,
+                            TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
+                    nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset()
+                            + split + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
+                }
            }

            namespaceToken.setPositionIncrement(1);
@@ -190,7 +213,6 @@ public class PathTokenFilter extends Tokenizer

                pathSplitToken = null;
            }
-
        }

        String countString = nf.format(lengthCounter);
@@ -238,6 +260,12 @@ public class PathTokenFilter extends Tokenizer
            {
                return new Token(buffer.toString(), start, readerPosition - 1, "QNAME");
            }
+            else if (!inNameSpace && (c == ';'))
+            {
+                buffer.append(c);
+                return new Token(buffer.toString(), start, readerPosition , "LASTQNAME");
+            }
+            
            buffer.append(c);
        }
        readerPosition = -1;
@@ -250,6 +278,5 @@ public class PathTokenFilter extends Tokenizer
            throw new IllegalStateException("QName terminated incorrectly: " + buffer.toString());
        }

-        
    }
 }
--- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005 Alfresco, Inc.
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.search.impl.lucene.analysis;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks the namespace and name tokens that {@link PathTokenFilter} emits for paths written with full
+ * <code>{uri}name</code> elements, with <code>prefix:name</code> elements, and with a mixture of both.
+ */
+public class PathTokenFilterTest extends TestCase
+{
+    public PathTokenFilterTest()
+    {
+        super();
+    }
+
+    public PathTokenFilterTest(String arg0)
+    {
+        super(arg0);
+    }
+
+    /** Paths whose elements all use the <code>{uri}name</code> form, with and without outer separators. */
+    public void testFullPath() throws IOException
+    {
+        tokenise("{uri1}one", new String[]{"uri1", "one"});
+        tokenise("/{uri1}one", new String[]{"uri1", "one"});
+        tokenise("{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("/{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        tokenise("/{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        try
+        {
+            tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        }
+        catch (IllegalStateException ignored)
+        {
+            // Tolerated on purpose: a ';' path may either tokenise as listed or be rejected with this exception.
+        }
+    }
+
+    /** Paths whose elements all use the <code>prefix:name</code> form, with and without outer separators. */
+    public void testPrefixPath() throws IOException
+    {
+        tokenise("uri1:one", new String[]{"uri1", "one"});
+        tokenise("/uri1:one", new String[]{"uri1", "one"});
+        tokenise("uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("/uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        tokenise("/uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        try
+        {
+            tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        }
+        catch (IllegalStateException ignored)
+        {
+            // Tolerated on purpose: a ';' path may either tokenise as listed or be rejected with this exception.
+        }
+    }
+
+    /** Paths that mix <code>{uri}name</code> and <code>prefix:name</code> elements. */
+    public void testMixedPath() throws IOException
+    {
+        tokenise("{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("/{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
+        tokenise("uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        tokenise("/uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
+        try
+        {
+            tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
+        }
+        catch (IllegalStateException ignored)
+        {
+            // Tolerated on purpose: a ';' path may either tokenise as listed or be rejected with this exception.
+        }
+    }
+
+    /**
+     * Tokenises <code>path</code> with a {@link PathTokenFilter} and verifies its namespace and name tokens.
+     * @param path the path to tokenise
+     * @param tokens expected token text, alternating namespace (URI or prefix) and name; other token types are skipped
+     * @throws IOException propagated from the token stream
+     */
+    private void tokenise(String path, String[] tokens) throws IOException
+    {
+        StringReader reader = new StringReader(path);
+        TokenStream ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
+                PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
+                PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
+        Token t;
+        int i = 0;
+        while ((t = ts.next()) != null)
+        {
+            if (t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE)
+                    || t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX))
+            {
+                assertTrue("Namespace token found where a name was expected, position " + i, i % 2 == 0);
+                assertEquals(tokens[i++], t.termText());
+            }
+            else if (t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME))
+            {
+                assertTrue("Name token found where a namespace was expected, position " + i, i % 2 == 1);
+                assertEquals(tokens[i++], t.termText());
+            }
+        }
+        if (i != tokens.length)
+        {
+            fail("Invalid number of tokens, found "+i+" and expected "+tokens.length);
+        }
+    }
+}