From fd351b94113cb5f6339cd69c2e48370147c58443 Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Tue, 5 Dec 2006 11:20:22 +0000 Subject: [PATCH] Updated Query parser git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4519 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../repo/search/impl/lucene/CharStream.java | 206 +++++---- .../search/impl/lucene/FastCharStream.java | 3 +- .../search/impl/lucene/LuceneQueryParser.java | 4 +- .../impl/lucene/LuceneSearcherImpl2.java | 7 +- .../impl/lucene/MultiFieldQueryParser.java | 268 ++++++++++++ .../search/impl/lucene/ParseException.java | 384 ++++++++--------- .../repo/search/impl/lucene/QueryParser.java | 392 ++++++++++-------- .../repo/search/impl/lucene/QueryParser.jj | 377 ++++++++++------- .../impl/lucene/QueryParserTokenManager.java | 15 +- .../repo/search/impl/lucene/Token.java | 177 ++++---- .../search/impl/lucene/TokenMgrError.java | 266 ++++++------ 11 files changed, 1235 insertions(+), 864 deletions(-) create mode 100644 source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java diff --git a/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java b/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java index 0e11c043db..b01b9ea8a9 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java @@ -1,110 +1,96 @@ -/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */ -package org.alfresco.repo.search.impl.lucene; - -/** - * This interface describes a character stream that maintains line and - * column number positions of the characters. It also has the capability - * to backup the stream to some extent. An implementation of this - * interface is used in the TokenManager implementation generated by - * JavaCCParser. - * - * All the methods except backup can be implemented in any fashion. backup - * needs to be implemented correctly for the correct operation of the lexer. - * Rest of the methods are all used to get information like line number, - * column number and the String that constitutes a token and are not used - * by the lexer. Hence their implementation won't affect the generated lexer's - * operation. - */ - -public interface CharStream { - - /** - * Returns the next character from the selected input. The method - * of selecting the input is the responsibility of the class - * implementing this interface. Can throw any java.io.IOException. - */ - char readChar() throws java.io.IOException; - - /** - * Returns the column position of the character last read. - * @deprecated - * @see #getEndColumn - */ - int getColumn(); - - /** - * Returns the line number of the character last read. - * @deprecated - * @see #getEndLine - */ - int getLine(); - - /** - * Returns the column number of the last character for current token (being - * matched after the last call to BeginTOken). - */ - int getEndColumn(); - - /** - * Returns the line number of the last character for current token (being - * matched after the last call to BeginTOken). - */ - int getEndLine(); - - /** - * Returns the column number of the first character for current token (being - * matched after the last call to BeginTOken). - */ - int getBeginColumn(); - - /** - * Returns the line number of the first character for current token (being - * matched after the last call to BeginTOken). - */ - int getBeginLine(); - - /** - * Backs up the input stream by amount steps. 
Lexer calls this method if it - * had already read some characters, but could not use them to match a - * (longer) token. So, they will be used again as the prefix of the next - * token and it is the implemetation's responsibility to do this right. - */ - void backup(int amount); - - /** - * Returns the next character that marks the beginning of the next token. - * All characters must remain in the buffer between two successive calls - * to this method to implement backup correctly. - */ - char BeginToken() throws java.io.IOException; - - /** - * Returns a string made up of characters from the marked token beginning - * to the current buffer position. Implementations have the choice of returning - * anything that they want to. For example, for efficiency, one might decide - * to just return null, which is a valid implementation. - */ - String GetImage(); - - /** - * Returns an array of characters that make up the suffix of length 'len' for - * the currently matched token. This is used to build up the matched string - * for use in actions in the case of MORE. A simple and inefficient - * implementation of this is as follows : - * - * { - * String t = GetImage(); - * return t.substring(t.length() - len, t.length()).toCharArray(); - * } - */ - char[] GetSuffix(int len); - - /** - * The lexer calls this function to indicate that it is done with the stream - * and hence implementations can free any resources held by this class. - * Again, the body of this function can be just empty and it will not - * affect the lexer's operation. - */ - void Done(); - -} +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */ +package org.alfresco.repo.search.impl.lucene; + +/** + * This interface describes a character stream that maintains line and + * column number positions of the characters. It also has the capability + * to backup the stream to some extent. An implementation of this + * interface is used in the TokenManager implementation generated by + * JavaCCParser. + * + * All the methods except backup can be implemented in any fashion. backup + * needs to be implemented correctly for the correct operation of the lexer. + * Rest of the methods are all used to get information like line number, + * column number and the String that constitutes a token and are not used + * by the lexer. Hence their implementation won't affect the generated lexer's + * operation. + */ + +public interface CharStream { + + /** + * Returns the next character from the selected input. The method + * of selecting the input is the responsibility of the class + * implementing this interface. Can throw any java.io.IOException. + */ + char readChar() throws java.io.IOException; + + /** + * Returns the column number of the last character for current token (being + * matched after the last call to BeginTOken). + */ + int getEndColumn(); + + /** + * Returns the line number of the last character for current token (being + * matched after the last call to BeginTOken). + */ + int getEndLine(); + + /** + * Returns the column number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginColumn(); + + /** + * Returns the line number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginLine(); + + /** + * Backs up the input stream by amount steps. Lexer calls this method if it + * had already read some characters, but could not use them to match a + * (longer) token. 
So, they will be used again as the prefix of the next + * token and it is the implemetation's responsibility to do this right. + */ + void backup(int amount); + + /** + * Returns the next character that marks the beginning of the next token. + * All characters must remain in the buffer between two successive calls + * to this method to implement backup correctly. + */ + char BeginToken() throws java.io.IOException; + + /** + * Returns a string made up of characters from the marked token beginning + * to the current buffer position. Implementations have the choice of returning + * anything that they want to. For example, for efficiency, one might decide + * to just return null, which is a valid implementation. + */ + String GetImage(); + + /** + * Returns an array of characters that make up the suffix of length 'len' for + * the currently matched token. This is used to build up the matched string + * for use in actions in the case of MORE. A simple and inefficient + * implementation of this is as follows : + * + * { + * String t = GetImage(); + * return t.substring(t.length() - len, t.length()).toCharArray(); + * } + */ + char[] GetSuffix(int len); + + /** + * The lexer calls this function to indicate that it is done with the stream + * and hence implementations can free any resources held by this class. + * Again, the body of this function can be just empty and it will not + * affect the lexer's operation. + */ + void Done(); + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java b/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java index 04d659a096..762b46b1ea 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java @@ -17,8 +17,7 @@ package org.alfresco.repo.search.impl.lucene; * limitations under the License. */ -import java.io.IOException; -import java.io.Reader; +import java.io.*; /** An efficient implementation of JavaCC's CharStream interface.
Note that * this does not do line-number counting, but instead keeps track of the diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java index 1901290807..f48d6b3782 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java @@ -63,7 +63,7 @@ public class LuceneQueryParser extends QueryParser * if the parsing fails */ static public Query parse(String query, String field, Analyzer analyzer, - NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, int defaultOperator) + NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, Operator defaultOperator) throws ParseException { if (s_logger.isDebugEnabled()) @@ -71,7 +71,7 @@ public class LuceneQueryParser extends QueryParser s_logger.debug("Using Alfresco Lucene Query Parser for query: " + query); } LuceneQueryParser parser = new LuceneQueryParser(field, analyzer); - parser.setOperator(defaultOperator); + parser.setDefaultOperator(defaultOperator); parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setDictionaryService(dictionaryService); return parser.parse(query); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java index cf3fa13ae6..db1aba72ad 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java @@ -32,6 +32,7 @@ import org.alfresco.repo.search.Indexer; import org.alfresco.repo.search.QueryRegisterComponent; import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.impl.NodeSearcher; +import org.alfresco.repo.search.impl.lucene.QueryParser.Operator; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.repository.InvalidNodeRefException; import org.alfresco.service.cmr.repository.NodeRef; @@ -203,14 +204,14 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2 try { - int defaultOperator; + Operator defaultOperator; if (searchParameters.getDefaultOperator() == SearchParameters.AND) { - defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_AND; + defaultOperator = LuceneQueryParser.AND_OPERATOR; } else { - defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_OR; + defaultOperator = LuceneQueryParser.OR_OPERATOR; } Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( diff --git a/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java new file mode 100644 index 0000000000..51430152c4 --- /dev/null +++ b/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java @@ -0,0 +1,268 @@ +package org.alfresco.repo.search.impl.lucene; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; + +import java.util.Vector; + +/** + * A QueryParser which constructs queries to search multiple fields. + * + * @author Kelvin Tan, Daniel Naber + * @version $Revision: 406088 $ + */ +public class MultiFieldQueryParser extends QueryParser +{ + + private String[] fields; + + /** + * Creates a MultiFieldQueryParser. + * + *
+   * It will, when parse(String query) is called, construct a query like this
+   * (assuming the query consists of two terms and you specify the two fields
+   * title and body):
+   *
+   * (title:term1 body:term1) (title:term2 body:term2)
+   *
+   * When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+   *
+   * +(title:term1 body:term1) +(title:term2 body:term2)
+   *
+   * In other words, all the query's terms must appear, but it doesn't matter in
+   * what fields they appear.
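+   *
+   * A hypothetical usage sketch (the field names, analyzer and query string are
+   * illustrative only, not part of this change):
+   *
+   *   String[] fields = {"title", "body"};
+   *   MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
+   *   Query query = parser.parse("term1 term2");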
+ */ + public MultiFieldQueryParser(String[] fields, Analyzer analyzer) { + super(null, analyzer); + this.fields = fields; + } + + protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + Query q = super.getFieldQuery(fields[i], queryText); + if (q != null) { + if (q instanceof PhraseQuery) { + ((PhraseQuery) q).setSlop(slop); + } + if (q instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) q).setSlop(slop); + } + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + return super.getFieldQuery(field, queryText); + } + + + protected Query getFieldQuery(String field, String queryText) throws ParseException { + return getFieldQuery(field, queryText, 0); + } + + + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException + { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getFuzzyQuery(field, termStr, minSimilarity); + } + + protected Query getPrefixQuery(String field, String termStr) throws ParseException + { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getPrefixQuery(field, termStr); + } + + protected Query getWildcardQuery(String field, String termStr) throws ParseException { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getWildcardQuery(fields[i], termStr), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getWildcardQuery(field, termStr); + } + + + protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getRangeQuery(field, part1, part2, inclusive); + } + + + + + /** + * Parses a query which searches on the fields specified. + *
+   * If x fields are specified, this effectively constructs:
+   *
+   * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+   *
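+   * For example, a hedged sketch of a call (the query strings, field names and
+   * analyzer are illustrative only):
+   *
+   *   String[] queries = {"lucene", "search"};
+   *   String[] fields = {"title", "body"};
+   *   Query q = MultiFieldQueryParser.parse(queries, fields, analyzer);
+   *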
+ * @param queries Queries strings to parse + * @param fields Fields to search on + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the queries array differs + * from the length of the fields array + */ + public static Query parse(String[] queries, String[] fields, + Analyzer analyzer) throws ParseException + { + if (queries.length != fields.length) + throw new IllegalArgumentException("queries.length != fields.length"); + BooleanQuery bQuery = new BooleanQuery(); + for (int i = 0; i < fields.length; i++) + { + QueryParser qp = new QueryParser(fields[i], analyzer); + Query q = qp.parse(queries[i]); + bQuery.add(q, BooleanClause.Occur.SHOULD); + } + return bQuery; + } + + + /** + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. + *
+   * Usage:
+   * String[] fields = {"filename", "contents", "description"};
+   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+   *                BooleanClause.Occur.MUST,
+   *                BooleanClause.Occur.MUST_NOT};
+   * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+   *
+   * The code above would construct a query:
+   *
+   * (filename:query) +(contents:query) -(description:query)
+   *
+ * + * @param query Query string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the fields array differs + * from the length of the flags array + */ + public static Query parse(String query, String[] fields, + BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { + if (fields.length != flags.length) + throw new IllegalArgumentException("fields.length != flags.length"); + BooleanQuery bQuery = new BooleanQuery(); + for (int i = 0; i < fields.length; i++) { + QueryParser qp = new QueryParser(fields[i], analyzer); + Query q = qp.parse(query); + bQuery.add(q, flags[i]); + } + return bQuery; + } + + + /** + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. + *
+   * Usage:
+   * String[] query = {"query1", "query2", "query3"};
+   * String[] fields = {"filename", "contents", "description"};
+   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+   *                BooleanClause.Occur.MUST,
+   *                BooleanClause.Occur.MUST_NOT};
+   * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+   *
+   * The code above would construct a query:
+   *
+   * (filename:query1) +(contents:query2) -(description:query3)
+   *
+ * + * @param queries Queries string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the queries, fields, + * and flags array differ + */ + public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags, + Analyzer analyzer) throws ParseException + { + if (!(queries.length == fields.length && queries.length == flags.length)) + throw new IllegalArgumentException("queries, fields, and flags array have have different length"); + BooleanQuery bQuery = new BooleanQuery(); + for (int i = 0; i < fields.length; i++) + { + QueryParser qp = new QueryParser(fields[i], analyzer); + Query q = qp.parse(queries[i]); + bQuery.add(q, flags[i]); + } + return bQuery; + } + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java b/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java index c19638b39f..61ab9f693c 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java @@ -1,192 +1,192 @@ -/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */ -package org.alfresco.repo.search.impl.lucene; - -/** - * This exception is thrown when parse errors are encountered. - * You can explicitly create objects of this exception type by - * calling the method generateParseException in the generated - * parser. - * - * You can modify this class to customize your error reporting - * mechanisms so long as you retain the public fields. - */ -public class ParseException extends Exception { - - /** - * This constructor is used by the method "generateParseException" - * in the generated parser. Calling this constructor generates - * a new object of this type with the fields "currentToken", - * "expectedTokenSequences", and "tokenImage" set. The boolean - * flag "specialConstructor" is also set to true to indicate that - * this constructor was used to create this object. - * This constructor calls its super class with the empty string - * to force the "toString" method of parent class "Throwable" to - * print the error message in the form: - * ParseException: - */ - public ParseException(Token currentTokenVal, - int[][] expectedTokenSequencesVal, - String[] tokenImageVal - ) - { - super(""); - specialConstructor = true; - currentToken = currentTokenVal; - expectedTokenSequences = expectedTokenSequencesVal; - tokenImage = tokenImageVal; - } - - /** - * The following constructors are for use by you for whatever - * purpose you can think of. Constructing the exception in this - * manner makes the exception behave in the normal way - i.e., as - * documented in the class "Throwable". The fields "errorToken", - * "expectedTokenSequences", and "tokenImage" do not contain - * relevant information. The JavaCC generated code does not use - * these constructors. - */ - - public ParseException() { - super(); - specialConstructor = false; - } - - public ParseException(String message) { - super(message); - specialConstructor = false; - } - - /** - * This variable determines which constructor was used to create - * this object and thereby affects the semantics of the - * "getMessage" method (see below). - */ - protected boolean specialConstructor; - - /** - * This is the last token that has been consumed successfully. 
If - * this object has been created due to a parse error, the token - * followng this token will (therefore) be the first error token. - */ - public Token currentToken; - - /** - * Each entry in this array is an array of integers. Each array - * of integers represents a sequence of tokens (by their ordinal - * values) that is expected at this point of the parse. - */ - public int[][] expectedTokenSequences; - - /** - * This is a reference to the "tokenImage" array of the generated - * parser within which the parse error occurred. This array is - * defined in the generated ...Constants interface. - */ - public String[] tokenImage; - - /** - * This method has the standard behavior when this object has been - * created using the standard constructors. Otherwise, it uses - * "currentToken" and "expectedTokenSequences" to generate a parse - * error message and returns it. If this object has been created - * due to a parse error, and you do not catch it (it gets thrown - * from the parser), then this method is called during the printing - * of the final stack trace, and hence the correct error message - * gets displayed. - */ - public String getMessage() { - if (!specialConstructor) { - return super.getMessage(); - } - String expected = ""; - int maxSize = 0; - for (int i = 0; i < expectedTokenSequences.length; i++) { - if (maxSize < expectedTokenSequences[i].length) { - maxSize = expectedTokenSequences[i].length; - } - for (int j = 0; j < expectedTokenSequences[i].length; j++) { - expected += tokenImage[expectedTokenSequences[i][j]] + " "; - } - if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { - expected += "..."; - } - expected += eol + " "; - } - String retval = "Encountered \""; - Token tok = currentToken.next; - for (int i = 0; i < maxSize; i++) { - if (i != 0) retval += " "; - if (tok.kind == 0) { - retval += tokenImage[0]; - break; - } - retval += add_escapes(tok.image); - tok = tok.next; - } - retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; - retval += "." + eol; - if (expectedTokenSequences.length == 1) { - retval += "Was expecting:" + eol + " "; - } else { - retval += "Was expecting one of:" + eol + " "; - } - retval += expected; - return retval; - } - - /** - * The end of line string for this machine. - */ - protected String eol = System.getProperty("line.separator", "\n"); - - /** - * Used to convert raw characters to their escaped version - * when these raw version cannot be used as part of an ASCII - * string literal. - */ - protected String add_escapes(String str) { - StringBuffer retval = new StringBuffer(); - char ch; - for (int i = 0; i < str.length(); i++) { - switch (str.charAt(i)) - { - case 0 : - continue; - case '\b': - retval.append("\\b"); - continue; - case '\t': - retval.append("\\t"); - continue; - case '\n': - retval.append("\\n"); - continue; - case '\f': - retval.append("\\f"); - continue; - case '\r': - retval.append("\\r"); - continue; - case '\"': - retval.append("\\\""); - continue; - case '\'': - retval.append("\\\'"); - continue; - case '\\': - retval.append("\\\\"); - continue; - default: - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { - String s = "0000" + Integer.toString(ch, 16); - retval.append("\\u" + s.substring(s.length() - 4, s.length())); - } else { - retval.append(ch); - } - continue; - } - } - return retval.toString(); - } - -} +/* Generated By:JavaCC: Do not edit this line. 
ParseException.java Version 3.0 */ +package org.alfresco.repo.search.impl.lucene; + +/** + * This exception is thrown when parse errors are encountered. + * You can explicitly create objects of this exception type by + * calling the method generateParseException in the generated + * parser. + * + * You can modify this class to customize your error reporting + * mechanisms so long as you retain the public fields. + */ +public class ParseException extends Exception { + + /** + * This constructor is used by the method "generateParseException" + * in the generated parser. Calling this constructor generates + * a new object of this type with the fields "currentToken", + * "expectedTokenSequences", and "tokenImage" set. The boolean + * flag "specialConstructor" is also set to true to indicate that + * this constructor was used to create this object. + * This constructor calls its super class with the empty string + * to force the "toString" method of parent class "Throwable" to + * print the error message in the form: + * ParseException: + */ + public ParseException(Token currentTokenVal, + int[][] expectedTokenSequencesVal, + String[] tokenImageVal + ) + { + super(""); + specialConstructor = true; + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /** + * The following constructors are for use by you for whatever + * purpose you can think of. Constructing the exception in this + * manner makes the exception behave in the normal way - i.e., as + * documented in the class "Throwable". The fields "errorToken", + * "expectedTokenSequences", and "tokenImage" do not contain + * relevant information. The JavaCC generated code does not use + * these constructors. + */ + + public ParseException() { + super(); + specialConstructor = false; + } + + public ParseException(String message) { + super(message); + specialConstructor = false; + } + + /** + * This variable determines which constructor was used to create + * this object and thereby affects the semantics of the + * "getMessage" method (see below). + */ + protected boolean specialConstructor; + + /** + * This is the last token that has been consumed successfully. If + * this object has been created due to a parse error, the token + * followng this token will (therefore) be the first error token. + */ + public Token currentToken; + + /** + * Each entry in this array is an array of integers. Each array + * of integers represents a sequence of tokens (by their ordinal + * values) that is expected at this point of the parse. + */ + public int[][] expectedTokenSequences; + + /** + * This is a reference to the "tokenImage" array of the generated + * parser within which the parse error occurred. This array is + * defined in the generated ...Constants interface. + */ + public String[] tokenImage; + + /** + * This method has the standard behavior when this object has been + * created using the standard constructors. Otherwise, it uses + * "currentToken" and "expectedTokenSequences" to generate a parse + * error message and returns it. If this object has been created + * due to a parse error, and you do not catch it (it gets thrown + * from the parser), then this method is called during the printing + * of the final stack trace, and hence the correct error message + * gets displayed. 
+ */ + public String getMessage() { + if (!specialConstructor) { + return super.getMessage(); + } + String expected = ""; + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.length; i++) { + if (maxSize < expectedTokenSequences[i].length) { + maxSize = expectedTokenSequences[i].length; + } + for (int j = 0; j < expectedTokenSequences[i].length; j++) { + expected += tokenImage[expectedTokenSequences[i][j]] + " "; + } + if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { + expected += "..."; + } + expected += eol + " "; + } + String retval = "Encountered \""; + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) { + if (i != 0) retval += " "; + if (tok.kind == 0) { + retval += tokenImage[0]; + break; + } + retval += add_escapes(tok.image); + tok = tok.next; + } + retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; + retval += "." + eol; + if (expectedTokenSequences.length == 1) { + retval += "Was expecting:" + eol + " "; + } else { + retval += "Was expecting one of:" + eol + " "; + } + retval += expected; + return retval; + } + + /** + * The end of line string for this machine. + */ + protected String eol = System.getProperty("line.separator", "\n"); + + /** + * Used to convert raw characters to their escaped version + * when these raw version cannot be used as part of an ASCII + * string literal. + */ + protected String add_escapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java index e57882c030..96d64e77d8 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java @@ -1,31 +1,19 @@ /* Generated By:JavaCC: Do not edit this line. 
QueryParser.java */ package org.alfresco.repo.search.impl.lucene; -import java.io.IOException; -import java.io.StringReader; -import java.text.DateFormat; -import java.util.Date; -import java.util.Locale; import java.util.Vector; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.DateField; +import java.io.*; +import java.text.*; +import java.util.*; import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.RangeQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; +import org.apache.lucene.search.*; +import org.apache.lucene.util.Parameter; /** - * This class is generated by JavaCC. The only method that clients should need - * to call is parse(). + * This class is generated by JavaCC. The most important method is + * {@link #parse(String)}. * * The syntax for query strings is as follows: * A Query is a series of clauses. @@ -53,9 +41,21 @@ import org.apache.lucene.search.BooleanClause.Occur; * *
* Examples of appropriately formatted queries can be found in the test cases. + * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax + * documentation. *
* +
In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. date:[6/1/2005 TO 6/4/2005] + * produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note + * that the format of the accpeted input depends on {@link #setLocale(Locale) the locale}. This + * feature also assumes that your index uses the {@link DateField} class to store dates. + * If you use a different format (e.g. {@link DateTools}) and you still want QueryParser + * to turn local dates in range queries into valid queries you need to create your own + * query parser that inherits QueryParser and overwrites + * {@link #getRangeQuery(String, String, String, boolean)}.
+ *
+ * Note that QueryParser is not thread-safe.
+ *
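+ * As a minimal usage sketch of the parser after this change (the field name,
+ * analyzer and query string are illustrative, not part of the patch):
+ *
+ *   QueryParser parser = new QueryParser("content", new StandardAnalyzer());
+ *   parser.setDefaultOperator(QueryParser.AND_OPERATOR);
+ *   Query query = parser.parse("capital of Hungary");
+ *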
+ * * @author Brian Goetz * @author Peter Halacsy * @author Tatu Saloranta @@ -71,36 +71,37 @@ public class QueryParser implements QueryParserConstants { private static final int MOD_NOT = 10; private static final int MOD_REQ = 11; - public static final int DEFAULT_OPERATOR_OR = 0; - public static final int DEFAULT_OPERATOR_AND = 1; + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; /** The actual operator that parser uses to combine query terms */ - private int operator = DEFAULT_OPERATOR_OR; + private Operator operator = OR_OPERATOR; - /** - * Whether terms of wildcard and prefix queries are to be automatically - * lower-cased or not. Default is true. - */ - boolean lowercaseWildcardTerms = true; + boolean lowercaseExpandedTerms = true; Analyzer analyzer; String field; int phraseSlop = 0; float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; Locale locale = Locale.getDefault(); - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param query the query string to be parsed. - * @param field the default field for query terms. - * @param analyzer used to find terms in the query text. - * @throws ParseException if the parsing fails + /** The default operator for parsing queries. + * Use {@link QueryParser#setDefaultOperator} to change it. */ - static public Query parse(String query, String field, Analyzer analyzer) - throws ParseException { - QueryParser parser = new QueryParser(field, analyzer); - return parser.parse(query); + static public final class Operator extends Parameter { + private Operator(String name) { + super(name); + } + static public final Operator OR = new Operator("OR"); + static public final Operator AND = new Operator("AND"); } + /** Constructs a query parser. * @param f the default field for query terms. * @param a used to find terms in the query text. @@ -111,8 +112,7 @@ public class QueryParser implements QueryParserConstants { field = f; } - /** Parses a query string, returning a - * Query. + /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. * @param query the query string to be parsed. * @throws ParseException if the parsing fails */ @@ -144,18 +144,36 @@ public class QueryParser implements QueryParserConstants { } /** - * Get the default minimal similarity for fuzzy queries. + * Get the minimal similarity for fuzzy queries. */ public float getFuzzyMinSim() { return fuzzyMinSim; } + /** - *Set the default minimum similarity for fuzzy queries. + * Set the minimum similarity for fuzzy queries. + * Default is 0.5f. */ public void setFuzzyMinSim(float fuzzyMinSim) { this.fuzzyMinSim = fuzzyMinSim; } + /** + * Get the prefix length for fuzzy queries. + * @return Returns the fuzzyPrefixLength. + */ + public int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + /** + * Set the prefix length for fuzzy queries. Default is 0. + * @param fuzzyPrefixLength The fuzzyPrefixLength to set. + */ + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + /** * Sets the default slop for phrases. If zero, then exact phrase matches * are required. Default value is zero. 
@@ -171,32 +189,43 @@ public class QueryParser implements QueryParserConstants { return phraseSlop; } - /** - * Sets the boolean operator of the QueryParser. - * In classic mode (DEFAULT_OPERATOR_OR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to - * capital OR of OR Hungary.
- * In DEFAULT_OPERATOR_AND terms are considered to be in conjuction: the - * above mentioned query is parsed as capital AND of AND Hungary - */ - public void setOperator(int operator) { - this.operator = operator; - } /** - * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND - * or DEFAULT_OPERATOR_OR. + * Sets the boolean operator of the QueryParser. + * In default mode (OR_OPERATOR) terms without any modifiers + * are considered optional: for example capital of Hungary is equal to + * capital OR of OR Hungary.
+ * In AND_OPERATOR mode terms are considered to be in conjuction: the + * above mentioned query is parsed as capital AND of AND Hungary */ - public int getOperator() { + public void setDefaultOperator(Operator op) { + this.operator = op; + } + + + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + public Operator getDefaultOperator() { return operator; } - public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) { - this.lowercaseWildcardTerms = lowercaseWildcardTerms; + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is true. + */ + public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; } - public boolean getLowercaseWildcardTerms() { - return lowercaseWildcardTerms; + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + public boolean getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; } /** @@ -213,7 +242,7 @@ public class QueryParser implements QueryParserConstants { return locale; } - protected void addClause(Vector clauses, int conj, int mods, Query q) { + protected void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, @@ -221,17 +250,17 @@ public class QueryParser implements QueryParserConstants { if (clauses.size() > 0 && conj == CONJ_AND) { BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); if (!c.isProhibited()) - c.setOccur(Occur.MUST); + c.setOccur(BooleanClause.Occur.MUST); } - if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { + if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without // this modification a OR b would parsed as +a OR b BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); if (!c.isProhibited()) - c.setOccur(Occur.SHOULD); + c.setOccur(BooleanClause.Occur.SHOULD); } // We might have been passed a null query; the term might have been @@ -239,7 +268,7 @@ public class QueryParser implements QueryParserConstants { if (q == null) return; - if (operator == DEFAULT_OPERATOR_OR) { + if (operator == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if // introduced by NOT or -; make sure not to set both. prohibited = (mods == MOD_NOT); @@ -253,31 +282,16 @@ public class QueryParser implements QueryParserConstants { prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } - Occur occur = Occur.SHOULD; - if(prohibited) - { - occur = Occur.MUST_NOT; - } - if(required) - { - occur = Occur.MUST; - } - clauses.addElement(new BooleanClause(q, occur)); + if (required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST)); + else if (!required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + else if (!required && prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); + else + throw new RuntimeException("Clause cannot be both required and prohibited"); } - /** - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. 
This method will be deprecated and substituted - * by {@link #getFieldQuery(String, String)} in future versions of Lucene. - * Currently overwriting either of these methods works. - * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText) throws ParseException { - return getFieldQuery(field, queryText); - } /** * @exception ParseException throw in overridden method to disallow @@ -286,10 +300,11 @@ public class QueryParser implements QueryParserConstants { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TokenStream source = analyzer.tokenStream(field, - new StringReader(queryText)); + TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); Vector v = new Vector(); org.apache.lucene.analysis.Token t; + int positionCount = 0; + boolean severalTokensAtSamePosition = false; while (true) { try { @@ -300,7 +315,11 @@ public class QueryParser implements QueryParserConstants { } if (t == null) break; - v.addElement(t.termText()); + v.addElement(t); + if (t.getPositionIncrement() != 0) + positionCount += t.getPositionIncrement(); + else + severalTokensAtSamePosition = true; } try { source.close(); @@ -311,36 +330,52 @@ public class QueryParser implements QueryParserConstants { if (v.size() == 0) return null; - else if (v.size() == 1) - return new TermQuery(new Term(field, (String) v.elementAt(0))); - else { - PhraseQuery q = new PhraseQuery(); - q.setSlop(phraseSlop); - for (int i=0; i 0) { + mpq.add((Term[])multiTerms.toArray(new Term[0])); + multiTerms.clear(); + } + multiTerms.add(new Term(field, t.termText())); + } + mpq.add((Term[])multiTerms.toArray(new Term[0])); + return mpq; + } + } + else { + PhraseQuery q = new PhraseQuery(); + q.setSlop(phraseSlop); + for (int i = 0; i < v.size(); i++) { + q.add(new Term(field, ((org.apache.lucene.analysis.Token) + v.elementAt(i)).termText())); + + } + return q; } - return q; } } - /** - * Base implementation delegates to {@link #getFieldQuery(String, Analyzer, String)}. - * This method may be overwritten, for example, to return - * a SpanNearQuery instead of a PhraseQuery. - * - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. This method will be deprecated and substituted - * by {@link #getFieldQuery(String, String, int)} in future versions of Lucene. - * Currently overwriting either of these methods works. - * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText, - int slop) throws ParseException { - return getFieldQuery(field, queryText, slop); - } /** * Base implementation delegates to {@link #getFieldQuery(String,String)}. @@ -356,25 +391,13 @@ public class QueryParser implements QueryParserConstants { if (query instanceof PhraseQuery) { ((PhraseQuery) query).setSlop(slop); } + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } return query; } - /** - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. This method will be deprecated and substituted - * by {@link #getRangeQuery(String, String, String, boolean)} in future versions of Lucene. - * Currently overwriting either of these methods works. 
- * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRangeQuery(String field, - Analyzer analyzer, - String part1, - String part2, - boolean inclusive) throws ParseException { - return getRangeQuery(field, part1, part2, inclusive); - } /** * @exception ParseException throw in overridden method to disallow @@ -384,11 +407,27 @@ public class QueryParser implements QueryParserConstants { String part2, boolean inclusive) throws ParseException { + if (lowercaseExpandedTerms) { + part1 = part1.toLowerCase(); + part2 = part2.toLowerCase(); + } try { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); df.setLenient(true); Date d1 = df.parse(part1); Date d2 = df.parse(part2); + if (inclusive) { + // The user can only specify the date, not the time, so make sure + // the time is set to the latest possible time of that date to really + // include all documents: + Calendar cal = Calendar.getInstance(locale); + cal.setTime(d2); + cal.set(Calendar.HOUR_OF_DAY, 23); + cal.set(Calendar.MINUTE, 59); + cal.set(Calendar.SECOND, 59); + cal.set(Calendar.MILLISECOND, 999); + d2 = cal.getTime(); + } part1 = DateField.dateToString(d1); part2 = DateField.dateToString(d2); } @@ -412,9 +451,28 @@ public class QueryParser implements QueryParserConstants { * @return Resulting {@link Query} object. * @exception ParseException throw in overridden method to disallow */ - protected Query getBooleanQuery(Vector clauses) throws ParseException + protected Query getBooleanQuery(Vector clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains {@link BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting {@link Query} object. + * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses, boolean disableCoord) + throws ParseException { - BooleanQuery query = new BooleanQuery(); + BooleanQuery query = new BooleanQuery(disableCoord); for (int i = 0; i < clauses.size(); i++) { query.add((BooleanClause)clauses.elementAt(i)); } @@ -444,8 +502,8 @@ public class QueryParser implements QueryParserConstants { */ protected Query getWildcardQuery(String field, String termStr) throws ParseException { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new WildcardQuery(t); @@ -453,7 +511,7 @@ public class QueryParser implements QueryParserConstants { /** * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses an input term + * {@link #getWildcardQuery}). Called when parser parses an input term * token that uses prefix notation; that is, contains a single '*' wildcard * character as its last character. 
Since this is a special case * of generic wildcard term, and such a query can be optimized easily, @@ -476,16 +534,17 @@ public class QueryParser implements QueryParserConstants { */ protected Query getPrefixQuery(String field, String termStr) throws ParseException { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new PrefixQuery(t); } - /** + + /** * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses + * {@link #getWildcardQuery}). Called when parser parses * an input term token that has the fuzzy suffix (~) appended. * * @param field Name of the field query will use. @@ -494,26 +553,13 @@ public class QueryParser implements QueryParserConstants { * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException { - return getFuzzyQuery(field, termStr, fuzzyMinSim); - } - - /** - * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~floatNumber) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * @param minSimilarity the minimum similarity required for a fuzzy match - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } Term t = new Term(field, termStr); - return new FuzzyQuery(t, minSimilarity); + return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); } /** @@ -534,10 +580,10 @@ public class QueryParser implements QueryParserConstants { /** * Returns a String where those characters that QueryParser - * expects to be escaped are escaped, i.e. preceded by a \. + * expects to be escaped are escaped by a preceding \. */ public static String escape(String s) { - StringBuilder sb = new StringBuilder(s.length()); + StringBuffer sb = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); // NOTE: keep this in sync with _ESCAPED_CHAR below! @@ -551,7 +597,16 @@ public class QueryParser implements QueryParserConstants { return sb.toString(); } + /** + * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}. + * Usage:
+ * java org.apache.lucene.queryParser.QueryParser <input> + */ public static void main(String[] args) throws Exception { + if (args.length == 0) { + System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); + System.exit(0); + } QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); Query q = qp.parse(args[0]); @@ -789,12 +844,9 @@ public class QueryParser implements QueryParserConstants { if(fms < 0.0f || fms > 1.0f){ {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");} } - if(fms == fuzzyMinSim) - q = getFuzzyQuery(field, termImage); - else - q = getFuzzyQuery(field, termImage, fms); + q = getFuzzyQuery(field, termImage,fms); } else { - q = getFieldQuery(field, analyzer, termImage); + q = getFieldQuery(field, termImage); } break; case RANGEIN_START: @@ -851,7 +903,7 @@ public class QueryParser implements QueryParserConstants { } else { goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); + q = getRangeQuery(field, goop1.image, goop2.image, true); break; case RANGEEX_START: jj_consume_token(RANGEEX_START); @@ -908,7 +960,7 @@ public class QueryParser implements QueryParserConstants { goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); + q = getRangeQuery(field, goop1.image, goop2.image, false); break; case QUOTED: term = jj_consume_token(QUOTED); @@ -937,7 +989,7 @@ public class QueryParser implements QueryParserConstants { } catch (Exception ignored) { } } - q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s); + q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); break; default: jj_la1[21] = jj_gen; @@ -1183,6 +1235,7 @@ public class QueryParser implements QueryParserConstants { final private void jj_rescan_token() { jj_rescan = true; for (int i = 0; i < 1; i++) { + try { JJCalls p = jj_2_rtns[i]; do { if (p.gen > jj_gen) { @@ -1193,6 +1246,7 @@ public class QueryParser implements QueryParserConstants { } p = p.next; } while (p != null); + } catch(LookaheadSuccess ls) { } } jj_rescan = false; } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj index b5a9c4350c..6098160919 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj +++ b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj @@ -32,10 +32,11 @@ import org.apache.lucene.index.Term; import org.apache.lucene.analysis.*; import org.apache.lucene.document.*; import org.apache.lucene.search.*; +import org.apache.lucene.util.Parameter; /** - * This class is generated by JavaCC. The only method that clients should need - * to call is parse(). + * This class is generated by JavaCC. The most important method is + * {@link #parse(String)}. * * The syntax for query strings is as follows: * A Query is a series of clauses. @@ -63,9 +64,21 @@ import org.apache.lucene.search.*; * *
* Examples of appropriately formatted queries can be found in the test cases. + * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax + * documentation. *
* +
In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. date:[6/1/2005 TO 6/4/2005] + * produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note + * that the format of the accpeted input depends on {@link #setLocale(Locale) the locale}. This + * feature also assumes that your index uses the {@link DateField} class to store dates. + * If you use a different format (e.g. {@link DateTools}) and you still want QueryParser + * to turn local dates in range queries into valid queries you need to create your own + * query parser that inherits QueryParser and overwrites + * {@link #getRangeQuery(String, String, String, boolean)}.
+ *
+ * Note that QueryParser is not thread-safe.
+ *
+ * * @author Brian Goetz * @author Peter Halacsy * @author Tatu Saloranta @@ -81,36 +94,37 @@ public class QueryParser { private static final int MOD_NOT = 10; private static final int MOD_REQ = 11; - public static final int DEFAULT_OPERATOR_OR = 0; - public static final int DEFAULT_OPERATOR_AND = 1; + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; /** The actual operator that parser uses to combine query terms */ - private int operator = DEFAULT_OPERATOR_OR; + private Operator operator = OR_OPERATOR; - /** - * Whether terms of wildcard and prefix queries are to be automatically - * lower-cased or not. Default is true. - */ - boolean lowercaseWildcardTerms = true; + boolean lowercaseExpandedTerms = true; Analyzer analyzer; String field; int phraseSlop = 0; float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; Locale locale = Locale.getDefault(); - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param query the query string to be parsed. - * @param field the default field for query terms. - * @param analyzer used to find terms in the query text. - * @throws ParseException if the parsing fails + /** The default operator for parsing queries. + * Use {@link QueryParser#setDefaultOperator} to change it. */ - static public Query parse(String query, String field, Analyzer analyzer) - throws ParseException { - QueryParser parser = new QueryParser(field, analyzer); - return parser.parse(query); + static public final class Operator extends Parameter { + private Operator(String name) { + super(name); + } + static public final Operator OR = new Operator("OR"); + static public final Operator AND = new Operator("AND"); } + /** Constructs a query parser. * @param f the default field for query terms. * @param a used to find terms in the query text. @@ -121,8 +135,7 @@ public class QueryParser { field = f; } - /** Parses a query string, returning a - * Query. + /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. * @param query the query string to be parsed. * @throws ParseException if the parsing fails */ @@ -154,17 +167,35 @@ public class QueryParser { } /** - * Get the default minimal similarity for fuzzy queries. + * Get the minimal similarity for fuzzy queries. */ public float getFuzzyMinSim() { return fuzzyMinSim; } + /** - *Set the default minimum similarity for fuzzy queries. + * Set the minimum similarity for fuzzy queries. + * Default is 0.5f. */ public void setFuzzyMinSim(float fuzzyMinSim) { this.fuzzyMinSim = fuzzyMinSim; } + + /** + * Get the prefix length for fuzzy queries. + * @return Returns the fuzzyPrefixLength. + */ + public int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + /** + * Set the prefix length for fuzzy queries. Default is 0. + * @param fuzzyPrefixLength The fuzzyPrefixLength to set. + */ + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } /** * Sets the default slop for phrases. If zero, then exact phrase matches @@ -181,32 +212,43 @@ public class QueryParser { return phraseSlop; } - /** - * Sets the boolean operator of the QueryParser. 
- * In classic mode (DEFAULT_OPERATOR_OR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to - * capital OR of OR Hungary.
- * In DEFAULT_OPERATOR_AND terms are considered to be in conjuction: the - * above mentioned query is parsed as capital AND of AND Hungary - */ - public void setOperator(int operator) { - this.operator = operator; - } /** - * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND - * or DEFAULT_OPERATOR_OR. + * Sets the boolean operator of the QueryParser. + * In default mode (OR_OPERATOR) terms without any modifiers + * are considered optional: for example capital of Hungary is equal to + * capital OR of OR Hungary.
+ * In AND_OPERATOR mode terms are considered to be in conjuction: the + * above mentioned query is parsed as capital AND of AND Hungary */ - public int getOperator() { + public void setDefaultOperator(Operator op) { + this.operator = op; + } + + + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + public Operator getDefaultOperator() { return operator; } - public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) { - this.lowercaseWildcardTerms = lowercaseWildcardTerms; + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is true. + */ + public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; } - public boolean getLowercaseWildcardTerms() { - return lowercaseWildcardTerms; + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + public boolean getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; } /** @@ -222,26 +264,26 @@ public class QueryParser { public Locale getLocale() { return locale; } - - protected void addClause(Vector clauses, int conj, int mods, Query q) { + + protected void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited if (clauses.size() > 0 && conj == CONJ_AND) { BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); - if (!c.prohibited) - c.required = true; + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.MUST); } - if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { + if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without // this modification a OR b would parsed as +a OR b BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); - if (!c.prohibited) - c.required = false; + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.SHOULD); } // We might have been passed a null query; the term might have been @@ -249,7 +291,7 @@ public class QueryParser { if (q == null) return; - if (operator == DEFAULT_OPERATOR_OR) { + if (operator == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if // introduced by NOT or -; make sure not to set both. prohibited = (mods == MOD_NOT); @@ -263,23 +305,17 @@ public class QueryParser { prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } - clauses.addElement(new BooleanClause(q, required, prohibited)); - } - - /** - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. This method will be deprecated and substituted - * by {@link #getFieldQuery(String, String)} in future versions of Lucene. - * Currently overwriting either of these methods works. 
- * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText) throws ParseException { - return getFieldQuery(field, queryText); + if (required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST)); + else if (!required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + else if (!required && prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); + else + throw new RuntimeException("Clause cannot be both required and prohibited"); } + /** * @exception ParseException throw in overridden method to disallow */ @@ -287,10 +323,11 @@ public class QueryParser { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TokenStream source = analyzer.tokenStream(field, - new StringReader(queryText)); + TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); Vector v = new Vector(); org.apache.lucene.analysis.Token t; + int positionCount = 0; + boolean severalTokensAtSamePosition = false; while (true) { try { @@ -301,7 +338,11 @@ public class QueryParser { } if (t == null) break; - v.addElement(t.termText()); + v.addElement(t); + if (t.getPositionIncrement() != 0) + positionCount += t.getPositionIncrement(); + else + severalTokensAtSamePosition = true; } try { source.close(); @@ -312,36 +353,52 @@ public class QueryParser { if (v.size() == 0) return null; - else if (v.size() == 1) - return new TermQuery(new Term(field, (String) v.elementAt(0))); - else { - PhraseQuery q = new PhraseQuery(); - q.setSlop(phraseSlop); - for (int i=0; i 0) { + mpq.add((Term[])multiTerms.toArray(new Term[0])); + multiTerms.clear(); + } + multiTerms.add(new Term(field, t.termText())); + } + mpq.add((Term[])multiTerms.toArray(new Term[0])); + return mpq; + } + } + else { + PhraseQuery q = new PhraseQuery(); + q.setSlop(phraseSlop); + for (int i = 0; i < v.size(); i++) { + q.add(new Term(field, ((org.apache.lucene.analysis.Token) + v.elementAt(i)).termText())); + + } + return q; } - return q; } } - - /** - * Base implementation delegates to {@link #getFieldQuery(String, Analyzer, String)}. - * This method may be overwritten, for example, to return - * a SpanNearQuery instead of a PhraseQuery. - * - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. This method will be deprecated and substituted - * by {@link #getFieldQuery(String, String, int)} in future versions of Lucene. - * Currently overwriting either of these methods works. - * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText, - int slop) throws ParseException { - return getFieldQuery(field, queryText, slop); - } + /** * Base implementation delegates to {@link #getFieldQuery(String,String)}. @@ -357,25 +414,13 @@ public class QueryParser { if (query instanceof PhraseQuery) { ((PhraseQuery) query).setSlop(slop); } + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } return query; } - - /** - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. This method will be deprecated and substituted - * by {@link #getRangeQuery(String, String, String, boolean)} in future versions of Lucene. 
- * Currently overwriting either of these methods works. - * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRangeQuery(String field, - Analyzer analyzer, - String part1, - String part2, - boolean inclusive) throws ParseException { - return getRangeQuery(field, part1, part2, inclusive); - } + /** * @exception ParseException throw in overridden method to disallow @@ -385,11 +430,27 @@ public class QueryParser { String part2, boolean inclusive) throws ParseException { + if (lowercaseExpandedTerms) { + part1 = part1.toLowerCase(); + part2 = part2.toLowerCase(); + } try { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); df.setLenient(true); Date d1 = df.parse(part1); Date d2 = df.parse(part2); + if (inclusive) { + // The user can only specify the date, not the time, so make sure + // the time is set to the latest possible time of that date to really + // include all documents: + Calendar cal = Calendar.getInstance(locale); + cal.setTime(d2); + cal.set(Calendar.HOUR_OF_DAY, 23); + cal.set(Calendar.MINUTE, 59); + cal.set(Calendar.SECOND, 59); + cal.set(Calendar.MILLISECOND, 999); + d2 = cal.getTime(); + } part1 = DateField.dateToString(d1); part2 = DateField.dateToString(d2); } @@ -413,10 +474,28 @@ public class QueryParser { * @return Resulting {@link Query} object. * @exception ParseException throw in overridden method to disallow */ - protected Query getBooleanQuery(Vector clauses) throws ParseException + protected Query getBooleanQuery(Vector clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains {@link BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting {@link Query} object. + * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses, boolean disableCoord) + throws ParseException { - BooleanQuery query = new BooleanQuery(); - query. + BooleanQuery query = new BooleanQuery(disableCoord); for (int i = 0; i < clauses.size(); i++) { query.add((BooleanClause)clauses.elementAt(i)); } @@ -446,8 +525,8 @@ public class QueryParser { */ protected Query getWildcardQuery(String field, String termStr) throws ParseException { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new WildcardQuery(t); @@ -455,7 +534,7 @@ public class QueryParser { /** * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses an input term + * {@link #getWildcardQuery}). Called when parser parses an input term * token that uses prefix notation; that is, contains a single '*' wildcard * character as its last character. 
Since this is a special case * of generic wildcard term, and such a query can be optimized easily, @@ -478,16 +557,17 @@ public class QueryParser { */ protected Query getPrefixQuery(String field, String termStr) throws ParseException { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new PrefixQuery(t); } - /** + + /** * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses + * {@link #getWildcardQuery}). Called when parser parses * an input term token that has the fuzzy suffix (~) appended. * * @param field Name of the field query will use. @@ -496,26 +576,13 @@ public class QueryParser { * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException { - return getFuzzyQuery(field, termStr, fuzzyMinSim); - } - - /** - * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~floatNumber) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * @param minSimilarity the minimum similarity required for a fuzzy match - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } Term t = new Term(field, termStr); - return new FuzzyQuery(t, minSimilarity); + return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); } /** @@ -536,7 +603,7 @@ public class QueryParser { /** * Returns a String where those characters that QueryParser - * expects to be escaped are escaped, i.e. preceded by a \. + * expects to be escaped are escaped by a preceding \. */ public static String escape(String s) { StringBuffer sb = new StringBuffer(); @@ -553,7 +620,16 @@ public class QueryParser { return sb.toString(); } + /** + * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}. + * Usage:
+ * java org.apache.lucene.queryParser.QueryParser <input> + */ public static void main(String[] args) throws Exception { + if (args.length == 0) { + System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); + System.exit(0); + } QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); Query q = qp.parse(args[0]); @@ -580,17 +656,18 @@ PARSER_END(QueryParser) } SKIP : { - <<_WHITESPACE>> + < <_WHITESPACE>> } // OG: to support prefix queries: -// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137 +// http://issues.apache.org/bugzilla/show_bug.cgi?id=12137 // Change from: +// // | // (<_TERM_CHAR> | ( [ "*", "?" ] ))* > // To: // -// | | ( [ "*", "?" ] ))* > +// (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > TOKEN : { @@ -606,8 +683,7 @@ PARSER_END(QueryParser) | (<_TERM_CHAR>)* > | )+ ( "." (<_NUM_CHAR>)+ )? )? > | (<_TERM_CHAR>)* "*" > -| | ( [ "*", "?" ] )) - (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +| | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > | : RangeIn | : RangeEx } @@ -746,12 +822,9 @@ Query Term(String field) : { if(fms < 0.0f || fms > 1.0f){ throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); } - if(fms == fuzzyMinSim) - q = getFuzzyQuery(field, termImage); - else - q = getFuzzyQuery(field, termImage, fms); + q = getFuzzyQuery(field, termImage,fms); } else { - q = getFieldQuery(field, analyzer, termImage); + q = getFieldQuery(field, termImage); } } | ( ( goop1=|goop1= ) @@ -769,7 +842,7 @@ Query Term(String field) : { } else { goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); + q = getRangeQuery(field, goop1.image, goop2.image, true); } | ( ( goop1=|goop1= ) [ ] ( goop2=|goop2= ) @@ -787,7 +860,7 @@ Query Term(String field) : { goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); + q = getRangeQuery(field, goop1.image, goop2.image, false); } | term= [ fuzzySlop= ] @@ -801,7 +874,7 @@ Query Term(String field) : { } catch (Exception ignored) { } } - q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s); + q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); } ) { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java b/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java index c7f66313ed..5421c13889 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java @@ -1,5 +1,14 @@ /* Generated By:JavaCC: Do not edit this line. 
QueryParserTokenManager.java */ package org.alfresco.repo.search.impl.lucene; +import java.util.Vector; +import java.io.*; +import java.text.*; +import java.util.*; +import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; +import org.apache.lucene.search.*; +import org.apache.lucene.util.Parameter; public class QueryParserTokenManager implements QueryParserConstants { @@ -937,12 +946,10 @@ protected CharStream input_stream; private final int[] jjrounds = new int[34]; private final int[] jjstateSet = new int[68]; protected char curChar; -public QueryParserTokenManager(CharStream stream) -{ +public QueryParserTokenManager(CharStream stream){ input_stream = stream; } -public QueryParserTokenManager(CharStream stream, int lexState) -{ +public QueryParserTokenManager(CharStream stream, int lexState){ this(stream); SwitchTo(lexState); } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/Token.java b/source/java/org/alfresco/repo/search/impl/lucene/Token.java index 57cefb2b54..ca7dd23c21 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/Token.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/Token.java @@ -1,97 +1,80 @@ -/* - * Copyright (C) 2005 Alfresco, Inc. - * - * Licensed under the Mozilla Public License version 1.1 - * with a permitted attribution clause. You may obtain a - * copy of the License at - * - * http://www.alfresco.org/legal/license.txt - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific - * language governing permissions and limitations under the - * License. - */ -/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */ -package org.alfresco.repo.search.impl.lucene; - -/** - * Describes the input token stream. - */ - -public class Token { - - /** - * An integer that describes the kind of this token. This numbering - * system is determined by JavaCCParser, and a table of these numbers is - * stored in the file ...Constants.java. - */ - public int kind; - - /** - * beginLine and beginColumn describe the position of the first character - * of this token; endLine and endColumn describe the position of the - * last character of this token. - */ - public int beginLine, beginColumn, endLine, endColumn; - - /** - * The string image of the token. - */ - public String image; - - /** - * A reference to the next regular (non-special) token from the input - * stream. If this is the last token from the input stream, or if the - * token manager has not read tokens beyond this one, this field is - * set to null. This is true only if this token is also a regular - * token. Otherwise, see below for a description of the contents of - * this field. - */ - public Token next; - - /** - * This field is used to access special tokens that occur prior to this - * token, but after the immediately preceding regular (non-special) token. - * If there are no such special tokens, this field is set to null. - * When there are more than one such special token, this field refers - * to the last of these special tokens, which in turn refers to the next - * previous special token through its specialToken field, and so on - * until the first special token (whose specialToken field is null). 
- * The next fields of special tokens refer to other special tokens that - * immediately follow it (without an intervening regular token). If there - * is no such token, this field is null. - */ - public Token specialToken; - - /** - * Returns the image. - */ - public String toString() - { - return image; - } - - /** - * Returns a new Token object, by default. However, if you want, you - * can create and return subclass objects based on the value of ofKind. - * Simply add the cases to the switch for all those special cases. - * For example, if you have a subclass of Token called IDToken that - * you want to create if ofKind is ID, simlpy add something like : - * - * case MyParserConstants.ID : return new IDToken(); - * - * to the following switch statement. Then you can cast matchedToken - * variable to the appropriate type and use it in your lexical actions. - */ - public static final Token newToken(int ofKind) - { - switch(ofKind) - { - default : return new Token(); - } - } - -} +/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */package org.alfresco.repo.search.impl.lucene; + +/** + * Describes the input token stream. + */ + +public class Token { + + /** + * An integer that describes the kind of this token. This numbering + * system is determined by JavaCCParser, and a table of these numbers is + * stored in the file ...Constants.java. + */ + public int kind; + + /** + * beginLine and beginColumn describe the position of the first character + * of this token; endLine and endColumn describe the position of the + * last character of this token. + */ + public int beginLine, beginColumn, endLine, endColumn; + + /** + * The string image of the token. + */ + public String image; + + /** + * A reference to the next regular (non-special) token from the input + * stream. If this is the last token from the input stream, or if the + * token manager has not read tokens beyond this one, this field is + * set to null. This is true only if this token is also a regular + * token. Otherwise, see below for a description of the contents of + * this field. + */ + public Token next; + + /** + * This field is used to access special tokens that occur prior to this + * token, but after the immediately preceding regular (non-special) token. + * If there are no such special tokens, this field is set to null. + * When there are more than one such special token, this field refers + * to the last of these special tokens, which in turn refers to the next + * previous special token through its specialToken field, and so on + * until the first special token (whose specialToken field is null). + * The next fields of special tokens refer to other special tokens that + * immediately follow it (without an intervening regular token). If there + * is no such token, this field is null. + */ + public Token specialToken; + + /** + * Returns the image. + */ + public String toString() + { + return image; + } + + /** + * Returns a new Token object, by default. However, if you want, you + * can create and return subclass objects based on the value of ofKind. + * Simply add the cases to the switch for all those special cases. + * For example, if you have a subclass of Token called IDToken that + * you want to create if ofKind is ID, simlpy add something like : + * + * case MyParserConstants.ID : return new IDToken(); + * + * to the following switch statement. Then you can cast matchedToken + * variable to the appropriate type and use it in your lexical actions. 
+ */ + public static final Token newToken(int ofKind) + { + switch(ofKind) + { + default : return new Token(); + } + } + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/TokenMgrError.java b/source/java/org/alfresco/repo/search/impl/lucene/TokenMgrError.java index 452f2183eb..57c9c65fec 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/TokenMgrError.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/TokenMgrError.java @@ -1,133 +1,133 @@ -/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ -package org.alfresco.repo.search.impl.lucene; - -public class TokenMgrError extends Error -{ - /* - * Ordinals for various reasons why an Error of this type can be thrown. - */ - - /** - * Lexical error occured. - */ - static final int LEXICAL_ERROR = 0; - - /** - * An attempt wass made to create a second instance of a static token manager. - */ - static final int STATIC_LEXER_ERROR = 1; - - /** - * Tried to change to an invalid lexical state. - */ - static final int INVALID_LEXICAL_STATE = 2; - - /** - * Detected (and bailed out of) an infinite loop in the token manager. - */ - static final int LOOP_DETECTED = 3; - - /** - * Indicates the reason why the exception is thrown. It will have - * one of the above 4 values. - */ - int errorCode; - - /** - * Replaces unprintable characters by their espaced (or unicode escaped) - * equivalents in the given string - */ - protected static final String addEscapes(String str) { - StringBuilder retval = new StringBuilder(str.length() + 8); - char ch; - for (int i = 0; i < str.length(); i++) { - switch (str.charAt(i)) - { - case 0 : - continue; - case '\b': - retval.append("\\b"); - continue; - case '\t': - retval.append("\\t"); - continue; - case '\n': - retval.append("\\n"); - continue; - case '\f': - retval.append("\\f"); - continue; - case '\r': - retval.append("\\r"); - continue; - case '\"': - retval.append("\\\""); - continue; - case '\'': - retval.append("\\\'"); - continue; - case '\\': - retval.append("\\\\"); - continue; - default: - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { - String s = "0000" + Integer.toString(ch, 16); - retval.append("\\u" + s.substring(s.length() - 4, s.length())); - } else { - retval.append(ch); - } - continue; - } - } - return retval.toString(); - } - - /** - * Returns a detailed message for the Error when it is thrown by the - * token manager to indicate a lexical error. - * Parameters : - * EOFSeen : indicates if EOF caused the lexicl error - * curLexState : lexical state in which this error occured - * errorLine : line number when the error occured - * errorColumn : column number when the error occured - * errorAfter : prefix that was seen before this error occured - * curchar : the offending character - * Note: You can customize the lexical error message by modifying this method. - */ - protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { - return("Lexical error at line " + - errorLine + ", column " + - errorColumn + ". Encountered: " + - (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + - "after : \"" + addEscapes(errorAfter) + "\""); - } - - /** - * You can also modify the body of this method to customize your error messages. - * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not - * of end-users concern, so you can return something like : - * - * "Internal Error : Please file a bug report .... 
" - * - * from this method for such cases in the release version of your parser. - */ - public String getMessage() { - return super.getMessage(); - } - - /* - * Constructors of various flavors follow. - */ - - public TokenMgrError() { - } - - public TokenMgrError(String message, int reason) { - super(message); - errorCode = reason; - } - - public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { - this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); - } -} +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ +package org.alfresco.repo.search.impl.lucene; + +public class TokenMgrError extends Error +{ + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ + + /** + * Lexical error occured. + */ + static final int LEXICAL_ERROR = 0; + + /** + * An attempt wass made to create a second instance of a static token manager. + */ + static final int STATIC_LEXER_ERROR = 1; + + /** + * Tried to change to an invalid lexical state. + */ + static final int INVALID_LEXICAL_STATE = 2; + + /** + * Detected (and bailed out of) an infinite loop in the token manager. + */ + static final int LOOP_DETECTED = 3; + + /** + * Indicates the reason why the exception is thrown. It will have + * one of the above 4 values. + */ + int errorCode; + + /** + * Replaces unprintable characters by their espaced (or unicode escaped) + * equivalents in the given string + */ + protected static final String addEscapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + + /** + * Returns a detailed message for the Error when it is thrown by the + * token manager to indicate a lexical error. + * Parameters : + * EOFSeen : indicates if EOF caused the lexicl error + * curLexState : lexical state in which this error occured + * errorLine : line number when the error occured + * errorColumn : column number when the error occured + * errorAfter : prefix that was seen before this error occured + * curchar : the offending character + * Note: You can customize the lexical error message by modifying this method. + */ + protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { + return("Lexical error at line " + + errorLine + ", column " + + errorColumn + ". Encountered: " + + (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + + "after : \"" + addEscapes(errorAfter) + "\""); + } + + /** + * You can also modify the body of this method to customize your error messages. 
+ * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + * of end-users concern, so you can return something like : + * + * "Internal Error : Please file a bug report .... " + * + * from this method for such cases in the release version of your parser. + */ + public String getMessage() { + return super.getMessage(); + } + + /* + * Constructors of various flavors follow. + */ + + public TokenMgrError() { + } + + public TokenMgrError(String message, int reason) { + super(message); + errorCode = reason; + } + + public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { + this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); + } +}
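
For reference, below is a minimal usage sketch of the parser API this patch brings in line with the newer Lucene QueryParser (the Operator constants, expanded-term lowercasing, the fuzzy prefix length and the static escape() helper). It is illustrative only and not part of the patch; the field name "TEXT" and the sample query text are invented.

import org.alfresco.repo.search.impl.lucene.QueryParser;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.search.Query;

public class QueryParserUsageSketch {
    public static void main(String[] args) throws Exception {
        QueryParser qp = new QueryParser("TEXT", new SimpleAnalyzer());

        // "capital of Hungary" now parses as capital AND of AND Hungary.
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);

        // Wildcard, prefix, fuzzy and range terms are lower-cased before expansion.
        qp.setLowercaseExpandedTerms(true);

        // Require the first two characters of a fuzzy term to match exactly.
        qp.setFuzzyPrefixLength(2);

        // Escape characters that the parser would otherwise treat as syntax.
        String literal = QueryParser.escape("C++ (2006)");

        Query q = qp.parse("alfresco* AND lucene~0.7 AND " + literal);
        System.out.println(q.toString("TEXT"));
    }
}
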