diff --git a/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java b/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java index 0e11c043db..b01b9ea8a9 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/CharStream.java @@ -1,110 +1,96 @@ -/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */ -package org.alfresco.repo.search.impl.lucene; - -/** - * This interface describes a character stream that maintains line and - * column number positions of the characters. It also has the capability - * to backup the stream to some extent. An implementation of this - * interface is used in the TokenManager implementation generated by - * JavaCCParser. - * - * All the methods except backup can be implemented in any fashion. backup - * needs to be implemented correctly for the correct operation of the lexer. - * Rest of the methods are all used to get information like line number, - * column number and the String that constitutes a token and are not used - * by the lexer. Hence their implementation won't affect the generated lexer's - * operation. - */ - -public interface CharStream { - - /** - * Returns the next character from the selected input. The method - * of selecting the input is the responsibility of the class - * implementing this interface. Can throw any java.io.IOException. - */ - char readChar() throws java.io.IOException; - - /** - * Returns the column position of the character last read. - * @deprecated - * @see #getEndColumn - */ - int getColumn(); - - /** - * Returns the line number of the character last read. - * @deprecated - * @see #getEndLine - */ - int getLine(); - - /** - * Returns the column number of the last character for current token (being - * matched after the last call to BeginTOken). 
- */ - int getEndColumn(); - - /** - * Returns the line number of the last character for current token (being - * matched after the last call to BeginTOken). - */ - int getEndLine(); - - /** - * Returns the column number of the first character for current token (being - * matched after the last call to BeginTOken). - */ - int getBeginColumn(); - - /** - * Returns the line number of the first character for current token (being - * matched after the last call to BeginTOken). - */ - int getBeginLine(); - - /** - * Backs up the input stream by amount steps. Lexer calls this method if it - * had already read some characters, but could not use them to match a - * (longer) token. So, they will be used again as the prefix of the next - * token and it is the implemetation's responsibility to do this right. - */ - void backup(int amount); - - /** - * Returns the next character that marks the beginning of the next token. - * All characters must remain in the buffer between two successive calls - * to this method to implement backup correctly. - */ - char BeginToken() throws java.io.IOException; - - /** - * Returns a string made up of characters from the marked token beginning - * to the current buffer position. Implementations have the choice of returning - * anything that they want to. For example, for efficiency, one might decide - * to just return null, which is a valid implementation. - */ - String GetImage(); - - /** - * Returns an array of characters that make up the suffix of length 'len' for - * the currently matched token. This is used to build up the matched string - * for use in actions in the case of MORE. 
A simple and inefficient - * implementation of this is as follows : - * - * { - * String t = GetImage(); - * return t.substring(t.length() - len, t.length()).toCharArray(); - * } - */ - char[] GetSuffix(int len); - - /** - * The lexer calls this function to indicate that it is done with the stream - * and hence implementations can free any resources held by this class. - * Again, the body of this function can be just empty and it will not - * affect the lexer's operation. - */ - void Done(); - -} +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */ +package org.alfresco.repo.search.impl.lucene; + +/** + * This interface describes a character stream that maintains line and + * column number positions of the characters. It also has the capability + * to backup the stream to some extent. An implementation of this + * interface is used in the TokenManager implementation generated by + * JavaCCParser. + * + * All the methods except backup can be implemented in any fashion. backup + * needs to be implemented correctly for the correct operation of the lexer. + * Rest of the methods are all used to get information like line number, + * column number and the String that constitutes a token and are not used + * by the lexer. Hence their implementation won't affect the generated lexer's + * operation. + */ + +public interface CharStream { + + /** + * Returns the next character from the selected input. The method + * of selecting the input is the responsibility of the class + * implementing this interface. Can throw any java.io.IOException. + */ + char readChar() throws java.io.IOException; + + /** + * Returns the column number of the last character for current token (being + * matched after the last call to BeginTOken). + */ + int getEndColumn(); + + /** + * Returns the line number of the last character for current token (being + * matched after the last call to BeginTOken). 
+ */ + int getEndLine(); + + /** + * Returns the column number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginColumn(); + + /** + * Returns the line number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginLine(); + + /** + * Backs up the input stream by amount steps. Lexer calls this method if it + * had already read some characters, but could not use them to match a + * (longer) token. So, they will be used again as the prefix of the next + * token and it is the implemetation's responsibility to do this right. + */ + void backup(int amount); + + /** + * Returns the next character that marks the beginning of the next token. + * All characters must remain in the buffer between two successive calls + * to this method to implement backup correctly. + */ + char BeginToken() throws java.io.IOException; + + /** + * Returns a string made up of characters from the marked token beginning + * to the current buffer position. Implementations have the choice of returning + * anything that they want to. For example, for efficiency, one might decide + * to just return null, which is a valid implementation. + */ + String GetImage(); + + /** + * Returns an array of characters that make up the suffix of length 'len' for + * the currently matched token. This is used to build up the matched string + * for use in actions in the case of MORE. A simple and inefficient + * implementation of this is as follows : + * + * { + * String t = GetImage(); + * return t.substring(t.length() - len, t.length()).toCharArray(); + * } + */ + char[] GetSuffix(int len); + + /** + * The lexer calls this function to indicate that it is done with the stream + * and hence implementations can free any resources held by this class. + * Again, the body of this function can be just empty and it will not + * affect the lexer's operation. 
+ */ + void Done(); + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java b/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java index 04d659a096..762b46b1ea 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java @@ -17,8 +17,7 @@ package org.alfresco.repo.search.impl.lucene; * limitations under the License. */ -import java.io.IOException; -import java.io.Reader; +import java.io.*; /** An efficient implementation of JavaCC's CharStream interface.
Note that * this does not do line-number counting, but instead keeps track of the diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java index 1901290807..f48d6b3782 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java @@ -63,7 +63,7 @@ public class LuceneQueryParser extends QueryParser * if the parsing fails */ static public Query parse(String query, String field, Analyzer analyzer, - NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, int defaultOperator) + NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, Operator defaultOperator) throws ParseException { if (s_logger.isDebugEnabled()) @@ -71,7 +71,7 @@ public class LuceneQueryParser extends QueryParser s_logger.debug("Using Alfresco Lucene Query Parser for query: " + query); } LuceneQueryParser parser = new LuceneQueryParser(field, analyzer); - parser.setOperator(defaultOperator); + parser.setDefaultOperator(defaultOperator); parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setDictionaryService(dictionaryService); return parser.parse(query); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java index cf3fa13ae6..db1aba72ad 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java @@ -32,6 +32,7 @@ import org.alfresco.repo.search.Indexer; import org.alfresco.repo.search.QueryRegisterComponent; import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.impl.NodeSearcher; +import org.alfresco.repo.search.impl.lucene.QueryParser.Operator; import 
org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.repository.InvalidNodeRefException; import org.alfresco.service.cmr.repository.NodeRef; @@ -203,14 +204,14 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2 try { - int defaultOperator; + Operator defaultOperator; if (searchParameters.getDefaultOperator() == SearchParameters.AND) { - defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_AND; + defaultOperator = LuceneQueryParser.AND_OPERATOR; } else { - defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_OR; + defaultOperator = LuceneQueryParser.OR_OPERATOR; } Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( diff --git a/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java new file mode 100644 index 0000000000..51430152c4 --- /dev/null +++ b/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java @@ -0,0 +1,268 @@ +package org.alfresco.repo.search.impl.lucene; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; + +import java.util.Vector; + +/** + * A QueryParser which constructs queries to search multiple fields. + * + * @author Kelvin Tan, Daniel Naber + * @version $Revision: 406088 $ + */ +public class MultiFieldQueryParser extends QueryParser +{ + + private String[] fields; + + /** + * Creates a MultiFieldQueryParser. + * + *
It will, when parse(String query)
+ * is called, construct a query like this (assuming the query consists of
+ * two terms and you specify the two fields title
and body
):
+ * (title:term1 body:term1) (title:term2 body:term2)
+ *
+ *
+ * When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+ * + *
+ * +(title:term1 body:term1) +(title:term2 body:term2)
+ *
+ *
+ * In other words, all the query's terms must appear, but it doesn't matter in + * what fields they appear.
+ */ + public MultiFieldQueryParser(String[] fields, Analyzer analyzer) { + super(null, analyzer); + this.fields = fields; + } + + protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + Query q = super.getFieldQuery(fields[i], queryText); + if (q != null) { + if (q instanceof PhraseQuery) { + ((PhraseQuery) q).setSlop(slop); + } + if (q instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) q).setSlop(slop); + } + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + return super.getFieldQuery(field, queryText); + } + + + protected Query getFieldQuery(String field, String queryText) throws ParseException { + return getFieldQuery(field, queryText, 0); + } + + + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException + { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getFuzzyQuery(field, termStr, minSimilarity); + } + + protected Query getPrefixQuery(String field, String termStr) throws ParseException + { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getPrefixQuery(field, termStr); + } + + protected Query getWildcardQuery(String field, String termStr) throws ParseException { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new 
BooleanClause(super.getWildcardQuery(fields[i], termStr), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getWildcardQuery(field, termStr); + } + + + protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { + if (field == null) { + Vector clauses = new Vector(); + for (int i = 0; i < fields.length; i++) { + clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive), + BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + return super.getRangeQuery(field, part1, part2, inclusive); + } + + + + + /** + * Parses a query which searches on the fields specified. + *+ * If x fields are specified, this effectively constructs: + *
+ *
+ * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+ *
+ *
+ * @param queries Query strings to parse
+ * @param fields Fields to search on
+ * @param analyzer Analyzer to use
+ * @throws ParseException if query parsing fails
+ * @throws IllegalArgumentException if the length of the queries array differs
+ * from the length of the fields array
+ */
+  public static Query parse(String[] queries, String[] fields,
+      Analyzer analyzer) throws ParseException
+  {
+    if (queries.length != fields.length) {
+      throw new IllegalArgumentException("queries.length != fields.length");
+    }
+    final BooleanQuery combined = new BooleanQuery();
+    for (int idx = 0; idx < fields.length; idx++) {
+      // queries[idx] is parsed with fields[idx] as its default field; every clause is optional.
+      Query parsed = new QueryParser(fields[idx], analyzer).parse(queries[idx]);
+      combined.add(parsed, BooleanClause.Occur.SHOULD);
+    }
+    return combined;
+  }
+
+
+ /**
+ * Parses a query, searching on the fields specified.
+ * Use this if you need to specify certain fields as required,
+ * and others as prohibited.
+ *
+ * Usage:
+ *
+ * String[] fields = {"filename", "contents", "description"};
+ * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ * BooleanClause.Occur.MUST,
+ * BooleanClause.Occur.MUST_NOT};
+ * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+ *
+ *
+ *+ * The code above would construct a query: + *
+ *
+ * (filename:query) +(contents:query) -(description:query)
+ *
+ *
+ *
+ * @param query Query string to parse
+ * @param fields Fields to search on
+ * @param flags Flags describing the fields
+ * @param analyzer Analyzer to use
+ * @throws ParseException if query parsing fails
+ * @throws IllegalArgumentException if the length of the fields array differs
+ * from the length of the flags array
+ */
+  public static Query parse(String query, String[] fields,
+      BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
+    if (fields.length != flags.length) {
+      throw new IllegalArgumentException("fields.length != flags.length");
+    }
+    final BooleanQuery combined = new BooleanQuery();
+    for (int idx = 0; idx < fields.length; idx++) {
+      // The same query text is parsed once per field; flags[idx] is that clause's occurrence.
+      combined.add(new QueryParser(fields[idx], analyzer).parse(query), flags[idx]);
+    }
+    return combined;
+  }
+
+
+ /**
+ * Parses a query, searching on the fields specified.
+ * Use this if you need to specify certain fields as required,
+ * and others as prohibited.
+ *
+ * Usage:
+ *
+ * String[] query = {"query1", "query2", "query3"};
+ * String[] fields = {"filename", "contents", "description"};
+ * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ * BooleanClause.Occur.MUST,
+ * BooleanClause.Occur.MUST_NOT};
+ * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+ *
+ *
+ *+ * The code above would construct a query: + *
+ *
+ * (filename:query1) +(contents:query2) -(description:query3)
+ *
+ *
+ *
+ * @param queries Query strings to parse
+ * @param fields Fields to search on
+ * @param flags Flags describing the fields
+ * @param analyzer Analyzer to use
+ * @throws ParseException if query parsing fails
+ * @throws IllegalArgumentException if the length of the queries, fields,
+ * and flags array differ
+ */
+  public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
+      Analyzer analyzer) throws ParseException
+  {
+    // The three arrays are index-aligned, so their lengths must all agree.
+    if (!(queries.length == fields.length && queries.length == flags.length))
+      throw new IllegalArgumentException("queries, fields, and flags array have different length");
+    BooleanQuery bQuery = new BooleanQuery();
+    for (int i = 0; i < fields.length; i++) {
+      // queries[i] is parsed with fields[i] as its default field; flags[i] is the clause's occurrence.
+      Query q = new QueryParser(fields[i], analyzer).parse(queries[i]);
+      bQuery.add(q, flags[i]);
+    }
+    return bQuery;
+  }
+
+}
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java b/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java
index c19638b39f..61ab9f693c 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/ParseException.java
@@ -1,192 +1,192 @@
-/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
-package org.alfresco.repo.search.impl.lucene;
-
-/**
- * This exception is thrown when parse errors are encountered.
- * You can explicitly create objects of this exception type by
- * calling the method generateParseException in the generated
- * parser.
- *
- * You can modify this class to customize your error reporting
- * mechanisms so long as you retain the public fields.
- */
-public class ParseException extends Exception {
-
- /**
- * This constructor is used by the method "generateParseException"
- * in the generated parser. Calling this constructor generates
- * a new object of this type with the fields "currentToken",
- * "expectedTokenSequences", and "tokenImage" set. The boolean
- * flag "specialConstructor" is also set to true to indicate that
- * this constructor was used to create this object.
- * This constructor calls its super class with the empty string
- * to force the "toString" method of parent class "Throwable" to
- * print the error message in the form:
- * ParseException: * Examples of appropriately formatted queries can be found in the test cases. + * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax + * documentation. *
* + *In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. date:[6/1/2005 TO 6/4/2005] + * produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note + * that the format of the accepted input depends on {@link #setLocale(Locale) the locale}. This + * feature also assumes that your index uses the {@link DateField} class to store dates. + * If you use a different format (e.g. {@link DateTools}) and you still want QueryParser + * to turn local dates in range queries into valid queries you need to create your own + * query parser that inherits QueryParser and overrides + * {@link #getRangeQuery(String, String, String, boolean)}.
+ * + *Note that QueryParser is not thread-safe.
+ * * @author Brian Goetz * @author Peter Halacsy * @author Tatu Saloranta @@ -71,36 +71,37 @@ public class QueryParser implements QueryParserConstants { private static final int MOD_NOT = 10; private static final int MOD_REQ = 11; - public static final int DEFAULT_OPERATOR_OR = 0; - public static final int DEFAULT_OPERATOR_AND = 1; + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; /** The actual operator that parser uses to combine query terms */ - private int operator = DEFAULT_OPERATOR_OR; + private Operator operator = OR_OPERATOR; - /** - * Whether terms of wildcard and prefix queries are to be automatically - * lower-cased or not. Default istrue
.
- */
- boolean lowercaseWildcardTerms = true;
+ boolean lowercaseExpandedTerms = true;
Analyzer analyzer;
String field;
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
+ int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
Locale locale = Locale.getDefault();
- /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
- * @param query the query string to be parsed.
- * @param field the default field for query terms.
- * @param analyzer used to find terms in the query text.
- * @throws ParseException if the parsing fails
+ /** The default operator for parsing queries.
+ * Use {@link QueryParser#setDefaultOperator} to change it.
*/
- static public Query parse(String query, String field, Analyzer analyzer)
- throws ParseException {
- QueryParser parser = new QueryParser(field, analyzer);
- return parser.parse(query);
+ static public final class Operator extends Parameter {
+ private Operator(String name) { // private: instances are created only inside this class
+ super(name);
+ }
+ static public final Operator OR = new Operator("OR"); // default mode: terms without modifiers are optional
+ static public final Operator AND = new Operator("AND"); // terms without modifiers are treated as a conjunction
+ }
+
/** Constructs a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -111,8 +112,7 @@ public class QueryParser implements QueryParserConstants {
field = f;
}
- /** Parses a query string, returning a
- * Query.
+ /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
@@ -144,18 +144,36 @@ public class QueryParser implements QueryParserConstants {
}
/**
- * Get the default minimal similarity for fuzzy queries.
+ * Get the minimal similarity for fuzzy queries.
*/
public float getFuzzyMinSim() {
return fuzzyMinSim;
}
+
/**
- *Set the default minimum similarity for fuzzy queries.
+ * Set the minimum similarity for fuzzy queries.
+ * Default is 0.5f.
*/
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
+ /**
+ * Get the prefix length for fuzzy queries.
+ * @return Returns the fuzzyPrefixLength.
+ */
+ public int getFuzzyPrefixLength() {
+ return fuzzyPrefixLength;
+ }
+
+ /**
+ * Set the prefix length for fuzzy queries. Default is 0.
+ * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
+ */
+ public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
+ this.fuzzyPrefixLength = fuzzyPrefixLength;
+ }
+
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero.
@@ -171,32 +189,43 @@ public class QueryParser implements QueryParserConstants {
return phraseSlop;
}
- /**
- * Sets the boolean operator of the QueryParser.
- * In classic mode (DEFAULT_OPERATOR_OR
) terms without any modifiers
- * are considered optional: for example capital of Hungary
is equal to
- * capital OR of OR Hungary
.DEFAULT_OPERATOR_AND
terms are considered to be in conjuction: the
- * above mentioned query is parsed as capital AND of AND Hungary
- */
- public void setOperator(int operator) {
- this.operator = operator;
- }
/**
- * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
- * or DEFAULT_OPERATOR_OR.
+ * Sets the boolean operator of the QueryParser.
+ * In default mode (OR_OPERATOR
) terms without any modifiers
+ * are considered optional: for example capital of Hungary
is equal to
+ * capital OR of OR Hungary
.AND_OPERATOR
mode terms are considered to be in conjunction: the
+ * above mentioned query is parsed as capital AND of AND Hungary
*/
- public int getOperator() {
+ public void setDefaultOperator(Operator op) {
+ this.operator = op;
+ }
+
+
+ /**
+ * Gets implicit operator setting, which will be either AND_OPERATOR
+ * or OR_OPERATOR.
+ */
+ public Operator getDefaultOperator() {
return operator;
}
- public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
- this.lowercaseWildcardTerms = lowercaseWildcardTerms;
+
+ /**
+ * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
+ * lower-cased or not. Default is true
.
+ */
+ public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
+ this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}
- public boolean getLowercaseWildcardTerms() {
- return lowercaseWildcardTerms;
+
+ /**
+ * @see #setLowercaseExpandedTerms(boolean)
+ */
+ public boolean getLowercaseExpandedTerms() {
+ return lowercaseExpandedTerms;
}
/**
@@ -213,7 +242,7 @@ public class QueryParser implements QueryParserConstants {
return locale;
}
- protected void addClause(Vector clauses, int conj, int mods, Query q) {
+ protected void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
@@ -221,17 +250,17 @@ public class QueryParser implements QueryParserConstants {
if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.isProhibited())
- c.setOccur(Occur.MUST);
+ c.setOccur(BooleanClause.Occur.MUST);
}
- if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
+ if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.isProhibited())
- c.setOccur(Occur.SHOULD);
+ c.setOccur(BooleanClause.Occur.SHOULD);
}
// We might have been passed a null query; the term might have been
@@ -239,7 +268,7 @@ public class QueryParser implements QueryParserConstants {
if (q == null)
return;
- if (operator == DEFAULT_OPERATOR_OR) {
+ if (operator == OR_OPERATOR) {
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both.
prohibited = (mods == MOD_NOT);
@@ -253,31 +282,16 @@ public class QueryParser implements QueryParserConstants {
prohibited = (mods == MOD_NOT);
required = (!prohibited && conj != CONJ_OR);
}
- Occur occur = Occur.SHOULD;
- if(prohibited)
- {
- occur = Occur.MUST_NOT;
- }
- if(required)
- {
- occur = Occur.MUST;
- }
- clauses.addElement(new BooleanClause(q, occur));
+ if (required && !prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
+ else if (!required && !prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
+ else if (!required && prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
+ else
+ throw new RuntimeException("Clause cannot be both required and prohibited");
}
- /**
- * Note that parameter analyzer is ignored. Calls inside the parser always
- * use class member analyser. This method will be deprecated and substituted
- * by {@link #getFieldQuery(String, String)} in future versions of Lucene.
- * Currently overwriting either of these methods works.
- *
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText) throws ParseException {
- return getFieldQuery(field, queryText);
- }
/**
* @exception ParseException throw in overridden method to disallow
@@ -286,10 +300,11 @@ public class QueryParser implements QueryParserConstants {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
- TokenStream source = analyzer.tokenStream(field,
- new StringReader(queryText));
+ TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
Vector v = new Vector();
org.apache.lucene.analysis.Token t;
+ int positionCount = 0;
+ boolean severalTokensAtSamePosition = false;
while (true) {
try {
@@ -300,7 +315,11 @@ public class QueryParser implements QueryParserConstants {
}
if (t == null)
break;
- v.addElement(t.termText());
+ v.addElement(t);
+ if (t.getPositionIncrement() != 0)
+ positionCount += t.getPositionIncrement();
+ else
+ severalTokensAtSamePosition = true;
}
try {
source.close();
@@ -311,36 +330,52 @@ public class QueryParser implements QueryParserConstants {
if (v.size() == 0)
return null;
- else if (v.size() == 1)
- return new TermQuery(new Term(field, (String) v.elementAt(0)));
- else {
- PhraseQuery q = new PhraseQuery();
- q.setSlop(phraseSlop);
- for (int i=0; i\
.
+ * expects to be escaped are escaped by a preceding \
.
*/
public static String escape(String s) {
- StringBuilder sb = new StringBuilder(s.length());
+ StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
// NOTE: keep this in sync with _ESCAPED_CHAR below!
@@ -551,7 +597,16 @@ public class QueryParser implements QueryParserConstants {
return sb.toString();
}
+ /**
+ * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
+ * Usage:java org.apache.lucene.queryParser.QueryParser <input>
+ */
public static void main(String[] args) throws Exception {
+ if (args.length == 0) {
+ System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
+ System.exit(0);
+ }
QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
@@ -789,12 +844,9 @@ public class QueryParser implements QueryParserConstants {
if(fms < 0.0f || fms > 1.0f){
{if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
}
- if(fms == fuzzyMinSim)
- q = getFuzzyQuery(field, termImage);
- else
- q = getFuzzyQuery(field, termImage, fms);
+ q = getFuzzyQuery(field, termImage,fms);
} else {
- q = getFieldQuery(field, analyzer, termImage);
+ q = getFieldQuery(field, termImage);
}
break;
case RANGEIN_START:
@@ -851,7 +903,7 @@ public class QueryParser implements QueryParserConstants {
} else {
goop2.image = discardEscapeChar(goop2.image);
}
- q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
+ q = getRangeQuery(field, goop1.image, goop2.image, true);
break;
case RANGEEX_START:
jj_consume_token(RANGEEX_START);
@@ -908,7 +960,7 @@ public class QueryParser implements QueryParserConstants {
goop2.image = discardEscapeChar(goop2.image);
}
- q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
+ q = getRangeQuery(field, goop1.image, goop2.image, false);
break;
case QUOTED:
term = jj_consume_token(QUOTED);
@@ -937,7 +989,7 @@ public class QueryParser implements QueryParserConstants {
}
catch (Exception ignored) { }
}
- q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
+ q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
break;
default:
jj_la1[21] = jj_gen;
@@ -1183,6 +1235,7 @@ public class QueryParser implements QueryParserConstants {
final private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 1; i++) {
+ try {
JJCalls p = jj_2_rtns[i];
do {
if (p.gen > jj_gen) {
@@ -1193,6 +1246,7 @@ public class QueryParser implements QueryParserConstants {
}
p = p.next;
} while (p != null);
+ } catch(LookaheadSuccess ls) { }
}
jj_rescan = false;
}
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj
index b5a9c4350c..6098160919 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj
+++ b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.jj
@@ -32,10 +32,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
+import org.apache.lucene.util.Parameter;
/**
- * This class is generated by JavaCC. The only method that clients should need
- * to call is parse().
+ * This class is generated by JavaCC. The most important method is
+ * {@link #parse(String)}.
*
* The syntax for query strings is as follows:
* A Query is a series of clauses.
@@ -63,9 +64,21 @@ import org.apache.lucene.search.*;
*
* * Examples of appropriately formatted queries can be found in the test cases. + * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax + * documentation. *
* + *In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. date:[6/1/2005 TO 6/4/2005] + * produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note + * that the format of the accepted input depends on {@link #setLocale(Locale) the locale}. This + * feature also assumes that your index uses the {@link DateField} class to store dates. + * If you use a different format (e.g. {@link DateTools}) and you still want QueryParser + * to turn local dates in range queries into valid queries you need to create your own + * query parser that inherits QueryParser and overrides + * {@link #getRangeQuery(String, String, String, boolean)}.
+ * + *Note that QueryParser is not thread-safe.
+ * * @author Brian Goetz * @author Peter Halacsy * @author Tatu Saloranta @@ -81,36 +94,37 @@ public class QueryParser { private static final int MOD_NOT = 10; private static final int MOD_REQ = 11; - public static final int DEFAULT_OPERATOR_OR = 0; - public static final int DEFAULT_OPERATOR_AND = 1; + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; /** The actual operator that parser uses to combine query terms */ - private int operator = DEFAULT_OPERATOR_OR; + private Operator operator = OR_OPERATOR; - /** - * Whether terms of wildcard and prefix queries are to be automatically - * lower-cased or not. Default istrue
.
- */
- boolean lowercaseWildcardTerms = true;
+ boolean lowercaseExpandedTerms = true;
Analyzer analyzer;
String field;
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
+ int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
Locale locale = Locale.getDefault();
- /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
- * @param query the query string to be parsed.
- * @param field the default field for query terms.
- * @param analyzer used to find terms in the query text.
- * @throws ParseException if the parsing fails
+ /** The default operator for parsing queries.
+ * Use {@link QueryParser#setDefaultOperator} to change it.
*/
- static public Query parse(String query, String field, Analyzer analyzer)
- throws ParseException {
- QueryParser parser = new QueryParser(field, analyzer);
- return parser.parse(query);
+ static public final class Operator extends Parameter {
+ private Operator(String name) {
+ super(name);
+ }
+ static public final Operator OR = new Operator("OR");
+ static public final Operator AND = new Operator("AND");
}
+
/** Constructs a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -121,8 +135,7 @@ public class QueryParser {
field = f;
}
- /** Parses a query string, returning a
- * Query.
+ /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
@@ -154,17 +167,35 @@ public class QueryParser {
}
/**
- * Get the default minimal similarity for fuzzy queries.
+ * Get the minimal similarity for fuzzy queries.
*/
public float getFuzzyMinSim() {
return fuzzyMinSim;
}
+
/**
- *Set the default minimum similarity for fuzzy queries.
+ * Set the minimum similarity for fuzzy queries.
+ * Default is 0.5f.
*/
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
+
+ /**
+ * Get the prefix length for fuzzy queries.
+ * @return Returns the fuzzyPrefixLength.
+ */
+ public int getFuzzyPrefixLength() {
+ return fuzzyPrefixLength;
+ }
+
+ /**
+ * Set the prefix length for fuzzy queries. Default is 0.
+ * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
+ */
+ public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
+ this.fuzzyPrefixLength = fuzzyPrefixLength;
+ }
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
@@ -181,32 +212,43 @@ public class QueryParser {
return phraseSlop;
}
- /**
- * Sets the boolean operator of the QueryParser.
- * In classic mode (DEFAULT_OPERATOR_OR
) terms without any modifiers
- * are considered optional: for example capital of Hungary
is equal to
- * capital OR of OR Hungary
.DEFAULT_OPERATOR_AND
terms are considered to be in conjuction: the
- * above mentioned query is parsed as capital AND of AND Hungary
- */
- public void setOperator(int operator) {
- this.operator = operator;
- }
/**
- * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
- * or DEFAULT_OPERATOR_OR.
+ * Sets the boolean operator of the QueryParser.
+ * In default mode (OR_OPERATOR
) terms without any modifiers
+ * are considered optional: for example capital of Hungary
is equal to
+ * capital OR of OR Hungary
.AND_OPERATOR
mode terms are considered to be in conjunction: the
+ * above mentioned query is parsed as capital AND of AND Hungary
*/
- public int getOperator() {
+ public void setDefaultOperator(Operator op) {
+ this.operator = op;
+ }
+
+
+ /**
+ * Gets implicit operator setting, which will be either AND_OPERATOR
+ * or OR_OPERATOR.
+ */
+ public Operator getDefaultOperator() {
return operator;
}
- public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
- this.lowercaseWildcardTerms = lowercaseWildcardTerms;
+
+ /**
+ * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
+ * lower-cased or not. Default is true
.
+ */
+ public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
+ this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}
- public boolean getLowercaseWildcardTerms() {
- return lowercaseWildcardTerms;
+
+ /**
+ * @see #setLowercaseExpandedTerms(boolean)
+ */
+ public boolean getLowercaseExpandedTerms() {
+ return lowercaseExpandedTerms;
}
/**
@@ -222,26 +264,26 @@ public class QueryParser {
public Locale getLocale() {
return locale;
}
-
- protected void addClause(Vector clauses, int conj, int mods, Query q) {
+
+ protected void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
// unless it's already prohibited
if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
- if (!c.prohibited)
- c.required = true;
+ if (!c.isProhibited())
+ c.setOccur(BooleanClause.Occur.MUST);
}
- if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
+ if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
- if (!c.prohibited)
- c.required = false;
+ if (!c.isProhibited())
+ c.setOccur(BooleanClause.Occur.SHOULD);
}
// We might have been passed a null query; the term might have been
@@ -249,7 +291,7 @@ public class QueryParser {
if (q == null)
return;
- if (operator == DEFAULT_OPERATOR_OR) {
+ if (operator == OR_OPERATOR) {
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both.
prohibited = (mods == MOD_NOT);
@@ -263,23 +305,17 @@ public class QueryParser {
prohibited = (mods == MOD_NOT);
required = (!prohibited && conj != CONJ_OR);
}
- clauses.addElement(new BooleanClause(q, required, prohibited));
- }
-
- /**
- * Note that parameter analyzer is ignored. Calls inside the parser always
- * use class member analyser. This method will be deprecated and substituted
- * by {@link #getFieldQuery(String, String)} in future versions of Lucene.
- * Currently overwriting either of these methods works.
- *
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText) throws ParseException {
- return getFieldQuery(field, queryText);
+ if (required && !prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
+ else if (!required && !prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
+ else if (!required && prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
+ else
+ throw new RuntimeException("Clause cannot be both required and prohibited");
}
+
/**
* @exception ParseException throw in overridden method to disallow
*/
@@ -287,10 +323,11 @@ public class QueryParser {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
- TokenStream source = analyzer.tokenStream(field,
- new StringReader(queryText));
+ TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
Vector v = new Vector();
org.apache.lucene.analysis.Token t;
+ int positionCount = 0;
+ boolean severalTokensAtSamePosition = false;
while (true) {
try {
@@ -301,7 +338,11 @@ public class QueryParser {
}
if (t == null)
break;
- v.addElement(t.termText());
+ v.addElement(t);
+ if (t.getPositionIncrement() != 0)
+ positionCount += t.getPositionIncrement();
+ else
+ severalTokensAtSamePosition = true;
}
try {
source.close();
@@ -312,36 +353,52 @@ public class QueryParser {
if (v.size() == 0)
return null;
- else if (v.size() == 1)
- return new TermQuery(new Term(field, (String) v.elementAt(0)));
- else {
- PhraseQuery q = new PhraseQuery();
- q.setSlop(phraseSlop);
- for (int i=0; i\
.
+ * expects to be escaped are escaped by a preceding \
.
*/
public static String escape(String s) {
StringBuffer sb = new StringBuffer();
@@ -553,7 +620,16 @@ public class QueryParser {
return sb.toString();
}
+ /**
+ * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
+ * Usage: java org.apache.lucene.queryParser.QueryParser <input>
+ */
public static void main(String[] args) throws Exception {
+ if (args.length == 0) {
+ System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
+ System.exit(0);
+ }
QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
@@ -580,17 +656,18 @@ PARSER_END(QueryParser)
}