Index of d:locale and accented chars + additional TEXT tests

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4691 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2006-12-22 18:13:46 +00:00
parent 037e5be984
commit f260c2b35f
6 changed files with 99 additions and 8 deletions

View File

@@ -15,3 +15,4 @@ d_dictionary.datatype.d_guid.analyzer=org.alfresco.repo.search.impl.lucene.analy
d_dictionary.datatype.d_category.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser d_dictionary.datatype.d_category.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser
d_dictionary.datatype.d_noderef.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser d_dictionary.datatype.d_noderef.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser
d_dictionary.datatype.d_path.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser d_dictionary.datatype.d_path.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser
d_dictionary.datatype.d_locale.analyzer=org.alfresco.repo.search.impl.lucene.analysis.LowerCaseVerbatimAnalyser

View File

@@ -30,6 +30,7 @@ import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition; import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer;
@@ -44,6 +45,8 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
public class LuceneAnalyser extends Analyzer public class LuceneAnalyser extends Analyzer
{ {
private static Logger s_logger = Logger.getLogger(LuceneAnalyser.class);
// Dictionary service to look up analyser classes by data type and locale. // Dictionary service to look up analyser classes by data type and locale.
private DictionaryService dictionaryService; private DictionaryService dictionaryService;
@@ -204,6 +207,10 @@ public class LuceneAnalyser extends Analyzer
{ {
Class<?> clazz = Class.forName(analyserClassName); Class<?> clazz = Class.forName(analyserClassName);
Analyzer analyser = (Analyzer) clazz.newInstance(); Analyzer analyser = (Analyzer) clazz.newInstance();
if(s_logger.isDebugEnabled())
{
s_logger.debug("Loaded "+analyserClassName+" for type "+dataType.getName());
}
return analyser; return analyser;
} }
catch (ClassNotFoundException e) catch (ClassNotFoundException e)

View File

@@ -1538,8 +1538,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
// nothing to index // nothing to index
continue; continue;
} }
// String strValue = ValueConverter.convert(String.class, value);
// TODO: Need to add with the correct language based analyser
if (isContent) if (isContent)
{ {
@@ -1558,6 +1556,14 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
// TODO: Use the node locale in preference to the system locale // TODO: Use the node locale in preference to the system locale
Locale locale = contentData.getLocale(); Locale locale = contentData.getLocale();
if (locale == null) if (locale == null)
{
Serializable localeProperty = nodeService.getProperty(nodeRef, ContentModel.PROP_LOCALE);
if (localeProperty != null)
{
locale = DefaultTypeConverter.INSTANCE.convert(Locale.class, localeProperty);
}
}
if (locale == null)
{ {
locale = Locale.getDefault(); locale = Locale.getDefault();
} }
@@ -1713,14 +1719,24 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
Field.TermVector.NO)); Field.TermVector.NO));
} }
} }
else if(isText) else if (isText)
{ {
// TODO: Use the node locale in preference to the system locale // TODO: Use the node locale in preference to the system locale
Locale locale = Locale.getDefault(); Locale locale = null;
Serializable localeProperty = nodeService.getProperty(nodeRef, ContentModel.PROP_LOCALE);
if (localeProperty != null)
{
locale = DefaultTypeConverter.INSTANCE.convert(Locale.class, localeProperty);
}
if (locale == null)
{
locale = Locale.getDefault();
}
StringBuilder builder = new StringBuilder(); StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO));
Field.TermVector.NO));
} }
else else
{ {

View File

@@ -272,6 +272,7 @@ public class LuceneTest2 extends TestCase
testProperties.put(QName.createQName(TEST_NAMESPACE, "category-ista"), new NodeRef(storeRef, "CategoryId")); testProperties.put(QName.createQName(TEST_NAMESPACE, "category-ista"), new NodeRef(storeRef, "CategoryId"));
testProperties.put(QName.createQName(TEST_NAMESPACE, "noderef-ista"), n1); testProperties.put(QName.createQName(TEST_NAMESPACE, "noderef-ista"), n1);
testProperties.put(QName.createQName(TEST_NAMESPACE, "path-ista"), nodeService.getPath(n3)); testProperties.put(QName.createQName(TEST_NAMESPACE, "path-ista"), nodeService.getPath(n3));
testProperties.put(QName.createQName(TEST_NAMESPACE, "locale-ista"), Locale.UK);
testProperties.put(QName.createQName(TEST_NAMESPACE, "null"), null); testProperties.put(QName.createQName(TEST_NAMESPACE, "null"), null);
testProperties.put(QName.createQName(TEST_NAMESPACE, "list"), new ArrayList()); testProperties.put(QName.createQName(TEST_NAMESPACE, "list"), new ArrayList());
MLText mlText = new MLText(); MLText mlText = new MLText();
@@ -333,7 +334,7 @@ public class LuceneTest2 extends TestCase
// InputStream is = // InputStream is =
// this.getClass().getClassLoader().getResourceAsStream("test.doc"); // this.getClass().getClassLoader().getResourceAsStream("test.doc");
// writer.putContent(is); // writer.putContent(is);
writer.putContent("The quick brown fox jumped over the lazy dog"); writer.putContent("The quick brown fox jumped over the lazy dog \u00E0\u00EA\u00EE\u00F0\u00F1\u00F6\u00FB\u00FF");
nodeService.addChild(rootNodeRef, n8, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}eight-0")); nodeService.addChild(rootNodeRef, n8, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}eight-0"));
nodeService.addChild(n1, n8, ASSOC_TYPE_QNAME, QName.createQName("{namespace}eight-1")); nodeService.addChild(n1, n8, ASSOC_TYPE_QNAME, QName.createQName("{namespace}eight-1"));
@@ -2425,6 +2426,45 @@ public class LuceneTest2 extends TestCase
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close(); results.close();
// locale
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":\"en_GB_\"",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":en_GB_",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":en_*",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":*_GB_*",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":*_gb_*",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close();
// Type
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testType.toString() + "\"", null, results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testType.toString() + "\"", null,
null); null);
assertEquals(1, results.length()); assertEquals(1, results.length());
@@ -2492,6 +2532,10 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox cm\\:name:fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo AND TYPE:\"" results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo AND TYPE:\""
+ ContentModel.PROP_CONTENT.toString() + "\"", null, null); + ContentModel.PROP_CONTENT.toString() + "\"", null, null);
assertEquals(0, results.length()); assertEquals(0, results.length());
@@ -2507,6 +2551,17 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
// Accents
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"\u00E0\u00EA\u00EE\u00F0\u00F1\u00F6\u00FB\u00FF\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"aeidnouy\"", null, null);
assertEquals(1, results.length());
results.close();
// FTS test // FTS test
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null);
@@ -2540,7 +2595,7 @@ public class LuceneTest2 extends TestCase
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"90\"", null, null); + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"110\"", null, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();

View File

@@ -258,6 +258,16 @@
<stored>true</stored> <stored>true</stored>
<tokenised>true</tokenised> <tokenised>true</tokenised>
</index> </index>
</property>
<property name="test:locale-ista">
<type>d:locale</type>
<mandatory>false</mandatory>
<multiple>false</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property> </property>
</properties> </properties>
<mandatory-aspects> <mandatory-aspects>

View File

@@ -20,6 +20,7 @@ import java.io.Reader;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ISOLatin1AccentFilter;
import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.StopFilter;
@@ -58,6 +59,7 @@ public class AlfrescoStandardAnalyser extends Analyzer
result = new AlfrescoStandardFilter(result); result = new AlfrescoStandardFilter(result);
result = new LowerCaseFilter(result); result = new LowerCaseFilter(result);
result = new StopFilter(result, stopSet); result = new StopFilter(result, stopSet);
result = new ISOLatin1AccentFilter(result);
return result; return result;
} }
} }