From f260c2b35f4eb3d7f9f8a2453a845a354c6a4601 Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Fri, 22 Dec 2006 18:13:46 +0000 Subject: [PATCH] Index of d:locale and accented chars + additional TEXT tests git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4691 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../model/dataTypeAnalyzers.properties | 1 + .../search/impl/lucene/LuceneAnalyser.java | 7 +++ .../impl/lucene/LuceneIndexerImpl2.java | 28 +++++++-- .../repo/search/impl/lucene/LuceneTest2.java | 59 ++++++++++++++++++- .../search/impl/lucene/LuceneTest_model.xml | 10 ++++ .../analysis/AlfrescoStandardAnalyser.java | 2 + 6 files changed, 99 insertions(+), 8 deletions(-) diff --git a/config/alfresco/model/dataTypeAnalyzers.properties b/config/alfresco/model/dataTypeAnalyzers.properties index 7529f289d2..7c3d7f9511 100644 --- a/config/alfresco/model/dataTypeAnalyzers.properties +++ b/config/alfresco/model/dataTypeAnalyzers.properties @@ -15,3 +15,4 @@ d_dictionary.datatype.d_guid.analyzer=org.alfresco.repo.search.impl.lucene.analy d_dictionary.datatype.d_category.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser d_dictionary.datatype.d_noderef.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser d_dictionary.datatype.d_path.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser +d_dictionary.datatype.d_locale.analyzer=org.alfresco.repo.search.impl.lucene.analysis.LowerCaseVerbatimAnalyser diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java index 5aa141b2dd..3c461309b9 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java @@ -30,6 +30,7 @@ import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.PropertyDefinition; import org.alfresco.service.namespace.QName; +import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; @@ -44,6 +45,8 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; public class LuceneAnalyser extends Analyzer { + private static Logger s_logger = Logger.getLogger(LuceneAnalyser.class); + // Dictinary service to look up analyser classes by data type and locale. private DictionaryService dictionaryService; @@ -204,6 +207,10 @@ public class LuceneAnalyser extends Analyzer { Class clazz = Class.forName(analyserClassName); Analyzer analyser = (Analyzer) clazz.newInstance(); + if(s_logger.isDebugEnabled()) + { + s_logger.debug("Loaded "+analyserClassName+" for type "+dataType.getName()); + } return analyser; } catch (ClassNotFoundException e) diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java index 3cf120850d..f29e8737c8 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java @@ -1538,8 +1538,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 // nothing to index continue; } - // String strValue = ValueConverter.convert(String.class, value); - // TODO: Need to add with the correct language based analyser if (isContent) { @@ -1558,6 +1556,14 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 // TODO: Use the node locale in preferanced to the system locale Locale locale = contentData.getLocale(); if (locale == null) + { + Serializable localeProperty = nodeService.getProperty(nodeRef, ContentModel.PROP_LOCALE); + if (localeProperty != null) + { + locale = DefaultTypeConverter.INSTANCE.convert(Locale.class, localeProperty); + } + } + if (locale == null) { locale = Locale.getDefault(); } @@ -1713,14 +1719,24 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 Field.TermVector.NO)); } } - else if(isText) + else if (isText) { // TODO: Use the node locale in preferanced to the system locale - Locale locale = Locale.getDefault(); + Locale locale = null; + + Serializable localeProperty = nodeService.getProperty(nodeRef, ContentModel.PROP_LOCALE); + if (localeProperty != null) + { + locale = DefaultTypeConverter.INSTANCE.convert(Locale.class, localeProperty); + } + + if (locale == null) + { + locale = Locale.getDefault(); + } StringBuilder builder = new StringBuilder(); builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); - doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, - Field.TermVector.NO)); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); } else { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java index c830b4e150..d04dd0ca3b 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java @@ -272,6 +272,7 @@ public class LuceneTest2 extends TestCase testProperties.put(QName.createQName(TEST_NAMESPACE, "category-ista"), new NodeRef(storeRef, "CategoryId")); testProperties.put(QName.createQName(TEST_NAMESPACE, "noderef-ista"), n1); testProperties.put(QName.createQName(TEST_NAMESPACE, "path-ista"), nodeService.getPath(n3)); + testProperties.put(QName.createQName(TEST_NAMESPACE, "locale-ista"), Locale.UK); testProperties.put(QName.createQName(TEST_NAMESPACE, "null"), null); testProperties.put(QName.createQName(TEST_NAMESPACE, "list"), new ArrayList()); MLText mlText = new MLText(); @@ -333,7 +334,7 @@ public class LuceneTest2 extends TestCase // InputStream is = // this.getClass().getClassLoader().getResourceAsStream("test.doc"); // writer.putContent(is); - writer.putContent("The quick brown fox jumped over the lazy dog"); + writer.putContent("The quick brown fox jumped over the lazy dog \u00E0\u00EA\u00EE\u00F0\u00F1\u00F6\u00FB\u00FF"); nodeService.addChild(rootNodeRef, n8, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}eight-0")); nodeService.addChild(n1, n8, ASSOC_TYPE_QNAME, QName.createQName("{namespace}eight-1")); @@ -2425,6 +2426,45 @@ public class LuceneTest2 extends TestCase assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); results.close(); + // locale + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":\"en_GB_\"", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":en_GB_", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":en_*", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":*_GB_*", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "locale-ista")) + ":*_gb_*", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); + results.close(); + + // Type + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testType.toString() + "\"", null, null); assertEquals(1, results.length()); @@ -2492,6 +2532,10 @@ public class LuceneTest2 extends TestCase assertEquals(1, results.length()); results.close(); + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox cm\\:name:fox", null, null); + assertEquals(1, results.length()); + results.close(); + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo AND TYPE:\"" + ContentModel.PROP_CONTENT.toString() + "\"", null, null); assertEquals(0, results.length()); @@ -2507,6 +2551,17 @@ public class LuceneTest2 extends TestCase assertEquals(1, results.length()); results.close(); + + // Accents + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"\u00E0\u00EA\u00EE\u00F0\u00F1\u00F6\u00FB\u00FF\"", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"aeidnouy\"", null, null); + assertEquals(1, results.length()); + results.close(); + // FTS test results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null); @@ -2540,7 +2595,7 @@ public class LuceneTest2 extends TestCase results.close(); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"90\"", null, null); + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"110\"", null, null); assertEquals(1, results.length()); results.close(); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml index 1489a7b4c1..7c8f3491a0 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml @@ -258,6 +258,16 @@ true true + + + d:locale + false + false + + true + true + true + diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java index 2f67d5567c..7c3ef63ef6 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java @@ -20,6 +20,7 @@ import java.io.Reader; import java.util.Set; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.ISOLatin1AccentFilter; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.StopFilter; @@ -58,6 +59,7 @@ public class AlfrescoStandardAnalyser extends Analyzer result = new AlfrescoStandardFilter(result); result = new LowerCaseFilter(result); result = new StopFilter(result, stopSet); + result = new ISOLatin1AccentFilter(result); return result; } }