diff --git a/config/alfresco/messages/patch-service.properties b/config/alfresco/messages/patch-service.properties index 36ad021903..c1941341c3 100644 --- a/config/alfresco/messages/patch-service.properties +++ b/config/alfresco/messages/patch-service.properties @@ -126,3 +126,6 @@ patch.linkNodeExtension.err.unable_to_fix=Auto-fixing of link node file extensio patch.linkNodeExtension.rewritten=Name ''{0}'' rewritten to ''{1}'' patch.systemRegistryBootstrap.description=Bootstraps the node that will hold system registry metadata. + +patch.userAndPersonUserNamesAsIdentifiers.description=Reindex user:user and cm:person uids as identifiers +patch.userAndPersonUserNamesAsIdentifiers.result=Reindexed user:user and cm:person uids as identifiers diff --git a/config/alfresco/patch/patch-services-context.xml b/config/alfresco/patch/patch-services-context.xml index dfd970a5fe..7a4999b8e7 100644 --- a/config/alfresco/patch/patch-services-context.xml +++ b/config/alfresco/patch/patch-services-context.xml @@ -591,5 +591,23 @@ + + + patch.userAndPersonUserNamesAsIdentifiers + patch.userAndPersonUserNamesAsIdentifiers.description + 0 + 35 + 36 + + + + + + + + + + + diff --git a/config/alfresco/version.properties b/config/alfresco/version.properties index 3ffd75d015..ada757c8f1 100644 --- a/config/alfresco/version.properties +++ b/config/alfresco/version.properties @@ -19,4 +19,4 @@ version.build=@build-number@ # Schema number -version.schema=35 +version.schema=36 diff --git a/source/java/org/alfresco/repo/admin/patch/impl/UserAndPersonTokenisationPatch.java b/source/java/org/alfresco/repo/admin/patch/impl/UserAndPersonTokenisationPatch.java new file mode 100644 index 0000000000..ff046aabe1 --- /dev/null +++ b/source/java/org/alfresco/repo/admin/patch/impl/UserAndPersonTokenisationPatch.java @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2005 Alfresco, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package org.alfresco.repo.admin.patch.impl; + +import org.alfresco.i18n.I18NUtil; +import org.alfresco.repo.admin.patch.AbstractPatch; +import org.alfresco.repo.importer.ImporterBootstrap; +import org.alfresco.repo.search.Indexer; +import org.alfresco.repo.search.IndexerAndSearcher; +import org.alfresco.service.cmr.repository.StoreRef; +import org.alfresco.service.cmr.search.ResultSet; +import org.alfresco.service.cmr.search.ResultSetRow; +import org.alfresco.service.cmr.search.SearchParameters; +import org.alfresco.service.cmr.search.SearchService; + +/** + * Patch that re-indexes usr:user and cm:person nodes so that their user name properties are held in the + * index in untokenized form. If they are not, authentication may fail in mixed language use. 
+ * + * @author andyh + * + */ +public class UserAndPersonTokenisationPatch extends AbstractPatch +{ + private static final String MSG_SUCCESS = "patch.userAndPersonUserNamesAsIdentifiers.result"; + /* Collaborators assigned through the setters below: two bootstraps (read for their store references) and the source of per-store indexers. */ + private ImporterBootstrap spacesImporterBootstrap; + private ImporterBootstrap userImporterBootstrap; + private IndexerAndSearcher indexerAndSearcher; + + /** Creates the patch; this constructor assigns none of the fields. */ + public UserAndPersonTokenisationPatch() + { + + } + /** Sets the bootstrap whose store reference is searched for cm:person nodes. */ + public void setSpacesImporterBootstrap(ImporterBootstrap spacesImporterBootstrap) + { + this.spacesImporterBootstrap = spacesImporterBootstrap; + } + /** Sets the bootstrap whose store reference is searched for usr:user nodes. */ + public void setUserImporterBootstrap(ImporterBootstrap userImporterBootstrap) + { + this.userImporterBootstrap = userImporterBootstrap; + } + /** Sets the component used to obtain the indexer for each matching node's store. */ + public void setIndexerAndSearcher(IndexerAndSearcher indexerAndSearcher) + { + this.indexerAndSearcher = indexerAndSearcher; + } + /** Re-indexes the usr:user nodes and then the cm:person nodes, and returns the message looked up for MSG_SUCCESS. */ + @Override + protected String applyInternal() throws Exception + { + reindex("TYPE:\"usr:user\"", userImporterBootstrap.getStoreRef()); + reindex("TYPE:\"cm:person\"", spacesImporterBootstrap.getStoreRef()); + return I18NUtil.getMessage(MSG_SUCCESS); + } + /** Runs the given Lucene query against the given store and calls updateNode on the indexer of each returned node's store; the result set is closed in the finally block. NOTE(review): searchService is not declared in this class - presumably inherited from AbstractPatch, confirm. */ + private void reindex(String query, StoreRef store) + { + SearchParameters sp = new SearchParameters(); + sp.setLanguage(SearchService.LANGUAGE_LUCENE); + sp.setQuery(query); + sp.addStore(store); + ResultSet rs = null; + try + { + rs = searchService.query(sp); + for(ResultSetRow row : rs) + { + Indexer indexer = indexerAndSearcher.getIndexer(row.getNodeRef().getStoreRef()); + indexer.updateNode(row.getNodeRef()); + } + } + finally + { + if(rs != null) + { + rs.close(); + } + } + } +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java index 1ff0513bd4..6ecc923a35 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java @@ -21,6 +21,7 @@ import java.io.Reader; import java.util.HashMap; 
import java.util.Map; +import org.alfresco.model.ContentModel; import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser; import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser; @@ -37,9 +38,8 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; /** - * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser - * should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false - * when adding the field to the document) + * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that + * require no tokenisation. (tokenise should be set to false when adding the field to the document) * * @author andyh */ @@ -47,7 +47,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; public class LuceneAnalyser extends Analyzer { private static Logger s_logger = Logger.getLogger(LuceneAnalyser.class); - + // Dictinary service to look up analyser classes by data type and locale. 
private DictionaryService dictionaryService; @@ -156,35 +156,45 @@ public class LuceneAnalyser extends Analyzer else { QName propertyQName = QName.createQName(fieldName.substring(1)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if (propertyDef != null) + // Temporary fix for person and user uids + + if (propertyQName.equals(ContentModel.PROP_USER_USERNAME) + || propertyQName.equals(ContentModel.PROP_USERNAME)) { - if (propertyDef.isTokenisedInIndex()) + analyser = new VerbatimAnalyser(true); + } + else + { + PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); + if (propertyDef != null) { - DataTypeDefinition dataType = propertyDef.getDataType(); - if (dataType.getName().equals(DataTypeDefinition.CONTENT)) + if (propertyDef.isTokenisedInIndex()) { - analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); - } - else if (dataType.getName().equals(DataTypeDefinition.TEXT)) - { - analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); + DataTypeDefinition dataType = propertyDef.getDataType(); + if (dataType.getName().equals(DataTypeDefinition.CONTENT)) + { + analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); + } + else if (dataType.getName().equals(DataTypeDefinition.TEXT)) + { + analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); + } + else + { + analyser = loadAnalyzer(dataType); + } } else { - analyser = loadAnalyzer(dataType); + analyser = new VerbatimAnalyser(); } } else { - analyser = new VerbatimAnalyser(); + DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT); + analyser = loadAnalyzer(dataType); } } - else - { - DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT); - analyser = loadAnalyzer(dataType); - } } } else @@ -208,9 +218,9 @@ public class LuceneAnalyser extends Analyzer { Class clazz = Class.forName(analyserClassName); Analyzer analyser = (Analyzer) 
clazz.newInstance(); - if(s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { - s_logger.debug("Loaded "+analyserClassName+" for type "+dataType.getName()); + s_logger.debug("Loaded " + analyserClassName + " for type " + dataType.getName()); } return analyser; } @@ -232,8 +242,7 @@ public class LuceneAnalyser extends Analyzer } /** - * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different - * languages etc. + * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc. */ @Override public int getPositionIncrementGap(String fieldName) diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java index 2c9ec42766..93dbcd03a2 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java @@ -65,6 +65,7 @@ import org.alfresco.service.cmr.repository.NodeService; import org.alfresco.service.cmr.repository.Path; import org.alfresco.service.cmr.repository.StoreRef; import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; +import org.alfresco.service.cmr.repository.datatype.TypeConversionException; import org.alfresco.service.cmr.search.ResultSetRow; import org.alfresco.service.cmr.search.SearchParameters; import org.alfresco.service.namespace.QName; @@ -97,6 +98,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 public static final String NOT_INDEXED_CONTENT_MISSING = "nicm"; + public static final String NOT_INDEXED_NO_TYPE_CONVERSION = "nintc"; + private static Logger s_logger = Logger.getLogger(LuceneIndexerImpl2.class); /** @@ -121,8 +124,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 private long maxAtomicTransformationTime = 20; /** - * A list of all deletions 
we have made - at merge these deletions need to be made against the main index. TODO: - * Consider if this information needs to be persisted for recovery + * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: Consider if this information needs to be persisted for recovery */ private Set deletions = new LinkedHashSet(); @@ -141,8 +143,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 private boolean isModified = false; /** - * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just - * fixing up non atomically indexed things from one or more other updates. + * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non atomically indexed things from one or more other + * updates. */ private Boolean isFTSUpdate = null; @@ -689,8 +691,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } /** - * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper - * serialisation against the index as would a data base transaction. + * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper serialisation against the index as would a data base transaction. * * @return */ @@ -804,8 +805,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } /** - * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow - * roll back. + * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back. 
*/ public void setRollbackOnly() @@ -1534,7 +1534,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 // convert value to String for (Serializable serializableValue : DefaultTypeConverter.INSTANCE.getCollection(Serializable.class, value)) { - String strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue); + String strValue = null; + try + { + strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue); + } + catch (TypeConversionException e) + { + doc.add(new Field(attributeName, NOT_INDEXED_NO_TYPE_CONVERSION, Field.Store.NO, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + continue; + } if (strValue == null) { // nothing to index @@ -1727,6 +1737,12 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } else if (isText) { + // Temporary special case for uids + if(propertyName.equals(ContentModel.PROP_USER_USERNAME) || propertyName.equals(ContentModel.PROP_USERNAME)) + { + doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + } + // TODO: Use the node locale in preferanced to the system locale Locale locale = null; @@ -1740,10 +1756,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { locale = Locale.getDefault(); } - StringBuilder builder = new StringBuilder(); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); - doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, - Field.TermVector.NO)); + if (tokenise) + { + StringBuilder builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, + Field.TermVector.NO)); + } + else + { + doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + } } else { diff --git 
a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java index b34c999b5a..f7420c3409 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java @@ -290,6 +290,40 @@ public class LuceneTest2 extends TestCase mlText.addValue(new Locale("ru"), "банан"); mlText.addValue(new Locale("es"), "plátano"); testProperties.put(QName.createQName(TEST_NAMESPACE, "ml"), mlText); + // Any multivalued + ArrayList anyValues = new ArrayList(); + anyValues.add(Integer.valueOf(100)); + anyValues.add("anyValueAsString"); + anyValues.add(new UnknownDataType()); + testProperties.put(QName.createQName(TEST_NAMESPACE, "any-many-ista"), anyValues); + // Content multivalued + // - note only one the first value is used from the collection + // - andit has to go in type d:any as d:content is not allowed to be multivalued + + ArrayList contentValues = new ArrayList(); + contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16")); + testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues); + + + + // MLText multivalued + + MLText mlText1 = new MLText(); + mlText1.addValue(Locale.ENGLISH, "cabbage"); + mlText1.addValue(Locale.FRENCH, "chou"); + + MLText mlText2 = new MLText(); + mlText2.addValue(Locale.ENGLISH, "lemur"); + mlText2.addValue(new Locale("ru"), "лемур"); + + ArrayList mlValues = new ArrayList(); + mlValues.add(mlText1); + mlValues.add(mlText2); + + testProperties.put(QName.createQName(TEST_NAMESPACE, "mltext-many-ista"), mlValues); + + // null in multi valued + ArrayList testList = new ArrayList(); testList.add(null); testProperties.put(QName.createQName(TEST_NAMESPACE, "nullList"), testList); @@ -299,7 +333,14 @@ public class LuceneTest2 extends TestCase n4 = nodeService.createNode(rootNodeRef, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}four"), testType, 
testProperties).getChildRef(); + + ContentWriter multiWriter = contentService.getWriter(n4, QName.createQName(TEST_NAMESPACE, "content-many-ista"), true); + multiWriter.setEncoding( "UTF-16"); + multiWriter.setMimetype("text/plain"); + multiWriter.putContent("multicontent"); + + nodeService.getProperties(n1); nodeService.getProperties(n2); nodeService.getProperties(n3); @@ -2427,6 +2468,80 @@ public class LuceneTest2 extends TestCase assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista"))); results.close(); + // d:any + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"100\"", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista"))); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"anyValueAsString\"", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista"))); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" + + escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"nintc\"", + null, null); + assertEquals(1, results.length()); + assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista"))); + results.close(); + + // multi ml text + + QName multimlQName = QName.createQName(TEST_NAMESPACE, "mltext-many-ista"); + + SearchParameters sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":лемур"); + sp.addLocale(new Locale("ru")); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + 
sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":lemur"); + sp.addLocale(new Locale("en")); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":chou"); + sp.addLocale(new Locale("fr")); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabbage"); + sp.addLocale(new Locale("en")); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + // multivalued content in type d:any + // This should not be indexed as we can not know what to do with content here. 
+ + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(QName.createQName(TEST_NAMESPACE, "content-many-ista").toString()) + ":multicontent"); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + // locale results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@" @@ -2615,7 +2730,7 @@ public class LuceneTest2 extends TestCase // Configuration of TEXT - SearchParameters sp = new SearchParameters(); + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\""); @@ -4592,4 +4707,14 @@ public class LuceneTest2 extends TestCase // test.dictionaryService.getType(test.nodeService.getType(test.rootNodeRef)).getDefaultAspects(); } + /** Serializable type whose only member is serialVersionUID; presumably has no String conversion, so the indexer is expected to record it as "nintc" - confirm against DefaultTypeConverter. */ + public static class UnknownDataType implements Serializable + { + + /** + * Serialization version identifier for this class. + */ + private static final long serialVersionUID = -6729690518573349055L; + + } } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml index 7c8f3491a0..484dc177af 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml @@ -268,6 +268,38 @@ true true + + + + d:any + false + true + + true + true + true + + + + + d:any + false + true + + true + true + true + + + + d:mltext + false + true + + true + true + true +