From 0f8b014eb9da99f7c2cd19e448588f66240d2146 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Fri, 28 Aug 2020 15:34:10 +0200 Subject: [PATCH] SEARCH-2396: Avoid indexing duplicated values for APATH and ANAME fields. --- .../alfresco/solr/SolrInformationServer.java | 27 +++++-- .../solr/SolrInformationServerTest.java | 77 +++++++++++++++++++ 2 files changed, 97 insertions(+), 7 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index 6d3e9ba24..7609ca61a 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -3339,7 +3339,7 @@ public class SolrInformationServer implements InformationServer return doc; } - private void updatePathRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc) + protected void updatePathRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc) { clearFields(doc, FIELD_PATH, FIELD_SITE, FIELD_TAG, FIELD_TAG_SUGGEST, FIELD_APATH, FIELD_ANAME); @@ -3376,6 +3376,9 @@ public class SolrInformationServer implements InformationServer doc.addField(FIELD_SITE, NO_SITE); } + // Saving calculated APATH and ANAME elements in order to avoid storing duplicate values + Set addedAPaths = new HashSet<>(); + Set addedANames = new HashSet<>(); notNullOrEmpty(nodeMetaData.getAncestorPaths()) .forEach(ancestorPath -> { String [] elements = @@ -3387,27 +3390,37 @@ public class SolrInformationServer implements InformationServer StringBuilder builder = new StringBuilder(); int i = 0; - for(String element : elements) + for (String element : elements) { builder.append('/').append(element); - doc.addField(FIELD_APATH, "" + i++ + builder); + String apath = "" + i++ + builder; + if (!addedAPaths.contains(apath)) + { + doc.addField(FIELD_APATH, apath); + 
addedAPaths.add(apath); + } } - if(builder.length() > 0) + if (builder.length() > 0) { doc.addField(FIELD_APATH, "F" + builder); } builder = new StringBuilder(); - for(int j = 0; j < elements.length; j++) + for (int j = 0; j < elements.length; j++) { String element = elements[elements.length - 1 - j]; builder.insert(0, element); builder.insert(0, '/'); - doc.addField(FIELD_ANAME, "" + j + builder); + String aname = "" + j + builder; + if (!addedANames.contains(aname)) + { + doc.addField(FIELD_ANAME, aname); + addedANames.add(aname); + } } - if(builder.length() > 0) + if (builder.length() > 0) { doc.addField(FIELD_ANAME, "F" + builder); } diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/SolrInformationServerTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/SolrInformationServerTest.java index 6f9480da0..f18df2e52 100644 --- a/search-services/alfresco-search/src/test/java/org/alfresco/solr/SolrInformationServerTest.java +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/SolrInformationServerTest.java @@ -26,11 +26,18 @@ package org.alfresco.solr; +import java.util.Collection; +import java.util.Collections; +import java.util.List; import java.util.Properties; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; +import org.alfresco.repo.search.adaptor.lucene.QueryConstants; import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.PropertyDefinition; +import org.alfresco.solr.client.NodeMetaData; import org.alfresco.solr.client.SOLRAPIClient; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; @@ -320,4 +327,74 @@ public class SolrInformationServerTest assertNull(document); } + + /** + * When storing ANAME and APATH fields, duplicated entries must be skipped so that each value is stored only once. 
+ * + * Test data has been simplified from a real-world use case: + * + * { + * "apath": "/9ea65c3c/1f5eebed/d657ec29/7c7da7c4/3ca56633/85f3f802/5c3a9e15/da781274", + * "path": "/company_home/user_homes/user1/taskers/Tasker-1418058127641/responseSummary-1418332928505/responseSummary-1418332928552/response" + * }, + * { + * "apath": "/9ea65c3c/1f5eebed/d657ec29/572c38fc/4ff94a6e/85f3f802/5c3a9e15/da781274", + * "path": "/company_home/user_homes/user2/taskers/tasker/responseSummary-1418332928505/responseSummary-1418332928552/response" + * }, + * { + * "apath": "/9ea65c3c/1f5eebed/d657ec29/cebd969b/0decd203/85f3f802/5c3a9e15/da781274", + * "path": "/company_home/user_homes/user3/taskers/tasker/responseSummary-1418332928505/responseSummary-1418332928552/response" + * } + */ + @Test + public void testPathsFieldStorage() + { + + SolrInputDocument doc = new SolrInputDocument(); + NodeMetaData nodeMetaData = new NodeMetaData(); + nodeMetaData.setAncestorPaths(List.of("/1/2/4/7/10", "/1/2/5/8/10", "/1/2/6/9/10")); + nodeMetaData.setPaths(List.of()); + + // Repeat the operation 2 times to verify that updating an existing document removes previous + // information in ANAME and APATH fields + IntStream.range(0, 2).forEach(i -> + { + + infoServer.updatePathRelatedFields(nodeMetaData, doc); + + List anames = doc.getFieldValues(QueryConstants.FIELD_ANAME).stream() + .map(aname -> aname.toString()) + .collect(Collectors.toList()) + .stream() + .sorted() + .collect(Collectors.toList()); + + assertEquals("Expecting only 1 aname for level 0 path: 0/10", + anames.stream().filter(aname -> aname.startsWith("0/")).count(), 1); + assertEquals(anames.stream().filter(aname -> aname.startsWith("1/")).count(), 3); + assertEquals(anames.stream().filter(aname -> aname.startsWith("2/")).count(), 3); + assertEquals(anames.stream().filter(aname -> aname.startsWith("3/")).count(), 3); + assertEquals(anames.stream().filter(aname -> aname.startsWith("4/")).count(), 3); + 
assertEquals(anames.stream().filter(aname -> aname.startsWith("F/")).count(), 3); + + List apaths = doc.getFieldValues(QueryConstants.FIELD_APATH).stream() + .map(aname -> aname.toString()) + .collect(Collectors.toList()) + .stream() + .sorted() + .collect(Collectors.toList()); + + assertEquals("Expecting only 1 aname for level 0 path: 0/1", + apaths.stream().filter(apath -> apath.startsWith("0/")).count(), 1); + assertEquals("Expecting only 1 aname for level 0 path: 1/1/2", + apaths.stream().filter(apath -> apath.startsWith("1/")).count(), 1); + assertEquals(apaths.stream().filter(apath -> apath.startsWith("2/")).count(), 3); + assertEquals(apaths.stream().filter(apath -> apath.startsWith("3/")).count(), 3); + assertEquals(apaths.stream().filter(apath -> apath.startsWith("4/")).count(), 3); + assertEquals(apaths.stream().filter(apath -> apath.startsWith("F/")).count(), 3); + + }); + + } + } \ No newline at end of file