SEARCH-2396: Avoid indexing duplicated values for APATH and ANAME fields.

This commit is contained in:
Angel Borroy
2020-08-28 15:34:10 +02:00
parent 9c99b10977
commit 0f8b014eb9
2 changed files with 97 additions and 7 deletions

View File

@@ -3339,7 +3339,7 @@ public class SolrInformationServer implements InformationServer
return doc;
}
private void updatePathRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc)
protected void updatePathRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc)
{
clearFields(doc, FIELD_PATH, FIELD_SITE, FIELD_TAG, FIELD_TAG_SUGGEST, FIELD_APATH, FIELD_ANAME);
@@ -3376,6 +3376,9 @@ public class SolrInformationServer implements InformationServer
doc.addField(FIELD_SITE, NO_SITE);
}
// Saving calculated APATH and ANAME elements in order to avoid storing duplicate values
Set<String> addedAPaths = new HashSet<>();
Set<String> addedANames = new HashSet<>();
notNullOrEmpty(nodeMetaData.getAncestorPaths())
.forEach(ancestorPath -> {
String [] elements =
@@ -3387,27 +3390,37 @@ public class SolrInformationServer implements InformationServer
StringBuilder builder = new StringBuilder();
int i = 0;
for(String element : elements)
for (String element : elements)
{
builder.append('/').append(element);
doc.addField(FIELD_APATH, "" + i++ + builder);
String apath = "" + i++ + builder;
if (!addedAPaths.contains(apath))
{
doc.addField(FIELD_APATH, apath);
addedAPaths.add(apath);
}
}
if(builder.length() > 0)
if (builder.length() > 0)
{
doc.addField(FIELD_APATH, "F" + builder);
}
builder = new StringBuilder();
for(int j = 0; j < elements.length; j++)
for (int j = 0; j < elements.length; j++)
{
String element = elements[elements.length - 1 - j];
builder.insert(0, element);
builder.insert(0, '/');
doc.addField(FIELD_ANAME, "" + j + builder);
String aname = "" + j + builder;
if (!addedANames.contains(aname))
{
doc.addField(FIELD_ANAME, aname);
addedANames.add(aname);
}
}
if(builder.length() > 0)
if (builder.length() > 0)
{
doc.addField(FIELD_ANAME, "F" + builder);
}

View File

@@ -26,11 +26,18 @@
package org.alfresco.solr;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.solr.client.NodeMetaData;
import org.alfresco.solr.client.SOLRAPIClient;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
@@ -320,4 +327,74 @@ public class SolrInformationServerTest
assertNull(document);
}
/**
* When storing ANAME and APATH fields, duplicated entries must be skipped so that each value is stored only once.
*
* Test data has been simplified from a real-world use case:
*
* {
* "apath": "/9ea65c3c/1f5eebed/d657ec29/7c7da7c4/3ca56633/85f3f802/5c3a9e15/da781274",
* "path": "/company_home/user_homes/user1/taskers/Tasker-1418058127641/responseSummary-1418332928505/responseSummary-1418332928552/response"
* },
* {
* "apath": "/9ea65c3c/1f5eebed/d657ec29/572c38fc/4ff94a6e/85f3f802/5c3a9e15/da781274",
* "path": "/company_home/user_homes/user2/taskers/tasker/responseSummary-1418332928505/responseSummary-1418332928552/response"
* },
* {
* "apath": "/9ea65c3c/1f5eebed/d657ec29/cebd969b/0decd203/85f3f802/5c3a9e15/da781274",
* "path": "/company_home/user_homes/user3/taskers/tasker/responseSummary-1418332928505/responseSummary-1418332928552/response"
* }
*/
@Test
public void testPathsFieldStorage()
{
    SolrInputDocument doc = new SolrInputDocument();
    NodeMetaData nodeMetaData = new NodeMetaData();
    // Three ancestor paths sharing the first two elements ("1", "2") and the last one ("10"),
    // so some of the generated APATH and ANAME values are identical across paths.
    nodeMetaData.setAncestorPaths(List.of("/1/2/4/7/10", "/1/2/5/8/10", "/1/2/6/9/10"));
    nodeMetaData.setPaths(List.of());

    // Repeat the operation 2 times to verify that updating an existing document removes the
    // previous information in ANAME and APATH fields
    IntStream.range(0, 2).forEach(i ->
    {
        infoServer.updatePathRelatedFields(nodeMetaData, doc);

        // ANAME values are built from the last element backwards: only "0/10" is shared by all paths
        List<String> anames = doc.getFieldValues(QueryConstants.FIELD_ANAME).stream()
                .map(Object::toString)
                .collect(Collectors.toList());

        assertEquals("Expecting only 1 aname for level 0 path: 0/10",
                1L, anames.stream().filter(aname -> aname.startsWith("0/")).count());
        assertEquals("Expecting 3 anames for level 1",
                3L, anames.stream().filter(aname -> aname.startsWith("1/")).count());
        assertEquals("Expecting 3 anames for level 2",
                3L, anames.stream().filter(aname -> aname.startsWith("2/")).count());
        assertEquals("Expecting 3 anames for level 3",
                3L, anames.stream().filter(aname -> aname.startsWith("3/")).count());
        assertEquals("Expecting 3 anames for level 4",
                3L, anames.stream().filter(aname -> aname.startsWith("4/")).count());
        assertEquals("Expecting 3 full anames (F prefix)",
                3L, anames.stream().filter(aname -> aname.startsWith("F/")).count());

        // APATH values are built from the first element forwards: "0/1" and "1/1/2" are shared by all paths
        List<String> apaths = doc.getFieldValues(QueryConstants.FIELD_APATH).stream()
                .map(Object::toString)
                .collect(Collectors.toList());

        assertEquals("Expecting only 1 apath for level 0 path: 0/1",
                1L, apaths.stream().filter(apath -> apath.startsWith("0/")).count());
        assertEquals("Expecting only 1 apath for level 1 path: 1/1/2",
                1L, apaths.stream().filter(apath -> apath.startsWith("1/")).count());
        assertEquals("Expecting 3 apaths for level 2",
                3L, apaths.stream().filter(apath -> apath.startsWith("2/")).count());
        assertEquals("Expecting 3 apaths for level 3",
                3L, apaths.stream().filter(apath -> apath.startsWith("3/")).count());
        assertEquals("Expecting 3 apaths for level 4",
                3L, apaths.stream().filter(apath -> apath.startsWith("4/")).count());
        assertEquals("Expecting 3 full apaths (F prefix)",
                3L, apaths.stream().filter(apath -> apath.startsWith("F/")).count());
    });
}
}