diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index dee1ae407..6f059b3ad 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -408,6 +408,10 @@ public class SolrInformationServer implements InformationServer private long cleanContentLastPurged; + // Get Paths information from Repository for a batch of nodes (true by default) + // When false, Paths information is only recovered for single nodes + private final boolean getPathsInNodeBatches; + // Metadata pulling control private boolean skipDescendantDocsForSpecificTypes; private boolean skipDescendantDocsForSpecificAspects; @@ -605,6 +609,8 @@ public class SolrInformationServer implements InformationServer contentStreamLimit = Integer.parseInt(coreConfiguration.getProperty("alfresco.contentStreamLimit", "10000000")); + getPathsInNodeBatches = Boolean.parseBoolean(coreConfiguration.getProperty("alfresco.metadata.getPathsInNodeBatches", "true")); + props = AlfrescoSolrDataModel.getCommonConfig(); hostName = ConfigUtil.locateProperty(SOLR_HOST, props.getProperty(SOLR_HOST)); @@ -2013,6 +2019,9 @@ public class SolrInformationServer implements InformationServer nmdp.setNodeIds(nodeIds); nmdp.setIncludeChildIds(false); nmdp.setIncludeChildAssociations(false); + // Getting Ancestor information when getting a batch of nodes from repository, + // may contain large information to be stored in memory for a long time. 
+ nmdp.setIncludePaths(getPathsInNodeBatches); // Fetches bulk metadata nmdp.setMaxResults(Integer.MAX_VALUE); @@ -2160,10 +2169,18 @@ public class SolrInformationServer implements InformationServer if (cascadeTrackingEnabled()) { - updatePathRelatedFields(metadata, doc); - updateNamePathRelatedFields(metadata, doc); - updateAncestorRelatedFields(metadata, doc); - doc.setField(FIELD_PARENT_ASSOC_CRC, metadata.getParentAssocsCrc()); + // As metadata is used like final by the lambdas above, we need a new variable here + NodeMetaData extendedMetadata = metadata; + // Ancestor information was not recovered for node batches, so we need to update + // the node with that information before updating the SOLR Document + if (!getPathsInNodeBatches) + { + extendedMetadata = getNodeMetaDataWithPathInfo(metadata.getId()); + } + updatePathRelatedFields(extendedMetadata, doc); + updateNamePathRelatedFields(extendedMetadata, doc); + updateAncestorRelatedFields(extendedMetadata, doc); + doc.setField(FIELD_PARENT_ASSOC_CRC, extendedMetadata.getParentAssocsCrc()); } ofNullable(metadata.getOwner()).ifPresent(owner -> doc.setField(FIELD_OWNER, owner)); @@ -2207,6 +2224,23 @@ public class SolrInformationServer implements InformationServer }); } + /** + * Gets full metadata information for a given nodeId, including Paths information. + * Paths information can be huge in some scenarios, so it's recommended to use + * this method always, as this gets Paths information for a single node. 
+ * @param nodeId Id for the node to get information from repository + * @return Full metadata information for the node + */ + private NodeMetaData getNodeMetaDataWithPathInfo(long nodeId) + { + NodeMetaDataParameters nmdp = new NodeMetaDataParameters(); + nmdp.setFromNodeId(nodeId); + nmdp.setToNodeId(nodeId); + nmdp.setIncludePaths(true); + nmdp.setMaxResults(1); + return getNodesMetaDataFromRepository(nmdp).get().iterator().next(); + } + private void updateAncestorRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc) { doc.removeField(FIELD_ANCESTOR); diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties index 36a308911..433eb8eb4 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties @@ -183,6 +183,12 @@ alfresco.metadata.ignore.datatype.1=app:configurations alfresco.metadata.skipDescendantDocsForSpecificAspects=false #alfresco.metadata.ignore.aspect.0= +# If you are experiencing OOM errors, probably your Paths information is too large to be + # recovered for node batches. You can change this property to "false" in order to get + # paths information only for single nodes. Be aware that when using "false", memory requirements + # are lower but also indexing performance is slower. + alfresco.metadata.getPathsInNodeBatches=true + # Date/Datetime fields only: if this property is set to true (default value) each date/datetime field # # - will be indexed as a whole value