diff --git a/config/alfresco/ibatis/org.hibernate.dialect.Dialect/patch-common-SqlMap.xml b/config/alfresco/ibatis/org.hibernate.dialect.Dialect/patch-common-SqlMap.xml index 45ac2ca723..2ffedddb7c 100644 --- a/config/alfresco/ibatis/org.hibernate.dialect.Dialect/patch-common-SqlMap.xml +++ b/config/alfresco/ibatis/org.hibernate.dialect.Dialect/patch-common-SqlMap.xml @@ -163,6 +163,10 @@ select count(*) from alf_child_assoc + + select max(id) from alf_child_assoc + + = #idTwo# + ca.id >= #idTwo# and + ca.id < #idThree# ]]> diff --git a/config/alfresco/patch/patch-services-context.xml b/config/alfresco/patch/patch-services-context.xml index 61edb3a6fe..8935cd1139 100644 --- a/config/alfresco/patch/patch-services-context.xml +++ b/config/alfresco/patch/patch-services-context.xml @@ -1877,6 +1877,10 @@ + + + + diff --git a/source/java/org/alfresco/repo/admin/patch/impl/FixNameCrcValuesPatch.java b/source/java/org/alfresco/repo/admin/patch/impl/FixNameCrcValuesPatch.java index 6d3ac2f84a..6e9b211f53 100644 --- a/source/java/org/alfresco/repo/admin/patch/impl/FixNameCrcValuesPatch.java +++ b/source/java/org/alfresco/repo/admin/patch/impl/FixNameCrcValuesPatch.java @@ -69,6 +69,11 @@ public class FixNameCrcValuesPatch extends AbstractPatch private ControlDAO controlDAO; private DictionaryService dictionaryService; + private int batchThreads = 2; + private int batchSize = 1000; + private long batchMaxQueryRange = Long.MAX_VALUE; + private int batchQuerySize = 2000; + private static Log logger = LogFactory.getLog(FixNameCrcValuesPatch.class); private static Log progress_logger = LogFactory.getLog(PatchExecuter.class); @@ -105,6 +110,41 @@ public class FixNameCrcValuesPatch extends AbstractPatch this.dictionaryService = dictionaryService; } + /** + * @param batchThreads the number of threads that will write child association changes + */ + public void setBatchThreads(int batchThreads) + { + this.batchThreads = batchThreads; + } + + /** + * @param batchSize the number of 
child associations that will be modified per transaction + */ + public void setBatchSize(int batchSize) + { + this.batchSize = batchSize; + } + + /** + * @param batchMaxQueryRange the largest ID range that the work provider can query for. + * Lower this if the distribution of ID in alf_child_assoc is not + * uniform and memory problems are encountered. + */ + public void setBatchMaxQueryRange(long batchMaxQueryRange) + { + this.batchMaxQueryRange = batchMaxQueryRange; + } + + /** + * @param batchQuerySize the maximum number of results to pull back before handing off to + * the threads (usually threads * batch size) + */ + public void setBatchQuerySize(int batchQuerySize) + { + this.batchQuerySize = batchQuerySize; + } + @Override protected void checkProperties() { @@ -139,6 +179,7 @@ public class FixNameCrcValuesPatch extends AbstractPatch private FileChannel channel; private Integer assocCount; private Long minAssocId = 0L; + private Long maxAssocId; private FixNameCrcValuesHelper() throws IOException { @@ -185,10 +226,17 @@ public class FixNameCrcValuesPatch extends AbstractPatch return assocCount.intValue(); } - public Collection> getNextWork() + public synchronized Collection> getNextWork() { + if (maxAssocId == null) + { + maxAssocId = patchDAO.getMaxChildAssocId(); + } + double total = (double) getTotalEstimatedWorkSize(); + long rangeMultipler = Math.round(maxAssocId.doubleValue() / total); // Get the next collection - List> results = patchDAO.getChildAssocsForCrcFix(minAssocId, 1000); + List> results = patchDAO.getChildAssocsForCrcFix( + minAssocId, maxAssocId, rangeMultipler, batchMaxQueryRange, batchQuerySize); // Find out what the last ID is int resultsSize = results.size(); if (resultsSize > 0) @@ -207,7 +255,7 @@ public class FixNameCrcValuesPatch extends AbstractPatch "FixNameCrcValuesPatch", transactionService.getRetryingTransactionHelper(), workProvider, - 2, 20, + batchThreads, batchSize, applicationEventPublisher, progress_logger, 1000); diff 
--git a/source/java/org/alfresco/repo/domain/patch/PatchDAO.java b/source/java/org/alfresco/repo/domain/patch/PatchDAO.java index 49fe2e1beb..e6996eef98 100644 --- a/source/java/org/alfresco/repo/domain/patch/PatchDAO.java +++ b/source/java/org/alfresco/repo/domain/patch/PatchDAO.java @@ -141,6 +141,12 @@ public interface PatchDAO */ public int getChildAssocCount(); + /** + * + * @return Returns the maximum child assoc ID or 0 if there are none + */ + Long getMaxChildAssocId(); + /** * The results map contains: * @@ -156,10 +162,21 @@ public interface PatchDAO ]]> * * @param minAssocId the minimum child assoc ID + * @param stopAtAssocId the child assoc ID to stop at i.e. once this ID has been reached, + * pull back no results + * @param rangeMultiplier the ratio of IDs to actual rows (how many IDs to select to get a row) + * @param maxIdRange the largest ID range to use for selects. Normally, the ID range should be + * allowed to grow in accordance with the general distribution of rows, but + * if memory problems are encountered, then the range will need to be set down. 
* @param maxResults the number of child associations to fetch * @return Returns child associations that need fixing */ - public List> getChildAssocsForCrcFix(Long minAssocId, int maxResults); + public List> getChildAssocsForCrcFix( + Long minAssocId, + Long stopAtAssocId, + long rangeMultiplier, + long maxIdRange, + int maxResults); public int updateChildAssocCrc(Long assocId, Long childNodeNameCrc, Long qnameCrc); diff --git a/source/java/org/alfresco/repo/domain/patch/ibatis/PatchDAOImpl.java b/source/java/org/alfresco/repo/domain/patch/ibatis/PatchDAOImpl.java index da7e6ccf3c..64e99fd885 100644 --- a/source/java/org/alfresco/repo/domain/patch/ibatis/PatchDAOImpl.java +++ b/source/java/org/alfresco/repo/domain/patch/ibatis/PatchDAOImpl.java @@ -35,6 +35,7 @@ import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.StoreRef; import org.alfresco.service.namespace.QName; import org.alfresco.util.Pair; +import org.alfresco.util.ParameterCheck; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.orm.ibatis.SqlMapClientTemplate; @@ -68,6 +69,7 @@ public class PatchDAOImpl extends AbstractPatchDAOImpl private static final String SELECT_PERMISSIONS_DM_NODE_COUNT = "alfresco.patch.select_DmNodeCount"; private static final String SELECT_PERMISSIONS_DM_NODE_COUNT_WITH_NEW_ACLS = "alfresco.patch.select_DmNodeCountWherePermissionsHaveChanged"; private static final String SELECT_CHILD_ASSOCS_COUNT = "alfresco.patch.select_allChildAssocsCount"; + private static final String SELECT_CHILD_ASSOCS_MAX_ID = "alfresco.patch.select_maxChildAssocId"; private static final String SELECT_CHILD_ASSOCS_FOR_CRCS = "alfresco.patch.select_allChildAssocsForCrcs"; private static final String SELECT_NODES_BY_TYPE_AND_NAME_PATTERN = "alfresco.patch.select_nodesByTypeAndNamePattern"; @@ -370,15 +372,79 @@ public class PatchDAOImpl extends AbstractPatchDAOImpl return (Integer) 
template.queryForObject(SELECT_CHILD_ASSOCS_COUNT); } - @SuppressWarnings("unchecked") - public List> getChildAssocsForCrcFix(Long minAssocId, int maxResults) + @Override + public Long getMaxChildAssocId() { + Long maxAssocId = (Long) template.queryForObject(SELECT_CHILD_ASSOCS_MAX_ID); + return maxAssocId == null ? 0L : maxAssocId; + } + + @SuppressWarnings("unchecked") + public List> getChildAssocsForCrcFix( + Long minAssocId, + Long stopAtAssocId, + long rangeMultiplier, + long maxIdRange, + int maxResults) + { + ParameterCheck.mandatory("minAssocId", minAssocId); + ParameterCheck.mandatory("stopAtAssocId", stopAtAssocId); + /* + * ALF-4529: Database connection problems when upgrading large sample 2.1.x data set + * We have to set an upper bound on the query that is driven by an index + * otherwise we get OOM on the resultset, even with a limit. + * Since there can be voids in the sequence, we have to check if we have hit the max ID, yet. + */ Long qnameId = qnameDAO.getOrCreateQName(ContentModel.PROP_NAME).getFirst(); + + int queryMaxResults = maxResults; + List> results = new ArrayList>(maxResults); + while (results.size() < maxResults && minAssocId <= stopAtAssocId) + { + // Avoid getting too few results because of voids. + // On the other hand, the distribution of child assoc types can result in swathes of + // the table containing voids and rows of no interest. So we ramp up the multiplier + // to take larger and larger ID ranges in order to quickly walk through these zones. 
+ Long maxAssocId = minAssocId + Math.min(maxResults * rangeMultiplier, maxIdRange); + + IdsEntity entity = new IdsEntity(); + entity.setIdOne(qnameId); + entity.setIdTwo(minAssocId); + entity.setIdThree(maxAssocId); + + try + { + List> rows = template.queryForList(SELECT_CHILD_ASSOCS_FOR_CRCS, entity, 0, queryMaxResults); + // Add these rows to the result + results.addAll(rows); + // Calculate new maxResults + queryMaxResults = maxResults - results.size(); + // Move the minAssocId up to ensure we get new results + // If we got fewer results than queryMaxResults, then there were too many voids and we + // requery using the previous maxAssocId + minAssocId = maxAssocId; + // Double the range multiplier if we have a low hit-rate (<50% of desired size) + if (rows.size() < queryMaxResults / 2) + { + rangeMultiplier *= 2L; + } + } + catch (Throwable e) + { + // Hit a DB problem. Log all the details of the query so that parameters can be adjusted externally. + String msg = + "Failed to query for batch of alf_child_assoc rows; use a lower 'maxIdRange': \n" + + " minAssocId: " + minAssocId + "\n" + + " maxAssocId: " + maxAssocId + "\n" + + " maxIdRange: " + maxIdRange + "\n" + + " stopAtAssocId: " + stopAtAssocId + "\n" + + " rangeMultiplier: " + rangeMultiplier + "\n" + + " queryMaxResults: " + queryMaxResults; + logger.error(msg); + throw new RuntimeException(msg, e); + } + } - IdsEntity entity = new IdsEntity(); - entity.setIdOne(qnameId); - entity.setIdTwo(minAssocId); - List> results = template.queryForList(SELECT_CHILD_ASSOCS_FOR_CRCS, entity, 0, maxResults); // Done return results; }
@@ -156,10 +162,21 @@ public interface PatchDAO ]]> *