mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-08-07 17:49:17 +00:00
Fix ALF-4529: Database connection problems when upgrading large sample 2.1.x data set
- Range-based query to drive WorkProvider - Added support for sparse datasets - Added range-limiting and other parameters: use in the event of unevenly distributed IDs git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@23118 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -163,6 +163,10 @@
|
||||
select count(*) from alf_child_assoc
|
||||
</select>
|
||||
|
||||
<select id="select_maxChildAssocId" resultClass="java.lang.Long">
|
||||
select max(id) from alf_child_assoc
|
||||
</select>
|
||||
|
||||
<select id="select_allChildAssocsForCrcs" parameterClass="Ids" resultMap="result_childAssocsForCrc">
|
||||
<![CDATA[
|
||||
select
|
||||
@@ -179,7 +183,8 @@
|
||||
join alf_node cn on (cn.id = ca.child_node_id)
|
||||
left join alf_node_properties cnp on (cnp.node_id = cn.id and cnp.qname_id = #idOne#)
|
||||
where
|
||||
ca.id >= #idTwo#
|
||||
ca.id >= #idTwo# and
|
||||
ca.id < #idThree#
|
||||
]]>
|
||||
</select>
|
||||
|
||||
|
@@ -1877,6 +1877,10 @@
|
||||
<property name="dictionaryService">
|
||||
<ref bean="dictionaryService" />
|
||||
</property>
|
||||
<property name="batchThreads" value="2"/>
|
||||
<property name="batchSize" value="1000"/>
|
||||
<property name="batchMaxQueryRange" value="9223372036854775807"/>
|
||||
<property name="batchQuerySize" value="2000"/>
|
||||
</bean>
|
||||
|
||||
<bean id="patch.redeployNominatedInvitationProcessWithPropsForShare" class="org.alfresco.repo.admin.patch.impl.GenericWorkflowPatch" parent="basePatch" >
|
||||
|
@@ -69,6 +69,11 @@ public class FixNameCrcValuesPatch extends AbstractPatch
|
||||
private ControlDAO controlDAO;
|
||||
private DictionaryService dictionaryService;
|
||||
|
||||
private int batchThreads = 2;
|
||||
private int batchSize = 1000;
|
||||
private long batchMaxQueryRange = Long.MAX_VALUE;
|
||||
private int batchQuerySize = 2000;
|
||||
|
||||
private static Log logger = LogFactory.getLog(FixNameCrcValuesPatch.class);
|
||||
private static Log progress_logger = LogFactory.getLog(PatchExecuter.class);
|
||||
|
||||
@@ -105,6 +110,41 @@ public class FixNameCrcValuesPatch extends AbstractPatch
|
||||
this.dictionaryService = dictionaryService;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param batchThreads the number of threads that will write child association changes
|
||||
*/
|
||||
public void setBatchThreads(int batchThreads)
|
||||
{
|
||||
this.batchThreads = batchThreads;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param batchSize the number of child associations that will be modified per transaction
|
||||
*/
|
||||
public void setBatchSize(int batchSize)
|
||||
{
|
||||
this.batchSize = batchSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param batchMaxQueryRange the largest ID range that the work provider can query for.
|
||||
* Lower this if the distribution of IDs in alf_child_assoc is not
|
||||
* uniform and memory problems are encountered.
|
||||
*/
|
||||
public void setBatchMaxQueryRange(long batchMaxQueryRange)
|
||||
{
|
||||
this.batchMaxQueryRange = batchMaxQueryRange;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param batchQuerySize the maximum number of results to pull back before handing off to
|
||||
* the threads (usually threads * batch size)
|
||||
*/
|
||||
public void setBatchQuerySize(int batchQuerySize)
|
||||
{
|
||||
this.batchQuerySize = batchQuerySize;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void checkProperties()
|
||||
{
|
||||
@@ -139,6 +179,7 @@ public class FixNameCrcValuesPatch extends AbstractPatch
|
||||
private FileChannel channel;
|
||||
private Integer assocCount;
|
||||
private Long minAssocId = 0L;
|
||||
private Long maxAssocId;
|
||||
|
||||
private FixNameCrcValuesHelper() throws IOException
|
||||
{
|
||||
@@ -185,10 +226,17 @@ public class FixNameCrcValuesPatch extends AbstractPatch
|
||||
return assocCount.intValue();
|
||||
}
|
||||
|
||||
public Collection<Map<String, Object>> getNextWork()
|
||||
public synchronized Collection<Map<String, Object>> getNextWork()
|
||||
{
|
||||
if (maxAssocId == null)
|
||||
{
|
||||
maxAssocId = patchDAO.getMaxChildAssocId();
|
||||
}
|
||||
double total = (double) getTotalEstimatedWorkSize();
|
||||
long rangeMultipler = Math.round(maxAssocId.doubleValue() / total);
|
||||
// Get the next collection
|
||||
List<Map<String, Object>> results = patchDAO.getChildAssocsForCrcFix(minAssocId, 1000);
|
||||
List<Map<String, Object>> results = patchDAO.getChildAssocsForCrcFix(
|
||||
minAssocId, maxAssocId, rangeMultipler, batchMaxQueryRange, batchQuerySize);
|
||||
// Find out what the last ID is
|
||||
int resultsSize = results.size();
|
||||
if (resultsSize > 0)
|
||||
@@ -207,7 +255,7 @@ public class FixNameCrcValuesPatch extends AbstractPatch
|
||||
"FixNameCrcValuesPatch",
|
||||
transactionService.getRetryingTransactionHelper(),
|
||||
workProvider,
|
||||
2, 20,
|
||||
batchThreads, batchSize,
|
||||
applicationEventPublisher,
|
||||
progress_logger, 1000);
|
||||
|
||||
|
@@ -141,6 +141,12 @@ public interface PatchDAO
|
||||
*/
|
||||
public int getChildAssocCount();
|
||||
|
||||
/**
|
||||
*
|
||||
* @return Returns the maximum child assoc ID or <tt>0</tt> if there are none
|
||||
*/
|
||||
Long getMaxChildAssocId();
|
||||
|
||||
/**
|
||||
* The results map contains:
|
||||
* <pre>
|
||||
@@ -156,10 +162,21 @@ public interface PatchDAO
|
||||
]]>
|
||||
* </pre>
|
||||
* @param minAssocId the minimum child assoc ID
|
||||
* @param stopAtAssocId the child assoc ID to stop at i.e. once this ID has been reached,
|
||||
* pull back no results
|
||||
* @param rangeMultiplier the ratio of IDs to actual rows (how many IDs to select to get a row)
|
||||
* @param maxIdRange the largest ID range to use for selects. Normally, the ID range should be
|
||||
* allowed to grow in accordance with the general distribution of rows, but
|
||||
* if memory problems are encountered, then the range will need to be set down.
|
||||
* @param maxResults the number of child associations to fetch
|
||||
* @return Returns child associations <b>that need fixing</b>
|
||||
*/
|
||||
public List<Map<String, Object>> getChildAssocsForCrcFix(Long minAssocId, int maxResults);
|
||||
public List<Map<String, Object>> getChildAssocsForCrcFix(
|
||||
Long minAssocId,
|
||||
Long stopAtAssocId,
|
||||
long rangeMultiplier,
|
||||
long maxIdRange,
|
||||
int maxResults);
|
||||
|
||||
public int updateChildAssocCrc(Long assocId, Long childNodeNameCrc, Long qnameCrc);
|
||||
|
||||
|
@@ -35,6 +35,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.Pair;
|
||||
import org.alfresco.util.ParameterCheck;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.orm.ibatis.SqlMapClientTemplate;
|
||||
@@ -68,6 +69,7 @@ public class PatchDAOImpl extends AbstractPatchDAOImpl
|
||||
private static final String SELECT_PERMISSIONS_DM_NODE_COUNT = "alfresco.patch.select_DmNodeCount";
|
||||
private static final String SELECT_PERMISSIONS_DM_NODE_COUNT_WITH_NEW_ACLS = "alfresco.patch.select_DmNodeCountWherePermissionsHaveChanged";
|
||||
private static final String SELECT_CHILD_ASSOCS_COUNT = "alfresco.patch.select_allChildAssocsCount";
|
||||
private static final String SELECT_CHILD_ASSOCS_MAX_ID = "alfresco.patch.select_maxChildAssocId";
|
||||
private static final String SELECT_CHILD_ASSOCS_FOR_CRCS = "alfresco.patch.select_allChildAssocsForCrcs";
|
||||
private static final String SELECT_NODES_BY_TYPE_AND_NAME_PATTERN = "alfresco.patch.select_nodesByTypeAndNamePattern";
|
||||
|
||||
@@ -370,15 +372,79 @@ public class PatchDAOImpl extends AbstractPatchDAOImpl
|
||||
return (Integer) template.queryForObject(SELECT_CHILD_ASSOCS_COUNT);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public List<Map<String, Object>> getChildAssocsForCrcFix(Long minAssocId, int maxResults)
|
||||
@Override
|
||||
public Long getMaxChildAssocId()
|
||||
{
|
||||
Long maxAssocId = (Long) template.queryForObject(SELECT_CHILD_ASSOCS_MAX_ID);
|
||||
return maxAssocId == null ? 0L : maxAssocId;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public List<Map<String, Object>> getChildAssocsForCrcFix(
|
||||
Long minAssocId,
|
||||
Long stopAtAssocId,
|
||||
long rangeMultiplier,
|
||||
long maxIdRange,
|
||||
int maxResults)
|
||||
{
|
||||
ParameterCheck.mandatory("minAssocId", minAssocId);
|
||||
ParameterCheck.mandatory("stopAtAssocId", stopAtAssocId);
|
||||
/*
|
||||
* ALF-4529: Database connection problems when upgrading large sample 2.1.x data set
|
||||
* We have to set an upper bound on the query that is driven by an index
|
||||
* otherwise we get OOM on the resultset, even with a limit.
|
||||
* Since there can be voids in the sequence, we have to check whether we have hit the max ID yet.
|
||||
*/
|
||||
Long qnameId = qnameDAO.getOrCreateQName(ContentModel.PROP_NAME).getFirst();
|
||||
|
||||
int queryMaxResults = maxResults;
|
||||
List<Map<String, Object>> results = new ArrayList<Map<String,Object>>(maxResults);
|
||||
while (results.size() < maxResults && minAssocId <= stopAtAssocId)
|
||||
{
|
||||
// Avoid getting too few results because of voids.
|
||||
// On the other hand, the distribution of child assoc types can result in swathes of
|
||||
// the table containing voids and rows of no interest. So we ramp up the multiplier
|
||||
// to take larger and larger ID ranges in order to quickly walk through these zones.
|
||||
Long maxAssocId = minAssocId + Math.min(maxResults * rangeMultiplier, maxIdRange);
|
||||
|
||||
IdsEntity entity = new IdsEntity();
|
||||
entity.setIdOne(qnameId);
|
||||
entity.setIdTwo(minAssocId);
|
||||
entity.setIdThree(maxAssocId);
|
||||
|
||||
try
|
||||
{
|
||||
List<Map<String, Object>> rows = template.queryForList(SELECT_CHILD_ASSOCS_FOR_CRCS, entity, 0, queryMaxResults);
|
||||
// Add these rows to the result
|
||||
results.addAll(rows);
|
||||
// Calculate the new queryMaxResults, i.e. how many more results are still needed
|
||||
queryMaxResults = maxResults - results.size();
|
||||
// Move the minAssocId up to ensure we get new results
|
||||
// If we got fewer results than queryMaxResults, then there were too many voids and we
|
||||
// requery using the previous maxAssocId
|
||||
minAssocId = maxAssocId;
|
||||
// Double the range multiplier if we have a low hit-rate (<50% of desired size)
|
||||
if (rows.size() < queryMaxResults / 2)
|
||||
{
|
||||
rangeMultiplier *= 2L;
|
||||
}
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
// Hit a DB problem. Log all the details of the query so that parameters can be adjusted externally.
|
||||
String msg =
|
||||
"Failed to query for batch of alf_child_assoc rows; use a lower 'maxIdRange': \n" +
|
||||
" minAssocId: " + minAssocId + "\n" +
|
||||
" maxAssocId: " + maxAssocId + "\n" +
|
||||
" maxIdRange: " + maxIdRange + "\n" +
|
||||
" stopAtAssocId: " + stopAtAssocId + "\n" +
|
||||
" rangeMultiplier: " + rangeMultiplier + "\n" +
|
||||
" queryMaxResults: " + queryMaxResults;
|
||||
logger.error(msg);
|
||||
throw new RuntimeException(msg, e);
|
||||
}
|
||||
}
|
||||
|
||||
IdsEntity entity = new IdsEntity();
|
||||
entity.setIdOne(qnameId);
|
||||
entity.setIdTwo(minAssocId);
|
||||
List<Map<String, Object>> results = template.queryForList(SELECT_CHILD_ASSOCS_FOR_CRCS, entity, 0, maxResults);
|
||||
// Done
|
||||
return results;
|
||||
}
|
||||
|
Reference in New Issue
Block a user