ALF-4898 - rework of deployment locks to implement Job Lock Service timeout callback.

- there are now two callbacks.   A short timeout (1 minute) for keeping the lock alive and a longer timeout for timing 
out the whole deployment.

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@22961 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Mark Rogers
2010-10-07 13:56:35 +00:00
parent 839d522808
commit ee52855f62
4 changed files with 579 additions and 384 deletions

View File

@@ -75,6 +75,17 @@
<value>${deployment.service.numberOfSendingThreads}</value>
</property>
<!-- Short timeout, to detect a shutdown server -->
<property name="targetLockRefreshTime">
<value>${deployment.service.targetLockRefreshTime}</value>
</property>
<!-- Long timeout, how long to wait for the next step in the deployment process, may need to be fairly
long to cope with large transfers over slow networks -->
<property name="targetLockTimeToLive">
<value>${deployment.service.targetLockTimeout}</value>
</property>
<!-- Which adapters are provided to communicate with remote File System Receivers -->
<property name="deploymentReceiverTransportAdapters">
<map>

View File

@@ -499,6 +499,12 @@ deployment.service.numberOfSendingThreads=5
deployment.service.corePoolSize=2
deployment.service.maximumPoolSize=3
# How long to wait in mS before refreshing a target lock - detects shutdown servers
deployment.service.targetLockRefreshTime=60000
# How long to wait in mS from the last communication before deciding that deployment has failed, possibly
# the destination is no longer available?
deployment.service.targetLockTimeout=3600000
# Transfer Service
transferservice.receiver.enabled=true
transferservice.receiver.stagingDir=${java.io.tmpdir}/alfresco-transfer-staging

View File

@@ -53,6 +53,7 @@ import org.alfresco.repo.avm.AVMNodeService;
import org.alfresco.repo.avm.util.SimplePath;
import org.alfresco.repo.domain.PropertyValue;
import org.alfresco.repo.lock.JobLockService;
import org.alfresco.repo.lock.JobLockService.JobLockRefreshCallback;
import org.alfresco.repo.remote.AVMRemoteImpl;
import org.alfresco.repo.remote.AVMSyncServiceRemote;
import org.alfresco.repo.remote.ClientTicketHolder;
@@ -96,11 +97,6 @@ public class DeploymentServiceImpl implements DeploymentService
{
private static Log fgLogger = LogFactory.getLog(DeploymentServiceImpl.class);
/**
* Holds locks for all deployment destinations (alfresco->alfresco)
*/
private Map<DeploymentDestination, DeploymentDestination> fDestinations;
/**
* The local AVMService Instance.
*/
@@ -130,18 +126,32 @@ public class DeploymentServiceImpl implements DeploymentService
/**
* Hold the deployment lock for 3600 seconds (1 hour)
* <p>
* This is how long we will wait for a business process to complete.
* And needs to be fairly long to allow transmission of big files
* over high latency networks.
*/
private long targetLockTimeToLive = 3600000;
/**
* Retry for target lock every 10 seconds
* Refresh the lock every minute or so
* <p>
* This is how long we keep the lock for before nudging it. So if
* this node in the cluster is shut down during deployment then
* another node can take over.
*/
private long targetLockRetryWait = 10000;
private long targetLockRefreshTime = 10000;
/**
* Retry 10000 times before giving up
* Retry for target lock every 1 second
*/
private int targetLockRetryCount = 10000;
private long targetLockRetryWait = 1000;
/**
* Retry 10000 times before giving up, basically we
* never want to give up.
*/
private int targetLockRetryCount = 10001;
/**
* The size of the output buffers
@@ -164,7 +174,6 @@ public class DeploymentServiceImpl implements DeploymentService
public DeploymentServiceImpl()
{
fTicketHolder = new ClientTicketHolderThread();
fDestinations = new HashMap<DeploymentDestination, DeploymentDestination>();
}
/**
@@ -209,10 +218,12 @@ public class DeploymentServiceImpl implements DeploymentService
* Lock the cluster for the remote target
*/
String lockStr = hostName + "." + "asr." + storeName;
QName lockQName = QName.createQName("{org.alfresco.deployment.lock}" + lockStr);
QName lockQName = QName.createQName("{http://www.alfresco.org/deploymentService/1.0}" + lockStr);
Lock lock = new Lock(lockQName);
lock.makeLock();
try
{
/**
* Got the lock - now do a deployment
*/
@@ -383,6 +394,13 @@ public class DeploymentServiceImpl implements DeploymentService
fTicketHolder.setTicket(null);
}
}
finally
{
fgLogger.debug("about to release lock");
lock.releaseLock();
}
}
/**
* Deploy all the children of corresponding directories. (ASR version)
@@ -1052,19 +1070,19 @@ public class DeploymentServiceImpl implements DeploymentService
List<DeploymentCallback> callbacks)
{
fgLogger.debug("deployDifferenceFS start");
/**
* Lock cluster for the remote target
*/
String lockStr = hostName + "." + port + "." + target;
QName lockQName = QName.createQName("{org.alfresco.deployment.lock}" + lockStr);
String lockStr = "deploy." + hostName + "." + port + "." + target;
QName lockQName = QName.createQName("{http://www.alfresco.org/deploymentService/1.0}" + lockStr);
final Lock lock = new Lock(lockQName);
lock.makeLock();
try
{
/**
* Cluster Lock held here
*/
if (fgLogger.isDebugEnabled())
{
Object[] objs = {version, srcPath, adapterName, hostName, port, target};
@@ -1144,9 +1162,12 @@ public class DeploymentServiceImpl implements DeploymentService
try
{
fgLogger.debug("calling begin");
DeploymentToken token = service.begin(target, storeName, version, userName, password.toCharArray());
ticket = token.getTicket();
lock.checkLock();
// run this in its own txn
final DeploymentReceiverService fservice = service;
final String fTicket = ticket;
@@ -1187,8 +1208,13 @@ public class DeploymentServiceImpl implements DeploymentService
try
{
fgLogger.debug("no errors - prepare and commit");
lock.checkLock();
service.prepare(ticket);
lock.checkLock();
service.commit(ticket);
// no point checking the lock here - we have committed.
}
catch (Exception e)
{
@@ -1249,6 +1275,12 @@ public class DeploymentServiceImpl implements DeploymentService
throw new AVMException(f.format(objs), e);
}
}
finally
{
fgLogger.debug("At end of method - about to release lock");
lock.releaseLock();
}
} // End of deploy difference FS
private class ComparatorFileDescriptorCaseSensitive implements Comparator<FileDescriptor>
@@ -1309,7 +1341,7 @@ public class DeploymentServiceImpl implements DeploymentService
Iterator<FileDescriptor> dstIter = dstListing.iterator();
Iterator<AVMNodeDescriptor> srcIter = srcListing.iterator();
lock.refreshLock();
lock.checkLock();
// Here with two sorted directory listings
AVMNodeDescriptor src = null;
@@ -1353,7 +1385,7 @@ public class DeploymentServiceImpl implements DeploymentService
fgLogger.debug("comparing src:" + src + " dst:"+ dst);
}
lock.refreshLock();
lock.checkLock();
// This means no entry on src so delete what is on dst.
if (src == null)
@@ -1721,13 +1753,37 @@ public class DeploymentServiceImpl implements DeploymentService
}
/**
* Inner Class to Decorate the jobLockService to add control over the refreshLock behaviour to
* reduce the number of calls to the underlying lock service.
* Inner Class to Decorate the jobLockService to
* add control over the refreshLock behaviour.
*
* Deployment service calls (On deployment main thread)
* makeLock and releaseLock around the deployment.
* periodically calls checkLock as it does its work.
* checkLock can throw an exception if the business process has timed out.
*
* isActive and lockReleased called by Job Lock Thread
*/
private class Lock
private class Lock implements JobLockRefreshCallback
{
/**
* The name of the lock - unique for each target
*/
QName lockQName;
long lockTime;
/**
* The unique token for this lock instance.
*/
String lockToken;
/**
* Is the lock active ?
*/
boolean active = false;
/**
* When did we last check whether the lock is active
*/
Date lastActive = new Date();
public Lock(QName lockQName)
{
@@ -1735,34 +1791,130 @@ public class DeploymentServiceImpl implements DeploymentService
}
/**
* Make the lock - called on main deployment thread
*
* @throws LockAcquisitionException
*/
public void makeLock()
{
jobLockService.getTransactionalLock(lockQName, getTargetLockTimeToLive(), getTargetLockRetryWait(), getTargetLockRetryCount());
lockTime = new Date().getTime();
if(fgLogger.isDebugEnabled())
{
fgLogger.debug("lock taken" + lockQName);
fgLogger.debug("target lock refresh time :" + getTargetLockRefreshTime() + "targetLockRetryWait:" + targetLockRetryWait + "targetLockRetryCount:" + targetLockRetryCount);
}
lockToken = jobLockService.getLock(lockQName, targetLockRefreshTime, targetLockRetryWait, targetLockRetryCount);
synchronized(this)
{
active = true;
}
if (fgLogger.isDebugEnabled())
{
fgLogger.debug("lock taken:" + lockQName);
}
// We may have taken so long to begin that we have already timed out !
checkLock();
fgLogger.debug("register lock callback, target lock refresh time :" + getTargetLockRefreshTime());
jobLockService.refreshLock(lockToken, lockQName, getTargetLockRefreshTime(), this);
fgLogger.debug("callback registered");
}
/**
 * Check that the deployment lock is still valid - called as the business process progresses.
 * <p>
 * Called on main deployment thread.
 * <p>
 * Fails if the lock has been marked inactive, or if more than
 * targetLockTimeToLive mS have passed since activity was last recorded.
 * Otherwise records the current time as the last activity, at most once
 * per second.
 *
 * @throws AVMException (Lock timeout)
 */
public void checkLock()
{
    // The active flag is cleared by lockReleased() while holding this
    // object's monitor, so read it under the same monitor to be sure the
    // latest value is seen on this thread.
    final boolean isLockActive;
    synchronized(this)
    {
        isLockActive = active;
    }

    if(!isLockActive)
    {
        // lock not active.  Has been switched off by Job Lock Service.
        throw new AVMException("Lock timeout, lock not active");
    }

    final long nowMs = System.currentTimeMillis();

    // Compare elapsed time rather than "lastActive + timeout" so that a very
    // large configured timeout cannot overflow and trigger a false timeout.
    final long sinceLastActiveMs = nowMs - lastActive.getTime();

    if(sinceLastActiveMs > targetLockTimeToLive)
    {
        // lock time to live has expired.
        MessageFormat f = new MessageFormat("Deployment Lock timeout, lock time to live exceeded, timeout:{0}mS time since last activity:{1}mS");
        Object[] objs = {Long.valueOf(targetLockTimeToLive), Long.valueOf(sinceLastActiveMs) };
        throw new AVMException(f.format(objs));
    }

    // Update lastActive to 1S boundary
    if(sinceLastActiveMs > 1000)
    {
        lastActive = new Date(nowMs);
        fgLogger.debug("lastActive:" + lastActive);
    }
}
public void refreshLock()
{
/**
* Optimisation to stop the lock being refreshed thousands of times, refresh lock only after half lock time has expired
* Release the lock
*
* Called on main deployment thread
*/
Date now = new Date();
if(now.getTime() - lockTime > (targetLockTimeToLive / 2))
public void releaseLock()
{
if(fgLogger.isDebugEnabled())
{
fgLogger.debug("lock refreshed" + lockQName);
fgLogger.debug("deployment service about to releaseLock : " + lockQName);
}
jobLockService.getTransactionalLock(lockQName, getTargetLockTimeToLive(), getTargetLockRetryWait(), getTargetLockRetryCount());
lockTime = new Date().getTime();
if(active)
{
jobLockService.releaseLock(lockToken, lockQName);
}
fgLogger.debug("setting active = false" + lockQName);
// may need to sync this
synchronized(this)
{
active = false;
}
}
/**
 * Job Lock Callback
 * <p>
 * Callback from the job lock service.   Is the deployment active?
 *
 * @return the current value of the active flag
 */
@Override
public boolean isActive()
{
    // Take a single snapshot of the flag under the monitor so that the value
    // logged is exactly the value returned to the caller.
    final boolean isLockActive;
    synchronized(this)
    {
        isLockActive = active;
    }

    if(fgLogger.isDebugEnabled())
    {
        fgLogger.debug("deployment service callback active: " + isLockActive);
    }

    return isLockActive;
}
/**
 * Job Lock Callback.
 * <p>
 * Notification that the lock has been released; marks this lock as
 * no longer active.
 */
@Override
public void lockReleased()
{
    fgLogger.debug("deployment service: lock released callback");

    // Clear the flag while holding this object's monitor.
    synchronized (this)
    {
        this.active = false;
    }
}
}
@@ -1978,4 +2130,21 @@ public class DeploymentServiceImpl implements DeploymentService
}
return false;
}
/**
 * Set how long to keep a lock before refreshing it.
 *
 * @param targetLockRefreshTime the time in mS for how long to keep the lock.
 */
public void setTargetLockRefreshTime(long targetLockRefreshTime)
{
    this.targetLockRefreshTime = targetLockRefreshTime;
}
/**
 * Get the interval for which a lock is kept before it is refreshed.
 * <p>
 * This is the short time-out, typically a minute.
 *
 * @return the time in mS for how long to keep the lock.
 */
public long getTargetLockRefreshTime()
{
    return this.targetLockRefreshTime;
}
}

View File

@@ -40,6 +40,8 @@ import org.alfresco.service.cmr.avm.deploy.DeploymentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.Deleter;
import org.alfresco.util.NameMatcher;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.context.support.FileSystemXmlApplicationContext;
/**
@@ -47,7 +49,7 @@ import org.springframework.context.support.FileSystemXmlApplicationContext;
* @author britt
* @author mrogers
*/
public class FSDeploymentTest extends AVMServiceTestBase
public class DeploymentServiceImplFSTest extends AVMServiceTestBase
{
private File log = null;
private File metadata = null;
@@ -60,6 +62,8 @@ public class FSDeploymentTest extends AVMServiceTestBase
DeploymentService service = null;
private static Log logger = LogFactory.getLog(DeploymentServiceImplFSTest.class);
@Override
protected void setUp() throws Exception
@@ -410,6 +414,7 @@ public class FSDeploymentTest extends AVMServiceTestBase
*/
public void testWrongPassword()
{
logger.debug("Start testWrongPassword");
try {
service.deployDifferenceFS(-1, "main:/", "default", "localhost", 44100, TEST_USER, "Wrong!", TEST_TARGET, null, false, false, false, null);
@@ -428,6 +433,7 @@ public class FSDeploymentTest extends AVMServiceTestBase
*/
public void testWrongTarget()
{
logger.debug("Start testWrongTarget");
try {
service.deployDifferenceFS(-1, "main:/", "default", "localhost", 44100, TEST_USER, TEST_PASSWORD, "crapTarget", null, false, false, false, null);
fail("Wrong target should have thrown an exception");
@@ -443,6 +449,7 @@ public class FSDeploymentTest extends AVMServiceTestBase
*/
public void testNoExclusionFilter() throws Exception
{
logger.debug("Start testNoExclusionFilter");
DeploymentReport report = new DeploymentReport();
List<DeploymentCallback> callbacks = new ArrayList<DeploymentCallback>();
callbacks.add(new DeploymentReportCallback(report));
@@ -471,6 +478,7 @@ public class FSDeploymentTest extends AVMServiceTestBase
*/
public void testRevertToPreviousVersion() throws Exception
{
logger.debug("Start testRevertToPreviousVersion");
DeploymentReport report = new DeploymentReport();
List<DeploymentCallback> callbacks = new ArrayList<DeploymentCallback>();
callbacks.add(new DeploymentReportCallback(report));
@@ -527,6 +535,7 @@ public class FSDeploymentTest extends AVMServiceTestBase
*/
public void testBulkLoad() throws Exception
{
logger.debug("Start testBulkLoad");
DeploymentReport report = new DeploymentReport();
List<DeploymentCallback> callbacks = new ArrayList<DeploymentCallback>();
callbacks.add(new DeploymentReportCallback(report));