ACS-849_incorrect_probes added db check (#692)

* ACS-849_incorrect_probes added db check
This commit is contained in:
Nithin Nambiar
2021-10-11 12:08:43 +01:00
committed by GitHub
parent 8705d97407
commit e68f56270a
5 changed files with 188 additions and 136 deletions

View File

@@ -23,8 +23,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.rest.api.probes;
import org.alfresco.repo.admin.RepoHealthChecker;
import org.alfresco.rest.api.discovery.DiscoveryApiWebscript;
import org.alfresco.rest.api.model.Probe;
import org.alfresco.rest.framework.WebApiDescription;
@@ -35,29 +37,26 @@ import org.alfresco.rest.framework.core.exceptions.ServiceUnavailableException;
import org.alfresco.rest.framework.resource.EntityResource;
import org.alfresco.rest.framework.resource.actions.interfaces.EntityResourceAction;
import org.alfresco.rest.framework.resource.parameters.Parameters;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* An implementation of an Entity Resource for Probes.
*/
@EntityResource(name="probes", title = "Probes")
public class ProbeEntityResource implements EntityResourceAction.ReadById<Probe>
@EntityResource(name = "probes", title = "Probes") public class ProbeEntityResource
implements EntityResourceAction.ReadById<Probe>
{
public static final String LIVE = "-live-";
public static final String READY = "-ready-";
public static final long CHECK_PERIOD = 10 * 1000; // Maximum of only one checkResult every 10 seconds.
protected static Log logger = LogFactory.getLog(ProbeEntityResource.class);;
private long nextCheckTime = 0;
private final static Logger logger = LoggerFactory.getLogger(ProbeEntityResource.class);
private final Object lock = new Object();
private final Probe liveProbe = new Probe("liveProbe: Success - Tested");
private long lastCheckTime = 0;
private Boolean checkResult;
private Boolean checking = false;
private boolean readySent;
private DiscoveryApiWebscript discovery;
private RepoHealthChecker repoHealthChecker;
public DiscoveryApiWebscript setDiscovery(DiscoveryApiWebscript discovery)
{
DiscoveryApiWebscript result = this.discovery;
@@ -65,145 +64,141 @@ public class ProbeEntityResource implements EntityResourceAction.ReadById<Probe>
return result;
}
public void setRepoHealthChecker(RepoHealthChecker repoHealthChecker)
{
this.repoHealthChecker = repoHealthChecker;
}
/**
* Returns a status code of 200 for okay. The probe contains little information for security reasons.
*
* Note: does *not* require authenticated access, so limits the amount of work performed to avoid a DDOS.
*/
@Override
@WebApiDescription(title="Get probe status", description = "Returns 200 if valid")
@WebApiParam(name = "probeName", title = "The probe's name")
@WebApiNoAuth
public Probe readById(String name, Parameters parameters)
@Override @WebApiDescription(title = "Get probe status", description = "Returns 200 if valid") @WebApiParam(name = "probeName", title = "The probe's name") @WebApiNoAuth public Probe readById(
String name, Parameters parameters)
{
boolean isLiveProbe = LIVE.equalsIgnoreCase(name);
if (!isLiveProbe && !READY.equalsIgnoreCase(name))
ProbeType probeType = ProbeType.fromString(name);
Probe probeResponse = null;
switch (probeType)
{
throw new InvalidArgumentException("Bad probe name");
case LIVE:
probeResponse = liveProbe;
break;
case READY:
String message = doReadyCheck();
probeResponse = new Probe(message);
break;
case UNKNOWN:
throw new InvalidArgumentException("Bad probe name");
}
String message = doCheckOrNothing(isLiveProbe);
return new Probe(message);
return probeResponse;
}
// We don't want to be doing checks all the time or holding monitors for a long time to avoid a DDOS.
public String doCheckOrNothing(boolean isLiveProbe)
public String doReadyCheck()
{
boolean doCheck = false;
long now = 0;
boolean result;
String message = "No test";
boolean logInfo = false;
synchronized(checking)
synchronized (lock)
{
// Initially ready needs to be false so we don't get requests and live true so the pod is not killed.
if (checkResult == null)
String message;
long now = System.currentTimeMillis();
if (checkResult == null || isAfterCheckPeriod(now))
{
result = isLiveProbe;
try
{
performReadinessCheck();
checkResult = true;
}
catch (Exception e)
{
checkResult = false;
logger.debug("Exception during readiness check", e);
}
finally
{
setLastCheckTime(now);
message = getMessage(checkResult, "Tested");
logger.info(message);
}
}
else
{
result = checkResult;
// if no check is performed, use previous check result
message = getMessage(checkResult, "No test");
logger.debug(message);
}
if (checkResult)
{
return message;
}
if (checking) // Is another thread is checking?
{
if (!readySent && result && !isLiveProbe)
{
readySent = true;
logInfo = true;
}
}
else // This thread will do a check
{
now = System.currentTimeMillis();
if (checkResult == null || nextCheckTime <= now)
{
doCheck = true;
checking = true;
}
}
throw new ServiceUnavailableException(message);
}
if (doCheck)
{
try
{
message = "Tested";
doCheck(isLiveProbe);
result = true;
}
catch (Exception e)
{
result = false;
}
finally
{
synchronized (checking)
{
checking = false;
checkResult = result;
setNextCheckTime(now);
if (result && !readySent && !isLiveProbe) // Are we initially ready
{
readySent = true;
logInfo = true;
}
else if (!result && (isLiveProbe || readySent)) // Are we sick
{
logInfo = true;
}
}
}
}
message = getMessage(isLiveProbe, result, message);
if (logInfo)
{
logger.info(message);
}
else
{
logger.debug(message);
}
if (result)
{
return message;
}
throw new ServiceUnavailableException(message);
}
private String getMessage(boolean isLiveProbe, boolean result, String message)
private String getMessage(boolean result, String message)
{
return (isLiveProbe ? "liveProbe" : "readyProbe")+": "+
(result ? "Success" : "Failure") +
" - "+message;
return "readyProbe: " + (result ? "Success" : "Failure") + " - " + message;
}
private void doCheck(boolean isLiveProbe)
private void performReadinessCheck()
{
discovery.getRepositoryInfo();
repoHealthChecker.checkDatabase();
logger.debug("All checks complete");
}
private void setNextCheckTime(long now)
private void setLastCheckTime(long time)
{
long oldValue = nextCheckTime;
if (nextCheckTime == 0)
this.lastCheckTime = time;
long nextCheckTime = lastCheckTime + CHECK_PERIOD;
logger.trace("nextCheckTime: {} (+{} secs)", nextCheckTime, ((CHECK_PERIOD) / 1000));
}
private boolean isAfterCheckPeriod(long currentTime)
{
return ((currentTime - lastCheckTime) >= CHECK_PERIOD);
}
public enum ProbeType
{
LIVE("-live-"), READY("-ready-"), UNKNOWN("");
String value;
ProbeType(String strValue)
{
nextCheckTime = (now / 60000) * 60000;
value = strValue;
}
do
public static ProbeType fromString(String text)
{
nextCheckTime += CHECK_PERIOD;
for (ProbeType p : ProbeType.values())
{
if (p.value.equalsIgnoreCase(text))
{
return p;
}
}
return UNKNOWN;
}
while (nextCheckTime <= now);
if (logger.isTraceEnabled())
public String getValue()
{
logger.trace("nextCheckTime: " + nextCheckTime + " (+" + ((nextCheckTime - oldValue) / 1000) + " secs)");
return value;
}
}
}

View File

@@ -1075,6 +1075,7 @@
<bean id="org.alfresco.rest.api.probes.ProbeEntityResource.get" class="org.alfresco.rest.api.probes.ProbeEntityResource">
<property name="discovery" ref="webscript.org.alfresco.api.DiscoveryApiWebscript.get" />
<property name="repoHealthChecker" ref="repoHealthChecker" />
</bean>
<!-- REST API direct access URL configuration settings -->

View File

@@ -26,21 +26,26 @@
package org.alfresco.rest.api.tests;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.admin.RepoHealthChecker;
import org.alfresco.rest.api.discovery.DiscoveryApiWebscript;
import org.alfresco.rest.api.probes.ProbeEntityResource;
import org.alfresco.rest.api.tests.client.HttpResponse;
import org.json.simple.JSONObject;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.MockitoJUnitRunner;
import static org.alfresco.rest.api.probes.ProbeEntityResource.*;
import static org.alfresco.rest.api.probes.ProbeEntityResource.ProbeType.LIVE;
import static org.alfresco.rest.api.probes.ProbeEntityResource.ProbeType.READY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.lenient;
/**
* V1 REST API tests for Probes (Live and Ready)
@@ -53,7 +58,6 @@ import static org.mockito.Mockito.when;
public class ProbeApiTest extends AbstractBaseApiTest
{
private static final boolean OK = true;
private static final boolean ERROR = false;
private ProbeEntityResource probe;
private DiscoveryApiWebscript origDiscovery;
@@ -64,6 +68,9 @@ public class ProbeApiTest extends AbstractBaseApiTest
@Mock
private DiscoveryApiWebscript badDiscovery;
@Mock
private RepoHealthChecker repoHealthChecker;
@Before
@Override
public void setup() throws Exception
@@ -73,7 +80,9 @@ public class ProbeApiTest extends AbstractBaseApiTest
String beanName = ProbeEntityResource.class.getCanonicalName()+".get";
probe = applicationContext.getBean(beanName, ProbeEntityResource.class);
when(badDiscovery.getRepositoryInfo()).thenThrow(AlfrescoRuntimeException.class);
lenient().when(badDiscovery.getRepositoryInfo()).thenThrow(AlfrescoRuntimeException.class);
Mockito.doNothing().when(repoHealthChecker).checkDatabase();
probe.setRepoHealthChecker(repoHealthChecker);
origDiscovery = probe.setDiscovery(badDiscovery);
}
@@ -91,7 +100,7 @@ public class ProbeApiTest extends AbstractBaseApiTest
return "public";
}
private void assertResponse(String probeName, Boolean ready, String expected, int expectedStatus) throws Exception
private void assertResponse(ProbeType probeType, Boolean ready, String expected, int expectedStatus) throws Exception
{
String[] keys = expectedStatus == 200
? new String[]{"entry", "message"}
@@ -103,7 +112,7 @@ public class ProbeApiTest extends AbstractBaseApiTest
? goodDiscovery
: badDiscovery);
HttpResponse response = getSingle(ProbeEntityResource.class, probeName, null, expectedStatus);
HttpResponse response = getSingle(ProbeEntityResource.class, probeType.getValue(), null, expectedStatus);
Object object = response.getJsonResponse();
for (String key: keys)
{
@@ -128,32 +137,29 @@ public class ProbeApiTest extends AbstractBaseApiTest
public void testProbes() throws Exception
{
// Live first
assertResponse(LIVE, ERROR, "liveProbe: Failure - Tested", 503);
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503); // Need to wait 10 seconds.
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503);
assertResponse(READY, null, "readyProbe: Failure - No test", 503);
assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200);
assertResponse(READY, null, "readyProbe: Failure - Tested", 503);
Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(READY, OK, "readyProbe: Success - Tested", 200);
assertResponse(READY, null, "readyProbe: Success - No test", 200);
Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200);
assertResponse(LIVE, null, "liveProbe: Success - No test", 200);
assertResponse(READY, null, "readyProbe: Success - No test", 200);
assertResponse(LIVE, null, "liveProbe: Success - No test", 200);
Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(LIVE, ERROR, "liveProbe: Failure - Tested", 503);
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503);
assertResponse(READY, null, "readyProbe: Failure - No test", 503);
assertResponse(READY, null, "readyProbe: Failure - Tested", 503);
// Ready first
Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(READY, OK, "readyProbe: Success - Tested", 200);
assertResponse(LIVE, null, "liveProbe: Success - No test", 200);
assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200);
assertResponse(READY, null, "readyProbe: Success - No test", 200);
// check db failure
Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(READY, ERROR, "readyProbe: Failure - Tested", 503);
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503);
Mockito.doThrow(AlfrescoRuntimeException.class).when(repoHealthChecker).checkDatabase();
assertResponse(READY, OK, "readyProbe: Failure - Tested", 503);
assertResponse(READY, OK, "readyProbe: Failure - No test", 503);
}
}

View File

@@ -0,0 +1,47 @@
/*
* #%L
* Alfresco Remote API
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.admin;
import org.alfresco.repo.domain.schema.DataSourceCheck;
/**
* A utility service to do the database and other health checks.
*/
public class RepoHealthChecker
{
private final DataSourceCheck dataSourceCheck;
public RepoHealthChecker(DataSourceCheck dataSourceCheck)
{
this.dataSourceCheck = dataSourceCheck;
}
public void checkDatabase()
{
dataSourceCheck.init();
}
}

View File

@@ -58,5 +58,8 @@
<value>path</value>
</property>
</bean>
<bean id ="repoHealthChecker" class="org.alfresco.repo.admin.RepoHealthChecker">
<constructor-arg index="0" ref="dataSourceCheck"/>
</bean>
</beans>