ACS-849_incorrect_probes added db check (#692)

* ACS-849_incorrect_probes added db check
This commit is contained in:
Nithin Nambiar
2021-10-11 12:08:43 +01:00
committed by GitHub
parent 8705d97407
commit e68f56270a
5 changed files with 188 additions and 136 deletions

View File

@@ -23,8 +23,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>. * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L% * #L%
*/ */
package org.alfresco.rest.api.probes; package org.alfresco.rest.api.probes;
import org.alfresco.repo.admin.RepoHealthChecker;
import org.alfresco.rest.api.discovery.DiscoveryApiWebscript; import org.alfresco.rest.api.discovery.DiscoveryApiWebscript;
import org.alfresco.rest.api.model.Probe; import org.alfresco.rest.api.model.Probe;
import org.alfresco.rest.framework.WebApiDescription; import org.alfresco.rest.framework.WebApiDescription;
@@ -35,29 +37,26 @@ import org.alfresco.rest.framework.core.exceptions.ServiceUnavailableException;
import org.alfresco.rest.framework.resource.EntityResource; import org.alfresco.rest.framework.resource.EntityResource;
import org.alfresco.rest.framework.resource.actions.interfaces.EntityResourceAction; import org.alfresco.rest.framework.resource.actions.interfaces.EntityResourceAction;
import org.alfresco.rest.framework.resource.parameters.Parameters; import org.alfresco.rest.framework.resource.parameters.Parameters;
import org.apache.commons.logging.Log; import org.slf4j.Logger;
import org.apache.commons.logging.LogFactory; import org.slf4j.LoggerFactory;
/** /**
* An implementation of an Entity Resource for Probes. * An implementation of an Entity Resource for Probes.
*/ */
@EntityResource(name="probes", title = "Probes") @EntityResource(name = "probes", title = "Probes") public class ProbeEntityResource
public class ProbeEntityResource implements EntityResourceAction.ReadById<Probe> implements EntityResourceAction.ReadById<Probe>
{ {
public static final String LIVE = "-live-";
public static final String READY = "-ready-";
public static final long CHECK_PERIOD = 10 * 1000; // Maximum of only one checkResult every 10 seconds. public static final long CHECK_PERIOD = 10 * 1000; // Maximum of only one checkResult every 10 seconds.
protected static Log logger = LogFactory.getLog(ProbeEntityResource.class);; private final static Logger logger = LoggerFactory.getLogger(ProbeEntityResource.class);
private final Object lock = new Object();
private long nextCheckTime = 0; private final Probe liveProbe = new Probe("liveProbe: Success - Tested");
private long lastCheckTime = 0;
private Boolean checkResult; private Boolean checkResult;
private Boolean checking = false;
private boolean readySent;
private DiscoveryApiWebscript discovery; private DiscoveryApiWebscript discovery;
private RepoHealthChecker repoHealthChecker;
public DiscoveryApiWebscript setDiscovery(DiscoveryApiWebscript discovery) public DiscoveryApiWebscript setDiscovery(DiscoveryApiWebscript discovery)
{ {
DiscoveryApiWebscript result = this.discovery; DiscoveryApiWebscript result = this.discovery;
@@ -65,145 +64,141 @@ public class ProbeEntityResource implements EntityResourceAction.ReadById<Probe>
return result; return result;
} }
public void setRepoHealthChecker(RepoHealthChecker repoHealthChecker)
{
this.repoHealthChecker = repoHealthChecker;
}
/** /**
* Returns a status code of 200 for okay. The probe contains little information for security reasons. * Returns a status code of 200 for okay. The probe contains little information for security reasons.
*
* Note: does *not* require authenticated access, so limits the amount of work performed to avoid a DDOS. * Note: does *not* require authenticated access, so limits the amount of work performed to avoid a DDOS.
*/ */
@Override @Override @WebApiDescription(title = "Get probe status", description = "Returns 200 if valid") @WebApiParam(name = "probeName", title = "The probe's name") @WebApiNoAuth public Probe readById(
@WebApiDescription(title="Get probe status", description = "Returns 200 if valid") String name, Parameters parameters)
@WebApiParam(name = "probeName", title = "The probe's name")
@WebApiNoAuth
public Probe readById(String name, Parameters parameters)
{ {
boolean isLiveProbe = LIVE.equalsIgnoreCase(name); ProbeType probeType = ProbeType.fromString(name);
if (!isLiveProbe && !READY.equalsIgnoreCase(name)) Probe probeResponse = null;
switch (probeType)
{ {
case LIVE:
probeResponse = liveProbe;
break;
case READY:
String message = doReadyCheck();
probeResponse = new Probe(message);
break;
case UNKNOWN:
throw new InvalidArgumentException("Bad probe name"); throw new InvalidArgumentException("Bad probe name");
} }
String message = doCheckOrNothing(isLiveProbe); return probeResponse;
return new Probe(message);
} }
// We don't want to be doing checks all the time or holding monitors for a long time to avoid a DDOS. // We don't want to be doing checks all the time or holding monitors for a long time to avoid a DDOS.
public String doCheckOrNothing(boolean isLiveProbe) public String doReadyCheck()
{ {
boolean doCheck = false;
long now = 0;
boolean result;
String message = "No test";
boolean logInfo = false;
synchronized(checking)
{
// Initially ready needs to be false so we don't get requests and live true so the pod is not killed.
if (checkResult == null)
{
result = isLiveProbe;
}
else
{
result = checkResult;
}
if (checking) // Is another thread is checking? synchronized (lock)
{ {
if (!readySent && result && !isLiveProbe) String message;
{ long now = System.currentTimeMillis();
readySent = true;
logInfo = true;
}
}
else // This thread will do a check
{
now = System.currentTimeMillis();
if (checkResult == null || nextCheckTime <= now)
{
doCheck = true;
checking = true;
}
}
}
if (doCheck) if (checkResult == null || isAfterCheckPeriod(now))
{ {
try try
{ {
message = "Tested"; performReadinessCheck();
doCheck(isLiveProbe); checkResult = true;
result = true;
} }
catch (Exception e) catch (Exception e)
{ {
result = false; checkResult = false;
logger.debug("Exception during readiness check", e);
} }
finally finally
{ {
synchronized (checking)
{
checking = false;
checkResult = result;
setNextCheckTime(now);
if (result && !readySent && !isLiveProbe) // Are we initially ready
{
readySent = true;
logInfo = true;
}
else if (!result && (isLiveProbe || readySent)) // Are we sick
{
logInfo = true;
}
}
}
}
message = getMessage(isLiveProbe, result, message); setLastCheckTime(now);
message = getMessage(checkResult, "Tested");
if (logInfo)
{
logger.info(message); logger.info(message);
}
} }
else else
{ {
// if no check is performed, use previous check result
message = getMessage(checkResult, "No test");
logger.debug(message); logger.debug(message);
}
if (result) }
if (checkResult)
{ {
return message; return message;
} }
throw new ServiceUnavailableException(message); throw new ServiceUnavailableException(message);
} }
private String getMessage(boolean isLiveProbe, boolean result, String message)
{
return (isLiveProbe ? "liveProbe" : "readyProbe")+": "+
(result ? "Success" : "Failure") +
" - "+message;
} }
private void doCheck(boolean isLiveProbe) private String getMessage(boolean result, String message)
{ {
return "readyProbe: " + (result ? "Success" : "Failure") + " - " + message;
}
private void performReadinessCheck()
{
discovery.getRepositoryInfo(); discovery.getRepositoryInfo();
repoHealthChecker.checkDatabase();
logger.debug("All checks complete");
} }
private void setNextCheckTime(long now) private void setLastCheckTime(long time)
{ {
long oldValue = nextCheckTime; this.lastCheckTime = time;
if (nextCheckTime == 0) long nextCheckTime = lastCheckTime + CHECK_PERIOD;
{
nextCheckTime = (now / 60000) * 60000; logger.trace("nextCheckTime: {} (+{} secs)", nextCheckTime, ((CHECK_PERIOD) / 1000));
} }
do private boolean isAfterCheckPeriod(long currentTime)
{ {
nextCheckTime += CHECK_PERIOD; return ((currentTime - lastCheckTime) >= CHECK_PERIOD);
} }
while (nextCheckTime <= now);
if (logger.isTraceEnabled()) public enum ProbeType
{ {
logger.trace("nextCheckTime: " + nextCheckTime + " (+" + ((nextCheckTime - oldValue) / 1000) + " secs)"); LIVE("-live-"), READY("-ready-"), UNKNOWN("");
String value;
ProbeType(String strValue)
{
value = strValue;
}
public static ProbeType fromString(String text)
{
for (ProbeType p : ProbeType.values())
{
if (p.value.equalsIgnoreCase(text))
{
return p;
} }
} }
return UNKNOWN;
}
public String getValue()
{
return value;
}
}
} }

View File

@@ -1075,6 +1075,7 @@
<bean id="org.alfresco.rest.api.probes.ProbeEntityResource.get" class="org.alfresco.rest.api.probes.ProbeEntityResource"> <bean id="org.alfresco.rest.api.probes.ProbeEntityResource.get" class="org.alfresco.rest.api.probes.ProbeEntityResource">
<property name="discovery" ref="webscript.org.alfresco.api.DiscoveryApiWebscript.get" /> <property name="discovery" ref="webscript.org.alfresco.api.DiscoveryApiWebscript.get" />
<property name="repoHealthChecker" ref="repoHealthChecker" />
</bean> </bean>
<!-- REST API direct access URL configuration settings --> <!-- REST API direct access URL configuration settings -->

View File

@@ -26,21 +26,26 @@
package org.alfresco.rest.api.tests; package org.alfresco.rest.api.tests;
import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.admin.RepoHealthChecker;
import org.alfresco.rest.api.discovery.DiscoveryApiWebscript; import org.alfresco.rest.api.discovery.DiscoveryApiWebscript;
import org.alfresco.rest.api.probes.ProbeEntityResource; import org.alfresco.rest.api.probes.ProbeEntityResource;
import org.alfresco.rest.api.tests.client.HttpResponse; import org.alfresco.rest.api.tests.client.HttpResponse;
import org.json.simple.JSONObject; import org.json.simple.JSONObject;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.MockitoJUnitRunner; import org.mockito.junit.MockitoJUnitRunner;
import static org.alfresco.rest.api.probes.ProbeEntityResource.*; import static org.alfresco.rest.api.probes.ProbeEntityResource.*;
import static org.alfresco.rest.api.probes.ProbeEntityResource.ProbeType.LIVE;
import static org.alfresco.rest.api.probes.ProbeEntityResource.ProbeType.READY;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.lenient;
/** /**
* V1 REST API tests for Probes (Live and Ready) * V1 REST API tests for Probes (Live and Ready)
@@ -53,7 +58,6 @@ import static org.mockito.Mockito.when;
public class ProbeApiTest extends AbstractBaseApiTest public class ProbeApiTest extends AbstractBaseApiTest
{ {
private static final boolean OK = true; private static final boolean OK = true;
private static final boolean ERROR = false;
private ProbeEntityResource probe; private ProbeEntityResource probe;
private DiscoveryApiWebscript origDiscovery; private DiscoveryApiWebscript origDiscovery;
@@ -64,6 +68,9 @@ public class ProbeApiTest extends AbstractBaseApiTest
@Mock @Mock
private DiscoveryApiWebscript badDiscovery; private DiscoveryApiWebscript badDiscovery;
@Mock
private RepoHealthChecker repoHealthChecker;
@Before @Before
@Override @Override
public void setup() throws Exception public void setup() throws Exception
@@ -73,7 +80,9 @@ public class ProbeApiTest extends AbstractBaseApiTest
String beanName = ProbeEntityResource.class.getCanonicalName()+".get"; String beanName = ProbeEntityResource.class.getCanonicalName()+".get";
probe = applicationContext.getBean(beanName, ProbeEntityResource.class); probe = applicationContext.getBean(beanName, ProbeEntityResource.class);
when(badDiscovery.getRepositoryInfo()).thenThrow(AlfrescoRuntimeException.class); lenient().when(badDiscovery.getRepositoryInfo()).thenThrow(AlfrescoRuntimeException.class);
Mockito.doNothing().when(repoHealthChecker).checkDatabase();
probe.setRepoHealthChecker(repoHealthChecker);
origDiscovery = probe.setDiscovery(badDiscovery); origDiscovery = probe.setDiscovery(badDiscovery);
} }
@@ -91,7 +100,7 @@ public class ProbeApiTest extends AbstractBaseApiTest
return "public"; return "public";
} }
private void assertResponse(String probeName, Boolean ready, String expected, int expectedStatus) throws Exception private void assertResponse(ProbeType probeType, Boolean ready, String expected, int expectedStatus) throws Exception
{ {
String[] keys = expectedStatus == 200 String[] keys = expectedStatus == 200
? new String[]{"entry", "message"} ? new String[]{"entry", "message"}
@@ -103,7 +112,7 @@ public class ProbeApiTest extends AbstractBaseApiTest
? goodDiscovery ? goodDiscovery
: badDiscovery); : badDiscovery);
HttpResponse response = getSingle(ProbeEntityResource.class, probeName, null, expectedStatus); HttpResponse response = getSingle(ProbeEntityResource.class, probeType.getValue(), null, expectedStatus);
Object object = response.getJsonResponse(); Object object = response.getJsonResponse();
for (String key: keys) for (String key: keys)
{ {
@@ -128,32 +137,29 @@ public class ProbeApiTest extends AbstractBaseApiTest
public void testProbes() throws Exception public void testProbes() throws Exception
{ {
// Live first // Live first
assertResponse(LIVE, ERROR, "liveProbe: Failure - Tested", 503); assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200);
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503); // Need to wait 10 seconds. assertResponse(READY, null, "readyProbe: Failure - Tested", 503);
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503);
assertResponse(READY, null, "readyProbe: Failure - No test", 503); Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(READY, OK, "readyProbe: Success - Tested", 200);
assertResponse(READY, null, "readyProbe: Success - No test", 200);
Thread.currentThread().sleep(CHECK_PERIOD); Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200); assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200);
assertResponse(LIVE, null, "liveProbe: Success - No test", 200); assertResponse(READY, null, "readyProbe: Failure - Tested", 503);
assertResponse(READY, null, "readyProbe: Success - No test", 200);
assertResponse(LIVE, null, "liveProbe: Success - No test", 200);
Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(LIVE, ERROR, "liveProbe: Failure - Tested", 503);
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503);
assertResponse(READY, null, "readyProbe: Failure - No test", 503);
// Ready first // Ready first
Thread.currentThread().sleep(CHECK_PERIOD); Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(READY, OK, "readyProbe: Success - Tested", 200); assertResponse(READY, OK, "readyProbe: Success - Tested", 200);
assertResponse(LIVE, null, "liveProbe: Success - No test", 200); assertResponse(LIVE, OK, "liveProbe: Success - Tested", 200);
assertResponse(READY, null, "readyProbe: Success - No test", 200); assertResponse(READY, null, "readyProbe: Success - No test", 200);
// check db failure
Thread.currentThread().sleep(CHECK_PERIOD); Thread.currentThread().sleep(CHECK_PERIOD);
assertResponse(READY, ERROR, "readyProbe: Failure - Tested", 503); Mockito.doThrow(AlfrescoRuntimeException.class).when(repoHealthChecker).checkDatabase();
assertResponse(LIVE, null, "liveProbe: Failure - No test", 503); assertResponse(READY, OK, "readyProbe: Failure - Tested", 503);
assertResponse(READY, OK, "readyProbe: Failure - No test", 503);
} }
} }

View File

@@ -0,0 +1,47 @@
/*
* #%L
* Alfresco Remote API
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.admin;
import org.alfresco.repo.domain.schema.DataSourceCheck;
/**
* A utility service to do the database and other health checks.
*/
public class RepoHealthChecker
{
private final DataSourceCheck dataSourceCheck;
public RepoHealthChecker(DataSourceCheck dataSourceCheck)
{
this.dataSourceCheck = dataSourceCheck;
}
public void checkDatabase()
{
dataSourceCheck.init();
}
}

View File

@@ -58,5 +58,8 @@
<value>path</value> <value>path</value>
</property> </property>
</bean> </bean>
<bean id ="repoHealthChecker" class="org.alfresco.repo.admin.RepoHealthChecker">
<constructor-arg index="0" ref="dataSourceCheck"/>
</bean>
</beans> </beans>