Feature/repo 3505 probes

This commit is contained in:
Alan Davis
2018-05-23 15:36:56 +01:00
parent a20840c5b8
commit 7de82a3541
18 changed files with 618 additions and 75 deletions

View File

@@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import org.alfresco.util.TempFileProvider;
import org.alfresco.util.exec.RuntimeExec;
@@ -55,28 +55,32 @@ import java.util.Collection;
import java.util.Map;
/**
* Abstract Controller, provides structure and helper methods to sub-class transformer controllers.
* <p>Abstract Controller, provides structure and helper methods to sub-class transformer controllers.</p>
*
* Status Codes:
* <p>Status Codes:</p>
* <ul>
* <li>200 Success</li>
* <li>400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)</li>
* <li>400 Bad Request: Request parameter <name> is of the wrong type</li>
* <li>400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)</li>
* <li>400 Bad Request: The source filename was not supplied</li>
* <li>500 Internal Server Error: (no message with low level IO problems)</li>
* <li>500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)</li>
* <li>500 Internal Server Error: Transformer version check exit code was not 0</li>
* <li>500 Internal Server Error: Transformer version check failed to create any output</li>
* <li>500 Internal Server Error: Could not read the target file</li>
* <li>500 Internal Server Error: The target filename was malformed (should not happen because of other checks)</li>
* <li>500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)</li>
* <li>500 Internal Server Error: Filename encoding error</li>
* <li>507 Insufficient Storage: Failed to store the source file</li>
*
* 200 Success
* 400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)
* 400 Bad Request: Request parameter <name> is of the wrong type
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
* 400 Bad Request: The source filename was not supplied
* 500 Internal Server Error: (no message with low level IO problems)
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
* 500 Internal Server Error: Transformer version check exit code was not 0
* 500 Internal Server Error: Transformer version check failed to create any output
* 500 Internal Server Error: Could not read the target file
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
* 500 Internal Server Error: Filename encoding error
* 507 Insufficient Storage: Failed to store the source file
*
* 408 Request Timeout -- TODO implement general timeout mechanism rather than depend on transformer timeout (might be possible for external processes)
* 415 Unsupported Media Type -- TODO possibly implement a check on supported source and target mimetypes (probably not)
* 429 Too Many Requests -- TODO implement general throttling mechanism (needs to be done)
* <li>408 Request Timeout -- TODO implement general timeout mechanism rather than depend on transformer timeout
* (might be possible for external processes)</li>
* <li>415 Unsupported Media Type -- TODO possibly implement a check on supported source and target mimetypes (probably not)</li>
* <li>429 Too Many Requests: Returned by liveness probe</li>
* </ul>
* <p>Provides methods to help super classes perform /transform requests. Also responses to /version, /ready and /live
* requests.</p>
*/
public abstract class AbstractTransformerController
{
@@ -88,6 +92,8 @@ public abstract class AbstractTransformerController
protected RuntimeExec transformCommand;
private RuntimeExec checkCommand;
private ProbeTestTransform probeTestTransform = null;
public void setTransformCommand(RuntimeExec runtimeExec)
{
transformCommand = runtimeExec;
@@ -133,6 +139,36 @@ public abstract class AbstractTransformerController
return version;
}
@GetMapping("/ready")
@ResponseBody
public String ready(HttpServletRequest request)
{
return probe(request, false);
}
@GetMapping("/live")
@ResponseBody
public String live(HttpServletRequest request)
{
return probe(request, true);
}
private String probe(HttpServletRequest request, boolean isLiveProbe)
{
return getProbeTestTransformInternal().doTransformOrNothing(request, isLiveProbe);
}
private ProbeTestTransform getProbeTestTransformInternal()
{
if (probeTestTransform == null)
{
probeTestTransform = getProbeTestTransform();
}
return probeTestTransform;
}
abstract ProbeTestTransform getProbeTestTransform();
@GetMapping("/")
public String transformForm(Model model)
{
@@ -205,7 +241,8 @@ public abstract class AbstractTransformerController
logger.error(message);
}
LogEntry.setStatusCodeAndMessage(statusCode, message);
long time = LogEntry.setStatusCodeAndMessage(statusCode, message);
getProbeTestTransformInternal().recordTransformTime(time);
// Forced to include the transformer name in the message (see commented out version of this method)
response.sendError(statusCode, transformerName+" - "+message);
@@ -259,6 +296,7 @@ public abstract class AbstractTransformerController
*/
protected File createSourceFile(HttpServletRequest request, MultipartFile multipartFile)
{
getProbeTestTransformInternal().incrementTransformerCount();
String filename = multipartFile.getOriginalFilename();
long size = multipartFile.getSize();
filename = checkFilename( true, filename);
@@ -363,8 +401,9 @@ public abstract class AbstractTransformerController
ResponseEntity<Resource> body = ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION,
"attachment; filename*= UTF-8''" + targetFilename).body(targetResource);
LogEntry.setTargetSize(targetFile.length());
LogEntry.setStatusCodeAndMessage(200, "Success");
LogEntry.addDelay(testDelay);
long time = LogEntry.setStatusCodeAndMessage(200, "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransformInternal().recordTransformTime(time);
return body;
}
catch (UnsupportedEncodingException e)

View File

@@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import java.text.SimpleDateFormat;
import java.util.Collection;
@@ -42,6 +42,8 @@ import static java.lang.Math.max;
*/
public class LogEntry
{
// TODO allow ProbeTestTransform to find out if there are any transforms running longer than the max time.
private static final AtomicInteger count = new AtomicInteger(0);
private static final Deque<LogEntry> log = new ConcurrentLinkedDeque<>();
private static final int MAX_LOG_SIZE = 10;
@@ -82,28 +84,28 @@ public class LogEntry
public String toString()
{
StringBuilder sb = new StringBuilder();
sb.append(getId());
sb.append(' ');
sb.append(HH_MM_SS.format(getDate()));
sb.append(' ');
sb.append(getStatusCode());
sb.append(' ');
sb.append(getDuration());
sb.append(' ');
sb.append(getSource());
sb.append(' ');
sb.append(getSourceSize());
sb.append(' ');
sb.append(getTarget());
sb.append(' ');
sb.append(getTargetSize());
sb.append(' ');
sb.append(getOptions());
sb.append(' ');
append(sb, Integer.toString(getId()));
append(sb, HH_MM_SS.format(getDate()));
append(sb, Integer.toString(getStatusCode()));
append(sb, getDuration());
append(sb, getSource());
append(sb, getSourceSize());
append(sb, getTarget());
append(sb, getTargetSize());
append(sb, getOptions());
sb.append(getMessage());
return sb.toString();
}
private void append(StringBuilder sb, String value)
{
if (value != null && !value.isEmpty() && !"0bytes".equals(value))
{
sb.append(value);
sb.append(' ');
}
}
public static Collection<LogEntry> getLog()
{
return log;
@@ -147,25 +149,29 @@ public class LogEntry
currentLogEntry.get().options = options;
}
public static void setStatusCodeAndMessage(int statusCode, String message)
public static long setStatusCodeAndMessage(int statusCode, String message)
{
LogEntry logEntry = currentLogEntry.get();
logEntry.statusCode = statusCode;
logEntry.message = message;
logEntry.durationTransform = System.currentTimeMillis() - logEntry.start - logEntry.durationStreamIn;
return logEntry.durationTransform;
}
// In order to test connection timeouts, a testDelay may be added as a request parameter.
// This method waits for this period to end. It is in this class as all the times are recorded here.
public static void addDelay(Long testDelay)
public static long addDelay(Long testDelay)
{
long durationDelay = 0;
if (testDelay != null && testDelay > 0)
{
currentLogEntry.get().addDelayInternal(testDelay);
durationDelay = currentLogEntry.get().addDelayInternal(testDelay);
}
return durationDelay;
}
private void addDelayInternal(Long testDelay)
private long addDelayInternal(Long testDelay)
{
long durationDelay = Math.max(testDelay - System.currentTimeMillis() + start, -1);
if (durationDelay > 0)
@@ -179,10 +185,12 @@ public class LogEntry
Thread.currentThread().interrupt();
}
this.durationDelay = durationDelay;
return durationDelay;
}
else
{
this.durationDelay = -1;
return 0;
}
}
@@ -219,7 +227,10 @@ public class LogEntry
public String getDuration()
{
return time(durationStreamIn + max(durationTransform, 0) + max(durationDelay, 0) + max(durationStreamOut, 0))+
long duration = durationStreamIn + max(durationTransform, 0) + max(durationDelay, 0) + max(durationStreamOut, 0);
return duration <= 5
? ""
: time(duration)+
" ("+
(time(durationStreamIn)+' '+
time(durationTransform)+' '+

View File

@@ -0,0 +1,314 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
/**
* Provides the logic performing test transformations by the live and ready probes.
*
* <p><b>K8s probes</b>: A readiness probe indicates if the pod should accept request. <b>It does not indicate that a pod is
* ready after startup</b>. The liveness probe indicates when to kill the pod. <b>Both probes are called throughout the
* lifetime of the pod</b> and a <b>liveness probes can take place before a readiness probe.</b> The k8s
* <b>initialDelaySeconds field is not fully honoured</b> as it multiplied by a random number, so is
* actually a maximum initial delay in seconds, but could be 0. </p>
*
* <p>Live and readiness probes might do test transforms. The first 6 requests result in a transformation
* of a small test file. The average time is remembered, but excludes the first one which is normally longer. This is
* used in future requests to discover if transformations are becoming slower. The request also returns a non 200 status
* code resulting in the k8s pod being terminated, after a predefined number of transformations have been performed or
* if any transformation takes a long time. These are controlled by environment variables:</p>
* <ul>
* <li>livenessPercent - The percentage slower the small test transform must be to indicate there is a problem.</li>
* <li>livenessTransformPeriodSeconds - As liveness probes should be frequent, not every request should result in
* a test transformation. This value defines the gap between transformations.</li>
* <li>maxTransforms - the maximum number of transformation to be performed before a restart.</li>
* <li>maxTransformSeconds - the maximum time for a transformation, including failed ones.</li>
* </ul>
*/
abstract class ProbeTestTransform
{
public static final int AVERAGE_OVER_TRANSFORMS = 5;
private final String sourceFilename;
private final String targetFilename;
private final long minExpectedLength;
private final long maxExpectedLength;
private final Log logger;
int livenessPercent;
long probeCount;
int transCount;
long normalTime;
long maxTime = Long.MAX_VALUE;
long nextTransformTime;
private final long livenessTransformPeriod;
private long maxTransformCount = Long.MAX_VALUE;
private long maxTransformTime;
private AtomicBoolean initialised = new AtomicBoolean(false);
private AtomicBoolean readySent = new AtomicBoolean(false);
private AtomicLong transformCount = new AtomicLong(0);
private AtomicBoolean die = new AtomicBoolean(false);
/**
* See Probes.md for more info.
* @param expectedLength was the length of the target file during testing
* @param plusOrMinus simply allows for some variation in the transformed size caused by new versions of dates
* @param livenessPercent indicates that for this type of transform a variation up to 2 and a half times is not
* unreasonable under load
* @param maxTransforms default values normally supplied by helm. Not identical so we can be sure which value is used.
* @param maxTransformSeconds default values normally supplied by helm. Not identical so we can be sure which value is used.
* @param livenessTransformPeriodSeconds default values normally supplied by helm. Not identical so we can be sure which value is used.
*/
public ProbeTestTransform(AbstractTransformerController controller,
String sourceFilename, String targetFilename, long expectedLength, long plusOrMinus,
int livenessPercent, long maxTransforms, long maxTransformSeconds,
long livenessTransformPeriodSeconds)
{
logger = controller.logger;
this.sourceFilename = sourceFilename;
this.targetFilename = targetFilename;
minExpectedLength = Math.max(0, expectedLength-plusOrMinus);
maxExpectedLength = expectedLength+plusOrMinus;
this.livenessPercent = (int) getPositiveLongEnv("livenessPercent", livenessPercent);
maxTransformCount = getPositiveLongEnv("maxTransforms", maxTransforms);
maxTransformTime = getPositiveLongEnv("maxTransformSeconds", maxTransformSeconds)*1000;
livenessTransformPeriod = getPositiveLongEnv("livenessTransformPeriodSeconds", livenessTransformPeriodSeconds)*1000;
}
protected long getPositiveLongEnv(String name, long defaultValue)
{
long l = -1;
String env = System.getenv(name);
if (env != null)
{
try
{
l = Long.parseLong(env);
}
catch (NumberFormatException ignore)
{
}
}
if (l <= 0)
{
l = defaultValue;
}
logger.info("Probe: "+name+"="+l);
return l;
}
// We don't want to be doing test transforms every few seconds, but do want frequent live probes.
public String doTransformOrNothing(HttpServletRequest request, boolean isLiveProbe)
{
// If not initialised OR it is a live probe and we are scheduled to to do a test transform.
probeCount++;
return (isLiveProbe && livenessTransformPeriod > 0 &&
(transCount <= AVERAGE_OVER_TRANSFORMS || nextTransformTime < System.currentTimeMillis()))
|| !initialised.get()
? doTransform(request, isLiveProbe)
: doNothing(isLiveProbe);
}
private String doNothing(boolean isLiveProbe)
{
String probeMessage = getProbeMessage(isLiveProbe);
String message = "Success - No transform.";
LogEntry.setStatusCodeAndMessage(200, probeMessage+message);
if (!isLiveProbe && !readySent.getAndSet(true))
{
logger.info(probeMessage+message);
}
return message;
}
String doTransform(HttpServletRequest request, boolean isLiveProbe)
{
checkMaxTransformTimeAndCount(isLiveProbe);
long start = System.currentTimeMillis();
if (nextTransformTime != 0)
{
do
{
nextTransformTime += livenessTransformPeriod;
}
while (nextTransformTime < start);
}
File sourceFile = getSourceFile(request, isLiveProbe);
File targetFile = getTargetFile(request);
executeTransformCommand(sourceFile, targetFile);
long time = System.currentTimeMillis() - start;
String message = "Transform " + time + "ms";
checkTargetFile(targetFile, isLiveProbe, message);
recordTransformTime(time);
calculateMaxTime(time, isLiveProbe);
if (time > maxTime)
{
throw new TransformException(500, getMessagePrefix(isLiveProbe)+
message+" which is more than "+ livenessPercent +
"% slower than the normal value of "+normalTime+"ms");
}
// We don't care if the ready or live probe works out if we are 'ready' to take requests.
initialised.set(true);
checkMaxTransformTimeAndCount(isLiveProbe);
return getProbeMessage(isLiveProbe)+message;
}
private void checkMaxTransformTimeAndCount(boolean isLiveProbe)
{
if (die.get())
{
throw new TransformException(429, getMessagePrefix(isLiveProbe) +
"Transformer requested to die. A transform took longer than "+
(maxTransformTime *1000)+" seconds");
}
if (maxTransformCount > 0 && transformCount.get() > maxTransformCount)
{
throw new TransformException(429, getMessagePrefix(isLiveProbe) +
"Transformer requested to die. It has performed more than "+
maxTransformCount+" transformations");
}
}
File getSourceFile(HttpServletRequest request, boolean isLiveProbe)
{
incrementTransformerCount();
File sourceFile = TempFileProvider.createTempFile("source_", "_"+ sourceFilename);
request.setAttribute(AbstractTransformerController.SOURCE_FILE, sourceFile);
try (InputStream inputStream = this.getClass().getResourceAsStream('/'+sourceFilename))
{
Files.copy(inputStream, sourceFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
}
catch (IOException e)
{
throw new TransformException(507, getMessagePrefix(isLiveProbe)+
"Failed to store the source file", e);
}
long length = sourceFile.length();
LogEntry.setSource(sourceFilename, length);
return sourceFile;
}
File getTargetFile(HttpServletRequest request)
{
File targetFile = TempFileProvider.createTempFile("target_", "_"+targetFilename);
request.setAttribute(AbstractTransformerController.TARGET_FILE, targetFile);
LogEntry.setTarget(targetFilename);
return targetFile;
}
void recordTransformTime(long time)
{
if (maxTransformTime > 0 && time > maxTransformTime)
{
die.set(true);
}
}
void calculateMaxTime(long time, boolean isLiveProbe)
{
if (transCount <= AVERAGE_OVER_TRANSFORMS)
{
// Take the average of the first few transforms as the normal time. The initial transform might be slower
// so is ignored. Later ones are not included in case we have a gradual performance problem.
String message = getMessagePrefix(isLiveProbe) + "Success - Transform " + time + "ms";
if (++transCount > 1)
{
normalTime = (normalTime * (transCount-2) + time) / (transCount-1);
maxTime = (normalTime * (livenessPercent + 100)) / 100;
if ((!isLiveProbe && !readySent.getAndSet(true)) || transCount > AVERAGE_OVER_TRANSFORMS)
{
nextTransformTime = System.currentTimeMillis() + livenessTransformPeriod;
logger.info(message + " - " + normalTime + "ms+" + livenessPercent + "%=" + maxTime + "ms");
}
}
else if (!isLiveProbe && !readySent.getAndSet(true))
{
logger.info(message);
}
}
}
protected abstract void executeTransformCommand(File sourceFile, File targetFile);
private void checkTargetFile(File targetFile, boolean isLiveProbe, String message)
{
String probeMessage = getProbeMessage(isLiveProbe);
if (!targetFile.exists() || !targetFile.isFile())
{
throw new TransformException(500, probeMessage +"Target File \""+targetFile.getAbsolutePath()+"\" did not exist");
}
long length = targetFile.length();
if (length < minExpectedLength || length > maxExpectedLength)
{
throw new TransformException(500, probeMessage +"Target File \""+targetFile.getAbsolutePath()+
"\" was the wrong size ("+ length+"). Needed to be between "+minExpectedLength+" and "+
maxExpectedLength);
}
LogEntry.setTargetSize(length);
LogEntry.setStatusCodeAndMessage(200, probeMessage +"Success - "+message);
}
private String getMessagePrefix(boolean isLiveProbe)
{
return Long.toString(probeCount) + ' ' + getProbeMessage(isLiveProbe);
}
private String getProbeMessage(boolean isLiveProbe)
{
return (isLiveProbe ? "Live Probe: " : "Ready Probe: ");
}
public void incrementTransformerCount()
{
transformCount.incrementAndGet();
}
}

View File

@@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
public class TransformException extends RuntimeException
{

View File

@@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import org.springframework.web.servlet.handler.HandlerInterceptorAdapter;
@@ -48,8 +48,8 @@ public class TransformInterceptor extends HandlerInterceptorAdapter
throws Exception
{
// TargetFile cannot be deleted until completion, otherwise 0 bytes are sent.
deleteFile(request, "sourceFile");
deleteFile(request, "targetFile");
deleteFile(request, AbstractTransformerController.SOURCE_FILE);
deleteFile(request, AbstractTransformerController.TARGET_FILE);
LogEntry.complete();
}

View File

@@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -35,7 +35,7 @@ public class WebApplicationConfig extends WebMvcConfigurerAdapter {
@Override
public void addInterceptors(InterceptorRegistry registry) {
registry.addInterceptor(transformInterceptor()).addPathPatterns("/transform");;
registry.addInterceptor(transformInterceptor()).addPathPatterns("/transform", "/live", "/ready");;
}
@Bean

View File

@@ -25,7 +25,6 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.base.AbstractTransformerController;
import org.alfresco.util.exec.RuntimeExec;
import org.junit.Test;
import org.mockito.Mock;
@@ -81,7 +80,7 @@ public abstract class AbstractTransformerControllerTest
protected byte[] expectedSourceFileBytes;
protected byte[] expectedTargetFileBytes;
private AbstractTransformerController controller;
protected AbstractTransformerController controller;
// Called by sub class
public void mockTransformCommand(AbstractTransformerController controller, String sourceExtension, String targetExtension, String sourceMimetype) throws IOException
@@ -298,4 +297,33 @@ public abstract class AbstractTransformerControllerTest
// System.out.println(str);
// }
// }
@Test
public void calculateMaxTime() throws Exception
{
ProbeTestTransform probeTestTransform = controller.getProbeTestTransform();
probeTestTransform.livenessPercent = 110;
long [][] values = new long[][] {
{5000, 0, Long.MAX_VALUE}, // 1st transform is ignored
{1000, 1000, 2100}, // 1000 + 1000*1.1
{3000, 2000, 4200}, // 2000 + 2000*1.1
{2000, 2000, 4200},
{6000, 3000, 6300},
{8000, 4000, 8400},
{4444, 4000, 8400}, // no longer in the first few, so normal and max times don't change
{5555, 4000, 8400}
};
for (long[] v: values)
{
long time = v[0];
long expectedNormalTime = v[1];
long expectedMaxTime = v[2];
probeTestTransform.calculateMaxTime(time, true);
assertEquals("", expectedNormalTime, probeTestTransform.normalTime);
assertEquals("", expectedMaxTime, probeTestTransform.maxTime);
}
}
}