REPO-3505 Addition of liveness and readiness probes for transformers.

This commit is contained in:
Alan Davis 2018-05-23 15:36:56 +01:00
commit b04fdd4ef1
18 changed files with 618 additions and 75 deletions

View File

@ -11,8 +11,6 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.base.AbstractTransformerController;
import org.alfresco.transformer.base.LogEntry;
import org.alfresco.util.exec.RuntimeExec;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -98,6 +96,21 @@ public class AlfrescoPdfRendererController extends AbstractTransformerController
return runtimeExec;
}
@Override
protected ProbeTestTransform getProbeTestTransform()
{
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this,"quick.pdf", "quick.png",
7455, 1024, 150, 10240, 60*20+1, 60*15-15)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
AlfrescoPdfRendererController.this.executeTransformCommand("", sourceFile, targetFile, null);
}
};
}
@PostMapping("/transform")
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@ -138,6 +151,13 @@ public class AlfrescoPdfRendererController extends AbstractTransformerController
args.add("--page=" + page);
}
String options = args.toString();
executeTransformCommand(options, sourceFile, targetFile, timeout);
return createAttachment(targetFilename, targetFile, testDelay);
}
private void executeTransformCommand(String options, File sourceFile, File targetFile, @RequestParam(value = "timeout", required = false) Long timeout)
{
LogEntry.setOptions(options);
Map<String, String> properties = new HashMap<String, String>(5);
@ -146,7 +166,5 @@ public class AlfrescoPdfRendererController extends AbstractTransformerController
properties.put("target", targetFile.getAbsolutePath());
executeTransformCommand(properties, targetFile, timeout);
return createAttachment(targetFilename, targetFile, testDelay);
}
}

View File

@ -25,6 +25,7 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;

View File

@ -11,9 +11,6 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.base.AbstractTransformerController;
import org.alfresco.transformer.base.LogEntry;
import org.alfresco.transformer.base.TransformException;
import org.alfresco.util.exec.RuntimeExec;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -109,6 +106,21 @@ public class ImageMagickController extends AbstractTransformerController
return runtimeExec;
}
@Override
protected ProbeTestTransform getProbeTestTransform()
{
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this, "quick.jpg", "quick.png",
35593, 1024, 150, 1024, 60*15+1,60*15+0)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
ImageMagickController.this.executeTransformCommand("", sourceFile, "", targetFile, null);
}
};
}
@PostMapping("/transform")
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@ -254,6 +266,13 @@ public class ImageMagickController extends AbstractTransformerController
: "["+startPage+'-'+endPage+']';
String options = args.toString();
executeTransformCommand(options, sourceFile, pageRange, targetFile, timeout);
return createAttachment(targetFilename, targetFile, testDelay);
}
private void executeTransformCommand(String options, File sourceFile, String pageRange, File targetFile, Long timeout)
{
LogEntry.setOptions(pageRange+(pageRange.isEmpty() ? "" : " ")+options);
Map<String, String> properties = new HashMap<String, String>(5);
@ -262,7 +281,5 @@ public class ImageMagickController extends AbstractTransformerController
properties.put("target", targetFile.getAbsolutePath());
executeTransformCommand(properties, targetFile, timeout);
return createAttachment(targetFilename, targetFile, testDelay);
}
}

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -25,6 +25,7 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;

View File

@ -12,8 +12,6 @@
package org.alfresco.transformer;
import com.sun.star.task.ErrorCodeIOException;
import org.alfresco.transformer.base.AbstractTransformerController;
import org.alfresco.transformer.base.TransformException;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
@ -71,9 +69,6 @@ public class LibreOfficeController extends AbstractTransformerController
logEnterpriseLicenseMessage();
logger.info("This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt");
logger.info("-------------------------------------------------------------------------------------------------------------------------------------------------------");
// TODO Remove this when we are happy that creating the jodconverter on the first transform is okay.
// setJodConverter(createJodConverter(null));
}
private static JodConverter createJodConverter(Long taskExecutionTimeout)
@ -122,14 +117,24 @@ public class LibreOfficeController extends AbstractTransformerController
@Override
protected String version()
{
// TODO Remove this when we are happy that creating the jodconverter on the first transform is okay.
// if (!jodconverter.isAvailable())
// {
// throw new TransformException(500, "LibreOffice is not yet available");
// }
return "LibreOffice available";
}
@Override
protected ProbeTestTransform getProbeTestTransform()
{
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this, "quick.doc", "quick.pdf",
11817, 1024, 150, 10240, 60*30+1, 60*15+20)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
LibreOfficeController.this.executeTransformCommand(sourceFile, targetFile, null);
}
};
}
@PostMapping("/transform")
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,

View File

@ -66,6 +66,8 @@ public class LibreOfficeControllerTest extends AbstractTransformerControllerTest
@Before
public void before() throws IOException
{
super.controller = controller;
sourceExtension = "doc";
targetExtension = "pdf";
sourceMimetype = "application/msword";

View File

@ -25,6 +25,7 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

View File

@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import org.alfresco.util.TempFileProvider;
import org.alfresco.util.exec.RuntimeExec;
@ -55,28 +55,32 @@ import java.util.Collection;
import java.util.Map;
/**
* Abstract Controller, provides structure and helper methods to sub-class transformer controllers.
* <p>Abstract Controller, provides structure and helper methods to sub-class transformer controllers.</p>
*
* Status Codes:
* <p>Status Codes:</p>
* <ul>
* <li>200 Success</li>
* <li>400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)</li>
* <li>400 Bad Request: Request parameter <name> is of the wrong type</li>
* <li>400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)</li>
* <li>400 Bad Request: The source filename was not supplied</li>
* <li>500 Internal Server Error: (no message with low level IO problems)</li>
* <li>500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)</li>
* <li>500 Internal Server Error: Transformer version check exit code was not 0</li>
* <li>500 Internal Server Error: Transformer version check failed to create any output</li>
* <li>500 Internal Server Error: Could not read the target file</li>
* <li>500 Internal Server Error: The target filename was malformed (should not happen because of other checks)</li>
* <li>500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)</li>
* <li>500 Internal Server Error: Filename encoding error</li>
* <li>507 Insufficient Storage: Failed to store the source file</li>
*
* 200 Success
* 400 Bad Request: Request parameter <name> is missing (missing mandatory parameter)
* 400 Bad Request: Request parameter <name> is of the wrong type
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
* 400 Bad Request: The source filename was not supplied
* 500 Internal Server Error: (no message with low level IO problems)
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
* 500 Internal Server Error: Transformer version check exit code was not 0
* 500 Internal Server Error: Transformer version check failed to create any output
* 500 Internal Server Error: Could not read the target file
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
* 500 Internal Server Error: Filename encoding error
* 507 Insufficient Storage: Failed to store the source file
*
* 408 Request Timeout -- TODO implement general timeout mechanism rather than depend on transformer timeout (might be possible for external processes)
* 415 Unsupported Media Type -- TODO possibly implement a check on supported source and target mimetypes (probably not)
* 429 Too Many Requests -- TODO implement general throttling mechanism (needs to be done)
* <li>408 Request Timeout -- TODO implement general timeout mechanism rather than depend on transformer timeout
* (might be possible for external processes)</li>
* <li>415 Unsupported Media Type -- TODO possibly implement a check on supported source and target mimetypes (probably not)</li>
* <li>429 Too Many Requests: Returned by liveness probe</li>
* </ul>
* <p>Provides methods to help super classes perform /transform requests. Also responses to /version, /ready and /live
* requests.</p>
*/
public abstract class AbstractTransformerController
{
@ -88,6 +92,8 @@ public abstract class AbstractTransformerController
protected RuntimeExec transformCommand;
private RuntimeExec checkCommand;
private ProbeTestTransform probeTestTransform = null;
public void setTransformCommand(RuntimeExec runtimeExec)
{
transformCommand = runtimeExec;
@ -133,6 +139,36 @@ public abstract class AbstractTransformerController
return version;
}
@GetMapping("/ready")
@ResponseBody
public String ready(HttpServletRequest request)
{
return probe(request, false);
}
@GetMapping("/live")
@ResponseBody
public String live(HttpServletRequest request)
{
return probe(request, true);
}
private String probe(HttpServletRequest request, boolean isLiveProbe)
{
return getProbeTestTransformInternal().doTransformOrNothing(request, isLiveProbe);
}
private ProbeTestTransform getProbeTestTransformInternal()
{
if (probeTestTransform == null)
{
probeTestTransform = getProbeTestTransform();
}
return probeTestTransform;
}
abstract ProbeTestTransform getProbeTestTransform();
@GetMapping("/")
public String transformForm(Model model)
{
@ -205,7 +241,8 @@ public abstract class AbstractTransformerController
logger.error(message);
}
LogEntry.setStatusCodeAndMessage(statusCode, message);
long time = LogEntry.setStatusCodeAndMessage(statusCode, message);
getProbeTestTransformInternal().recordTransformTime(time);
// Forced to include the transformer name in the message (see commented out version of this method)
response.sendError(statusCode, transformerName+" - "+message);
@ -259,6 +296,7 @@ public abstract class AbstractTransformerController
*/
protected File createSourceFile(HttpServletRequest request, MultipartFile multipartFile)
{
getProbeTestTransformInternal().incrementTransformerCount();
String filename = multipartFile.getOriginalFilename();
long size = multipartFile.getSize();
filename = checkFilename( true, filename);
@ -363,8 +401,9 @@ public abstract class AbstractTransformerController
ResponseEntity<Resource> body = ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION,
"attachment; filename*= UTF-8''" + targetFilename).body(targetResource);
LogEntry.setTargetSize(targetFile.length());
LogEntry.setStatusCodeAndMessage(200, "Success");
LogEntry.addDelay(testDelay);
long time = LogEntry.setStatusCodeAndMessage(200, "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransformInternal().recordTransformTime(time);
return body;
}
catch (UnsupportedEncodingException e)

View File

@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import java.text.SimpleDateFormat;
import java.util.Collection;
@ -42,6 +42,8 @@ import static java.lang.Math.max;
*/
public class LogEntry
{
// TODO allow ProbeTestTransform to find out if there are any transforms running longer than the max time.
private static final AtomicInteger count = new AtomicInteger(0);
private static final Deque<LogEntry> log = new ConcurrentLinkedDeque<>();
private static final int MAX_LOG_SIZE = 10;
@ -82,28 +84,28 @@ public class LogEntry
public String toString()
{
StringBuilder sb = new StringBuilder();
sb.append(getId());
sb.append(' ');
sb.append(HH_MM_SS.format(getDate()));
sb.append(' ');
sb.append(getStatusCode());
sb.append(' ');
sb.append(getDuration());
sb.append(' ');
sb.append(getSource());
sb.append(' ');
sb.append(getSourceSize());
sb.append(' ');
sb.append(getTarget());
sb.append(' ');
sb.append(getTargetSize());
sb.append(' ');
sb.append(getOptions());
sb.append(' ');
append(sb, Integer.toString(getId()));
append(sb, HH_MM_SS.format(getDate()));
append(sb, Integer.toString(getStatusCode()));
append(sb, getDuration());
append(sb, getSource());
append(sb, getSourceSize());
append(sb, getTarget());
append(sb, getTargetSize());
append(sb, getOptions());
sb.append(getMessage());
return sb.toString();
}
private void append(StringBuilder sb, String value)
{
if (value != null && !value.isEmpty() && !"0bytes".equals(value))
{
sb.append(value);
sb.append(' ');
}
}
public static Collection<LogEntry> getLog()
{
return log;
@ -147,25 +149,29 @@ public class LogEntry
currentLogEntry.get().options = options;
}
public static void setStatusCodeAndMessage(int statusCode, String message)
public static long setStatusCodeAndMessage(int statusCode, String message)
{
LogEntry logEntry = currentLogEntry.get();
logEntry.statusCode = statusCode;
logEntry.message = message;
logEntry.durationTransform = System.currentTimeMillis() - logEntry.start - logEntry.durationStreamIn;
return logEntry.durationTransform;
}
// In order to test connection timeouts, a testDelay may be added as a request parameter.
// This method waits for this period to end. It is in this class as all the times are recorded here.
public static void addDelay(Long testDelay)
public static long addDelay(Long testDelay)
{
long durationDelay = 0;
if (testDelay != null && testDelay > 0)
{
currentLogEntry.get().addDelayInternal(testDelay);
durationDelay = currentLogEntry.get().addDelayInternal(testDelay);
}
return durationDelay;
}
private void addDelayInternal(Long testDelay)
private long addDelayInternal(Long testDelay)
{
long durationDelay = Math.max(testDelay - System.currentTimeMillis() + start, -1);
if (durationDelay > 0)
@ -179,10 +185,12 @@ public class LogEntry
Thread.currentThread().interrupt();
}
this.durationDelay = durationDelay;
return durationDelay;
}
else
{
this.durationDelay = -1;
return 0;
}
}
@ -219,7 +227,10 @@ public class LogEntry
public String getDuration()
{
return time(durationStreamIn + max(durationTransform, 0) + max(durationDelay, 0) + max(durationStreamOut, 0))+
long duration = durationStreamIn + max(durationTransform, 0) + max(durationDelay, 0) + max(durationStreamOut, 0);
return duration <= 5
? ""
: time(duration)+
" ("+
(time(durationStreamIn)+' '+
time(durationTransform)+' '+

View File

@ -0,0 +1,314 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2018 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
/**
* Provides the logic performing test transformations by the live and ready probes.
*
* <p><b>K8s probes</b>: A readiness probe indicates if the pod should accept request. <b>It does not indicate that a pod is
* ready after startup</b>. The liveness probe indicates when to kill the pod. <b>Both probes are called throughout the
* lifetime of the pod</b> and a <b>liveness probes can take place before a readiness probe.</b> The k8s
* <b>initialDelaySeconds field is not fully honoured</b> as it multiplied by a random number, so is
* actually a maximum initial delay in seconds, but could be 0. </p>
*
* <p>Live and readiness probes might do test transforms. The first 6 requests result in a transformation
* of a small test file. The average time is remembered, but excludes the first one which is normally longer. This is
* used in future requests to discover if transformations are becoming slower. The request also returns a non 200 status
* code resulting in the k8s pod being terminated, after a predefined number of transformations have been performed or
* if any transformation takes a long time. These are controlled by environment variables:</p>
* <ul>
* <li>livenessPercent - The percentage slower the small test transform must be to indicate there is a problem.</li>
* <li>livenessTransformPeriodSeconds - As liveness probes should be frequent, not every request should result in
* a test transformation. This value defines the gap between transformations.</li>
* <li>maxTransforms - the maximum number of transformation to be performed before a restart.</li>
* <li>maxTransformSeconds - the maximum time for a transformation, including failed ones.</li>
* </ul>
*/
abstract class ProbeTestTransform
{
public static final int AVERAGE_OVER_TRANSFORMS = 5;
private final String sourceFilename;
private final String targetFilename;
private final long minExpectedLength;
private final long maxExpectedLength;
private final Log logger;
int livenessPercent;
long probeCount;
int transCount;
long normalTime;
long maxTime = Long.MAX_VALUE;
long nextTransformTime;
private final long livenessTransformPeriod;
private long maxTransformCount = Long.MAX_VALUE;
private long maxTransformTime;
private AtomicBoolean initialised = new AtomicBoolean(false);
private AtomicBoolean readySent = new AtomicBoolean(false);
private AtomicLong transformCount = new AtomicLong(0);
private AtomicBoolean die = new AtomicBoolean(false);
/**
* See Probes.md for more info.
* @param expectedLength was the length of the target file during testing
* @param plusOrMinus simply allows for some variation in the transformed size caused by new versions of dates
* @param livenessPercent indicates that for this type of transform a variation up to 2 and a half times is not
* unreasonable under load
* @param maxTransforms default values normally supplied by helm. Not identical so we can be sure which value is used.
* @param maxTransformSeconds default values normally supplied by helm. Not identical so we can be sure which value is used.
* @param livenessTransformPeriodSeconds default values normally supplied by helm. Not identical so we can be sure which value is used.
*/
public ProbeTestTransform(AbstractTransformerController controller,
String sourceFilename, String targetFilename, long expectedLength, long plusOrMinus,
int livenessPercent, long maxTransforms, long maxTransformSeconds,
long livenessTransformPeriodSeconds)
{
logger = controller.logger;
this.sourceFilename = sourceFilename;
this.targetFilename = targetFilename;
minExpectedLength = Math.max(0, expectedLength-plusOrMinus);
maxExpectedLength = expectedLength+plusOrMinus;
this.livenessPercent = (int) getPositiveLongEnv("livenessPercent", livenessPercent);
maxTransformCount = getPositiveLongEnv("maxTransforms", maxTransforms);
maxTransformTime = getPositiveLongEnv("maxTransformSeconds", maxTransformSeconds)*1000;
livenessTransformPeriod = getPositiveLongEnv("livenessTransformPeriodSeconds", livenessTransformPeriodSeconds)*1000;
}
protected long getPositiveLongEnv(String name, long defaultValue)
{
long l = -1;
String env = System.getenv(name);
if (env != null)
{
try
{
l = Long.parseLong(env);
}
catch (NumberFormatException ignore)
{
}
}
if (l <= 0)
{
l = defaultValue;
}
logger.info("Probe: "+name+"="+l);
return l;
}
// We don't want to be doing test transforms every few seconds, but do want frequent live probes.
public String doTransformOrNothing(HttpServletRequest request, boolean isLiveProbe)
{
// If not initialised OR it is a live probe and we are scheduled to to do a test transform.
probeCount++;
return (isLiveProbe && livenessTransformPeriod > 0 &&
(transCount <= AVERAGE_OVER_TRANSFORMS || nextTransformTime < System.currentTimeMillis()))
|| !initialised.get()
? doTransform(request, isLiveProbe)
: doNothing(isLiveProbe);
}
private String doNothing(boolean isLiveProbe)
{
String probeMessage = getProbeMessage(isLiveProbe);
String message = "Success - No transform.";
LogEntry.setStatusCodeAndMessage(200, probeMessage+message);
if (!isLiveProbe && !readySent.getAndSet(true))
{
logger.info(probeMessage+message);
}
return message;
}
String doTransform(HttpServletRequest request, boolean isLiveProbe)
{
checkMaxTransformTimeAndCount(isLiveProbe);
long start = System.currentTimeMillis();
if (nextTransformTime != 0)
{
do
{
nextTransformTime += livenessTransformPeriod;
}
while (nextTransformTime < start);
}
File sourceFile = getSourceFile(request, isLiveProbe);
File targetFile = getTargetFile(request);
executeTransformCommand(sourceFile, targetFile);
long time = System.currentTimeMillis() - start;
String message = "Transform " + time + "ms";
checkTargetFile(targetFile, isLiveProbe, message);
recordTransformTime(time);
calculateMaxTime(time, isLiveProbe);
if (time > maxTime)
{
throw new TransformException(500, getMessagePrefix(isLiveProbe)+
message+" which is more than "+ livenessPercent +
"% slower than the normal value of "+normalTime+"ms");
}
// We don't care if the ready or live probe works out if we are 'ready' to take requests.
initialised.set(true);
checkMaxTransformTimeAndCount(isLiveProbe);
return getProbeMessage(isLiveProbe)+message;
}
private void checkMaxTransformTimeAndCount(boolean isLiveProbe)
{
if (die.get())
{
throw new TransformException(429, getMessagePrefix(isLiveProbe) +
"Transformer requested to die. A transform took longer than "+
(maxTransformTime *1000)+" seconds");
}
if (maxTransformCount > 0 && transformCount.get() > maxTransformCount)
{
throw new TransformException(429, getMessagePrefix(isLiveProbe) +
"Transformer requested to die. It has performed more than "+
maxTransformCount+" transformations");
}
}
File getSourceFile(HttpServletRequest request, boolean isLiveProbe)
{
incrementTransformerCount();
File sourceFile = TempFileProvider.createTempFile("source_", "_"+ sourceFilename);
request.setAttribute(AbstractTransformerController.SOURCE_FILE, sourceFile);
try (InputStream inputStream = this.getClass().getResourceAsStream('/'+sourceFilename))
{
Files.copy(inputStream, sourceFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
}
catch (IOException e)
{
throw new TransformException(507, getMessagePrefix(isLiveProbe)+
"Failed to store the source file", e);
}
long length = sourceFile.length();
LogEntry.setSource(sourceFilename, length);
return sourceFile;
}
File getTargetFile(HttpServletRequest request)
{
File targetFile = TempFileProvider.createTempFile("target_", "_"+targetFilename);
request.setAttribute(AbstractTransformerController.TARGET_FILE, targetFile);
LogEntry.setTarget(targetFilename);
return targetFile;
}
void recordTransformTime(long time)
{
if (maxTransformTime > 0 && time > maxTransformTime)
{
die.set(true);
}
}
void calculateMaxTime(long time, boolean isLiveProbe)
{
if (transCount <= AVERAGE_OVER_TRANSFORMS)
{
// Take the average of the first few transforms as the normal time. The initial transform might be slower
// so is ignored. Later ones are not included in case we have a gradual performance problem.
String message = getMessagePrefix(isLiveProbe) + "Success - Transform " + time + "ms";
if (++transCount > 1)
{
normalTime = (normalTime * (transCount-2) + time) / (transCount-1);
maxTime = (normalTime * (livenessPercent + 100)) / 100;
if ((!isLiveProbe && !readySent.getAndSet(true)) || transCount > AVERAGE_OVER_TRANSFORMS)
{
nextTransformTime = System.currentTimeMillis() + livenessTransformPeriod;
logger.info(message + " - " + normalTime + "ms+" + livenessPercent + "%=" + maxTime + "ms");
}
}
else if (!isLiveProbe && !readySent.getAndSet(true))
{
logger.info(message);
}
}
}
protected abstract void executeTransformCommand(File sourceFile, File targetFile);
private void checkTargetFile(File targetFile, boolean isLiveProbe, String message)
{
String probeMessage = getProbeMessage(isLiveProbe);
if (!targetFile.exists() || !targetFile.isFile())
{
throw new TransformException(500, probeMessage +"Target File \""+targetFile.getAbsolutePath()+"\" did not exist");
}
long length = targetFile.length();
if (length < minExpectedLength || length > maxExpectedLength)
{
throw new TransformException(500, probeMessage +"Target File \""+targetFile.getAbsolutePath()+
"\" was the wrong size ("+ length+"). Needed to be between "+minExpectedLength+" and "+
maxExpectedLength);
}
LogEntry.setTargetSize(length);
LogEntry.setStatusCodeAndMessage(200, probeMessage +"Success - "+message);
}
private String getMessagePrefix(boolean isLiveProbe)
{
return Long.toString(probeCount) + ' ' + getProbeMessage(isLiveProbe);
}
private String getProbeMessage(boolean isLiveProbe)
{
return (isLiveProbe ? "Live Probe: " : "Ready Probe: ");
}
public void incrementTransformerCount()
{
transformCount.incrementAndGet();
}
}

View File

@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
public class TransformException extends RuntimeException
{

View File

@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import org.springframework.web.servlet.handler.HandlerInterceptorAdapter;
@ -48,8 +48,8 @@ public class TransformInterceptor extends HandlerInterceptorAdapter
throws Exception
{
// TargetFile cannot be deleted until completion, otherwise 0 bytes are sent.
deleteFile(request, "sourceFile");
deleteFile(request, "targetFile");
deleteFile(request, AbstractTransformerController.SOURCE_FILE);
deleteFile(request, AbstractTransformerController.TARGET_FILE);
LogEntry.complete();
}

View File

@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.base;
package org.alfresco.transformer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@ -35,7 +35,7 @@ public class WebApplicationConfig extends WebMvcConfigurerAdapter {
@Override
public void addInterceptors(InterceptorRegistry registry) {
registry.addInterceptor(transformInterceptor()).addPathPatterns("/transform");;
registry.addInterceptor(transformInterceptor()).addPathPatterns("/transform", "/live", "/ready");;
}
@Bean

View File

@ -25,7 +25,6 @@
*/
package org.alfresco.transformer;
import org.alfresco.transformer.base.AbstractTransformerController;
import org.alfresco.util.exec.RuntimeExec;
import org.junit.Test;
import org.mockito.Mock;
@ -81,7 +80,7 @@ public abstract class AbstractTransformerControllerTest
protected byte[] expectedSourceFileBytes;
protected byte[] expectedTargetFileBytes;
private AbstractTransformerController controller;
protected AbstractTransformerController controller;
// Called by sub class
public void mockTransformCommand(AbstractTransformerController controller, String sourceExtension, String targetExtension, String sourceMimetype) throws IOException
@ -298,4 +297,33 @@ public abstract class AbstractTransformerControllerTest
// System.out.println(str);
// }
// }
@Test
public void calculateMaxTime() throws Exception
{
ProbeTestTransform probeTestTransform = controller.getProbeTestTransform();
probeTestTransform.livenessPercent = 110;
long [][] values = new long[][] {
{5000, 0, Long.MAX_VALUE}, // 1st transform is ignored
{1000, 1000, 2100}, // 1000 + 1000*1.1
{3000, 2000, 4200}, // 2000 + 2000*1.1
{2000, 2000, 4200},
{6000, 3000, 6300},
{8000, 4000, 8400},
{4444, 4000, 8400}, // no longer in the first few, so normal and max times don't change
{5555, 4000, 8400}
};
for (long[] v: values)
{
long time = v[0];
long expectedNormalTime = v[1];
long expectedMaxTime = v[2];
probeTestTransform.calculateMaxTime(time, true);
assertEquals("", expectedNormalTime, probeTestTransform.normalTime);
assertEquals("", expectedMaxTime, probeTestTransform.maxTime);
}
}
}

106
docs/Probes.md Normal file
View File

@ -0,0 +1,106 @@
# Transformer k8s liveness and readiness probes
The transformer's liveness and readiness probes perform small test transformations to check that a pod has fully started up and that it is still healthy.
Initial test transforms are performed by both probes on start up. After a successful transform, the readiness probe always returns a success message and does no more transformations. As a result requests will not be suspended to the pod unless there is a network issue.
The liveness probe gathers the average time of 5 test transformation after start up (the initial transform is ignored as it may take longer). It will then do a test transform occasionally (see livenessTransformPeriodSeconds) to ensure the system is still healthy. If any of these transforms take longer that a specified percentage, the probe will return an error status code to terminate the pod. The liveness probe may optionally also be configured to return an error if a specified number of transformations have been performed or if any transform takes longer than some specified limit.
Environment variables
### Configuration
The actions of the probes are controlled by environment variables
livenessPercent - The percentage slower the small test transform must be to indicate there is a problem. Generally
set to 150 so the test transform may be two and a half times as long.
livenessTransformPeriodSeconds - As liveness probes should be frequent, not every request should result in a test
transformation. This value defines the gap between transformations. Generally set to 10 minutes.
maxTransforms - the maximum number of transformation to be performed before a restart. Generally quite a high number
but does allow for the pod to be recycled occationally. A value of 0 disables this check.
maxTransformSeconds - the maximum time for a transformation, including failed ones. Quite large values needs to be
used to avoid recycling pods if there are large files that take a long time. Generally transforms that do take
a long time indicate that the transformer is no longer healthy. A value of 0 disables this check.
The rate and frequency of the probes are controlled by standard k8s fields. See [K8s Liveness and Readiness Probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes)
initialDelaySeconds - The maximum time that the probe waits before being called. Generally configured so that the
live probe has a shorter value has a chance to initialise the pod beore the first request come in.
periodSeconds - The frequence that the probe is called. Noteboth probes are called throughout the lieftime of the
pod.
timeoutSeconds - Generally overrides the default of 1 second, to allow the probe long enough to check slow test
transforms.
failureThreshold - set to 1 in the case of the liveness probe, so that any failure terminates the pod sright away.
In the case of readiness probe this is left as the default 3, to give the pod a chance to start.
## Helm chart use of these variables and fields
#### Values.yaml
~~~yaml
imagemagick:
# ...
readinessProbe:
initialDelaySeconds: 20
periodSeconds: 60
timeoutSeconds: 10
livenessProbe:
initialDelaySeconds: 10
periodSeconds: 20
timeoutSeconds: 10
livenessPercent: 150
livenessTransformPeriodSeconds: 600
maxTransforms: 10000
maxTransformSeconds: 900
~~~
#### deployment-imagemagick.yaml
~~~yaml
apiVersion: extensions/v1beta1
kind: Deployment
# ...
spec:
# ...
spec:
containers:
envFrom:
- configMapRef:
# config map to use, defined in config-imagemagick.yaml
name: {{ template "content-services.fullname" . }}-imagemagick-configmap
readinessProbe:
httpGet:
path: /ready
port: {{ .Values.imagemagick.image.internalPort }}
initialDelaySeconds: {{ .Values.imagemagick.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.imagemagick.readinessProbe.periodSeconds }}
timeoutSeconds: {{ .Values.imagemagick.readinessProbe.timeoutSeconds }}
livenessProbe:
httpGet:
path: /live
port: {{ .Values.imagemagick.image.internalPort }}
initialDelaySeconds: {{ .Values.imagemagick.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.imagemagick.livenessProbe.periodSeconds }}
failureThreshold: 1
timeoutSeconds: {{ .Values.imagemagick.livenessProbe.timeoutSeconds }}
# ...
~~~
#### config-imagemagick.yaml
~~~yaml
# Defines the properties required by the imagemagick container
apiVersion: v1
kind: ConfigMap
# ...
data:
livenessPercent: "{{ .Values.imagemagick.livenessProbe.livenessPercent }}"
livenessTransformPeriodSeconds: "{{ .Values.imagemagick.livenessProbe.livenessTransformPeriodSeconds }}"
maxTransforms: "{{ .Values.imagemagick.livenessProbe.maxTransforms }}"
maxTransformSeconds: "{{ .Values.imagemagick.livenessProbe.maxTransformSeconds }}"
~~~