();
properties.put(ContentModel.PROP_NAME, filename);
img = nodeService.createNode(
imgFolder,
ContentModel.ASSOC_CONTAINS,
QName.createQName(filename),
ContentModel.TYPE_CONTENT,
properties
).getChildRef();
if (logger.isDebugEnabled())
{
logger.debug("Image node created: " + img);
}
}
// TODO Once composite content is properly supported,
// at this point we'll associate the new image with
// the rendered HTML node so the dependency is tracked.
// Put the image into the node
ContentWriter writer = contentService.getWriter(
img, ContentModel.PROP_CONTENT, true
);
writer.setMimetype(contentType);
writer.putContent(imageSource);
if (logger.isDebugEnabled())
{
logger.debug("Image content written into " + img);
}
// All done
return img;
}
/**
 * Builds a Tika-compatible SAX content handler, which will
 *  be used to generate and capture the XHTML.
 * <p>The returned handler serialises SAX events as indented XML into
 *  the supplied writer, passing them first through a
 *  {@link TikaImageRewritingContentHandler}. When the
 *  PARAM_BODY_CONTENTS_ONLY parameter is set on the context, the chain
 *  is additionally wrapped in a BodyContentHandler.</p>
 *
 * @param output  where the serialised XML is written
 * @param context the rendering context whose parameters control image
 *                naming and body-only output
 * @return the outermost handler of the chain, ready to hand to a parser
 * @throws RenditionServiceException if no SAX transformer handler can be
 *         created; the underlying configuration exception is the cause
 */
private ContentHandler buildContentHandler(Writer output, RenderingContext context)
{
    // Create the main transformer
    SAXTransformerFactory factory = (SAXTransformerFactory)
        SAXTransformerFactory.newInstance();
    TransformerHandler handler;
    try {
        handler = factory.newTransformerHandler();
    } catch (TransformerConfigurationException e) {
        // Keep the original exception as the cause so its stack trace isn't lost
        throw new RenditionServiceException("SAX Processing isn't available - " + e, e);
    }

    // NOTE(review): the statement order below is kept exactly as it was.
    // Some TransformerHandler implementations may read the output properties
    // when setResult is called - confirm before re-ordering these lines.
    handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    handler.setResult(new StreamResult(output));
    handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");

    // Change the image links as they go past. Only one of the two
    // values is set: a name prefix when images share the folder with
    // the HTML, otherwise the name of the images directory.
    String dirName = null;
    String imgPrefix = null;
    if (context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false))
    {
        imgPrefix = getImagesPrefixName(context);
    }
    else
    {
        dirName = getImagesDirectoryName(context);
    }
    ContentHandler contentHandler = new TikaImageRewritingContentHandler(
        handler, dirName, imgPrefix
    );

    // If required, wrap it to only return the body
    boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
    if (bodyOnly) {
        contentHandler = new BodyContentHandler(contentHandler);
    }

    // All done
    return contentHandler;
}
/**
* Asks Tika to translate the contents into HTML
*/
private void generateHTML(Parser p, RenderingContext context)
{
ContentReader contentReader = context.makeContentReader();
// Setup things to parse with
StringWriter sw = new StringWriter();
ContentHandler handler = buildContentHandler(sw, context);
// Tell Tika what we're dealing with
Metadata metadata = new Metadata();
metadata.set(
Metadata.CONTENT_TYPE,
contentReader.getMimetype()
);
metadata.set(
Metadata.RESOURCE_NAME_KEY,
nodeService.getProperty(
context.getSourceNode(),
ContentModel.PROP_NAME
).toString()
);
// Our parse context needs to extract images
ParseContext parseContext = new ParseContext();
parseContext.set(Parser.class, new TikaImageExtractingParser(context));
// Parse
try {
p.parse(
contentReader.getContentInputStream(),
handler, metadata, parseContext
);
} catch(Exception e) {
throw new RenditionServiceException("Tika HTML Conversion Failed", e);
}
// As a string
String html = sw.toString();
// If we're doing body-only, remove all the html namespaces
// that will otherwise clutter up the document
boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
if(bodyOnly) {
html = html.replaceAll(" types;
private RenderingContext renderingContext;
private NodeRef imgFolder = null;
private int count = 0;
/**
 * Creates an image-extracting parser bound to the given rendering context.
 * <p>Registers the image media types this parser will accept. If the
 *  PARAM_IMAGES_SAME_FOLDER parameter is set, the image folder is taken
 *  up front from the parent of the resolved rendition location;
 *  otherwise it is left unset here.</p>
 *
 * @param renderingContext the context of the rendition being produced
 */
private TikaImageExtractingParser(RenderingContext renderingContext) {
    this.renderingContext = renderingContext;

    // Our expected types (explicitly typed, rather than a raw HashSet)
    types = new HashSet<MediaType>();
    types.add(MediaType.image("bmp"));
    types.add(MediaType.image("gif"));
    types.add(MediaType.image("jpg"));
    types.add(MediaType.image("jpeg"));
    types.add(MediaType.image("png"));
    types.add(MediaType.image("tiff"));

    // Are images going in the same place as the HTML?
    if (renderingContext.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false))
    {
        RenditionLocation location = resolveRenditionLocation(
            renderingContext.getSourceNode(), renderingContext.getDefinition(),
            renderingContext.getDestinationNode()
        );
        imgFolder = location.getParentRef();
        if (logger.isDebugEnabled())
        {
            logger.debug("Using imgFolder: " + imgFolder);
        }
    }
}
/**
 * Returns the image media types registered by the constructor.
 *
 * @param context the parse context (not used)
 * @return the set of media types this parser accepts; this is the
 *         parser's own set, not a copy
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
    return types;
}
/**
 * Handles one embedded resource, storing it if it looks like one of
 *  the expected image types and ignoring it otherwise.
 * <p>A resource is accepted when its declared content type equals the
 *  string form of one of the expected media types, or when its resource
 *  name ends with "." followed by the subtype of one of them.</p>
 *
 * @param stream   the contents of the embedded resource
 * @param handler  not used by this parser
 * @param metadata supplies the resource name and content type
 * @param context  not used by this parser
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context) throws IOException,
        SAXException, TikaException {
    String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
    String contentType = metadata.get(Metadata.CONTENT_TYPE);

    // First try: does the declared content type match exactly?
    boolean typeMatches = false;
    if (contentType != null) {
        for (MediaType candidate : types) {
            if (candidate.toString().equals(contentType)) {
                typeMatches = true;
                break;
            }
        }
    }

    // Second try: does the resource name carry a known extension?
    boolean nameMatches = false;
    if (!typeMatches && resourceName != null) {
        for (MediaType candidate : types) {
            String suffix = "." + candidate.getSubtype();
            if (resourceName.endsWith(suffix)) {
                nameMatches = true;
                break;
            }
        }
    }

    // Only supported images get stored
    if (typeMatches || nameMatches) {
        handleImage(stream, resourceName, contentType);
    }
}
/**
 * Variant without a parse context; delegates to the four-argument
 *  parse method using a newly created, empty ParseContext.
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata) throws IOException, SAXException, TikaException {
    ParseContext emptyContext = new ParseContext();
    this.parse(stream, handler, metadata, emptyContext);
}
/**
 * Stores one accepted image via createEmbeddedImage.
 * <p>Bumps the running image counter, creates the images directory if
 *  no folder has been chosen yet, invents a name of the form
 *  "image-N.subtype" when the resource has none, and always prepends
 *  the images prefix name to the final filename.</p>
 *
 * @param stream   the image contents
 * @param filename the resource name, or null if the image is unnamed
 * @param type     the content type; its part after the '/' is used as
 *                 the extension when a name has to be generated
 */
private void handleImage(InputStream stream, String filename, String type) {
    count += 1;
    // Tells createEmbeddedImage whether this is the first image handled
    boolean firstImage = (count == 1);

    // Lazily create the folder if one wasn't picked already
    if (imgFolder == null) {
        imgFolder = createImagesDirectory(renderingContext);
    }

    // Fall back to a generated name for unnamed images
    String baseName = filename;
    if (baseName == null) {
        String subtype = type.substring(type.indexOf('/') + 1);
        baseName = "image-" + count + "." + subtype;
    }

    // The prefix is always applied
    String storedName = getImagesPrefixName(renderingContext) + baseName;

    // Save the image
    createEmbeddedImage(imgFolder, firstImage, storedName, type, stream, renderingContext);
}
}
/**
* A content handler that re-writes image src attributes,
* and passes everything else on to the real one.
*/
private class TikaImageRewritingContentHandler extends ContentHandlerDecorator {
private String imageFolder;
private String imagePrefix;
private TikaImageRewritingContentHandler(ContentHandler handler, String imageFolder, String imagePrefix) {
super(handler);
this.imageFolder = imageFolder;
this.imagePrefix = imagePrefix;
}
@Override
public void startElement(String uri, String localName, String qName,
Attributes origAttrs) throws SAXException {
// If we have an image tag, re-write the src attribute
// if required
if("img".equals(localName)) {
AttributesImpl attrs;
if(origAttrs instanceof AttributesImpl) {
attrs = (AttributesImpl)origAttrs;
} else {
attrs = new AttributesImpl(origAttrs);
}
for(int i=0; i