mirror of
				https://github.com/Alfresco/alfresco-community-repo.git
				synced 2025-10-22 15:12:38 +00:00 
			
		
		
		
	125892 adragoi: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2)
      125842 rmunteanu: Merged V4.2-BUG-FIX (4.2.7) to 5.0.N (5.0.4) (PARTIAL MERGE)
         125700 adavis: Merged V4.2.5 (4.2.5.7) to V4.2-BUG-FIX (4.2.7)
            125698: Merged DEV to V4.2.5 (4.2.5.7)
               125677 arebegea: MNT-15219 : Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may cause OutOfMemory in Tika
                  - Should not have updated version.properties as the original commit needs to be merged forwards.
            125696: Merged DEV to V4.2.5 (4.2.5.7)
               125677 arebegea: MNT-15219 : Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may cause OutOfMemory in Tika
                  - Modified tika parser and tika core jars to allow some configuration parameters to be sent from Alfresco side using the metadata map parameter
                  - Excluded by default the parsing of drawings/shapes xmls because there was little valuable data that could be extracted from those xmls
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@126004 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
		
	
		
			
				
	
	
		
			548 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Java
		
	
	
	
	
	
			
		
		
	
	
			548 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Java
		
	
	
	
	
	
| /*
 | |
|  * #%L
 | |
|  * Alfresco Repository
 | |
|  * %%
 | |
|  * Copyright (C) 2005 - 2016 Alfresco Software Limited
 | |
|  * %%
 | |
|  * This file is part of the Alfresco software. 
 | |
|  * If the software was purchased under a paid Alfresco license, the terms of 
 | |
|  * the paid license agreement will prevail.  Otherwise, the software is 
 | |
|  * provided under the following open source license terms:
 | |
|  * 
 | |
|  * Alfresco is free software: you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU Lesser General Public License as published by
 | |
|  * the Free Software Foundation, either version 3 of the License, or
 | |
|  * (at your option) any later version.
 | |
|  * 
 | |
|  * Alfresco is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  * GNU Lesser General Public License for more details.
 | |
|  * 
 | |
|  * You should have received a copy of the GNU Lesser General Public License
 | |
|  * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 | |
|  * #L%
 | |
|  */
 | |
| 
 | |
| package org.alfresco.repo.rendition.executer;
 | |
| 
 | |
| import java.io.IOException;
 | |
| import java.io.InputStream;
 | |
| import java.io.Serializable;
 | |
| import java.io.StringWriter;
 | |
| import java.io.Writer;
 | |
| import java.util.Collection;
 | |
| import java.util.HashMap;
 | |
| import java.util.HashSet;
 | |
| import java.util.Map;
 | |
| import java.util.Set;
 | |
| 
 | |
| import javax.xml.transform.OutputKeys;
 | |
| import javax.xml.transform.TransformerConfigurationException;
 | |
| import javax.xml.transform.sax.SAXTransformerFactory;
 | |
| import javax.xml.transform.sax.TransformerHandler;
 | |
| import javax.xml.transform.stream.StreamResult;
 | |
| 
 | |
| import org.alfresco.model.ContentModel;
 | |
| import org.alfresco.repo.action.ParameterDefinitionImpl;
 | |
| import org.alfresco.repo.content.metadata.MetadataExtracterConfig;
 | |
| import org.alfresco.repo.rendition.RenditionLocation;
 | |
| import org.alfresco.service.cmr.action.ParameterDefinition;
 | |
| import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
 | |
| import org.alfresco.service.cmr.rendition.RenditionServiceException;
 | |
| import org.alfresco.service.cmr.repository.ContentReader;
 | |
| import org.alfresco.service.cmr.repository.ContentService;
 | |
| import org.alfresco.service.cmr.repository.ContentWriter;
 | |
| import org.alfresco.service.cmr.repository.NodeRef;
 | |
| import org.alfresco.service.namespace.QName;
 | |
| import org.apache.commons.logging.Log;
 | |
| import org.apache.commons.logging.LogFactory;
 | |
| import org.apache.tika.config.TikaConfig;
 | |
| import org.apache.tika.exception.TikaException;
 | |
| import org.apache.tika.metadata.Metadata;
 | |
| import org.apache.tika.mime.MediaType;
 | |
| import org.apache.tika.parser.AutoDetectParser;
 | |
| import org.apache.tika.parser.ParseContext;
 | |
| import org.apache.tika.parser.Parser;
 | |
| import org.apache.tika.sax.BodyContentHandler;
 | |
| import org.apache.tika.sax.ContentHandlerDecorator;
 | |
| import org.xml.sax.Attributes;
 | |
| import org.xml.sax.ContentHandler;
 | |
| import org.xml.sax.SAXException;
 | |
| import org.xml.sax.helpers.AttributesImpl;
 | |
| 
 | |
| /**
 | |
|  * This class provides a way to turn documents supported by the
 | |
|  *  {@link ContentService} standard transformers into basic, clean
 | |
|  *  HTML.
 | |
|  * <P/>
 | |
|  * The HTML that is produced probably isn't going to be suitable
 | |
|  *  for direct web publishing, as it's likely going to be too
 | |
|  *  basic. Instead, it should be simple and clean HTML, suitable
 | |
|  *  for being the basis of some web-friendly HTML once edited
 | |
|  *  / further transformed. 
 | |
|  * 
 | |
|  * @author Nick Burch
 | |
|  * @since 3.4
 | |
|  */
 | |
| public class HTMLRenderingEngine extends AbstractRenderingEngine
 | |
| {
 | |
|     private static Log logger = LogFactory.getLog(HTMLRenderingEngine.class);
 | |
|     private TikaConfig tikaConfig;
 | |
|     private MetadataExtracterConfig metadataExtracterConfig;
 | |
|     /**
 | |
|      * This optional parameter, when set to true, causes only the
 | |
|      *  contents of the HTML body to be written out as the rendition.
 | |
|      * By default, the whole of the HTML document is used.
 | |
|      */
 | |
|     public static final String PARAM_BODY_CONTENTS_ONLY = "bodyContentsOnly";
 | |
|     /**
 | |
|      * This optional parameter, when set to true, causes any embedded
 | |
|      *  images to be written into the same folder as the html, with
 | |
|      *  a name prefix.
 | |
|      * By default, images are placed into a sub-folder.
 | |
|      */
 | |
|     public static final String PARAM_IMAGES_SAME_FOLDER = "imagesSameFolder";
 | |
| 
 | |
|     /*
 | |
|      * Action constants
 | |
|      */
 | |
|     public static final String NAME = "htmlRenderingEngine";
 | |
|     
 | |
|     
 | |
|     @Override
 | |
|     protected Collection<ParameterDefinition> getParameterDefinitions() {
 | |
|        Collection<ParameterDefinition> paramList = super.getParameterDefinitions();
 | |
|        paramList.add(new ParameterDefinitionImpl(PARAM_BODY_CONTENTS_ONLY, DataTypeDefinition.BOOLEAN, false,
 | |
|              getParamDisplayLabel(PARAM_BODY_CONTENTS_ONLY)));
 | |
|        paramList.add(new ParameterDefinitionImpl(PARAM_IMAGES_SAME_FOLDER, DataTypeDefinition.BOOLEAN, false,
 | |
|              getParamDisplayLabel(PARAM_IMAGES_SAME_FOLDER)));
 | |
|        return paramList;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Injects the TikaConfig to use
 | |
|      * 
 | |
|      * @param tikaConfig The Tika Config to use 
 | |
|      */
 | |
|     public void setTikaConfig(TikaConfig tikaConfig)
 | |
|     {
 | |
|         this.tikaConfig = tikaConfig;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * The metadata extracter config.
 | |
|      */
 | |
|     public void setMetadataExtracterConfig(MetadataExtracterConfig metadataExtracterConfig)
 | |
|     {
 | |
|         this.metadataExtracterConfig = metadataExtracterConfig;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * (non-Javadoc)
 | |
|      * @see org.alfresco.repo.rendition.executer.AbstractRenderingEngine#render(org.alfresco.repo.rendition.executer.AbstractRenderingEngine.RenderingContext)
 | |
|      */
 | |
|     @Override
 | |
|     protected void render(RenderingContext context)
 | |
|     {
 | |
|         ContentReader contentReader = context.makeContentReader();
 | |
|         String sourceMimeType = contentReader.getMimetype();
 | |
|         
 | |
|         // Check that Tika supports the supplied file
 | |
|         AutoDetectParser p = new AutoDetectParser(tikaConfig);
 | |
|         MediaType sourceMediaType = MediaType.parse(sourceMimeType);
 | |
|         if(! p.getParsers().containsKey(sourceMediaType))
 | |
|         {
 | |
|            throw new RenditionServiceException(
 | |
|                  "Source mime type of " + sourceMimeType + 
 | |
|                  " is not supported by Tika for HTML conversions"
 | |
|            );
 | |
|         }
 | |
|         
 | |
|         // Make the HTML Version using Tika
 | |
|         // This will also extract out any images as found
 | |
|         generateHTML(p, context);
 | |
|     }
 | |
| 
 | |
|     private String getHtmlBaseName(RenderingContext context)
 | |
|     {
 | |
|        // Based on the name of the source node, which will
 | |
|        //  also largely be the name of the html node
 | |
|        String baseName = nodeService.getProperty( 
 | |
|              context.getSourceNode(),
 | |
|              ContentModel.PROP_NAME
 | |
|        ).toString();
 | |
|        if(baseName.lastIndexOf('.') > -1)
 | |
|        {
 | |
|           baseName = baseName.substring(0, baseName.lastIndexOf('.'));
 | |
|        }
 | |
|        return baseName;
 | |
|     }
 | |
|     /**
 | |
|      * What name should be used for the images directory?
 | |
|      * Note this is only required if {@link #PARAM_IMAGES_SAME_FOLDER} is false (the default).
 | |
|      */
 | |
|     private String getImagesDirectoryName(RenderingContext context)
 | |
|     {
 | |
|        // Based on the name of the source node, which will
 | |
|        //  also largely be the name of the html node
 | |
|        String folderName = getHtmlBaseName(context);
 | |
|        folderName = folderName + "_files";
 | |
|        return folderName;
 | |
|     }
 | |
|     /**
 | |
|      * What prefix should be applied to the name of images? 
 | |
|      */
 | |
|     private String getImagesPrefixName(RenderingContext context)
 | |
|     {
 | |
|        if( context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false) )
 | |
|        {
 | |
|           // Prefix with the name of the source node
 | |
|           return getHtmlBaseName(context) + "_";
 | |
|        }
 | |
|        else {
 | |
|           // They have their own folder, so no prefix is needed
 | |
|           return "";
 | |
|        }
 | |
|     }
 | |
|     
 | |
|     /**
 | |
|      * Creates a directory to store the images in.
 | |
|      * The directory will be a sibling of the rendered
 | |
|      *  HTML, and named similar to it.
 | |
|      * Note this is only required if {@link #PARAM_IMAGES_SAME_FOLDER} is false (the default).
 | |
|      */
 | |
|     private NodeRef createImagesDirectory(RenderingContext context)
 | |
|     {
 | |
|        // It should be a sibling of the HTML in it's eventual location
 | |
|        //  (not it's current temporary one!)
 | |
|        RenditionLocation location = resolveRenditionLocation(
 | |
|              context.getSourceNode(), context.getDefinition(), context.getDestinationNode()
 | |
|        );
 | |
|        NodeRef parent = location.getParentRef();
 | |
|        
 | |
|        // Figure out what to call it, based on the HTML node
 | |
|        String folderName = getImagesDirectoryName(context);
 | |
|        
 | |
|        // It is already there?
 | |
|        // (eg from when the rendition is being re-run)
 | |
|        NodeRef imgFolder = nodeService.getChildByName(
 | |
|              parent, ContentModel.ASSOC_CONTAINS, folderName
 | |
|        );
 | |
|        if(imgFolder != null)
 | |
|           return imgFolder;
 | |
|        
 | |
|        // Create the directory
 | |
|        Map<QName,Serializable> properties = new HashMap<QName,Serializable>();
 | |
|        properties.put(ContentModel.PROP_NAME, folderName);
 | |
|        imgFolder = nodeService.createNode(
 | |
|              parent,
 | |
|              ContentModel.ASSOC_CONTAINS,
 | |
|              QName.createQName(folderName),
 | |
|              ContentModel.TYPE_FOLDER,
 | |
|              properties
 | |
|        ).getChildRef();
 | |
|        
 | |
|        return imgFolder;
 | |
|     }
 | |
|     
 | |
|     private NodeRef createEmbeddedImage(NodeRef imgFolder, boolean primary,
 | |
|           String filename, String contentType, InputStream imageSource,
 | |
|           RenderingContext context)
 | |
|     {
 | |
|        // Create the node if needed
 | |
|        NodeRef img = nodeService.getChildByName(
 | |
|              imgFolder, ContentModel.ASSOC_CONTAINS, filename
 | |
|        );
 | |
|        if(img == null)
 | |
|        {
 | |
|           Map<QName,Serializable> properties = new HashMap<QName,Serializable>();
 | |
|           properties.put(ContentModel.PROP_NAME, filename);
 | |
|           img = nodeService.createNode(
 | |
|                 imgFolder,
 | |
|                 ContentModel.ASSOC_CONTAINS,
 | |
|                 QName.createQName(filename),
 | |
|                 ContentModel.TYPE_CONTENT,
 | |
|                 properties
 | |
|           ).getChildRef();
 | |
|           if (logger.isDebugEnabled())
 | |
|           {
 | |
|               logger.debug("Image node created: " + img);
 | |
|           }
 | |
|        }
 | |
|        
 | |
|        // TODO Once composite content is properly supported,
 | |
|        //  at this point we'll associate the new image with
 | |
|        //  the rendered HTML node so the dependency is tracked.
 | |
|        
 | |
|        // Put the image into the node
 | |
|        ContentWriter writer = contentService.getWriter(
 | |
|              img, ContentModel.PROP_CONTENT, true
 | |
|        );
 | |
|        writer.setMimetype(contentType);
 | |
|        writer.putContent(imageSource);
 | |
|        if (logger.isDebugEnabled())
 | |
|        {
 | |
|            logger.debug("Image content written into " + img);
 | |
|        }
 | |
|        
 | |
|        // All done
 | |
|        return img;
 | |
|     }
 | |
|     
 | |
|     /**
 | |
|      * Builds a Tika-compatible SAX content handler, which will
 | |
|      *  be used to generate+capture the XHTML
 | |
|      */
 | |
|     private ContentHandler buildContentHandler(Writer output, RenderingContext context) 
 | |
|     {
 | |
|        // Create the main transformer
 | |
|        SAXTransformerFactory factory = (SAXTransformerFactory)
 | |
|                 SAXTransformerFactory.newInstance();
 | |
|        TransformerHandler handler;
 | |
|        
 | |
|        try {
 | |
|           handler = factory.newTransformerHandler();
 | |
|        } catch (TransformerConfigurationException e) {
 | |
|           throw new RenditionServiceException("SAX Processing isn't available - " + e);
 | |
|        }
 | |
|        
 | |
|        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
 | |
|        handler.setResult(new StreamResult(output));
 | |
|        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
 | |
|        
 | |
|        // Change the image links as they go past
 | |
|        String dirName = null, imgPrefix = null;
 | |
|        if(context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false))
 | |
|        {
 | |
|           imgPrefix = getImagesPrefixName(context);
 | |
|        }
 | |
|        else
 | |
|        {
 | |
|           dirName = getImagesDirectoryName(context);
 | |
|        }
 | |
|        ContentHandler contentHandler = new TikaImageRewritingContentHandler(
 | |
|              handler, dirName, imgPrefix
 | |
|        );
 | |
|        
 | |
|        // If required, wrap it to only return the body 
 | |
|        boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
 | |
|        if(bodyOnly) {
 | |
|           contentHandler = new BodyContentHandler(contentHandler);
 | |
|        }
 | |
|        
 | |
|        // All done
 | |
|        return contentHandler;
 | |
|     }
 | |
|     
 | |
|     /**
 | |
|      * Asks Tika to translate the contents into HTML
 | |
|      */
 | |
|     private void generateHTML(Parser p, RenderingContext context)
 | |
|     {
 | |
|        ContentReader contentReader = context.makeContentReader();
 | |
|        
 | |
|        // Setup things to parse with
 | |
|        StringWriter sw = new StringWriter();
 | |
|        ContentHandler handler = buildContentHandler(sw, context);
 | |
|        
 | |
|        // Tell Tika what we're dealing with
 | |
|        Metadata metadata = new Metadata();
 | |
|        metadata.set(
 | |
|              Metadata.CONTENT_TYPE, 
 | |
|              contentReader.getMimetype()
 | |
|        );
 | |
|        metadata.set(
 | |
|              Metadata.RESOURCE_NAME_KEY, 
 | |
|              nodeService.getProperty( 
 | |
|                    context.getSourceNode(),
 | |
|                    ContentModel.PROP_NAME
 | |
|              ).toString()
 | |
|        );
 | |
|        if (metadataExtracterConfig != null)
 | |
|        {
 | |
|           metadataExtracterConfig.prepareMetadataWithConfigParams(metadata);
 | |
|        }
 | |
|        
 | |
|        // Our parse context needs to extract images
 | |
|        ParseContext parseContext = new ParseContext();
 | |
|        parseContext.set(Parser.class, new TikaImageExtractingParser(context));
 | |
|        
 | |
|        // Parse
 | |
|        try {
 | |
|           p.parse(
 | |
|                 contentReader.getContentInputStream(),
 | |
|                 handler, metadata, parseContext
 | |
|           );
 | |
|        } catch(Exception e) {
 | |
|           throw new RenditionServiceException("Tika HTML Conversion Failed", e);
 | |
|        }
 | |
|        
 | |
|        // As a string
 | |
|        String html = sw.toString();
 | |
|        
 | |
|        // If we're doing body-only, remove all the html namespaces
 | |
|        //  that will otherwise clutter up the document
 | |
|        boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
 | |
|        if(bodyOnly) {
 | |
|           html = html.replaceAll("<\\?xml.*?\\?>", "");
 | |
|           html = html.replaceAll("<p xmlns=\"http://www.w3.org/1999/xhtml\"","<p");
 | |
|           html = html.replaceAll("<h(\\d) xmlns=\"http://www.w3.org/1999/xhtml\"","<h\\1");
 | |
|           html = html.replaceAll("<div xmlns=\"http://www.w3.org/1999/xhtml\"","<div");
 | |
|           html = html.replaceAll("<table xmlns=\"http://www.w3.org/1999/xhtml\"","<table");
 | |
|           html = html.replaceAll("
","");
 | |
|        }
 | |
|        
 | |
|        // Save it
 | |
|        ContentWriter contentWriter = context.makeContentWriter();
 | |
|        contentWriter.setMimetype("text/html");
 | |
|        contentWriter.putContent( html );
 | |
|     }
 | |
|     
 | |
|     
 | |
|     /**
 | |
|      * A nested Tika parser which extracts out any
 | |
|      *  images as they come past.
 | |
|      */
 | |
|    @SuppressWarnings("serial")
 | |
|    private class TikaImageExtractingParser implements Parser {
 | |
|       private Set<MediaType> types;
 | |
|       
 | |
|       private RenderingContext renderingContext;
 | |
|       private NodeRef imgFolder = null;
 | |
|       private int count = 0;
 | |
|       
 | |
|       private TikaImageExtractingParser(RenderingContext renderingContext) {
 | |
|          this.renderingContext = renderingContext;
 | |
|          
 | |
|          // Our expected types
 | |
|          types = new HashSet<MediaType>();
 | |
|          types.add(MediaType.image("bmp"));
 | |
|          types.add(MediaType.image("gif"));
 | |
|          types.add(MediaType.image("jpg"));
 | |
|          types.add(MediaType.image("jpeg"));
 | |
|          types.add(MediaType.image("png"));
 | |
|          types.add(MediaType.image("tiff"));
 | |
|          
 | |
|          // Are images going in the same place as the HTML?
 | |
|          if( renderingContext.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false) )
 | |
|          {
 | |
|             RenditionLocation location = resolveRenditionLocation(
 | |
|                   renderingContext.getSourceNode(), renderingContext.getDefinition(), 
 | |
|                   renderingContext.getDestinationNode()
 | |
|             );
 | |
|             imgFolder = location.getParentRef();
 | |
|             if (logger.isDebugEnabled())
 | |
|             {
 | |
|                 logger.debug("Using imgFolder: " + imgFolder);
 | |
|             }
 | |
|          }
 | |
|       }
 | |
|       
 | |
|       @Override
 | |
|       public Set<MediaType> getSupportedTypes(ParseContext context) {
 | |
|          return types;
 | |
|       }
 | |
| 
 | |
|       @Override
 | |
|       public void parse(InputStream stream, ContentHandler handler,
 | |
|             Metadata metadata, ParseContext context) throws IOException,
 | |
|             SAXException, TikaException {
 | |
|          // Is it a supported image?
 | |
|          String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
 | |
|          String type = metadata.get(Metadata.CONTENT_TYPE);
 | |
|          boolean accept = false;
 | |
|          
 | |
|          if(type != null) {
 | |
|             for(MediaType mt : types) {
 | |
|                if(mt.toString().equals(type)) {
 | |
|                   accept = true;
 | |
|                }
 | |
|             }
 | |
|          }
 | |
|          if(filename != null) {
 | |
|             for(MediaType mt : types) {
 | |
|                String ext = "." + mt.getSubtype();
 | |
|                if(filename.endsWith(ext)) {
 | |
|                   accept = true;
 | |
|                }
 | |
|             }
 | |
|          }
 | |
|          
 | |
|          if(!accept)
 | |
|             return;
 | |
| 
 | |
|          handleImage(stream, filename, type);
 | |
|       }
 | |
| 
 | |
|       private void handleImage(InputStream stream, String filename, String type) {
 | |
|          count++;
 | |
|          
 | |
|          // Do we already have the folder? If not, create it
 | |
|          if(imgFolder == null) {
 | |
|             imgFolder = createImagesDirectory(renderingContext);
 | |
|          }
 | |
|          
 | |
|          // Give it a sensible name if needed
 | |
|          if(filename == null) {
 | |
|             filename = "image-" + count + ".";
 | |
|             filename += type.substring(type.indexOf('/')+1);
 | |
|          }
 | |
|          
 | |
|          // Prefix the filename if needed
 | |
|          filename = getImagesPrefixName(renderingContext) + filename; 
 | |
| 
 | |
|          // Save the image
 | |
|          createEmbeddedImage(imgFolder, (count==1), filename, type, stream, renderingContext);
 | |
|       }
 | |
|     }
 | |
|    
 | |
|     /**
 | |
|      * A content handler that re-writes image src attributes,
 | |
|      *  and passes everything else on to the real one.
 | |
|      */
 | |
|     private class TikaImageRewritingContentHandler extends ContentHandlerDecorator {
 | |
|        private String imageFolder;
 | |
|        private String imagePrefix;
 | |
|        
 | |
|        private TikaImageRewritingContentHandler(ContentHandler handler, String imageFolder, String imagePrefix) {
 | |
|           super(handler);
 | |
|           this.imageFolder = imageFolder;
 | |
|           this.imagePrefix = imagePrefix;
 | |
|        }
 | |
| 
 | |
|        @Override
 | |
|        public void startElement(String uri, String localName, String qName,
 | |
|              Attributes origAttrs) throws SAXException {
 | |
|           // If we have an image tag, re-write the src attribute
 | |
|           //  if required
 | |
|           if("img".equals(localName)) {
 | |
|              AttributesImpl attrs;
 | |
|              if(origAttrs instanceof AttributesImpl) {
 | |
|                 attrs = (AttributesImpl)origAttrs;
 | |
|              } else {
 | |
|                 attrs = new AttributesImpl(origAttrs);
 | |
|              }
 | |
|              
 | |
|              for(int i=0; i<attrs.getLength(); i++) {
 | |
|                 if("src".equals(attrs.getLocalName(i))) {
 | |
|                    String src = attrs.getValue(i);
 | |
|                    if(src.startsWith("embedded:")) {
 | |
|                       String newSrc = "";
 | |
|                       if(imageFolder != null)
 | |
|                          newSrc += imageFolder + "/";
 | |
|                       if(imagePrefix != null)
 | |
|                          newSrc += imagePrefix;
 | |
|                       newSrc += src.substring(src.indexOf(':')+1);
 | |
|                       attrs.setValue(i, newSrc);
 | |
|                    }
 | |
|                 }
 | |
|              }
 | |
|              super.startElement(uri, localName, qName, attrs);
 | |
|           } else {
 | |
|              // For any other tag, pass through as-is
 | |
|              super.startElement(uri, localName, qName, origAttrs);
 | |
|           }
 | |
|        }
 | |
|     }
 | |
| } |