mirror of
				https://github.com/Alfresco/alfresco-community-repo.git
				synced 2025-10-22 15:12:38 +00:00 
			
		
		
		
	git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@22335 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
		
			
				
	
	
		
			262 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			Java
		
	
	
	
	
	
			
		
		
	
	
			262 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			Java
		
	
	
	
	
	
| /*
 | |
|  * Copyright (C) 2005-2010 Alfresco Software Limited.
 | |
|  *
 | |
|  * This file is part of Alfresco
 | |
|  *
 | |
|  * Alfresco is free software: you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU Lesser General Public License as published by
 | |
|  * the Free Software Foundation, either version 3 of the License, or
 | |
|  * (at your option) any later version.
 | |
|  *
 | |
|  * Alfresco is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  * GNU Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public License
 | |
|  * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 | |
|  */
 | |
| 
 | |
| package org.alfresco.repo.rendition.executer;
 | |
| 
 | |
| import java.io.Serializable;
 | |
| import java.io.StringWriter;
 | |
| import java.io.Writer;
 | |
| import java.util.HashMap;
 | |
| import java.util.Map;
 | |
| 
 | |
| import javax.xml.transform.OutputKeys;
 | |
| import javax.xml.transform.TransformerConfigurationException;
 | |
| import javax.xml.transform.sax.SAXTransformerFactory;
 | |
| import javax.xml.transform.sax.TransformerHandler;
 | |
| import javax.xml.transform.stream.StreamResult;
 | |
| 
 | |
| import org.alfresco.model.ContentModel;
 | |
| import org.alfresco.repo.rendition.RenditionLocation;
 | |
| import org.alfresco.service.cmr.dictionary.DictionaryService;
 | |
| import org.alfresco.service.cmr.rendition.RenditionServiceException;
 | |
| import org.alfresco.service.cmr.repository.ContentReader;
 | |
| import org.alfresco.service.cmr.repository.ContentService;
 | |
| import org.alfresco.service.cmr.repository.ContentWriter;
 | |
| import org.alfresco.service.cmr.repository.NodeRef;
 | |
| import org.alfresco.service.namespace.QName;
 | |
| import org.apache.commons.logging.Log;
 | |
| import org.apache.commons.logging.LogFactory;
 | |
| import org.apache.tika.metadata.Metadata;
 | |
| import org.apache.tika.mime.MediaType;
 | |
| import org.apache.tika.parser.AutoDetectParser;
 | |
| import org.apache.tika.parser.ParseContext;
 | |
| import org.apache.tika.parser.Parser;
 | |
| import org.xml.sax.ContentHandler;
 | |
| 
 | |
| /**
 | |
|  * This class provides a way to turn documents supported by the
 | |
|  *  {@link ContentService} standard transformers into basic, clean
 | |
|  *  HTML.
 | |
|  * <P/>
 | |
|  * The HTML that is produced probably isn't going to be suitable
 | |
|  *  for direct web publishing, as it's likely going to be too
 | |
|  *  basic. Instead, it should be simple and clean HTML, suitable
 | |
|  *  for being the basis of some web-friendly HTML once edited
 | |
|  *  / further transformed. 
 | |
|  * 
 | |
|  * @author Nick Burch
 | |
|  * @since 3.4
 | |
|  */
 | |
| public class HTMLRenderingEngine extends AbstractRenderingEngine
 | |
| {
 | |
|     private static Log logger = LogFactory.getLog(HTMLRenderingEngine.class);
 | |
| 
 | |
|     /*
 | |
|      * Action constants
 | |
|      */
 | |
|     public static final String NAME = "htmlRenderingEngine";
 | |
|     
 | |
|     protected static final QName PRIMARY_IMAGE = QName.createQName(
 | |
|           "http://www.alfresco.org/model/website/1.0", "primaryImage");
 | |
|     protected static final QName SECONDARY_IMAGE = QName.createQName(
 | |
|           "http://www.alfresco.org/model/website/1.0", "secondaryImage");
 | |
| 
 | |
|     private DictionaryService dictionaryService;
 | |
|     
 | |
|     public void setDictionaryService(DictionaryService dictionaryService) {
 | |
|        this.dictionaryService = dictionaryService;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * (non-Javadoc)
 | |
|      * @see org.alfresco.repo.rendition.executer.AbstractRenderingEngine#render(org.alfresco.repo.rendition.executer.AbstractRenderingEngine.RenderingContext)
 | |
|      */
 | |
|     @Override
 | |
|     protected void render(RenderingContext context)
 | |
|     {
 | |
|         ContentReader contentReader = context.makeContentReader();
 | |
|         String sourceMimeType = contentReader.getMimetype();
 | |
|         String targetMimeType = "text/html";
 | |
|         
 | |
|         // Check that Tika supports it
 | |
|         AutoDetectParser p = new AutoDetectParser();
 | |
|         MediaType sourceMediaType = MediaType.parse(sourceMimeType);
 | |
|         if(! p.getParsers().containsKey(sourceMediaType))
 | |
|         {
 | |
|            throw new RenditionServiceException(
 | |
|                  "Source mime type of " + sourceMimeType + 
 | |
|                  " is not supported by Tika for HTML conversions"
 | |
|            );
 | |
|         }
 | |
|         
 | |
|         // Make the HTML Version using Tika
 | |
|         generateHTML(p, context);
 | |
|         
 | |
|         // Extract out any images
 | |
|         // TODO
 | |
|         boolean hasImages = true; // TODO
 | |
|         if(hasImages)
 | |
|         {
 | |
|            Map<QName,Serializable> properties = new HashMap<QName,Serializable>();
 | |
|            NodeRef imgFolder = null;
 | |
|            
 | |
|            // Extract into it
 | |
|            boolean donePrimary = false;
 | |
|            for(String fakeContent : new String[] {"Test1","Test2"})
 | |
|            {
 | |
|               if(imgFolder == null)
 | |
|                  imgFolder = createImagesDirectory(context);
 | |
|               
 | |
|               // Create the node if needed
 | |
|               NodeRef img = nodeService.getChildByName(
 | |
|                     imgFolder, ContentModel.ASSOC_CONTAINS, fakeContent
 | |
|               );
 | |
|               if(img == null)
 | |
|               {
 | |
|                  properties.clear();
 | |
|                  properties.put(ContentModel.PROP_NAME, fakeContent); 
 | |
|                  img = nodeService.createNode(
 | |
|                        imgFolder,
 | |
|                        ContentModel.ASSOC_CONTAINS,
 | |
|                        QName.createQName(fakeContent),
 | |
|                        ContentModel.TYPE_CONTENT,
 | |
|                        properties
 | |
|                  ).getChildRef();
 | |
|               }
 | |
|               
 | |
|               // If we can, associate it with the rendered HTML, so
 | |
|               //  that they're properly linked
 | |
|               QName assocType = SECONDARY_IMAGE;
 | |
|               if(!donePrimary)
 | |
|               {
 | |
|                  assocType = PRIMARY_IMAGE;
 | |
|                  donePrimary = true;
 | |
|               }
 | |
|               if(dictionaryService.getAssociation(assocType) != null)
 | |
|               {
 | |
|                  nodeService.createAssociation(
 | |
|                        context.getDestinationNode(), img, assocType
 | |
|                  );
 | |
|               }
 | |
|               
 | |
|               // Put the image into the node
 | |
|               ContentWriter writer = contentService.getWriter(
 | |
|                     img, ContentModel.PROP_CONTENT, true
 | |
|               );
 | |
|               writer.putContent(fakeContent);
 | |
|            }
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     /**
 | |
|      * Creates a directory to store the images in.
 | |
|      * The directory will be a sibling of the rendered
 | |
|      *  HTML, and named similar to it.
 | |
|      */
 | |
|     private NodeRef createImagesDirectory(RenderingContext context)
 | |
|     {
 | |
|        // It should be a sibling of the HTML in it's eventual location
 | |
|        //  (not it's current temporary one!)
 | |
|        RenditionLocation location = resolveRenditionLocation(
 | |
|              context.getSourceNode(), context.getDefinition(), context.getDestinationNode()
 | |
|        );
 | |
|        NodeRef parent = location.getParentRef();
 | |
|        
 | |
|        // Figure out what to call it, based on the HTML node
 | |
|        String folderName = nodeService.getProperty( 
 | |
|              context.getSourceNode(),
 | |
|              ContentModel.PROP_NAME
 | |
|        ).toString();
 | |
|        if(folderName.lastIndexOf('.') > -1)
 | |
|        {
 | |
|           folderName = folderName.substring(0, folderName.lastIndexOf('.'));
 | |
|        }
 | |
|        folderName = folderName + "_files";
 | |
|        
 | |
|        // It is already there?
 | |
|        // (eg from when the rendition is being re-run)
 | |
|        NodeRef imgFolder = nodeService.getChildByName(
 | |
|              parent, ContentModel.ASSOC_CONTAINS, folderName
 | |
|        );
 | |
|        if(imgFolder != null)
 | |
|           return imgFolder;
 | |
|        
 | |
|        // Create the directory
 | |
|        Map<QName,Serializable> properties = new HashMap<QName,Serializable>();
 | |
|        properties.put(ContentModel.PROP_NAME, folderName);
 | |
|        imgFolder = nodeService.createNode(
 | |
|              parent,
 | |
|              ContentModel.ASSOC_CONTAINS,
 | |
|              QName.createQName(folderName),
 | |
|              ContentModel.TYPE_FOLDER,
 | |
|              properties
 | |
|        ).getChildRef();
 | |
|        
 | |
|        return imgFolder;
 | |
|     }
 | |
|     
 | |
|     /**
 | |
|      * Builds a Tika-compatible SAX content handler, which will
 | |
|      *  be used to generate+capture the XHTML
 | |
|      */
 | |
|     private ContentHandler buildContentHandler(Writer output) 
 | |
|     {
 | |
|        SAXTransformerFactory factory = (SAXTransformerFactory)
 | |
|                 SAXTransformerFactory.newInstance();
 | |
|        TransformerHandler handler;
 | |
|        
 | |
|        try {
 | |
|           handler = factory.newTransformerHandler();
 | |
|        } catch (TransformerConfigurationException e) {
 | |
|           throw new RenditionServiceException("SAX Processing isn't available - " + e);
 | |
|        }
 | |
|        
 | |
|        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
 | |
|        handler.setResult(new StreamResult(output));
 | |
|        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
 | |
|        
 | |
|        return handler;
 | |
|     }
 | |
|     
 | |
|     /**
 | |
|      * Asks Tika to translate the contents into HTML
 | |
|      */
 | |
|     private void generateHTML(Parser p, RenderingContext context)
 | |
|     {
 | |
|        // Setup things to parse with
 | |
|        Metadata metadata = new Metadata();
 | |
|        ParseContext parseContext = new ParseContext();
 | |
|        StringWriter sw = new StringWriter();
 | |
|        ContentHandler handler = buildContentHandler(sw);
 | |
|        
 | |
|        // Parse
 | |
|        try {
 | |
|           p.parse(
 | |
|                 context.makeContentReader().getContentInputStream(),
 | |
|                 handler, metadata, parseContext
 | |
|           );
 | |
|        } catch(Exception e) {
 | |
|           throw new RenditionServiceException("Tika HTML Conversion Failed", e);
 | |
|        }
 | |
|        
 | |
|        // Save it
 | |
|        ContentWriter contentWriter = context.makeContentWriter();
 | |
|        contentWriter.putContent( sw.toString() );
 | |
|     }
 | |
| } |