MNT-16882 : CMIS does not detect text encoding correctly

- Explicit charset requests are not supported by the Alfresco repository; we use encoding-guessing (charset detection) methods instead.
   - The fix is to correctly interpret the mimetype text of CMIS requests and to make CMIS behave like the REST V1 APIs when working with content.
   - Added tests for the 3 affected methods.

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@135665 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrei Rebegea
2017-03-07 10:44:08 +00:00
parent a75200b776
commit 792bba21c8
3 changed files with 246 additions and 31 deletions

View File

@@ -1312,11 +1312,11 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
// copy stream to temp file
// OpenCMIS does this for us ....
tempFile = copyToTempFile(contentStream);
final Charset encoding = (tempFile == null ? null : getEncoding(tempFile, contentStream.getMimeType()));
String encoding = getEncoding(tempFile, mimeType);
ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef);
writer.setMimetype(mimeType);
writer.setEncoding(encoding.name());
writer.setEncoding(encoding);
writer.putContent(tempFile);
}
}
@@ -1572,16 +1572,15 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
throw new CmisInvalidArgumentException("No content!");
}
// copy stream to temp file
String mimeType = parseMimeType(contentStream);
final File tempFile = copyToTempFile(contentStream);
final Charset encoding = getEncoding(tempFile, contentStream.getMimeType());
String encoding = getEncoding(tempFile, mimeType);
try
{
ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef);
String mimeType = parseMimeType(contentStream);
writer.setMimetype(mimeType);
writer.setEncoding(encoding.name());
writer.setEncoding(encoding);
writer.putContent(tempFile);
}
finally
@@ -2307,7 +2306,6 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
// copy stream to temp file
final File tempFile = copyToTempFile(contentStream);
final Charset encoding = (tempFile == null ? null : getEncoding(tempFile, contentStream.getMimeType()));
// check in
// update PWC
@@ -2319,10 +2317,12 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
// handle content
if (contentStream != null)
{
String mimeType = parseMimeType(contentStream);
String encoding = getEncoding(tempFile, mimeType);
// write content
ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef);
writer.setMimetype(parseMimeType(contentStream));
writer.setEncoding(encoding.name());
writer.setMimetype(mimeType);
writer.setEncoding(encoding);
writer.putContent(tempFile);
}
@@ -3081,22 +3081,51 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
}
}
private Charset getEncoding(File tempFile, String mimeType)
/**
 * Determines the name of the character encoding to record for uploaded content.
 * Inspired by the NodesImpl.guessEncoding method.
 * <p>
 * When no temp file is supplied the default "UTF-8" is returned. Otherwise the file is opened
 * and the charset is obtained from the {@code ContentCharsetFinder} supplied by the connector's
 * mimetype service, which is given both the stream and the mimetype.
 * <p>
 * NOTE(review): this listing is a rendered diff, so statements of the previous
 * {@code Charset}-returning version appear interleaved with the current ones below.
 *
 * @param tempFile the file holding the uploaded content; can be null
 * @param mimeType the mimetype passed to the charset finder; can be null
 * @return the name of the detected encoding, or "UTF-8" when there is no temp file; never null
 * @throws CmisStorageException if opening the file or detecting the charset fails
 */
private String getEncoding(File tempFile, String mimeType)
{
Charset encoding = null;
String defaultEncoding = "UTF-8";
if (tempFile == null)
{
return defaultEncoding;
}
// declared outside the try so that the finally clause can close it
InputStream tfis = null;
try
{
InputStream tfis = new BufferedInputStream(new FileInputStream(tempFile));
tfis = new BufferedInputStream(new FileInputStream(tempFile));
ContentCharsetFinder charsetFinder = connector.getMimetypeService().getContentCharsetFinder();
encoding = charsetFinder.getCharset(tfis, mimeType);
tfis.close();
} catch (Exception e)
return charsetFinder.getCharset(tfis, mimeType).name();
}
catch (Exception e)
{
// any failure is reported as a CMIS storage error, keeping the original cause
throw new CmisStorageException("Unable to read content: " + e.getMessage(), e);
}
finally
{
// close the stream whether detection succeeded or failed
closeInputStream(tfis);
}
}
return encoding;
/**
 * Closes the given stream on a best-effort basis.
 * <p>
 * A null stream is accepted and ignored. Any exception raised while closing is
 * deliberately suppressed, so this method never throws an {@code Exception}.
 *
 * @param tfis the stream to close; can be null
 */
protected void closeInputStream(InputStream tfis)
{
    if (tfis == null)
    {
        return;
    }

    try
    {
        tfis.close();
    }
    catch (Exception ignored)
    {
        // best-effort close: a failure here is intentionally not reported
    }
}
private File copyToTempFile(ContentStream contentStream)

View File

@@ -27,8 +27,17 @@
package org.alfresco.opencmis;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
@@ -59,9 +68,11 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.dictionary.DictionaryDAO;
import org.alfresco.repo.dictionary.M2Model;
import org.alfresco.repo.domain.audit.AuditDAO;
import org.alfresco.repo.domain.node.ContentDataWithId;
import org.alfresco.repo.domain.node.NodeDAO;
import org.alfresco.repo.model.Repository;
import org.alfresco.repo.node.archive.NodeArchiveService;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.repo.security.authentication.AuthenticationContext;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.repo.security.authentication.AuthenticationUtil.RunAsWork;
@@ -73,6 +84,7 @@ import org.alfresco.repo.transaction.RetryingTransactionHelper;
import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback;
import org.alfresco.repo.version.VersionableAspectTest;
import org.alfresco.repo.workflow.WorkflowDeployer;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.action.ActionCondition;
import org.alfresco.service.cmr.action.ActionService;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
@@ -110,6 +122,7 @@ import org.apache.chemistry.opencmis.commons.PropertyIds;
import org.apache.chemistry.opencmis.commons.data.Ace;
import org.apache.chemistry.opencmis.commons.data.AllowableActions;
import org.apache.chemistry.opencmis.commons.data.CmisExtensionElement;
import org.apache.chemistry.opencmis.commons.data.ContentStream;
import org.apache.chemistry.opencmis.commons.data.FailedToDeleteData;
import org.apache.chemistry.opencmis.commons.data.ObjectData;
import org.apache.chemistry.opencmis.commons.data.ObjectInFolderData;
@@ -155,13 +168,6 @@ import org.junit.Test;
import org.springframework.context.ApplicationContext;
import org.springframework.extensions.webscripts.GUID;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* OpenCMIS tests.
*
@@ -794,6 +800,12 @@ public class CMISTest
@Test
public void testContentMimeTypeDetection()
{
ServiceRegistry serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
FileFolderService ffs = serviceRegistry.getFileFolderService();
AuthenticationComponent authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
final String isoEncoding = "ISO-8859-1";
final String utfEncoding = "UTF-8";
// get repository id
List<RepositoryInfo> repositories = withCmisService(new CmisServiceCallback<List<RepositoryInfo>>()
{
@@ -864,11 +876,16 @@ public class CMISTest
}
});
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_HTML, contentType);
// check that the encoding is detected correctly
checkEncoding(ffs, authenticationComponent, objectData, utfEncoding);
}
// create content stream with mimetype and encoding
// create content stream with mimetype and encoding as UTF-8
{
String mimeType = MimetypeMap.MIMETYPE_TEXT_PLAIN + "; charset=UTF-8";
String mimeType = MimetypeMap.MIMETYPE_TEXT_PLAIN + "; charset="+isoEncoding;
// NOTE that we intentionally specify the wrong charset here.
// Alfresco will detect the encoding (as UTF-8 - given by the ContentStreamImpl constructor)
final ContentStreamImpl contentStreamHTML = new ContentStreamImpl(null, mimeType, "<html><head><title> Hello </title></head><body><p> Test html</p></body></html></body></html>");
withCmisService(new CmisServiceCallback<Void>()
{
@@ -901,6 +918,66 @@ public class CMISTest
}
});
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_TEXT_PLAIN, contentType);
// check that the encoding is detected correctly
checkEncoding(ffs, authenticationComponent, objectData, utfEncoding);
}
// create content stream with mimetype and encoding as ISO-8859-1
{
String mimeType = MimetypeMap.MIMETYPE_TEXT_PLAIN + "; charset=" + utfEncoding;
// NOTE that we intentionally specify the wrong charset here.
// Alfresco will detect the encoding (as ISO-8859-1 - given by the ContentStreamImpl with streams)
String content = "<html><head><title>aegif Mind Share Leader Generating New Paradigms by aegif corporation</title></head><body><p> Test html</p></body></html></body></html>";
byte[] buf = null;
try
{
buf = content.getBytes(isoEncoding); // set the encoding here for the content stream
}
catch (UnsupportedEncodingException e)
{
e.printStackTrace();
}
ByteArrayInputStream input = new ByteArrayInputStream(buf);
final ContentStream contentStreamHTML = new ContentStreamImpl(null, BigInteger.valueOf(buf.length), mimeType, input);
withCmisService(new CmisServiceCallback<Void>()
{
@Override
public Void execute(CmisService cmisService)
{
Holder<String> latestObjectIdHolder = getHolderOfObjectOfLatestVersion(cmisService, repositoryId,
objectIdHolder);
cmisService.setContentStream(repositoryId, latestObjectIdHolder, true, null, contentStreamHTML, null);
return null;
}
});
// check mimetype
final ObjectData objectData = withCmisService(new CmisServiceCallback<ObjectData>()
{
@Override
public ObjectData execute(CmisService cmisService)
{
ObjectData objectData = cmisService.getObjectByPath(repositoryId, path, null, false,
IncludeRelationships.NONE, null, false, false, null);
return objectData;
}
});
String contentType = withCmisService(new CmisServiceCallback<String>()
{
@Override
public String execute(CmisService cmisService)
{
String contentType = cmisService.getObjectInfo(repositoryId, objectData.getId()).getContentType();
return contentType;
}
});
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_TEXT_PLAIN, contentType);
// check that the encoding is detected correctly
checkEncoding(ffs, authenticationComponent, objectData, isoEncoding);
}
// checkout/checkin object with mimetype and encoding
@@ -957,9 +1034,50 @@ public class CMISTest
}
});
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_HTML, contentType);
checkEncoding(ffs, authenticationComponent, objectData, utfEncoding);
}
}
/**
 * Asserts that the content property stored for a CMIS object carries the expected encoding.
 * <p>
 * The check runs as the system user; the security context is cleared again afterwards,
 * even when an assertion fails.
 *
 * @param ffs the service used to load the node's file info
 * @param authenticationComponent used to switch to the system user and to clear the context
 * @param objectData the CMIS object whose id identifies the node to inspect
 * @param expectedEncoding the encoding name the stored content data must report
 */
protected void checkEncoding(FileFolderService ffs, AuthenticationComponent authenticationComponent,
        final ObjectData objectData, String expectedEncoding)
{
    // Authenticate as system to check the properties in alfresco
    authenticationComponent.setSystemUserAsCurrentUser();
    try
    {
        NodeRef docNodeRef = cmisIdToNodeRef(objectData.getId());
        FileInfo fileInfo = ffs.getFileInfo(docNodeRef);
        // fail with a readable message instead of a bare NullPointerException
        assertNotNull("No file info found for node " + docNodeRef, fileInfo);

        Map<QName, Serializable> properties = fileInfo.getProperties();
        ContentDataWithId contentData = (ContentDataWithId) properties
                .get(QName.createQName("{http://www.alfresco.org/model/content/1.0}content"));
        assertNotNull("Node " + docNodeRef + " has no content property", contentData);

        assertEquals("Unexpected content encoding for node " + docNodeRef,
                expectedEncoding, contentData.getEncoding());
    }
    finally
    {
        authenticationComponent.clearCurrentSecurityContext();
    }
}
/**
 * Turns a CMIS id into a node ref.
 * <p>
 * Everything from the first ';' onwards is dropped before the reference is built
 * (presumably the version label suffix of the CMIS id - TODO confirm).
 *
 * @param nodeId the CMIS object id, with or without a ';' suffix
 * @return a node ref built from the part of the id before the first ';'
 */
private NodeRef cmisIdToNodeRef(String nodeId)
{
    final int separatorIndex = nodeId.indexOf(";");
    final String rawRef = (separatorIndex == -1) ? nodeId : nodeId.substring(0, separatorIndex);
    return new NodeRef(rawRef);
}
private Holder<String> getHolderOfObjectOfLatestVersion(CmisService cmisService, String repositoryId, Holder<String> currentHolder)
{
ObjectData oData = cmisService.getObjectOfLatestVersion(repositoryId, currentHolder.getValue(), null, Boolean.FALSE, null, null, null, null, null, null, null);

View File

@@ -30,6 +30,8 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.util.Calendar;
import java.util.HashMap;
@@ -42,14 +44,18 @@ import junit.framework.TestCase;
import org.alfresco.events.types.ContentEventImpl;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.repo.domain.node.ContentDataWithId;
import org.alfresco.repo.events.EventPublisherForTestingOnly;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.coci.CheckOutCheckInService;
import org.alfresco.service.cmr.model.FileFolderService;
import org.alfresco.service.cmr.model.FileInfo;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.namespace.QName;
import org.alfresco.test_category.OwnJVMTestsCategory;
import org.alfresco.util.ApplicationContextHelper;
import org.alfresco.util.FileFilterMode.Client;
@@ -497,4 +503,66 @@ public class OpenCmisLocalTest extends TestCase
assertFalse(nodeService.exists(doc1NodeRef));
assertFalse(nodeService.exists(doc1WorkingCopy));
}
/**
 * Creates a document through the OpenCMIS client API and checks that the encoding
 * stored on its content property is "ISO-8859-1".
 * <p>
 * The repository-side checks run as the system user; the security context is
 * cleared again in all cases.
 */
public void testEncodingForCreateContentStream()
{
    final FileFolderService fileFolderService =
            ((ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY)).getFileFolderService();

    // Authenticate as system
    final AuthenticationComponent authComponent =
            (AuthenticationComponent) ctx.getBean(BEAN_NAME_AUTHENTICATION_COMPONENT);
    authComponent.setSystemUserAsCurrentUser();
    try
    {
        /* Create the document using openCmis services */
        final Session cmisSession = getRepository("admin", "admin").createSession();
        final Folder root = cmisSession.getRootFolder();
        final Document created = createDocument(root, "test_file_" + GUID.generate() + ".txt", cmisSession);

        // both the full stream and a ranged stream must be available
        assertNotNull(created.getContentStream());
        assertNotNull(created.getContentStream(BigInteger.valueOf(2), BigInteger.valueOf(4)));

        // read the stored content data back from the repository
        final NodeRef createdNodeRef = cmisIdToNodeRef(created.getId());
        final Map<QName, Serializable> nodeProperties = fileFolderService.getFileInfo(createdNodeRef).getProperties();
        final ContentDataWithId storedContent = (ContentDataWithId) nodeProperties
                .get(QName.createQName("{http://www.alfresco.org/model/content/1.0}content"));

        assertEquals("ISO-8859-1", storedContent.getEncoding());
    }
    finally
    {
        authComponent.clearCurrentSecurityContext();
    }
}
/**
 * Creates a major-version {@code cmis:document} whose bytes are ISO-8859-1 encoded
 * while the content stream declares {@code charset=UTF-8}.
 * <p>
 * The mismatch is intentional: it lets callers verify that the declared charset
 * is ignored in favour of automatic charset detection.
 *
 * @param target the folder in which the document is created
 * @param newDocName the name of the new document, also used as the content stream file name
 * @param session the session whose object factory builds the content stream
 * @return the newly created document
 * @throws IllegalStateException if the ISO-8859-1 charset is not available
 */
private static Document createDocument(Folder target, String newDocName, Session session)
{
    Map<String, String> props = new HashMap<String, String>();
    props.put(PropertyIds.OBJECT_TYPE_ID, "cmis:document");
    props.put(PropertyIds.NAME, newDocName);

    String content = "aegif Mind Share Leader Generating New Paradigms by aegif corporation.";
    byte[] buf;
    try
    {
        buf = content.getBytes("ISO-8859-1"); // set the encoding here for the content stream
    }
    catch (UnsupportedEncodingException e)
    {
        // fail fast with the real cause rather than continuing with a null buffer
        throw new IllegalStateException("ISO-8859-1 charset is not available", e);
    }

    ByteArrayInputStream input = new ByteArrayInputStream(buf);
    ContentStream contentStream = session.getObjectFactory().createContentStream(newDocName, buf.length,
            "text/plain; charset=UTF-8", input); // additionally set the charset here

    // NOTE that we intentionally specified the wrong charset here (as UTF-8)
    // because Alfresco does automatic charset detection, so we will ignore this explicit request
    return target.createDocument(props, contentStream, VersioningState.MAJOR);
}
}