MNT-16882 : CMIS does not detect text encoding correctly

- explicit charset requests are not supported by the Alfresco repository; we use encoding-guessing methods instead
   - the fix is to correctly interpret the mimetype text for the CMIS requests and make CMIS behave similarly to REST V1 APIs when working with content.
   - added tests for the 3 affected methods

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@135665 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrei Rebegea
2017-03-07 10:44:08 +00:00
parent a75200b776
commit 792bba21c8
3 changed files with 246 additions and 31 deletions

View File

@@ -1312,11 +1312,11 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
// copy stream to temp file // copy stream to temp file
// OpenCMIS does this for us .... // OpenCMIS does this for us ....
tempFile = copyToTempFile(contentStream); tempFile = copyToTempFile(contentStream);
final Charset encoding = (tempFile == null ? null : getEncoding(tempFile, contentStream.getMimeType())); String encoding = getEncoding(tempFile, mimeType);
ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef); ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef);
writer.setMimetype(mimeType); writer.setMimetype(mimeType);
writer.setEncoding(encoding.name()); writer.setEncoding(encoding);
writer.putContent(tempFile); writer.putContent(tempFile);
} }
} }
@@ -1572,16 +1572,15 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
throw new CmisInvalidArgumentException("No content!"); throw new CmisInvalidArgumentException("No content!");
} }
// copy stream to temp file String mimeType = parseMimeType(contentStream);
final File tempFile = copyToTempFile(contentStream); final File tempFile = copyToTempFile(contentStream);
final Charset encoding = getEncoding(tempFile, contentStream.getMimeType()); String encoding = getEncoding(tempFile, mimeType);
try try
{ {
ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef); ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef);
String mimeType = parseMimeType(contentStream);
writer.setMimetype(mimeType); writer.setMimetype(mimeType);
writer.setEncoding(encoding.name()); writer.setEncoding(encoding);
writer.putContent(tempFile); writer.putContent(tempFile);
} }
finally finally
@@ -2307,7 +2306,6 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
// copy stream to temp file // copy stream to temp file
final File tempFile = copyToTempFile(contentStream); final File tempFile = copyToTempFile(contentStream);
final Charset encoding = (tempFile == null ? null : getEncoding(tempFile, contentStream.getMimeType()));
// check in // check in
// update PWC // update PWC
@@ -2319,10 +2317,12 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
// handle content // handle content
if (contentStream != null) if (contentStream != null)
{ {
String mimeType = parseMimeType(contentStream);
String encoding = getEncoding(tempFile, mimeType);
// write content // write content
ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef); ContentWriter writer = connector.getFileFolderService().getWriter(nodeRef);
writer.setMimetype(parseMimeType(contentStream)); writer.setMimetype(mimeType);
writer.setEncoding(encoding.name()); writer.setEncoding(encoding);
writer.putContent(tempFile); writer.putContent(tempFile);
} }
@@ -3081,22 +3081,51 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr
} }
} }
private Charset getEncoding(File tempFile, String mimeType) /**
* Inspired from NodesImpl.guessEncoding method.
*
* @param tempFile can be null;
* @param mimeType can be null;
* @return the encoding detected. never null;
*/
private String getEncoding(File tempFile, String mimeType)
{ {
Charset encoding = null; String defaultEncoding = "UTF-8";
if (tempFile == null)
{
return defaultEncoding;
}
InputStream tfis = null;
try try
{ {
InputStream tfis = new BufferedInputStream(new FileInputStream(tempFile)); tfis = new BufferedInputStream(new FileInputStream(tempFile));
ContentCharsetFinder charsetFinder = connector.getMimetypeService().getContentCharsetFinder(); ContentCharsetFinder charsetFinder = connector.getMimetypeService().getContentCharsetFinder();
encoding = charsetFinder.getCharset(tfis, mimeType); return charsetFinder.getCharset(tfis, mimeType).name();
tfis.close(); }
} catch (Exception e) catch (Exception e)
{ {
throw new CmisStorageException("Unable to read content: " + e.getMessage(), e); throw new CmisStorageException("Unable to read content: " + e.getMessage(), e);
} }
finally
{
closeInputStream(tfis);
}
}
return encoding; protected void closeInputStream(InputStream tfis)
{
if (tfis != null)
{
try
{
tfis.close();
}
catch (Exception e)
{
// nothing
}
}
} }
private File copyToTempFile(ContentStream contentStream) private File copyToTempFile(ContentStream contentStream)

View File

@@ -27,8 +27,17 @@
package org.alfresco.opencmis; package org.alfresco.opencmis;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
import java.io.File; import java.io.File;
import java.io.Serializable; import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.ArrayList; import java.util.ArrayList;
@@ -59,9 +68,11 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.dictionary.DictionaryDAO; import org.alfresco.repo.dictionary.DictionaryDAO;
import org.alfresco.repo.dictionary.M2Model; import org.alfresco.repo.dictionary.M2Model;
import org.alfresco.repo.domain.audit.AuditDAO; import org.alfresco.repo.domain.audit.AuditDAO;
import org.alfresco.repo.domain.node.ContentDataWithId;
import org.alfresco.repo.domain.node.NodeDAO; import org.alfresco.repo.domain.node.NodeDAO;
import org.alfresco.repo.model.Repository; import org.alfresco.repo.model.Repository;
import org.alfresco.repo.node.archive.NodeArchiveService; import org.alfresco.repo.node.archive.NodeArchiveService;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.repo.security.authentication.AuthenticationContext; import org.alfresco.repo.security.authentication.AuthenticationContext;
import org.alfresco.repo.security.authentication.AuthenticationUtil; import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.repo.security.authentication.AuthenticationUtil.RunAsWork; import org.alfresco.repo.security.authentication.AuthenticationUtil.RunAsWork;
@@ -73,6 +84,7 @@ import org.alfresco.repo.transaction.RetryingTransactionHelper;
import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback; import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback;
import org.alfresco.repo.version.VersionableAspectTest; import org.alfresco.repo.version.VersionableAspectTest;
import org.alfresco.repo.workflow.WorkflowDeployer; import org.alfresco.repo.workflow.WorkflowDeployer;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.action.ActionCondition; import org.alfresco.service.cmr.action.ActionCondition;
import org.alfresco.service.cmr.action.ActionService; import org.alfresco.service.cmr.action.ActionService;
import org.alfresco.service.cmr.dictionary.AspectDefinition; import org.alfresco.service.cmr.dictionary.AspectDefinition;
@@ -110,6 +122,7 @@ import org.apache.chemistry.opencmis.commons.PropertyIds;
import org.apache.chemistry.opencmis.commons.data.Ace; import org.apache.chemistry.opencmis.commons.data.Ace;
import org.apache.chemistry.opencmis.commons.data.AllowableActions; import org.apache.chemistry.opencmis.commons.data.AllowableActions;
import org.apache.chemistry.opencmis.commons.data.CmisExtensionElement; import org.apache.chemistry.opencmis.commons.data.CmisExtensionElement;
import org.apache.chemistry.opencmis.commons.data.ContentStream;
import org.apache.chemistry.opencmis.commons.data.FailedToDeleteData; import org.apache.chemistry.opencmis.commons.data.FailedToDeleteData;
import org.apache.chemistry.opencmis.commons.data.ObjectData; import org.apache.chemistry.opencmis.commons.data.ObjectData;
import org.apache.chemistry.opencmis.commons.data.ObjectInFolderData; import org.apache.chemistry.opencmis.commons.data.ObjectInFolderData;
@@ -154,13 +167,6 @@ import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContext;
import org.springframework.extensions.webscripts.GUID; import org.springframework.extensions.webscripts.GUID;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/** /**
* OpenCMIS tests. * OpenCMIS tests.
@@ -793,7 +799,13 @@ public class CMISTest
*/ */
@Test @Test
public void testContentMimeTypeDetection() public void testContentMimeTypeDetection()
{ {
ServiceRegistry serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
FileFolderService ffs = serviceRegistry.getFileFolderService();
AuthenticationComponent authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
final String isoEncoding = "ISO-8859-1";
final String utfEncoding = "UTF-8";
// get repository id // get repository id
List<RepositoryInfo> repositories = withCmisService(new CmisServiceCallback<List<RepositoryInfo>>() List<RepositoryInfo> repositories = withCmisService(new CmisServiceCallback<List<RepositoryInfo>>()
{ {
@@ -863,12 +875,17 @@ public class CMISTest
return contentType; return contentType;
} }
}); });
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_HTML, contentType); assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_HTML, contentType);
// check that the encoding is detected correctly
checkEncoding(ffs, authenticationComponent, objectData, utfEncoding);
} }
// create content stream with mimetype and encoding // create content stream with mimetype and encoding as UTF-8
{ {
String mimeType = MimetypeMap.MIMETYPE_TEXT_PLAIN + "; charset=UTF-8"; String mimeType = MimetypeMap.MIMETYPE_TEXT_PLAIN + "; charset="+isoEncoding;
// NOTE that we intentionally specify the wrong charset here.
// Alfresco will detect the encoding (as UTF-8 - given by the ContentStreamImpl constructor)
final ContentStreamImpl contentStreamHTML = new ContentStreamImpl(null, mimeType, "<html><head><title> Hello </title></head><body><p> Test html</p></body></html></body></html>"); final ContentStreamImpl contentStreamHTML = new ContentStreamImpl(null, mimeType, "<html><head><title> Hello </title></head><body><p> Test html</p></body></html></body></html>");
withCmisService(new CmisServiceCallback<Void>() withCmisService(new CmisServiceCallback<Void>()
{ {
@@ -900,8 +917,68 @@ public class CMISTest
return contentType; return contentType;
} }
}); });
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_TEXT_PLAIN, contentType); assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_TEXT_PLAIN, contentType);
// check that the encoding is detected correctly
checkEncoding(ffs, authenticationComponent, objectData, utfEncoding);
} }
// create content stream with mimetype and encoding as ISO-8859-1
{
    String mimeType = MimetypeMap.MIMETYPE_TEXT_PLAIN + "; charset=" + utfEncoding;
    // NOTE that we intentionally specify the wrong charset here.
    // Alfresco will detect the encoding (as ISO-8859-1 - given by the ContentStreamImpl with streams)
    String content = "<html><head><title>aegif Mind Share Leader Generating New Paradigms by aegif corporation</title></head><body><p> Test html</p></body></html></body></html>";
    byte[] buf;
    try
    {
        buf = content.getBytes(isoEncoding); // set the encoding here for the content stream
    }
    catch (UnsupportedEncodingException e)
    {
        // Surface the real cause right away; swallowing it would leave buf unset
        // and the failure would show up as a NullPointerException further down.
        throw new IllegalStateException("Charset is not supported: " + isoEncoding, e);
    }
    ByteArrayInputStream input = new ByteArrayInputStream(buf);
    final ContentStream contentStreamHTML = new ContentStreamImpl(null, BigInteger.valueOf(buf.length), mimeType, input);
    withCmisService(new CmisServiceCallback<Void>()
    {
        @Override
        public Void execute(CmisService cmisService)
        {
            Holder<String> latestObjectIdHolder = getHolderOfObjectOfLatestVersion(cmisService, repositoryId,
                    objectIdHolder);
            cmisService.setContentStream(repositoryId, latestObjectIdHolder, true, null, contentStreamHTML, null);
            return null;
        }
    });
    // check mimetype
    final ObjectData objectData = withCmisService(new CmisServiceCallback<ObjectData>()
    {
        @Override
        public ObjectData execute(CmisService cmisService)
        {
            ObjectData objectData = cmisService.getObjectByPath(repositoryId, path, null, false,
                    IncludeRelationships.NONE, null, false, false, null);
            return objectData;
        }
    });
    String contentType = withCmisService(new CmisServiceCallback<String>()
    {
        @Override
        public String execute(CmisService cmisService)
        {
            String contentType = cmisService.getObjectInfo(repositoryId, objectData.getId()).getContentType();
            return contentType;
        }
    });
    assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_TEXT_PLAIN, contentType);
    // check that the encoding is detected correctly
    checkEncoding(ffs, authenticationComponent, objectData, isoEncoding);
}
// checkout/checkin object with mimetype and encoding // checkout/checkin object with mimetype and encoding
{ {
@@ -956,10 +1033,51 @@ public class CMISTest
return contentType; return contentType;
} }
}); });
assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_HTML, contentType); assertEquals("Mimetype is not defined correctly.", MimetypeMap.MIMETYPE_HTML, contentType);
checkEncoding(ffs, authenticationComponent, objectData, utfEncoding);
} }
}
/**
 * Asserts that the content property of the node behind a CMIS object reports the expected encoding.
 * <p>
 * The check runs with the system user set as the current user; the security context is
 * cleared again afterwards, whether or not the assertions pass.
 *
 * @param ffs                     service used to load the file info of the node
 * @param authenticationComponent used to switch to the system user and to clear the context
 * @param objectData              CMIS object whose id identifies the node to inspect
 * @param expectedEncoding        encoding name the stored content is expected to report
 */
protected void checkEncoding(FileFolderService ffs, AuthenticationComponent authenticationComponent,
        final ObjectData objectData, String expectedEncoding)
{
    // Authenticate as system to check the properties in alfresco
    authenticationComponent.setSystemUserAsCurrentUser();
    try
    {
        NodeRef nodeRef = cmisIdToNodeRef(objectData.getId());
        FileInfo fileInfo = ffs.getFileInfo(nodeRef);
        assertNotNull("No file info found for " + nodeRef, fileInfo);
        Map<QName, Serializable> properties = fileInfo.getProperties();
        ContentDataWithId contentData = (ContentDataWithId) properties
                .get(QName.createQName("{http://www.alfresco.org/model/content/1.0}content"));
        // Fail with a readable message rather than a NullPointerException when there is no content.
        assertNotNull("No content property found for " + nodeRef, contentData);
        assertEquals("Unexpected content encoding for " + nodeRef, expectedEncoding, contentData.getEncoding());
    }
    finally
    {
        authenticationComponent.clearCurrentSecurityContext();
    }
}
/**
* Turns a CMIS id into a node ref.
* <p>
* Everything from the first ';' onwards is dropped before the remaining text is passed to the
* {@link NodeRef} constructor (presumably the suffix is a version label - TODO confirm).
*
* @param nodeId the CMIS object id, optionally followed by a ';'-separated suffix
* @return a node ref built from the id with any ';' suffix removed
*/
private NodeRef cmisIdToNodeRef(String nodeId)
{
int idx = nodeId.indexOf(";");
if(idx != -1)
{
nodeId = nodeId.substring(0, idx);
}
NodeRef nodeRef = new NodeRef(nodeId);
return nodeRef;
} }
private Holder<String> getHolderOfObjectOfLatestVersion(CmisService cmisService, String repositoryId, Holder<String> currentHolder) private Holder<String> getHolderOfObjectOfLatestVersion(CmisService cmisService, String repositoryId, Holder<String> currentHolder)
{ {
ObjectData oData = cmisService.getObjectOfLatestVersion(repositoryId, currentHolder.getValue(), null, Boolean.FALSE, null, null, null, null, null, null, null); ObjectData oData = cmisService.getObjectOfLatestVersion(repositoryId, currentHolder.getValue(), null, Boolean.FALSE, null, null, null, null, null, null, null);

View File

@@ -30,6 +30,8 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.Calendar; import java.util.Calendar;
import java.util.HashMap; import java.util.HashMap;
@@ -42,14 +44,18 @@ import junit.framework.TestCase;
import org.alfresco.events.types.ContentEventImpl; import org.alfresco.events.types.ContentEventImpl;
import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentWriter; import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.repo.domain.node.ContentDataWithId;
import org.alfresco.repo.events.EventPublisherForTestingOnly; import org.alfresco.repo.events.EventPublisherForTestingOnly;
import org.alfresco.repo.security.authentication.AuthenticationComponent; import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.service.ServiceRegistry; import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.coci.CheckOutCheckInService; import org.alfresco.service.cmr.coci.CheckOutCheckInService;
import org.alfresco.service.cmr.model.FileFolderService;
import org.alfresco.service.cmr.model.FileInfo;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService; import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.namespace.QName;
import org.alfresco.test_category.OwnJVMTestsCategory; import org.alfresco.test_category.OwnJVMTestsCategory;
import org.alfresco.util.ApplicationContextHelper; import org.alfresco.util.ApplicationContextHelper;
import org.alfresco.util.FileFilterMode.Client; import org.alfresco.util.FileFilterMode.Client;
@@ -496,5 +502,67 @@ public class OpenCmisLocalTest extends TestCase
NodeService nodeService = serviceRegistry.getNodeService(); NodeService nodeService = serviceRegistry.getNodeService();
assertFalse(nodeService.exists(doc1NodeRef)); assertFalse(nodeService.exists(doc1NodeRef));
assertFalse(nodeService.exists(doc1WorkingCopy)); assertFalse(nodeService.exists(doc1WorkingCopy));
} }
/**
 * Creates a document through the OpenCMIS client API and checks the encoding stored on its
 * content property.
 * <p>
 * The document comes from {@code createDocument}, which encodes the text as ISO-8859-1 while
 * declaring {@code charset=UTF-8} in the mimetype; the stored encoding is expected to be
 * ISO-8859-1.
 */
public void testEncodingForCreateContentStream()
{
    ServiceRegistry registry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
    FileFolderService fileFolderService = registry.getFileFolderService();

    // Run the whole scenario as the system user
    AuthenticationComponent authComponent = (AuthenticationComponent) ctx
            .getBean(BEAN_NAME_AUTHENTICATION_COMPONENT);
    authComponent.setSystemUserAsCurrentUser();
    try
    {
        // Create the document using the OpenCMIS services
        Session cmisSession = getRepository("admin", "admin").createSession();
        Folder root = cmisSession.getRootFolder();
        Document created = createDocument(root, "test_file_" + GUID.generate() + ".txt", cmisSession);

        // Both the full stream and a ranged stream must be retrievable
        ContentStream fullStream = created.getContentStream();
        assertNotNull(fullStream);
        ContentStream rangedStream = created.getContentStream(BigInteger.valueOf(2), BigInteger.valueOf(4));
        assertNotNull(rangedStream);

        // Read the encoding recorded on the node's content property
        FileInfo info = fileFolderService.getFileInfo(cmisIdToNodeRef(created.getId()));
        Map<QName, Serializable> nodeProperties = info.getProperties();
        QName contentQName = QName.createQName("{http://www.alfresco.org/model/content/1.0}content");
        ContentDataWithId storedContent = (ContentDataWithId) nodeProperties.get(contentQName);
        assertEquals("ISO-8859-1", storedContent.getEncoding());
    }
    finally
    {
        authComponent.clearCurrentSecurityContext();
    }
}
/**
 * Creates a major-version {@code cmis:document} whose bytes are ISO-8859-1 encoded while the
 * content stream's mimetype declares {@code charset=UTF-8}.
 *
 * @param target     folder in which the document is created
 * @param newDocName name of the new document, also used as the content stream file name
 * @param session    session whose object factory builds the content stream
 * @return the document returned by {@code target.createDocument}
 * @throws IllegalStateException if the ISO-8859-1 charset is not available
 */
private static Document createDocument(Folder target, String newDocName, Session session)
{
    Map<String, String> props = new HashMap<String, String>();
    props.put(PropertyIds.OBJECT_TYPE_ID, "cmis:document");
    props.put(PropertyIds.NAME, newDocName);

    String content = "aegif Mind Share Leader Generating New Paradigms by aegif corporation.";
    byte[] buf;
    try
    {
        buf = content.getBytes("ISO-8859-1"); // set the encoding here for the content stream
    }
    catch (UnsupportedEncodingException e)
    {
        // Surface the real cause right away; swallowing it would leave buf unset
        // and the failure would show up as a NullPointerException below.
        throw new IllegalStateException("ISO-8859-1 charset is not supported", e);
    }

    ByteArrayInputStream input = new ByteArrayInputStream(buf);
    ContentStream contentStream = session.getObjectFactory().createContentStream(newDocName, buf.length,
            "text/plain; charset=UTF-8", input); // additionally set the charset here
    // NOTE that we intentionally specified the wrong charset here (as UTF-8)
    // because Alfresco does automatic charset detection, so we will ignore this explicit request
    return target.createDocument(props, contentStream, VersioningState.MAJOR);
}
} }