mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Moved data-model master into its own directory
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Data model classes
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2016 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.encoding;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.tika.parser.txt.CharsetDetector;
|
||||
import org.apache.tika.parser.txt.CharsetMatch;
|
||||
|
||||
/**
|
||||
* Uses Apache Tika as a fallback encoding detector
|
||||
*
|
||||
* @since 3.4
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class TikaCharsetFinder extends AbstractCharactersetFinder
|
||||
{
|
||||
private static Log logger = LogFactory.getLog(TikaCharsetFinder.class);
|
||||
|
||||
private int threshold = 35;
|
||||
|
||||
@Override
|
||||
protected Charset detectCharsetImpl(byte[] buffer) throws Exception
|
||||
{
|
||||
CharsetDetector detector = new CharsetDetector();
|
||||
detector.setText(buffer);
|
||||
CharsetMatch match = detector.detect();
|
||||
|
||||
if(match != null && match.getConfidence() > threshold)
|
||||
{
|
||||
try
|
||||
{
|
||||
return Charset.forName(match.getName());
|
||||
}
|
||||
catch(UnsupportedCharsetException e)
|
||||
{
|
||||
logger.info("Charset detected as " + match.getName() + " but the JVM does not support this, detection skipped");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the matching threshold before we decide that
|
||||
* what we detected is a good match. In the range
|
||||
* 0-100.
|
||||
*/
|
||||
public int getThreshold()
|
||||
{
|
||||
return threshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* At what point do we decide our match is good enough?
|
||||
* In the range 0-100. If we don't reach the threshold,
|
||||
* we'll decline, and either another finder will work on
|
||||
* it or the fallback encoding will be taken.
|
||||
*/
|
||||
public void setThreshold(int threshold)
|
||||
{
|
||||
if(threshold < 0)
|
||||
threshold = 0;
|
||||
if(threshold > 100)
|
||||
threshold = 100;
|
||||
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user