# add dist

This commit is contained in:
Mario Romano
2016-04-21 11:56:31 +01:00
parent 5914688467
commit 07807e7bc3
13499 changed files with 1808930 additions and 5 deletions

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python
import re
import json
# https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
# http://stackoverflow.com/a/13436167/96656
def unisymbol(codePoint):
    """Return `codePoint` as the UTF-16 code unit(s) JavaScript would use.

    Values in U+0000..U+FFFF come back as a single code unit. Values in
    U+010000..U+10FFFF come back as a high/low surrogate pair computed with
    the surrogate formulae linked above. Any other value yields the literal
    string 'Error'.
    """
    if 0x0000 <= codePoint <= 0xFFFF:
        return unichr(codePoint)
    if not (0x010000 <= codePoint <= 0x10FFFF):
        return 'Error'
    # Distance into the astral range; the quotient selects the high
    # surrogate and the remainder the low surrogate.
    offset = codePoint - 0x10000
    leadUnit = int(offset / 0x400) + 0xD800
    trailUnit = int(offset % 0x400) + 0xDC00
    return unichr(leadUnit) + unichr(trailUnit)
def hexify(codePoint):
    """Format `codePoint` as `U+` followed by uppercase hex, zero-padded to six digits."""
    # Drop the two-character `0x` prefix that `hex()` produces.
    digits = hex(codePoint)[2:]
    return 'U+' + digits.upper().zfill(6)
def writeFile(filename, contents):
    """Print `filename`, then overwrite that file with `contents`.

    Leading and trailing whitespace is stripped from `contents` and a single
    trailing newline is appended before writing.
    """
    print(filename)
    with open(filename, 'w') as handle:
        handle.write(contents.strip() + '\n')
# Build one record per Unicode scalar value (every code point from U+0000 to
# U+10FFFF except the surrogate range), then dump the list to `data.json`.
data = []
for codePoint in range(0x000000, 0x10FFFF + 1):
    # Skip non-scalar values.
    if 0xD800 <= codePoint <= 0xDFFF:
        continue
    symbol = unisymbol(codePoint)
    # Represent the UTF-8 bytes as a string with one character per byte.
    # http://stackoverflow.com/a/17199950/96656
    encoded = symbol.encode('utf8').decode('latin1')
    data.append({
        'codePoint': codePoint,
        'decoded': symbol,
        'encoded': encoded
    })

jsonData = json.dumps(data, sort_keys=False, indent=2, separators=(',', ': '))
# Use tabs instead of double spaces for indentation. Only the leading
# whitespace of each line is rewritten: a plain string replace would also
# touch spaces inside the values (the U+0020 record) and after the colons.
jsonData = re.sub(
    r'(?m)^(?:  )+',
    lambda match: '\t' * (len(match.group(0)) // 2),
    jsonData
)
# Uppercase the hexadecimal digits in `\uXXXX` escape sequences.
jsonData = re.sub(
    r'\\u([a-fA-F0-9]{4})',
    lambda match: r'\u{}'.format(match.group(1).upper()),
    jsonData
)
writeFile('data.json', jsonData)

View File

@@ -0,0 +1,35 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>utf8.js test suite</title>
<link rel="stylesheet" href="../node_modules/qunitjs/qunit/qunit.css">
</head>
<body>
<div id="qunit"></div>
<script src="../node_modules/qunitjs/qunit/qunit.js"></script>
<script src="../utf8.js"></script>
<script>
// Record on `QUnit.urlParams` whether the page URL's query string contains
// `norequire=true` as a parameter of its own.
QUnit.urlParams.norequire = /[?&]norequire=true(?:&|$)/.test(location.search);
// Write exactly one extra script tag into the page: `tests.js` directly when
// `norequire` is set, otherwise the require.js loader. The closing tags are
// spelled `<\/script>` so the HTML parser does not take them as the end of
// this inline script.
document.write(QUnit.urlParams.norequire
? '<script src="tests.js"><\/script>'
: '<script src="../node_modules/requirejs/require.js"><\/script>'
);
</script>
<script>
// Runs only when a global `require` exists, i.e. when the previous script
// wrote the require.js tag rather than the plain `tests.js` tag.
window.require && require({
// Module IDs are resolved relative to the require.js directory.
'baseUrl': '../node_modules/requirejs/',
// Adds the current timestamp as a query argument (presumably to defeat
// caching between runs; see the require.js `urlArgs` option).
'urlArgs': 't=' + (+new Date),
'paths': {
// Maps the `utf8` module ID to the library file, relative to `baseUrl`.
'utf8': '../../utf8'
}
},
// Request `utf8` first; `tests.js` is requested only from its load callback.
['utf8'], function(utf8) {
require(['tests.js']);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,276 @@
(function(root) {
'use strict';
// `Function.prototype` is itself callable, accepts any arguments and returns
// `undefined`, so it serves as a ready-made no-op function.
var noop = Function.prototype;
// Pick a script loader for the current environment:
// - the global `require` function, unless an AMD loader is present
//   (`root.define` with a truthy `define.amd`);
// - otherwise `root.load`, but only when there is no `root.document` and
//   `root.java` exists (presumably Rhino; confirm);
// - otherwise `noop`, so later `load(...)` calls simply return `undefined`.
var load = (typeof require == 'function' && !(root.define && define.amd)) ?
require :
(!root.document && root.java && root.load) || noop;
// Resolve the QUnit object: reuse `root.QUnit` when it already exists,
// otherwise load qunit.js through `load` and store the result on `root`.
var QUnit = (function() {
return root.QUnit || (
// Stub `addEventListener` and `setTimeout` on `root` when they are missing,
// before qunit.js is loaded (presumably QUnit touches them while loading;
// confirm).
root.addEventListener || (root.addEventListener = noop),
root.setTimeout || (root.setTimeout = noop),
// Prefer the loader's return value; fall back to a `root.QUnit` global that
// the loaded script may have defined.
root.QUnit = load('../node_modules/qunitjs/qunit/qunit.js') || root.QUnit,
// Remove the `addEventListener` stub again if it is the one installed above.
// The `setTimeout` stub is left in place.
addEventListener === noop && delete root.addEventListener,
root.QUnit
);
}());
// Load qunit-extras when the loader returns it, and run it against `root`.
var qe = load('../node_modules/qunit-extras/qunit-extras.js');
if (qe) {
qe.runInContext(root);
}
/** The `utf8` object to test */
var utf8 = root.utf8 || (root.utf8 = (
utf8 = load('../utf8.js') || root.utf8,
utf8 = utf8.utf8 || utf8
));
/*--------------------------------------------------------------------------*/
/**
 * Calls `fn` once for each index of `array`, in ascending order, passing
 * only the element at that index.
 *
 * The length is read once up front, so elements appended while iterating
 * are not visited.
 *
 * @param {Array} array The array (or array-like object) to walk.
 * @param {Function} fn The function invoked with each element.
 */
function forEach(array, fn) {
	for (var position = 0, total = array.length; position < total; position += 1) {
		fn(array[position]);
	}
}
/**
 * Reports whether an argument vector describes a Node.js run that asked for
 * the extended tests, i.e. `node <script> --extended`.
 *
 * Current Node.js releases set `process.argv[0]` to the absolute path of the
 * executable rather than the bare string `node`, so only the final path
 * segment is inspected (`node`, `nodejs`, optionally with `.exe`).
 *
 * @param {Array} argv The argument vector to inspect.
 * @returns {boolean} `true` when the extended tests were requested.
 */
function isExtendedNodeRun(argv) {
	return /(?:^|[\\\/])node(?:js)?(?:\.exe)?$/i.test(argv[0]) &&
		argv[2] == '--extended';
}

// Quick and dirty test to see if we're in Node & need extended tests
var runExtendedTests = (function() {
	try {
		return isExtendedNodeRun(process.argv);
	} catch(error) {
		// `process` (or `process.argv`) is unavailable outside Node.js.
		return false;
	}
}());
// Test fixtures. Each record holds:
// - `decoded`: the JavaScript (UTF-16) string;
// - `encoded`: its UTF-8 encoding, one character per byte;
// - `codePoint` or `description`: used to label the assertion;
// - `error`: when `true`, both encoding `decoded` and decoding `encoded`
//   are expected to throw.
var data = [
	// 1-byte
	{
		'codePoint': 0x0000,
		'decoded': '\0',
		'encoded': '\0'
	},
	{
		'codePoint': 0x005C,
		'decoded': '\x5C',
		'encoded': '\x5C'
	},
	{
		'codePoint': 0x007F,
		'decoded': '\x7F',
		'encoded': '\x7F'
	},
	// 2-byte
	{
		'codePoint': 0x0080,
		'decoded': '\x80',
		'encoded': '\xC2\x80'
	},
	{
		'codePoint': 0x05CA,
		'decoded': '\u05CA',
		'encoded': '\xD7\x8A'
	},
	{
		'codePoint': 0x07FF,
		'decoded': '\u07FF',
		'encoded': '\xDF\xBF'
	},
	// 3-byte
	{
		'codePoint': 0x0800,
		'decoded': '\u0800',
		'encoded': '\xE0\xA0\x80'
	},
	{
		'codePoint': 0x2C3C,
		'decoded': '\u2C3C',
		'encoded': '\xE2\xB0\xBC'
	},
	{
		'codePoint': 0xFFFF,
		'decoded': '\uFFFF',
		'encoded': '\xEF\xBF\xBF'
	},
	// unmatched surrogate halves
	// high surrogates: 0xD800 to 0xDBFF
	{
		'codePoint': 0xD800,
		'decoded': '\uD800',
		'encoded': '\xED\xA0\x80',
		'error': true
	},
	{
		'description': 'High surrogate followed by another high surrogate',
		'decoded': '\uD800\uD800',
		'encoded': '\xED\xA0\x80\xED\xA0\x80',
		'error': true
	},
	{
		'description': 'High surrogate followed by a symbol that is not a surrogate',
		'decoded': '\uD800A',
		'encoded': '\xED\xA0\x80A',
		'error': true
	},
	{
		'description': 'Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate',
		'decoded': '\uD800\uD834\uDF06\uD800',
		'encoded': '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80',
		'error': true
	},
	{
		'codePoint': 0xD9AF,
		'decoded': '\uD9AF',
		'encoded': '\xED\xA6\xAF',
		'error': true
	},
	{
		'codePoint': 0xDBFF,
		'decoded': '\uDBFF',
		'encoded': '\xED\xAF\xBF',
		'error': true
	},
	// low surrogates: 0xDC00 to 0xDFFF
	{
		'codePoint': 0xDC00,
		'decoded': '\uDC00',
		'encoded': '\xED\xB0\x80',
		'error': true
	},
	{
		'description': 'Low surrogate followed by another low surrogate',
		'decoded': '\uDC00\uDC00',
		'encoded': '\xED\xB0\x80\xED\xB0\x80',
		'error': true
	},
	{
		'description': 'Low surrogate followed by a symbol that is not a surrogate',
		'decoded': '\uDC00A',
		'encoded': '\xED\xB0\x80A',
		'error': true
	},
	{
		'description': 'Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate',
		'decoded': '\uDC00\uD834\uDF06\uDC00',
		'encoded': '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80',
		'error': true
	},
	{
		'codePoint': 0xDEEE,
		'decoded': '\uDEEE',
		'encoded': '\xED\xBB\xAE',
		'error': true
	},
	{
		'codePoint': 0xDFFF,
		'decoded': '\uDFFF',
		'encoded': '\xED\xBF\xBF',
		'error': true
	},
	// 4-byte
	{
		'codePoint': 0x010000,
		'decoded': '\uD800\uDC00',
		'encoded': '\xF0\x90\x80\x80'
	},
	{
		'codePoint': 0x01D306,
		'decoded': '\uD834\uDF06',
		'encoded': '\xF0\x9D\x8C\x86'
	},
	{
		// Highest code point, U+10FFFF.
		'codePoint': 0x10FFFF,
		'decoded': '\uDBFF\uDFFF',
		'encoded': '\xF4\x8F\xBF\xBF'
	}
];
// When extended tests were requested, append the records from `data.json`
// to the hand-written fixtures. Note this uses the bare `require`, not the
// `load` helper.
if (runExtendedTests) {
data = data.concat(require('./data.json'));
}
// `throws` is a reserved word in ES3; alias it to avoid errors
// (hence the bracket-notation property access as well).
var raises = QUnit.assert['throws'];
// explicitly call `QUnit.module()` instead of `module()`
// in case we are in a CLI environment
QUnit.module('utf8.js');
// Round-trips every fixture through `utf8.encode` / `utf8.decode`, then
// checks that malformed byte strings are rejected. In the `raises` calls the
// trailing string is the assertion's label, following QUnit's
// `throws(block, expected, message)` signature.
test('encode/decode', function() {
	forEach(data, function(object) {
		// Label the assertion with the description, or with `U+<hex>` when
		// the record only carries a code point.
		var description = object.description || 'U+' + object.codePoint.toString(16).toUpperCase();
		if (object.error) {
			// Non-scalar values (lone surrogates) must be rejected in both
			// directions.
			raises(
				function() {
					utf8.decode(object.encoded);
				},
				Error,
				'Error: non-scalar value detected'
			);
			raises(
				function() {
					utf8.encode(object.decoded);
				},
				Error,
				'Error: non-scalar value detected'
			);
		} else {
			// QUnit's argument order is (actual, expected, message).
			equal(
				utf8.encode(object.decoded),
				object.encoded,
				'Encoding: ' + description
			);
			equal(
				utf8.decode(object.encoded),
				object.decoded,
				'Decoding: ' + description
			);
		}
	});
	// Error handling
	// A character above 0xFF is not a byte at all.
	raises(
		function() {
			utf8.decode('\uFFFF');
		},
		Error,
		'Error: invalid UTF-8 detected'
	);
	// 0xE9 is the lead byte of a 3-byte sequence; 0x00 is not a valid
	// continuation byte.
	raises(
		function() {
			utf8.decode('\xE9\x00\x00');
		},
		Error,
		'Error: invalid continuation byte (3-byte sequence expected)'
	);
	// 0xC2 is the lead byte of a 2-byte sequence, followed by a non-byte.
	raises(
		function() {
			utf8.decode('\xC2\uFFFF');
		},
		Error,
		'Error: invalid continuation byte'
	);
	// 0xF0 announces a 4-byte sequence, but the input ends after two bytes.
	raises(
		function() {
			utf8.decode('\xF0\x9D');
		},
		Error,
		'Error: invalid byte index'
	);
});
/*--------------------------------------------------------------------------*/
// configure QUnit and call `QUnit.start()` for
// Narwhal, Node.js, PhantomJS, Rhino, and RingoJS
// These are detected as "no `root.document`" or "`root.phantom` is set".
// In an ordinary browser page this branch is skipped.
if (!root.document || root.phantom) {
// `noglobals` is a QUnit config flag (see the QUnit.config documentation).
QUnit.config.noglobals = true;
QUnit.start();
}
}(typeof global == 'object' && global || this));