punycode: Update to v0.2.1

This commit is contained in:
Mathias Bynens 2011-11-30 09:44:00 +01:00 committed by Ben Noordhuis
parent b204006105
commit cf89beec6f

View File

@ -4,21 +4,20 @@
* Available under MIT license <http://mths.be/mit> * Available under MIT license <http://mths.be/mit>
*/ */
;(function(window) { ;(function(root) {
/** /**
* The `Punycode` object. * The `punycode` object.
* @name Punycode * @name punycode
* @type Object * @type Object
*/ */
var Punycode, var punycode,
/** Detect free variables `define`, `exports`, and `require` */ /** Detect free variables `define`, `exports`, `module` and `require` */
freeDefine = typeof define == 'function' && typeof define.amd == 'object' && freeDefine = typeof define == 'function' && typeof define.amd == 'object' &&
define.amd && define, define.amd && define,
freeExports = typeof exports == 'object' && exports && freeExports = typeof exports == 'object' && exports,
(typeof global == 'object' && global && global == global.global && freeModule = typeof module == 'object' && module,
(window = global), exports),
freeRequire = typeof require == 'function' && require, freeRequire = typeof require == 'function' && require,
/** Highest positive signed 32-bit integer value */ /** Highest positive signed 32-bit integer value */
@ -35,7 +34,7 @@
delimiter = '-', // '\x2D' delimiter = '-', // '\x2D'
/** Regular expressions */ /** Regular expressions */
regexASCII = /[^\x20-\x7e]/, regexNonASCII = /[^ -~]/, // matches unprintable ASCII chars + non-ASCII chars
regexPunycode = /^xn--/, regexPunycode = /^xn--/,
/** Error messages */ /** Error messages */
@ -50,7 +49,10 @@
/** Convenience shortcuts */ /** Convenience shortcuts */
baseMinusTMin = base - tMin, baseMinusTMin = base - tMin,
floor = Math.floor, floor = Math.floor,
stringFromCharCode = String.fromCharCode; stringFromCharCode = String.fromCharCode,
/** Temporary variable */
key;
/*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/
@ -97,8 +99,9 @@
/** /**
* Creates an array containing the decimal code points of each character in * Creates an array containing the decimal code points of each character in
* the string. * the string.
* @see `Punycode.utf16.encode` * @see `punycode.utf16.encode`
* @memberOf Punycode.utf16 * @see <http://tools.ietf.org/html/rfc2781>
* @memberOf punycode.utf16
* @name decode * @name decode
* @param {String} string The Unicode input string. * @param {String} string The Unicode input string.
* @returns {Array} The new array. * @returns {Array} The new array.
@ -125,8 +128,9 @@
/** /**
* Creates a string based on an array of decimal code points. * Creates a string based on an array of decimal code points.
* @see `Punycode.utf16.decode` * @see `punycode.utf16.decode`
* @memberOf Punycode.utf16 * @see <http://tools.ietf.org/html/rfc2781>
* @memberOf punycode.utf16
* @name encode * @name encode
* @param {Array} codePoints The array of decimal code points. * @param {Array} codePoints The array of decimal code points.
* @returns {String} The new string. * @returns {String} The new string.
@ -215,25 +219,13 @@
/** /**
* Converts a Punycode string of ASCII code points to a string of Unicode * Converts a Punycode string of ASCII code points to a string of Unicode
* code points. * code points.
* @memberOf Punycode * @memberOf punycode
* @param {String} input The Punycode string of ASCII code points. * @param {String} input The Punycode string of ASCII code points.
* @param {Boolean} preserveCase A boolean value indicating if character
* case should be preserved or not.
* @returns {String} The resulting string of Unicode code points. * @returns {String} The resulting string of Unicode code points.
*/ */
function decode(input, preserveCase) { function decode(input) {
// Don't use UTF-16 // Don't use UTF-16
var output = [], var output = [],
/**
* The `caseFlags` array needs room for at least `output.length` values,
* or it can be `undefined` if the case information is not needed. A
* truthy value suggests that the corresponding Unicode character be
* forced to uppercase (if possible), while falsy values suggest that it
* be forced to lowercase (if possible). ASCII code points are output
* already in the proper case, but their flags will be set appropriately
* so that applying the flags would be harmless.
*/
caseFlags = [],
inputLength = input.length, inputLength = input.length,
out, out,
i = 0, i = 0,
@ -261,9 +253,6 @@
} }
for (j = 0; j < basic; ++j) { for (j = 0; j < basic; ++j) {
if (preserveCase) {
caseFlags[output.length] = input.charCodeAt(j) - 65 < 26;
}
// if it's not a basic code point // if it's not a basic code point
if (input.charCodeAt(j) >= 0x80) { if (input.charCodeAt(j) >= 0x80) {
error('not-basic'); error('not-basic');
@ -271,8 +260,8 @@
output.push(input.charCodeAt(j)); output.push(input.charCodeAt(j));
} }
// Main decoding loop: start just after the last delimiter if any basic // Main decoding loop: start just after the last delimiter if any basic code
// code points were copied; start at the beginning otherwise. // points were copied; start at the beginning otherwise.
for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {
@ -322,36 +311,21 @@
i %= out; i %= out;
// Insert `n` at position `i` of the output // Insert `n` at position `i` of the output
// The case of the last character determines `uppercase` flag output.splice(i++, 0, n);
if (preserveCase) {
caseFlags.splice(i, 0, input.charCodeAt(index - 1) - 65 < 26);
}
output.splice(i, 0, n);
i++;
} }
if (preserveCase) {
for (i = 0, length = output.length; i < length; i++) {
if (caseFlags[i]) {
output[i] = (stringFromCharCode(output[i]).toUpperCase()).charCodeAt(0);
}
}
}
return utf16encode(output); return utf16encode(output);
} }
/** /**
* Converts a string of Unicode code points to a Punycode string of ASCII * Converts a string of Unicode code points to a Punycode string of ASCII
* code points. * code points.
* @memberOf Punycode * @memberOf punycode
* @param {String} input The string of Unicode code points. * @param {String} input The string of Unicode code points.
* @param {Boolean} preserveCase A boolean value indicating if character
* case should be preserved or not.
* @returns {String} The resulting Punycode string of ASCII code points. * @returns {String} The resulting Punycode string of ASCII code points.
*/ */
function encode(input, preserveCase) { function encode(input) {
var n, var n,
delta, delta,
handledCPCount, handledCPCount,
@ -363,18 +337,6 @@
k, k,
t, t,
currentValue, currentValue,
/**
* The `caseFlags` array will hold `inputLength` boolean values, where
* `true` suggests that the corresponding Unicode character be forced
* to uppercase after being decoded (if possible), and `false`
* suggests that it be forced to lowercase (if possible). ASCII code
* points are encoded literally, except that ASCII letters are forced
* to uppercase or lowercase according to the corresponding uppercase
* flags. If `caseFlags` remains `undefined` then ASCII letters are
* left as they are, and other code points are treated as if their
* uppercase flags were `true`.
*/
caseFlags,
output = [], output = [],
/** `inputLength` will hold the number of code points in `input`. */ /** `inputLength` will hold the number of code points in `input`. */
inputLength, inputLength,
@ -383,24 +345,12 @@
baseMinusT, baseMinusT,
qMinusT; qMinusT;
if (preserveCase) {
// Preserve case, step 1 of 2: get a list of the unaltered string
caseFlags = utf16decode(input);
}
// Convert the input in UTF-16 to Unicode // Convert the input in UTF-16 to Unicode
input = utf16decode(input); input = utf16decode(input);
// Cache the length // Cache the length
inputLength = input.length; inputLength = input.length;
if (preserveCase) {
// Preserve case, step 2 of 2: modify the list to true/false
for (j = 0; j < inputLength; j++) {
caseFlags[j] = input[j] != caseFlags[j];
}
}
// Initialize the state // Initialize the state
n = initialN; n = initialN;
delta = 0; delta = 0;
@ -410,11 +360,7 @@
for (j = 0; j < inputLength; ++j) { for (j = 0; j < inputLength; ++j) {
currentValue = input[j]; currentValue = input[j];
if (currentValue < 0x80) { if (currentValue < 0x80) {
output.push( output.push(stringFromCharCode(currentValue));
stringFromCharCode(
caseFlags ? encodeBasic(currentValue, caseFlags[j]) : currentValue
)
);
} }
} }
@ -433,7 +379,6 @@
// All non-basic code points < n have been handled already. Find the next // All non-basic code points < n have been handled already. Find the next
// larger one: // larger one:
for (m = maxInt, j = 0; j < inputLength; ++j) { for (m = maxInt, j = 0; j < inputLength; ++j) {
currentValue = input[j]; currentValue = input[j];
if (currentValue >= n && currentValue < m) { if (currentValue >= n && currentValue < m) {
@ -473,7 +418,7 @@
q = floor(qMinusT / baseMinusT); q = floor(qMinusT / baseMinusT);
} }
output.push(stringFromCharCode(digitToBasic(q, preserveCase && caseFlags[j] ? 1 : 0))); output.push(stringFromCharCode(digitToBasic(q, 0)));
bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
delta = 0; delta = 0;
++handledCPCount; ++handledCPCount;
@ -492,7 +437,7 @@
* Punycoded parts of the domain name will be converted, i.e. it doesn't * Punycoded parts of the domain name will be converted, i.e. it doesn't
* matter if you call it on a string that has already been converted to * matter if you call it on a string that has already been converted to
* Unicode. * Unicode.
* @memberOf Punycode * @memberOf punycode
* @param {String} domain The Punycode domain name to convert to Unicode. * @param {String} domain The Punycode domain name to convert to Unicode.
* @returns {String} The Unicode representation of the given Punycode * @returns {String} The Unicode representation of the given Punycode
* string. * string.
@ -509,13 +454,13 @@
* Converts a Unicode string representing a domain name to Punycode. Only the * Converts a Unicode string representing a domain name to Punycode. Only the
* non-ASCII parts of the domain name will be converted, i.e. it doesn't * non-ASCII parts of the domain name will be converted, i.e. it doesn't
* matter if you call it with a domain that's already in ASCII. * matter if you call it with a domain that's already in ASCII.
* @memberOf Punycode * @memberOf punycode
* @param {String} domain The domain name to convert, as a Unicode string. * @param {String} domain The domain name to convert, as a Unicode string.
* @returns {String} The Punycode representation of the given domain name. * @returns {String} The Punycode representation of the given domain name.
*/ */
function toASCII(domain) { function toASCII(domain) {
return mapDomain(domain, function(string) { return mapDomain(domain, function(string) {
return regexASCII.test(string) return regexNonASCII.test(string)
? 'xn--' + encode(string) ? 'xn--' + encode(string)
: string; : string;
}); });
@ -524,12 +469,17 @@
/*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/
/** Define the public API */ /** Define the public API */
Punycode = { punycode = {
'version': '0.1.1', /**
* A string representing the current Punycode.js version number.
* @memberOf punycode
* @type String
*/
'version': '0.2.1',
/** /**
* An object of methods to convert from JavaScript's internal character * An object of methods to convert from JavaScript's internal character
* representation to Unicode and back. * representation to Unicode and back.
* @memberOf Punycode * @memberOf punycode
* @type Object * @type Object
*/ */
'utf16': { 'utf16': {
@ -542,23 +492,23 @@
'toUnicode': toUnicode 'toUnicode': toUnicode
}; };
/** Expose Punycode */ /** Expose `punycode` */
if (freeExports) { if (freeExports) {
if (typeof module == 'object' && module && module.exports == freeExports) { if (freeModule && freeModule.exports == freeExports) {
// in Node.js // in Node.js or Ringo 0.8+
module.exports = Punycode; freeModule.exports = punycode;
} else { } else {
// in Narwhal or Ringo // in Narwhal or Ringo 0.7-
freeExports.Punycode = Punycode; for (key in punycode) {
punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]);
}
} }
} else if (freeDefine) { } else if (freeDefine) {
// via curl.js or RequireJS // via curl.js or RequireJS
freeDefine(function() { define('punycode', punycode);
return Punycode;
});
} else { } else {
// in a browser or Rhino // in a browser or Rhino
window.Punycode = Punycode; root.punycode = punycode;
} }
}(this)); }(this));