diff --git a/lib/url.js b/lib/url.js index b8ba3fb1dd7..db7723895b6 100644 --- a/lib/url.js +++ b/lib/url.js @@ -64,7 +64,7 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i, // them. nonHostChars = ['%', '/', '?', ';', '#'] .concat(unwise).concat(autoEscape), - nonAuthChars = ['/', '@', '?', '#'].concat(delims), + hostEndingChars = ['/', '?', '#'], hostnameMaxLen = 255, hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/, hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/, @@ -146,50 +146,64 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { if (!hostlessProtocol[proto] && (slashes || (proto && !slashedProtocol[proto]))) { + // there's a hostname. // the first instance of /, ?, ;, or # ends the host. - // don't enforce full RFC correctness, just be unstupid about it. - + // // If there is an @ in the hostname, then non-host chars *are* allowed - // to the left of the first @ sign, unless some non-auth character + // to the left of the last @ sign, unless some host-ending character // comes *before* the @-sign. // URLs are obnoxious. - var atSign = rest.indexOf('@'); - if (atSign !== -1) { - var auth = rest.slice(0, atSign); + // + // ex: + // http://a@b@c/ => user:a@b host:c + // http://a@b?@c => user:a host:c path:/?@c - // there *may be* an auth - var hasAuth = true; - for (var i = 0, l = nonAuthChars.length; i < l; i++) { - if (auth.indexOf(nonAuthChars[i]) !== -1) { - // not a valid auth. Something like http://foo.com/bar@baz/ - hasAuth = false; - break; - } - } + // v0.12 TODO(isaacs): This is not quite how Chrome does things. + // Review our test case against browsers more comprehensively. - if (hasAuth) { - // pluck off the auth portion. - this.auth = decodeURIComponent(auth); - rest = rest.substr(atSign + 1); - } + // find the first instance of any hostEndingChars + var hostEnd = -1; + for (var i = 0; i < hostEndingChars.length; i++) { + var hec = rest.indexOf(hostEndingChars[i]); + if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) + hostEnd = hec; } - var firstNonHost = -1; - for (var i = 0, l = nonHostChars.length; i < l; i++) { - var index = rest.indexOf(nonHostChars[i]); - if (index !== -1 && - (firstNonHost < 0 || index < firstNonHost)) firstNonHost = index; - } - - if (firstNonHost !== -1) { - this.host = rest.substr(0, firstNonHost); - rest = rest.substr(firstNonHost); + // at this point, either we have an explicit point where the + // auth portion cannot go past, or the last @ char is the decider. + var auth, atSign; + if (hostEnd === -1) { + // atSign can be anywhere. + atSign = rest.lastIndexOf('@'); } else { - this.host = rest; - rest = ''; + // atSign must be in auth portion. + // http://a@b/c@d => host:b auth:a path:/c@d + atSign = rest.lastIndexOf('@', hostEnd); } + // Now we have a portion which is definitely the auth. + // Pull that off. + if (atSign !== -1) { + auth = rest.slice(0, atSign); + rest = rest.slice(atSign + 1); + this.auth = decodeURIComponent(auth); + } + + // the host is the remaining to the left of the first non-host char + hostEnd = -1; + for (var i = 0; i < nonHostChars.length; i++) { + var hec = rest.indexOf(nonHostChars[i]); + if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) + hostEnd = hec; + } + // if we still have not hit it, then the entire thing is a host. + if (hostEnd === -1) + hostEnd = rest.length; + + this.host = rest.slice(0, hostEnd); + rest = rest.slice(hostEnd); + // pull out port. this.parseHost(); diff --git a/test/simple/test-url.js b/test/simple/test-url.js index 6630da1025e..d27abbab8a3 100644 --- a/test/simple/test-url.js +++ b/test/simple/test-url.js @@ -741,6 +741,45 @@ var parseTests = { 'path': '/test', }, + 'http://a@b@c/': { + protocol: 'http:', + slashes: true, + auth: 'a@b', + host: 'c', + hostname: 'c', + href: 'http://a%40b@c/', + path: '/', + pathname: '/' + }, + + 'http://a@b?@c': { + protocol: 'http:', + slashes: true, + auth: 'a', + host: 'b', + hostname: 'b', + href: 'http://a@b/?@c', + path: '/?@c', + pathname: '/', + search: '?@c', + query: '@c' + }, + + 'http://a\r" \t\n<\'b:b@c\r\nd/e?f':{ + protocol: 'http:', + slashes: true, + auth: 'a\r" \t\n<\'b:b', + host: 'c', + port: null, + hostname: 'c', + hash: null, + search: '?f', + query: 'f', + pathname: '%0D%0Ad/e', + path: '%0D%0Ad/e?f', + href: 'http://a%0D%22%20%09%0A%3C\'b:b@c/%0D%0Ad/e?f' + } + }; for (var u in parseTests) {