diff --git a/lib/url.js b/lib/url.js index f272551ac9c..55c4bf25bd1 100644 --- a/lib/url.js +++ b/lib/url.js @@ -7,11 +7,32 @@ exports.format = urlFormat; // compiled once on the first module load. var protocolPattern = /^([a-z0-9]+:)/, portPattern = /:[0-9]+$/, - nonHostChars = ['/', '?', ';', '#'], + delims = ['<', '>', '"', '\'', '`', /\s/], + unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims), + nonHostChars = ['/', '?', ';', '#'].concat(unwise), + hostnameMaxLen = 255, + hostnamePartPattern = /^[a-z0-9][a-z0-9A-Z-]{0,62}$/, + unsafeProtocol = { + 'javascript': true, + 'javascript:': true + }, hostlessProtocol = { + 'javascript': true, + 'javascript:': true, 'file': true, 'file:': true }, + pathedProtocol = { + 'http': true, + 'https': true, + 'ftp': true, + 'gopher': true, + 'file': true, + 'http:': true, + 'ftp:': true, + 'gopher:': true, + 'file:': true + }, slashedProtocol = { 'http': true, 'https': true, @@ -29,7 +50,7 @@ var protocolPattern = /^([a-z0-9]+:)/, function urlParse(url, parseQueryString, slashesDenoteHost) { if (url && typeof(url) === 'object' && url.href) return url; - var out = { href: url }, + var out = {}, rest = url; var proto = protocolPattern.exec(rest); @@ -50,6 +71,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { out.slashes = true; } } + if (!hostlessProtocol[proto] && (slashes || (proto && !slashedProtocol[proto]))) { // there's a hostname. @@ -79,9 +101,36 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { // we've indicated that there is a hostname, // so even if it's empty, it has to be present. out.hostname = out.hostname || ''; + + // validate a little. + if (out.hostname.length > hostnameMaxLen) { + out.hostname = ''; + } else { + var hostparts = out.hostname.split(/\./); + for (var i = 0, l = hostparts.length; i < l; i++) { + var part = hostparts[i]; + if (!part.match(hostnamePartPattern)) { + out.hostname = ''; + break; + } + } + } } // now rest is set to the post-host stuff. + // chop off any delim chars. + if (!unsafeProtocol[proto]) { + var chop = rest.length; + for (var i = 0, l = delims.length; i < l; i++) { + var c = rest.indexOf(delims[i]); + if (c !== -1) { + chop = Math.min(c, chop); + } + } + rest = rest.substr(0, chop); + } + + // chop off from the tail first. var hash = rest.indexOf('#'); if (hash !== -1) { @@ -99,9 +148,17 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { rest = rest.slice(0, qm); } else if (parseQueryString) { // no query string, but parseQueryString still requested + out.search = ''; out.query = {}; } if (rest) out.pathname = rest; + if (slashedProtocol[proto] && + out.hostname && !out.pathname) { + out.pathname = '/'; + } + + // finally, reconstruct the href based on what has been validated. + out.href = urlFormat(out); return out; } @@ -123,13 +180,12 @@ function urlFormat(obj) { ) : false, pathname = obj.pathname || '', - search = obj.search || ( - obj.query && ('?' + ( - typeof(obj.query) === 'object' ? - querystring.stringify(obj.query) : - String(obj.query) - )) - ) || '', + query = obj.query && + ((typeof obj.query === 'object' && + Object.keys(obj.query).length) ? + querystring.stringify(obj.query) : + '') || '', + search = obj.search || (query && ('?' + query)) || '', hash = obj.hash || ''; if (protocol && protocol.substr(-1) !== ':') protocol += ':'; diff --git a/test/simple/test-url.js b/test/simple/test-url.js index 91509b2baa8..f38195874b6 100644 --- a/test/simple/test-url.js +++ b/test/simple/test-url.js @@ -28,7 +28,8 @@ var parseTests = { 'pathname': '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=' }, 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : { - 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=', + 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api' + + '&x=2&y=2&z=3&s=', 'protocol': 'http:', 'host': 'mt0.google.com', 'hostname': 'mt0.google.com', @@ -37,7 +38,8 @@ var parseTests = { 'pathname': '/vt/lyrs=m@114' }, 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : { - 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=', + 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' + + '&hl=en&src=api&x=2&y=2&z=3&s=', 'protocol': 'http:', 'host': 'user:pass@mt0.google.com', 'auth': 'user:pass', @@ -84,49 +86,6 @@ var parseTests = { 'query': 'baz=quux', 'pathname': '/foo/bar' }, - 'http://example.com?foo=bar#frag' : { - 'href': 'http://example.com?foo=bar#frag', - 'protocol': 'http:', - 'host': 'example.com', - 'hostname': 'example.com', - 'hash': '#frag', - 'search': '?foo=bar', - 'query': 'foo=bar' - }, - 'http://example.com?foo=@bar#frag' : { - 'href': 'http://example.com?foo=@bar#frag', - 'protocol': 'http:', - 'host': 'example.com', - 'hostname': 'example.com', - 'hash': '#frag', - 'search': '?foo=@bar', - 'query': 'foo=@bar' - }, - 'http://example.com?foo=/bar/#frag' : { - 'href': 'http://example.com?foo=/bar/#frag', - 'protocol': 'http:', - 'host': 'example.com', - 'hostname': 'example.com', - 'hash': '#frag', - 'search': '?foo=/bar/', - 'query': 'foo=/bar/' - }, - 'http://example.com?foo=?bar/#frag' : { - 'href': 'http://example.com?foo=?bar/#frag', - 'protocol': 'http:', - 'host': 'example.com', - 'hostname': 'example.com', - 'hash': '#frag', - 'search': '?foo=?bar/', - 'query': 'foo=?bar/' - }, - 'http://example.com#frag=?bar/#frag' : { - 'href': 'http://example.com#frag=?bar/#frag', - 'protocol': 'http:', - 'host': 'example.com', - 'hostname': 'example.com', - 'hash': '#frag=?bar/#frag' - }, '/foo/bar?baz=quux#frag' : { 'href': '/foo/bar?baz=quux#frag', 'hash': '#frag', @@ -154,9 +113,7 @@ var parseTests = { 'javascript:alert(\'hello\');' : { 'href': 'javascript:alert(\'hello\');', 'protocol': 'javascript:', - 'host': 'alert(\'hello\')', - 'hostname': 'alert(\'hello\')', - 'pathname' : ';' + 'pathname': 'alert(\'hello\');' }, 'xmpp:isaacschlueter@jabber.org' : { 'href': 'xmpp:isaacschlueter@jabber.org', @@ -194,21 +151,13 @@ var parseTestsWithQueryString = { 'pathname': '/foo/bar' }, 'http://example.com' : { - 'href': 'http://example.com', + 'href': 'http://example.com/', 'protocol': 'http:', 'slashes': true, 'host': 'example.com', 'hostname': 'example.com', - 'query': {} - }, - 'http://example.com?' : { - 'href': 'http://example.com?', - 'protocol': 'http:', - 'slashes': true, - 'host': 'example.com', - 'hostname': 'example.com', - 'search': '?', - 'query': {} + 'query': {}, + 'pathname': '/' } }; for (var u in parseTestsWithQueryString) { @@ -225,7 +174,72 @@ for (var u in parseTestsWithQueryString) { // some extra formatting tests, just to verify // that it'll format slightly wonky content to a valid url. var formatTests = { + 'http://example.com?' : { + 'href': 'http://example.com/?', + 'protocol': 'http:', + 'slashes': true, + 'host': 'example.com', + 'hostname': 'example.com', + 'search': '?', + 'query': {}, + 'pathname': '/' + }, + 'http://example.com?foo=bar#frag' : { + 'href': 'http://example.com/?foo=bar#frag', + 'protocol': 'http:', + 'host': 'example.com', + 'hostname': 'example.com', + 'hash': '#frag', + 'search': '?foo=bar', + 'query': 'foo=bar', + 'pathname': '/' + }, + 'http://example.com?foo=@bar#frag' : { + 'href': 'http://example.com/?foo=@bar#frag', + 'protocol': 'http:', + 'host': 'example.com', + 'hostname': 'example.com', + 'hash': '#frag', + 'search': '?foo=@bar', + 'query': 'foo=@bar', + 'pathname': '/' + }, + 'http://example.com?foo=/bar/#frag' : { + 'href': 'http://example.com/?foo=/bar/#frag', + 'protocol': 'http:', + 'host': 'example.com', + 'hostname': 'example.com', + 'hash': '#frag', + 'search': '?foo=/bar/', + 'query': 'foo=/bar/', + 'pathname': '/' + }, + 'http://example.com?foo=?bar/#frag' : { + 'href': 'http://example.com/?foo=?bar/#frag', + 'protocol': 'http:', + 'host': 'example.com', + 'hostname': 'example.com', + 'hash': '#frag', + 'search': '?foo=?bar/', + 'query': 'foo=?bar/', + 'pathname': '/' + }, + 'http://example.com#frag=?bar/#frag' : { + 'href': 'http://example.com/#frag=?bar/#frag', + 'protocol': 'http:', + 'host': 'example.com', + 'hostname': 'example.com', + 'hash': '#frag=?bar/#frag', + 'pathname': '/' + }, + 'http://google.com" onload="alert(42)/' : { + 'href': 'http://google.com/', + 'protocol': 'http:', + 'host': 'google.com', + 'pathname': '/' + }, 'http://a.com/a/b/c?s#h' : { + 'href': 'http://a.com/a/b/c?s#h', 'protocol': 'http', 'host': 'a.com', 'pathname': 'a/b/c', @@ -233,7 +247,7 @@ var formatTests = { 'search': 's' }, 'xmpp:isaacschlueter@jabber.org' : { - 'href': 'xmpp://isaacschlueter@jabber.org', + 'href': 'xmpp:isaacschlueter@jabber.org', 'protocol': 'xmpp:', 'host': 'isaacschlueter@jabber.org', 'auth': 'isaacschlueter', @@ -241,9 +255,17 @@ var formatTests = { } }; for (var u in formatTests) { - var actual = url.format(formatTests[u]); - assert.equal(actual, u, - 'wonky format(' + u + ') == ' + u + '\nactual:' + actual); + var expect = formatTests[u].href; + delete formatTests[u].href; + var actual = url.format(u); + var actualObj = url.format(formatTests[u]); + assert.equal(actual, expect, + 'wonky format(' + u + ') == ' + expect + + '\nactual:' + actual); + assert.equal(actualObj, expect, + 'wonky format(' + JSON.stringify(formatTests[u]) + + ') == ' + expect + + '\nactual: ' + actualObj); } /*