http: disallow two-byte characters in URL path
This commit changes node's handling of two-byte characters in the path component of an http URL. Previously, node would just strip the higher byte when generating the request. So this code: ``` http.request({host: "example.com", port: "80", path: "/Ｎ"}) ``` would request `http://example.com/.` (`Ｎ` is U+FF2E, and `.` is the character for its low byte `0x2e`). This is not useful and can in some cases lead to filter evasion. With this change, the code generates `ERR_UNESCAPED_CHARACTERS`, just like space and control characters already did. PR-URL: https://github.com/nodejs/node/pull/16237 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Anatoli Papirovski <apapirovski@mac.com> Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de> Reviewed-By: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
parent
ac25cee2e2
commit
b961d9fd83
@ -41,33 +41,7 @@ const { outHeadersKey } = require('internal/http');
|
||||
const { nextTick } = require('internal/process/next_tick');
|
||||
const errors = require('internal/errors');
|
||||
|
||||
// The actual list of disallowed characters in regexp form is more like:
//    /[^A-Za-z0-9\-._~!$&'()*+,;=/:@]/
// with an additional rule for ignoring percentage-escaped characters, but
// that's a) hard to capture in a regular expression that performs well, and
// b) possibly too restrictive for real-world usage. So instead we restrict the
// filter to control characters, spaces, and characters above U+00FF, i.e. the
// same set that /[^\u0021-\u00ff]/ matches.
//
// Characters above U+00FF are rejected because the higher byte is stripped
// when the request is generated: a path of '/\uff2e' would be sent as '/.',
// which is not useful and can in some cases lead to filter evasion.
//
// This function is used in the case of small paths, where manual character code
// checks can greatly outperform the equivalent regexp (tested in V8 5.4).
//
// `s` is the request path as a string. Returns true when `s` contains at least
// one disallowed character, false otherwise (including for the empty string).
function isInvalidPath(s) {
  for (var i = 0; i < s.length; ++i) {
    var code = s.charCodeAt(i);
    // <= 32 covers control characters and space; > 0xff covers every
    // character that does not fit in a single byte.
    if (code <= 32 || code > 0xff) return true;
  }
  return false;
}
|
||||
// Matches any single character outside the range U+0021..U+00FF, i.e. control
// characters, space, and every character that does not fit in one byte.
// No `g`/`y` flag is set, so repeated .test() calls carry no lastIndex state.
const INVALID_PATH_REGEX = /[^\u0021-\u00ff]/;
|
||||
|
||||
function validateHost(host, name) {
|
||||
if (host != null && typeof host !== 'string') {
|
||||
@ -117,13 +91,7 @@ function ClientRequest(options, cb) {
|
||||
var path;
|
||||
if (options.path) {
|
||||
path = String(options.path);
|
||||
var invalidPath;
|
||||
if (path.length <= 39) { // Determined experimentally in V8 5.4
|
||||
invalidPath = isInvalidPath(path);
|
||||
} else {
|
||||
invalidPath = /[\u0000-\u0020]/.test(path);
|
||||
}
|
||||
if (invalidPath)
|
||||
if (INVALID_PATH_REGEX.test(path))
|
||||
throw new errors.TypeError('ERR_UNESCAPED_CHARACTERS', 'Request path');
|
||||
}
|
||||
|
||||
|
12
test/parallel/test-http-client-invalid-path.js
Normal file
12
test/parallel/test-http-client-invalid-path.js
Normal file
@ -0,0 +1,12 @@
|
||||
'use strict';
|
||||
const common = require('../common');
|
||||
const http = require('http');
|
||||
|
||||
// Regression check: a request path containing a character above U+00FF
// ('\uffe2' here) is expected to be rejected with a TypeError whose code is
// ERR_UNESCAPED_CHARACTERS. The expectation is enforced through the
// project-local common.expectsError helper, whose exact semantics are defined
// in ../common (not visible here).
common.expectsError(() => {
|
||||
http.request({
|
||||
path: '/thisisinvalid\uffe2'
|
||||
}).end();
|
||||
}, {
|
||||
code: 'ERR_UNESCAPED_CHARACTERS',
|
||||
type: TypeError
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user