reuse open(2) from rb_file_load_ok on POSIX-like systems

When loading Ruby source files, we can save the result of
successful opens, as open(2)/openat(2) are fairly expensive
syscalls.  This also avoids a time-of-check-to-time-of-use
(TOCTTOU) problem.

This reduces open(2) syscalls during `require', but the benefit
should be most apparent when users have a small $LOAD_PATH.  Users
with a large $LOAD_PATH will benefit less since there'll be more
open(2) failures due to ENOENT.

With `strace -c -e openat ruby -e exit' under Linux, this
results in a ~14% reduction of openat(2) syscalls
(glibc uses openat(2) to implement open(2)).

 % time     seconds  usecs/call     calls    errors syscall
 ------ ----------- ----------- --------- --------- ----------------
   0.00    0.000000           0       296       110 openat
   0.00    0.000000           0       254       110 openat

Additionally, the introduction of `struct ruby_file_load_state'
may make future optimizations more apparent.

This change cannot benefit binary (.so) loading since the
dlopen(3) API requires a filename and I'm not aware of an
alternative that takes a pre-existing FD.  In typical
situations, Ruby source files outnumber .so
files.
This commit is contained in:
Eric Wong 2023-02-24 18:05:36 +00:00 committed by Eric Wong
parent 6e6992e5db
commit 35136e1e9c
6 changed files with 101 additions and 27 deletions

38
file.c
View File

@ -6359,7 +6359,7 @@ ruby_is_fd_loadable(int fd)
#ifndef _WIN32
int
rb_file_load_ok(const char *path)
rb_file_load_ok(const char *path, struct ruby_file_load_state *fls)
{
int ret = 1;
/*
@ -6381,10 +6381,21 @@ rb_file_load_ok(const char *path)
}
rb_update_max_fd(fd);
ret = ruby_is_fd_loadable(fd);
(void)close(fd);
#if defined(DOSISH) || defined(__CYGWIN__)
fls = NULL; /* need to set xflag via open_load_file */
#endif
if (ret && fls) {
/* TODO: avoid path object alloc in rb_io_fdopen */
fls->filev = rb_io_fdopen(fd, mode, path);
fls->is_fifo = ret < 0 ? 1 : 0;
fls->is_nonblock = mode == O_RDONLY ? 0 : 1;
} else {
(void)close(fd);
}
return ret;
}
#endif
#endif /* !_WIN32 */
static int
is_explicit_relative(const char *path)
@ -6409,6 +6420,13 @@ copy_path_class(VALUE path, VALUE orig)
/*
 * Keeps the existing two-argument signature and forwards to
 * ruby_find_file_ext() with a NULL load state, so no opened file is
 * handed back to the caller.
 */
int
rb_find_file_ext(VALUE *filep, const char *const *ext)
{
return ruby_find_file_ext(filep, ext, NULL);
}
int
ruby_find_file_ext(VALUE *filep, const char *const *ext,
struct ruby_file_load_state *fls)
{
const char *f = StringValueCStr(*filep);
VALUE fname = *filep, load_path, tmp;
@ -6429,7 +6447,7 @@ rb_find_file_ext(VALUE *filep, const char *const *ext)
fnlen = RSTRING_LEN(fname);
for (i=0; ext[i]; i++) {
rb_str_cat2(fname, ext[i]);
if (rb_file_load_ok(RSTRING_PTR(fname))) {
if (rb_file_load_ok(RSTRING_PTR(fname), fls)) {
*filep = copy_path_class(fname, *filep);
return (int)(i+1);
}
@ -6454,7 +6472,7 @@ rb_find_file_ext(VALUE *filep, const char *const *ext)
RB_GC_GUARD(str) = rb_get_path(str);
if (RSTRING_LEN(str) == 0) continue;
rb_file_expand_path_internal(fname, str, 0, 0, tmp);
if (rb_file_load_ok(RSTRING_PTR(tmp))) {
if (rb_file_load_ok(RSTRING_PTR(tmp), fls)) {
*filep = copy_path_class(tmp, *filep);
return (int)(j+1);
}
@ -6468,6 +6486,12 @@ rb_find_file_ext(VALUE *filep, const char *const *ext)
/*
 * Keeps the existing one-argument signature and forwards to
 * ruby_find_file() with a NULL load state, so no opened file is
 * handed back to the caller.
 */
VALUE
rb_find_file(VALUE path)
{
return ruby_find_file(path, NULL);
}
VALUE
ruby_find_file(VALUE path, struct ruby_file_load_state *fls)
{
VALUE tmp, load_path;
const char *f = StringValueCStr(path);
@ -6481,7 +6505,7 @@ rb_find_file(VALUE path)
}
if (expanded || rb_is_absolute_path(f) || is_explicit_relative(f)) {
if (!rb_file_load_ok(f)) return 0;
if (!rb_file_load_ok(f, fls)) return 0;
if (!expanded)
path = copy_path_class(file_expand_path_1(path), path);
return path;
@ -6499,7 +6523,7 @@ rb_find_file(VALUE path)
if (RSTRING_LEN(str) > 0) {
rb_file_expand_path_internal(path, str, 0, 0, tmp);
f = RSTRING_PTR(tmp);
if (rb_file_load_ok(f)) goto found;
if (rb_file_load_ok(f, fls)) goto found;
}
}
rb_str_resize(tmp, 0);

View File

@ -11,6 +11,14 @@
#include "ruby/ruby.h" /* for VALUE */
#include "ruby/encoding.h" /* for rb_encoding */
/*
 * State passed from the file-search step to the loader so that a file
 * opened while checking loadability can be reused instead of reopened.
 * Callers initialize it with `.filev = Qfalse'.
 */
struct ruby_file_load_state {
/* TODO: consider stuffing `VALUE fname' here */
/* IO object wrapping the already-open fd; Qfalse when no file is held */
VALUE filev;
/* set when the loadability check on the fd returned a negative value */
unsigned int is_fifo:1;
/* set when the file was opened with a mode other than plain O_RDONLY */
unsigned int is_nonblock:1;
/* TODO: DOSISH / __CYGWIN__ maintainer may add xflag here */
};
/* file.c */
extern const char ruby_null_device[];
VALUE rb_home_dir_of(VALUE user, VALUE result);
@ -18,12 +26,16 @@ VALUE rb_default_home_dir(VALUE result);
VALUE rb_realpath_internal(VALUE basedir, VALUE path, int strict);
VALUE rb_check_realpath(VALUE basedir, VALUE path, rb_encoding *origenc);
void rb_file_const(const char*, VALUE);
int rb_file_load_ok(const char *);
int rb_file_load_ok(const char *, struct ruby_file_load_state *);
VALUE rb_file_expand_path_fast(VALUE, VALUE);
VALUE rb_file_expand_path_internal(VALUE, VALUE, int, int, VALUE);
VALUE rb_get_path_check_to_string(VALUE);
VALUE rb_get_path_check_convert(VALUE);
int ruby_is_fd_loadable(int fd);
int ruby_disable_nonblock(int fd);
int ruby_find_file_ext(VALUE *filep, const char *const *ext,
struct ruby_file_load_state *);
VALUE ruby_find_file(VALUE path, struct ruby_file_load_state *);
RUBY_SYMBOL_EXPORT_BEGIN
/* file.c (export) */

View File

@ -10,10 +10,13 @@
*/
#include "ruby/ruby.h" /* for VALUE */
struct rb_iseq_struct; /* in vm_core.h */
struct ruby_file_load_state; /* internal/file.h */
/* parse.y */
VALUE rb_parser_set_yydebug(VALUE, VALUE);
void *rb_parser_load_file(VALUE parser, VALUE name);
void *rb_parser_load_state(VALUE parser, VALUE name,
struct ruby_file_load_state *);
void rb_parser_keep_script_lines(VALUE vparser);
void rb_parser_error_tolerant(VALUE vparser);
void rb_parser_keep_tokens(VALUE vparser);

46
load.c
View File

@ -676,7 +676,8 @@ rb_provide(const char *feature)
NORETURN(static void load_failed(VALUE));
static inline void
load_iseq_eval(rb_execution_context_t *ec, VALUE fname)
load_iseq_eval(rb_execution_context_t *ec, VALUE fname,
struct ruby_file_load_state *fls)
{
const rb_iseq_t *iseq = rb_iseq_load_iseq(fname);
@ -686,7 +687,7 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname)
rb_ast_t *ast;
VALUE parser = rb_parser_new();
rb_parser_set_context(parser, NULL, FALSE);
ast = (rb_ast_t *)rb_parser_load_file(parser, fname);
ast = (rb_ast_t *)rb_parser_load_state(parser, fname, fls);
iseq = rb_iseq_new_top(&ast->body, rb_fstring_lit("<top (required)>"),
fname, rb_realpath_internal(Qnil, fname, 1), NULL);
rb_ast_dispose(ast);
@ -698,7 +699,8 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname)
}
static inline enum ruby_tag_type
load_wrapping(rb_execution_context_t *ec, VALUE fname, VALUE load_wrapper)
load_wrapping(rb_execution_context_t *ec, VALUE fname, VALUE load_wrapper,
struct ruby_file_load_state *fls)
{
enum ruby_tag_type state;
rb_thread_t *th = rb_ec_thread_ptr(ec);
@ -718,7 +720,7 @@ load_wrapping(rb_execution_context_t *ec, VALUE fname, VALUE load_wrapper)
EC_PUSH_TAG(ec);
state = EC_EXEC_TAG();
if (state == TAG_NONE) {
load_iseq_eval(ec, fname);
load_iseq_eval(ec, fname, fls);
}
EC_POP_TAG();
@ -744,7 +746,7 @@ raise_load_if_failed(rb_execution_context_t *ec, enum ruby_tag_type state)
}
static void
rb_load_internal(VALUE fname, VALUE wrap)
rb_load_internal(VALUE fname, VALUE wrap, struct ruby_file_load_state *fls)
{
rb_execution_context_t *ec = GET_EC();
enum ruby_tag_type state = TAG_NONE;
@ -752,10 +754,10 @@ rb_load_internal(VALUE fname, VALUE wrap)
if (!RB_TYPE_P(wrap, T_MODULE)) {
wrap = rb_module_new();
}
state = load_wrapping(ec, fname, wrap);
state = load_wrapping(ec, fname, wrap, fls);
}
else {
load_iseq_eval(ec, fname);
load_iseq_eval(ec, fname, fls);
}
raise_load_if_failed(ec, state);
}
@ -763,9 +765,10 @@ rb_load_internal(VALUE fname, VALUE wrap)
void
rb_load(VALUE fname, int wrap)
{
VALUE tmp = rb_find_file(FilePathValue(fname));
struct ruby_file_load_state fls = { .filev = Qfalse };
VALUE tmp = ruby_find_file(FilePathValue(fname), &fls);
if (!tmp) load_failed(fname);
rb_load_internal(tmp, RBOOL(wrap));
rb_load_internal(tmp, RBOOL(wrap), &fls);
}
void
@ -817,6 +820,7 @@ static VALUE
rb_f_load(int argc, VALUE *argv, VALUE _)
{
VALUE fname, wrap, path, orig_fname;
struct ruby_file_load_state fls = { .filev = Qfalse };
rb_scan_args(argc, argv, "11", &fname, &wrap);
@ -824,13 +828,13 @@ rb_f_load(int argc, VALUE *argv, VALUE _)
fname = rb_str_encode_ospath(orig_fname);
RUBY_DTRACE_HOOK(LOAD_ENTRY, RSTRING_PTR(orig_fname));
path = rb_find_file(fname);
path = ruby_find_file(fname, &fls);
if (!path) {
if (!rb_file_load_ok(RSTRING_PTR(fname)))
if (!rb_file_load_ok(RSTRING_PTR(fname), &fls))
load_failed(orig_fname);
path = fname;
}
rb_load_internal(path, wrap);
rb_load_internal(path, wrap, &fls);
RUBY_DTRACE_HOOK(LOAD_RETURN, RSTRING_PTR(orig_fname));
@ -961,7 +965,8 @@ rb_f_require_relative(VALUE obj, VALUE fname)
typedef int (*feature_func)(rb_vm_t *vm, const char *feature, const char *ext, int rb, int expanded, const char **fn);
static int
search_required(rb_vm_t *vm, VALUE fname, volatile VALUE *path, feature_func rb_feature_p)
search_required(rb_vm_t *vm, VALUE fname, volatile VALUE *path,
feature_func rb_feature_p, struct ruby_file_load_state *fls)
{
VALUE tmp;
char *ext, *ftptr;
@ -976,7 +981,7 @@ search_required(rb_vm_t *vm, VALUE fname, volatile VALUE *path, feature_func rb_
if (loading) *path = rb_filesystem_str_new_cstr(loading);
return 'r';
}
if ((tmp = rb_find_file(fname)) != 0) {
if ((tmp = ruby_find_file(fname, fls)) != 0) {
ext = strrchr(ftptr = RSTRING_PTR(tmp), '.');
if (!rb_feature_p(vm, ftptr, ext, TRUE, TRUE, &loading) || loading)
*path = tmp;
@ -1017,7 +1022,7 @@ search_required(rb_vm_t *vm, VALUE fname, volatile VALUE *path, feature_func rb_
return 'r';
}
tmp = fname;
type = rb_find_file_ext(&tmp, ft == 's' ? ruby_ext : loadable_ext);
type = ruby_find_file_ext(&tmp, ft == 's' ? ruby_ext : loadable_ext, fls);
#if EXTSTATIC
if (!ft && type != 1) { // not already a feature and not found as a dynamic library
VALUE lookup_name = tmp;
@ -1103,7 +1108,7 @@ rb_resolve_feature_path(VALUE klass, VALUE fname)
fname = rb_get_path(fname);
path = rb_str_encode_ospath(fname);
found = search_required(GET_VM(), path, &path, no_feature_p);
found = search_required(GET_VM(), path, &path, no_feature_p, NULL);
switch (found) {
case 'r':
@ -1163,6 +1168,7 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception, bool wa
VALUE realpaths = get_loaded_features_realpaths(th->vm);
volatile bool reset_ext_config = false;
struct rb_ext_config prev_ext_config;
struct ruby_file_load_state fls = { .filev = Qfalse };
fname = rb_get_path(fname);
path = rb_str_encode_ospath(fname);
@ -1177,7 +1183,7 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception, bool wa
int found;
RUBY_DTRACE_HOOK(FIND_REQUIRE_ENTRY, RSTRING_PTR(fname));
found = search_required(th->vm, path, &saved_path, rb_feature_p);
found = search_required(th->vm, path, &saved_path, rb_feature_p, &fls);
RUBY_DTRACE_HOOK(FIND_REQUIRE_RETURN, RSTRING_PTR(fname));
path = saved_path;
@ -1200,10 +1206,14 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception, bool wa
else {
switch (found) {
case 'r':
load_iseq_eval(ec, path);
load_iseq_eval(ec, path, &fls);
break;
case 's':
if (fls.filev != Qfalse) {
rb_io_close(fls.filev);
fls.filev = Qfalse;
}
reset_ext_config = true;
ext_config_push(th, &prev_ext_config);
handle = (long)rb_vm_call_cfunc(rb_vm_top_self(), load_ext,

23
ruby.c
View File

@ -2539,6 +2539,29 @@ rb_parser_load_file(VALUE parser, VALUE fname_v)
return load_file(parser, fname_v, f, 0, &opt);
}
/*
 * Parse the script named by fname_v.  When fls->filev holds a saved IO
 * (anything other than Qfalse), that IO is passed to load_file();
 * otherwise this falls back to rb_parser_load_file(parser, fname_v).
 *
 * fls is dereferenced unconditionally, so callers must not pass NULL.
 */
void *
rb_parser_load_state(VALUE parser, VALUE fname_v,
struct ruby_file_load_state *fls)
{
if (fls->filev != Qfalse) {
ruby_cmdline_options_t opt;
cmdline_options_init(&opt);
/* TODO: xflag for DOSISH || __CYGWIN__ */
if (fls->is_nonblock) {
/* is_nonblock is set: call disable_nonblock() on the IO's fd */
struct rb_io_t *fptr;
RB_IO_POINTER(fls->filev, fptr);
disable_nonblock(fptr->fd);
}
if (fls->is_fifo) {
/* wait for the IO to become readable before loading from it */
rb_io_wait(fls->filev, RB_INT2NUM(RUBY_IO_READABLE), Qnil);
}
return load_file(parser, fname_v, fls->filev, 0, &opt);
}
/* no saved IO: take the path-based loader */
return rb_parser_load_file(parser, fname_v);
}
/*
* call-seq:
* Process.argv0 -> frozen_string

View File

@ -10,6 +10,7 @@
#include <wchar.h>
#include <shlwapi.h>
#include "win32/file.h"
#include "internal/file.h"
#ifndef INVALID_FILE_ATTRIBUTES
# define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
@ -592,8 +593,9 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
return result;
}
/* TODO: using @fls is an exercise for a win32 hacker */
int
rb_file_load_ok(const char *path)
rb_file_load_ok(const char *path, struct ruby_file_load_state *fls)
{
DWORD attr;
int ret = 1;