diff --git a/yarp/extension.c b/yarp/extension.c index 36e9941d64..a189b1a876 100644 --- a/yarp/extension.c +++ b/yarp/extension.c @@ -14,14 +14,6 @@ VALUE rb_cYARPParseResult; /* IO of Ruby code */ /******************************************************************************/ -// Represents an input of Ruby code. It can either be coming from a file or a -// string. If it's a file, we'll use demand paging to read the contents of the -// file into a string. If it's already a string, we'll reference it directly. -typedef struct { - const char *source; - size_t size; -} input_t; - // Check if the given filepath is a string. If it's nil, then return NULL. If // it's not a string, then raise a type error. Otherwise return the filepath as // a C string. @@ -41,142 +33,15 @@ check_filepath(VALUE filepath) { return StringValueCStr(filepath); } -// Read the file indicated by the filepath parameter into source and load its -// contents and size into the given input_t. -// -// We want to use demand paging as much as possible in order to avoid having to -// read the entire file into memory (which could be detrimental to performance -// for large files). This means that if we're on windows we'll use -// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use -// `mmap`, and on other POSIX systems we'll use `read`. -static int -input_load_filepath(input_t *input, const char *filepath) { -#ifdef _WIN32 - // Open the file for reading. - HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - - if (file == INVALID_HANDLE_VALUE) { - perror("CreateFile failed"); - return 1; - } - - // Get the file size. - DWORD file_size = GetFileSize(file, NULL); - if (file_size == INVALID_FILE_SIZE) { - CloseHandle(file); - perror("GetFileSize failed"); - return 1; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. - if (!file_size) { - CloseHandle(file); - input->size = 0; - input->source = ""; - return 0; - } - - // Create a mapping of the file. - HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); - if (mapping == NULL) { - CloseHandle(file); - perror("CreateFileMapping failed"); - return 1; - } - - // Map the file into memory. - input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); - CloseHandle(mapping); - CloseHandle(file); - - if (input->source == NULL) { - perror("MapViewOfFile failed"); - return 1; - } - - // Set the size of the source. - input->size = (size_t) file_size; - return 0; -#else - // Open the file for reading - int fd = open(filepath, O_RDONLY); - if (fd == -1) { - perror("open"); - return 1; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - perror("fstat"); - return 1; - } - - // mmap the file descriptor to virtually get the contents - input->size = sb.st_size; - -#ifdef HAVE_MMAP - if (!input->size) { - close(fd); - input->source = ""; - return 0; - } - - const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0); - if (result == MAP_FAILED) { - perror("Map failed"); - return 1; - } else { - input->source = result; - } -#else - input->source = malloc(input->size); - if (input->source == NULL) return 1; - - ssize_t read_size = read(fd, (void *) input->source, input->size); - if (read_size < 0 || (size_t)read_size != input->size) { - perror("Read size is incorrect"); - free((void *) input->source); - return 1; - } -#endif - - close(fd); - return 0; -#endif -} - -// Load the contents and size of the given string into the given input_t. +// Load the contents and size of the given string into the given yp_string_t. static void -input_load_string(input_t *input, VALUE string) { +input_load_string(yp_string_t *input, VALUE string) { // Check if the string is a string. If it's not, then raise a type error. if (!RB_TYPE_P(string, T_STRING)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string)); } - input->source = RSTRING_PTR(string); - input->size = RSTRING_LEN(string); -} - -// Free any resources associated with the given input_t. This is the corollary -// function to source_file_load. It will unmap the file if it was mapped, or -// free the memory if it was allocated. -static void -input_unload_filepath(input_t *input) { - // We don't need to free anything with 0 sized files because we handle that - // with a constant string instead. - if (!input->size) return; - void *memory = (void *) input->source; - -#if defined(_WIN32) - UnmapViewOfFile(memory); -#elif defined(HAVE_MMAP) - munmap(memory, input->size); -#else - free(memory); -#endif + yp_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string)); } /******************************************************************************/ @@ -185,14 +50,14 @@ input_unload_filepath(input_t *input) { // Dump the AST corresponding to the given input to a string. static VALUE -dump_input(input_t *input, const char *filepath) { +dump_input(yp_string_t *input, const char *filepath) { yp_buffer_t buffer; if (!yp_buffer_init(&buffer)) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } yp_parser_t parser; - yp_parser_init(&parser, input->source, input->size, filepath); + yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath); yp_node_t *node = yp_parse(&parser, false); yp_serialize(&parser, node, &buffer); @@ -212,7 +77,7 @@ dump(int argc, VALUE *argv, VALUE self) { VALUE filepath; rb_scan_args(argc, argv, "11", &string, &filepath); - input_t input; + yp_string_t input; input_load_string(&input, string); return dump_input(&input, check_filepath(filepath)); } @@ -220,13 +85,13 @@ dump(int argc, VALUE *argv, VALUE self) { // Dump the AST corresponding to the given file to a string. static VALUE dump_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; VALUE value = dump_input(&input, checked); - input_unload_filepath(&input); + yp_string_free(&input); return value; } @@ -356,13 +221,13 @@ lex_encoding_changed_callback(yp_parser_t *parser) { // Return an array of tokens corresponding to the given source. static VALUE -lex_input(input_t *input, const char *filepath) { +lex_input(yp_string_t *input, const char *filepath) { yp_parser_t parser; - yp_parser_init(&parser, input->source, input->size, filepath); + yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath); yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback); VALUE offsets = rb_ary_new(); - VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets }; + VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets }; VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource); lex_data_t lex_data = { @@ -410,7 +275,7 @@ lex(int argc, VALUE *argv, VALUE self) { VALUE filepath; rb_scan_args(argc, argv, "11", &string, &filepath); - input_t input; + yp_string_t input; input_load_string(&input, string); return lex_input(&input, check_filepath(filepath)); } @@ -418,13 +283,13 @@ lex(int argc, VALUE *argv, VALUE self) { // Return an array of tokens corresponding to the given file. static VALUE lex_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; VALUE value = lex_input(&input, checked); - input_unload_filepath(&input); + yp_string_free(&input); return value; } @@ -435,9 +300,9 @@ lex_file(VALUE self, VALUE filepath) { // Parse the given input and return a ParseResult instance. static VALUE -parse_input(input_t *input, const char *filepath) { +parse_input(yp_string_t *input, const char *filepath) { yp_parser_t parser; - yp_parser_init(&parser, input->source, input->size, filepath); + yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath); yp_node_t *node = yp_parse(&parser, false); rb_encoding *encoding = rb_enc_find(parser.encoding.name); @@ -466,13 +331,14 @@ parse(int argc, VALUE *argv, VALUE self) { VALUE filepath; rb_scan_args(argc, argv, "11", &string, &filepath); - input_t input; + yp_string_t input; input_load_string(&input, string); #ifdef YARP_DEBUG_MODE_BUILD - char* dup = malloc(input.size); - memcpy(dup, input.source, input.size); - input.source = dup; + size_t length = yp_string_length(&input); + char* dup = malloc(length); + memcpy(dup, yp_string_source(&input), length); + yp_string_constant_init(&input, dup, length); #endif VALUE value = parse_input(&input, check_filepath(filepath)); @@ -487,13 +353,13 @@ parse(int argc, VALUE *argv, VALUE self) { // Parse the given file and return a ParseResult instance. static VALUE parse_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; VALUE value = parse_input(&input, checked); - input_unload_filepath(&input); + yp_string_free(&input); return value; } @@ -586,13 +452,13 @@ memsize(VALUE self, VALUE string) { // parser for memory and speed. static VALUE profile_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; yp_parser_t parser; - yp_parser_init(&parser, input.source, input.size, checked); + yp_parser_init(&parser, yp_string_source(&input), yp_string_length(&input), checked); yp_node_t *node = yp_parse(&parser, false); yp_node_destroy(&parser, node); diff --git a/yarp/extension.h b/yarp/extension.h index d19c390f2b..1f281240c2 100644 --- a/yarp/extension.h +++ b/yarp/extension.h @@ -5,16 +5,6 @@ #include #include "yarp.h" -// The following headers are necessary to read files using demand paging. -#ifdef _WIN32 -#include -#else -#include -#include -#include -#include -#endif - #define EXPECTED_YARP_VERSION "0.4.0" VALUE yp_source_new(yp_parser_t *parser); diff --git a/yarp/util/yp_string.c b/yarp/util/yp_string.c index 248c082a86..12ae5537c0 100644 --- a/yarp/util/yp_string.c +++ b/yarp/util/yp_string.c @@ -1,5 +1,15 @@ #include "yarp/util/yp_string.h" +// The following headers are necessary to read files using demand paging. +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#endif + // Initialize a shared string that is based on initial input. void yp_string_shared_init(yp_string_t *string, const char *start, const char *end) { @@ -36,6 +46,17 @@ yp_string_constant_init(yp_string_t *string, const char *source, size_t length) }; } +static void +yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) { + *string = (yp_string_t) { + .type = YP_STRING_MAPPED, + .as.mapped = { + .source = source, + .length = length + } + }; +} + // Returns the memory size associated with the string. size_t yp_string_memsize(const yp_string_t *string) { @@ -84,5 +105,113 @@ YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string) { if (string->type == YP_STRING_OWNED) { free(string->as.owned.source); + } else if (string->type == YP_STRING_MAPPED && string->as.mapped.length) { + void *memory = (void *) string->as.mapped.source; + #if defined(_WIN32) + UnmapViewOfFile(memory); + #elif defined(HAVE_MMAP) + munmap(memory, string->as.mapped.length); + #else + free(memory); + #endif } } + +bool +yp_string_mapped_init(yp_string_t *string, const char *filepath) { +#ifdef _WIN32 + // Open the file for reading. + HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + + if (file == INVALID_HANDLE_VALUE) { + perror("CreateFile failed"); + return false; + } + + // Get the file size. + DWORD file_size = GetFileSize(file, NULL); + if (file_size == INVALID_FILE_SIZE) { + CloseHandle(file); + perror("GetFileSize failed"); + return false; + } + + // If the file is empty, then we don't need to do anything else, we'll set + // the source to a constant empty string and return. + if (file_size == 0) { + CloseHandle(file); + yp_string_mapped_init_internal(string, "", 0); + return true; + } + + // Create a mapping of the file. + HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); + if (mapping == NULL) { + CloseHandle(file); + perror("CreateFileMapping failed"); + return false; + } + + // Map the file into memory. + char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); + CloseHandle(mapping); + CloseHandle(file); + + if (source == NULL) { + perror("MapViewOfFile failed"); + return false; + } + + yp_string_mapped_init_internal(string, source, (size_t) file_size); + return true; +#else + // Open the file for reading + int fd = open(filepath, O_RDONLY); + if (fd == -1) { + perror("open"); + return false; + } + + // Stat the file to get the file size + struct stat sb; + if (fstat(fd, &sb) == -1) { + close(fd); + perror("fstat"); + return false; + } + + // mmap the file descriptor to virtually get the contents + size_t size = (size_t) sb.st_size; + char *source = NULL; + + if (size == 0) { + close(fd); + yp_string_mapped_init_internal(string, "", 0); + return true; + } + +#ifdef HAVE_MMAP + source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (source == MAP_FAILED) { + perror("Map failed"); + return false; + } +#else + source = malloc(size); + if (source == NULL) { + return false; + } + + ssize_t read_size = read(fd, (void *) source, size); + if (read_size < 0 || (size_t)read_size != size) { + perror("Read size is incorrect"); + free((void *) source); + return false; + } +#endif + + close(fd); + yp_string_mapped_init_internal(string, source, size); + return true; +#endif +} diff --git a/yarp/util/yp_string.h b/yarp/util/yp_string.h index eecd71ea5b..4d9612674d 100644 --- a/yarp/util/yp_string.h +++ b/yarp/util/yp_string.h @@ -3,13 +3,14 @@ #include "yarp/defines.h" +#include #include #include #include // This struct represents a string value. typedef struct { - enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT } type; + enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type; union { struct { @@ -26,6 +27,11 @@ typedef struct { const char *source; size_t length; } constant; + + struct { + char *source; + size_t length; + } mapped; } as; } yp_string_t; @@ -38,6 +44,17 @@ void yp_string_owned_init(yp_string_t *string, char *source, size_t length); // Initialize a constant string that doesn't own its memory source. void yp_string_constant_init(yp_string_t *string, const char *source, size_t length); +// Read the file indicated by the filepath parameter into source and load its +// contents and size into the given yp_string_t. +// The given yp_string_t should be freed using yp_string_free() when it is no longer used. +// +// We want to use demand paging as much as possible in order to avoid having to +// read the entire file into memory (which could be detrimental to performance +// for large files). This means that if we're on windows we'll use +// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use +// `mmap`, and on other POSIX systems we'll use `read`. +bool yp_string_mapped_init(yp_string_t *string, const char *filepath); + // Returns the memory size associated with the string. size_t yp_string_memsize(const yp_string_t *string);