From 7c99e43c3f050244b06dbd18de4f605ea70d234c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 13 Nov 2023 15:05:49 -0500 Subject: [PATCH] [ruby/prism] Ensure serialized file is little endian https://github.com/ruby/prism/commit/0c762ee68a --- prism/defines.h | 10 +++++++++ prism/templates/lib/prism/serialize.rb.erb | 8 +++---- prism/templates/src/serialize.c.erb | 25 +++++++++++++++++----- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/prism/defines.h b/prism/defines.h index f89a0bed8e..28f4da11df 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -74,4 +74,14 @@ # define snprintf _snprintf #endif +/** + * Define PRISM_WORDS_BIGENDIAN so we can ensure our serialization happens in + * little endian format regardless of platform. + */ +#if defined(WORDS_BIGENDIAN) +# define PRISM_WORDS_BIGENDIAN +#elif defined(AC_APPLE_UNIVERSAL_BUILD) && defined(__BIG_ENDIAN__) +# define PRISM_WORDS_BIGENDIAN +#endif + #endif diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 2837504543..517d4e8a24 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -137,7 +137,7 @@ module Prism comments, magic_comments, errors, warnings = load_metadata - @constant_pool_offset = io.read(4).unpack1("L") + @constant_pool_offset = io.read(4).unpack1("L<") @constant_pool = Array.new(load_varint, nil) [load_node, comments, magic_comments, errors, warnings] @@ -167,7 +167,7 @@ module Prism end def load_serialized_length - io.read(4).unpack1("L") + io.read(4).unpack1("L<") end def load_optional_node @@ -206,8 +206,8 @@ module Prism unless constant offset = constant_pool_offset + index * 8 - start = serialized.unpack1("L", offset: offset) - length = serialized.unpack1("L", offset: offset + 4) + start = serialized.unpack1("L<", offset: offset) + length = serialized.unpack1("L<", offset: offset + 4) constant = if start.nobits?(1 << 31) diff --git 
a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index db4c91e0cd..2ecf7299c6 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -47,6 +47,21 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
     }
 }
 
+/**
+ * Serialize a 32-bit integer to the given address always in little-endian.
+ */
+static void
+pm_serialize_32(char *address, uint32_t value) {
+#ifdef PRISM_WORDS_BIGENDIAN
+    address[0] = (char) (value & 0xFF); // least-significant byte goes first
+    address[1] = (char) ((value >> 8) & 0xFF);
+    address[2] = (char) ((value >> 16) & 0xFF);
+    address[3] = (char) ((value >> 24) & 0xFF);
+#else
+    memcpy(address, &value, sizeof(uint32_t)); // host is assumed little-endian here
+#endif
+}
+
 static void
 pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
@@ -118,7 +133,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             <%- if node.needs_serialized_length? -%>
             // serialize length
             uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
-            memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
+            pm_serialize_32(buffer->value + length_offset, length);
             <%- end -%>
             break;
         }
@@ -231,7 +246,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
     // Now we're going to serialize the offset of the constant pool back where
     // we left space for it.
     uint32_t length = pm_sizet_to_u32(buffer->length);
-    memcpy(buffer->value + offset, &length, sizeof(uint32_t));
+    pm_serialize_32(buffer->value + offset, length);
 
     // Now we're going to serialize the constant pool.
offset = buffer->length; @@ -258,18 +273,18 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) assert(content_offset < owned_mask); content_offset |= owned_mask; - memcpy(buffer->value + buffer_offset, &content_offset, 4); + pm_serialize_32(buffer->value + buffer_offset, content_offset); pm_buffer_append_bytes(buffer, constant->start, constant->length); } else { // Since this is a shared constant, we are going to write its // source offset directly into the buffer. uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start); - memcpy(buffer->value + buffer_offset, &source_offset, 4); + pm_serialize_32(buffer->value + buffer_offset, source_offset); } // Now we can write the length of the constant into the buffer. uint32_t constant_length = pm_sizet_to_u32(constant->length); - memcpy(buffer->value + buffer_offset + 4, &constant_length, 4); + pm_serialize_32(buffer->value + buffer_offset + 4, constant_length); } } }