[ruby/prism] Parse numeric values
https://github.com/ruby/prism/commit/a6a552411c
This commit is contained in:
parent
548151d1e9
commit
ff6ebba9de
@ -949,6 +949,34 @@ named_captures(VALUE self, VALUE source) {
|
|||||||
return names;
|
return names;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* call-seq:
|
||||||
|
* Debug::number_parse(source) -> Integer
|
||||||
|
*
|
||||||
|
* Parses the given source string and returns the number it represents.
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
number_parse(VALUE self, VALUE source) {
|
||||||
|
const uint8_t *start = (const uint8_t *) RSTRING_PTR(source);
|
||||||
|
size_t length = RSTRING_LEN(source);
|
||||||
|
|
||||||
|
pm_number_t number = { 0 };
|
||||||
|
pm_number_parse(&number, PM_NUMBER_BASE_UNKNOWN, start, start + length);
|
||||||
|
|
||||||
|
VALUE result = UINT2NUM(number.head.value);
|
||||||
|
size_t shift = 0;
|
||||||
|
|
||||||
|
for (pm_number_node_t *node = number.head.next; node != NULL; node = node->next) {
|
||||||
|
VALUE receiver = rb_funcall(UINT2NUM(node->value), rb_intern("<<"), 1, ULONG2NUM(++shift * 32));
|
||||||
|
result = rb_funcall(receiver, rb_intern("|"), 1, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (number.negative) result = rb_funcall(result, rb_intern("-@"), 0);
|
||||||
|
pm_number_free(&number);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
|
* Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
|
||||||
@ -1148,6 +1176,7 @@ Init_prism(void) {
|
|||||||
// internal tasks. We expose these to make them easier to test.
|
// internal tasks. We expose these to make them easier to test.
|
||||||
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
||||||
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
||||||
|
rb_define_singleton_method(rb_cPrismDebug, "number_parse", number_parse, 1);
|
||||||
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
||||||
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
||||||
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
|
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "prism/util/pm_buffer.h"
|
#include "prism/util/pm_buffer.h"
|
||||||
#include "prism/util/pm_char.h"
|
#include "prism/util/pm_char.h"
|
||||||
#include "prism/util/pm_memchr.h"
|
#include "prism/util/pm_memchr.h"
|
||||||
|
#include "prism/util/pm_number.h"
|
||||||
#include "prism/util/pm_strncasecmp.h"
|
#include "prism/util/pm_strncasecmp.h"
|
||||||
#include "prism/util/pm_strpbrk.h"
|
#include "prism/util/pm_strpbrk.h"
|
||||||
#include "prism/ast.h"
|
#include "prism/ast.h"
|
||||||
|
164
prism/util/pm_number.c
Normal file
164
prism/util/pm_number.c
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
#include "prism/util/pm_number.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new node for a number in the linked list.
|
||||||
|
*/
|
||||||
|
static pm_number_node_t *
|
||||||
|
pm_number_node_create(pm_number_t *number, uint32_t value) {
|
||||||
|
number->length++;
|
||||||
|
pm_number_node_t *node = malloc(sizeof(pm_number_node_t));
|
||||||
|
*node = (pm_number_node_t) { .next = NULL, .value = value };
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a 32-bit integer to a number.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
pm_number_add(pm_number_t *number, uint32_t addend) {
|
||||||
|
uint32_t carry = addend;
|
||||||
|
pm_number_node_t *current = &number->head;
|
||||||
|
|
||||||
|
while (carry > 0) {
|
||||||
|
uint64_t result = (uint64_t) current->value + carry;
|
||||||
|
carry = (uint32_t) (result >> 32);
|
||||||
|
current->value = (uint32_t) result;
|
||||||
|
|
||||||
|
if (carry > 0) {
|
||||||
|
if (current->next == NULL) {
|
||||||
|
current->next = pm_number_node_create(number, carry);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
current = current->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Multiple a number by a 32-bit integer. In practice, the multiplier is the
|
||||||
|
* base of the number, so this is 2, 8, 10, or 16.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
pm_number_multiply(pm_number_t *number, uint32_t multiplier) {
|
||||||
|
uint32_t carry = 0;
|
||||||
|
|
||||||
|
for (pm_number_node_t *current = &number->head; current != NULL; current = current->next) {
|
||||||
|
uint64_t result = (uint64_t) current->value * multiplier + carry;
|
||||||
|
carry = (uint32_t) (result >> 32);
|
||||||
|
current->value = (uint32_t) result;
|
||||||
|
|
||||||
|
if (carry > 0 && current->next == NULL) {
|
||||||
|
current->next = pm_number_node_create(number, carry);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the value of a digit in a number.
|
||||||
|
*/
|
||||||
|
static uint32_t
|
||||||
|
pm_number_parse_digit(const uint8_t character) {
|
||||||
|
switch (character) {
|
||||||
|
case '0': return 0;
|
||||||
|
case '1': return 1;
|
||||||
|
case '2': return 2;
|
||||||
|
case '3': return 3;
|
||||||
|
case '4': return 4;
|
||||||
|
case '5': return 5;
|
||||||
|
case '6': return 6;
|
||||||
|
case '7': return 7;
|
||||||
|
case '8': return 8;
|
||||||
|
case '9': return 9;
|
||||||
|
case 'a': case 'A': return 10;
|
||||||
|
case 'b': case 'B': return 11;
|
||||||
|
case 'c': case 'C': return 12;
|
||||||
|
case 'd': case 'D': return 13;
|
||||||
|
case 'e': case 'E': return 14;
|
||||||
|
case 'f': case 'F': return 15;
|
||||||
|
default: assert(false && "unreachable");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a number from a string. This assumes that the format of the number has
|
||||||
|
* already been validated, as internal validation checks are not performed here.
|
||||||
|
*/
|
||||||
|
PRISM_EXPORTED_FUNCTION void
|
||||||
|
pm_number_parse(pm_number_t *number, pm_number_base_t base, const uint8_t *start, const uint8_t *end) {
|
||||||
|
switch (*start) {
|
||||||
|
case '-':
|
||||||
|
number->negative = true;
|
||||||
|
/* fallthrough */
|
||||||
|
case '+':
|
||||||
|
start++;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t multiplier;
|
||||||
|
switch (base) {
|
||||||
|
case PM_NUMBER_BASE_BINARY:
|
||||||
|
start += 2; // 0b
|
||||||
|
multiplier = 2;
|
||||||
|
break;
|
||||||
|
case PM_NUMBER_BASE_OCTAL:
|
||||||
|
start++; // 0
|
||||||
|
if (*start == 'o' || *start == 'O') start++; // o
|
||||||
|
multiplier = 8;
|
||||||
|
break;
|
||||||
|
case PM_NUMBER_BASE_DECIMAL:
|
||||||
|
if (*start == '0' && (end - start) > 1) start += 2; // 0d
|
||||||
|
multiplier = 10;
|
||||||
|
break;
|
||||||
|
case PM_NUMBER_BASE_HEXADECIMAL:
|
||||||
|
start += 2; // 0x
|
||||||
|
multiplier = 16;
|
||||||
|
break;
|
||||||
|
case PM_NUMBER_BASE_UNKNOWN:
|
||||||
|
if (*start == '0' && (end - start) > 1) {
|
||||||
|
switch (start[1]) {
|
||||||
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
|
||||||
|
case 'b': case 'B': start += 2; multiplier = 2; break;
|
||||||
|
case 'o': case 'O': start += 2; multiplier = 8; break;
|
||||||
|
case 'd': case 'D': start += 2; multiplier = 10; break;
|
||||||
|
case 'x': case 'X': start += 2; multiplier = 16; break;
|
||||||
|
default: assert(false && "unreachable");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
multiplier = 10;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (pm_number_add(number, pm_number_parse_digit(*start++)); start < end; start++) {
|
||||||
|
if (*start == '_') continue;
|
||||||
|
pm_number_multiply(number, multiplier);
|
||||||
|
pm_number_add(number, pm_number_parse_digit(*start));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively destroy the linked list of a number.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
pm_number_node_destroy(pm_number_node_t *number) {
|
||||||
|
if (number->next != NULL) {
|
||||||
|
pm_number_node_destroy(number->next);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(number);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free the internal memory of a number. This memory will only be allocated if
|
||||||
|
* the number exceeds the size of a single node in the linked list.
|
||||||
|
*/
|
||||||
|
PRISM_EXPORTED_FUNCTION void
|
||||||
|
pm_number_free(pm_number_t *number) {
|
||||||
|
if (number->head.next) {
|
||||||
|
pm_number_node_destroy(number->head.next);
|
||||||
|
}
|
||||||
|
}
|
95
prism/util/pm_number.h
Normal file
95
prism/util/pm_number.h
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
/**
|
||||||
|
* @file pm_number.h
|
||||||
|
*
|
||||||
|
* This module provides functions for working with arbitrary-sized numbers.
|
||||||
|
*/
|
||||||
|
#ifndef PRISM_NUMBER_H
|
||||||
|
#define PRISM_NUMBER_H
|
||||||
|
|
||||||
|
#include "prism/defines.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A node in the linked list of a pm_number_t.
|
||||||
|
*/
|
||||||
|
typedef struct pm_number_node {
|
||||||
|
/** A pointer to the next node in the list. */
|
||||||
|
struct pm_number_node *next;
|
||||||
|
|
||||||
|
/** The value of the node. */
|
||||||
|
uint32_t value;
|
||||||
|
} pm_number_node_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This structure represents an arbitrary-sized number. It is implemented as a
|
||||||
|
* linked list of 32-bit integers, with the least significant digit at the head
|
||||||
|
* of the list.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
/**
|
||||||
|
* The head of the linked list, embedded directly so that allocations do not
|
||||||
|
* need to be performed for small numbers.
|
||||||
|
*/
|
||||||
|
pm_number_node_t head;
|
||||||
|
|
||||||
|
/** The number of nodes in the linked list that have been allocated. */
|
||||||
|
size_t length;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not the number is negative. It is stored this way so that a
|
||||||
|
* zeroed pm_number_t is always positive zero.
|
||||||
|
*/
|
||||||
|
bool negative;
|
||||||
|
} pm_number_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An enum controlling the base of a number. It is expected that the base is
|
||||||
|
* already known before parsing the number, even though it could be derived from
|
||||||
|
* the string itself.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
/** The binary base, indicated by a 0b or 0B prefix. */
|
||||||
|
PM_NUMBER_BASE_BINARY,
|
||||||
|
|
||||||
|
/** The octal base, indicated by a 0, 0o, or 0O prefix. */
|
||||||
|
PM_NUMBER_BASE_OCTAL,
|
||||||
|
|
||||||
|
/** The decimal base, indicated by a 0d, 0D, or empty prefix. */
|
||||||
|
PM_NUMBER_BASE_DECIMAL,
|
||||||
|
|
||||||
|
/** The hexidecimal base, indicated by a 0x or 0X prefix. */
|
||||||
|
PM_NUMBER_BASE_HEXADECIMAL,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An unknown base, in which case pm_number_parse will derive it based on
|
||||||
|
* the content of the string. This is less efficient and does more
|
||||||
|
* comparisons, so if callers know the base ahead of time, they should use
|
||||||
|
* that instead.
|
||||||
|
*/
|
||||||
|
PM_NUMBER_BASE_UNKNOWN
|
||||||
|
} pm_number_base_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a number from a string. This assumes that the format of the number has
|
||||||
|
* already been validated, as internal validation checks are not performed here.
|
||||||
|
*
|
||||||
|
* @param number The number to parse into.
|
||||||
|
* @param base The base of the number.
|
||||||
|
* @param start The start of the string.
|
||||||
|
* @param end The end of the string.
|
||||||
|
*/
|
||||||
|
PRISM_EXPORTED_FUNCTION void pm_number_parse(pm_number_t *number, pm_number_base_t base, const uint8_t *start, const uint8_t *end);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free the internal memory of a number. This memory will only be allocated if
|
||||||
|
* the number exceeds the size of a single node in the linked list.
|
||||||
|
*
|
||||||
|
* @param number The number to free.
|
||||||
|
*/
|
||||||
|
PRISM_EXPORTED_FUNCTION void pm_number_free(pm_number_t *number);
|
||||||
|
|
||||||
|
#endif
|
36
test/prism/number_parse_test.rb
Normal file
36
test/prism/number_parse_test.rb
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require_relative "test_helper"
|
||||||
|
|
||||||
|
return if Prism::BACKEND == :FFI
|
||||||
|
|
||||||
|
module Prism
|
||||||
|
class NumberParseTest < TestCase
|
||||||
|
def test_number_parse
|
||||||
|
assert_number_parse(1)
|
||||||
|
assert_number_parse(50)
|
||||||
|
assert_number_parse(100)
|
||||||
|
assert_number_parse(100, "1_0_0")
|
||||||
|
|
||||||
|
assert_number_parse(10, "0b1010")
|
||||||
|
assert_number_parse(10, "0B1010")
|
||||||
|
assert_number_parse(10, "0o12")
|
||||||
|
assert_number_parse(10, "0O12")
|
||||||
|
assert_number_parse(10, "012")
|
||||||
|
assert_number_parse(10, "0d10")
|
||||||
|
assert_number_parse(10, "0D10")
|
||||||
|
assert_number_parse(10, "0xA")
|
||||||
|
assert_number_parse(10, "0XA")
|
||||||
|
|
||||||
|
assert_number_parse(2**32)
|
||||||
|
assert_number_parse(2**64 + 2**32)
|
||||||
|
assert_number_parse(2**128 + 2**64 + 2**32)
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def assert_number_parse(expected, source = expected.to_s)
|
||||||
|
assert_equal expected, Debug.number_parse(source)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Loading…
x
Reference in New Issue
Block a user