[ruby/json] Faster float formatting

This commit provides an alternative implementation for a float → decimal conversion.

It integrates a C implementation of Fabian Loitsch's Grisu-algorithm [[pdf]](http://florian.loitsch.com/publications/dtoa-pldi2010.pdf), extracted from https://github.com/night-shift/fpconv. The relevant files are added in this PR, they are, as is all of https://github.com/night-shift/fpconv, available under a MIT License.

As a result, I see a speedup of 900% on Apple Silicon M1 for a float set of benchmarks.

floats don't have a single correct  string representation: a float like `1000.0` can be represented as "1000", "1e3", "1000.0" (and more).

The Grisu algorithm converts floating point numbers to an optimal decimal string representation without loss of precision. As a result, a float that is exactly an integer (like `Float(10)`) will be converted by that algorithm into `"10"`. While technically correct – the JSON format treats floats and integers identically –, this differs from the current behaviour of the `"json"` gem. To address this, the integration checks for that case, and explicitely adds a ".0" suffix in those cases.

This is sufficient to meet all existing tests; there is, however, a chance that the current implementation and this implementation occasionally encode floats differently.

```
== Encoding floats (4179311 bytes)
ruby 3.4.1 (2024-12-25 revision https://github.com/ruby/json/commit/48d4efcb85) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
        json (local)     4.000 i/100ms
Calculating -------------------------------------
        json (local)     46.046 (± 2.2%) i/s   (21.72 ms/i) -    232.000 in   5.039611s

Normalize to 2090234 byte
== Encoding floats (4179242 bytes)
ruby 3.4.1 (2024-12-25 revision https://github.com/ruby/json/commit/48d4efcb85) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
       json (2.10.2)     1.000 i/100ms
Calculating -------------------------------------
       json (2.10.2)      4.614 (± 0.0%) i/s  (216.74 ms/i) -     24.000 in   5.201871s
```

These benchmarks are run via a script ([link](https://gist.github.com/radiospiel/04019402726a28b31616df3d0c17bd1c)) which is based on the gem's `benchmark/encoder.rb` file. There are probably better ways to run benchmarks :) My version allows to combine multiple test cases into a single one.

The `dumps` benchmark, which covers the JSON files in `benchmark/data/*.json` – with the exception of `canada.json` – , reported a minor speedup within statistical uncertainty.

https://github.com/ruby/json/commit/7d77415108
This commit is contained in:
eno 2025-03-16 20:28:46 +01:00 committed by Hiroshi SHIBATA
parent ef7c7f9e77
commit a59333c58b
6 changed files with 516 additions and 3 deletions

3
ext/json/ext/vendor/fpconv/README.md vendored Normal file
View File

@ -0,0 +1,3 @@
The contents of this directory is extracted from https://github.com/night-shift/fpconv
It is licensed under the provisions of the Boost Software License - Version 1.0 - August 17th, 2003. See the ./license file for details.

23
ext/json/ext/vendor/fpconv/license vendored Normal file
View File

@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

339
ext/json/ext/vendor/fpconv/src/fpconv.c vendored Normal file
View File

@ -0,0 +1,339 @@
#include <stdbool.h>
#include <string.h>
#include "fpconv.h"
#include "powers.h"
#define fracmask 0x000FFFFFFFFFFFFFU
#define expmask 0x7FF0000000000000U
#define hiddenbit 0x0010000000000000U
#define signmask 0x8000000000000000U
#define expbias (1023 + 52)
#define absv(n) ((n) < 0 ? -(n) : (n))
#define minv(a, b) ((a) < (b) ? (a) : (b))
static uint64_t tens[] = {
10000000000000000000U, 1000000000000000000U, 100000000000000000U,
10000000000000000U, 1000000000000000U, 100000000000000U,
10000000000000U, 1000000000000U, 100000000000U,
10000000000U, 1000000000U, 100000000U,
10000000U, 1000000U, 100000U,
10000U, 1000U, 100U,
10U, 1U
};
static inline uint64_t get_dbits(double d)
{
union {
double dbl;
uint64_t i;
} dbl_bits = { d };
return dbl_bits.i;
}
static Fp build_fp(double d)
{
uint64_t bits = get_dbits(d);
Fp fp;
fp.frac = bits & fracmask;
fp.exp = (bits & expmask) >> 52;
if(fp.exp) {
fp.frac += hiddenbit;
fp.exp -= expbias;
} else {
fp.exp = -expbias + 1;
}
return fp;
}
static void normalize(Fp* fp)
{
while ((fp->frac & hiddenbit) == 0) {
fp->frac <<= 1;
fp->exp--;
}
int shift = 64 - 52 - 1;
fp->frac <<= shift;
fp->exp -= shift;
}
static void get_normalized_boundaries(Fp* fp, Fp* lower, Fp* upper)
{
upper->frac = (fp->frac << 1) + 1;
upper->exp = fp->exp - 1;
while ((upper->frac & (hiddenbit << 1)) == 0) {
upper->frac <<= 1;
upper->exp--;
}
int u_shift = 64 - 52 - 2;
upper->frac <<= u_shift;
upper->exp = upper->exp - u_shift;
int l_shift = fp->frac == hiddenbit ? 2 : 1;
lower->frac = (fp->frac << l_shift) - 1;
lower->exp = fp->exp - l_shift;
lower->frac <<= lower->exp - upper->exp;
lower->exp = upper->exp;
}
static Fp multiply(Fp* a, Fp* b)
{
const uint64_t lomask = 0x00000000FFFFFFFF;
uint64_t ah_bl = (a->frac >> 32) * (b->frac & lomask);
uint64_t al_bh = (a->frac & lomask) * (b->frac >> 32);
uint64_t al_bl = (a->frac & lomask) * (b->frac & lomask);
uint64_t ah_bh = (a->frac >> 32) * (b->frac >> 32);
uint64_t tmp = (ah_bl & lomask) + (al_bh & lomask) + (al_bl >> 32);
/* round up */
tmp += 1U << 31;
Fp fp = {
ah_bh + (ah_bl >> 32) + (al_bh >> 32) + (tmp >> 32),
a->exp + b->exp + 64
};
return fp;
}
static void round_digit(char* digits, int ndigits, uint64_t delta, uint64_t rem, uint64_t kappa, uint64_t frac)
{
while (rem < frac && delta - rem >= kappa &&
(rem + kappa < frac || frac - rem > rem + kappa - frac)) {
digits[ndigits - 1]--;
rem += kappa;
}
}
static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
{
uint64_t wfrac = upper->frac - fp->frac;
uint64_t delta = upper->frac - lower->frac;
Fp one;
one.frac = 1ULL << -upper->exp;
one.exp = upper->exp;
uint64_t part1 = upper->frac >> -one.exp;
uint64_t part2 = upper->frac & (one.frac - 1);
int idx = 0, kappa = 10;
uint64_t* divp;
/* 1000000000 */
for(divp = tens + 10; kappa > 0; divp++) {
uint64_t div = *divp;
unsigned digit = (unsigned) (part1 / div);
if (digit || idx) {
digits[idx++] = digit + '0';
}
part1 -= digit * div;
kappa--;
uint64_t tmp = (part1 <<-one.exp) + part2;
if (tmp <= delta) {
*K += kappa;
round_digit(digits, idx, delta, tmp, div << -one.exp, wfrac);
return idx;
}
}
/* 10 */
uint64_t* unit = tens + 18;
while(true) {
part2 *= 10;
delta *= 10;
kappa--;
unsigned digit = (unsigned) (part2 >> -one.exp);
if (digit || idx) {
digits[idx++] = digit + '0';
}
part2 &= one.frac - 1;
if (part2 < delta) {
*K += kappa;
round_digit(digits, idx, delta, part2, one.frac, wfrac * *unit);
return idx;
}
unit--;
}
}
static int grisu2(double d, char* digits, int* K)
{
Fp w = build_fp(d);
Fp lower, upper;
get_normalized_boundaries(&w, &lower, &upper);
normalize(&w);
int k;
Fp cp = find_cachedpow10(upper.exp, &k);
w = multiply(&w, &cp);
upper = multiply(&upper, &cp);
lower = multiply(&lower, &cp);
lower.frac++;
upper.frac--;
*K = -k;
return generate_digits(&w, &upper, &lower, digits, K);
}
static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
{
int exp = absv(K + ndigits - 1);
int max_trailing_zeros = 7;
if(neg) {
max_trailing_zeros -= 1;
}
/* write plain integer */
if(K >= 0 && (exp < (ndigits + max_trailing_zeros))) {
memcpy(dest, digits, ndigits);
memset(dest + ndigits, '0', K);
return ndigits + K;
}
/* write decimal w/o scientific notation */
if(K < 0 && (K > -7 || exp < 4)) {
int offset = ndigits - absv(K);
/* fp < 1.0 -> write leading zero */
if(offset <= 0) {
offset = -offset;
dest[0] = '0';
dest[1] = '.';
memset(dest + 2, '0', offset);
memcpy(dest + offset + 2, digits, ndigits);
return ndigits + 2 + offset;
/* fp > 1.0 */
} else {
memcpy(dest, digits, offset);
dest[offset] = '.';
memcpy(dest + offset + 1, digits + offset, ndigits - offset);
return ndigits + 1;
}
}
/* write decimal w/ scientific notation */
ndigits = minv(ndigits, 18 - neg);
int idx = 0;
dest[idx++] = digits[0];
if(ndigits > 1) {
dest[idx++] = '.';
memcpy(dest + idx, digits + 1, ndigits - 1);
idx += ndigits - 1;
}
dest[idx++] = 'e';
char sign = K + ndigits - 1 < 0 ? '-' : '+';
dest[idx++] = sign;
int cent = 0;
if(exp > 99) {
cent = exp / 100;
dest[idx++] = cent + '0';
exp -= cent * 100;
}
if(exp > 9) {
int dec = exp / 10;
dest[idx++] = dec + '0';
exp -= dec * 10;
} else if(cent) {
dest[idx++] = '0';
}
dest[idx++] = exp % 10 + '0';
return idx;
}
static int filter_special(double fp, char* dest)
{
if(fp == 0.0) {
dest[0] = '0';
return 1;
}
uint64_t bits = get_dbits(fp);
bool nan = (bits & expmask) == expmask;
if(!nan) {
return 0;
}
if(bits & fracmask) {
dest[0] = 'n'; dest[1] = 'a'; dest[2] = 'n';
} else {
dest[0] = 'i'; dest[1] = 'n'; dest[2] = 'f';
}
return 3;
}
int fpconv_dtoa(double d, char dest[24])
{
char digits[18];
int str_len = 0;
bool neg = false;
if(get_dbits(d) & signmask) {
dest[0] = '-';
str_len++;
neg = true;
}
int spec = filter_special(d, dest + str_len);
if(spec) {
return str_len + spec;
}
int K = 0;
int ndigits = grisu2(d, digits, &K);
str_len += emit_digits(digits, ndigits, dest + str_len, K, neg);
return str_len;
}

33
ext/json/ext/vendor/fpconv/src/fpconv.h vendored Normal file
View File

@ -0,0 +1,33 @@
#ifndef FPCONV_H
#define FPCONV_H
/* Fast and accurate double to string conversion based on Florian Loitsch's
* Grisu-algorithm[1].
*
* Input:
* fp -> the double to convert, dest -> destination buffer.
* The generated string will never be longer than 24 characters.
* Make sure to pass a pointer to at least 24 bytes of memory.
* The emitted string will not be null terminated.
*
* Output:
* The number of written characters.
*
* Exemplary usage:
*
* void print(double d)
* {
* char buf[24 + 1] // plus null terminator
* int str_len = fpconv_dtoa(d, buf);
*
* buf[str_len] = '\0';
* printf("%s", buf);
* }
*
*/
int fpconv_dtoa(double fp, char dest[24]);
#endif
/* [1] http://florian.loitsch.com/publications/dtoa-pldi2010.pdf */

87
ext/json/ext/vendor/fpconv/src/powers.h vendored Normal file
View File

@ -0,0 +1,87 @@
#include <stdint.h>
#define npowers 87
#define steppowers 8
#define firstpower -348 /* 10 ^ -348 */
#define expmax -32
#define expmin -60
typedef struct Fp {
uint64_t frac;
int exp;
} Fp;
static Fp powers_ten[] = {
{ 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 },
{ 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 },
{ 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 },
{ 12353653155963782858U, -1060 }, { 18408377700990114895U, -1034 },
{ 13715310171984221708U, -1007 }, { 10218702384817765436U, -980 },
{ 15227053142812498563U, -954 }, { 11345038669416679861U, -927 },
{ 16905424996341287883U, -901 }, { 12595523146049147757U, -874 },
{ 9384396036005875287U, -847 }, { 13983839803942852151U, -821 },
{ 10418772551374772303U, -794 }, { 15525180923007089351U, -768 },
{ 11567161174868858868U, -741 }, { 17236413322193710309U, -715 },
{ 12842128665889583758U, -688 }, { 9568131466127621947U, -661 },
{ 14257626930069360058U, -635 }, { 10622759856335341974U, -608 },
{ 15829145694278690180U, -582 }, { 11793632577567316726U, -555 },
{ 17573882009934360870U, -529 }, { 13093562431584567480U, -502 },
{ 9755464219737475723U, -475 }, { 14536774485912137811U, -449 },
{ 10830740992659433045U, -422 }, { 16139061738043178685U, -396 },
{ 12024538023802026127U, -369 }, { 17917957937422433684U, -343 },
{ 13349918974505688015U, -316 }, { 9946464728195732843U, -289 },
{ 14821387422376473014U, -263 }, { 11042794154864902060U, -236 },
{ 16455045573212060422U, -210 }, { 12259964326927110867U, -183 },
{ 18268770466636286478U, -157 }, { 13611294676837538539U, -130 },
{ 10141204801825835212U, -103 }, { 15111572745182864684U, -77 },
{ 11258999068426240000U, -50 }, { 16777216000000000000U, -24 },
{ 12500000000000000000U, 3 }, { 9313225746154785156U, 30 },
{ 13877787807814456755U, 56 }, { 10339757656912845936U, 83 },
{ 15407439555097886824U, 109 }, { 11479437019748901445U, 136 },
{ 17105694144590052135U, 162 }, { 12744735289059618216U, 189 },
{ 9495567745759798747U, 216 }, { 14149498560666738074U, 242 },
{ 10542197943230523224U, 269 }, { 15709099088952724970U, 295 },
{ 11704190886730495818U, 322 }, { 17440603504673385349U, 348 },
{ 12994262207056124023U, 375 }, { 9681479787123295682U, 402 },
{ 14426529090290212157U, 428 }, { 10748601772107342003U, 455 },
{ 16016664761464807395U, 481 }, { 11933345169920330789U, 508 },
{ 17782069995880619868U, 534 }, { 13248674568444952270U, 561 },
{ 9871031767461413346U, 588 }, { 14708983551653345445U, 614 },
{ 10959046745042015199U, 641 }, { 16330252207878254650U, 667 },
{ 12166986024289022870U, 694 }, { 18130221999122236476U, 720 },
{ 13508068024458167312U, 747 }, { 10064294952495520794U, 774 },
{ 14996968138956309548U, 800 }, { 11173611982879273257U, 827 },
{ 16649979327439178909U, 853 }, { 12405201291620119593U, 880 },
{ 9242595204427927429U, 907 }, { 13772540099066387757U, 933 },
{ 10261342003245940623U, 960 }, { 15290591125556738113U, 986 },
{ 11392378155556871081U, 1013 }, { 16975966327722178521U, 1039 },
{ 12648080533535911531U, 1066 }
};
static Fp find_cachedpow10(int exp, int* k)
{
const double one_log_ten = 0.30102999566398114;
int approx = -(exp + npowers) * one_log_ten;
int idx = (approx - firstpower) / steppowers;
while(1) {
int current = exp + powers_ten[idx].exp + 64;
if(current < expmin) {
idx++;
continue;
}
if(current > expmax) {
idx--;
continue;
}
*k = (firstpower + idx * steppowers);
return powers_ten[idx];
}
}

View File

@ -1050,12 +1050,15 @@ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *da
}
#endif
#include "../vendor/fpconv/src/fpconv.c"
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
double value = RFLOAT_VALUE(obj);
char allow_nan = state->allow_nan;
if (!allow_nan) {
if (isinf(value) || isnan(value)) {
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
if (!allow_nan) {
if (state->strict && state->as_json) {
VALUE casted_obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
if (casted_obj != obj) {
@ -1067,8 +1070,33 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
}
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
}
VALUE tmp = rb_funcall(obj, i_to_s, 0);
fbuffer_append_str(buffer, tmp);
return;
}
fbuffer_append_str(buffer, rb_funcall(obj, i_to_s, 0));
/* This implementation writes directly into the buffer. We reserve
* the 24 characters that fpconv_dtoa states as its maximum, plus
* 2 more characters for the potential ".0" suffix.
*/
fbuffer_inc_capa(buffer, 26);
char* d = buffer->ptr + buffer->len;
int len = fpconv_dtoa(value, d);
/* fpconv_dtoa converts a float to its shorted string representation. When
* converting a float that is exactly an integer (e.g. `Float(2)`) this
* returns in a string that looks like an integer. This is correct, since
* JSON treats ints and floats the same. However, to not break integrations
* that expect a string representation looking like a float, we append a
* "." in that case.
*/
if(!memchr(d, '.', len) && !memchr(d, 'e', len)) {
d[len] = '.';
d[len+1] = '0';
len += 2;
}
buffer->len += len;
}
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)