Move Time#xmlschema into core and optimize it

[Feature #20707]

Converting Time into RFC3339 / ISO8601 representation is a significant
hotspot for applications that serialize data in JSON, XML or other formats.

By moving it into core we can optimize it much further than what `strftime` will
allow.

```
compare-ruby: ruby 3.4.0dev (2024-08-29T13:11:40Z master 6b08a50a62) +YJIT [arm64-darwin23]
built-ruby: ruby 3.4.0dev (2024-08-30T13:17:32Z native-xmlschema 34041ff71f) +YJIT [arm64-darwin23]
warming up......

|                        |compare-ruby|built-ruby|
|:-----------------------|-----------:|---------:|
|time.xmlschema          |      1.087M|    5.190M|
|                        |           -|     4.78x|
|utc_time.xmlschema      |      1.464M|    6.848M|
|                        |           -|     4.68x|
|time.xmlschema(6)       |    859.960k|    4.646M|
|                        |           -|     5.40x|
|utc_time.xmlschema(6)   |      1.080M|    5.917M|
|                        |           -|     5.48x|
|time.xmlschema(9)       |    893.909k|    4.668M|
|                        |           -|     5.22x|
|utc_time.xmlschema(9)   |      1.056M|    5.707M|
|                        |           -|     5.40x|
```
This commit is contained in:
Jean Boussier 2024-08-30 13:04:11 +02:00
parent d4de8aef37
commit 57e3fc32ea
Notes: git 2024-09-05 17:23:32 +00:00
9 changed files with 207 additions and 3 deletions

View File

@ -0,0 +1,23 @@
# Benchmark definition for Time#xmlschema with two sections:
#   prelude   - Ruby setup code. It defines a strftime-based Time#xmlschema only
#               when Time does not already respond to it, then creates the two
#               Time objects (`time` and its UTC counterpart `utc_time`) that the
#               benchmark expressions call.
#   benchmark - the expressions to measure.
prelude: |
# frozen_string_literal
unless Time.method_defined?(:xmlschema)
class Time
def xmlschema(fraction_digits=0)
fraction_digits = fraction_digits.to_i
s = strftime("%FT%T")
if fraction_digits > 0
s << strftime(".%#{fraction_digits}N")
end
s << (utc? ? 'Z' : strftime("%:z"))
end
end
end
time = Time.now
utc_time = Time.now.utc
# Measured calls: no fractional digits, then 6 and 9 fractional digits, each on `time` and on `utc_time`.
benchmark:
- time.xmlschema
- utc_time.xmlschema
- time.xmlschema(6)
- utc_time.xmlschema(6)
- time.xmlschema(9)
- utc_time.xmlschema(9)

View File

@ -0,0 +1,6 @@
require_relative '../../spec_helper'
require_relative 'shared/xmlschema'
# Runs the shared :time_xmlschema examples with :iso8601 as the method under test.
describe "Time#iso8601" do
it_behaves_like :time_xmlschema, :iso8601
end

View File

@ -0,0 +1,31 @@
# Shared examples for the Time instance method that formats a time as an
# ISO-8601 string. The including spec supplies the method name, which is
# read here from @method and invoked through #send.
describe :time_xmlschema, shared: true do
  ruby_version_is "3.4" do
    it "generates ISO-8601 strings in Z for UTC times" do
      t = Time.utc(1985, 4, 12, 23, 20, 50, 521245)
      t.send(@method).should == "1985-04-12T23:20:50Z"
      # The optional argument is the number of fractional-second digits.
      t.send(@method, 2).should == "1985-04-12T23:20:50.52Z"
      t.send(@method, 9).should == "1985-04-12T23:20:50.521245000Z"
    end

    it "generates ISO-8601 string with timezone offset for non-UTC times" do
      t = Time.new(1985, 4, 12, 23, 20, 50, "+02:00")
      t.send(@method).should == "1985-04-12T23:20:50+02:00"
      t.send(@method, 2).should == "1985-04-12T23:20:50.00+02:00"
    end

    it "year is always at least 4 digits" do
      t = Time.utc(12, 4, 12)
      t.send(@method).should == "0012-04-12T00:00:00Z"
    end

    it "year can be more than 4 digits" do
      t = Time.utc(40_000, 4, 12)
      t.send(@method).should == "40000-04-12T00:00:00Z"
    end

    it "year can be negative" do
      t = Time.utc(-2000, 4, 12)
      t.send(@method).should == "-2000-04-12T00:00:00Z"
    end
  end
end

View File

@ -0,0 +1,6 @@
require_relative '../../spec_helper'
require_relative 'shared/xmlschema'
# Runs the shared :time_xmlschema examples with :xmlschema as the method under test.
describe "Time#xmlschema" do
it_behaves_like :time_xmlschema, :xmlschema
end

View File

@ -3,5 +3,5 @@ require_relative 'shared/xmlschema'
require 'time' require 'time'
describe "Time.xmlschema" do describe "Time.xmlschema" do
it_behaves_like :time_xmlschema, :iso8601 it_behaves_like :time_library_xmlschema, :iso8601
end end

View File

@ -1,4 +1,4 @@
describe :time_xmlschema, shared: true do describe :time_library_xmlschema, shared: true do
it "parses ISO-8601 strings" do it "parses ISO-8601 strings" do
t = Time.utc(1985, 4, 12, 23, 20, 50, 520000) t = Time.utc(1985, 4, 12, 23, 20, 50, 520000)
s = "1985-04-12T23:20:50.52Z" s = "1985-04-12T23:20:50.52Z"

View File

@ -3,5 +3,5 @@ require_relative 'shared/xmlschema'
require 'time' require 'time'
describe "Time.xmlschema" do describe "Time.xmlschema" do
it_behaves_like :time_xmlschema, :xmlschema it_behaves_like :time_library_xmlschema, :xmlschema
end end

View File

@ -1444,4 +1444,60 @@ class TestTime < Test::Unit::TestCase
def test_parse_zero_bigint def test_parse_zero_bigint
assert_equal 0, Time.new("2020-10-28T16:48:07.000Z").nsec, '[Bug #19390]' assert_equal 0, Time.new("2020-10-28T16:48:07.000Z").nsec, '[Bug #19390]'
end end
# Exercises Time#xmlschema and Time#iso8601 with the same expectations:
# fractional-digit handling (including Float digit counts, which carry the
# '[ruby-core:42997]' message), Rational sub-second values, and year
# formatting around the 4-digit and negative-year boundaries.
def test_xmlschema_encode
  %i[xmlschema iso8601].each do |method|
    bug6100 = '[ruby-core:42997]'

    # Asserts every row of [expected, digits, message] against +time+.
    # A row without digits calls the method with no argument.
    check = lambda do |time, rows|
      rows.each do |expected, digits, message|
        actual = digits ? time.__send__(method, digits) : time.__send__(method)
        assert_equal(expected, actual, message)
      end
    end

    check.(Time.utc(2001, 4, 17, 19, 23, 17, 300000), [
      ["2001-04-17T19:23:17Z"],
      ["2001-04-17T19:23:17.3Z", 1],
      ["2001-04-17T19:23:17.300000Z", 6],
      ["2001-04-17T19:23:17.3000000Z", 7],
      ["2001-04-17T19:23:17.3Z", 1.9, bug6100],
    ])

    check.(Time.utc(2001, 4, 17, 19, 23, 17, 123456), [
      ["2001-04-17T19:23:17.1234560Z", 7],
      ["2001-04-17T19:23:17.123456Z", 6],
      ["2001-04-17T19:23:17.12345Z", 5],
      ["2001-04-17T19:23:17.1Z", 1],
      ["2001-04-17T19:23:17.1Z", 1.9, bug6100],
    ])

    check.(Time.at(2.quo(3)).getlocal("+09:00"), [
      ["1970-01-01T09:00:00.666+09:00", 3],
      ["1970-01-01T09:00:00.6666666666+09:00", 10],
      ["1970-01-01T09:00:00.66666666666666666666+09:00", 20],
      ["1970-01-01T09:00:00.6+09:00", 1.1, bug6100],
      ["1970-01-01T09:00:00.666+09:00", 3.2, bug6100],
    ])

    check.(Time.at(123456789.quo(9999999999)).getlocal("+09:00"), [
      ["1970-01-01T09:00:00.012+09:00", 3],
      ["1970-01-01T09:00:00.012345678+09:00", 9],
      ["1970-01-01T09:00:00.0123456789+09:00", 10],
      ["1970-01-01T09:00:00.0123456789012345678+09:00", 19],
      ["1970-01-01T09:00:00.01234567890123456789+09:00", 20],
      ["1970-01-01T09:00:00.012+09:00", 3.8, bug6100],
    ])

    check.(Time.utc(1), [["0001-01-01T00:00:00Z"]])

    # The pre-epoch case only runs where Time.at accepts a negative value.
    begin
      Time.at(-1)
    rescue ArgumentError
      # ignore
    else
      check.(Time.utc(1960, 12, 31, 23, 0, 0, 123456), [
        ["1960-12-31T23:00:00.123456Z", 6],
      ])
    end

    {
      10000  => "10000-01-01T00:00:00Z",
      9999   => "9999-01-01T00:00:00Z",
      1      => "0001-01-01T00:00:00Z",   # 1 AD
      0      => "0000-01-01T00:00:00Z",   # 1 BC
      -1     => "-0001-01-01T00:00:00Z",  # 2 BC
      -4     => "-0004-01-01T00:00:00Z",  # 5 BC
      -9999  => "-9999-01-01T00:00:00Z",
      -10000 => "-10000-01-01T00:00:00Z",
    }.each do |year, expected|
      assert_equal(expected, Time.utc(year).__send__(method))
    end
  end
end
end end

82
time.c
View File

@ -5215,6 +5215,86 @@ time_strftime(VALUE time, VALUE format)
} }
} }
/*
 * Implementation of Time#xmlschema (registered with arity -1).
 *
 * Accepts zero or one argument: the number of fractional-second digits.
 * The argument is converted with NUM2LONG and negative values are treated
 * as 0.
 *
 * Returns a UTF-8 String of the form "YYYY-MM-DDTHH:MM:SS[.fff...]" followed
 * by "Z" for UTC times or a "+HH:MM" / "-HH:MM" offset otherwise.
 *
 * Years outside 0..9999 (including non-Fixnum years) and requests for more
 * than 9 fractional digits are delegated to Time#strftime; everything else is
 * formatted directly into a pre-sized buffer.
 */
static VALUE
time_xmlschema(int argc, VALUE *argv, VALUE time)
{
    long fraction_digits = 0;
    rb_check_arity(argc, 0, 1);
    if (argc > 0) {
        fraction_digits = NUM2LONG(argv[0]);
        if (fraction_digits < 0) {
            fraction_digits = 0;
        }
    }

    struct time_object *tobj;

    GetTimeval(time, tobj);
    MAKE_TM(time, tobj); /* NOTE(review): presumably fills tobj->vtm -- confirm against the macro */

    /* -1 is a sentinel that routes non-Fixnum years to the slow path. */
    long year = -1;
    if (FIXNUM_P(tobj->vtm.year)) {
        year = FIX2LONG(tobj->vtm.year);
    }
    if (RB_UNLIKELY(year > 9999 || year < 0 || fraction_digits > 9)) {
        // Slow path for uncommon dates.
        VALUE format = rb_utf8_str_new_cstr("%FT%T");
        if (fraction_digits > 0) {
            rb_str_catf(format, ".%%#%ldN", fraction_digits);
        }
        rb_str_cat_cstr(format, TZMODE_UTC_P(tobj) ? "Z" : "%:z");
        return rb_funcallv(time, rb_intern("strftime"), 1, &format);
    }

    /* Longest possible output: date-time, offset, optional '.' and digits. */
    long buf_size = sizeof("YYYY-MM-DDTHH:MM:SS+ZH:ZM") + fraction_digits + (fraction_digits > 0 ? 1 : 0);

    VALUE str = rb_str_buf_new(buf_size);
    rb_enc_associate_index(str, rb_utf8_encindex());

    char *ptr = RSTRING_PTR(str);
    char *start = ptr;
    int written = snprintf(
        ptr,
        sizeof("YYYY-MM-DDTHH:MM:SS"),
        "%04ld-%02d-%02dT%02d:%02d:%02d",
        year,
        tobj->vtm.mon,
        tobj->vtm.mday,
        tobj->vtm.hour,
        tobj->vtm.min,
        tobj->vtm.sec
    );
    RUBY_ASSERT(written == sizeof("YYYY-MM-DDTHH:MM:SS") - 1);
    ptr += written;

    if (fraction_digits > 0) {
        long nsec = NUM2LONG(mulquov(tobj->vtm.subsecx, INT2FIX(1000000000), INT2FIX(TIME_SCALE)));
        /*
         * Keep only the leading `fraction_digits` digits of the 9-digit
         * nanosecond value. Integer division avoids a round trip through
         * floating-point pow().
         */
        long subsec = nsec;
        for (long digit = fraction_digits; digit < 9; digit++) {
            subsec /= 10;
        }

        *ptr = '.';
        ptr++;

        written = snprintf(ptr, fraction_digits + 1, "%0*ld", (int)fraction_digits, subsec); // Always allow to write \0
        RUBY_ASSERT(written > 0);
        ptr += written;
    }

    if (TZMODE_UTC_P(tobj)) {
        *ptr = 'Z';
        ptr++;
    }
    else {
        long offset = NUM2LONG(rb_time_utc_offset(time));
        /*
         * Split off the sign before dividing. With a negative offset, C's
         * `%` yields a negative minute component, which would print as
         * "-03:-30" and be truncated by the size limit below. An offset
         * between -01:00 and 00:00 would also lose its sign because the hour
         * component is 0.
         */
        char sign = '+';
        if (offset < 0) {
            sign = '-';
            offset = -offset;
        }
        int offset_hours = (int)(offset / 3600);
        int offset_minutes = (int)(offset % 3600 / 60);
        written = snprintf(ptr, sizeof("+ZH:ZM"), "%c%02d:%02d", sign, offset_hours, offset_minutes);
        RUBY_ASSERT(written == sizeof("+ZH:ZM") - 1);
        ptr += written;
    }
    rb_str_set_len(str, ptr - start); // We could skip coderange scanning as we know it's full ASCII.
    return str;
}
int ruby_marshal_write_long(long x, char *buf); int ruby_marshal_write_long(long x, char *buf);
enum {base_dump_size = 8}; enum {base_dump_size = 8};
@ -5842,6 +5922,8 @@ Init_Time(void)
rb_define_method(rb_cTime, "subsec", time_subsec, 0); rb_define_method(rb_cTime, "subsec", time_subsec, 0);
rb_define_method(rb_cTime, "strftime", time_strftime, 1); rb_define_method(rb_cTime, "strftime", time_strftime, 1);
rb_define_method(rb_cTime, "xmlschema", time_xmlschema, -1);
rb_define_alias(rb_cTime, "iso8601", "xmlschema");
/* methods for marshaling */ /* methods for marshaling */
rb_define_private_method(rb_cTime, "_dump", time_dump, -1); rb_define_private_method(rb_cTime, "_dump", time_dump, -1);