ruby/internal/encoding.h
Jean Boussier fae86a701e string.c: Directly create strings with the correct encoding
While profiling msgpack-ruby I noticed a very substantial amout of time
spent in `rb_enc_associate_index`, called by `rb_utf8_str_new`.

On that benchmark, `rb_utf8_str_new` is 33% of the total runtime,
in big part because it cause GC to trigger often, but even then
`5.3%` of the total runtime is spent in `rb_enc_associate_index`
called by `rb_utf8_str_new`.

After closer inspection, it appears that it's performing a lot of
safety check we can assert we don't need, and other extra useless
operations, because strings are first created and filled as ASCII-8BIT
and then later reassociated to the desired encoding.

By directly allocating the string with the right encoding, it allow
to skip a lot of duplicated and useless operations.

After this change, the time spent in `rb_utf8_str_new` is down
to `28.4%` of total runtime, and most of that is GC.
2024-11-13 13:32:32 +01:00

39 lines
1.5 KiB
C

#ifndef INTERNAL_ENCODING_H /*-*-C-*-vi:se ft=c:*/
#define INTERNAL_ENCODING_H
/**
* @author Ruby developers <ruby-core@ruby-lang.org>
* @copyright This file is a part of the programming language Ruby.
* Permission is hereby granted, to either redistribute and/or
* modify this file, provided that the conditions mentioned in the
* file COPYING are met. Consult the file for details.
* @brief Internal header for Encoding.
*/
#include "ruby/ruby.h" /* for ID */
#include "ruby/encoding.h" /* for rb_encoding */
#define rb_enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
#define rb_is_usascii_enc(enc) ((enc) == rb_usascii_encoding())
#define rb_is_ascii8bit_enc(enc) ((enc) == rb_ascii8bit_encoding())
#define rb_is_locale_enc(enc) ((enc) == rb_locale_encoding())
/* encoding.c */
ID rb_id_encoding(void);
const char * rb_enc_inspect_name(rb_encoding *enc);
rb_encoding *rb_enc_get_from_index(int index);
rb_encoding *rb_enc_check_str(VALUE str1, VALUE str2);
int rb_encdb_replicate(const char *alias, const char *orig);
int rb_encdb_alias(const char *alias, const char *orig);
int rb_enc_autoload(rb_encoding *enc);
int rb_encdb_dummy(const char *name);
void rb_encdb_declare(const char *name);
void rb_enc_set_base(const char *name, const char *orig);
int rb_enc_set_dummy(int index);
void rb_enc_raw_set(VALUE obj, rb_encoding *enc);
PUREFUNC(int rb_data_is_encoding(VALUE obj));
/* vm.c */
void rb_free_global_enc_table(void);
#endif /* INTERNAL_ENCODING_H */