[ruby/prism] added CP950 encoding

https://github.com/ruby/prism/commit/9c2d1cf4ba
This commit is contained in:
Dhaval 2023-11-29 05:55:29 +05:30 committed by Kevin Newton
parent 57cb47bfe2
commit 9fada99cb2
5 changed files with 61 additions and 0 deletions

View File

@ -88,6 +88,7 @@ Gem::Specification.new do |spec|
"src/enc/pm_big5.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_cp949.c",
"src/enc/pm_cp950.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",

57
prism/enc/pm_cp950.c Normal file
View File

@ -0,0 +1,57 @@
#include "prism/enc/pm_encoding.h"
/**
 * Report how many bytes the CP950 character starting at b occupies.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return 1 when the first byte is below 0x80; 2 when more than one byte is
 *     available, the first byte is in 0x81-0xFE and the second byte is in
 *     0x40-0x7E or 0xA1-0xFE; 0 in every other case.
 */
static size_t
pm_encoding_cp950_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Bytes below 0x80 are complete characters on their own.
    if (lead < 0x80) {
        return 1;
    }

    // Everything else needs a second byte and a lead byte in 0x81-0xFE.
    if (n <= 1 || lead < 0x81 || lead > 0xFE) {
        return 0;
    }

    // The trail byte must fall in one of the two accepted ranges.
    const uint8_t trail = b[1];
    if (trail >= 0x40 && trail <= 0x7E) {
        return 2;
    }
    if (trail >= 0xA1 && trail <= 0xFE) {
        return 2;
    }

    return 0;
}
/**
 * Alphabetic check for the CP950 character starting at b.
 *
 * Only characters that pm_encoding_cp950_char_width reports as one byte wide
 * are forwarded to pm_encoding_ascii_alpha_char; anything else yields 0.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char for one-byte characters,
 *     otherwise 0.
 */
static size_t
pm_encoding_cp950_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and invalid sequences are never treated as alphabetic.
    if (pm_encoding_cp950_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
/**
 * Alphanumeric check for the CP950 character starting at b.
 *
 * Only characters that pm_encoding_cp950_char_width reports as one byte wide
 * are forwarded to pm_encoding_ascii_alnum_char; anything else yields 0.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char for one-byte characters,
 *     otherwise 0.
 */
static size_t
pm_encoding_cp950_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and invalid sequences are never treated as alphanumeric.
    if (pm_encoding_cp950_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
/**
 * Uppercase check for the CP950 character starting at b.
 *
 * Only characters that pm_encoding_cp950_char_width reports as one byte wide
 * are forwarded to pm_encoding_ascii_isupper_char; anything else is reported
 * as not uppercase.
 *
 * @param b Pointer to the first byte of the candidate character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char for one-byte
 *     characters, otherwise false.
 */
static bool
pm_encoding_cp950_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // Double-byte and invalid sequences are never treated as uppercase.
    if (pm_encoding_cp950_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
/**
 * cp950 encoding descriptor.
 *
 * Binds the name "cp950" to the width and character-classification callbacks
 * defined in this file and marks the encoding as multibyte.
 */
pm_encoding_t pm_encoding_cp950 = {
    .name = "cp950",
    .multibyte = true,
    .char_width = pm_encoding_cp950_char_width,
    .alpha_char = pm_encoding_cp950_alpha_char,
    .alnum_char = pm_encoding_cp950_alnum_char,
    .isupper_char = pm_encoding_cp950_isupper_char
};

View File

@ -165,6 +165,7 @@ extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;
extern pm_encoding_t pm_encoding_cp855;
extern pm_encoding_t pm_encoding_cp949;
extern pm_encoding_t pm_encoding_cp950;
extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gb1988;
extern pm_encoding_t pm_encoding_gbk;

View File

@ -6233,6 +6233,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
ENCODING1("CP936", pm_encoding_gbk);
ENCODING1("CP949", pm_encoding_cp949);
ENCODING1("CP950", pm_encoding_cp950);
ENCODING1("CP1250", pm_encoding_windows_1250);
ENCODING1("CP1251", pm_encoding_windows_1251);
ENCODING1("CP1252", pm_encoding_windows_1252);

View File

@ -69,6 +69,7 @@ module Prism
Encoding::Big5_HKSCS => 0x00...0x10000,
Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP949 => 0x00...0x10000,
Encoding::CP950 => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,