Rename the big5-hkscs stuff to something more generic and add UAO sharing common code.

Merge the Big5 extensions into pm_big5.c
This commit is contained in:
Ryan Garver 2023-11-18 14:49:48 -08:00 committed by Kevin Newton
parent a4003bb8dc
commit e16ff17374
6 changed files with 66 additions and 55 deletions

View File

@ -85,7 +85,6 @@ Gem::Specification.new do |spec|
"lib/prism/visitor.rb",
"src/diagnostic.c",
"src/enc/pm_big5.c",
"src/enc/pm_big5_hkscs.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",

View File

@ -15,6 +15,22 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
return 0;
}
static size_t
pm_encoding_big5_star_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
return 1;
}
// These are the double byte characters.
if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
return 2;
}
return 0;
}
static size_t
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
@ -24,6 +40,15 @@ pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
}
}
static size_t
pm_encoding_big5_star_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_star_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
static size_t
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
@ -33,6 +58,15 @@ pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
}
}
static size_t
pm_encoding_big5_star_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_star_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
static bool
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
@ -42,6 +76,15 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
}
}
static bool
pm_encoding_big5_star_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_star_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return false;
}
}
/** Big5 encoding */
pm_encoding_t pm_encoding_big5 = {
.name = "big5",
@ -51,3 +94,23 @@ pm_encoding_t pm_encoding_big5 = {
.isupper_char = pm_encoding_big5_isupper_char,
.multibyte = true
};
/** Big5-HKSCS encoding */
pm_encoding_t pm_encoding_big5_hkscs = {
.name = "big5-hkscs",
.char_width = pm_encoding_big5_star_char_width,
.alnum_char = pm_encoding_big5_star_alnum_char,
.alpha_char = pm_encoding_big5_star_alpha_char,
.isupper_char = pm_encoding_big5_star_isupper_char,
.multibyte = true
};
/** Big5-UAO encoding */
pm_encoding_t pm_encoding_big5_uao = {
.name = "big5-uao",
.char_width = pm_encoding_big5_star_char_width,
.alnum_char = pm_encoding_big5_star_alnum_char,
.alpha_char = pm_encoding_big5_star_alpha_char,
.isupper_char = pm_encoding_big5_star_isupper_char,
.multibyte = true
};

View File

@ -1,54 +0,0 @@
#include "prism/enc/pm_encoding.h"
static size_t
pm_encoding_big5_hkscs_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
return 1;
}
// These are the double byte characters.
if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
return 2;
}
return 0;
}
static size_t
pm_encoding_big5_hkscs_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
static size_t
pm_encoding_big5_hkscs_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
static bool
pm_encoding_big5_hkscs_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return false;
}
}
/** Big5 encoding */
pm_encoding_t pm_encoding_big5_hkscs = {
.name = "big5-hkscs",
.char_width = pm_encoding_big5_hkscs_char_width,
.alnum_char = pm_encoding_big5_hkscs_alnum_char,
.alpha_char = pm_encoding_big5_hkscs_alpha_char,
.isupper_char = pm_encoding_big5_hkscs_isupper_char,
.multibyte = true
};

View File

@ -159,6 +159,7 @@ extern pm_encoding_t pm_encoding_ascii;
extern pm_encoding_t pm_encoding_ascii_8bit;
extern pm_encoding_t pm_encoding_big5;
extern pm_encoding_t pm_encoding_big5_hkscs;
extern pm_encoding_t pm_encoding_big5_uao;
extern pm_encoding_t pm_encoding_cp51932;
extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;

View File

@ -6091,6 +6091,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("BINARY", pm_encoding_ascii_8bit);
ENCODING1("Big5", pm_encoding_big5);
ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
ENCODING1("Big5-UAO", pm_encoding_big5_uao);
break;
case 'C': case 'c':
ENCODING1("CP437", pm_encoding_ibm437);

View File

@ -65,6 +65,7 @@ module Prism
Encoding::Windows_874 => 0x00...0x100,
Encoding::Big5 => 0x00...0x10000,
Encoding::Big5_HKSCS => 0x00...0x10000,
Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,