char is not unsigned

It seems that decades ago, ruby was written under the assumption that
char is unsigned, which is of course a false assumption: the signedness
of plain char is implementation-defined.  We need to explicitly store a
numeric value into an unsigned char variable to state that we expect a
value in the range 0..255.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65900 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
shyouhei 2018-11-21 08:51:39 +00:00
parent f1ed4b713b
commit 953091a4b1
2 changed files with 34 additions and 33 deletions

9
re.c
View File

@ -2539,7 +2539,7 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
VALUE buf, rb_encoding **encp, int *has_property, VALUE buf, rb_encoding **encp, int *has_property,
onig_errmsg_buffer err) onig_errmsg_buffer err)
{ {
char c; unsigned char c;
char smallbuf[2]; char smallbuf[2];
while (p < end) { while (p < end) {
@ -2602,8 +2602,9 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
p = p-2; p = p-2;
if (enc == rb_usascii_encoding()) { if (enc == rb_usascii_encoding()) {
const char *pbeg = p; const char *pbeg = p;
c = read_escaped_byte(&p, end, err); int byte = read_escaped_byte(&p, end, err);
if (c == (char)-1) return -1; if (byte == -1) return -1;
c = byte;
rb_str_buf_cat(buf, pbeg, p-pbeg); rb_str_buf_cat(buf, pbeg, p-pbeg);
} }
else { else {
@ -2652,7 +2653,7 @@ escape_asis:
break; break;
default: default:
rb_str_buf_cat(buf, &c, 1); rb_str_buf_cat(buf, (char *)&c, 1);
break; break;
} }
} }

View File

@ -6190,7 +6190,7 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en
unsigned int c; unsigned int c;
int codelen; int codelen;
size_t hexlen; size_t hexlen;
char buf[6]; unsigned char buf[6];
static rb_encoding *enc_utf8 = NULL; static rb_encoding *enc_utf8 = NULL;
switch (*s) { switch (*s) {
@ -6208,8 +6208,8 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en
case 'b': case 'b':
case 'a': case 'a':
case 'e': case 'e':
*buf = (char)unescape_ascii(*s); *buf = unescape_ascii(*s);
rb_str_cat(undumped, buf, 1); rb_str_cat(undumped, (char *)buf, 1);
s++; s++;
break; break;
case 'u': case 'u':
@ -6249,8 +6249,8 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en
if (0xd800 <= c && c <= 0xdfff) { if (0xd800 <= c && c <= 0xdfff) {
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
} }
codelen = rb_enc_mbcput(c, buf, *penc); codelen = rb_enc_mbcput(c, (char *)buf, *penc);
rb_str_cat(undumped, buf, codelen); rb_str_cat(undumped, (char *)buf, codelen);
s += hexlen; s += hexlen;
} }
} }
@ -6262,8 +6262,8 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en
if (0xd800 <= c && c <= 0xdfff) { if (0xd800 <= c && c <= 0xdfff) {
rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
} }
codelen = rb_enc_mbcput(c, buf, *penc); codelen = rb_enc_mbcput(c, (char *)buf, *penc);
rb_str_cat(undumped, buf, codelen); rb_str_cat(undumped, (char *)buf, codelen);
s += hexlen; s += hexlen;
} }
break; break;
@ -6279,7 +6279,7 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en
if (hexlen != 2) { if (hexlen != 2) {
rb_raise(rb_eRuntimeError, "invalid hex escape"); rb_raise(rb_eRuntimeError, "invalid hex escape");
} }
rb_str_cat(undumped, buf, 1); rb_str_cat(undumped, (char *)buf, 1);
s += hexlen; s += hexlen;
break; break;
default: default:
@ -6915,7 +6915,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
int cflag = 0; int cflag = 0;
unsigned int c, c0, last = 0; unsigned int c, c0, last = 0;
int modify = 0, i, l; int modify = 0, i, l;
char *s, *send; unsigned char *s, *send;
VALUE hash = 0; VALUE hash = 0;
int singlebyte = single_byte_optimizable(str); int singlebyte = single_byte_optimizable(str);
int termlen; int termlen;
@ -6999,18 +6999,18 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if (cr == ENC_CODERANGE_VALID && rb_enc_asciicompat(e1)) if (cr == ENC_CODERANGE_VALID && rb_enc_asciicompat(e1))
cr = ENC_CODERANGE_7BIT; cr = ENC_CODERANGE_7BIT;
str_modify_keep_cr(str); str_modify_keep_cr(str);
s = RSTRING_PTR(str); send = RSTRING_END(str); s = (unsigned char *)RSTRING_PTR(str); send = (unsigned char *)RSTRING_END(str);
termlen = rb_enc_mbminlen(enc); termlen = rb_enc_mbminlen(enc);
if (sflag) { if (sflag) {
int clen, tlen; int clen, tlen;
long offset, max = RSTRING_LEN(str); long offset, max = RSTRING_LEN(str);
unsigned int save = -1; unsigned int save = -1;
char *buf = ALLOC_N(char, max + termlen), *t = buf; unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf;
while (s < send) { while (s < send) {
int may_modify = 0; int may_modify = 0;
c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1);
tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
s += clen; s += clen;
@ -7046,7 +7046,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if ((offset = t - buf) + tlen > max) { if ((offset = t - buf) + tlen > max) {
size_t MAYBE_UNUSED(old) = max + termlen; size_t MAYBE_UNUSED(old) = max + termlen;
max = offset + tlen + (send - s); max = offset + tlen + (send - s);
SIZED_REALLOC_N(buf, char, max + termlen, old); SIZED_REALLOC_N(buf, unsigned char, max + termlen, old);
t = buf + offset; t = buf + offset;
} }
rb_enc_mbcput(c, t, enc); rb_enc_mbcput(c, t, enc);
@ -7059,8 +7059,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if (!STR_EMBED_P(str)) { if (!STR_EMBED_P(str)) {
ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
} }
TERM_FILL(t, termlen); TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = buf; RSTRING(str)->as.heap.ptr = (char *)buf;
RSTRING(str)->as.heap.len = t - buf; RSTRING(str)->as.heap.len = t - buf;
STR_SET_NOEMBED(str); STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max; RSTRING(str)->as.heap.aux.capa = max;
@ -7086,11 +7086,11 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
else { else {
int clen, tlen; int clen, tlen;
long offset, max = (long)((send - s) * 1.2); long offset, max = (long)((send - s) * 1.2);
char *buf = ALLOC_N(char, max + termlen), *t = buf; unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf;
while (s < send) { while (s < send) {
int may_modify = 0; int may_modify = 0;
c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1);
tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
if (c < 256) { if (c < 256) {
@ -7119,7 +7119,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if ((offset = t - buf) + tlen > max) { if ((offset = t - buf) + tlen > max) {
size_t MAYBE_UNUSED(old) = max + termlen; size_t MAYBE_UNUSED(old) = max + termlen;
max = offset + tlen + (long)((send - s) * 1.2); max = offset + tlen + (long)((send - s) * 1.2);
SIZED_REALLOC_N(buf, char, max + termlen, old); SIZED_REALLOC_N(buf, unsigned char, max + termlen, old);
t = buf + offset; t = buf + offset;
} }
if (s != t) { if (s != t) {
@ -7135,8 +7135,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if (!STR_EMBED_P(str)) { if (!STR_EMBED_P(str)) {
ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
} }
TERM_FILL(t, termlen); TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = buf; RSTRING(str)->as.heap.ptr = (char *)buf;
RSTRING(str)->as.heap.len = t - buf; RSTRING(str)->as.heap.len = t - buf;
STR_SET_NOEMBED(str); STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max; RSTRING(str)->as.heap.aux.capa = max;
@ -7405,7 +7405,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
char squeez[TR_TABLE_SIZE]; char squeez[TR_TABLE_SIZE];
rb_encoding *enc = 0; rb_encoding *enc = 0;
VALUE del = 0, nodel = 0; VALUE del = 0, nodel = 0;
char *s, *send, *t; unsigned char *s, *send, *t;
int i, modify = 0; int i, modify = 0;
int ascompat, singlebyte = single_byte_optimizable(str); int ascompat, singlebyte = single_byte_optimizable(str);
unsigned int save; unsigned int save;
@ -7426,15 +7426,15 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
} }
str_modify_keep_cr(str); str_modify_keep_cr(str);
s = t = RSTRING_PTR(str); s = t = (unsigned char *)RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil; if (!s || RSTRING_LEN(str) == 0) return Qnil;
send = RSTRING_END(str); send = (unsigned char *)RSTRING_END(str);
save = -1; save = -1;
ascompat = rb_enc_asciicompat(enc); ascompat = rb_enc_asciicompat(enc);
if (singlebyte) { if (singlebyte) {
while (s < send) { while (s < send) {
unsigned int c = *(unsigned char*)s++; unsigned int c = *s++;
if (c != save || (argc > 0 && !squeez[c])) { if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c; *t++ = save = c;
} }
@ -7445,14 +7445,14 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
unsigned int c; unsigned int c;
int clen; int clen;
if (ascompat && (c = *(unsigned char*)s) < 0x80) { if (ascompat && (c = *s) < 0x80) {
if (c != save || (argc > 0 && !squeez[c])) { if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c; *t++ = save = c;
} }
s++; s++;
} }
else { else {
c = rb_enc_codepoint_len(s, send, &clen, enc); c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);
if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
if (t != s) rb_enc_mbcput(c, t, enc); if (t != s) rb_enc_mbcput(c, t, enc);
@ -7464,9 +7464,9 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
} }
} }
TERM_FILL(t, TERM_LEN(str)); TERM_FILL((char *)t, TERM_LEN(str));
if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) { if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
STR_SET_LEN(str, t - RSTRING_PTR(str)); STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
modify = 1; modify = 1;
} }