From 5802768b40e50cabfbfe0a198a44e01cbad9faae Mon Sep 17 00:00:00 2001 From: akr Date: Tue, 11 Dec 2007 03:08:50 +0000 Subject: [PATCH] * encoding.c (rb_enc_get_ascii): add an argument to provide the length of the returned character. * include/ruby/encoding.h (rb_enc_get_ascii): add the argument. * re.c (rb_reg_expr_str): modify rb_enc_get_ascii call. (rb_reg_quote): ditto. (rb_reg_regsub): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14190 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 ++++ encoding.c | 14 +++-- include/ruby/encoding.h | 8 +-- re.c | 126 +++++++++++++++++++++------------------- 4 files changed, 90 insertions(+), 69 deletions(-) diff --git a/ChangeLog b/ChangeLog index add968a321..5f8451f9ba 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Tue Dec 11 12:05:51 2007 Tanaka Akira + + * encoding.c (rb_enc_get_ascii): add an argument to provide the + length of the returned character. + + * include/ruby/encoding.h (rb_enc_get_ascii): add the argument. + + * re.c (rb_reg_expr_str): modify rb_enc_get_ascii call. + (rb_reg_quote): ditto. + (rb_reg_regsub): ditto. + Tue Dec 11 09:40:21 2007 Tanaka Akira * include/ruby/oniguruma.h (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): diff --git a/encoding.c b/encoding.c index 21b56f2636..5949b17bfd 100644 --- a/encoding.c +++ b/encoding.c @@ -505,22 +505,26 @@ rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc) return n; } -int rb_enc_get_ascii(const char *p, const char *e, rb_encoding *enc) +int rb_enc_get_ascii(const char *p, const char *e, int *len, rb_encoding *enc) { int c, l; if (e <= p) return -1; if (rb_enc_asciicompat(enc)) { c = (unsigned char)*p; - return ISASCII(c) ? c : -1; + if (!ISASCII(c)) + return -1; + if (len) *len = 1; + return c; } l = rb_enc_precise_mbclen(p, e, enc); if (!MBCLEN_CHARFOUND(l)) return -1; c = rb_enc_codepoint(p, e, enc); - if (rb_enc_isascii(c, enc)) - return c; - return -1; + if (!rb_enc_isascii(c, enc)) + return -1; + if (len) *len = l; + return c; } int diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 38ba031e67..dd30e66dfd 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -72,14 +72,14 @@ rb_encoding * rb_enc_find(const char *name); /* ptr,endptr,encoding -> mbclen */ int rb_enc_mbclen(const char*, const char *, rb_encoding*); -/* ptr,endptr,encoding -> chlen, invalid or needmore */ -int rb_enc_precise_mbclen(const char*, const char *, rb_encoding*); +/* -> chlen, invalid or needmore */ +int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); #define MBCLEN_CHARFOUND(ret) ONIGENC_MBCLEN_CHARFOUND(ret) #define MBCLEN_INVALID(ret) ONIGENC_MBCLEN_INVALID(ret) #define MBCLEN_NEEDMORE(ret) ONIGENC_MBCLEN_NEEDMORE(ret) -/* ptr,endptr,encoding -> 0x00..0x7f, -1 */ -int rb_enc_get_ascii(const char*, const char *, rb_encoding*); +/* -> 0x00..0x7f, -1 */ +int rb_enc_get_ascii(const char *p, const char *e, int *len, rb_encoding *enc); /* code,encoding -> codelen */ int rb_enc_codelen(int, rb_encoding*); diff --git a/re.c b/re.c index f41b4155a3..20fbf28d82 100644 --- a/re.c +++ b/re.c @@ -218,16 +218,21 @@ rb_reg_expr_str(VALUE str, const char *s, long len) rb_encoding *enc = rb_enc_get(str); const char *p, *pend; int need_escape = 0; - int c; + int c, clen; p = s; pend = p + len; while (pptr)) { - no = uc - '0'; + no = c - '0'; } else { - continue; + continue; } break; case 'k': - if (s < e && *s == '<') { - char *name, *name_end; - - name_end = name = s + 1; - while (name_end < e) { - if (*name_end == '>') break; - name_end += mbclen(name_end, e, enc); - } - if (name_end < e) { - no = name_to_backref_number(regs, regexp, name, name_end); - p = s = name_end + 1; - break; - } - else { - rb_raise(rb_eRuntimeError, "invalid group name reference format"); - } + if (s < e && rb_enc_get_ascii(s, e, &clen, enc) == '<') { + char *name, *name_end; + + name_end = name = s + clen; + while (name_end < e) { + c = rb_enc_get_ascii(name_end, e, &clen, enc); + if (c == '>') break; + name_end += c == -1 ? mbclen(name_end, e, enc) : clen; + } + if (name_end < e) { + no = name_to_backref_number(regs, regexp, name, name_end); + p = s = name_end + clen; + break; + } + else { + rb_raise(rb_eRuntimeError, "invalid group name reference format"); + } } - rb_str_buf_cat(val, s-2, 2); + rb_str_buf_cat(val, ss, s-ss); continue; case '0': @@ -2765,11 +2774,11 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp) break; case '\\': - rb_str_buf_cat(val, s-1, 1); + rb_str_buf_cat(val, s-clen, clen); continue; default: - rb_str_buf_cat(val, s-2, 2); + rb_str_buf_cat(val, ss, s-ss); continue; } @@ -2783,11 +2792,8 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp) if (p < e) { if (!val) { val = rb_str_buf_new(e-p); - rb_str_buf_cat(val, p, e-p); - } - else { - rb_str_buf_cat(val, p, e-p); } + rb_str_buf_cat(val, p, e-p); } if (!val) return str;