* ext/nkf/nkf-utf8/nkf.c: follow nkf 1.62

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8061 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2005-03-03 14:27:17 +00:00
parent 31d9032ae2
commit e5d5c0d8fc
2 changed files with 240 additions and 137 deletions

View File

@ -1,3 +1,7 @@
Thu Mar 3 23:24:00 2005 NARUSE, Yui <naruse@ruby-lang.org>
* ext/nkf/nkf-utf8/nkf.c: follow nkf 1.62
Thu Mar 3 18:47:18 2005 Nobuyoshi Nakada <nobu@ruby-lang.org> Thu Mar 3 18:47:18 2005 Nobuyoshi Nakada <nobu@ruby-lang.org>
* {bcc32,win32,wince}/Makefile.sub (config.h): check if affected * {bcc32,win32,wince}/Makefile.sub (config.h): check if affected

View File

@ -41,7 +41,7 @@
***********************************************************************/ ***********************************************************************/
/* $Id$ */ /* $Id$ */
#define NKF_VERSION "2.0.4" #define NKF_VERSION "2.0.4"
#define NKF_RELEASE_DATE "2005-02-02" #define NKF_RELEASE_DATE "2005-02-20"
#include "config.h" #include "config.h"
static char *CopyRight = static char *CopyRight =
@ -149,12 +149,20 @@ static char *CopyRight =
#ifndef MSDOS /* UNIX, OS/2 */ #ifndef MSDOS /* UNIX, OS/2 */
#include <unistd.h> #include <unistd.h>
#include <utime.h> #include <utime.h>
#else #else /* defined(MSDOS) */
#ifdef __WIN32__
#ifdef __BORLANDC__ /* BCC32 */
#include <utime.h>
#else /* !defined(__BORLANDC__) */
#include <sys/utime.h>
#endif /* (__BORLANDC__) */
#else /* !defined(__WIN32__) */
#if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */ #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
#include <sys/utime.h> #include <sys/utime.h>
#elif defined(__TURBOC__) /* BCC */ #elif defined(__TURBOC__) /* BCC */
#include <utime.h> #include <utime.h>
#elif defined(LSI_C) /* LSI C */ #elif defined(LSI_C) /* LSI C */
#endif /* (__WIN32__) */
#endif #endif
#endif #endif
#endif #endif
@ -282,7 +290,7 @@ STATIC int noconvert PROTO((FILE *f));
STATIC int kanji_convert PROTO((FILE *f)); STATIC int kanji_convert PROTO((FILE *f));
STATIC int h_conv PROTO((FILE *f,int c2,int c1)); STATIC int h_conv PROTO((FILE *f,int c2,int c1));
STATIC int push_hold_buf PROTO((int c2)); STATIC int push_hold_buf PROTO((int c2));
STATIC void set_iconv PROTO((int f, int (*iconv_func)())); STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
STATIC int s_iconv PROTO((int c2,int c1,int c0)); STATIC int s_iconv PROTO((int c2,int c1,int c0));
STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1)); STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
STATIC int e_iconv PROTO((int c2,int c1,int c0)); STATIC int e_iconv PROTO((int c2,int c1,int c0));
@ -332,13 +340,14 @@ STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
STATIC int mime_integrity PROTO((FILE *f,unsigned char *p)); STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
STATIC int base64decode PROTO((int c)); STATIC int base64decode PROTO((int c));
STATIC void mime_prechar PROTO((int c2, int c1));
STATIC void mime_putc PROTO((int c)); STATIC void mime_putc PROTO((int c));
STATIC void open_mime PROTO((int c)); STATIC void open_mime PROTO((int c));
STATIC void close_mime PROTO(()); STATIC void close_mime PROTO(());
STATIC void usage PROTO(()); STATIC void usage PROTO(());
STATIC void version PROTO(()); STATIC void version PROTO(());
STATIC void options PROTO((unsigned char *c)); STATIC void options PROTO((unsigned char *c));
#ifdef PERL_XS #if defined(PERL_XS) || defined(WIN32DLL)
STATIC void reinit PROTO(()); STATIC void reinit PROTO(());
#endif #endif
@ -369,6 +378,7 @@ static int hira_f = FALSE; /* hira/kata henkan */
static int input_f = FALSE; /* non fixed input code */ static int input_f = FALSE; /* non fixed input code */
static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */ static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
static int mime_decode_f = FALSE; /* mime decode is explicitly on */
static int mimebuf_f = FALSE; /* MIME buffered input */ static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */ static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */ static int iso8859_f = FALSE; /* ISO8859 through */
@ -646,7 +656,14 @@ static int crmode_f = 0; /* CR, NL, CRLF */
static int end_check; static int end_check;
#endif /*Easy Win */ #endif /*Easy Win */
#ifndef PERL_XS #define STD_GC_BUFSIZE (256)
int std_gc_buf[STD_GC_BUFSIZE];
int std_gc_ndx;
#ifdef WIN32DLL
#include "nkf32dll.c"
#elif defined(PERL_XS)
#else /* WIN32DLL */
int int
main(argc, argv) main(argc, argv)
int argc; int argc;
@ -815,7 +832,7 @@ main(argc, argv)
#ifdef OVERWRITE #ifdef OVERWRITE
if (overwrite) { if (overwrite) {
struct stat sb; struct stat sb;
#if defined(MSDOS) && !defined(__MINGW32__) #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
time_t tb[2]; time_t tb[2];
#else #else
struct utimbuf tb; struct utimbuf tb;
@ -835,7 +852,7 @@ main(argc, argv)
} }
/* Restore the timestamp */ /* Restore the timestamp */
#if defined(MSDOS) && !defined(__MINGW32__) #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
tb[0] = tb[1] = sb.st_mtime; tb[0] = tb[1] = sb.st_mtime;
if (utime(outfname, tb)) { if (utime(outfname, tb)) {
fprintf(stderr, "Can't set timestamp %s\n", outfname); fprintf(stderr, "Can't set timestamp %s\n", outfname);
@ -871,10 +888,10 @@ main(argc, argv)
#else /* for Other OS */ #else /* for Other OS */
if (file_out == TRUE) if (file_out == TRUE)
fclose(stdout); fclose(stdout);
#endif #endif /*Easy Win */
return (0); return (0);
} }
#endif #endif /* WIN32DLL */
static static
struct { struct {
@ -1229,6 +1246,7 @@ options(cp)
} }
continue; continue;
case 'm': /* MIME support */ case 'm': /* MIME support */
mime_decode_f = TRUE;
if (*cp=='B'||*cp=='Q') { if (*cp=='B'||*cp=='Q') {
mime_decode_mode = *cp++; mime_decode_mode = *cp++;
mimebuf_f = FIXED_MIME; mimebuf_f = FIXED_MIME;
@ -1718,10 +1736,7 @@ code_status(c)
} }
} }
#define STD_GC_BUFSIZE (256) #ifndef WIN32DLL
int std_gc_buf[STD_GC_BUFSIZE];
int std_gc_ndx;
int int
std_getc(f) std_getc(f)
FILE *f; FILE *f;
@ -1731,6 +1746,7 @@ FILE *f;
} }
return getc(f); return getc(f);
} }
#endif /*WIN32DLL*/
int int
std_ungetc(c,f) std_ungetc(c,f)
@ -1744,6 +1760,7 @@ FILE *f;
return c; return c;
} }
#ifndef WIN32DLL
void void
std_putc(c) std_putc(c)
int c; int c;
@ -1751,6 +1768,7 @@ int c;
if(c!=EOF) if(c!=EOF)
putchar(c); putchar(c);
} }
#endif /*WIN32DLL*/
int int
noconvert(f) noconvert(f)
@ -2126,8 +2144,7 @@ kanji_convert(f)
} else if ((c1 == NL || c1 == CR) && broken_f&4) { } else if ((c1 == NL || c1 == CR) && broken_f&4) {
input_mode = ASCII; set_iconv(FALSE, 0); input_mode = ASCII; set_iconv(FALSE, 0);
SEND; SEND;
/* } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
} else if (c1 == NL && mime_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f); i_ungetc(SPACE,f);
continue; continue;
@ -2136,7 +2153,7 @@ kanji_convert(f)
} }
c1 = NL; c1 = NL;
SEND; SEND;
} else if (c1 == CR && mime_f && !mime_decode_mode ) { } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF) { if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) { if (c1==SPACE) {
i_ungetc(SPACE,f); i_ungetc(SPACE,f);
@ -2153,7 +2170,6 @@ kanji_convert(f)
} }
c1 = CR; c1 = CR;
SEND; SEND;
*/
} else } else
SEND; SEND;
} }
@ -2979,14 +2995,7 @@ base64_conv(c2, c1)
int c2, int c2,
c1; c1;
{ {
if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) { mime_prechar(c2, c1);
(*o_putc)(EOF);
(*o_putc)(NL);
} else if (base64_count>66 && mimeout_mode) {
(*o_base64conv)(EOF,0);
(*o_base64conv)(NL,0);
(*o_base64conv)(SPACE,0);
}
(*o_base64conv)(c2,c1); (*o_base64conv)(c2,c1);
} }
@ -3487,6 +3496,10 @@ int mime_encode_method[] = {
#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c) #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
#define nkf_isdigit(c) ('0'<=c && c<='9') #define nkf_isdigit(c) ('0'<=c && c<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F')) #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
/*
 * ASCII-only character classification helpers.
 *
 * Every use of the parameter is parenthesized so that an argument which is
 * itself an expression (e.g. `c & 0x7f') is classified as a whole instead of
 * being torn apart by operator precedence.  The argument is still evaluated
 * more than once, so do not pass expressions with side effects.
 *
 *   nkf_isblank(c)  - c is SPACE or TAB
 *   nkf_isspace(c)  - c is blank, CR or NL
 *   nkf_isalpha(c)  - c is in 'a'..'z' or 'A'..'Z'
 *   nkf_isalnum(c)  - c is a decimal digit or a letter
 */
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit((c)) || nkf_isalpha(c))
void void
switch_mime_getc() switch_mime_getc()
@ -3674,6 +3687,7 @@ set_input_codename (codename)
is_inputcode_set = TRUE; is_inputcode_set = TRUE;
} }
#ifndef WIN32DLL
void void
print_guessed_code (filename) print_guessed_code (filename)
char *filename; char *filename;
@ -3689,6 +3703,7 @@ print_guessed_code (filename)
if (filename != NULL) printf("%s:", filename); if (filename != NULL) printf("%s:", filename);
printf("%s\n", codename); printf("%s\n", codename);
} }
#endif /*WIN32DLL*/
int int
hex2bin(x) hex2bin(x)
@ -4188,6 +4203,10 @@ int mode;
i = 0; i = 0;
if (base64_count>45) { if (base64_count>45) {
if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
(*o_mputc)(mimeout_buf[i]);
i++;
}
(*o_mputc)(NL); (*o_mputc)(NL);
(*o_mputc)(SPACE); (*o_mputc)(SPACE);
base64_count = 1; base64_count = 1;
@ -4304,42 +4323,160 @@ mimeout_addchar(c)
} }
} }
/* The (c2, c1) pair most recently handed to mime_prechar(). */
int mime_lastchar2, mime_lastchar1;

/*
 * Pre-output hook: base64_conv() calls this with the same (c2, c1) pair just
 * before forwarding it to o_base64conv.
 *
 * While a MIME encoded word is open (mimeout_mode != 0) and the incoming
 * character has a non-zero c2, the current line is broken when
 * base64_count plus the projected size of the buffered bytes exceeds 66:
 * EOF is sent through o_base64conv, followed by NL and SPACE.
 * NOTE(review): `mimeout_buf_count/3*4' presumably approximates the base64
 * expansion (4 output characters per 3 buffered bytes) -- confirm.
 *
 * Two further experiments were present only as commented-out code and remain
 * inactive: emitting a SPACE through o_base64conv when switching between
 * characters with c2 != 0 and non-space characters with c2 == 0, in either
 * direction.
 *
 * The pair is always remembered in mime_lastchar2 / mime_lastchar1.
 */
void mime_prechar(c2, c1)
int c2, c1;
{
    if (mimeout_mode && c2
        && base64_count + mimeout_buf_count/3*4 > 66) {
        (*o_base64conv)(EOF, 0);
        (*o_base64conv)(0, NL);
        (*o_base64conv)(0, SPACE);
    }

    mime_lastchar2 = c2;
    mime_lastchar1 = c1;
}
void void
mime_putc(c) mime_putc(c)
int c; int c;
{ {
int i = 0; int i = 0;
int j = 0; int j = 0;
int lastchar;
if (mimeout_f==FIXED_MIME && base64_count>71) { if (mimeout_f == FIXED_MIME){
if (mimeout_mode=='Q') { if (mimeout_mode == 'Q'){
if (base64_count > 71){
if (c!=CR && c!=NL) { if (c!=CR && c!=NL) {
(*o_mputc)('='); (*o_mputc)('=');
(*o_mputc)(NL); (*o_mputc)(NL);
} }
} else { base64_count = 0;
}
}else{
if (base64_count > 71){
eof_mime(); eof_mime();
(*o_mputc)(NL); (*o_mputc)(NL);
base64_count = 0;
} }
base64_count=0; if (c == EOF) { /* c==EOF */
} else if (mimeout_f!=FIXED_MIME && !mimeout_mode && (c==CR||c==NL)) { eof_mime();
base64_count=0;
} }
if (c!=EOF && mimeout_f!=FIXED_MIME) { }
if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 ) ) { if (c != EOF) { /* c==EOF */
mimeout_addchar(c);
}
return;
}
/* mimeout_f != FIXED_MIME */
if (c == EOF) { /* c==EOF */
j = mimeout_buf_count;
mimeout_buf_count = 0;
i = 0;
for (;i<j;i++) {
/*if (nkf_isspace(mimeout_buf[i])){
break;
}*/
mimeout_addchar(mimeout_buf[i]);
}
eof_mime();
for (;i<j;i++) {
(*o_mputc)(mimeout_buf[i]);
base64_count++;
}
return;
}
if (mimeout_mode=='Q') { if (mimeout_mode=='Q') {
if (c<=SPACE) { if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
if (c <= SPACE) {
close_mime(); close_mime();
(*o_mputc)(SPACE); (*o_mputc)(SPACE);
base64_count++; base64_count++;
} }
(*o_mputc)(c); (*o_mputc)(c);
base64_count++; base64_count++;
}
return; return;
} else if (mimeout_mode) { }
if (mimeout_buf_count>0
&& (mimeout_buf[mimeout_buf_count-1]==CR || mimeout_buf[mimeout_buf_count-1]==NL)) { if (mimeout_buf_count > 0){
if (c==SPACE || c==TAB) { lastchar = mimeout_buf[mimeout_buf_count - 1];
}else{
lastchar = -1;
}
if (!mimeout_mode) {
if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
if (nkf_isspace(c)) {
if (c==CR || c==NL) {
base64_count=0;
}
for (i=0;i<mimeout_buf_count;i++) {
(*o_mputc)(mimeout_buf[i]);
if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
base64_count = 0;
}else{
base64_count++;
}
}
mimeout_buf[0] = c;
mimeout_buf_count = 1;
}else{
if (base64_count > 1
&& base64_count + mimeout_buf_count > 76){
(*o_mputc)(NL);
base64_count = 0;
if (!nkf_isspace(mimeout_buf[0])){
(*o_mputc)(SPACE);
base64_count++;
}
}
mimeout_buf[mimeout_buf_count++] = c;
if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
open_mime(output_mode);
}
}
return;
}else{
if (lastchar==CR || lastchar == NL){
for (i=0;i<mimeout_buf_count;i++) {
(*o_mputc)(mimeout_buf[i]);
}
base64_count = 0;
mimeout_buf_count = 0;
}
if (lastchar==SPACE) {
for (i=0;i<mimeout_buf_count-1;i++) {
(*o_mputc)(mimeout_buf[i]);
base64_count++;
}
mimeout_buf[0] = SPACE;
mimeout_buf_count = 1;
}
open_mime(output_mode);
}
}else{
/* mimeout_mode == 'B', 1, 2 */
if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
if (lastchar == CR || lastchar == NL){
if (nkf_isblank(c)) {
for (i=0;i<mimeout_buf_count;i++) { for (i=0;i<mimeout_buf_count;i++) {
mimeout_addchar(mimeout_buf[i]); mimeout_addchar(mimeout_buf[i]);
} }
@ -4375,7 +4512,6 @@ mime_putc(c)
} }
return; return;
} }
if (mimeout_buf_count>0 && SPACE<c && c!='=') { if (mimeout_buf_count>0 && SPACE<c && c!='=') {
mimeout_buf[mimeout_buf_count++] = c; mimeout_buf[mimeout_buf_count++] = c;
if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
@ -4387,54 +4523,8 @@ mime_putc(c)
} }
return; return;
} }
} else if (!mimeout_mode) {
if (c==SPACE || c==TAB || c==CR || c==NL) {
if ((c==CR || c==NL)
&&(mimeout_buf[mimeout_buf_count-1]==SPACE
|| mimeout_buf[mimeout_buf_count-1]==TAB)) {
mimeout_buf_count--;
} }
for (i=0;i<mimeout_buf_count;i++) {
(*o_mputc)(mimeout_buf[i]);
base64_count++;
} }
mimeout_buf_count = 0;
}
mimeout_buf[mimeout_buf_count++] = c;
if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
open_mime(output_mode);
}
return;
}
} else if (!mimeout_mode) {
if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) {
for (i=0;i<mimeout_buf_count-1;i++) {
(*o_mputc)(mimeout_buf[i]);
base64_count++;
}
mimeout_buf[0] = SPACE;
mimeout_buf_count = 1;
}
open_mime(output_mode);
}
} else if (c == EOF) { /* c==EOF */
j = mimeout_buf_count;
mimeout_buf_count = 0;
i = 0;
for (;i<j;i++) {
if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
|| mimeout_buf[i]==CR || mimeout_buf[i]==NL)
break;
mimeout_addchar(mimeout_buf[i]);
}
eof_mime();
for (;i<j;i++) {
(*o_mputc)(mimeout_buf[i]);
base64_count++;
}
return;
}
if (mimeout_buf_count>0) { if (mimeout_buf_count>0) {
j = mimeout_buf_count; j = mimeout_buf_count;
mimeout_buf_count = 0; mimeout_buf_count = 0;
@ -4456,7 +4546,7 @@ mime_putc(c)
} }
#ifdef PERL_XS #if defined(PERL_XS) || defined(WIN32DLL)
void void
reinit() reinit()
{ {
@ -4475,6 +4565,7 @@ reinit()
input_f = FALSE; input_f = FALSE;
alpha_f = FALSE; alpha_f = FALSE;
mime_f = STRICT_MIME; mime_f = STRICT_MIME;
mime_decode_f = FALSE;
mimebuf_f = FALSE; mimebuf_f = FALSE;
broken_f = FALSE; broken_f = FALSE;
iso8859_f = FALSE; iso8859_f = FALSE;
@ -4561,6 +4652,10 @@ reinit()
#ifdef CHECK_OPTION #ifdef CHECK_OPTION
iconv_for_check = 0; iconv_for_check = 0;
#endif #endif
input_codename = "";
#ifdef WIN32DLL
reinitdll();
#endif /*WIN32DLL*/
} }
#endif #endif
@ -4577,9 +4672,13 @@ int c2,c1,c0;
{ {
fprintf(stderr,"nkf internal module connection failure.\n"); fprintf(stderr,"nkf internal module connection failure.\n");
exit(1); exit(1);
return 0; /* LINT */
} }
#ifndef PERL_XS #ifndef PERL_XS
#ifdef WIN32DLL
#define fprintf dllprintf
#endif
void void
usage() usage()
{ {
@ -4667,7 +4766,7 @@ version()
,NKF_VERSION,NKF_RELEASE_DATE); ,NKF_VERSION,NKF_RELEASE_DATE);
fprintf(stderr,"\n%s\n",CopyRight); fprintf(stderr,"\n%s\n",CopyRight);
} }
#endif #endif /*PERL_XS*/
/** /**
** $B%Q%C%A@):n<T(B ** $B%Q%C%A@):n<T(B