From c9000c247c057f9f5ae7ef40a34d1b973c00ab20 Mon Sep 17 00:00:00 2001 From: "bar@bar.mysql.r18.ru" <> Date: Fri, 3 Jan 2003 14:35:32 +0400 Subject: [PATCH] Charset index is stored in XML now --- libmysql/Makefile.shared | 2 +- mysys/charset.c | 203 ++++++++++++---- sql/share/Makefile.am | 4 +- sql/share/charsets/Index.xml | 455 +++++++++++++++++++++++++++++++++++ 4 files changed, 617 insertions(+), 47 deletions(-) create mode 100644 sql/share/charsets/Index.xml diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index a2e6fddff0f..4d8b703fb2d 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -58,7 +58,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ mf_loadpath.lo my_pthread.lo my_thr_init.lo \ thr_mutex.lo mulalloc.lo string.lo default.lo \ my_compress.lo array.lo my_once.lo list.lo my_net.lo \ - charset.lo hash.lo mf_iocache.lo \ + charset.lo xml.lo hash.lo mf_iocache.lo \ mf_iocache2.lo my_seek.lo \ my_pread.lo mf_cache.lo my_vsnprintf.lo md5.lo sha1.lo\ my_getopt.lo my_gethostbyname.lo my_port.lo diff --git a/mysys/charset.c b/mysys/charset.c index cf0628495fc..3f650bbe14d 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -19,7 +19,9 @@ #include #include #include +#include +#define MY_CHARSET_INDEX "Index.xml" const char *charsets_dir = NULL; static int charset_initialized=0; @@ -85,53 +87,166 @@ char *get_charsets_dir(char *buf) } +#define MAX_BUF 1024*16 + + +static void mstr(char *str,const char *src,uint l1,uint l2) +{ + l1 = l1str; s++) + if (!strncmp(attr,s->str,len)) + return s; + return NULL; +} + +struct my_cs_file_info +{ + CHARSET_INFO cs; + myf myflags; +}; + +static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len) +{ + struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data; + struct my_cs_file_section_st *s = cs_file_sec(attr,len); + + if ( s && (s->state == _CS_CHARSET)) + { + bzero(&i->cs,sizeof(i->cs)); + } + return MY_XML_OK; +} + +static int 
cs_leave(MY_XML_PARSER *st,const char *attr, uint len) +{ + struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data; + struct my_cs_file_section_st *s = cs_file_sec(attr,len); + + if (s && (s->state == _CS_COLLATION) && !all_charsets[i->cs.number]) + { + if (!(all_charsets[i->cs.number]= + (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),i->myflags))) + { + return MY_XML_ERROR; + } + all_charsets[i->cs.number][0]=i->cs; + } + return MY_XML_OK; +} + +static int cs_value(MY_XML_PARSER *st,const char *attr, uint len) +{ + struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data; + struct my_cs_file_section_st *s; + int state = (s=cs_file_sec(st->attr,strlen(st->attr))) ? s->state : 0; + + if(0) + { + char str[256]; + mstr(str,attr,len,sizeof(str)-1); + printf("VALUE %d %s='%s'\n",state,st->attr,str); + } + + switch (state) + { + case _CS_ID: + i->cs.number = my_strntoul(my_charset_latin1,attr,len,(char**)NULL,0); + break; + case _CS_COLNAME: + if ((i->cs.name = (char*) my_once_alloc(len+1,i->myflags))) + { + memcpy((char*)i->cs.name,attr,len); + ((char*)(i->cs.name))[len]='\0'; + } + break; + } + return MY_XML_OK; +} + static my_bool read_charset_index(myf myflags) { - struct simpleconfig_buf_st fb; - char buf[MAX_LINE], num_buf[MAX_LINE]; - - strmov(get_charsets_dir(buf), "Index"); - - if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) - return TRUE; - fb.buf[0] = '\0'; - fb.p = fb.buf; - + char *buf; + int fd; + uint len; + MY_XML_PARSER p; + struct my_cs_file_info i; - while (!get_word(&fb, buf) && !get_word(&fb, num_buf)) + if (! 
(buf = (char *)my_malloc(MAX_BUF,myflags))) + return FALSE; + + strmov(get_charsets_dir(buf),MY_CHARSET_INDEX); + + if ((fd=my_open(buf,O_RDONLY,myflags)) < 0) { - uint csnum; - uint length; - CHARSET_INFO *cs; - - if (!(csnum = atoi(num_buf))) - { - /* corrupt Index file */ - my_fclose(fb.f,myflags); - return TRUE; - } - - if (all_charsets[csnum]) - continue; - - if (!(cs=(CHARSET_INFO*) my_once_alloc(sizeof(cs[0]),myflags))) - { - my_fclose(fb.f,myflags); - return TRUE; - } - bzero(cs,sizeof(cs[0])); - - if (!(cs->name= (char*)my_once_alloc(length=(uint)strlen(buf)+1,myflags))) - { - my_fclose(fb.f,myflags); - return TRUE; - } - memcpy((char*)cs->name,buf,length); - cs->number=csnum; - all_charsets[csnum]=cs; + my_free(buf,myflags); + return TRUE; } - my_fclose(fb.f,myflags); - + + len=read(fd,buf,MAX_BUF); + my_xml_parser_create(&p); + my_close(fd,myflags); + + my_xml_set_enter_handler(&p,cs_enter); + my_xml_set_value_handler(&p,cs_value); + my_xml_set_leave_handler(&p,cs_leave); + my_xml_set_user_data(&p,(void*)&i); + + if (MY_XML_OK!=my_xml_parse(&p,buf,len)) + { + /* + printf("ERROR at line %d pos %d '%s'\n", + my_xml_error_lineno(&p)+1, + my_xml_error_pos(&p), + my_xml_error_string(&p)); + */ + } + + my_xml_parser_free(&p); + return FALSE; } @@ -472,7 +587,7 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags) if (!cs && (flags & MY_WME)) { char index_file[FN_REFLEN], cs_string[23]; - strmov(get_charsets_dir(index_file), "Index"); + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); cs_string[0]='#'; int10_to_str(cs_number, cs_string+1, 10); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); @@ -505,7 +620,7 @@ CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) if (!cs && (flags & MY_WME)) { char index_file[FN_REFLEN]; - strmov(get_charsets_dir(index_file), "Index"); + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); } diff --git 
a/sql/share/Makefile.am b/sql/share/Makefile.am index c70ac9ccf57..a1b506f1ff5 100644 --- a/sql/share/Makefile.am +++ b/sql/share/Makefile.am @@ -7,7 +7,7 @@ dist-hook: done; \ sleep 1 ; touch $(srcdir)/*/errmsg.sys $(INSTALL_DATA) $(srcdir)/charsets/README $(distdir)/charsets - $(INSTALL_DATA) $(srcdir)/charsets/Index $(distdir)/charsets + $(INSTALL_DATA) $(srcdir)/charsets/Index.xml $(distdir)/charsets all: @AVAILABLE_LANGUAGES_ERRORS@ @@ -25,7 +25,7 @@ install-data-local: done $(mkinstalldirs) $(DESTDIR)$(pkgdatadir)/charsets $(INSTALL_DATA) $(srcdir)/charsets/README $(DESTDIR)$(pkgdatadir)/charsets/README - $(INSTALL_DATA) $(srcdir)/charsets/Index $(DESTDIR)$(pkgdatadir)/charsets/Index + $(INSTALL_DATA) $(srcdir)/charsets/Index.xml $(DESTDIR)$(pkgdatadir)/charsets/Index.xml $(INSTALL_DATA) $(srcdir)/charsets/*.conf $(DESTDIR)$(pkgdatadir)/charsets fix_errors: diff --git a/sql/share/charsets/Index.xml b/sql/share/charsets/Index.xml new file mode 100644 index 00000000000..79b622f9c5c --- /dev/null +++ b/sql/share/charsets/Index.xml @@ -0,0 +1,455 @@ + + + + + +This file lists all of the available character sets. +To make maintaining easier please: + - keep records sorted by collation number. + - change charset-list.max-id when adding a new collation. 
+ + + + Traditional Chinese + big-5 + bigfive + big-five + cn-big5 + csbig5 + + + + + Central European + csisolatin2 + iso-8859-2 + iso-ir-101 + iso_8859-2 + iso_8859-2:1987 + l2 + + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + + + Western + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + Western + 850 + cp850 + cspc850multilingual + ibm850 + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + Western + csisolatin1 + csisolatin1 + iso-8859-1 + iso-ir-100 + iso_8859-1 + iso_8859-1:1987 + l1 + latin1 + + + + + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + Western + hproman8 + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + Cyrillic + koi8-ru + cskoi8r + + + + + Western + iso-646-se + + + + + Western + us + us-ascii + csascii + iso-ir-6 + iso646-us + + + + + Japanese + euc-jp + + + + + Japanese + s-jis + shift-jis + x-sjis + + + + + Cyrillic + windows-1251 + ms-cyr + ms-cyrillic + + Belarusian + Bulgarian + Macedonian + Russian + Serbian + Mongolian + Ukrainian + + + + + Belarusian + Bulgarian + Macedonian + Russian + Serbian + Mongolian + Ukrainian + + + Belarusian + Bulgarian + Macedonian + Russian + Serbian + Mongolian + Ukrainian + + + + + Hebrew + csisolatinhebrew + iso-8859-8 + iso-ir-138 + + + + + Thai + tis-620 + + + + + Korean + euckr + euc-kr + + + + + Baltic + BalticRim + iso-8859-13 + l7 + + + + + + + Cyrillic + koi8-u + + + + + Simplified Chinese + chinese + iso-ir-58 + + + + + Greek + csisolatingreek + ecma-118 + greek8 + iso-8859-7 + iso-ir-126 + + + + + Central European + ms-ce + windows-1250 + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + + East Asian + cp936 + + + + + Baltic + WinBaltRim + windows-1257 + + + + 
Latvian + Lithuanian + + + Latvian + Lithuanian + + + Latvian + Lithuanian + + + + + South Asian + csisolatin5 + iso-8859-9 + iso-ir-148 + l5 + latin5 + turkish + + + + + South Asian + armscii-8 + + + + + Unicode + utf-8 + + + + + Unicode + + + + + Cyrillic + 866 + csibm866 + ibm866 + + + + + Central European + + + + + Central European + MacCentralEurope + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + Western + Mac + Macintosh + csmacintosh + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + Dutch + English + French + German Duden + Italian + Latin + Portuguese + Spanish + + + + + Central European + 852 + cp852 + ibm852 + + Hungarian + Polish + Romanian + Croatian + Slovak + Slovenian + Sorbian + + + + + Arabic + ms-arab + windows-1256 + + + + + + + +