diff --git a/source/compiler/sc.h b/source/compiler/sc.h index beda684..e417ae9 100644 --- a/source/compiler/sc.h +++ b/source/compiler/sc.h @@ -308,6 +308,11 @@ typedef struct s_valuepair { #define opcodes(n) ((n)*sizeof(cell)) /* opcode size */ #define opargs(n) ((n)*sizeof(cell)) /* size of typical argument */ +/* general purpose macros */ +#if !defined makelong + #define makelong(low,high) ((long)(low) | ((long)(high) << (sizeof(long)*4))) +#endif + /* Tokens recognized by lex() * Some of these constants are assigned as well to the variable "lastst" (see SC1.C) */ @@ -493,6 +498,24 @@ typedef struct s_emit_outval { } value; } emit_outval; +/* constants for error_suggest() */ +#define MAX_EDIT_DIST 2 /* allow two mis-typed characters; when there are more, + * the names are too different, and no match is returned */ +enum { /* identifier types */ + estSYMBOL = 0, + estNONSYMBOL, + estAUTOMATON, + estSTATE +}; +enum { /* symbol types */ + essNONLABEL, /* find symbols of any type but labels */ + essVARCONST, /* array, single variable or named constant */ + essARRAY, + essCONST, + essFUNCTN, + essLABEL +}; + /* interface functions */ #if defined __cplusplus extern "C" { @@ -721,8 +744,9 @@ SC_FUNC void outval(cell val,int newline); SC_FUNC void outinstr(const char *name,emit_outval params[],int numparams); /* function prototypes in SC5.C */ -SC_FUNC int error(int number,...); +SC_FUNC int error(long number,...); SC_FUNC void errorset(int code,int line); +SC_FUNC int error_suggest(int error,const char *name,const char *name2,int type,int subtype); /* function prototypes in SC6.C */ SC_FUNC int assemble(FILE *fout,FILE *fin); diff --git a/source/compiler/sc1.c b/source/compiler/sc1.c index d96e4e0..fcec5ec 100644 --- a/source/compiler/sc1.c +++ b/source/compiler/sc1.c @@ -2018,7 +2018,7 @@ static void declglb(char *firstname,int firsttag,int fpublic,int fstatic,int fst } else { tag=pc_addtag(NULL); if (lex(&val,&str)!=tSYMBOL) /* read in (new) token */ - 
error(20,str); /* invalid symbol name */ + error_suggest(20,str,NULL,estSYMBOL,essFUNCTN); /* invalid symbol name */ assert(strlen(str)<=sNAMEMAX); strcpy(name,str); /* save symbol name */ } /* if */ @@ -4869,7 +4869,7 @@ static int testsymbols(symbol *root,int level,int testlabs,int testconst) case iLABEL: if (testlabs) { if ((sym->usage & uDEFINE)==0) { - error(19,sym->name); /* not a label: ... */ + error_suggest(19,sym->name,NULL,estSYMBOL,essLABEL); /* not a label: ... */ } else if ((sym->usage & uREAD)==0) { errorset(sSETPOS,sym->lnumber); error(203,sym->name); /* symbol isn't used: ... */ @@ -5973,7 +5973,7 @@ static void dogoto(void) // sym->compound (nesting level of the label) against nestlevel; // if sym->compound < nestlevel, call the destructor operator } else { - error(20,st); /* illegal symbol name */ + error_suggest(20,st,NULL,estSYMBOL,essLABEL); /* illegal symbol name */ } /* if */ needtoken(tTERM); } @@ -6013,7 +6013,7 @@ static symbol *fetchlab(char *name) sym=findloc(name); /* labels are local in scope */ if (sym) { if (sym->ident!=iLABEL) - error(19,sym->name); /* not a label: ... */ + error_suggest(19,sym->name,NULL,estSYMBOL,essLABEL); /* not a label: ... */ } else { sym=addsym(name,getlabel(),iLABEL,sLOCAL,0,0); assert(sym!=NULL); /* fatal error 103 must be given on error */ diff --git a/source/compiler/sc3.c b/source/compiler/sc3.c index e22de2c..f4bef71 100644 --- a/source/compiler/sc3.c +++ b/source/compiler/sc3.c @@ -749,7 +749,7 @@ SC_FUNC int sc_getstateid(constvalue **automaton,constvalue **state) return 0; tokeninfo(&val,&str); /* do not copy the name yet, must check automaton first */ if (*automaton==NULL) { - error(86,name); /* unknown automaton */ + error_suggest(86,name,NULL,estAUTOMATON,0); /* unknown automaton */ return 0; } /* if */ assert((*automaton)->index>0); @@ -769,7 +769,7 @@ SC_FUNC int sc_getstateid(constvalue **automaton,constvalue **state) char *fsaname=(*automaton)->name; if (*fsaname=='\0') fsaname="
"; - error(87,name,fsaname); /* unknown state for automaton */ + error_suggest(87,name,fsaname,estSTATE,fsa); /* unknown state for automaton */ return 0; } /* if */ @@ -1321,7 +1321,7 @@ static int hier2(value *lval) paranthese++; tok=lex(&val,&st); if (tok!=tSYMBOL) - return error(20,st); /* illegal symbol name */ + return error_suggest(20,st,NULL,estNONSYMBOL,tok); /* illegal symbol name */ sym=findloc(st); if (sym==NULL) sym=findglb(st,sSTATEVAR); @@ -1344,18 +1344,18 @@ static int hier2(value *lval) paranthese++; tok=lex(&val,&st); if (tok!=tSYMBOL) - return error(20,st); /* illegal symbol name */ + return error_suggest(20,st,NULL,estNONSYMBOL,tok); /* illegal symbol name */ sym=findloc(st); if (sym==NULL) sym=findglb(st,sSTATEVAR); if (sym==NULL) - return error(17,st); /* undefined symbol */ + return error_suggest(17,st,NULL,estSYMBOL,essVARCONST); /* undefined symbol */ if (sym->ident==iCONSTEXPR) error(39); /* constant symbol has no size */ else if (sym->ident==iFUNCTN || sym->ident==iREFFUNC) error(72); /* "function" symbol has no size */ else if ((sym->usage & uDEFINE)==0) - return error(17,st); /* undefined symbol (symbol is in the table, but it is "used" only) */ + return error_suggest(17,st,NULL,estSYMBOL,essVARCONST); /* undefined symbol (symbol is in the table, but it is "used" only) */ clear_value(lval); lval->ident=iCONSTEXPR; lval->constval=1; /* preset */ @@ -1370,7 +1370,7 @@ static int hier2(value *lval) int cmptag=subsym->x.tags.index; tokeninfo(&val,&idxname); if ((idxsym=findconst(idxname,&cmptag))==NULL) - error(80,idxname); /* unknown symbol, or non-constant */ + error_suggest(80,idxname,NULL,estSYMBOL,essCONST); /* unknown symbol, or non-constant */ else if (cmptag>1) error(91,idxname); /* ambiguous constant */ } /* if */ @@ -1397,7 +1397,7 @@ static int hier2(value *lval) paranthese++; tok=lex(&val,&st); if (tok!=tSYMBOL && tok!=tLABEL) - return error(20,st); /* illegal symbol name */ + return error_suggest(20,st,NULL,estNONSYMBOL,tok); 
/* illegal symbol name */ if (tok==tLABEL) { constvalue *tagsym=find_constval(&tagname_tab,st,0); tag=(int)((tagsym!=NULL) ? tagsym->value : 0); @@ -1406,9 +1406,9 @@ static int hier2(value *lval) if (sym==NULL) sym=findglb(st,sSTATEVAR); if (sym==NULL) - return error(17,st); /* undefined symbol */ + return error_suggest(17,st,NULL,estSYMBOL,essNONLABEL); /* undefined symbol */ if ((sym->usage & uDEFINE)==0) - return error(17,st); /* undefined symbol (symbol is in the table, but it is "used" only) */ + return error_suggest(17,st,NULL,estSYMBOL,essNONLABEL); /* undefined symbol (symbol is in the table, but it is "used" only) */ tag=sym->tag; } /* if */ if (sym!=NULL && (sym->ident==iARRAY || sym->ident==iREFARRAY)) { @@ -1422,7 +1422,7 @@ static int hier2(value *lval) int cmptag=subsym->x.tags.index; tokeninfo(&val,&idxname); if ((idxsym=findconst(idxname,&cmptag))==NULL) - error(80,idxname); /* unknown symbol, or non-constant */ + error_suggest(80,idxname,NULL,estSYMBOL,essCONST); /* unknown symbol, or non-constant */ else if (cmptag>1) error(91,idxname); /* ambiguous constant */ } /* if */ @@ -1601,7 +1601,7 @@ restart: needtoken(close); return FALSE; } else if (sym->ident!=iARRAY && sym->ident!=iREFARRAY){ - error(28,sym->name); /* cannot subscript, variable is not an array */ + error_suggest(28,sym->name,NULL,estSYMBOL,essARRAY);/* cannot subscript, variable is not an array */ needtoken(close); return FALSE; } else if (sym->dim.array.level>0 && close!=']') { @@ -1854,10 +1854,10 @@ static int primary(value *lval) * implemented, issue an error */ if ((sym->usage & uPROTOTYPED)==0) - error(17,st); + error_suggest(17,st,NULL,estSYMBOL,essFUNCTN); /* undefined symbol */ } else { if ((sym->usage & uDEFINE)==0) - error(17,st); + error_suggest(17,st,NULL,estSYMBOL,essVARCONST); /* undefined symbol */ lval->sym=sym; lval->ident=sym->ident; lval->tag=sym->tag; @@ -1870,7 +1870,7 @@ static int primary(value *lval) } /* if */ } else { if (!sc_allowproccall) - return 
error(17,st); /* undefined symbol */ + return error_suggest(17,st,NULL,estSYMBOL,essVARCONST); /* undefined symbol */ /* an unknown symbol, but used in a way compatible with the "procedure * call" syntax. So assume that the symbol refers to a function. */ diff --git a/source/compiler/sc5.c b/source/compiler/sc5.c index cd24e87..ae05425 100644 --- a/source/compiler/sc5.c +++ b/source/compiler/sc5.c @@ -29,6 +29,7 @@ #if defined LINUX || defined __GNUC__ #include #endif +#include #include #include #include /* ANSI standardized variable argument list functions */ @@ -197,6 +198,10 @@ static char *warnmsg[] = { /*239*/ "literal array/string passed to a non-const parameter\n" }; +static char *noticemsg[] = { +/*001*/ "; did you mean \"%s\"?\n" +}; + #define NUM_WARNINGS (sizeof warnmsg / sizeof warnmsg[0]) static struct s_warnstack { unsigned char disable[(NUM_WARNINGS + 7) / 8]; /* 8 flags in a char */ @@ -220,13 +225,22 @@ static int errwarn; * fcurrent (reffered to only) * errflag (altered) */ -SC_FUNC int error(int number,...) +SC_FUNC int error(long number,...) { static char *prefix[3]={ "error", "fatal error", "warning" }; static int lastline,errorcount; static short lastfile; char *msg,*pre; va_list argptr; + char string[128]; + int notice; + + /* split the error field between the real error/warning number and an optional + * "notice" number + */ + notice=(unsigned long)number >> (sizeof(long)*4); + number&=(~(unsigned long)0) >> (sizeof(long)*4); + assert(number>0 && number<300); /* errflag is reset on each semicolon. * In a two-pass compiler, an error should not be reported twice. 
Therefore @@ -244,26 +258,37 @@ static short lastfile; return 0; } /* if */ - if (number<100){ + if (number<100) { + assert(number>0 && number<(1+arraysize(errmsg))); msg=errmsg[number-1]; pre=prefix[0]; errflag=TRUE; /* set errflag (skip rest of erroneous expression) */ errnum++; - } else if (number<200){ + } else if (number<200) { + assert(number>=100 && number<(100+arraysize(fatalmsg))); msg=fatalmsg[number-100]; pre=prefix[1]; errnum++; /* a fatal error also counts as an error */ } else if (errwarn) { + assert(number>=200 && number<(200+arraysize(warnmsg))); msg=warnmsg[number-200]; pre=prefix[0]; errflag=TRUE; errnum++; } else { + assert(number>=200 && number<(200+arraysize(warnmsg))); msg=warnmsg[number-200]; pre=prefix[2]; warnnum++; } /* if */ + if (notice!=0) { + assert(notice>0 && notice<(1+arraysize(noticemsg)) && noticemsg[notice-1][0]!='\0'); + strcpy(string,msg); + strcpy(&string[strlen(string)-1],noticemsg[notice-1]); + msg=string; + } /* if */ + assert(errstart<=fline); if (errline>0) errstart=errline; @@ -271,9 +296,9 @@ static short lastfile; errline=fline; assert(errstart<=errline); va_start(argptr,number); - if (strlen(errfname)==0) { + if (errfname[0]=='\0') { int start=(errstart==errline) ? 
-1 : errstart; - if (pc_error(number,msg,inpfname,start,errline,argptr)) { + if (pc_error((int)number,msg,inpfname,start,errline,argptr)) { if (outf!=NULL) { pc_closeasm(outf,TRUE); outf=NULL; @@ -284,9 +309,9 @@ static short lastfile; FILE *fp=fopen(errfname,"a"); if (fp!=NULL) { if (errstart>=0 && errstart!=errline) - fprintf(fp,"%s(%d -- %d) : %s %03d: ",inpfname,errstart,errline,pre,number); + fprintf(fp,"%s(%d -- %d) : %s %03d: ",inpfname,errstart,errline,pre,(int)number); else - fprintf(fp,"%s(%d) : %s %03d: ",inpfname,errline,pre,number); + fprintf(fp,"%s(%d) : %s %03d: ",inpfname,errline,pre,(int)number); vfprintf(fp,msg,argptr); fclose(fp); } /* if */ @@ -294,7 +319,7 @@ static short lastfile; va_end(argptr); if ((number>=100 && number<200) || errnum>25){ - if (strlen(errfname)==0) { + if (errfname[0]=='\0') { va_start(argptr,number); pc_error(0,"\nCompilation aborted.\n\n",NULL,0,0,argptr); va_end(argptr); @@ -423,3 +448,240 @@ int pc_geterrorwarnings() return errwarn; } +/* Implementation of Levenshtein distance, by Lorenzo Seidenari + */ +static int minimum(int a,int b,int c) +{ + int min=a; + if(b0 && m>0); + d=(int*)malloc((sizeof(int))*(m+1)*(n+1)); + m++; + n++; + //Step 2 + for (k=0;kMAX_EDIT_DIST) + maxdist=MAX_EDIT_DIST; + return maxdist; +} + +static int find_closestsymbol_table(const char *name,const symbol *root,int symboltype,symbol **closestsym) +{ + int dist,maxdist,closestdist=INT_MAX; + char symname[2*sNAMEMAX+16]; + symbol *sym; + int ident; + assert(closestsym!=NULL); + *closestsym=NULL; + assert(name!=NULL); + maxdist=get_maxdist(name); + for (sym=root->next; sym!=NULL; sym=sym->next) { + if (sym->fnumber!=-1 && sym->fnumber!=fcurrent) + continue; + ident=sym->ident; + if (symboltype==essNONLABEL) { + if (ident==iLABEL) + continue; + } else if (symboltype==essVARCONST) { + if (ident!=iCONSTEXPR && ident!=iVARIABLE && ident!=iREFERENCE && ident!=iARRAY && ident!=iREFARRAY) + continue; + } else if (symboltype==essARRAY) { + if 
(ident!=iARRAY && ident!=iREFARRAY) + continue; + } else if (symboltype==essCONST) { + if (ident!=iCONSTEXPR) + continue; + } else if (symboltype==essFUNCTN) { + if ((ident!=iFUNCTN && ident!=iREFFUNC) || (sym->usage & uDEFINE)==0) + continue; + } else if (symboltype==essLABEL) { + if (ident!=iLABEL || (sym->usage & uDEFINE)==0) + continue; + } /* if */ + funcdisplayname(symname,sym->name); + dist=levenshtein_distance(name,symname); + if (dist>maxdist || dist>=closestdist) + continue; + *closestsym=sym; + closestdist=dist; + if (closestdist<=1) + break; + } /* for */ + return closestdist; +} + +static symbol *find_closestsymbol(const char *name,int symboltype) +{ + symbol *symloc,*symglb; + int distloc,distglb; + + if (sc_status==statFIRST) + return NULL; + assert(name!=NULL); + if (name[0]=='\0') + return NULL; + distloc=find_closestsymbol_table(name,&loctab,symboltype,&symloc); + if (distloc<=1) + distglb=INT_MAX; /* don't bother searching in the global table */ + else + distglb=find_closestsymbol_table(name,&glbtab,symboltype,&symglb); + return (distglbname[0]!='\0') { + dist=levenshtein_distance(name,ptr->name); + if (distnext; + } /* while */ + return closestmatch; +} + +static constvalue *find_closeststate(const char *name,int fsa) +{ + constvalue *ptr=sc_state_tab.first; + constvalue *closestmatch=NULL; + int dist,maxdist,closestdist=INT_MAX; + + assert(name!=NULL); + maxdist=get_maxdist(name); + while (ptr!=NULL) { + if (ptr->index==fsa && ptr->name[0]!='\0') { + dist=levenshtein_distance(name,ptr->name); + if (distnext; + } /* while */ + return closestmatch; +} + +static constvalue *findclosest_automaton_for_state(const char *statename,int fsa) +{ + constvalue *ptr=sc_state_tab.first; + constvalue *closestmatch=NULL; + constvalue *automaton; + const char *fsaname; + int dist,maxdist,closestdist=INT_MAX; + + assert(statename!=NULL); + maxdist=get_maxdist(statename); + automaton=automaton_findid(ptr->index); + assert(automaton!=NULL); + 
fsaname=automaton->name; + while (ptr!=NULL) { + if (fsa!=ptr->index && ptr->name[0]!='\0' && strcmp(statename,ptr->name)==0) { + automaton=automaton_findid(ptr->index); + assert(automaton!=NULL); + dist=levenshtein_distance(fsaname,automaton->name); + if (distnext; + } /* while */ + return closestmatch; +} + +SC_FUNC int error_suggest(int number,const char *name,const char *name2,int type,int subtype) +{ + char string[sNAMEMAX*2+2]; /* for ":" */ + const char *closestname=NULL; + + /* don't bother finding the closest names on errors + * that aren't going to be shown on the 1'st pass + */ + if ((errflag || sc_status!=statWRITE) && (number<100 || number>=200)) + return 0; + + if (type==estSYMBOL || (type==estNONSYMBOL && tMIDDLEname; + } else if (type==estAUTOMATON) { + constvalue *closestautomaton=find_closestautomaton(name); + if (closestautomaton!=NULL) + closestname=closestautomaton->name; + } else if (type==estSTATE) { + constvalue *closeststate=find_closeststate(name,subtype); + if (closeststate!=NULL) { + closestname=closeststate->name; + } else { + constvalue *closestautomaton=findclosest_automaton_for_state(name,subtype); + if (closestautomaton!=NULL) { + sprintf(string,"%s:%s",closestautomaton->name,name); + closestname=string; + } /* if */ + } /* if */ + } else { + assert(0); + } /* if */ + + if (closestname==NULL) { + error(number,name,name2); + } else if (name2!=NULL) { + error(makelong(number,1),name,name2,closestname); + } else { + error(makelong(number,1),name,closestname); + } /* if */ + return 0; +} diff --git a/source/compiler/tests/gh_353.inc b/source/compiler/tests/gh_353.inc new file mode 100644 index 0000000..8f07b7f --- /dev/null +++ b/source/compiler/tests/gh_353.inc @@ -0,0 +1,2 @@ +static staticvar; +#pragma unused staticvar \ No newline at end of file diff --git a/source/compiler/tests/gh_353.meta b/source/compiler/tests/gh_353.meta new file mode 100644 index 0000000..7c2391d --- /dev/null +++ b/source/compiler/tests/gh_353.meta @@ -0,0 
+1,24 @@ +{ + 'test_type': 'output_check', + 'errors': """ +gh_353.pwn(12) : error 017: undefined symbol "abcxyz" +gh_353.pwn(20) : error 017: undefined symbol "length" +gh_353.pwn(30) : error 017: undefined symbol "float" +gh_353.pwn(40) : error 017: undefined symbol "ab" +gh_353.pwn(41) : error 017: undefined symbol "ab" +gh_353.pwn(50) : error 017: undefined symbol "staticval" +gh_353.pwn(58) : error 017: undefined symbol "val"; did you mean "var"? +gh_353.pwn(62) : error 017: undefined symbol "celmax"; did you mean "cellmax"? +gh_353.pwn(66) : error 017: undefined symbol "strcaf"; did you mean "strcat"? +gh_353.pwn(69) : error 017: undefined symbol "test_e17"; did you mean "test_e017"? +gh_353.pwn(78) : error 019: not a label: "lb"; did you mean "lbl"? +gh_353.pwn(85) : error 020: invalid symbol name "assert"; did you mean "asset"? +gh_353.pwn(96) : error 080: unknown symbol, or not a constant symbol (symbol "idx"); did you mean "id"? +gh_353.pwn(107) : error 086: unknown automaton "automaton1"; did you mean "automaton_1"? +gh_353.pwn(107) : error 036: empty statement +gh_353.pwn(114) : error 087: unknown state "BEING1" for automaton "automaton_2"; did you mean "BEING_1"? +gh_353.pwn(114) : error 036: empty statement +gh_353.pwn(117) : error 087: unknown state "STATE_1" for automaton "automaton_2"; did you mean "automaton_1:STATE_1"? +gh_353.pwn(117) : error 036: empty statement + """ +} diff --git a/source/compiler/tests/gh_353.pwn b/source/compiler/tests/gh_353.pwn new file mode 100644 index 0000000..e604b47 --- /dev/null +++ b/source/compiler/tests/gh_353.pwn @@ -0,0 +1,118 @@ +#include +#include +#include +#include "gh_353.inc" + +forward test_nosuggest1(); +public test_nosuggest1() +{ + // The compiler shouldn't suggest any name for this error + // since "abcxyz" and "abcd" differ by more than 2 symbols. 
+ const abcd = 1; + printf("%d\n", abcxyz); + #pragma unused abcd +} + +forward test_nosuggest2(); +public test_nosuggest2() +{ + // There are no "()" after "length", so the compiler shouldn't suggest "flength". + printf("%d\n", length); +} + +forward test_nosuggest3(); +public test_nosuggest3() +{ + // float.inc is not #included, so float() is not defined. + // After the 1'st pass the compiler thinks float() is an unimplemented function, + // so it shouldn't suggest variable "flt" in this case. + new Float:flt; + return float(0); + #pragma unused flt +} + +forward test_nosuggest4(); +public test_nosuggest4() +{ + // "abc" is a label so the compiler shouldn't suggest its name + // where a variable or named constant is expected. +abc: + printf("%d\n", ab); + printf("%d\n", tagof ab); + #pragma unused abc +} + +forward test_nosuggest5(); +public test_nosuggest5() +{ + // As the name suggests, variable "staticvar" is defined as static + // within another file, so the compiler shouldn't suggest its name here. + return staticval; +} + +forward test_e017(); +public test_e017() +{ + // error 017: undefined symbol "val"; did you mean "var"? + new var = 1; + printf("%d\n", val); + #pragma unused var + + // error 017: undefined symbol "celmax"; did you mean "cellmax"? + printf("%d\n", celmax); + + // error 017: undefined symbol "strcaf"; did you mean "strcat"? + new str[4] = "a"; + strcaf(str, "b"); + + // error 017: undefined symbol "test_e17"; did you mean "test_e017"? + printf("%d\n", tagof test_e17); +} + +forward test_e019(); +public test_e019() +{ + // error 019: not a label: "lb"; did you mean "lbl"? +lbl: + goto lb; +} + +forward test_e020(); +public test_e020() +{ + // error 020: invalid symbol name "assert"; did you mean "asset"? + new asset = 0; + printf("%d\n", defined assert); + #pragma unused asset +} + +forward test_e080(); +public test_e080() +{ + // error 080: unknown symbol, or not a constant symbol (symbol "idx"); did you mean "id"? 
+ new values[1]; + new idx = 0; + const id = 0; + printf("%d\n", sizeof values[idx]); + #pragma unused values, idx, id +} + +stock func1(){} +stock func2(){} + +forward test_e086(); +public test_e086() +{ + // error 086: unknown automaton "automaton1"; did you mean "automaton_1"? + state automaton1:STATE_1; +} + +forward test_e087(); +public test_e087() +{ + // error 087: unknown state "BEING1" for automaton "automaton_2"; did you mean "BEING_1"? + state automaton_2:BEING1; + + // error 087: unknown state "STATE_1" for automaton "automaton_2"; did you mean "automaton_1:STATE_1"? + state automaton_2:STATE_1; +}