From 1d30a23fcc38bc11f928f59a6c69ddebcf523870 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 9 May 2018 00:16:32 +0400 Subject: [PATCH] Moving a few static functions in sql_lex.cc to new methods in Lex_input_stream Reasoning: - Shorter and clearer code - Better encapsulation (a fair number of Lex_input_stream methods and members were moved to the private section) New methods: int lex_token(union YYSTYPE *yylval, THD *thd); bool consume_comment(int remaining_recursions_permitted); int lex_one_token(union YYSTYPE *yylval, THD *thd); int find_keyword(Lex_ident_cli_st *str, uint len, bool function); LEX_CSTRING get_token(uint skip, uint length); Additional changes: - Removing Lex_input_stream::yylval. In the original code it was just an alias for the "yylval" passed to lex_one_token(). This coding style is bug-prone and is hard to follow. In the new version "yylval" (or its components) is passed to the affected methods as a parameter. - Moving the code in sql_lex.h up and down between "private" and "public" sections (sorry if this made the diff somewhat harder to read) --- sql/sql_class.h | 2 +- sql/sql_lex.cc | 438 ++++++++++++++++++++++++------------------------ sql/sql_lex.h | 144 +++++++++------- sql/sql_yacc.yy | 2 +- 4 files changed, 302 insertions(+), 284 deletions(-) diff --git a/sql/sql_class.h b/sql/sql_class.h index 28f785d06a7..f774d0148a5 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -4250,7 +4250,7 @@ public: Lex_input_stream *lip= &m_parser_state->m_lip; if (!yytext) { - if (lip->lookahead_token >= 0) + if (lip->has_lookahead()) yytext= lip->get_tok_start_prev(); else yytext= lip->get_tok_start(); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index aaaf7223e5d..e45080b4028 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -39,9 +39,6 @@ void LEX::parse_error(uint err_number) } -static int lex_one_token(YYSTYPE *yylval, THD *thd); - - /** LEX_STRING constant for null-string to be used in parser and other places. 
*/ @@ -284,7 +281,6 @@ void Lex_input_stream::reset(char *buffer, size_t length) { yylineno= 1; - yylval= NULL; lookahead_token= -1; lookahead_yylval= NULL; m_ptr= buffer; @@ -842,22 +838,23 @@ Yacc_state::~Yacc_state() } } -static int find_keyword(Lex_input_stream *lip, uint len, bool function) +int Lex_input_stream::find_keyword(Lex_ident_cli_st *kwd, + uint len, bool function) { - const char *tok= lip->get_tok_start(); + const char *tok= get_tok_start(); SYMBOL *symbol= get_hash_symbol(tok, len, function); if (symbol) { - lip->yylval->kwd.set_keyword(tok, len); - DBUG_ASSERT(tok >= lip->get_buf()); - DBUG_ASSERT(tok < lip->get_end_of_query()); + kwd->set_keyword(tok, len); + DBUG_ASSERT(tok >= get_buf()); + DBUG_ASSERT(tok < get_end_of_query()); if ((symbol->tok == NOT_SYM) && - (lip->m_thd->variables.sql_mode & MODE_HIGH_NOT_PRECEDENCE)) + (m_thd->variables.sql_mode & MODE_HIGH_NOT_PRECEDENCE)) return NOT2_SYM; if ((symbol->tok == OR_OR_SYM) && - !(lip->m_thd->variables.sql_mode & MODE_PIPES_AS_CONCAT)) + !(m_thd->variables.sql_mode & MODE_PIPES_AS_CONCAT)) return OR2_SYM; return symbol->tok; @@ -955,15 +952,15 @@ bool is_native_function_with_warn(THD *thd, const LEX_CSTRING *name) /* make a copy of token before ptr and set yytoklen */ -static LEX_CSTRING get_token(Lex_input_stream *lip, uint skip, uint length) +LEX_CSTRING Lex_input_stream::get_token(uint skip, uint length) { LEX_CSTRING tmp; - lip->yyUnget(); // ptr points now after last token char + yyUnget(); // ptr points now after last token char tmp.length= length; - tmp.str= lip->m_thd->strmake(lip->get_tok_start() + skip, tmp.length); + tmp.str= m_thd->strmake(get_tok_start() + skip, tmp.length); - lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip; - lip->m_cpp_text_end= lip->m_cpp_text_start + tmp.length; + m_cpp_text_start= get_cpp_tok_start() + skip; + m_cpp_text_end= m_cpp_text_start + tmp.length; return tmp; } @@ -1224,34 +1221,34 @@ static inline uint int_token(const char *str,uint length) 
@retval Whether EOF reached before comment is closed. */ -bool consume_comment(Lex_input_stream *lip, int remaining_recursions_permitted) +bool Lex_input_stream::consume_comment(int remaining_recursions_permitted) { uchar c; - while (! lip->eof()) + while (!eof()) { - c= lip->yyGet(); + c= yyGet(); if (remaining_recursions_permitted > 0) { - if ((c == '/') && (lip->yyPeek() == '*')) + if ((c == '/') && (yyPeek() == '*')) { - lip->yySkip(); /* Eat asterisk */ - consume_comment(lip, remaining_recursions_permitted-1); + yySkip(); // Eat asterisk + consume_comment(remaining_recursions_permitted - 1); continue; } } if (c == '*') { - if (lip->yyPeek() == '/') + if (yyPeek() == '/') { - lip->yySkip(); /* Eat slash */ + yySkip(); // Eat slash return FALSE; } } if (c == '\n') - lip->yylineno++; + yylineno++; } return TRUE; @@ -1271,24 +1268,35 @@ bool consume_comment(Lex_input_stream *lip, int remaining_recursions_permitted) int MYSQLlex(YYSTYPE *yylval, THD *thd) { - Lex_input_stream *lip= & thd->m_parser_state->m_lip; + return thd->m_parser_state->m_lip.lex_token(yylval, thd); +} + + +int ORAlex(YYSTYPE *yylval, THD *thd) +{ + return thd->m_parser_state->m_lip.lex_token(yylval, thd); +} + + +int Lex_input_stream::lex_token(YYSTYPE *yylval, THD *thd) +{ int token; - if (lip->lookahead_token >= 0) + if (lookahead_token >= 0) { /* The next token was already parsed in advance, return it. */ - token= lip->lookahead_token; - lip->lookahead_token= -1; - *yylval= *(lip->lookahead_yylval); - lip->lookahead_yylval= NULL; + token= lookahead_token; + lookahead_token= -1; + *yylval= *(lookahead_yylval); + lookahead_yylval= NULL; return token; } token= lex_one_token(yylval, thd); - lip->add_digest_token(token, yylval); + add_digest_token(token, yylval); switch(token) { case WITH: @@ -1300,7 +1308,7 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) which sql_yacc.yy can process. 
*/ token= lex_one_token(yylval, thd); - lip->add_digest_token(token, yylval); + add_digest_token(token, yylval); switch(token) { case CUBE_SYM: return WITH_CUBE_SYM; @@ -1312,9 +1320,8 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) /* Save the token following 'WITH' */ - lip->lookahead_yylval= lip->yylval; - lip->yylval= NULL; - lip->lookahead_token= token; + lookahead_yylval= yylval; + lookahead_token= token; return WITH; } break; @@ -1325,7 +1332,7 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) * SELECT ... FOR SYSTEM_TIME ... . */ token= lex_one_token(yylval, thd); - lip->add_digest_token(token, yylval); + add_digest_token(token, yylval); switch(token) { case SYSTEM_TIME_SYM: return FOR_SYSTEM_TIME_SYM; @@ -1333,9 +1340,8 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) /* Save the token following 'FOR_SYM' */ - lip->lookahead_yylval= lip->yylval; - lip->yylval= NULL; - lip->lookahead_token= token; + lookahead_yylval= yylval; + lookahead_token= token; return FOR_SYM; } break; @@ -1344,16 +1350,15 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) thd->lex->current_select->parsing_place == IN_PART_FUNC) return VALUE_SYM; token= lex_one_token(yylval, thd); - lip->add_digest_token(token, yylval); + add_digest_token(token, yylval); switch(token) { case LESS_SYM: return VALUES_LESS_SYM; case IN_SYM: return VALUES_IN_SYM; default: - lip->lookahead_yylval= lip->yylval; - lip->yylval= NULL; - lip->lookahead_token= token; + lookahead_yylval= yylval; + lookahead_token= token; return VALUES; } break; @@ -1363,50 +1368,43 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) return token; } -int ORAlex(YYSTYPE *yylval, THD *thd) -{ - return MYSQLlex(yylval, thd); -} -static int lex_one_token(YYSTYPE *yylval, THD *thd) +int Lex_input_stream::lex_one_token(YYSTYPE *yylval, THD *thd) { uchar UNINIT_VAR(c); bool comment_closed; int tokval; uint length; enum my_lex_states state; - Lex_input_stream *lip= & thd->m_parser_state->m_lip; LEX *lex= thd->lex; CHARSET_INFO *const cs= thd->charset(); const uchar *const 
state_map= cs->state_map; const uchar *const ident_map= cs->ident_map; - lip->yylval= yylval; // The global state - - lip->start_token(); - state=lip->next_state; - lip->next_state=MY_LEX_OPERATOR_OR_IDENT; + start_token(); + state= next_state; + next_state= MY_LEX_OPERATOR_OR_IDENT; for (;;) { switch (state) { case MY_LEX_OPERATOR_OR_IDENT: // Next is operator or keyword case MY_LEX_START: // Start of token // Skip starting whitespace - while(state_map[c= lip->yyPeek()] == MY_LEX_SKIP) + while(state_map[c= yyPeek()] == MY_LEX_SKIP) { if (c == '\n') - lip->yylineno++; + yylineno++; - lip->yySkip(); + yySkip(); } /* Start of real token */ - lip->restart_token(); - c= lip->yyGet(); + restart_token(); + c= yyGet(); state= (enum my_lex_states) state_map[c]; break; case MY_LEX_ESCAPE: - if (!lip->eof() && lip->yyGet() == 'N') + if (!eof() && yyGet() == 'N') { // Allow \N as shortcut for NULL yylval->lex_str.str= (char*) "\\N"; yylval->lex_str.length= 2; @@ -1416,18 +1414,18 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) case MY_LEX_CHAR: // Unknown or single char token case MY_LEX_SKIP: // This should not happen if (c != ')') - lip->next_state= MY_LEX_START; // Allow signed numbers + next_state= MY_LEX_START; // Allow signed numbers return((int) c); case MY_LEX_MINUS_OR_COMMENT: - if (lip->yyPeek() == '-' && - (my_isspace(cs,lip->yyPeekn(1)) || - my_iscntrl(cs,lip->yyPeekn(1)))) + if (yyPeek() == '-' && + (my_isspace(cs,yyPeekn(1)) || + my_iscntrl(cs,yyPeekn(1)))) { state=MY_LEX_COMMENT; break; } - lip->next_state= MY_LEX_START; // Allow signed numbers + next_state= MY_LEX_START; // Allow signed numbers return((int) c); case MY_LEX_PLACEHOLDER: @@ -1437,13 +1435,13 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) its value in a query for the binlog, the query must stay grammatically correct. 
*/ - lip->next_state= MY_LEX_START; // Allow signed numbers - if (lip->stmt_prepare_mode && !ident_map[(uchar) lip->yyPeek()]) + next_state= MY_LEX_START; // Allow signed numbers + if (stmt_prepare_mode && !ident_map[(uchar) yyPeek()]) return(PARAM_MARKER); return((int) c); case MY_LEX_COMMA: - lip->next_state= MY_LEX_START; // Allow signed numbers + next_state= MY_LEX_START; // Allow signed numbers /* Warning: This is a work around, to make the "remember_name" rule in @@ -1453,41 +1451,40 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) remember_name (see select_item) *before* actually parsing the first token of expr2. */ - lip->restart_token(); + restart_token(); return((int) c); case MY_LEX_IDENT_OR_NCHAR: { uint sep; - if (lip->yyPeek() != '\'') + if (yyPeek() != '\'') { state= MY_LEX_IDENT; break; } /* Found N'string' */ - lip->yySkip(); // Skip ' - if (lip->get_text(&yylval->lex_string_with_metadata, - (sep= lip->yyGetLast()), 2, 1)) + yySkip(); // Skip ' + if (get_text(&yylval->lex_string_with_metadata, (sep= yyGetLast()), 2, 1)) { state= MY_LEX_CHAR; // Read char by char break; } - lip->body_utf8_append(lip->m_cpp_text_start); - lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, + body_utf8_append(m_cpp_text_start); + body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, national_charset_info, - lip->m_cpp_text_end, sep); + m_cpp_text_end, sep); return(NCHAR_STRING); } case MY_LEX_IDENT_OR_HEX: - if (lip->yyPeek() == '\'') + if (yyPeek() == '\'') { // Found x'hex-number' state= MY_LEX_HEX_NUMBER; break; } /* fall through */ case MY_LEX_IDENT_OR_BIN: - if (lip->yyPeek() == '\'') + if (yyPeek() == '\'') { // Found b'bin-number' state= MY_LEX_BIN_NUMBER; break; @@ -1495,59 +1492,59 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) /* fall through */ case MY_LEX_IDENT: { - tokval= lip->scan_ident_middle(thd, &yylval->ident_cli, - &yylval->charset, &state); + tokval= scan_ident_middle(thd, &yylval->ident_cli, + 
&yylval->charset, &state); if (!tokval) continue; if (tokval == UNDERSCORE_CHARSET) - lip->m_underscore_cs= yylval->charset; + m_underscore_cs= yylval->charset; return tokval; } case MY_LEX_IDENT_SEP: // Found ident and now '.' - yylval->lex_str.str= (char*) lip->get_ptr(); + yylval->lex_str.str= (char*) get_ptr(); yylval->lex_str.length= 1; - c= lip->yyGet(); // should be '.' - lip->next_state= MY_LEX_IDENT_START; // Next is ident (not keyword) - if (!ident_map[(uchar) lip->yyPeek()]) // Probably ` or " - lip->next_state= MY_LEX_START; + c= yyGet(); // should be '.' + next_state= MY_LEX_IDENT_START; // Next is ident (not keyword) + if (!ident_map[(uchar) yyPeek()]) // Probably ` or " + next_state= MY_LEX_START; return((int) c); case MY_LEX_NUMBER_IDENT: // number or ident which num-start - if (lip->yyGetLast() == '0') + if (yyGetLast() == '0') { - c= lip->yyGet(); + c= yyGet(); if (c == 'x') { - while (my_isxdigit(cs,(c = lip->yyGet()))) ; - if ((lip->yyLength() >= 3) && !ident_map[c]) + while (my_isxdigit(cs, (c = yyGet()))) ; + if ((yyLength() >= 3) && !ident_map[c]) { /* skip '0x' */ - yylval->lex_str=get_token(lip, 2, lip->yyLength()-2); + yylval->lex_str= get_token(2, yyLength() - 2); return (HEX_NUM); } - lip->yyUnget(); + yyUnget(); state= MY_LEX_IDENT_START; break; } else if (c == 'b') { - while ((c= lip->yyGet()) == '0' || c == '1') + while ((c= yyGet()) == '0' || c == '1') ; - if ((lip->yyLength() >= 3) && !ident_map[c]) + if ((yyLength() >= 3) && !ident_map[c]) { /* Skip '0b' */ - yylval->lex_str= get_token(lip, 2, lip->yyLength()-2); + yylval->lex_str= get_token(2, yyLength() - 2); return (BIN_NUM); } - lip->yyUnget(); + yyUnget(); state= MY_LEX_IDENT_START; break; } - lip->yyUnget(); + yyUnget(); } - while (my_isdigit(cs, (c = lip->yyGet()))) ; + while (my_isdigit(cs, (c= yyGet()))) ; if (!ident_map[c]) { // Can't be identifier state=MY_LEX_INT_OR_REAL; @@ -1556,125 +1553,123 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) if (c == 'e' || c == 
'E') { // The following test is written this way to allow numbers of type 1e1 - if (my_isdigit(cs,lip->yyPeek()) || - (c=(lip->yyGet())) == '+' || c == '-') + if (my_isdigit(cs, yyPeek()) || + (c=(yyGet())) == '+' || c == '-') { // Allow 1E+10 - if (my_isdigit(cs,lip->yyPeek())) // Number must have digit after sign + if (my_isdigit(cs, yyPeek())) // Number must have digit after sign { - lip->yySkip(); - while (my_isdigit(cs, lip->yyGet())) ; - yylval->lex_str= get_token(lip, 0, lip->yyLength()); + yySkip(); + while (my_isdigit(cs, yyGet())) ; + yylval->lex_str= get_token(0, yyLength()); return(FLOAT_NUM); } } - lip->yyUnget(); + yyUnget(); } // fall through case MY_LEX_IDENT_START: // We come here after '.' - return lip->scan_ident_start(thd, &yylval->ident_cli); + return scan_ident_start(thd, &yylval->ident_cli); case MY_LEX_USER_VARIABLE_DELIMITER: // Found quote char - return lip->scan_ident_delimited(thd, &yylval->ident_cli); + return scan_ident_delimited(thd, &yylval->ident_cli); case MY_LEX_INT_OR_REAL: // Complete int or incomplete real - if (c != '.' || lip->yyPeek() == '.') + if (c != '.' 
|| yyPeek() == '.') { /* Found a complete integer number: - the number is either not followed by a dot at all, or - the number is followed by a double dot as in: FOR i IN 1..10 */ - yylval->lex_str=get_token(lip, 0, lip->yyLength()); + yylval->lex_str= get_token(0, yyLength()); return int_token(yylval->lex_str.str, (uint) yylval->lex_str.length); } // fall through case MY_LEX_REAL: // Incomplete real number - while (my_isdigit(cs,c = lip->yyGet())) ; + while (my_isdigit(cs, c= yyGet())) ; if (c == 'e' || c == 'E') { - c = lip->yyGet(); + c= yyGet(); if (c == '-' || c == '+') - c = lip->yyGet(); // Skip sign + c= yyGet(); // Skip sign if (!my_isdigit(cs, c)) { // No digit after sign state= MY_LEX_CHAR; break; } - while (my_isdigit(cs,lip->yyGet())) ; - yylval->lex_str=get_token(lip, 0, lip->yyLength()); + while (my_isdigit(cs, yyGet())) ; + yylval->lex_str= get_token(0, yyLength()); return(FLOAT_NUM); } - yylval->lex_str=get_token(lip, 0, lip->yyLength()); + yylval->lex_str= get_token(0, yyLength()); return(DECIMAL_NUM); case MY_LEX_HEX_NUMBER: // Found x'hexstring' - lip->yySkip(); // Accept opening ' - while (my_isxdigit(cs, (c= lip->yyGet()))) ; + yySkip(); // Accept opening ' + while (my_isxdigit(cs, (c= yyGet()))) ; if (c != '\'') return(ABORT_SYM); // Illegal hex constant - lip->yySkip(); // Accept closing ' - length= lip->yyLength(); // Length of hexnum+3 + yySkip(); // Accept closing ' + length= yyLength(); // Length of hexnum+3 if ((length % 2) == 0) return(ABORT_SYM); // odd number of hex digits - yylval->lex_str=get_token(lip, - 2, // skip x' - length-3); // don't count x' and last ' + yylval->lex_str= get_token(2, // skip x' + length - 3); // don't count x' and last ' return HEX_STRING; case MY_LEX_BIN_NUMBER: // Found b'bin-string' - lip->yySkip(); // Accept opening ' - while ((c= lip->yyGet()) == '0' || c == '1') + yySkip(); // Accept opening ' + while ((c= yyGet()) == '0' || c == '1') ; if (c != '\'') return(ABORT_SYM); // Illegal hex constant - 
lip->yySkip(); // Accept closing ' - length= lip->yyLength(); // Length of bin-num + 3 - yylval->lex_str= get_token(lip, - 2, // skip b' - length-3); // don't count b' and last ' + yySkip(); // Accept closing ' + length= yyLength(); // Length of bin-num + 3 + yylval->lex_str= get_token(2, // skip b' + length - 3); // don't count b' and last ' return (BIN_NUM); case MY_LEX_CMP_OP: // Incomplete comparison operator - lip->next_state= MY_LEX_START; // Allow signed numbers - if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP || - state_map[(uchar) lip->yyPeek()] == MY_LEX_LONG_CMP_OP) + next_state= MY_LEX_START; // Allow signed numbers + if (state_map[(uchar) yyPeek()] == MY_LEX_CMP_OP || + state_map[(uchar) yyPeek()] == MY_LEX_LONG_CMP_OP) { - lip->yySkip(); - if ((tokval= find_keyword(lip, 2, 0))) + yySkip(); + if ((tokval= find_keyword(&yylval->kwd, 2, 0))) return(tokval); - lip->yyUnget(); + yyUnget(); } return(c); case MY_LEX_LONG_CMP_OP: // Incomplete comparison operator - lip->next_state= MY_LEX_START; - if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP || - state_map[(uchar) lip->yyPeek()] == MY_LEX_LONG_CMP_OP) + next_state= MY_LEX_START; + if (state_map[(uchar) yyPeek()] == MY_LEX_CMP_OP || + state_map[(uchar) yyPeek()] == MY_LEX_LONG_CMP_OP) { - lip->yySkip(); - if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP) + yySkip(); + if (state_map[(uchar) yyPeek()] == MY_LEX_CMP_OP) { - lip->yySkip(); - if ((tokval= find_keyword(lip, 3, 0))) + yySkip(); + if ((tokval= find_keyword(&yylval->kwd, 3, 0))) return(tokval); - lip->yyUnget(); + yyUnget(); } - if ((tokval= find_keyword(lip, 2, 0))) + if ((tokval= find_keyword(&yylval->kwd, 2, 0))) return(tokval); - lip->yyUnget(); + yyUnget(); } return(c); case MY_LEX_BOOL: - if (c != lip->yyPeek()) + if (c != yyPeek()) { state= MY_LEX_CHAR; break; } - lip->yySkip(); - tokval= find_keyword(lip, 2, 0); // Is a bool operator - lip->next_state= MY_LEX_START; // Allow signed numbers + yySkip(); + tokval= 
find_keyword(&yylval->kwd, 2, 0); // Is a bool operator + next_state= MY_LEX_START; // Allow signed numbers return(tokval); case MY_LEX_STRING_OR_DELIMITER: @@ -1688,46 +1683,45 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) case MY_LEX_STRING: // Incomplete text string { uint sep; - if (lip->get_text(&yylval->lex_string_with_metadata, - (sep= lip->yyGetLast()), 1, 1)) + if (get_text(&yylval->lex_string_with_metadata, (sep= yyGetLast()), 1, 1)) { state= MY_LEX_CHAR; // Read char by char break; } - CHARSET_INFO *strcs= lip->m_underscore_cs ? lip->m_underscore_cs : cs; - lip->body_utf8_append(lip->m_cpp_text_start); + CHARSET_INFO *strcs= m_underscore_cs ? m_underscore_cs : cs; + body_utf8_append(m_cpp_text_start); - lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, - strcs, lip->m_cpp_text_end, sep); - lip->m_underscore_cs= NULL; + body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, + strcs, m_cpp_text_end, sep); + m_underscore_cs= NULL; return(TEXT_STRING); } case MY_LEX_COMMENT: // Comment lex->select_lex.options|= OPTION_FOUND_COMMENT; - while ((c= lip->yyGet()) != '\n' && c) ; - lip->yyUnget(); // Safety against eof + while ((c= yyGet()) != '\n' && c) ; + yyUnget(); // Safety against eof state= MY_LEX_START; // Try again break; case MY_LEX_LONG_COMMENT: // Long C comment? - if (lip->yyPeek() != '*') + if (yyPeek() != '*') { state= MY_LEX_CHAR; // Probable division break; } lex->select_lex.options|= OPTION_FOUND_COMMENT; /* Reject '/' '*', since we might need to turn off the echo */ - lip->yyUnget(); + yyUnget(); - lip->save_in_comment_state(); + save_in_comment_state(); - if (lip->yyPeekn(2) == '!' || - (lip->yyPeekn(2) == 'M' && lip->yyPeekn(3) == '!')) + if (yyPeekn(2) == '!' 
|| + (yyPeekn(2) == 'M' && yyPeekn(3) == '!')) { - bool maria_comment_syntax= lip->yyPeekn(2) == 'M'; - lip->in_comment= DISCARD_COMMENT; + bool maria_comment_syntax= yyPeekn(2) == 'M'; + in_comment= DISCARD_COMMENT; /* Accept '/' '*' '!', but do not keep this marker. */ - lip->set_echo(FALSE); - lip->yySkipn(maria_comment_syntax ? 4 : 3); + set_echo(FALSE); + yySkipn(maria_comment_syntax ? 4 : 3); /* The special comment format is very strict: @@ -1738,24 +1732,24 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) 50114 -> 5.1.14 100000 -> 10.0.0 */ - if ( my_isdigit(cs, lip->yyPeekn(0)) - && my_isdigit(cs, lip->yyPeekn(1)) - && my_isdigit(cs, lip->yyPeekn(2)) - && my_isdigit(cs, lip->yyPeekn(3)) - && my_isdigit(cs, lip->yyPeekn(4)) + if ( my_isdigit(cs, yyPeekn(0)) + && my_isdigit(cs, yyPeekn(1)) + && my_isdigit(cs, yyPeekn(2)) + && my_isdigit(cs, yyPeekn(3)) + && my_isdigit(cs, yyPeekn(4)) ) { ulong version; uint length= 5; - char *end_ptr= (char*) lip->get_ptr()+length; + char *end_ptr= (char*) get_ptr() + length; int error; - if (my_isdigit(cs, lip->yyPeekn(5))) + if (my_isdigit(cs, yyPeekn(5))) { end_ptr++; // 6 digit number length++; } - version= (ulong) my_strtoll10(lip->get_ptr(), &end_ptr, &error); + version= (ulong) my_strtoll10(get_ptr(), &end_ptr, &error); /* MySQL-5.7 has new features and might have new SQL syntax that @@ -1767,9 +1761,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) (version < 50700 || version > 99999 || maria_comment_syntax)) { /* Accept 'M' 'm' 'm' 'd' 'd' */ - lip->yySkipn(length); + yySkipn(length); /* Expand the content of the special comment as real code */ - lip->set_echo(TRUE); + set_echo(TRUE); state=MY_LEX_START; break; /* Do not treat contents as a comment. 
*/ } @@ -1780,8 +1774,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) { WSREP_DEBUG("consistency check: %s", thd->query()); thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED; - lip->yySkipn(5); - lip->set_echo(TRUE); + yySkipn(5); + set_echo(TRUE); state= MY_LEX_START; break; /* Do not treat contents as a comment. */ } @@ -1790,8 +1784,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) Patch and skip the conditional comment to avoid it being propagated infinitely (eg. to a slave). */ - char *pcom= lip->yyUnput(' '); - comment_closed= ! consume_comment(lip, 1); + char *pcom= yyUnput(' '); + comment_closed= ! consume_comment(1); if (! comment_closed) { *pcom= '!'; @@ -1803,16 +1797,16 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) { /* Not a version comment. */ state=MY_LEX_START; - lip->set_echo(TRUE); + set_echo(TRUE); break; } } else { - lip->in_comment= PRESERVE_COMMENT; - lip->yySkip(); // Accept / - lip->yySkip(); // Accept * - comment_closed= ! consume_comment(lip, 0); + in_comment= PRESERVE_COMMENT; + yySkip(); // Accept / + yySkip(); // Accept * + comment_closed= ! consume_comment(0); /* regular comments can have zero comments inside. */ } /* @@ -1835,96 +1829,96 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) if (! 
comment_closed) return (ABORT_SYM); state = MY_LEX_START; // Try again - lip->restore_in_comment_state(); + restore_in_comment_state(); break; case MY_LEX_END_LONG_COMMENT: - if ((lip->in_comment != NO_COMMENT) && lip->yyPeek() == '/') + if ((in_comment != NO_COMMENT) && yyPeek() == '/') { /* Reject '*' '/' */ - lip->yyUnget(); + yyUnget(); /* Accept '*' '/', with the proper echo */ - lip->set_echo(lip->in_comment == PRESERVE_COMMENT); - lip->yySkipn(2); + set_echo(in_comment == PRESERVE_COMMENT); + yySkipn(2); /* And start recording the tokens again */ - lip->set_echo(TRUE); - lip->in_comment=NO_COMMENT; + set_echo(TRUE); + in_comment= NO_COMMENT; state=MY_LEX_START; } else state= MY_LEX_CHAR; // Return '*' break; case MY_LEX_SET_VAR: // Check if ':=' - if (lip->yyPeek() != '=') + if (yyPeek() != '=') { state= MY_LEX_CHAR; // Return ':' break; } - lip->yySkip(); + yySkip(); return (SET_VAR); case MY_LEX_SEMICOLON: // optional line terminator state= MY_LEX_CHAR; // Return ';' break; case MY_LEX_EOL: - if (lip->eof()) + if (eof()) { - lip->yyUnget(); // Reject the last '\0' - lip->set_echo(FALSE); - lip->yySkip(); - lip->set_echo(TRUE); + yyUnget(); // Reject the last '\0' + set_echo(FALSE); + yySkip(); + set_echo(TRUE); /* Unbalanced comments with a missing '*' '/' are a syntax error */ - if (lip->in_comment != NO_COMMENT) + if (in_comment != NO_COMMENT) return (ABORT_SYM); - lip->next_state=MY_LEX_END; // Mark for next loop + next_state= MY_LEX_END; // Mark for next loop return(END_OF_INPUT); } state=MY_LEX_CHAR; break; case MY_LEX_END: - lip->next_state=MY_LEX_END; + next_state= MY_LEX_END; return(0); // We found end of input last time /* Actually real shouldn't start with . but allow them anyhow */ case MY_LEX_REAL_OR_POINT: - if (my_isdigit(cs,(c= lip->yyPeek()))) + if (my_isdigit(cs, (c= yyPeek()))) state = MY_LEX_REAL; // Real else if (c == '.') { - lip->yySkip(); + yySkip(); return DOT_DOT_SYM; } else { state= MY_LEX_IDENT_SEP; // return '.' 
- lip->yyUnget(); // Put back '.' + yyUnget(); // Put back '.' } break; case MY_LEX_USER_END: // end '@' of user@hostname - switch (state_map[(uchar) lip->yyPeek()]) { + switch (state_map[(uchar) yyPeek()]) { case MY_LEX_STRING: case MY_LEX_USER_VARIABLE_DELIMITER: case MY_LEX_STRING_OR_DELIMITER: break; case MY_LEX_USER_END: - lip->next_state= MY_LEX_SYSTEM_VAR; + next_state= MY_LEX_SYSTEM_VAR; break; default: - lip->next_state= MY_LEX_HOSTNAME; + next_state= MY_LEX_HOSTNAME; break; } - yylval->lex_str.str=(char*) lip->get_ptr(); - yylval->lex_str.length=1; + yylval->lex_str.str= (char*) get_ptr(); + yylval->lex_str.length= 1; return((int) '@'); case MY_LEX_HOSTNAME: // end '@' of user@hostname - for (c=lip->yyGet() ; + for (c= yyGet() ; my_isalnum(cs, c) || c == '.' || c == '_' || c == '$'; - c= lip->yyGet()) ; - yylval->lex_str=get_token(lip, 0, lip->yyLength()); + c= yyGet()) ; + yylval->lex_str= get_token(0, yyLength()); return(LEX_HOSTNAME); case MY_LEX_SYSTEM_VAR: - yylval->lex_str.str=(char*) lip->get_ptr(); - yylval->lex_str.length=1; - lip->yySkip(); // Skip '@' - lip->next_state= (state_map[(uchar) lip->yyPeek()] == + yylval->lex_str.str= (char*) get_ptr(); + yylval->lex_str.length= 1; + yySkip(); // Skip '@' + next_state= (state_map[(uchar) yyPeek()] == MY_LEX_USER_VARIABLE_DELIMITER ? MY_LEX_OPERATOR_OR_IDENT : MY_LEX_IDENT_OR_KEYWORD); @@ -1935,7 +1929,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) We should now be able to handle: [(global | local | session) .]variable_name */ - return lip->scan_ident_sysvar(thd, &yylval->ident_cli); + return scan_ident_sysvar(thd, &yylval->ident_cli); } } } @@ -1971,7 +1965,7 @@ int Lex_input_stream::scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str) next_state= MY_LEX_IDENT_SEP; if (!(length= yyLength())) return ABORT_SYM; // Names must be nonempty. 
- if ((tokval= find_keyword(this, length, 0))) + if ((tokval= find_keyword(str, length, 0))) { yyUnget(); // Put back 'c' return tokval; // Was keyword @@ -2098,7 +2092,7 @@ int Lex_input_stream::scan_ident_middle(THD *thd, Lex_ident_cli_st *str, { // '(' must follow directly if function int tokval; yyUnget(); - if ((tokval= find_keyword(this, length, c == '('))) + if ((tokval= find_keyword(str, length, c == '('))) { next_state= MY_LEX_START; // Allow signed numbers return(tokval); // Was keyword diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 13eb7ff12bb..43478c60146 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -2145,6 +2145,16 @@ public: void reset(char *buff, size_t length); + /** + The main method to scan the next token, with token contraction processing + for LALR(2) resolution, e.g. translate "WITH" followed by "ROLLUP" + to a single token WITH_ROLLUP_SYM. + */ + int lex_token(union YYSTYPE *yylval, THD *thd); + + void reduce_digest_token(uint token_left, uint token_right); + +private: /** Set the echo mode. @@ -2272,15 +2282,6 @@ public: return m_ptr; } - /** - End of file indicator for the query text to parse. - @return true if there are no more characters to parse - */ - bool eof() - { - return (m_ptr >= m_end_of_query); - } - /** End of file indicator for the query text to parse. @param n number of characters expected @@ -2291,24 +2292,6 @@ public: return ((m_ptr + n) >= m_end_of_query); } - /** Get the raw query buffer. */ - const char *get_buf() - { - return m_buf; - } - - /** Get the pre-processed query buffer. */ - const char *get_cpp_buf() - { - return m_cpp_buf; - } - - /** Get the end of the raw query buffer. */ - const char *get_end_of_query() - { - return m_end_of_query; - } - /** Mark the stream position as the start of a new token. */ void start_token() { @@ -2331,6 +2314,61 @@ public: m_cpp_tok_start= m_cpp_ptr; } + /** + Get the maximum length of the utf8-body buffer. 
+ The utf8 body can grow because of the character set conversion and escaping. + */ + size_t get_body_utf8_maximum_length(THD *thd); + + /** Get the length of the current token, in the raw buffer. */ + uint yyLength() + { + /* + The assumption is that the lexical analyser is always 1 character ahead, + which the -1 account for. + */ + DBUG_ASSERT(m_ptr > m_tok_start); + return (uint) ((m_ptr - m_tok_start) - 1); + } + +public: + + /** + Test if a lookahead token was already scanned by lex_token(), + for LALR(2) resolution. + */ + bool has_lookahead() const + { + return lookahead_token >= 0; + } + + /** + End of file indicator for the query text to parse. + @return true if there are no more characters to parse + */ + bool eof() + { + return (m_ptr >= m_end_of_query); + } + + /** Get the raw query buffer. */ + const char *get_buf() + { + return m_buf; + } + + /** Get the pre-processed query buffer. */ + const char *get_cpp_buf() + { + return m_cpp_buf; + } + + /** Get the end of the raw query buffer. */ + const char *get_end_of_query() + { + return m_end_of_query; + } + /** Get the token start position, in the raw buffer. */ const char *get_tok_start() { @@ -2360,17 +2398,6 @@ public: return m_ptr; } - /** Get the length of the current token, in the raw buffer. */ - uint yyLength() - { - /* - The assumption is that the lexical analyser is always 1 character ahead, - which the -1 account for. - */ - DBUG_ASSERT(m_ptr > m_tok_start); - return (uint) ((m_ptr - m_tok_start) - 1); - } - /** Get the previus token start position, in the pre-processed buffer. */ const char *get_cpp_start_prev() { @@ -2434,12 +2461,6 @@ public: return (size_t) (m_body_utf8_ptr - m_body_utf8); } - /** - Get the maximum length of the utf8-body buffer. - The utf8 body can grow because of the character set conversion and escaping. 
- */ - size_t get_body_utf8_maximum_length(THD *thd); - void body_utf8_start(THD *thd, const char *begin_ptr); void body_utf8_append(const char *ptr); void body_utf8_append(const char *ptr, const char *end_ptr); @@ -2451,15 +2472,8 @@ public: CHARSET_INFO *txt_cs, const char *end_ptr, my_wc_t sep); - /** Current thread. */ - THD *m_thd; - - /** Current line number. */ - uint yylineno; - - /** Interface with bison, value of the last token parsed. */ - LEX_YYSTYPE yylval; +private: /** LALR(2) resolution, look ahead token. Value of the next token to return, if any, @@ -2476,15 +2490,20 @@ public: void add_digest_token(uint token, LEX_YYSTYPE yylval); - void reduce_digest_token(uint token_left, uint token_right); - + bool consume_comment(int remaining_recursions_permitted); + int lex_one_token(union YYSTYPE *yylval, THD *thd); + int find_keyword(Lex_ident_cli_st *str, uint len, bool function); + LEX_CSTRING get_token(uint skip, uint length); int scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str); int scan_ident_start(THD *thd, Lex_ident_cli_st *str); int scan_ident_middle(THD *thd, Lex_ident_cli_st *str, CHARSET_INFO **cs, my_lex_states *); int scan_ident_delimited(THD *thd, Lex_ident_cli_st *str); bool get_7bit_or_8bit_ident(THD *thd, uchar *last_char); -private: + + /** Current thread. */ + THD *m_thd; + /** Pointer to the current position in the raw input stream. */ char *m_ptr; @@ -2570,6 +2589,15 @@ public: */ bool multi_statements; + /** Current line number. */ + uint yylineno; + + /** + Current statement digest instrumentation. + */ + sql_digest_state* m_digest; + +private: /** State of the lexical analyser for comments. */ enum_comment_state in_comment; enum_comment_state in_comment_saved; @@ -2596,13 +2624,9 @@ public: NOTE: this member must be used within MYSQLlex() function only. */ CHARSET_INFO *m_underscore_cs; - - /** - Current statement digest instrumentation. - */ - sql_digest_state* m_digest; }; + /** Abstract representation of a statement. 
This class is an interface between the parser and the runtime. diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 8395135b00e..19af51f6a07 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -17365,7 +17365,7 @@ trigger_tail: FOR token is already passed through (see 'case FOR_SYM' in sql_lex.cc), so we use _prev() to get it back. */ - DBUG_ASSERT(YYLIP->lookahead_token >= 0); + DBUG_ASSERT(YYLIP->has_lookahead()); Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start_prev(); } EACH_SYM