MINOR: lua: Add tokenize function.

For tokenizing a string, standard Lua recommends using regexes.
The following example splits words:

   for i in string.gmatch(example, "%S+") do
      print(i)
   end

This is a little bit overkill for simply splitting words. This patch
adds a tokenize function which is quick and does not use regexes.
This commit is contained in:
Thierry FOURNIER / OZON.IO 2016-11-24 20:48:38 +01:00 committed by Willy Tarreau
parent 7f3aa8b62f
commit 8a1027aa45
2 changed files with 82 additions and 0 deletions

View File

@ -645,6 +645,40 @@ Core class
Match two networks. For example "127.0.0.1/32" matches "127.0.0.0/8". The order
of the networks is not important.
.. js:function:: core.tokenize(str, separators [, noblank])
**context**: body, init, task, action, sample-fetch, converter
This function is useful for tokenizing an entry, or splitting some messages.
:param string str: The string which will be split.
:param string separators: A string containing a list of separators.
:param boolean noblank: Ignore empty entries.
:returns: an array of strings.
For example:
.. code-block:: lua
local array = core.tokenize("This function is useful, for tokenizing an entry.", "., ", true)
print_r(array)
..
Returns this array:
.. code-block:: text
(table) table: 0x21c01e0 [
1: (string) "This"
2: (string) "function"
3: (string) "is"
4: (string) "useful"
5: (string) "for"
6: (string) "tokenizing"
7: (string) "an"
8: (string) "entry"
]
..
.. _proxy_class:
Proxy class

View File

@ -915,6 +915,53 @@ int hlua_fcn_post_init(lua_State *L)
return 1;
}
/* Lua binding: tokenize(str, separators [, noblank]).
 *
 * Splits the string given as argument 1 at every character that appears
 * in the separator list given as argument 2. When the optional boolean
 * argument 3 is true, empty tokens are not stored.
 *
 * Both strings are scanned up to their first '\0' byte.
 *
 * The function returns a table filled with the tokens, indexed from 1.
 */
int hlua_tokenize(lua_State *L)
{
	const char *str;
	const char *sep;
	const char *token;
	const char *p;
	const char *c;
	int index;
	int ignore_empty;

	ignore_empty = 0;

	str = luaL_checkstring(L, 1);
	sep = luaL_checkstring(L, 2);

	/* Honor the optional flag even when the caller passed extra
	 * trailing arguments after it.
	 */
	if (lua_gettop(L) >= 3)
		ignore_empty = hlua_checkboolean(L, 3);

	lua_newtable(L);
	index = 1;
	token = str;
	p = str;
	while (1) {
		/* Look for *p in the separator list. When no separator
		 * matches, c stops on the '\0' terminating sep, so the
		 * comparison below is also true once p reaches the end of
		 * str: this is what flushes the last token.
		 */
		for (c = sep; *c != '\0'; c++) {
			if (*p == *c)
				break;
		}

		if (*p == *c) {
			/* [token, p) is the current token; it may be empty. */
			if (!ignore_empty || p > token) {
				lua_pushlstring(L, token, p - token);
				lua_rawseti(L, -2, index);
				index++;
			}
			/* The next token starts right after the separator. */
			token = p + 1;
		}

		if (*p == '\0')
			break;
		p++;
	}

	return 1;
}
int hlua_parse_addr(lua_State *L)
{
struct hlua_addr *addr;
@ -1000,6 +1047,7 @@ int hlua_fcn_reg_core_fcn(lua_State *L)
hlua_class_function(L, "get_info", hlua_get_info);
hlua_class_function(L, "parse_addr", hlua_parse_addr);
hlua_class_function(L, "match_addr", hlua_match_addr);
hlua_class_function(L, "tokenize", hlua_tokenize);
/* Create listener object. */
lua_newtable(L);