Merge patch series "regex patches"

Rasmus Villemoes <ravi@prevas.dk> says:

This started as a rather simple patch, 1/12, adding the ability to
more conveniently do regex matching in shell.

But with that, it became very easy to see what the slre library can
and especially what it cannot do, and that way I found both outright
bugs and a "wow, doesn't it support that syntax" gotcha. I couldn't
find any tests ('git grep slre -- test/' was empty), so I added a
small test suite and tweaked slre.c.

Link: https://lore.kernel.org/r/20250513084034.654865-1-ravi@prevas.dk
This commit is contained in:
Tom Rini 2025-05-29 08:27:13 -06:00
commit 23be77e18d
9 changed files with 244 additions and 28 deletions

View File

@ -1628,6 +1628,13 @@ F: drivers/gpio/sl28cpld-gpio.c
F: drivers/misc/sl28cpld.c F: drivers/misc/sl28cpld.c
F: drivers/watchdog/sl28cpld-wdt.c F: drivers/watchdog/sl28cpld-wdt.c
SLRE
M: Rasmus Villemoes <ravi@prevas.dk>
S: Maintained
F: include/slre.h
F: lib/slre.c
F: test/lib/slre.c
SMCCC TRNG SMCCC TRNG
M: Etienne Carriere <etienne.carriere@linaro.org> M: Etienne Carriere <etienne.carriere@linaro.org>
S: Maintained S: Maintained

View File

@ -7,6 +7,7 @@
#include <command.h> #include <command.h>
#include <fs.h> #include <fs.h>
#include <log.h> #include <log.h>
#include <slre.h>
#include <vsprintf.h> #include <vsprintf.h>
#define OP_INVALID 0 #define OP_INVALID 0
@ -26,6 +27,7 @@
#define OP_INT_GT 14 #define OP_INT_GT 14
#define OP_INT_GE 15 #define OP_INT_GE 15
#define OP_FILE_EXISTS 16 #define OP_FILE_EXISTS 16
#define OP_REGEX 17
const struct { const struct {
int arg; int arg;
@ -49,6 +51,9 @@ const struct {
{0, "-z", OP_STR_EMPTY, 2}, {0, "-z", OP_STR_EMPTY, 2},
{0, "-n", OP_STR_NEMPTY, 2}, {0, "-n", OP_STR_NEMPTY, 2},
{0, "-e", OP_FILE_EXISTS, 4}, {0, "-e", OP_FILE_EXISTS, 4},
#ifdef CONFIG_REGEX
{1, "=~", OP_REGEX, 3},
#endif
}; };
static int do_test(struct cmd_tbl *cmdtp, int flag, int argc, static int do_test(struct cmd_tbl *cmdtp, int flag, int argc,
@ -141,6 +146,20 @@ static int do_test(struct cmd_tbl *cmdtp, int flag, int argc,
case OP_FILE_EXISTS: case OP_FILE_EXISTS:
expr = file_exists(ap[1], ap[2], ap[3], FS_TYPE_ANY); expr = file_exists(ap[1], ap[2], ap[3], FS_TYPE_ANY);
break; break;
#ifdef CONFIG_REGEX
case OP_REGEX: {
struct slre slre;
if (slre_compile(&slre, ap[2]) == 0) {
printf("Error compiling regex: %s\n", slre.err_str);
expr = 0;
break;
}
expr = slre_match(&slre, ap[0], strlen(ap[0]), NULL);
break;
}
#endif
} }
switch (op) { switch (op) {

View File

@ -144,8 +144,9 @@ Configuration
* The *setexpr* command is only available if CMD_SETEXPR=y. * The *setexpr* command is only available if CMD_SETEXPR=y.
* The *setexpr fmt* sub-command is only available if CMD_SETEXPR_FMT=y. * The *setexpr fmt* sub-command is only available if CMD_SETEXPR_FMT=y.
* The *setexpr gsub* and *setexpr sub* sub-commands are only available if * The *setexpr gsub* and *setexpr sub* sub-commands are only available
CONFIG_REGEX=y. if CONFIG_REGEX=y. For an overview of the supported regex syntax,
see :doc:`test`.
Return value Return value
------------ ------------

102
doc/usage/cmd/test.rst Normal file
View File

@ -0,0 +1,102 @@
.. SPDX-License-Identifier: GPL-2.0-or-later
.. index::
single: test (command)
test command
============
Synopsis
--------
::
test <str-op> <s>
test <s1> <str-cmp> <s2>
test <n1> <num-cmp> <n2>
test ! <expr>
test <expr1> -o <expr2>
test <expr1> -a <expr2>
test -e <interface> <dev[:part]> <path>
test <s> =~ <re>
Description
-----------
The ``test`` command is similar to the ordinary shell built-in by the
same name. Unlike in ordinary shells, it cannot be spelled ``[``.
Strings
~~~~~~~
The string tests ``-n`` and ``-z``, and string comparison operators
``=``, ``!=``, ``<`` and ``>``, work exactly as in ordinary shells.
Numbers
~~~~~~~
The number comparison operators ``-lt``, ``-le``, ``-gt``, ``-ge``,
``-eq`` and ``-ne`` work as in ordinary shells.
.. note::
Numbers are parsed with ``simple_strtol()`` using base 0, meaning that
they are treated as decimal unless there is a ``0x`` prefix, any errors
in parsing are ignored, and parsing stops as soon as a non-digit (for
the selected base) is encountered. Also, most U-Boot commands that
generate "numeric" environment variables store them as hexadecimal
*without* a ``0x`` prefix.
For example, this is not a correct way of testing whether a given file
has a size less than 4KiB::
# Assuming readme.txt exists, sets 'filesize' environment variable
$ size mmc 0:1 readme.txt
$ if test "$filesize" -lt 4096 ; then ...
If the file size is actually 8000 (decimal), its hexadecimal
representation, and thus the value of ``$filesize``, is ``1f40``, so
the comparison that is done ends up being "1 < 4096".
Logic
~~~~~
The ``!`` operator negates the sense of the test of the expression
``<expr>``.
The ``-o`` and ``-a`` operators perform logical OR and logical AND,
respectively, of the two expressions.
File existence
~~~~~~~~~~~~~~
Like ordinary shells, the ``-e`` operator can be used to test for
existence of a file. However, the U-Boot version takes three
arguments:
- The interface (e.g. ``mmc``).
- The device number, possibly including a partition specification.
- The usual path argument, which is interpreted relative to the root
of the filesystem.
Regular expressions
~~~~~~~~~~~~~~~~~~~
When ``CONFIG_REGEX`` is enabled, an additional operator ``=~`` is
available. This is similar to the same operator available with bash's
extended test command ``[[ ]]``. The left operand is a string which is
matched against the regular expression described by the right operand.
The regular expression engine supports these features:
- Anchoring ``^`` and ``$``, matching at the beginning/end of the
string.
- Matching any single character (including whitespace) using ``.``.
- Character classes ``[ ]``, including ranges ``[0-9]`` and negation
``[^ /.]``.
- Grouping ``( )``.
- Alternation ``|``.
- Postfix qualifiers ``*``, ``+`` and ``?`` and their non-greedy
variants ``*?``, ``+?`` and ``??``.
For extracting the parts matching a capture group and/or performing
substitutions, including back references, see :doc:`setexpr`.

View File

@ -123,6 +123,7 @@ Shell commands
cmd/source cmd/source
cmd/tcpm cmd/tcpm
cmd/temperature cmd/temperature
cmd/test
cmd/tftpput cmd/tftpput
cmd/trace cmd/trace
cmd/true cmd/true

View File

@ -63,7 +63,6 @@ struct slre {
int code_size; int code_size;
int data_size; int data_size;
int num_caps; /* Number of bracket pairs */ int num_caps; /* Number of bracket pairs */
int anchored; /* Must match from string start */
const char *err_str; /* Error string */ const char *err_str; /* Error string */
}; };

View File

@ -30,7 +30,7 @@
#include <slre.h> #include <slre.h>
enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL,
STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT, RANGE};
#ifdef SLRE_TEST #ifdef SLRE_TEST
static struct { static struct {
@ -55,7 +55,8 @@ static struct {
{"QUEST", 1, "o"}, /* Match zero or one time, "?" */ {"QUEST", 1, "o"}, /* Match zero or one time, "?" */
{"SPACE", 0, ""}, /* Match whitespace, "\s" */ {"SPACE", 0, ""}, /* Match whitespace, "\s" */
{"NONSPACE", 0, ""}, /* Match non-space, "\S" */ {"NONSPACE", 0, ""}, /* Match non-space, "\S" */
{"DIGIT", 0, ""} /* Match digit, "\d" */ {"DIGIT", 0, ""}, /* Match digit, "\d" */
{"RANGE", 0, ""}, /* Range separator - */
}; };
#endif /* SLRE_TEST */ #endif /* SLRE_TEST */
@ -260,6 +261,15 @@ anyof(struct slre *r, const char **re)
return; return;
/* NOTREACHED */ /* NOTREACHED */
break; break;
case '-':
if (r->data_size == old_data_size || **re == ']') {
/* First or last character, just match - itself. */
store_char_in_data(r, '-');
break;
}
store_char_in_data(r, 0);
store_char_in_data(r, RANGE);
break;
case '\\': case '\\':
esc = get_escape_char(re); esc = get_escape_char(re);
if ((esc & 0xff) == 0) { if ((esc & 0xff) == 0) {
@ -413,10 +423,7 @@ int
slre_compile(struct slre *r, const char *re) slre_compile(struct slre *r, const char *re)
{ {
r->err_str = NULL; r->err_str = NULL;
r->code_size = r->data_size = r->num_caps = r->anchored = 0; r->code_size = r->data_size = r->num_caps = 0;
if (*re == '^')
r->anchored++;
emit(r, OPEN); /* This will capture what matches full RE */ emit(r, OPEN); /* This will capture what matches full RE */
emit(r, 0); emit(r, 0);
@ -475,29 +482,54 @@ is_any_of(const unsigned char *p, int len, const char *s, int *ofs)
ch = s[*ofs]; ch = s[*ofs];
for (i = 0; i < len; i++) for (i = 0; i < len; i++) {
if (p[i] == ch) { if (p[i] == '\0') {
(*ofs)++; switch (p[++i]) {
return 1; case NONSPACE:
if (!isspace(ch))
goto match;
break;
case SPACE:
if (isspace(ch))
goto match;
break;
case DIGIT:
if (isdigit(ch))
goto match;
break;
case RANGE:
/*
* a-z is represented in the data array as {'a', \0, RANGE, 'z'}
*/
++i;
if (p[i - 3] <= (unsigned char)ch && (unsigned char)ch <= p[i])
goto match;
break;
}
continue;
} }
if (p[i] == ch)
goto match;
}
return 0; return 0;
match:
(*ofs)++;
return 1;
} }
static int static int
is_any_but(const unsigned char *p, int len, const char *s, int *ofs) is_any_but(const unsigned char *p, int len, const char *s, int *ofs)
{ {
int i, ch; int dummy = *ofs;
ch = s[*ofs]; if (is_any_of(p, len, s, &dummy)) {
return 0;
for (i = 0; i < len; i++) { } else {
if (p[i] == ch) (*ofs)++;
return 0; return 1;
} }
(*ofs)++;
return 1;
} }
static int static int
@ -650,13 +682,9 @@ slre_match(const struct slre *r, const char *buf, int len,
{ {
int i, ofs = 0, res = 0; int i, ofs = 0, res = 0;
if (r->anchored) { for (i = 0; i <= len && res == 0; i++) {
ofs = i;
res = match(r, 0, buf, len, &ofs, caps); res = match(r, 0, buf, len, &ofs, caps);
} else {
for (i = 0; i < len && res == 0; i++) {
ofs = i;
res = match(r, 0, buf, len, &ofs, caps);
}
} }
return res; return res;

View File

@ -29,6 +29,7 @@ obj-$(CONFIG_SHA256) += test_sha256_hmac.o
obj-$(CONFIG_HKDF_MBEDTLS) += test_sha256_hkdf.o obj-$(CONFIG_HKDF_MBEDTLS) += test_sha256_hkdf.o
obj-$(CONFIG_GETOPT) += getopt.o obj-$(CONFIG_GETOPT) += getopt.o
obj-$(CONFIG_CRC8) += test_crc8.o obj-$(CONFIG_CRC8) += test_crc8.o
obj-$(CONFIG_REGEX) += slre.o
obj-$(CONFIG_UT_LIB_CRYPT) += test_crypt.o obj-$(CONFIG_UT_LIB_CRYPT) += test_crypt.o
obj-$(CONFIG_UT_TIME) += time.o obj-$(CONFIG_UT_TIME) += time.o
obj-$(CONFIG_$(PHASE_)UT_UNICODE) += unicode.o obj-$(CONFIG_$(PHASE_)UT_UNICODE) += unicode.o

58
test/lib/slre.c Normal file
View File

@ -0,0 +1,58 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
#include <test/lib.h>
#include <test/ut.h>
#include <slre.h>
/**
 * struct re_test - one regular-expression test case
 * @str: subject string passed to slre_match(); a NULL @str ends the table
 * @re: pattern passed to slre_compile()
 * @match: expected outcome, 1 if @re should match @str, 0 if it should not
 */
struct re_test {
const char *str;
const char *re;
int match;
};
/*
 * Test vectors, terminated by an all-zero entry (NULL str).
 */
static const struct re_test re_test[] = {
/* \d combined with + and both anchors */
{ "123", "^\\d+$", 1},
{ "x23", "^\\d+$", 0},
/* Repetition of a group */
{ "banana", "^([bn]a)*$", 1},
{ "panama", "^([bn]a)*$", 0},
/* An anchor inside one alternative must not constrain the other one */
{ "xby", "^a|b", 1},
{ "xby", "b|^a", 1},
{ "xby", "b|c$", 1},
{ "xby", "c$|b", 1},
/* Empty subject string, and patterns able to match an empty substring */
{ "", "x*$", 1},
{ "", "^x*$", 1},
{ "yy", "x*$", 1},
{ "yy", "^x*$", 0},
/* Negated character class */
{ "Gadsby", "^[^eE]*$", 1},
{ "Ernest", "^[^eE]*$", 0},
/* Character class listing every member explicitly */
{ "6d41f0a39d6", "^[0123456789abcdef]*$", 1 },
/*
 * DIGIT has the value 17 (0x11) in the opcode enum of lib/slre.c. A raw
 * 0x11 byte in the subject must not be matched just because \d appears
 * in the class; only real digits may match.
 */
{ "##\x11%%\x11", "^[#%\\d]*$", 0 },
{ "##23%%45", "^[#%\\d]*$", 1 },
/* Ranges; a '-' that is first or last in the class is a literal '-' */
{ "U-Boot", "^[B-Uo-t]*$", 0 },
{ "U-Boot", "^[A-Zm-v-]*$", 1 },
{ "U-Boot", "^[-A-Za-z]*$", 1 },
/* The range --C covers both - and B. */
{ "U-Boot", "^[--CUot]*$", 1 },
/*
 * Negated classes containing ranges. In the last case the expected result
 * implies that an escaped '-' is taken as a literal '-' rather than as a
 * range separator.
 */
{ "U-Boot", "^[^0-9]*$", 1 },
{ "U-Boot", "^[^0-9<->]*$", 1 },
{ "U-Boot", "^[^0-9<\\->]*$", 0 },
{}
};
/**
 * lib_slre() - run every entry of re_test[] through the SLRE engine
 * @uts: unit test state, used implicitly by the ut_assert*() macros
 *
 * Each pattern is compiled with slre_compile() and matched against its
 * subject string with slre_match(). The match result, normalised to 0/1,
 * must equal the expected value stored in the table entry.
 *
 * Return: 0 once every table entry has been checked successfully
 */
static int lib_slre(struct unit_test_state *uts)
{
	const struct re_test *t;

	/* The table ends with an all-zero entry, i.e. a NULL subject string. */
	for (t = re_test; t->str; t++) {
		struct slre slre;

		/*
		 * All patterns in the table are expected to be valid. If one
		 * fails to compile, say which one and why instead of emitting
		 * a bare assertion failure.
		 */
		ut_assertf(slre_compile(&slre, t->re),
			   "failed to compile '%s': %s\n", t->re, slre.err_str);

		/* !! folds any non-zero match result to 1 before comparing. */
		ut_assertf(!!slre_match(&slre, t->str, strlen(t->str), NULL) == t->match,
			   "'%s' unexpectedly %s '%s'\n", t->str,
			   t->match ? "didn't match" : "matched", t->re);
	}

	return 0;
}
LIB_TEST(lib_slre, 0);