mirror of
https://source.denx.de/u-boot/u-boot.git
synced 2025-12-19 16:31:27 +01:00
Merge patch series "regex patches"
Rasmus Villemoes <ravi@prevas.dk> says:
This started as a rather simple patch, 1/12, adding the ability to
more conveniently do regex matching in shell.
But with that, it became very easy to see what the slre library can
and especially what it cannot do, and that way I found both outright
bugs and a "wow, doesn't it support that syntax" gotcha. I couldn't
find any tests ('git grep slre -- test/' was empty), so I added a
small test suite and tweaked slre.c.
Link: https://lore.kernel.org/r/20250513084034.654865-1-ravi@prevas.dk
This commit is contained in:
commit
23be77e18d
@ -1628,6 +1628,13 @@ F: drivers/gpio/sl28cpld-gpio.c
|
||||
F: drivers/misc/sl28cpld.c
|
||||
F: drivers/watchdog/sl28cpld-wdt.c
|
||||
|
||||
SLRE
|
||||
M: Rasmus Villemoes <ravi@prevas.dk>
|
||||
S: Maintained
|
||||
F: include/slre.h
|
||||
F: lib/slre.c
|
||||
F: test/lib/slre.c
|
||||
|
||||
SMCCC TRNG
|
||||
M: Etienne Carriere <etienne.carriere@linaro.org>
|
||||
S: Maintained
|
||||
|
||||
19
cmd/test.c
19
cmd/test.c
@ -7,6 +7,7 @@
|
||||
#include <command.h>
|
||||
#include <fs.h>
|
||||
#include <log.h>
|
||||
#include <slre.h>
|
||||
#include <vsprintf.h>
|
||||
|
||||
#define OP_INVALID 0
|
||||
@ -26,6 +27,7 @@
|
||||
#define OP_INT_GT 14
|
||||
#define OP_INT_GE 15
|
||||
#define OP_FILE_EXISTS 16
|
||||
#define OP_REGEX 17
|
||||
|
||||
const struct {
|
||||
int arg;
|
||||
@ -49,6 +51,9 @@ const struct {
|
||||
{0, "-z", OP_STR_EMPTY, 2},
|
||||
{0, "-n", OP_STR_NEMPTY, 2},
|
||||
{0, "-e", OP_FILE_EXISTS, 4},
|
||||
#ifdef CONFIG_REGEX
|
||||
{1, "=~", OP_REGEX, 3},
|
||||
#endif
|
||||
};
|
||||
|
||||
static int do_test(struct cmd_tbl *cmdtp, int flag, int argc,
|
||||
@ -141,6 +146,20 @@ static int do_test(struct cmd_tbl *cmdtp, int flag, int argc,
|
||||
case OP_FILE_EXISTS:
|
||||
expr = file_exists(ap[1], ap[2], ap[3], FS_TYPE_ANY);
|
||||
break;
|
||||
#ifdef CONFIG_REGEX
|
||||
case OP_REGEX: {
|
||||
struct slre slre;
|
||||
|
||||
if (slre_compile(&slre, ap[2]) == 0) {
|
||||
printf("Error compiling regex: %s\n", slre.err_str);
|
||||
expr = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
expr = slre_match(&slre, ap[0], strlen(ap[0]), NULL);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
|
||||
@ -144,8 +144,9 @@ Configuration
|
||||
|
||||
* The *setexpr* command is only available if CMD_SETEXPR=y.
|
||||
* The *setexpr fmt* sub-command is only available if CMD_SETEXPR_FMT=y.
|
||||
* The *setexpr gsub* and *setexpr sub* sub-commands are only available if
|
||||
CONFIG_REGEX=y.
|
||||
* The *setexpr gsub* and *setexpr sub* sub-commands are only available
|
||||
if CONFIG_REGEX=y. For an overview of the supported regex syntax,
|
||||
see :doc:`test`.
|
||||
|
||||
Return value
|
||||
------------
|
||||
|
||||
102
doc/usage/cmd/test.rst
Normal file
102
doc/usage/cmd/test.rst
Normal file
@ -0,0 +1,102 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
.. index::
|
||||
single: test (command)
|
||||
|
||||
test command
|
||||
============
|
||||
|
||||
Synopsis
|
||||
--------
|
||||
|
||||
::
|
||||
|
||||
test <str-op> <s>
|
||||
test <s1> <str-cmp> <s2>
|
||||
test <n1> <num-cmp> <n2>
|
||||
test ! <expr>
|
||||
test <expr1> -o <expr2>
|
||||
test <expr1> -a <expr2>
|
||||
test -e <interface> <dev[:part]> <path>
|
||||
test <s> =~ <re>
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The ``test`` command is similar to the ordinary shell built-in by the
|
||||
same name. Unlike in ordinary shells, it cannot be spelled ``[``.
|
||||
|
||||
Strings
|
||||
~~~~~~~
|
||||
|
||||
The string tests ``-n`` and ``-z``, and string comparison operators
|
||||
``=``, ``!=``, ``<`` and ``>``, work exactly as in ordinary shells.
|
||||
|
||||
Numbers
|
||||
~~~~~~~
|
||||
|
||||
The number comparison operators ``-lt``, ``-le``, ``-gt``, ``-ge``,
|
||||
``-eq`` and ``-ne`` work as in ordinary shells.
|
||||
|
||||
.. note::
|
||||
Numbers are parsed with ``simple_strtol(, 0)``, meaning that they
|
||||
are treated as decimal unless there is a ``0x`` prefix, any errors in
|
||||
parsing are ignored, and parsing stops as soon as a non-digit (for
|
||||
the selected base) is encountered. And most U-Boot commands that
|
||||
generate "numeric" environment variables store them as hexadecimal
|
||||
*without* a ``0x`` prefix.
|
||||
|
||||
For example, this is not a correct way of testing whether a given file
|
||||
has a size less than 4KiB::
|
||||
|
||||
# Assuming readme.txt exists, sets 'filesize' environment variable
|
||||
$ size mmc 0:1 readme.txt
|
||||
$ if test "$filesize" -lt 4096 ; then ...
|
||||
|
||||
If the file size is actually 8000 (decimal), its hexadecimal
|
||||
representation, and thus the value of ``$filesize``, is ``1f40``, so
|
||||
the comparison that is done ends up being "1 < 4096".
|
||||
|
||||
Logic
|
||||
~~~~~
|
||||
|
||||
The ``!`` operator negates the sense of the test of the expression
|
||||
``<expr>``.
|
||||
|
||||
The ``-o`` and ``-a`` operators perform logical OR and logical AND,
|
||||
respectively, of the two expressions.
|
||||
|
||||
File existence
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Like ordinary shells, the ``-e`` operator can be used to test for
|
||||
existence of a file. However, the U-Boot version takes three
|
||||
arguments:
|
||||
|
||||
- The interface (e.g. ``mmc``).
|
||||
- The device number, possibly including a partition specification.
|
||||
- The usual path argument, which is interpreted relative to the root
|
||||
of the filesystem.
|
||||
|
||||
Regular expressions
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When ``CONFIG_REGEX`` is enabled, an additional operator ``=~`` is
|
||||
available. This is similar to the same operator available with bash's
|
||||
extended test command ``[[ ]]``. The left operand is a string which is
|
||||
matched against the regular expression described by the right operand.
|
||||
|
||||
The regular expression engine supports these features:
|
||||
|
||||
- Anchoring ``^`` and ``$``, matching at the beginning/end of the
|
||||
string.
|
||||
- Matching any single character (including whitespace) using ``.``.
|
||||
- Character classes ``[ ]``, including ranges ``[0-9]`` and negation
|
||||
``[^ /.]``.
|
||||
- Grouping ``( )``.
|
||||
- Alternation ``|``.
|
||||
- Postfix qualifiers ``*``, ``+`` and ``?`` and their non-greedy
|
||||
variants ``*?``, ``+?`` and ``??``.
|
||||
|
||||
For extracting the parts matching a capture group and/or performing
|
||||
substitutions, including back references, see :doc:`setexpr`.
|
||||
@ -123,6 +123,7 @@ Shell commands
|
||||
cmd/source
|
||||
cmd/tcpm
|
||||
cmd/temperature
|
||||
cmd/test
|
||||
cmd/tftpput
|
||||
cmd/trace
|
||||
cmd/true
|
||||
|
||||
@ -63,7 +63,6 @@ struct slre {
|
||||
int code_size;
|
||||
int data_size;
|
||||
int num_caps; /* Number of bracket pairs */
|
||||
int anchored; /* Must match from string start */
|
||||
const char *err_str; /* Error string */
|
||||
};
|
||||
|
||||
|
||||
78
lib/slre.c
78
lib/slre.c
@ -30,7 +30,7 @@
|
||||
#include <slre.h>
|
||||
|
||||
enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL,
|
||||
STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT};
|
||||
STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT, RANGE};
|
||||
|
||||
#ifdef SLRE_TEST
|
||||
static struct {
|
||||
@ -55,7 +55,8 @@ static struct {
|
||||
{"QUEST", 1, "o"}, /* Match zero or one time, "?" */
|
||||
{"SPACE", 0, ""}, /* Match whitespace, "\s" */
|
||||
{"NONSPACE", 0, ""}, /* Match non-space, "\S" */
|
||||
{"DIGIT", 0, ""} /* Match digit, "\d" */
|
||||
{"DIGIT", 0, ""}, /* Match digit, "\d" */
|
||||
{"RANGE", 0, ""}, /* Range separator - */
|
||||
};
|
||||
#endif /* SLRE_TEST */
|
||||
|
||||
@ -260,6 +261,15 @@ anyof(struct slre *r, const char **re)
|
||||
return;
|
||||
/* NOTREACHED */
|
||||
break;
|
||||
case '-':
|
||||
if (r->data_size == old_data_size || **re == ']') {
|
||||
/* First or last character, just match - itself. */
|
||||
store_char_in_data(r, '-');
|
||||
break;
|
||||
}
|
||||
store_char_in_data(r, 0);
|
||||
store_char_in_data(r, RANGE);
|
||||
break;
|
||||
case '\\':
|
||||
esc = get_escape_char(re);
|
||||
if ((esc & 0xff) == 0) {
|
||||
@ -413,10 +423,7 @@ int
|
||||
slre_compile(struct slre *r, const char *re)
|
||||
{
|
||||
r->err_str = NULL;
|
||||
r->code_size = r->data_size = r->num_caps = r->anchored = 0;
|
||||
|
||||
if (*re == '^')
|
||||
r->anchored++;
|
||||
r->code_size = r->data_size = r->num_caps = 0;
|
||||
|
||||
emit(r, OPEN); /* This will capture what matches full RE */
|
||||
emit(r, 0);
|
||||
@ -475,29 +482,54 @@ is_any_of(const unsigned char *p, int len, const char *s, int *ofs)
|
||||
|
||||
ch = s[*ofs];
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
if (p[i] == ch) {
|
||||
(*ofs)++;
|
||||
return 1;
|
||||
for (i = 0; i < len; i++) {
|
||||
if (p[i] == '\0') {
|
||||
switch (p[++i]) {
|
||||
case NONSPACE:
|
||||
if (!isspace(ch))
|
||||
goto match;
|
||||
break;
|
||||
case SPACE:
|
||||
if (isspace(ch))
|
||||
goto match;
|
||||
break;
|
||||
case DIGIT:
|
||||
if (isdigit(ch))
|
||||
goto match;
|
||||
break;
|
||||
case RANGE:
|
||||
/*
|
||||
* a-z is represented in the data array as {'a', \0, RANGE, 'z'}
|
||||
*/
|
||||
++i;
|
||||
if (p[i - 3] <= (unsigned char)ch && (unsigned char)ch <= p[i])
|
||||
goto match;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (p[i] == ch)
|
||||
goto match;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
match:
|
||||
(*ofs)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
is_any_but(const unsigned char *p, int len, const char *s, int *ofs)
|
||||
{
|
||||
int i, ch;
|
||||
int dummy = *ofs;
|
||||
|
||||
ch = s[*ofs];
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (p[i] == ch)
|
||||
return 0;
|
||||
if (is_any_of(p, len, s, &dummy)) {
|
||||
return 0;
|
||||
} else {
|
||||
(*ofs)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
(*ofs)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -650,13 +682,9 @@ slre_match(const struct slre *r, const char *buf, int len,
|
||||
{
|
||||
int i, ofs = 0, res = 0;
|
||||
|
||||
if (r->anchored) {
|
||||
for (i = 0; i <= len && res == 0; i++) {
|
||||
ofs = i;
|
||||
res = match(r, 0, buf, len, &ofs, caps);
|
||||
} else {
|
||||
for (i = 0; i < len && res == 0; i++) {
|
||||
ofs = i;
|
||||
res = match(r, 0, buf, len, &ofs, caps);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
|
||||
@ -29,6 +29,7 @@ obj-$(CONFIG_SHA256) += test_sha256_hmac.o
|
||||
obj-$(CONFIG_HKDF_MBEDTLS) += test_sha256_hkdf.o
|
||||
obj-$(CONFIG_GETOPT) += getopt.o
|
||||
obj-$(CONFIG_CRC8) += test_crc8.o
|
||||
obj-$(CONFIG_REGEX) += slre.o
|
||||
obj-$(CONFIG_UT_LIB_CRYPT) += test_crypt.o
|
||||
obj-$(CONFIG_UT_TIME) += time.o
|
||||
obj-$(CONFIG_$(PHASE_)UT_UNICODE) += unicode.o
|
||||
|
||||
58
test/lib/slre.c
Normal file
58
test/lib/slre.c
Normal file
@ -0,0 +1,58 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
|
||||
#include <test/lib.h>
|
||||
#include <test/ut.h>
|
||||
#include <slre.h>
|
||||
|
||||
struct re_test {
|
||||
const char *str;
|
||||
const char *re;
|
||||
int match;
|
||||
};
|
||||
|
||||
static const struct re_test re_test[] = {
|
||||
{ "123", "^\\d+$", 1},
|
||||
{ "x23", "^\\d+$", 0},
|
||||
{ "banana", "^([bn]a)*$", 1},
|
||||
{ "panama", "^([bn]a)*$", 0},
|
||||
{ "xby", "^a|b", 1},
|
||||
{ "xby", "b|^a", 1},
|
||||
{ "xby", "b|c$", 1},
|
||||
{ "xby", "c$|b", 1},
|
||||
{ "", "x*$", 1},
|
||||
{ "", "^x*$", 1},
|
||||
{ "yy", "x*$", 1},
|
||||
{ "yy", "^x*$", 0},
|
||||
{ "Gadsby", "^[^eE]*$", 1},
|
||||
{ "Ernest", "^[^eE]*$", 0},
|
||||
{ "6d41f0a39d6", "^[0123456789abcdef]*$", 1 },
|
||||
/* DIGIT is 17 */
|
||||
{ "##\x11%%\x11", "^[#%\\d]*$", 0 },
|
||||
{ "##23%%45", "^[#%\\d]*$", 1 },
|
||||
{ "U-Boot", "^[B-Uo-t]*$", 0 },
|
||||
{ "U-Boot", "^[A-Zm-v-]*$", 1 },
|
||||
{ "U-Boot", "^[-A-Za-z]*$", 1 },
|
||||
/* The range --C covers both - and B. */
|
||||
{ "U-Boot", "^[--CUot]*$", 1 },
|
||||
{ "U-Boot", "^[^0-9]*$", 1 },
|
||||
{ "U-Boot", "^[^0-9<->]*$", 1 },
|
||||
{ "U-Boot", "^[^0-9<\\->]*$", 0 },
|
||||
{}
|
||||
};
|
||||
|
||||
static int lib_slre(struct unit_test_state *uts)
|
||||
{
|
||||
const struct re_test *t;
|
||||
|
||||
for (t = re_test; t->str; t++) {
|
||||
struct slre slre;
|
||||
|
||||
ut_assert(slre_compile(&slre, t->re));
|
||||
ut_assertf(!!slre_match(&slre, t->str, strlen(t->str), NULL) == t->match,
|
||||
"'%s' unexpectedly %s '%s'\n", t->str,
|
||||
t->match ? "didn't match" : "matched", t->re);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
LIB_TEST(lib_slre, 0);
|
||||
Loading…
x
Reference in New Issue
Block a user