add limited highlighting of filter test results based on matched rules

This commit is contained in:
Andrew Dolgov 2025-03-28 07:59:46 +03:00
parent 49766ab01f
commit 8986a3e7ee
No known key found for this signature in database
GPG Key ID: 1A56B4FA25D4AF2A
4 changed files with 97 additions and 31 deletions

View File

@ -176,11 +176,43 @@ class Pref_Filters extends Handler_Protected {
},
$entry, $excerpt_length);
$matches = [];
$content_preview = $entry["content_preview"];
$content_title = $entry["title"];
// is it even possible to have multiple matched rules here?
foreach ($matched_rules as $rule) {
$can_highlight_content = false;
$can_highlight_title = false;
$matches[] = $rule['regexp_matches'][0];
switch ($rule['type']) {
case "both":
$can_highlight_title = true;
$can_highlight_content = true;
break;
case "title":
$can_highlight_title = true;
break;
case "content":
$can_highlight_content = true;
break;
}
if ($can_highlight_content)
$content_preview = Sanitizer::highlight_words_str($content_preview, $matches);
if ($can_highlight_title)
$content_title = Sanitizer::highlight_words_str($content_title, $matches);
}
$rv['items'][] = [
'title' => $entry['title'],
'title' => $content_title,
'feed_title' => $entry['feed_title'],
'date' => mb_substr($entry['date_entered'], 0, 16),
'content_preview' => $entry['content_preview'],
'content_preview' => $content_preview,
'matched_rules' => $matched_rules,
];
}

View File

@ -59,6 +59,65 @@ class Sanitizer {
return parse_url(Config::get_self_url(), PHP_URL_SCHEME) == 'https';
}
/**
 * Highlights the given words inside an HTML string.
 *
 * The string is wrapped in a <span>, parsed with DOMDocument and handed to
 * self::highlight_words(). When that call reports success, the serialized
 * document is returned with everything outside of <body>...</body> removed
 * (or the whole serialized document if no <body> element is found).
 * If parsing fails or highlight_words() returns false, $str is returned as is.
 *
 * @param array<string> $words
 */
public static function highlight_words_str(string $str, array $words) : string {
	$doc = new DOMDocument();

	// nothing to work with if the markup could not be loaded
	if (!$doc->loadHTML('<?xml encoding="UTF-8"><span>' . $str . '</span>'))
		return $str;

	if (!self::highlight_words($doc, new DOMXPath($doc), $words))
		return $str;

	$html = $doc->saveHTML();
	$body = [];

	/* keep only the inner markup of <body>, fall back to the full document */
	return preg_match('/<body>(.*)<\/body>/is', $html, $body) ? $body[1] : $html;
}
/**
 * Wraps every case-insensitive occurrence of each word found in the text nodes
 * of $doc into <span class="highlight">...</span>, keeping the casing of the
 * matched text as it appears in the document.
 *
 * Text nodes are re-queried for every word, so nodes produced while processing
 * an earlier word are searched again for the following ones.
 *
 * @param array<string> $words words to highlight; empty strings are skipped
 * @return bool true if at least one text node has been processed
 */
public static function highlight_words(DOMDocument &$doc, DOMXPath $xpath, array $words) : bool {
	$rv = false;

	foreach ($words as $word) {

		// an empty needle is found at offset 0 on PHP 8+ and the remaining text
		// would never get shorter, so the search loop below would never finish
		if ($word === '') continue;

		$word_length = mb_strlen($word);

		// http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph
		$elements = $xpath->query("//*/text()");

		foreach ($elements as $child) {
			$fragment = $doc->createDocumentFragment();
			$text = $child->textContent;

			while (($pos = mb_stripos($text, $word)) !== false) {
				// text preceding the match is copied verbatim
				$fragment->appendChild(new DOMText(mb_substr($text, 0, (int)$pos)));

				// use the text as it appears in the document, not the search term,
				// and leave the loop variable untouched
				$matched = mb_substr($text, (int)$pos, $word_length);

				$highlight = $doc->createElement('span');
				$highlight->appendChild(new DOMText($matched));
				$highlight->setAttribute('class', 'highlight');
				$fragment->appendChild($highlight);

				$text = mb_substr($text, $pos + $word_length);
			}

			// strict comparison: empty() would also throw away a remainder of "0"
			if ($text !== '') $fragment->appendChild(new DOMText($text));

			$child->parentNode->replaceChild($fragment, $child);

			$rv = true;
		}
	}

	return $rv;
}
/**
* @param array<int, string>|null $highlight_words Words to highlight in the HTML output.
*
@ -197,34 +256,8 @@ class Sanitizer {
$div->appendChild($entry);
}
if (is_array($highlight_words)) {
foreach ($highlight_words as $word) {
// http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph
$elements = $xpath->query("//*/text()");
foreach ($elements as $child) {
$fragment = $doc->createDocumentFragment();
$text = $child->textContent;
while (($pos = mb_stripos($text, $word)) !== false) {
$fragment->appendChild(new DOMText(mb_substr($text, 0, (int)$pos)));
$word = mb_substr($text, (int)$pos, mb_strlen($word));
$highlight = $doc->createElement('span');
$highlight->appendChild(new DOMText($word));
$highlight->setAttribute('class', 'highlight');
$fragment->appendChild($highlight);
$text = mb_substr($text, $pos + mb_strlen($word));
}
if (!empty($text)) $fragment->appendChild(new DOMText($text));
$child->parentNode->replaceChild($fragment, $child);
}
}
}
if (is_array($highlight_words))
self::highlight_words($doc, $xpath, $highlight_words);
$res = $doc->saveHTML();

View File

@ -20,7 +20,7 @@ const Filters = {
PARAM_ACTIONS: [4, 6, 7, 9, 10],
filter_info: {},
formatMatchedRules: function(rules) {
return rules.map((r) => r.regexp_matches[0] + ' - ' + r.reg_exp).join('\n');
return rules.map((r) => r.regexp_matches[0] + ' - ' + r.reg_exp + ' (' + r.type + ')').join('\n');
},
test: function() {
const test_dialog = new fox.SingleUseDialog({

View File

@ -32,6 +32,7 @@ parameters:
- plugins.local/**/tests/*
- plugins.local/*/vendor/intervention/*
- plugins.local/*/vendor/psr/log/*
- plugins.local/af_readability/*
- plugins.local/cache_s3/vendor/*
- plugins/**/test/*
- plugins/**/Test/*