From efe6fbd3fa97d4dcf22cdaa72561cc91421bcfe2 Mon Sep 17 00:00:00 2001 From: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> Date: Mon, 1 Sep 2025 14:06:10 -0500 Subject: [PATCH 1/5] Update ComicPress logic This updates the logic to work across a variety of additional sites. Additionally, it grabs the author's comments from comics, such as the text on Buttersafe. This does not update the list of supported comics. There are too many comic sites to enumerate all of them anyway. --- .../filters/af_comics_comicpress.php | 91 ++++++++++--------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 0afa19906..54ad515d7 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -9,59 +9,66 @@ class Af_Comics_ComicPress extends Af_ComicFilter { function process(&$article) { if (str_contains($article["guid"], "bunicomic.com") || - str_contains($article["guid"], "buttersafe.com") || - str_contains($article["guid"], "extrafabulouscomics.com") || - str_contains($article["guid"], "danbydraws.com") || - str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || - str_contains($article["guid"], "happyjar.com") || - str_contains($article["guid"], "nedroid.com") || - str_contains($article["guid"], "stonetoss.com") || - str_contains($article["guid"], "csectioncomics.com")) { + str_contains($article["guid"], "buttersafe.com") || + str_contains($article["guid"], "extrafabulouscomics.com") || + str_contains($article["guid"], "danbydraws.com") || + str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || + str_contains($article["guid"], "happyjar.com") || + str_contains($article["guid"], "nedroid.com") || + str_contains($article["guid"], "stonetoss.com") || + str_contains($article["guid"], "csectioncomics.com")) { - // lol at people who block clients by user agent - // oh noes my ad revenue Q_Q + $res = UrlHelper::fetch(["url" => $article["link"]]); - $res = UrlHelper::fetch(["url" => $article["link"], - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); + $doc = new DOMDocument(); - $doc = new DOMDocument(); + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $img = $xpath->query('//div[@id="comic"]')->item(0); + $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); - if ($res && $doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0); + if ($img || $text) { + $article["content"] = ''; - if ($basenode) { - $article["content"] = $doc->saveHTML($basenode); - return true; + if ($img) { + $this->cleanup($xpath, $img); + $article["content"] .= $doc->saveHTML($img); } - /** @var DOMElement|null $webtoon_link (buni specific) */ - $webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0); - - if ($webtoon_link) { - - $res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"), - "useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]); - - if (@$doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0); - - if ($basenode) { - $imgs = $xpath->query("//img[@data-url]", $basenode); - - foreach ($imgs as $img) { - $img->setAttribute("src", $img->getAttribute("data-url")); - } - - $article["content"] = $doc->saveHTML($basenode); - return true; - } - } + if ($text) { + $this->cleanup($xpath, $text); + $article["content"] .= $doc->saveHTML($text); } + + return true; } + } } return false; } + + private function cleanup($xpath, $content_node) { + $toUpdates = $xpath->query('//img[@data-src]', $content_node); + $this->move_all_attributes($toUpdates, 'data-src', 'src'); + + $toUpdates = $xpath->query('//img[@data-srcset]', $content_node); + $this->move_all_attributes($toUpdates, 'data-srcset', 'srcset'); + + $toUpdates = $xpath->query('//img[@data-sizes]', $content_node); + $this->move_all_attributes($toUpdates, 'data-sizes', 'sizes'); + + $toRemoves = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node); + foreach ($toRemoves as $toRemove) { + $toRemove->parentNode->removeChild($toRemove); + } + } + + private function move_all_attributes($toUpdates, $srcName, $dstName) { + foreach ($toUpdates as $toUpdate) { + $attributeValue = $toUpdate->getAttribute($srcName); + $toUpdate->setAttribute($dstName, $attributeValue); + $toUpdate->removeAttribute($srcName); + } + } } From 6c0bcd90ede37cffa1c354a1c61e3fffcd94abaf Mon Sep 17 00:00:00 2001 From: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> Date: Mon, 1 Sep 2025 14:32:23 -0500 Subject: [PATCH 2/5] Try to add types --- plugins/af_comics/filters/af_comics_comicpress.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 54ad515d7..3b61612fb 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -48,7 +48,7 @@ class Af_Comics_ComicPress extends Af_ComicFilter { return false; } - private function cleanup($xpath, $content_node) { + private function cleanup(DOMXPath $xpath, DOMNode $content_node): void { $toUpdates = $xpath->query('//img[@data-src]', $content_node); $this->move_all_attributes($toUpdates, 'data-src', 'src'); @@ -64,7 +64,7 @@ class Af_Comics_ComicPress extends Af_ComicFilter { } } - private function move_all_attributes($toUpdates, $srcName, $dstName) { + private function move_all_attributes(DOMNodeList $toUpdates, string $srcName, string $dstName): void { foreach ($toUpdates as $toUpdate) { $attributeValue = $toUpdate->getAttribute($srcName); $toUpdate->setAttribute($dstName, $attributeValue); From 17c6d7af8d718abe4cc3ea673e86c4f8f01f95e2 Mon Sep 17 00:00:00 2001 From: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> Date: Mon, 1 Sep 2025 14:42:00 -0500 Subject: [PATCH 3/5] Hopefully fix PHPStan --- plugins/af_comics/filters/af_comics_comicpress.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 3b61612fb..db65915bc 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -64,6 +64,9 @@ class Af_Comics_ComicPress extends Af_ComicFilter { } } + /** + * @param DOMNodeList $toUpdates + */ private function move_all_attributes(DOMNodeList $toUpdates, string $srcName, string $dstName): void { foreach ($toUpdates as $toUpdate) { $attributeValue = $toUpdate->getAttribute($srcName); From 17bd835530edc39e48096a206310cfdcc34072ef Mon Sep 17 00:00:00 2001 From: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> Date: Tue, 2 Sep 2025 10:59:59 -0500 Subject: [PATCH 4/5] Revert whitespace changes --- .../filters/af_comics_comicpress.php | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index db65915bc..915c09641 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -9,40 +9,40 @@ class Af_Comics_ComicPress extends Af_ComicFilter { function process(&$article) { if (str_contains($article["guid"], "bunicomic.com") || - str_contains($article["guid"], "buttersafe.com") || - str_contains($article["guid"], "extrafabulouscomics.com") || - str_contains($article["guid"], "danbydraws.com") || - str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || - str_contains($article["guid"], "happyjar.com") || - str_contains($article["guid"], "nedroid.com") || - str_contains($article["guid"], "stonetoss.com") || - str_contains($article["guid"], "csectioncomics.com")) { + str_contains($article["guid"], "buttersafe.com") || + str_contains($article["guid"], "extrafabulouscomics.com") || + str_contains($article["guid"], "danbydraws.com") || + str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || + str_contains($article["guid"], "happyjar.com") || + str_contains($article["guid"], "nedroid.com") || + str_contains($article["guid"], "stonetoss.com") || + str_contains($article["guid"], "csectioncomics.com")) { - $res = UrlHelper::fetch(["url" => $article["link"]]); + $res = UrlHelper::fetch(["url" => $article["link"]]); - $doc = new DOMDocument(); + $doc = new DOMDocument(); - if ($res && $doc->loadHTML($res)) { - $xpath = new DOMXPath($doc); - $img = $xpath->query('//div[@id="comic"]')->item(0); - $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $img = $xpath->query('//div[@id="comic"]')->item(0); + $text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); - if ($img || $text) { - $article["content"] = ''; + if ($img || $text) { + $article["content"] = ''; - if ($img) { - $this->cleanup($xpath, $img); - $article["content"] .= $doc->saveHTML($img); + if ($img) { + $this->cleanup($xpath, $img); + $article["content"] .= $doc->saveHTML($img); + } + + if ($text) { + $this->cleanup($xpath, $text); + $article["content"] .= $doc->saveHTML($text); + } + + return true; } - - if ($text) { - $this->cleanup($xpath, $text); - $article["content"] .= $doc->saveHTML($text); - } - - return true; } - } } return false; From c914d0710f052ef111243e3b7d3dffc89fef69c5 Mon Sep 17 00:00:00 2001 From: vjkcxl <349-vjkcxl@users.noreply.gitlab.tt-rss.org> Date: Tue, 2 Sep 2025 11:15:21 -0500 Subject: [PATCH 5/5] Fix Danby Draws --- .../filters/af_comics_comicpress.php | 3 +-- .../filters/af_comics_danbydraws.php | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 plugins/af_comics/filters/af_comics_danbydraws.php diff --git a/plugins/af_comics/filters/af_comics_comicpress.php b/plugins/af_comics/filters/af_comics_comicpress.php index 915c09641..f62d4889c 100755 --- a/plugins/af_comics/filters/af_comics_comicpress.php +++ b/plugins/af_comics/filters/af_comics_comicpress.php @@ -4,14 +4,13 @@ class Af_Comics_ComicPress extends Af_ComicFilter { function supported() { return array("Buni", "Buttersafe", "Happy Jar", "CSection", "Extra Fabulous Comics", "Nedroid", "Stonetoss", - "Danby Draws", "Powerup Comics"); + "Powerup Comics"); } function process(&$article) { if (str_contains($article["guid"], "bunicomic.com") || str_contains($article["guid"], "buttersafe.com") || str_contains($article["guid"], "extrafabulouscomics.com") || - str_contains($article["guid"], "danbydraws.com") || str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") || str_contains($article["guid"], "happyjar.com") || str_contains($article["guid"], "nedroid.com") || diff --git a/plugins/af_comics/filters/af_comics_danbydraws.php b/plugins/af_comics/filters/af_comics_danbydraws.php new file mode 100644 index 000000000..3ecc93180 --- /dev/null +++ b/plugins/af_comics/filters/af_comics_danbydraws.php @@ -0,0 +1,27 @@ + $article["link"]]); + + $doc = new DOMDocument(); + + if ($res && $doc->loadHTML($res)) { + $xpath = new DOMXPath($doc); + $basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0); + + if ($basenode) { + $article["content"] = $doc->saveHTML($basenode); + return true; + } + } + } + + return false; + } +}