Merge branch 'update-comicpress' into 'master'

Update ComicPress logic See merge request tt-rss/tt-rss!183
2025-09-20 21:11:00 +02:00 · 2025-09-02 21:24:44 +03:00 · 2025-09-02 21:24:44 +03:00 · be3ee920b1
commit be3ee920b1
parent 98dbf49733 c914d0710f
2 changed files with 70 additions and 34 deletions
--- a/plugins/af_comics/filters/af_comics_comicpress.php
+++ b/plugins/af_comics/filters/af_comics_comicpress.php
@ -4,64 +4,73 @@ class Af_Comics_ComicPress extends Af_ComicFilter {
 	function supported() { // returns the list of comic names associated with this filter
 		return array("Buni", "Buttersafe", "Happy Jar", "CSection",
 			"Extra Fabulous Comics", "Nedroid", "Stonetoss",
-			"Danby Draws", "Powerup Comics");
+			"Powerup Comics"); // "Danby Draws" is dropped from this list by this change
 	}

 	function process(&$article) { // replaces $article["content"] for matching guids; returns true only when content was replaced
 		if (str_contains($article["guid"], "bunicomic.com") ||
 				str_contains($article["guid"], "buttersafe.com") ||
 				str_contains($article["guid"], "extrafabulouscomics.com") ||
-				str_contains($article["guid"], "danbydraws.com") ||
 				str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") ||
 				str_contains($article["guid"], "happyjar.com") ||
 				str_contains($article["guid"], "nedroid.com") ||
 				str_contains($article["guid"], "stonetoss.com") ||
 				str_contains($article["guid"], "csectioncomics.com")) {

-				// lol at people who block clients by user agent
-				// oh noes my ad revenue Q_Q
-
-				$res = UrlHelper::fetch(["url" => $article["link"],
-					"useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]);
+				$res = UrlHelper::fetch(["url" => $article["link"]]); // fetch the linked page; no "useragent" option is passed any more

 				$doc = new DOMDocument();

 				if ($res && $doc->loadHTML($res)) {
 					$xpath = new DOMXPath($doc);
-					$basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0);
+					$img = $xpath->query('//div[@id="comic"]')->item(0); // first <div id="comic"> in the page, or null
+					$text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0); // first div whose class attribute is exactly "entry" or "entry-content", or null

-					if ($basenode) {
-						$article["content"] = $doc->saveHTML($basenode);
-						return true;
-					}
+					if ($img || $text) { // either node alone is enough to replace the content
+						$article["content"] = ''; // rebuilt below: comic node first, then text node

-					/** @var DOMElement|null $webtoon_link (buni specific) */
-					$webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0);
-
-					if ($webtoon_link) {
-
-						$res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"),
-							"useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]);
-
-						if (@$doc->loadHTML($res)) {
-							$xpath = new DOMXPath($doc);
-							$basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0);
-
-							if ($basenode) {
-								$imgs = $xpath->query("//img[@data-url]", $basenode);
-
-								foreach ($imgs as $img) {
-									$img->setAttribute("src", $img->getAttribute("data-url"));
-								}
-
-								$article["content"] = $doc->saveHTML($basenode);
-								return true;
-							}
+						if ($img) {
+							$this->cleanup($xpath, $img); // rename data-* image attributes and drop unwanted nodes before serializing
+							$article["content"] .= $doc->saveHTML($img);
 						}
+
+						if ($text) {
+							$this->cleanup($xpath, $text); // same cleanup for the text node
+							$article["content"] .= $doc->saveHTML($text);
+						}
+
+						return true;
 					}
 				}
 		}

 		return false; // guid not matched, fetch/parse failed, or neither node was found
 	}
+
+	/**
+	 * Cleans up a content node in place before it is serialized into the article.
+	 *
+	 * Moves the data-src / data-srcset / data-sizes attributes of images to
+	 * src / srcset / sizes, then removes elements whose class contains one of a
+	 * fixed set of markers (sharing widgets, related posts, donation tables, ...).
+	 *
+	 * Every query is relative (".//") so that only descendants of $content_node are
+	 * touched. An expression starting with "//" is absolute: it selects from the
+	 * document root no matter which context node is passed to DOMXPath::query().
+	 *
+	 * @param DOMXPath $xpath XPath evaluator used to run the queries
+	 * @param DOMNode $content_node root of the subtree to clean up
+	 */
+	private function cleanup(DOMXPath $xpath, DOMNode $content_node): void {
+		// source attribute => attribute it is moved to
+		$renames = [
+			'data-src' => 'src',
+			'data-srcset' => 'srcset',
+			'data-sizes' => 'sizes',
+		];
+
+		foreach ($renames as $srcName => $dstName) {
+			$toUpdates = $xpath->query('.//img[@' . $srcName . ']', $content_node);
+			$this->move_all_attributes($toUpdates, $srcName, $dstName);
+		}
+
+		$toRemoves = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node);
+		foreach ($toRemoves as $toRemove) {
+			$toRemove->parentNode->removeChild($toRemove);
+		}
+	}
+
+	/**
+	 * Renames an attribute on every element of a node list.
+	 *
+	 * For each element the value of $srcName is written to $dstName (replacing any
+	 * value already there) and $srcName is then removed.
+	 *
+	 * @param DOMNodeList<DOMNode> $toUpdates nodes to process; non-element nodes are skipped
+	 * @param string $srcName attribute to read and remove
+	 * @param string $dstName attribute to write
+	 */
+	private function move_all_attributes(DOMNodeList $toUpdates, string $srcName, string $dstName): void {
+		foreach ($toUpdates as $toUpdate) {
+			// the attribute accessors exist on DOMElement only, not on the DOMNode base class
+			if (!$toUpdate instanceof DOMElement) {
+				continue;
+			}
+
+			$toUpdate->setAttribute($dstName, $toUpdate->getAttribute($srcName));
+			$toUpdate->removeAttribute($srcName);
+		}
+	}
 }
--- a/plugins/af_comics/filters/af_comics_danbydraws.php
+++ b/plugins/af_comics/filters/af_comics_danbydraws.php
@ -0,0 +1,27 @@
+<?php
+class Af_Comics_DanbyDraws extends Af_ComicFilter {
+
+	/**
+	 * Returns the list of comic names associated with this filter.
+	 */
+	function supported() {
+		return ["Danby Draws"];
+	}
+
+	/**
+	 * Replaces the article content with markup taken from the linked page.
+	 *
+	 * Only articles whose link contains "danbydraws.com" are handled. The page is
+	 * fetched and the first node matching either <div id="comic"> or the parent of
+	 * an <img> whose class contains "wp-post-image" becomes the new content.
+	 *
+	 * @param array $article article data, modified in place; reads "link", writes "content"
+	 * @return bool true when the content was replaced, false otherwise
+	 */
+	function process(&$article) {
+		if (!str_contains($article["link"], "danbydraws.com")) {
+			return false;
+		}
+
+		$html = UrlHelper::fetch(["url" => $article["link"]]);
+		$page = new DOMDocument();
+
+		// an empty/failed fetch short-circuits before any parsing is attempted
+		if (!$html || !$page->loadHTML($html)) {
+			return false;
+		}
+
+		$finder = new DOMXPath($page);
+		$matches = $finder->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..');
+		$comic = $matches->item(0);
+
+		if (!$comic) {
+			return false;
+		}
+
+		$article["content"] = $page->saveHTML($comic);
+
+		return true;
+	}
+}