mirror of
https://git.tt-rss.org/fox/tt-rss.git
synced 2025-10-17 05:50:59 +02:00
Update ComicPress logic
This updates the logic to work across a variety of additional sites. Additionally, it grabs the author's comments from comics, such as the text on Buttersafe. This does not update the list of supported comics. There are too many comic sites to enumerate all of them anyway.
This commit is contained in:
parent
98dbf49733
commit
efe6fbd3fa
@ -9,59 +9,66 @@ class Af_Comics_ComicPress extends Af_ComicFilter {
|
||||
|
||||
function process(&$article) {
|
||||
if (str_contains($article["guid"], "bunicomic.com") ||
|
||||
str_contains($article["guid"], "buttersafe.com") ||
|
||||
str_contains($article["guid"], "extrafabulouscomics.com") ||
|
||||
str_contains($article["guid"], "danbydraws.com") ||
|
||||
str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") ||
|
||||
str_contains($article["guid"], "happyjar.com") ||
|
||||
str_contains($article["guid"], "nedroid.com") ||
|
||||
str_contains($article["guid"], "stonetoss.com") ||
|
||||
str_contains($article["guid"], "csectioncomics.com")) {
|
||||
str_contains($article["guid"], "buttersafe.com") ||
|
||||
str_contains($article["guid"], "extrafabulouscomics.com") ||
|
||||
str_contains($article["guid"], "danbydraws.com") ||
|
||||
str_contains($article["guid"], "theduckwebcomics.com/Powerup_Comics") ||
|
||||
str_contains($article["guid"], "happyjar.com") ||
|
||||
str_contains($article["guid"], "nedroid.com") ||
|
||||
str_contains($article["guid"], "stonetoss.com") ||
|
||||
str_contains($article["guid"], "csectioncomics.com")) {
|
||||
|
||||
// lol at people who block clients by user agent
|
||||
// oh noes my ad revenue Q_Q
|
||||
$res = UrlHelper::fetch(["url" => $article["link"]]);
|
||||
|
||||
$res = UrlHelper::fetch(["url" => $article["link"],
|
||||
"useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]);
|
||||
$doc = new DOMDocument();
|
||||
|
||||
$doc = new DOMDocument();
|
||||
if ($res && $doc->loadHTML($res)) {
|
||||
$xpath = new DOMXPath($doc);
|
||||
$img = $xpath->query('//div[@id="comic"]')->item(0);
|
||||
$text = $xpath->query('//div[@class="entry" or @class="entry-content"]')->item(0);
|
||||
|
||||
if ($res && $doc->loadHTML($res)) {
|
||||
$xpath = new DOMXPath($doc);
|
||||
$basenode = $xpath->query('//div[@id="comic"]|//img[contains(@class, "wp-post-image")]/..')->item(0);
|
||||
if ($img || $text) {
|
||||
$article["content"] = '';
|
||||
|
||||
if ($basenode) {
|
||||
$article["content"] = $doc->saveHTML($basenode);
|
||||
return true;
|
||||
if ($img) {
|
||||
$this->cleanup($xpath, $img);
|
||||
$article["content"] .= $doc->saveHTML($img);
|
||||
}
|
||||
|
||||
/** @var DOMElement|null $webtoon_link (buni specific) */
|
||||
$webtoon_link = $xpath->query("//a[contains(@href,'www.webtoons.com')]")->item(0);
|
||||
|
||||
if ($webtoon_link) {
|
||||
|
||||
$res = UrlHelper::fetch(["url" => $webtoon_link->getAttribute("href"),
|
||||
"useragent" => "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"]);
|
||||
|
||||
if (@$doc->loadHTML($res)) {
|
||||
$xpath = new DOMXPath($doc);
|
||||
$basenode = $xpath->query('//div[@id="_viewerBox"]')->item(0);
|
||||
|
||||
if ($basenode) {
|
||||
$imgs = $xpath->query("//img[@data-url]", $basenode);
|
||||
|
||||
foreach ($imgs as $img) {
|
||||
$img->setAttribute("src", $img->getAttribute("data-url"));
|
||||
}
|
||||
|
||||
$article["content"] = $doc->saveHTML($basenode);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if ($text) {
|
||||
$this->cleanup($xpath, $text);
|
||||
$article["content"] .= $doc->saveHTML($text);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Cleans up a scraped node in place before it is serialized into the article.
 *
 * For every image below $content_node, the data-src, data-srcset and data-sizes
 * attributes are moved to src, srcset and sizes respectively. Afterwards every
 * descendant element whose class attribute contains "sharedaddy", "relatedposts",
 * "donation_table", "above-comic" or "oli_" is detached from its parent.
 *
 * @param DOMXPath $xpath        XPath evaluator for the document that owns $content_node
 * @param DOMNode  $content_node root of the subtree to clean; modified in place
 * @return void
 */
private function cleanup($xpath, $content_node) {
	// source attribute => attribute it is moved to
	$attribute_moves = [
		'data-src' => 'src',
		'data-srcset' => 'srcset',
		'data-sizes' => 'sizes',
	];

	foreach ($attribute_moves as $from => $to) {
		// The leading "." makes the expression relative to $content_node. A bare
		// "//img" is an absolute path: it ignores the context node and matches
		// every image in the whole document.
		$images = $xpath->query('.//img[@' . $from . ']', $content_node);
		$this->move_all_attributes($images, $from, $to);
	}

	$unwanted = $xpath->query('.//*[contains(@class, "sharedaddy") or contains(@class, "relatedposts") or contains(@class, "donation_table") or contains(@class, "above-comic") or contains(@class, "oli_")]', $content_node);

	foreach ($unwanted as $node) {
		$node->parentNode->removeChild($node);
	}
}
|
||||
|
||||
/**
 * Moves one attribute to another name on each node of a list.
 *
 * Each node gets $dstName set to the current value of its $srcName attribute,
 * after which $srcName is removed from that node.
 *
 * @param iterable $toUpdates nodes to modify (callers in this class pass an XPath query result)
 * @param string   $srcName   name of the attribute to read and then remove
 * @param string   $dstName   name of the attribute that receives the value
 * @return void
 */
private function move_all_attributes($toUpdates, $srcName, $dstName) {
	foreach ($toUpdates as $element) {
		// Copy first, then drop the source attribute.
		$element->setAttribute($dstName, $element->getAttribute($srcName));
		$element->removeAttribute($srcName);
	}
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user