From d718db3b66b533aa3e011aba38bed9557848913b Mon Sep 17 00:00:00 2001
From: Aleteoryx
Date: Thu, 14 Nov 2024 20:22:09 -0500
Subject: robust atom:content handling

---
Review note (ignored by git-am): this text differs from commit d718db3 as
recorded. Changes, all confined to added lines so hunk sizes are unchanged:
  * the image/* and video/* branches read the loop variable $content
    instead of $media, which is not defined anywhere in this hunk;
  * the out-of-line fallback checks and reads `src` on $content rather
    than on the entry $node, and escapes it like every other stored link;
  * $date["links"] is corrected to $data["links"] so links are kept;
  * an atom:content with neither `type` nor `src` is treated as "text"
    (RFC 4287 section 4.1.3.1), matching the behaviour of the removed code.
Line breaks and indentation were reconstructed from a whitespace-collapsed
copy; if context fails to match, try `git apply --ignore-whitespace`.

 rss.php | 52 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/rss.php b/rss.php
index 63fa73e..29e7568 100644
--- a/rss.php
+++ b/rss.php
@@ -118,21 +118,43 @@ function load_rss(string $uri, string $linkrel = "alternate", ?bool $allow_html
     $data["title"] ??= "[[[No Title]]]";
     $data["title"] = htmlentities(html_entity_decode($data["title"]));
 
-    $data["content"] = $node->getElementsByTagName("content")
-      ?->item(0)?->textContent??"";
-
-    if ($node->getElementsByTagName("content")
-        ?->item(0)
-        ?->getAttribute("type") === "html" && $allow_html !== FALSE) {
-      $data["content"] = strip_html($data["content"]);
-    } else {
-      $data["content"] = htmlentities(html_entity_decode($data["content"]));
+    foreach ($node->getElementsByTagName("content")->getIterator() as $content) {
+      $type = $content->getAttribute("type");
+      if ($type === "" && !$content->hasAttribute("src")) $type = "text"; // RFC 4287 default
+      if ($type === "text") {
+        // if we prefer non-html, overwrite html with text
+        if ($allow_html === FALSE) {
+          $data["content"] = htmlentities($content->textContent);
+        } else {
+          $data["content"] ??= htmlentities($content->textContent);
+        }
+      } elseif ($type === "html" || $type === "xhtml") {
+        if ($allow_html === FALSE) {
+          $data["content"] ??= htmlentities($content->textContent);
+        } else {
+          $data["content"] = strip_html($content->textContent);
+        }
+      } elseif (str_starts_with($type, "image/")) {
+        if ($content->hasAttribute("src"))
+          $data["images"][] = htmlentities($content->getAttribute("src"));
+        elseif (base64_decode(trim($content->textContent)) !== FALSE)
+          $data["images"][] = htmlentities("data:".$type.";base64,".trim($content->textContent));
+      } elseif (str_starts_with($type, "video/")) {
+        if ($content->hasAttribute("src"))
+          $data["videos"][] = htmlentities($content->getAttribute("src"));
+        elseif (base64_decode(trim($content->textContent)) !== FALSE)
+          $data["videos"][] = htmlentities("data:".$type.";base64,".trim($content->textContent));
+      } elseif ($content->hasAttribute("src")) {
+        $data["links"][] = htmlentities($content->getAttribute("src"));
+      }
+      // TODO: possible markdown et al. handling?
     }
 
-    $data["links"] = [];
+    $data["content"] ??= "";
+
+    $data["links"] ??= [];
     foreach ($node->getElementsByTagName("link")->getIterator() as $link) {
-      $date["links"][] = ["rel" => htmlentities($link->getAttribute("rel")),
-                          "href" => htmlentities($link->getAttribute("href"))];
+      $data["links"][] = htmlentities($link->getAttribute("href"));
       if ($link->getAttribute("rel") === $linkrel) {
         $data["link"] ??= htmlentities($link->getAttribute("href"));
       }
@@ -254,8 +276,10 @@ foreach ($feeds as $name => $data) {
   if ($data["mode"] == "no_title") {
     unset($entry["title"]);
   }
-  if (@$data["media"]) {
-    unset($entry["media"]);
+
+  if (@$data["media"] === FALSE) {
+    unset($entry["images"]);
+    unset($entry["videos"]);
   }
 
   $combined[] = $entry;
-- 
cgit v1.2.3-54-g00ecf