about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Aleteoryx <alyx@aleteoryx.me> 2024-11-14 20:22:09 -0500
committer Aleteoryx <alyx@aleteoryx.me> 2024-11-14 20:22:09 -0500
commit d718db3b66b533aa3e011aba38bed9557848913b (patch)
tree 20db93d192f78f32f92b44daf67cc6ab9794f693
parent 357998add708431088a02a03a7bd2d00f7549d9b (diff)
download rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.tar.gz
rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.tar.bz2
rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.zip
robust atom:content handling
-rw-r--r-- rss.php 52
1 files changed, 38 insertions, 14 deletions
diff --git a/rss.php b/rss.php
index 63fa73e..29e7568 100644
--- a/rss.php
+++ b/rss.php
@@ -118,21 +118,43 @@ function load_rss(string $uri, string $linkrel = "alternate", ?bool $allow_html
$data["title"] ??= "[[[No Title]]]";
$data["title"] = htmlentities(html_entity_decode($data["title"]));
- $data["content"] = $node->getElementsByTagName("content")
- ?->item(0)?->textContent??"";
-
- if ($node->getElementsByTagName("content")
- ?->item(0)
- ?->getAttribute("type") === "html" && $allow_html !== FALSE) {
- $data["content"] = strip_html($data["content"]);
- } else {
- $data["content"] = htmlentities(html_entity_decode($data["content"]));
+ foreach ($node->getElementsByTagName("content")->getIterator() as $content) {
+ $type = $content->getAttribute("type");
+
+ if ($type === "text") {
+ // if we prefer non-html, overwrite html with text
+ if ($allow_html === FALSE) {
+ $data["content"] = htmlentities($content->textContent);
+ } else {
+ $data["content"] ??= htmlentities($content->textContent);
+ }
+ } elseif ($type === "html" || $type === "xhtml") {
+ if ($allow_html === FALSE) {
+ $data["content"] ??= htmlentities($content->textContent);
+ } else {
+ $data["content"] = strip_html($content->textContent);
+ }
+ } elseif (str_starts_with($type, "image/")) {
+ if ($media->hasAttribute("src"))
+ $data["images"][] = htmlentities($media->getAttribute("src"));
+ elseif (base64_decode(trim($media->textContent)) !== FALSE)
+ $data["images"][] = htmlentities("data:".$type.";base64,".trim($media->textContent));
+ } elseif (str_starts_with($type, "video/")) {
+ if ($media->hasAttribute("src"))
+ $data["videos"][] = htmlentities($media->getAttribute("src"));
+ elseif (base64_decode(trim($media->textContent)) !== FALSE)
+ $data["videos"][] = htmlentities("data:".$type.";base64,".trim($media->textContent));
+ } elseif ($node->hasAttribute("src")) {
+ $data["links"][] = $node->getAttribute("src");
+ }
+ // TODO: possible markdown et al. handling?
}
- $data["links"] = [];
+ $data["content"] ??= "";
+
+ $data["links"] ??= [];
foreach ($node->getElementsByTagName("link")->getIterator() as $link) {
- $date["links"][] = ["rel" => htmlentities($link->getAttribute("rel")),
- "href" => htmlentities($link->getAttribute("href"))];
+ $date["links"][] = htmlentities($link->getAttribute("href"));
if ($link->getAttribute("rel") === $linkrel) {
$data["link"] ??= htmlentities($link->getAttribute("href"));
}
@@ -254,8 +276,10 @@ foreach ($feeds as $name => $data) {
if ($data["mode"] == "no_title") {
unset($entry["title"]);
}
- if (@$data["media"]) {
- unset($entry["media"]);
+
+ if (@$data["media"] === FALSE) {
+ unset($entry["images"]);
+ unset($entry["videos"]);
}
$combined[] = $entry;