robust atom:content handling

author: Aleteoryx <alyx@aleteoryx.me> 2024-11-14 20:22:09 -0500
committer: Aleteoryx <alyx@aleteoryx.me> 2024-11-14 20:22:09 -0500
commit: d718db3b66b533aa3e011aba38bed9557848913b (patch)
tree: 20db93d192f78f32f92b44daf67cc6ab9794f693
parent: 357998add708431088a02a03a7bd2d00f7549d9b (diff)
download: rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.tar.gz
rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.tar.bz2
rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.zip
1 file changed, 37 insertions, 13 deletions
diff --git a/rss.php b/rss.php
index 63fa73e..29e7568 100644
--- a/rss.php
+++ b/rss.php
@@ -118,21 +118,43 @@ function load_rss(string $uri, string $linkrel = "alternate", ?bool $allow_html
       $data["title"] ??= "[[[No Title]]]";
       $data["title"] = htmlentities(html_entity_decode($data["title"]));
 
-      $data["content"] = $node->getElementsByTagName("content")
-                              ?->item(0)?->textContent??"";
+      foreach ($node->getElementsByTagName("content")->getIterator() as $content) {
+        $type = $content->getAttribute("type") ?: ($content->hasAttribute("src") ? "" : "text"); // untyped inline content defaults to "text" (RFC 4287 4.1.3.1)
 
-      if ($node->getElementsByTagName("content")
-               ?->item(0)
-               ?->getAttribute("type") === "html" && $allow_html !== FALSE) {
-        $data["content"] = strip_html($data["content"]);
-      } else {
-        $data["content"] = htmlentities(html_entity_decode($data["content"]));
+        if ($type === "text") {
+          // when html is disallowed, text overwrites whatever was stored; otherwise it only fills an unset slot
+          if ($allow_html === FALSE) {
+            $data["content"] = htmlentities($content->textContent);
+          } else {
+            $data["content"] ??= htmlentities($content->textContent);
+          }
+        } elseif ($type === "html" || $type === "xhtml") {
+          if ($allow_html === FALSE) {
+            $data["content"] ??= htmlentities($content->textContent); // escaped fallback, used only if nothing is stored yet
+          } else {
+            $data["content"] = strip_html($content->textContent); // html is preferred here, so it overwrites
+          }
+        } elseif (str_starts_with($type, "image/")) { // media is read from the <content> element itself: src link or inline base64
+          if ($content->hasAttribute("src"))
+            $data["images"][] = htmlentities($content->getAttribute("src"));
+          elseif (base64_decode(trim($content->textContent), true) !== FALSE) // strict mode, so invalid payloads are actually rejected
+            $data["images"][] = htmlentities("data:".$type.";base64,".trim($content->textContent));
+        } elseif (str_starts_with($type, "video/")) {
+          if ($content->hasAttribute("src"))
+            $data["videos"][] = htmlentities($content->getAttribute("src"));
+          elseif (base64_decode(trim($content->textContent), true) !== FALSE) // strict mode, so invalid payloads are actually rejected
+            $data["videos"][] = htmlentities("data:".$type.";base64,".trim($content->textContent));
+        } elseif ($content->hasAttribute("src")) { // any other out-of-line content is kept as a plain link
+          $data["links"][] = htmlentities($content->getAttribute("src"));
+        }
+        // TODO: possible markdown et al. handling?
       }
 
-      $data["links"] = [];
+      $data["content"] ??= "";
+
+      $data["links"] ??= [];
       foreach ($node->getElementsByTagName("link")->getIterator() as $link) {
-        $date["links"][] = ["rel" => htmlentities($link->getAttribute("rel")),
-                          "href" => htmlentities($link->getAttribute("href"))];
+        $data["links"][] = htmlentities($link->getAttribute("href")); // escaped href of every <link>, whatever its rel
         if ($link->getAttribute("rel") === $linkrel) {
           $data["link"] ??= htmlentities($link->getAttribute("href"));
         }
@@ -254,8 +276,10 @@ foreach ($feeds as $name => $data) {
     if ($data["mode"] == "no_title") {
       unset($entry["title"]);
     }
-    if (@$data["media"]) {
-      unset($entry["media"]);
+
+    if (@$data["media"] === FALSE) {
+      unset($entry["images"]);
+      unset($entry["videos"]);
     }
 
     $combined[] = $entry;
author	Aleteoryx <alyx@aleteoryx.me>	2024-11-14 20:22:09 -0500
committer	Aleteoryx <alyx@aleteoryx.me>	2024-11-14 20:22:09 -0500
commit	d718db3b66b533aa3e011aba38bed9557848913b (patch)
tree	20db93d192f78f32f92b44daf67cc6ab9794f693
parent	357998add708431088a02a03a7bd2d00f7549d9b (diff)
download	rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.tar.gz rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.tar.bz2 rss_dot_php-d718db3b66b533aa3e011aba38bed9557848913b.zip