<?php
define("VERSION", "0.0.2");

/* --- FEEDS - EDIT AS NEEDED --- */

/// Each feed is keyed by its display name. Keys read by the code below:
///
///   url        (required) location of the RSS/Atom document
///   home       link placed on the feed name under each article
///   mode       "title" (default) drops the content, "no_title" drops the
///              title, any other value (e.g. "content") keeps both
///   ttl        cache lifetime in seconds, default 3600
///   linkrel    Atom <link rel=?> to prefer, default "alternate"
///   allow_html FALSE to render content as escaped text
///   media      FALSE to drop attached images/videos

$feeds["OTW News"]       ["url"]  = "https://www.transformativeworks.org/category/announcement/feed/";
$feeds["OTW News"]       ["mode"] = "content";
$feeds["Dreamwidth News"]["url"]  = "https://dw-news.dreamwidth.org/data/rss";
$feeds["Dreamwidth News"]["home"] = "https://dw-news.dreamwidth.org/";

/* --- CONFIG - EDIT AS NEEDED --- */

/// Directory to store RSS cache.
///
/// Multiple instances can share one dir.
$config["cache_dir"] = "/tmp/rss_dot_php";

/// Custom CSS
$config["custom_css"] = <<<'EOC'
/* custom CSS goes here! */
EOC;

/// Document Language
$config["lang"] = "en";

/// Date Format
///
/// Displayed under every article, see
/// <https://www.php.net/manual/en/datetime.format.php>
/// for documentation.
$config["date_fmt"] = "l, M jS, Y, H:i T";

/// Timezone
///
/// A value of type DateTimeZone, see
/// <https://www.php.net/manual/en/class.datetimezone.php>
/// for documentation.
$config["timezone"] = new DateTimeZone('UTC');

/// <a target=? >
///
/// What to set for the target= attribute on generated links.
/// _top will redirect the main tab, _blank will make a new tab.
$config["link_target"] = "_top";

/* --- CODE - DO NOT TOUCH --- */

/// Text of the first descendant element named $tag, or NULL if there is none.
function rss_dot_php_first_text(DOMElement $node, string $tag): ?string {
  return $node->getElementsByTagName($tag)->item(0)?->textContent;
}

/// Builds a DateTime in $timezone from a feed date string.
///
/// A missing or unparseable date becomes the Unix epoch instead of
/// aborting the whole page.
function rss_dot_php_parse_date(?string $raw, DateTimeZone $timezone): DateTime {
  try {
    $date = new DateTime($raw ?? '@0');
  } catch (Exception $e) {
    $date = new DateTime('@0');
  }
  $date->setTimezone($timezone);
  return $date;
}

/// Converts one RSS <item> into an entry array
/// (title, link, content, date, optionally images / videos).
function rss_dot_php_parse_rss_item(DOMElement $node, ?bool $allow_html): array {
  global $config;

  // start from scratch for every item so nothing leaks over from the previous one
  $data = [];

  $title = rss_dot_php_first_text($node, "title") ?? "[[[No Title]]]";
  $data["title"] = htmlentities(html_entity_decode($title));
  $data["link"] = htmlentities(trim(rss_dot_php_first_text($node, "link") ?? ""));

  // assume rss is html by default
  $content = rss_dot_php_first_text($node, "description") ?? "";
  $data["content"] = ($allow_html === FALSE)
    ? htmlentities(html_entity_decode($content))
    : strip_html($content);

  foreach ($node->getElementsByTagNameNS("http://search.yahoo.com/mrss/", "content") as $media) {
    $type = $media->getAttribute("type");
    if (str_starts_with($type, "image/")) {
      $data["images"][] = htmlentities($media->getAttribute("url"));
    } elseif (str_starts_with($type, "video/")) {
      $data["videos"][] = htmlentities($media->getAttribute("url"));
    }
  }

  $data["date"] = rss_dot_php_parse_date(rss_dot_php_first_text($node, "pubDate"), $config["timezone"]);
  return $data;
}

/// Converts one Atom <entry> into an entry array
/// (title, link, links, content, date, optionally images / videos).
function rss_dot_php_parse_atom_entry(DOMElement $node, string $linkrel, ?bool $allow_html): array {
  global $config;

  // start from scratch for every entry so ??= below only sees this entry's values
  $data = ["links" => []];

  $title = rss_dot_php_first_text($node, "title") ?? "[[[No Title]]]";
  $data["title"] = htmlentities(html_entity_decode($title));

  foreach ($node->getElementsByTagName("content") as $content) {
    $type = $content->getAttribute("type");
    $has_src = $content->hasAttribute("src");
    // RFC 4287: inline content without a type attribute is plain text
    if ($type === "" && !$has_src) $type = "text";

    if ($type === "text") {
      // if we prefer non-html, overwrite html with text
      if ($allow_html === FALSE) {
        $data["content"] = htmlentities($content->textContent);
      } else {
        $data["content"] ??= htmlentities($content->textContent);
      }
    } elseif ($type === "html" || $type === "xhtml") {
      if ($allow_html === FALSE) {
        $data["content"] ??= htmlentities($content->textContent);
      } else {
        $data["content"] = strip_html($content->textContent);
      }
    } elseif (str_starts_with($type, "image/") || str_starts_with($type, "video/")) {
      $bucket = str_starts_with($type, "image/") ? "images" : "videos";
      if ($has_src) {
        $data[$bucket][] = htmlentities($content->getAttribute("src"));
      } else {
        // inline media is base64; validate strictly and drop the line breaks
        // before embedding it as a data: URI
        $payload = preg_replace('/\s+/', '', $content->textContent) ?? "";
        if ($payload !== "" && base64_decode($payload, TRUE) !== FALSE) {
          $data[$bucket][] = htmlentities("data:".$type.";base64,".$payload);
        }
      }
    } elseif ($has_src) {
      $data["links"][] = htmlentities($content->getAttribute("src"));
    }
    // TODO: possible markdown et al. handling?
  }
  $data["content"] ??= "";

  foreach ($node->getElementsByTagName("link") as $link) {
    $href = htmlentities($link->getAttribute("href"));
    $data["links"][] = $href;
    // RFC 4287: a link without rel is to be read as rel="alternate"
    $rel = $link->getAttribute("rel");
    if ($rel === "") $rel = "alternate";
    if ($rel === $linkrel) {
      $data["link"] ??= $href;
    }
  }
  // no link with the wanted rel: fall back to the first one seen
  $data["link"] ??= $data["links"][0] ?? "";

  $raw_date = rss_dot_php_first_text($node, "published")
           ?? rss_dot_php_first_text($node, "updated");
  $data["date"] = rss_dot_php_parse_date($raw_date, $config["timezone"]);
  return $data;
}

/// Fetches and parses the feed at $uri.
///
/// Returns the list of entries, or NULL when the document could not be
/// fetched or parsed, so callers can tell a failure from an empty feed.
function rss_dot_php_fetch(string $uri, string $linkrel, ?bool $allow_html): ?array {
  // warnings are suppressed because the failure is checked and logged right below
  $xml = @file_get_contents($uri);
  if ($xml === FALSE || trim($xml) === "") {
    error_log("rss_dot_php: could not fetch \"$uri\".");
    return NULL;
  }

  $doc = NULL;
  // if the file doesn't contain an encoding, attempt to read it from http headers and re-encode
  if (!preg_match("/^[^>]+encoding/", $xml) && str_starts_with($uri, "http")) {
    // walk the headers backwards: after redirects the final response comes last
    foreach (array_reverse($http_response_header ?? []) as $header) {
      if (!str_starts_with(strtolower($header), "content-type")) continue;
      if (preg_match("/(?<=charset=)[a-z0-9_-]+/i", $header, $matches)) {
        $converted = @iconv($matches[0], "UTF-8", $xml);
        // keep the original bytes if the advertised charset is unknown to iconv
        if ($converted !== FALSE) {
          $xml = $converted;
          $doc = new DOMDocument(encoding: "UTF-8");
        }
      }
      break;
    }
  }
  $doc ??= new DOMDocument();

  if (!@$doc->loadXML($xml) || $doc->documentElement === NULL) {
    error_log("rss_dot_php: could not parse \"$uri\" as XML.");
    return NULL;
  }

  $parsed = [];
  if ($doc->documentElement->nodeName == "rss") { // TODO: better rss / atom sniffing
    foreach ($doc->getElementsByTagName("item") as $node) {
      $parsed[] = rss_dot_php_parse_rss_item($node, $allow_html);
    }
  } else { // assume atom
    foreach ($doc->getElementsByTagName("entry") as $node) {
      $parsed[] = rss_dot_php_parse_atom_entry($node, $linkrel, $allow_html);
    }
  }
  return $parsed;
}

/// Loads a feed and returns its entries, oldest/newest order as in the document.
///
/// $uri        anything file_get_contents() can open
/// $linkrel    for Atom, which <link rel=?> becomes the entry link
/// $allow_html FALSE renders content as escaped text; TRUE or NULL passes
///             it through strip_html()
///
/// Every string in the returned entries is already HTML-escaped (or, for
/// "content", cleaned HTML); "date" is a DateTime in $config["timezone"].
/// Returns [] when the feed cannot be fetched or parsed.
function load_rss(string $uri, string $linkrel = "alternate", ?bool $allow_html = NULL): array {
  return rss_dot_php_fetch($uri, $linkrel, $allow_html) ?? [];
}

/// Reads a cache file written by load_cached().
///
/// Returns NULL if the file is missing, unreadable, or does not hold a
/// list of entries each carrying a DateTime under "date".
function rss_dot_php_read_cache(string $path): ?array {
  $raw = @file_get_contents($path);
  if ($raw === FALSE) return NULL;

  // the cache dir may be shared, so only the one class we store is allowed back in
  $data = @unserialize($raw, ["allowed_classes" => [DateTime::class]]);
  if (!is_array($data)) return NULL;

  foreach ($data as $entry) {
    if (!is_array($entry) || !(($entry["date"] ?? NULL) instanceof DateTime)) return NULL;
  }
  return $data;
}

/// Like load_rss(), but keeps the parsed entries in $config["cache_dir"]
/// for $ttl seconds.
///
/// If a refresh fails, the expired cached copy (if any) is returned
/// instead, and the failure is not written to the cache.
function load_cached(int $ttl, string $uri, string $linkrel = "alternate", ?bool $allow_html = NULL): array {
  global $config;

  // the parsed result depends on all three arguments, so all three form the key
  $key = md5(implode("\0", [$uri, $linkrel, var_export($allow_html, TRUE)]));
  $path = $config["cache_dir"]."/".$key;

  $mtime = @filemtime($path);
  if ($mtime !== FALSE && $mtime + $ttl >= time()) {
    $cached = rss_dot_php_read_cache($path);
    if ($cached !== NULL) return $cached;
  }

  $fresh = rss_dot_php_fetch($uri, $linkrel, $allow_html);
  if ($fresh === NULL) {
    return rss_dot_php_read_cache($path) ?? [];
  }

  if (@file_put_contents($path, serialize($fresh), LOCK_EX) === FALSE) {
    error_log("rss_dot_php: could not write cache file \"$path\".");
  }
  return $fresh;
}

/// TRUE if $value is a javascript: URL, ignoring case and any
/// whitespace / control characters mixed into it.
function rss_dot_php_is_script_url(string $value): bool {
  $compact = preg_replace('/[\x00-\x20]+/', '', $value) ?? $value;
  return str_starts_with(strtolower($compact), "javascript:");
}

/// Cleans up a fragment of feed HTML for embedding in the page.
///
/// Removes style, script, link, meta, base, title, template and slot
/// elements, inline on* event handlers, javascript: URLs in href / src
/// and autoplay attributes; marks images as lazy-loading. Returns the
/// serialized children of <body>, or "" if nothing is left.
///
/// This is best-effort cleanup, not a full sanitizer.
// TODO: sniff for 8.4 Dom\HTMLDocument when 8.4 releases
//       <https://www.php.net/manual/en/domdocument.loadhtml.php>
function strip_html(string $html): string {
  if ($html === "") return $html;

  $doc = new DOMDocument();
  // this is a really ugly hack but libxml has left me no choice :(
  @$doc->loadHTML("<meta charset='UTF-8'>".$html);

  $unwanted = ["style", "script", "link", "meta", "base", "title", "template", "slot"];
  foreach ($unwanted as $tag) {
    // getElementsByTagName() is a live list: snapshot it first, otherwise
    // every removal shifts the list and the next match is skipped
    foreach (iterator_to_array($doc->getElementsByTagName($tag), FALSE) as $el) {
      $el->remove();
    }
  }

  foreach ($doc->getElementsByTagName("img") as $el) {
    $el->setAttribute("loading", "lazy");
  }

  foreach ($doc->getElementsByTagName("*") as $el) {
    // snapshot the attributes too, since we remove from them while looping
    foreach (iterator_to_array($el->attributes, FALSE) as $attr) {
      if (str_starts_with(strtolower($attr->nodeName), "on")) {
        $el->removeAttribute($attr->nodeName);
      }
    }

    if (rss_dot_php_is_script_url($el->getAttribute("href"))) {
      $el->setAttribute("href", "javascript:alert('Link stripped for security.')");
    }
    // a replacement script URL in src could run on load, so just drop it
    if (rss_dot_php_is_script_url($el->getAttribute("src"))) {
      $el->removeAttribute("src");
    }
    $el->removeAttribute("autoplay");
  }

  // no <body> is produced when the input held nothing but removed elements
  $body = $doc->getElementsByTagName("body")->item(0);
  if ($body === NULL) return "";

  $out = "";
  foreach ($body->childNodes as $child) {
    $out .= $doc->saveHTML($child);
  }
  return $out;
}

// code begins here

$config["link_target"] = htmlentities($config["link_target"]);
@mkdir($config["cache_dir"], recursive: true);

// ?disabled=x and ?disabled[]=x are both accepted; anything that is not
// the name of a configured feed is dropped
$disabled = $_GET["disabled"] ?? [];
if (!is_array($disabled)) $disabled = [$disabled];
$disabled = array_values(array_unique(array_filter(
  $disabled,
  fn($x) => is_string($x) && array_key_exists($x, $feeds))));

// query string that the toggle links are built from
$query_base = $_GET;
unset($query_base["disabled"]);
if ($disabled !== []) $query_base["disabled"] = $disabled;

$off_feeds = [];
foreach ($disabled as $feed) {
  $off_feeds[$feed] = $feeds[$feed];
  unset($feeds[$feed]);
}

$combined = [];

// Real Feed Processing Happens Here
foreach ($feeds as $name => $data) {
  if (!isset($data["url"])) {
    error_log("Feed \"$name\" missing url. Ignoring.");
    continue;
  }
  $data["ttl"] ??= 3600;
  $data["linkrel"] ??= "alternate";
  $data["mode"] ??= "title";

  foreach (load_cached($data["ttl"], $data["url"], $data["linkrel"], $data["allow_html"] ?? NULL) as $entry) {
    $entry["source"] = htmlentities((string)$name);
    $entry["home"] = htmlentities($data["home"] ?? "");
    // cached entries keep the timezone they were stored with; re-apply the current one
    $entry["date"]->setTimezone($config["timezone"]);

    if ($data["mode"] === "title") {
      unset($entry["content"]);
    }
    if ($data["mode"] === "no_title") {
      unset($entry["title"]);
    }
    if (($data["media"] ?? NULL) === FALSE) {
      unset($entry["images"]);
      unset($entry["videos"]);
    }

    $combined[] = $entry;
  }
}

// reverse-chronological by default
usort($combined, fn($a, $b) => $b["date"]->getTimestamp() <=> $a["date"]->getTimestamp());
if (isset($_GET["reverse"]))
  $combined = array_reverse($combined);

$base = parse_url($_SERVER["REQUEST_URI"] ?? "", PHP_URL_PATH) ?: "";
?>
<!doctype html>
<html lang="<?= htmlentities($config['lang']) ?>">
  <head>
    <meta charset="UTF-8">
    <style>
      object, img, video {
        max-width: 60vw;
      }
    </style>
    <style><?= $config['custom_css'] ?></style>
  </head>
  <body>
    <nav>
      <div>
        <b>Toggle Feeds</b>:
<?php foreach ($feeds as $name => $data):
        $query = $query_base;
        $query["disabled"][] = (string)$name;
        $uri = $base."?".http_build_query($query); ?>
        <span class="rss-source" data-source="<?= htmlentities((string)$name) ?>">
          <a href="<?= htmlentities($uri) ?>"><?= htmlentities((string)$name) ?></a>
        </span>
<?php endforeach; ?>
<?php foreach ($off_feeds as $name => $data):
        $query = $query_base;
        $query["disabled"] = array_values(array_filter($disabled, fn($x) => $x !== (string)$name));
        $uri = $base."?".http_build_query($query); ?>
        <span class="rss-source disabled" data-source="<?= htmlentities((string)$name) ?>">
          <a href="<?= htmlentities($uri) ?>"><?= htmlentities((string)$name) ?></a>
        </span>
<?php endforeach; ?>
      </div>
    </nav>
    <main>
<?php if (!count($combined) && $disabled !== []): ?>
      <h1>Looks like you filtered out everything...</h1>
      <p>Try unfiltering some feeds!</p>
<?php endif;
      foreach ($combined as $entry): ?>
      <article data-source="<?= $entry['source'] ?>">
        <div class="rss-content">
<?php if(isset($entry['title'])): ?>
          <h1 class="rss-title"><a target="<?= $config['link_target'] ?>" href="<?= $entry['link'] ?? '' ?>"><?= $entry['title'] ?></a></h1>
<?php endif; ?>
<?php if(isset($entry['content'])): ?>
          <div><?= $entry['content'] ?></div>
<?php endif; ?>
<?php if(count($entry['images']??[])): ?>
          <div class="rss-images">
<?php foreach($entry['images'] as $media): ?>
            <img loading="lazy" alt="" src="<?= $media ?>">
<?php endforeach; ?>
          </div>
<?php endif; ?>
<?php if(count($entry['videos']??[])): ?>
          <div class="rss-videos">
<?php foreach($entry['videos'] as $media): ?>
            <video controls src="<?= $media ?>"></video>
<?php endforeach; ?>
          </div>
<?php endif; ?>
        </div>
        <span class="rss-source">
<?php if ($entry['home']): ?>
          <a target="<?= $config['link_target'] ?>" href="<?= $entry['home'] ?>"><?= $entry['source'] ?></a>
<?php else: ?>
          <?= $entry['source'] ?>
<?php endif; ?>
        </span>
<?php if(!isset($entry['title'])): ?>
        • <a target="<?= $config['link_target'] ?>" href="<?= $entry['link'] ?? '' ?>">Source</a>
<?php endif; ?>
        • <time datetime="<?= $entry['date']->format(DateTime::ISO8601) ?>">
          <?= htmlentities($entry['date']->format($config['date_fmt'])) ?>
        </time>
      </article>
<?php endforeach; ?>
    </main>
    <!-- generated by rss_dot_php <?= VERSION ?> https://git.aleteoryx.me/cgit/rss_dot_php -->
  </body>
</html>