diff options
author | aleteoryx <alyx@aleteoryx.me> | 2024-11-12 15:13:06 -0500 |
---|---|---|
committer | aleteoryx <alyx@aleteoryx.me> | 2024-11-12 15:39:41 -0500 |
commit | abf8b1d21bf2c355c588e990decf043011750376 (patch) | |
tree | 08eec58407239a7c8b5bdbbb63bfc4a046e116a4 | |
download | rss_dot_php-abf8b1d21bf2c355c588e990decf043011750376.tar.gz rss_dot_php-abf8b1d21bf2c355c588e990decf043011750376.tar.bz2 rss_dot_php-abf8b1d21bf2c355c588e990decf043011750376.zip |
mostly feature-complete
-rw-r--r-- | rss.php | 210 |
1 files changed, 210 insertions, 0 deletions
<?php define("VERSION", "0.0.1");
/* --- FEEDS - EDIT AS NEEDED --- */


$feeds["OTW News"]       ["url"] = "https://www.transformativeworks.org/category/announcement/feed/";
$feeds["Dreamwidth News"]["url"] = "https://dw-news.dreamwidth.org/data/rss";

/* --- CONFIG - EDIT AS NEEDED --- */


/// Directory to store RSS cache.
///
/// Multiple instances can share one dir.
$config["cache_dir"] = "/tmp/rss_dot_php";


/// Custom CSS
$config["custom_css"] = <<<'EOC'

/* custom CSS goes here! */

EOC;


/// Document Language
$config["lang"] = "en";


/// Date Format
///
/// Displayed under every article, see
/// <https://www.php.net/manual/en/datetime.format.php>
/// for documentation.
$config["date_fmt"] = "l, M jS, Y, H:i T";


/// Timezone
///
/// A value of type DateTimeZone, see
/// <https://www.php.net/manual/en/class.datetimezone.php>
/// for documentation.
$config["timezone"] = new DateTimeZone('UTC');


/* --- CODE - DO NOT TOUCH --- */

/**
 * Text content of the first descendant element named $tag, or null if
 * $node has no such descendant.
 */
function rss_first_text(DOMElement $node, string $tag): ?string {
  return $node->getElementsByTagName($tag)->item(0)?->textContent;
}

/**
 * Parse a feed-supplied date string into a DateTime in the configured
 * display timezone.
 *
 * A missing, empty or unparseable date falls back to the current time so
 * that one bad entry cannot abort rendering of the whole page.
 */
function rss_parse_date(?string $raw): DateTime {
  global $config;

  try {
    $date = new DateTime(trim($raw ?? ""));
  } catch (Exception $e) {
    error_log("Unparseable feed date \"$raw\": ".$e->getMessage());
    $date = new DateTime();
  }
  $date->setTimezone($config["timezone"]);
  return $date;
}

/**
 * Fetch and parse an RSS or Atom feed.
 *
 * @param string $uri     Location of the feed; anything file_get_contents()
 *                        can open.
 * @param string $linkrel For Atom feeds, the `rel` value of the <link> to
 *                        prefer as the entry's main link.
 * @return array List of entries. Each entry has the keys "title" (?string),
 *               "link" (?string) and "date" (DateTime); Atom entries also
 *               carry "links", a list of ["rel" => ..., "href" => ...].
 *               Returns [] when the feed cannot be loaded or parsed.
 */
function load_rss(string $uri, string $linkrel = "alternate"): array {
  $xml = @file_get_contents($uri);
  if ($xml === false || trim($xml) === "") {
    error_log("Could not load feed \"$uri\". Ignoring.");
    return [];
  }

  $doc = new DOMDocument();

  // if the file doesn't contain an encoding, attempt to read it from http headers and re-encode
  if (!preg_match("/^[^>]+encoding/", $xml) && str_starts_with($uri, "http")) {
    // $http_response_header is populated in this scope by file_get_contents()
    foreach ($http_response_header ?? [] as $header) {
      if (!str_starts_with(strtolower($header), "content-type")) continue;
      if (preg_match("/(?<=charset=)[a-z0-9_-]+/i", $header, $matches)) {
        $converted = @iconv($matches[0], "UTF-8", $xml);
        if ($converted !== false) {
          $xml = $converted;
          $doc = new DOMDocument(encoding: "UTF-8");
        }
      }
      break;
    }
  }

  // Collect libxml errors internally instead of emitting one warning per
  // problem in a malformed feed.
  $previous = libxml_use_internal_errors(true);
  $loaded = $doc->loadXML($xml);
  libxml_clear_errors();
  libxml_use_internal_errors($previous);

  if (!$loaded || $doc->documentElement === null) {
    error_log("Could not parse feed \"$uri\". Ignoring.");
    return [];
  }

  $parsed = [];

  if ($doc->documentElement->nodeName === "rss") {
    // TODO: better rss / atom sniffing
    foreach ($doc->getElementsByTagName("item") as $node) {
      // Build a fresh entry per item so no field leaks over from the
      // previous iteration.
      $parsed[] = [
        "title" => rss_first_text($node, "title"),
        "link"  => rss_first_text($node, "link"),
        "date"  => rss_parse_date(rss_first_text($node, "pubDate")),
      ];
    }
  } else {
    // assume atom
    foreach ($doc->getElementsByTagName("entry") as $node) {
      $data = [
        "title" => rss_first_text($node, "title"),
        "links" => [],
        "link"  => null,
      ];

      foreach ($node->getElementsByTagName("link") as $link) {
        $rel = $link->getAttribute("rel");
        $href = $link->getAttribute("href");
        $data["links"][] = ["rel" => $rel, "href" => $href];

        // RFC 4287: a <link> without a rel attribute is an "alternate" link.
        if (($rel === "" ? "alternate" : $rel) === $linkrel) {
          $data["link"] ??= $href;
        }
      }
      // No link with the wanted rel: fall back to the first one, if any.
      $data["link"] ??= $data["links"][0]["href"] ?? null;

      $data["date"] = rss_parse_date(
        rss_first_text($node, "published") ?? rss_first_text($node, "updated")
      );

      $parsed[] = $data;
    }
  }

  return $parsed;
}

/**
 * Load a feed through the on-disk cache.
 *
 * @param int    $ttl     Maximum age of a cache file, in seconds, before the
 *                        feed is fetched again.
 * @param string $uri     Location of the feed, see load_rss().
 * @param string $linkrel Preferred Atom link rel, see load_rss().
 * @return array List of entries in the shape produced by load_rss().
 */
function load_cached(int $ttl, string $uri, string $linkrel = "alternate"): array {
  global $config;
  $path = $config["cache_dir"]."/".md5($uri);

  // filemtime() returns false (not null) when the file does not exist.
  $mtime = @filemtime($path);
  if ($mtime !== false && $mtime + $ttl >= time()) {
    $raw = @file_get_contents($path);
    // The cache dir may be shared or world-writable, so only allow the one
    // class the cache legitimately contains to be instantiated.
    $cached = $raw === false
      ? false
      : @unserialize($raw, ["allowed_classes" => [DateTime::class]]);
    if (is_array($cached)) {
      return $cached;
    }
    // unreadable or corrupt cache file: fall through and refetch
  }

  $data = load_rss($uri, $linkrel);
  file_put_contents($path, serialize($data), LOCK_EX);
  return $data;
}

@mkdir($config["cache_dir"], recursive: true);

// `disabled` comes straight from the query string: it may be a scalar
// (?disabled=x) or contain nested arrays. Normalise it to a list before use.
if (isset($_GET["disabled"]) && !is_array($_GET["disabled"])) {
  $_GET["disabled"] = [$_GET["disabled"]];
}

foreach ($_GET["disabled"] ?? [] as $idx => $feed) {
  if (!is_string($feed) || !array_key_exists($feed, $feeds)) {
    unset($_GET["disabled"][$idx]);
    continue;
  }
  $off_feeds[$feed] = $feeds[$feed];
  unset($feeds[$feed]);
}

if (isset($_GET["disabled"])) {
  $_GET["disabled"] = array_values($_GET["disabled"]);
}

$combined = [];
// Real Feed Processing Happens Here
foreach ($feeds as $name => $data) {
  if (!isset($data["url"])) {
    error_log("Feed \"$name\" missing url. Ignoring.");
    continue;
  }
  $data["ttl"] ??= 3600;
  $data["linkrel"] ??= "alternate";

  foreach (load_cached($data["ttl"], $data["url"], $data["linkrel"]) as $entry) {
    $entry["source"] = $name;
    $combined[] = $entry;
  }
}

// reverse-chronological by default
usort($combined, fn($a, $b) => $b["date"]->getTimestamp() <=> $a["date"]->getTimestamp());

if (isset($_GET["reverse"]))
  $combined = array_reverse($combined);

// REQUEST_URI is absent under CLI; parse_url() may return null or false.
$base = parse_url($_SERVER["REQUEST_URI"] ?? "", PHP_URL_PATH) ?: "";

// NOTE(review): entry links come from remote feeds and are only HTML-escaped
// below; a hostile feed could still supply e.g. a `javascript:` URL.

?>
<!doctype html>
<html lang="<?= htmlentities($config['lang']) ?>">
<head>
  <meta charset="utf-8">
  <style><?= $config['custom_css'] ?></style>
</head>
<body>
  <nav>
    <div>
      <b>Toggle Feeds</b>:
<?php foreach ($feeds ?? [] as $name => $data):
    $query = $_GET;
    $query["disabled"][] = $name;
    $uri = $base."?".http_build_query($query);
?>
      <span class="source" data-source="<?= htmlentities($name) ?>">
        <a href="<?= htmlentities($uri) ?>"><?= htmlentities($name) ?></a>
      </span>
<?php endforeach; ?>
<?php foreach ($off_feeds ?? [] as $name => $data):
    $query = $_GET;
    // Compare as strings: numeric feed names become int array keys.
    $query["disabled"] = array_filter($query["disabled"] ?? [], fn($x) => (string)$x !== (string)$name);
    $uri = $base."?".http_build_query($query);
?>
      <span class="source disabled" data-source="<?= htmlentities($name) ?>">
        <a href="<?= htmlentities($uri) ?>"><?= htmlentities($name) ?></a>
      </span>
<?php endforeach; ?>
    </div>
  </nav>
  <main>
<?php if (!count($combined) && isset($_GET['disabled'])): ?>
<h1>Looks like you filtered out everything...</h1>
<p>Try unfiltering some feeds!</p>
<?php endif;
    foreach ($combined as $entry): ?>
    <article>
      <h1><a href="<?= htmlentities($entry['link'] ?? "") ?>"><?= htmlentities($entry['title'] ?? "[[[No Title]]]") ?></a></h1>
      <span class="source" data-source="<?= htmlentities($entry['source']) ?>"><?= htmlentities($entry['source']) ?></span>
      •
      <time datetime="<?= htmlentities($entry['date']->format(DateTime::ISO8601)) ?>">
        <?= htmlentities($entry['date']->format($config['date_fmt'])) ?>
      </time>
    </article>
<?php endforeach; ?>
  </main>
  <!-- generated by rss_dot_php <?= VERSION ?>
       https://git.aleteoryx.me/cgit/rss_dot_php -->
</body>
</html>