<?php
/*
 * Recovered from a cgit patch view. Original patch header:
 *   From abf8b1d21bf2c355c588e990decf043011750376 Mon Sep 17 00:00:00 2001
 *   From: aleteoryx
 *   Date: Tue, 12 Nov 2024 15:13:06 -0500
 *   Subject: mostly feature-complete
 *   create mode 100644 rss.php (1 file changed, 210 insertions)
 *
 * NOTE(review): the head of the configuration section is not visible in this
 * extract. At least $feeds and $config["cache_dir"] are used by the code below
 * and must be defined before it runs.
 */

/// Date format
///
/// NOTE(review): the head of this comment was lost in extraction. The value
/// looks like a DateTime::format() pattern, see
/// <https://www.php.net/manual/en/datetime.format.php>
/// for documentation.
$config["date_fmt"] = "l, M jS, Y, H:i T";


/// Timezone
///
/// A value of type DateTimeZone, see
/// <https://www.php.net/manual/en/class.datetimezone.php>
/// for documentation.
$config["timezone"] = new DateTimeZone('UTC');


/* --- CODE - DO NOT TOUCH --- */

/**
 * Returns the text content of the first descendant of $node whose tag name is
 * $tag, or null when there is no such element.
 */
function feed_first_text(DOMElement $node, string $tag): ?string {
    return $node->getElementsByTagName($tag)->item(0)?->textContent;
}

/**
 * Turns a date string taken from a feed into a DateTime expressed in
 * $config["timezone"].
 *
 * A missing, empty or unparsable date yields the current time rather than an
 * exception, so that one bad entry cannot take the whole page down.
 */
function feed_parse_date(?string $raw): DateTime {
    global $config;

    $raw = trim($raw ?? "");
    try {
        $date = new DateTime($raw === "" ? "now" : $raw);
    } catch (Exception $e) {
        error_log("Unparsable feed date \"$raw\". Using the current time.");
        $date = new DateTime();
    }
    $date->setTimezone($config["timezone"]);

    return $date;
}

/**
 * Fetches and parses one RSS or Atom feed.
 *
 * @param string $uri     Anything file_get_contents() can read (URL or path).
 * @param string $linkrel For Atom entries, the rel value of the link to use
 *                        as the entry's main link.
 * @return array List of entries. Every entry has "title" (?string),
 *               "link" (?string) and "date" (DateTime); Atom entries also
 *               carry "links", a list of ["rel" => ..., "href" => ...].
 *               Empty when the feed cannot be fetched or parsed.
 */
function load_rss(string $uri, string $linkrel = "alternate"): array {
    // @: a failed fetch is reported through error_log() just below.
    $xml = @file_get_contents($uri);
    if ($xml === false || trim($xml) === "") {
        error_log("Could not load feed \"$uri\". Ignoring.");
        return [];
    }

    $doc = null;
    // if the file doesn't contain an encoding, attempt to read it from http headers and re-encode
    if (!preg_match("/^[^>]+encoding/", $xml) && str_starts_with($uri, "http")) {
        // Headers of every response in a redirect chain are listed in order,
        // so walk them backwards to pick the final response's Content-Type.
        foreach (array_reverse($http_response_header ?? []) as $header) {
            if (!str_starts_with(strtolower($header), "content-type")) continue;
            if (preg_match("/(?<=charset=)[a-z0-9_-]+/i", $header, $matches)) {
                // @: an unknown charset or invalid byte sequence makes iconv()
                // return false; in that case keep the bytes as they came.
                $converted = @iconv($matches[0], "UTF-8", $xml);
                if ($converted !== false) {
                    $xml = $converted;
                    $doc = new DOMDocument(encoding: "UTF-8");
                }
            }
            break;
        }
    }

    $doc ??= new DOMDocument();
    // LIBXML_NONET keeps the parser from touching the network on behalf of a
    // remote document; the other two flags silence per-feed parser noise.
    $loaded = $doc->loadXML($xml, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
    if (!$loaded || $doc->documentElement === null) {
        error_log("Feed \"$uri\" is not well-formed XML. Ignoring.");
        return [];
    }

    $parsed = [];
    if ($doc->documentElement->nodeName === "rss") {
        // TODO: better rss / atom sniffing
        foreach ($doc->getElementsByTagName("item") as $node) {
            // Build every entry from scratch: carrying an array over between
            // iterations would leak the previous item's link into this one.
            $parsed[] = [
                "title" => feed_first_text($node, "title"),
                "link" => feed_first_text($node, "link"),
                "date" => feed_parse_date(feed_first_text($node, "pubDate")),
            ];
        }
    } else {
        // assume atom
        foreach ($doc->getElementsByTagName("entry") as $node) {
            $links = [];
            $link = null;
            foreach ($node->getElementsByTagName("link") as $element) {
                $rel = $element->getAttribute("rel");
                $href = $element->getAttribute("href");
                $links[] = ["rel" => $rel, "href" => $href];
                // RFC 4287: a link without a rel attribute means rel="alternate".
                if ($link === null && ($rel === "" ? "alternate" : $rel) === $linkrel) {
                    $link = $href;
                }
            }
            // No link with the requested rel: fall back to the first one listed.
            $link ??= $links[0]["href"] ?? null;

            $parsed[] = [
                "title" => feed_first_text($node, "title"),
                "links" => $links,
                "link" => $link,
                "date" => feed_parse_date(
                    feed_first_text($node, "published") ?? feed_first_text($node, "updated")
                ),
            ];
        }
    }

    return $parsed;
}

/**
 * Reads a cache file written by load_cached().
 *
 * @return array|null The cached entries, or null when the file is missing,
 *                    unreadable or does not hold a serialized array.
 */
function feed_cache_read(string $path): ?array {
    // @: absence and corruption are both reported to the caller as null.
    $raw = @file_get_contents($path);
    if ($raw === false) return null;

    // Entries hold only scalars, arrays and DateTime objects; refuse to
    // instantiate anything else from the cache file.
    $data = @unserialize($raw, ["allowed_classes" => [DateTime::class]]);

    return is_array($data) ? $data : null;
}

/**
 * Same as load_rss(), but goes through a file cache in $config["cache_dir"].
 *
 * @param int    $ttl     Seconds a cached copy stays fresh.
 * @param string $uri     Feed location, also used (hashed) as the cache key.
 * @param string $linkrel Passed through to load_rss().
 * @return array Entries in the shape returned by load_rss().
 */
function load_cached(int $ttl, string $uri, string $linkrel = "alternate"): array {
    global $config;
    $path = $config["cache_dir"]."/".md5($uri);

    // filemtime() signals failure with false (never null), so test for it.
    $mtime = is_file($path) ? filemtime($path) : false;
    if ($mtime !== false && $mtime + $ttl >= time()) {
        $cached = feed_cache_read($path);
        if ($cached !== null) return $cached;
        // unreadable cache file: treat as a miss and rebuild it below
    }

    $data = load_rss($uri, $linkrel);
    if ($data !== []) {
        file_put_contents($path, serialize($data), LOCK_EX);
        return $data;
    }

    // Nothing came back (fetch/parse failure or an empty feed). Leave the cache
    // file alone and keep serving the stale copy, if any, so a transient
    // failure does not wipe out good entries.
    return feed_cache_read($path) ?? [];
}

$rss_cache_dir = (string) ($config["cache_dir"] ?? "");
if ($rss_cache_dir === ""
    || (!is_dir($rss_cache_dir) && !@mkdir($rss_cache_dir, recursive: true) && !is_dir($rss_cache_dir))) {
    error_log("Cache directory \"$rss_cache_dir\" is missing and could not be created.");
}

// NOTE(review): $feeds is expected to come from the configuration section that
// is not visible here; this is a no-op when it is already defined.
$feeds ??= [];

// ?disabled=name (instead of ?disabled[]=name) arrives as a plain string.
if (isset($_GET["disabled"]) && !is_array($_GET["disabled"])) {
    $_GET["disabled"] = [$_GET["disabled"]];
}

// Move every feed named in ?disabled[] out of $feeds and into $off_feeds, and
// drop names that do not refer to a (still enabled) feed from the request.
foreach ($_GET["disabled"] ?? [] as $idx => $feed) {
    // nested arrays (?disabled[][]=x) cannot be feed names
    if (!is_string($feed) || !array_key_exists($feed, $feeds)) {
        unset($_GET["disabled"][$idx]);
        continue;
    }
    $off_feeds[$feed] = $feeds[$feed];
    unset($feeds[$feed]);
}

$combined = [];
// Real Feed Processing Happens Here
foreach ($feeds as $name => $data) {
    if (!isset($data["url"])) {
        error_log("Feed \"$name\" missing url. Ignoring.");
        continue;
    }
    $data["ttl"] ??= 3600;
    $data["linkrel"] ??= "alternate";

    foreach (load_cached($data["ttl"], $data["url"], $data["linkrel"]) as $entry) {
        $entry["source"] = $name;
        $combined[] = $entry;
    }
}

// reverse-chronological by default
usort($combined, fn($a, $b) => $b["date"]->getTimestamp() <=> $a["date"]->getTimestamp());

if (isset($_GET["reverse"]))
    $combined = array_reverse($combined);

// REQUEST_URI is absent when the script is not run through a web server.
$base = parse_url($_SERVER["REQUEST_URI"] ?? "", PHP_URL_PATH);

?>
+ +

Looks like you filtered out everything...

+

Try unfiltering some feeds!

+ +
+

+ + • + +
+ +
+ + + -- cgit v1.2.3-54-g00ecf