<?php define("VERSION", "0.0.2");
/* --- FEEDS - EDIT AS NEEDED --- */
/// Every feed is keyed by its display name and accepts these options:
///   "url"        Feed address. Required; a feed without one is skipped
///                and a message is written to the error log.
///   "mode"       "title" (the default) hides the article content,
///                "no_title" hides the article title, any other value
///                (e.g. "content") shows both.
///   "home"       Optional. If set, the feed name under each article
///                links to this address.
///   "ttl"        Optional. Seconds a cached copy stays fresh (default 3600).
///   "linkrel"    Optional, Atom only. The rel= value of the <link> used
///                as the article link (default "alternate").
///   "allow_html" Optional. TRUE, FALSE or unset; passed to load_rss().
$feeds["OTW News"] ["url"] = "https://www.transformativeworks.org/category/announcement/feed/";
$feeds["OTW News"] ["mode"] = "content";
$feeds["Dreamwidth News"]["url"] = "https://dw-news.dreamwidth.org/data/rss";
$feeds["Dreamwidth News"]["home"] = "https://dw-news.dreamwidth.org/";
/* --- CONFIG - EDIT AS NEEDED --- */
/// Directory to store RSS cache.
///
/// Created on startup if it does not exist. Cache files are named after
/// the MD5 of the feed URL, so multiple instances can share one dir.
$config["cache_dir"] = "/tmp/rss_dot_php";
/// Custom CSS
///
/// Emitted verbatim inside a <style> element in the page head.
$config["custom_css"] = <<<'EOC'
/* custom CSS goes here! */
EOC;
/// Document Language
///
/// Used as the lang= attribute of the <html> element.
$config["lang"] = "en";
/// Date Format
///
/// Displayed under every article, see
/// <https://www.php.net/manual/en/datetime.format.php>
/// for documentation.
$config["date_fmt"] = "l, M jS, Y, H:i T";
/// Timezone
///
/// A value of type DateTimeZone, see
/// <https://www.php.net/manual/en/class.datetimezone.php>
/// for documentation. Article dates are converted to this zone.
$config["timezone"] = new DateTimeZone('UTC');
/// <a target=? >
///
/// What to set for the target= attribute on generated links.
/// _top will redirect the main tab, _blank will make a new tab.
$config["link_target"] = "_top";
/* --- CODE - DO NOT TOUCH --- */
/**
 * Download and parse an RSS 2.0 or Atom feed into a list of display-ready entries.
 *
 * Every string in the result is already HTML-escaped (or, for "content",
 * passed through strip_html()), so the template can echo the values verbatim.
 * Failures (download error, unparsable XML) are logged and yield an empty
 * list instead of aborting the whole page.
 *
 * @param string    $uri        Feed location; anything file_get_contents() can open.
 * @param string    $linkrel    Atom only: rel= value of the <link> to use as the entry link.
 * @param bool|null $allow_html FALSE forces content to be treated as plain text.
 *                              Otherwise RSS descriptions are treated as HTML, and
 *                              Atom content is treated as HTML only when type="html".
 * @return array List of entries. Keys: "title", "link", "content", "date" (DateTime in
 *               $config["timezone"]); RSS entries may add "images"/"videos" (lists of
 *               URLs); Atom entries add "links" (list of ["rel" => ..., "href" => ...]).
 */
function load_rss(string $uri, string $linkrel = "alternate", ?bool $allow_html = NULL): array {
    global $config;

    $xml = file_get_contents($uri);
    if ($xml === false || $xml === "") {
        // DOMDocument::loadXML() throws on an empty string; bail out gracefully instead.
        error_log("rss_dot_php: could not fetch feed \"$uri\".");
        return [];
    }

    // If the document does not declare an encoding itself, take the charset
    // from the HTTP Content-Type header and re-encode to UTF-8.
    $doc = null;
    if (!preg_match("/^[^>]+encoding/", $xml) && str_starts_with($uri, "http")) {
        // $http_response_header holds the headers of every response in a
        // redirect chain, so scan backwards to hit the final response first.
        foreach (array_reverse($http_response_header ?? []) as $header) {
            if (!str_starts_with(strtolower($header), "content-type")) continue;
            if (preg_match("/(?<=charset=)[a-z0-9_-]+/i", $header, $matches)) {
                $converted = iconv($matches[0], "UTF-8", $xml);
                if ($converted !== false) {
                    $xml = $converted;
                    $doc = new DOMDocument(encoding: "UTF-8");
                }
            }
            break;
        }
    }
    $doc ??= new DOMDocument();
    if (!$doc->loadXML($xml) || $doc->documentElement === null) {
        error_log("rss_dot_php: could not parse feed \"$uri\".");
        return [];
    }

    // Feeds in the wild carry malformed dates; fall back to the epoch rather
    // than letting the DateTime constructor abort the page.
    $to_date = static function (?string $raw) use ($config): DateTime {
        try {
            $date = new DateTime($raw ?? '@0');
        } catch (Exception) {
            $date = new DateTime('@0');
        }
        $date->setTimezone($config["timezone"]);
        return $date;
    };
    $escape_text = static fn(string $text): string => htmlentities(html_entity_decode($text));

    $parsed = [];
    if ($doc->documentElement->nodeName === "rss") {
        // TODO: better rss / atom sniffing
        foreach ($doc->getElementsByTagName("item") as $node) {
            // Start from scratch for every item so nothing leaks over from the previous one.
            $data = [];
            $data["title"] = $escape_text(
                $node->getElementsByTagName("title")->item(0)?->textContent ?? "[[[No Title]]]"
            );
            $data["link"] = htmlentities(
                $node->getElementsByTagName("link")->item(0)?->textContent ?? ""
            );

            // assume rss is html by default
            $data["content"] = $node->getElementsByTagName("description")->item(0)?->textContent ?? "";
            $data["content"] = $allow_html !== FALSE
                ? strip_html($data["content"])
                : $escape_text($data["content"]);

            // Media RSS attachments
            foreach ($node->getElementsByTagNameNS("http://search.yahoo.com/mrss/", "content") as $media) {
                $type = $media->getAttribute("type");
                if (str_starts_with($type, "image/")) {
                    $data["images"][] = htmlentities($media->getAttribute("url"));
                } elseif (str_starts_with($type, "video/")) {
                    $data["videos"][] = htmlentities($media->getAttribute("url"));
                }
            }

            $data["date"] = $to_date($node->getElementsByTagName("pubDate")->item(0)?->textContent);
            $parsed[] = $data;
        }
    } else {
        // assume atom
        foreach ($doc->getElementsByTagName("entry") as $node) {
            $data = [];
            $data["title"] = $escape_text(
                $node->getElementsByTagName("title")->item(0)?->textContent ?? "[[[No Title]]]"
            );

            $content_node = $node->getElementsByTagName("content")->item(0);
            $data["content"] = $content_node?->textContent ?? "";
            if ($content_node?->getAttribute("type") === "html" && $allow_html !== FALSE) {
                $data["content"] = strip_html($data["content"]);
            } else {
                $data["content"] = $escape_text($data["content"]);
            }

            $data["links"] = [];
            foreach ($node->getElementsByTagName("link") as $link) {
                $rel = $link->getAttribute("rel");
                $href = htmlentities($link->getAttribute("href"));
                $data["links"][] = ["rel" => htmlentities($rel), "href" => $href];
                // RFC 4287: a <link> without rel= is to be read as rel="alternate".
                if (($rel === "" ? "alternate" : $rel) === $linkrel) {
                    $data["link"] ??= $href;
                }
            }
            // No link with the requested rel: fall back to the first link, if any.
            $data["link"] ??= $data["links"][0]["href"] ?? null;

            $data["date"] = $to_date(
                $node->getElementsByTagName("published")->item(0)?->textContent
                    ?? $node->getElementsByTagName("updated")->item(0)?->textContent
            );
            $parsed[] = $data;
        }
    }
    return $parsed;
}
/**
 * Return the parsed entries of a feed, using the on-disk cache when it is fresh.
 *
 * The cache file lives in $config["cache_dir"] and is named after the MD5 of
 * the feed URI. A cache file that is missing, older than $ttl seconds,
 * unreadable or corrupt is treated as a miss: the feed is fetched again via
 * load_rss() and the cache file is rewritten.
 *
 * @param int       $ttl        Seconds a cached copy stays valid.
 * @param string    $uri        Feed location, passed to load_rss().
 * @param string    $linkrel    Passed to load_rss().
 * @param bool|null $allow_html Passed to load_rss().
 * @return array The list of entries as produced by load_rss().
 */
function load_cached(int $ttl, string $uri, string $linkrel = "alternate", ?bool $allow_html = NULL): array {
    global $config;
    $path = $config["cache_dir"]."/".md5($uri);

    // filemtime() reports a missing file as false (not null), so ?: is the
    // operator that actually supplies the 0 here.
    $mtime = @filemtime($path) ?: 0;
    if ($mtime + $ttl >= time()) {
        $raw = @file_get_contents($path);
        if ($raw !== false) {
            // The cache dir may be shared/world-writable: only allow the one
            // class load_rss() is known to store, and never trust the shape.
            $cached = @unserialize($raw, ["allowed_classes" => [DateTime::class]]);
            if (is_array($cached)) {
                return $cached;
            }
        }
        // Unreadable or corrupt cache file: fall through and refetch.
    }

    $data = load_rss($uri, $linkrel, $allow_html);
    // LOCK_EX keeps two concurrent requests from interleaving their writes.
    file_put_contents($path, serialize($data), LOCK_EX);
    return $data;
}
/**
 * Reduce a feed-supplied HTML fragment to something fit for embedding in the page.
 *
 * Removes <style>, <script>, <link>, <meta>, <base>, <title>, <template> and
 * <slot> elements, neutralises javascript: URLs in href/src, drops autoplay
 * and on* event-handler attributes, and marks images as lazy-loading.
 *
 * This is a best-effort filter, not a full sanitiser; feeds are expected to
 * come from sources the operator trusts.
 *
 * TODO: sniff for 8.4 Dom\HTMLDocument when 8.4 releases
 * <https://www.php.net/manual/en/domdocument.loadhtml.php>
 *
 * @param string $html HTML fragment (UTF-8).
 * @return string The serialised children of the parsed <body>; "" when nothing is left.
 */
function strip_html(string $html): string {
    if ($html === "") return $html;

    $doc = new DOMDocument();
    // this is a really ugly hack but libxml has left me no choice :(
    // (the <meta> prefix makes libxml read the fragment as UTF-8; it is removed again below)
    @$doc->loadHTML("<meta charset='UTF-8'>".$html);

    // getElementsByTagName() returns a live list: removing nodes while
    // iterating it skips every other match, so snapshot the list first.
    foreach (["style", "script", "link", "meta", "base", "title", "template", "slot"] as $tag) {
        foreach (iterator_to_array($doc->getElementsByTagName($tag)) as $el) {
            $el->remove();
        }
    }

    foreach ($doc->getElementsByTagName("img") as $el) {
        $el->setAttribute("loading", "lazy");
    }

    // Browsers ignore case as well as whitespace/control characters when
    // reading a URL scheme, so normalise before comparing.
    $is_js_url = static fn(string $value): bool =>
        stripos((string) preg_replace('/[\x00-\x20]+/', '', $value), "javascript:") === 0;

    foreach ($doc->getElementsByTagName("*") as $el) {
        if ($is_js_url($el->getAttribute("href"))) {
            $el->setAttribute("href", "javascript:alert('Link stripped for security.')");
        }
        if ($is_js_url($el->getAttribute("src"))) {
            // Dropped rather than replaced: an alert in src= could fire on page load.
            $el->removeAttribute("src");
        }
        $el->removeAttribute("autoplay");

        // Inline event handlers (onclick, onerror, ...) would run script too.
        foreach (iterator_to_array($el->attributes) as $attr) {
            if (stripos($attr->nodeName, "on") === 0) {
                $el->removeAttribute($attr->nodeName);
            }
        }
    }

    // libxml creates no <body> when the fragment had no body content at all.
    $body = $doc->getElementsByTagName("body")->item(0);
    if ($body === null) return "";

    $out = "";
    foreach ($body->childNodes as $child) {
        $out .= $doc->saveHTML($child);
    }
    return $out;
}
// code begins here
//
// Request handling: prepare config, apply the ?disabled[]= filter, load every
// remaining feed through the cache and merge the entries into $combined,
// which the template below renders.
$config["link_target"] = htmlentities($config["link_target"]);
@mkdir($config["cache_dir"], recursive: true);

// ?disabled is user input: it may arrive as a plain string or contain nested
// arrays. Reduce it to a flat list of strings. It is only touched when present,
// because the template uses isset($_GET["disabled"]) to word its empty-state message.
if (isset($_GET["disabled"])) {
    $_GET["disabled"] = is_array($_GET["disabled"])
        ? array_filter($_GET["disabled"], "is_string")
        : [];
}

// Move every disabled feed from $feeds to $off_feeds; forget unknown names so
// they do not linger in the generated toggle links.
$off_feeds = [];
foreach ($_GET["disabled"] ?? [] as $idx => $feed) {
    if (!array_key_exists($feed, $feeds)) {
        unset($_GET["disabled"][$idx]);
        continue;
    }
    $off_feeds[$feed] = $feeds[$feed];
    unset($feeds[$feed]);
}

$combined = [];
// Real Feed Processing Happens Here
foreach ($feeds as $name => $data) {
    if (!isset($data["url"])) {
        error_log("Feed \"$name\" missing url. Ignoring.");
        continue;
    }
    $data["ttl"] ??= 3600;
    $data["linkrel"] ??= "alternate";
    $data["mode"] ??= "title";

    $entries = load_cached($data["ttl"], $data["url"], $data["linkrel"], $data["allow_html"] ?? null);
    foreach ($entries as $entry) {
        $entry["source"] = htmlentities($name);
        $entry["home"] = htmlentities($data["home"] ?? "");
        if ($data["mode"] === "title") {
            unset($entry["content"]);
        }
        if ($data["mode"] === "no_title") {
            unset($entry["title"]);
        }
        // NOTE(review): load_rss() stores media under "images"/"videos" and never
        // sets a "media" key, so this branch currently has no effect. Intent
        // unclear, so it is left as-is; confirm what the "media" option should do.
        if ($data["media"] ?? false) {
            unset($entry["media"]);
        }
        $combined[] = $entry;
    }
}

// reverse-chronological by default
usort($combined, fn($a, $b) => $b["date"]->getTimestamp() <=> $a["date"]->getTimestamp());
if (isset($_GET["reverse"]))
    $combined = array_reverse($combined);

// Path of the current request without its query string; the toggle links are built on it.
$base = parse_url($_SERVER["REQUEST_URI"] ?? "", PHP_URL_PATH);
// --- Page template ---
// Renders the feed toggle bar followed by one <article> per entry in $combined.
// Entry fields were HTML-escaped when they were loaded, so they are echoed
// as-is; everything else is escaped here.

// ?disabled is user input and may arrive as a plain string or contain nested
// arrays; only a flat list of strings is safe to extend and rebuild links from.
$disabled_now = is_array($_GET["disabled"] ?? null)
    ? array_filter($_GET["disabled"], "is_string")
    : [];
?>
<!doctype html>
<html lang="<?= htmlentities($config['lang']) ?>">
<head>
<meta charset="UTF-8">
<style> object, img, video { max-width: 60vw; } </style>
<style><?= $config['custom_css'] ?></style>
</head>
<body>
<nav>
  <div>
    <b>Toggle Feeds</b>:
    <?php foreach ($feeds??[] as $name => $data):
      // Link that reloads the page with this feed added to the disabled list.
      $query = $_GET;
      $query["disabled"] = array_merge(array_values($disabled_now), [$name]);
      $uri = $base."?".http_build_query($query);
    ?>
    <span class="rss-source" data-source="<?= htmlentities($name) ?>">
      <a href="<?= htmlentities($uri) ?>"><?= htmlentities($name) ?></a>
    </span>
    <?php endforeach; ?>
    <?php foreach ($off_feeds??[] as $name => $data):
      // Link that reloads the page with this feed removed from the disabled list.
      $query = $_GET;
      $query["disabled"] = array_filter($disabled_now, fn($x) => $x !== $name);
      $uri = $base."?".http_build_query($query);
    ?>
    <span class="rss-source disabled" data-source="<?= htmlentities($name) ?>">
      <a href="<?= htmlentities($uri) ?>"><?= htmlentities($name) ?></a>
    </span>
    <?php endforeach; ?>
  </div>
</nav>
<main>
<?php if (!count($combined) && isset($_GET['disabled'])): ?>
  <h1>Looks like you filtered out everything...</h1>
  <p>Try unfiltering some feeds!</p>
<?php endif;
foreach ($combined as $entry): ?>
  <article data-source="<?= $entry['source'] ?>">
    <div class="rss-content">
      <?php if(isset($entry['title'])): ?>
      <h1 class="rss-title"><a target="<?= $config['link_target'] ?>" href="<?= $entry['link'] ?>"><?= $entry['title'] ?></a></h1>
      <?php endif; ?>
      <?php if(isset($entry['content'])): ?>
      <div><?= $entry['content'] ?></div>
      <?php endif; ?>
      <?php if(count($entry['images']??[])): ?>
      <div class="rss-images">
        <?php foreach($entry['images'] as $media): ?>
        <img loading="lazy" src="<?= $media ?>">
        <?php endforeach; ?>
      </div>
      <?php endif; ?>
      <?php if(count($entry['videos']??[])): ?>
      <div class="rss-videos">
        <?php foreach($entry['videos'] as $media): ?>
        <video controls src="<?= $media ?>"></video>
        <?php endforeach; ?>
      </div>
      <?php endif; ?>
    </div>
    <span class="rss-source">
      <?php if ($entry['home']): ?>
      <a target="<?= $config['link_target'] ?>" href="<?= $entry['home'] ?>"><?= $entry['source'] ?></a>
      <?php else: ?>
      <?= $entry['source'] ?>
      <?php endif; ?>
    </span>
    <?php if(!isset($entry['title'])): ?>
    •
    <a target="<?= $config['link_target'] ?>" href="<?= $entry['link'] ?>">Source</a>
    <?php endif; ?>
    •
    <time datetime="<?= $entry['date']->format(DateTime::ISO8601) ?>">
      <?= htmlentities($entry['date']->format($config['date_fmt'])) ?>
    </time>
  </article>
<?php endforeach; ?>
</main>
<!-- generated by rss_dot_php <?= VERSION ?>
https://git.aleteoryx.me/cgit/rss_dot_php -->
</body>
</html>