add blog generator + rss feed/feed generator

author: squibid <me@zacharyscheiman.com> 2023-03-11 15:01:56 -0500
committer: squibid <me@zacharyscheiman.com> 2023-03-11 15:01:56 -0500
commit: d99696640c16738a33123114db94a073ac24ad0a (patch)
tree: 3791756021587c63085308af32cfae2e71a75319 /blog/sup
parent: 2ca0d8d3fb9420ac2158eff74d2f657a4382e649 (diff)
download: site-d99696640c16738a33123114db94a073ac24ad0a.tar.gz
site-d99696640c16738a33123114db94a073ac24ad0a.tar.bz2
site-d99696640c16738a33123114db94a073ac24ad0a.zip
1 files changed, 69 insertions, 0 deletions
diff --git a/blog/sup b/blog/sup
new file mode 100755
index 0000000..8509af7
--- /dev/null
+++ b/blog/sup
@@ -0,0 +1,69 @@
+#!/bin/sh
+
+[ ! -f "$1" ] &&
+	echo "Give \`sup\` a page which has been added/updated." &&
+	exit 1
+
+# You will want to change these variables to your needs.
+website="https://squi.bid"
+rssfile="rss.xml"
+
+# In order to cleanly use sed on a multi-line file, we have to use `tr` to
+# convert newlines to a set character, then run sed, then reconvert the
+# character. Unfortunately, due to a current issue in GNU's tr, characters of
+# more than one byte are not functioning properly. It would be more ideal to
+# use a rarer character (some random Tamil character, for example), but ^ is
+# one byte.
+replchar='^'
+# So if you have a page with ^ in it, you'll have to change this to another
+# single byte character that isn't in the page like ~ or something.
+
+link="$website/$1"
+title="$(sed -n 's/<title>\(.*\)<\/title>/\1/Ip' "$1")"
+
+# Check and see if this page has already been added to the RSS feed.
+if grep -q "<guid.*>$link</guid>" "$rssfile"; then
+	# Do this if it has been added and we are updating it.
+
+	# If updating a file, we append the time/date to the GUID, as all GUIDs
+	# must be unique to validate an RSS feed. Even feed readers that follow
+	# GUIDs will still be led to the same page with this.
+	guid="$link#$(date '+%y%m%d%H%M%S')"
+	title="$title (Updated)"
+	echo "Explain the nature of the update:"
+	read -r content
+	[ -z "$content" ] && content="New updates to $link"
+else
+	# Do this if it is a new page.
+
+	guid=$link
+	# Get the page body content, excluding the nav and footer.
+	content="$(tr '\n' $replchar < "$1" | sed "
+	s/.*<body>//
+	s/<footer>.*<\/footer>//
+	s/<nav>.*<\/nav>//
+	s/<\/body>.*//
+	" | tr -s $replchar '\n')"
+fi
+
+rssdate="$(LC_TIME=en_US date '+%a, %d %b %Y %H:%M:%S %z')"
+
+# Eh, I'm a brainlet and I'm not sure how to elegantly add in the content to
+# the RSS feed without first writing it out to a file. This is because if we
+# tried to run, say, a sed substitute command, we'd have to escape with \
+# basically every other character. If you know how to do it without creating a
+# temporary file, tell me. I do the same in lb, actually.
+temp="$(mktemp)";
+trap 'rm -f "$temp"' 0 1 2 3 15	# Delete temp file after script termination.
+echo "
+<item>
+<title>$title</title>
+<guid>$guid</guid>
+<link>$link</link>
+<pubDate>$rssdate</pubDate>
+<description><![CDATA[$content
+]]></description>
+</item>
+" > "$temp"
+
+sed -i "/<!-- LB -->/r $temp" "$rssfile"
author	squibid <me@zacharyscheiman.com>	2023-03-11 15:01:56 -0500
committer	squibid <me@zacharyscheiman.com>	2023-03-11 15:01:56 -0500
commit	d99696640c16738a33123114db94a073ac24ad0a (patch)
tree	3791756021587c63085308af32cfae2e71a75319 /blog/sup
parent	2ca0d8d3fb9420ac2158eff74d2f657a4382e649 (diff)
download	site-d99696640c16738a33123114db94a073ac24ad0a.tar.gz site-d99696640c16738a33123114db94a073ac24ad0a.tar.bz2 site-d99696640c16738a33123114db94a073ac24ad0a.zip