diff options
author | squibid <me@zacharyscheiman.com> | 2023-03-11 15:01:56 -0500 |
---|---|---|
committer | squibid <me@zacharyscheiman.com> | 2023-03-11 15:01:56 -0500 |
commit | d99696640c16738a33123114db94a073ac24ad0a (patch) | |
tree | 3791756021587c63085308af32cfae2e71a75319 /blog/sup | |
parent | 2ca0d8d3fb9420ac2158eff74d2f657a4382e649 (diff) | |
download | site-d99696640c16738a33123114db94a073ac24ad0a.tar.gz site-d99696640c16738a33123114db94a073ac24ad0a.tar.bz2 site-d99696640c16738a33123114db94a073ac24ad0a.zip |
add blog generator + rss feed/feed generator
Diffstat (limited to '')
-rwxr-xr-x | blog/sup | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/blog/sup b/blog/sup new file mode 100755 index 0000000..8509af7 --- /dev/null +++ b/blog/sup @@ -0,0 +1,69 @@ +#!/bin/sh + +[ ! -f "$1" ] && + echo "Give \`sup\` a page which has been added/updated." && + exit 1 + +# You will want to change these variables to your needs. +website="https://squi.bid" +rssfile="rss.xml" + +# In order to cleanly use sed on a multi-line file, we have to use `tr` to +# convert newlines to a set character, then run sed, then reconvert the +# character. Unfortunately, due to a current issue in GNU's tr, characters of +# more than one byte are not functioning properly. It would be more ideal to +# use a rarer character (some random Tamil character, for example), but ^ is +# one byte. +replchar='^' +# So if you have a page with ^ in it, you'll have to change this to another +# single byte character that isn't in the page like ~ or something. + +link="$website/$1" +title="$(sed -n 's/<title>\(.*\)<\/title>/\1/Ip' "$1")" + +# Check and see if this page has already been added to the RSS feed. +if grep -q "<guid.*>$link</guid>" "$rssfile"; then + # Do this if it has been adding and we are updating it. + + # If updating a file, we append the time/date to the GUID, as all GUIDs + # must be unique to validate an RSS feed. Even feed readers that follow + # GUIDs will still be lead to the same page with this. + guid="$link#$(date '+%y%m%d%H%M%S')" + title="$title (Updated)" + echo "Explain the nature of the update:" + read -r content + [ -z "$content" ] && content="New updates to $link" +else + # Do this if it is a new page. + + guid=$link + # Get the page body content, excluding the nav and footer. + content="$(tr '\n' $replchar < "$1" | sed " + s/.*<body>// + s/<footer>.*<\/footer>// + s/<nav>.*<\/nav>// + s/<\/body>.*// + " | tr -s $replchar '\n')" +fi + +rssdate="$(LC_TIME=en_US date '+%a, %d %b %Y %H:%M:%S %z')" + +# Eh, I'm a brainlet and I'm not sure how to elegantly add in the content to +# the RSS feed without first writing it out to a file. This is because if we +# tried run, say, a sed substitute command, we'd have to escape with \ +# basically every other character. If you know how to do it without creating a +# temporary file, tell me. I do the same in lb, actually. +temp="$(mktemp)"; +trap 'rm -f "$temp"' 0 1 2 3 15 # Delete temp file after script termination. +echo " +<item> +<title>$title</title> +<guid>$guid</guid> +<link>$link</link> +<pubDate>$rssdate</pubDate> +<description><![CDATA[$content +]]></description> +</item> +" > "$temp" + +sed -i "/<!-- LB -->/r $temp" "$rssfile" |