blog/sup


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

#!/bin/sh
#
# sup - add a new or updated page to the site's RSS feed.
#
# Usage: sup PAGE
#   PAGE  Path of the HTML page that was added or updated. The same path is
#         appended to $website to build the item's link, so run the script
#         from the directory that path should be relative to.
#
# A new <item> is inserted into $rssfile directly after the line containing
# the $marker comment. If the feed already has a <guid> equal to the page's
# link, the page is treated as an update: the user is prompted for a
# description and the GUID gets a timestamp suffix. Otherwise the page body
# (minus <nav> and <footer>) becomes the item's description.
#
# Requires GNU sed (`sed -i` and the `I` flag of the `s` command).
# Exits 1 when PAGE is not a regular file, when the feed file or its marker
# is missing, or when no temporary file can be created.

if [ ! -f "$1" ]; then
	echo "Give \`sup\` a page which has been added/updated." >&2
	exit 1
fi

# You will want to change these variables to your needs.
website="https://squi.bid"
rssfile="rss.xml"
# Line in $rssfile after which new items are inserted. It is used both as a
# fixed string (grep -F) and inside a sed address, so keep it free of `/` and
# regex metacharacters.
marker="<!-- LB -->"

# In order to cleanly use sed on a multi-line file, we have to use `tr` to
# convert newlines to a set character, then run sed, then reconvert the
# character. Unfortunately, due to a current issue in GNU's tr, characters of
# more than one byte are not functioning properly. It would be more ideal to
# use a rarer character (some random Tamil character, for example), but ^ is
# one byte.
replchar='^'
# So if you have a page with ^ in it, you'll have to change this to another
# single byte character that isn't in the page like ~ or something.

# Fail early instead of letting the final sed silently insert nothing.
if [ ! -f "$rssfile" ]; then
	echo "sup: RSS file \"$rssfile\" not found." >&2
	exit 1
fi
if ! grep -qF -- "$marker" "$rssfile"; then
	echo "sup: no \"$marker\" marker in \"$rssfile\"; nowhere to insert the item." >&2
	exit 1
fi

link="$website/$1"
# Text between <title> and </title> (case-insensitive) on a single line.
title="$(sed -n 's/<title>\(.*\)<\/title>/\1/Ip' "$1")"

# Check and see if this page has already been added to the RSS feed.
# -F matches the link literally, so `.` and other regex metacharacters in the
# URL cannot cause false matches; the leading `>` and trailing `</guid>` pin
# the match to a complete GUID value, with or without attributes on <guid>.
if grep -qF -- ">$link</guid>" "$rssfile"; then
	# Do this if it has been added and we are updating it.

	# If updating a file, we append the time/date to the GUID, as all GUIDs
	# must be unique to validate an RSS feed. Even feed readers that follow
	# GUIDs will still be led to the same page with this.
	guid="$link#$(date '+%y%m%d%H%M%S')"
	title="$title (Updated)"
	echo "Explain the nature of the update:"
	read -r content
	[ -z "$content" ] && content="New updates to $link"
else
	# Do this if it is a new page.

	guid=$link
	# Get the page body content, excluding the nav and footer. The file is
	# flattened to one line first so each sed expression can span what were
	# originally several lines; `tr -s` then restores the newlines (squeezing
	# runs of them). Note that only a bare `<body>` tag is matched.
	content="$(tr '\n' "$replchar" < "$1" | sed '
	s/.*<body>//
	s/<footer>.*<\/footer>//
	s/<nav>.*<\/nav>//
	s/<\/body>.*//
	' | tr -s "$replchar" '\n')"
fi

# RSS requires English day/month abbreviations. LC_ALL=C guarantees them:
# unlike LC_TIME it cannot be overridden by the caller's environment, and the
# C locale is always available.
rssdate="$(LC_ALL=C date '+%a, %d %b %Y %H:%M:%S %z')"

# The item is written to a temporary file and pulled in with sed's `r`
# command. Substituting it in directly with `s///` would require escaping
# nearly every special character in the page content.
temp="$(mktemp)" || {
	echo "sup: could not create a temporary file." >&2
	exit 1
}
# Remove the temp file on any exit; make signals actually terminate the script
# (which in turn fires the EXIT trap) rather than resume after the handler.
trap 'rm -f "$temp"' EXIT
trap 'exit 1' HUP INT QUIT TERM

# A here-document is used rather than `echo`: some /bin/sh implementations
# (e.g. dash) make echo interpret backslash escapes, which would corrupt page
# content containing sequences such as \n or \t. Expanded values are inserted
# verbatim. The blank first and last lines are intentional padding around the
# item.
# NOTE(review): content containing "]]>" would end the CDATA section early.
cat > "$temp" <<EOF

<item>
<title>$title</title>
<guid>$guid</guid>
<link>$link</link>
<pubDate>$rssdate</pubDate>
<description><![CDATA[$content
]]></description>
</item>

EOF

# Insert the item right after the marker line, newest first.
sed -i "/$marker/r $temp" "$rssfile"