summary refs log tree commit diff
diff options
context:
space:
mode:
author: June McEnroe <june@causal.agency> 2020-12-18 22:59:41 -0500
committer: June McEnroe <june@causal.agency> 2020-12-18 22:59:41 -0500
commit: 592efa05e747359be717705d82ec25acca0f16b2 (patch)
tree: 7bf5dcd4e2cd7dd063f76aa0fed862f3bc2ab916
parent: Fix bibsort name sorting for middle names, trailing titles (diff)
download: src-592efa05e747359be717705d82ec25acca0f16b2.tar.gz
src-592efa05e747359be717705d82ec25acca0f16b2.zip
Add scripts to download, compress and tag IETF RFCs
Diffstat (limited to '')
-rw-r--r--rfc/.gitignore3
-rw-r--r--rfc/Makefile11
-rw-r--r--rfc/rfctags.pl26
3 files changed, 40 insertions, 0 deletions
diff --git a/rfc/.gitignore b/rfc/.gitignore
new file mode 100644
index 00000000..808cd63e
--- /dev/null
+++ b/rfc/.gitignore
@@ -0,0 +1,3 @@
+*.txt
+*.txt.gz
+tags
diff --git a/rfc/Makefile b/rfc/Makefile
new file mode 100644
index 00000000..ed89a4e4
--- /dev/null
+++ b/rfc/Makefile
@@ -0,0 +1,11 @@
+tags: rfctags.pl # rebuilt whenever the script changes; its stdout becomes the file
+	perl rfctags.pl > $@
+# sync: copy files matching rfc[0-9]*.txt from the rsync server into this directory
+sync:
+	rsync -z ftp.rfc-editor.org::rfcs-text-only/'rfc[0-9]*.txt' .
+# compress: gzip every .txt at level 9; -f overwrites any existing .gz output
+compress:
+	gzip -9f *.txt
+# clean: delete the text files, their compressed copies and the generated tags
+clean:
+	rm -f *.txt *.txt.gz tags
diff --git a/rfc/rfctags.pl b/rfc/rfctags.pl
new file mode 100644
index 00000000..cd3ba288
--- /dev/null
+++ b/rfc/rfctags.pl
@@ -0,0 +1,26 @@
+use strict;
+use warnings;
+use open ':encoding(ISO-8859-1)';
+# Prints tab-separated tag records (name, file, line) for each *.txt.gz here.
+use IO::Uncompress::Gunzip qw($GunzipError);
+# $, separates the arguments of print; $\ is appended after every print call.
+($,, $\) = ("\t", "\n");
+for my $rfc (<*.txt.gz>) {
+	my $handle = IO::Uncompress::Gunzip->new($rfc) # arrow call, not indirect "new"
+		or die "${rfc}: ${GunzipError}";
+	while (<$handle>) {
+		chomp;
+		# Section headings: one tag for the number or capital letter, one for the title
+		if (/^([\d.]+|[A-Z][.])\s+([^\t]+)/) {
+			print $1, $rfc, $.;
+			print $2, $rfc, $.;
+		}
+		# References: a bracketed label followed by at least two whitespace characters
+		if (/^\s*(\[[\w-]+\])\s{2,}/) {
+			print $1, $rfc, $.;
+			print "\\$1", $rfc, $.; # vim ^] prepends \ to [
+		}
+	}
+	die "${rfc}: $!" if $!; # NOTE(review): $! may be stale at this point - confirm
+	close $handle;
+}