summary refs log tree commit diff
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2021-01-12 15:50:47 -0500
committer June McEnroe <june@causal.agency> 2021-01-12 16:15:37 -0500
commit 3e7ffc868d1105740ad3a5b3bf8ee721f47485a7 (patch)
tree b03adeb09bda33d2f006baddde0e823ff0d6ac84
parent Replace causal.agency with a simple mdoc page (diff)
download src-3e7ffc868d1105740ad3a5b3bf8ee721f47485a7.tar.gz
src-3e7ffc868d1105740ad3a5b3bf8ee721f47485a7.zip
Add htagml
-rw-r--r-- bin/.gitignore 1
-rw-r--r-- bin/Makefile 1
-rw-r--r-- bin/README.7 4
-rw-r--r-- bin/htagml.c 182
-rw-r--r-- bin/man1/htagml.1 47
5 files changed, 234 insertions, 1 deletions
diff --git a/bin/.gitignore b/bin/.gitignore
index df322fd4..2bfab38f 100644
--- a/bin/.gitignore
+++ b/bin/.gitignore
@@ -13,6 +13,7 @@ fbclock
 glitch
 hi
 hnel
+htagml
 modem
 nudge
 open
diff --git a/bin/Makefile b/bin/Makefile
index 5691b0fc..f858d483 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -27,6 +27,7 @@ BINS += dtch
 BINS += glitch
 BINS += hi
 BINS += hnel
+BINS += htagml
 BINS += modem
 BINS += nudge
 BINS += order
diff --git a/bin/README.7 b/bin/README.7
index 40dcdd0b..31acdde5 100644
--- a/bin/README.7
+++ b/bin/README.7
@@ -1,4 +1,4 @@
-.Dd December 15, 2020
+.Dd January 12, 2021
 .Dt BIN 7
 .Os "Causal Agency"
 .
@@ -40,6 +40,8 @@ PNG glitcher
 syntax highlighter
 .It Xr hnel 1
 PTY input remapper
+.It Xr htagml 1
+tags HTMLizer
 .It Xr modem 1
 fixed baud rate wrapper
 .It Xr nudge 1
diff --git a/bin/htagml.c b/bin/htagml.c
new file mode 100644
index 00000000..22d921a4
--- /dev/null
+++ b/bin/htagml.c
@@ -0,0 +1,182 @@
+/* Copyright (C) 2021  C. McEnroe <june@causal.agency>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <err.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+struct Tag { // one tags-file entry that belongs to the file being formatted
+	char *tag; // tag name; a heap copy made with strdup
+	regex_t regex; // the entry's search pattern after nomagic() and regcomp()
+};
+
+struct Match { // where one tag's pattern was found in the file contents
+	struct Tag *tag; // the tag this match belongs to (points into the tags array)
+	regmatch_t match; // rm_so/rm_eo byte offsets written by regexec; stays 0/0 from calloc when nothing is written
+};
+
+static int compar(const void *_a, const void *_b) { // qsort comparator: orders struct Match elements by ascending match.rm_so
+	const struct Match *a = _a;
+	const struct Match *b = _b;
+	return (a->match.rm_so > b->match.rm_so) // difference of two 0/1 comparisons gives -1, 0 or 1
+		- (a->match.rm_so < b->match.rm_so); // and never subtracts the offsets themselves
+}
+
+static char *nomagic(const char *pattern) { // returns a malloc'd copy of pattern with each '.', '[' and '*' preceded by a backslash; exits on allocation failure
+	char *buf = malloc(2 * strlen(pattern) + 1); // worst case: every character gains a backslash, plus the terminating NUL
+	if (!buf) err(EX_OSERR, "malloc");
+	char *ptr = buf; // write cursor into buf
+	for (const char *ch = pattern; *ch; ++ch) {
+		if (strchr(".[*", *ch)) *ptr++ = '\\'; // presumably mirrors vi's "nomagic" search mode, where these are literal -- TODO confirm
+		*ptr++ = *ch;
+	}
+	*ptr = '\0';
+	return buf; // ownership passes to the caller, which frees it
+}
+
+static size_t escape(const char *ptr, size_t len) { // writes the first len bytes of ptr to stdout with '&', '<' and '"' replaced by HTML entities; returns len
+	for (size_t i = 0; i < len; ++i) { // reads exactly len bytes; does not stop at a NUL
+		switch (ptr[i]) {
+			break; case '&': printf("&amp;"); // each leading "break;" ends the case above it; the first one is never reached
+			break; case '<': printf("&lt;");
+			break; case '"': printf("&quot;");
+			break; default:  putchar(ptr[i]); // everything else, including '>', is written unchanged
+		}
+	}
+	return len; // input bytes consumed, so callers can advance their position by it
+}
+
+int main(int argc, char *argv[]) { // htagml [-p] [-f tags_file] name: writes the file name to stdout as HTML, wrapping each of its tags in a fragment link
+	bool pre = false; // -p: wrap the output in <pre>...</pre>
+	const char *tagsFile = "tags"; // default tags file, overridden by -f
+	for (int opt; 0 < (opt = getopt(argc, argv, "f:p"));) {
+		switch (opt) {
+			break; case 'f': tagsFile = optarg;
+			break; case 'p': pre = true;
+			break; default:  return EX_USAGE;
+		}
+	}
+	if (optind == argc) errx(EX_USAGE, "name required");
+	const char *name = argv[optind]; // the source file to format
+
+	FILE *file = fopen(tagsFile, "r");
+	if (!file) err(EX_NOINPUT, "%s", tagsFile);
+
+	size_t len = 0; // number of tags kept so far
+	size_t cap = 256; // allocated slots in tags; doubled whenever it fills
+	struct Tag *tags = malloc(cap * sizeof(*tags));
+	if (!tags) err(EX_OSERR, "malloc");
+
+	char *buf = NULL;
+	size_t bufCap = 0;
+	while (0 < getline(&buf, &bufCap, file)) { // each line: tag, file, /search/ separated by a tab or space
+		char *line = buf;
+		char *tag = strsep(&line, "\t ");
+		char *file = strsep(&line, "\t "); // file-name field; shadows the FILE *file inside this loop
+		char *search = strsep(&line, "\n");
+		if (!tag || !file || !search) errx(EX_DATAERR, "malformed tags file");
+		if (search[0] != '/' || search[strlen(search)-1] != '/') { // checked for every line, before the file-name filter below
+			errx(EX_DATAERR, "search is not forward regex: %s", search);
+		}
+		search++; // drop the leading '/'
+		search[strlen(search)-1] = '\0'; // drop the trailing '/'
+
+		if (strcmp(file, name)) continue; // keep only tags that belong to the file being formatted
+		if (len == cap) {
+			tags = realloc(tags, (cap *= 2) * sizeof(*tags));
+			if (!tags) err(EX_OSERR, "realloc");
+		}
+		tags[len].tag = strdup(tag);
+		if (!tags[len].tag) err(EX_OSERR, "strdup");
+		char *pattern = nomagic(search);
+		int error = regcomp(&tags[len].regex, pattern, REG_NEWLINE); // REG_NEWLINE: ^ and $ anchor at line boundaries within the whole-file buffer
+		if (error) errx(EX_DATAERR, "invalid regex: %s", pattern);
+		free(pattern);
+		len++;
+	}
+	free(buf);
+	fclose(file);
+
+	file = fopen(name, "r");
+	if (!file) err(EX_NOINPUT, "%s", name);
+
+	struct stat stat;
+	int error = fstat(fileno(file), &stat);
+	if (error) err(EX_IOERR, "%s", name);
+	buf = malloc(stat.st_size + 1); // room for the whole file plus a terminating NUL
+	if (!buf) err(EX_OSERR, "malloc");
+
+	size_t size = fread(buf, 1, stat.st_size, file);
+	if (size < (size_t)stat.st_size && ferror(file)) err(EX_IOERR, "%s", name);
+	buf[size] = '\0'; // lets regexec, strstr and strlen treat buf as a string
+	fclose(file);
+
+	struct Match *matches = calloc(len, sizeof(*matches)); // zeroed, so every rm_so/rm_eo starts at 0
+	if (!matches) err(EX_OSERR, "calloc");
+	for (size_t i = 0; i < len; ++i) {
+		matches[i].tag = &tags[i];
+		regexec(&tags[i].regex, buf, 1, &matches[i].match, 0); // result unchecked; NOTE(review): the empty-match test below assumes a failed regexec leaves the zeroed slot alone -- confirm
+	}
+	qsort(matches, len, sizeof(*matches), compar); // handle matches in the order they occur in the file
+
+	char *main; // "M" followed by the basename of name with its last extension removed
+	const char *base = strrchr(name, '/');
+	int n = asprintf(&main, "M%s", (base ? &base[1] : name));
+	if (n < 0) err(EX_OSERR, "asprintf");
+	if (strrchr(main, '.')) *strrchr(main, '.') = '\0';
+
+	regoff_t pos = 0; // offset in buf of the first byte not yet written out
+	if (pre) printf("<pre>");
+	for (size_t i = 0; i < len; ++i) {
+		if (matches[i].match.rm_so == matches[i].match.rm_eo) { // empty span: nothing was matched for this tag
+			warnx("no match for tag %s", matches[i].tag->tag);
+			continue;
+		}
+		if (matches[i].match.rm_so < pos) { // strictly before pos only: a match starting exactly at pos (e.g. offset 0, the file's first line) does not overlap
+			warnx("overlapping match for tag %s", matches[i].tag->tag);
+			continue;
+		}
+
+		pos += escape(&buf[pos], matches[i].match.rm_so - pos); // text between the previous match and this one
+		const char *text = matches[i].tag->tag; // the text to look for inside the matched span
+		if (!strcmp(text, main)) text = "main"; // a tag named M<basename> is located by the text "main"
+		if (!strcmp(text, "yyparse") || !strcmp(text, "yylex")) text = "%%"; // these two tags are located by the text "%%"
+		char *tag = strstr(&buf[pos], text);
+		if (!tag || tag >= &buf[matches[i].match.rm_eo]) { // not found, or found only after the matched span
+			warnx("tag %s does not occur in match", matches[i].tag->tag);
+			continue;
+		}
+
+		pos += escape(&buf[pos], tag - &buf[pos]); // part of the matched span that precedes the tag text
+		printf("<a class=\"tag\" id=\"");
+		escape(matches[i].tag->tag, strlen(matches[i].tag->tag));
+		printf("\" href=\"#");
+		escape(matches[i].tag->tag, strlen(matches[i].tag->tag));
+		printf("\">");
+		pos += escape(&buf[pos], strlen(text)); // the link text is taken from the file contents
+		printf("</a>");
+
+		pos += escape(&buf[pos], matches[i].match.rm_eo - pos); // rest of the matched span
+	}
+	escape(&buf[pos], strlen(&buf[pos])); // remainder of the file after the last linked match
+	if (pre) printf("</pre>");
+}
diff --git a/bin/man1/htagml.1 b/bin/man1/htagml.1
new file mode 100644
index 00000000..96379416
--- /dev/null
+++ b/bin/man1/htagml.1
@@ -0,0 +1,47 @@
+.Dd January 12, 2021
+.Dt HTAGML 1
+.Os
+.
+.Sh NAME
+.Nm htagml
+.Nd format tagged file as HTML
+.
+.Sh SYNOPSIS
+.Nm
+.Op Fl p
+.Op Fl f Ar tags_file
+.Ar name
+.
+.Sh DESCRIPTION
+The
+.Nm
+utility formats a file tagged with
+.Xr ctags 1
+as HTML.
+Tags are output as fragment hyperlinks
+with the class
+.Qq tag .
+.
+.Pp
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl f Ar tags_file
+Read the tag descriptions from a file called
+.Ar tags_file .
+The default behavior is
+to read them from a file called
+.Pa tags .
+.It Fl p
+Wrap the output in a
+.Sy pre
+tag.
+.El
+.
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa tags
+default input tags file
+.El
+.
+.Sh SEE ALSO
+.Xr ctags 1