From 3e7ffc868d1105740ad3a5b3bf8ee721f47485a7 Mon Sep 17 00:00:00 2001
From: "C. McEnroe"
Date: Tue, 12 Jan 2021 15:50:47 -0500
Subject: Add htagml

---
 bin/.gitignore    |   1 +
 bin/Makefile      |   1 +
 bin/README.7      |   4 +-
 bin/htagml.c      | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bin/man1/htagml.1 |  47 ++++++++++++++
 5 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 bin/htagml.c
 create mode 100644 bin/man1/htagml.1

diff --git a/bin/.gitignore b/bin/.gitignore
index df322fd4..2bfab38f 100644
--- a/bin/.gitignore
+++ b/bin/.gitignore
@@ -13,6 +13,7 @@ fbclock
 glitch
 hi
 hnel
+htagml
 modem
 nudge
 open
diff --git a/bin/Makefile b/bin/Makefile
index 5691b0fc..f858d483 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -27,6 +27,7 @@ BINS += dtch
 BINS += glitch
 BINS += hi
 BINS += hnel
+BINS += htagml
 BINS += modem
 BINS += nudge
 BINS += order
diff --git a/bin/README.7 b/bin/README.7
index 40dcdd0b..31acdde5 100644
--- a/bin/README.7
+++ b/bin/README.7
@@ -1,4 +1,4 @@
-.Dd December 15, 2020
+.Dd January 12, 2021
 .Dt BIN 7
 .Os "Causal Agency"
 .
@@ -40,6 +40,8 @@ PNG glitcher
 syntax highlighter
 .It Xr hnel 1
 PTY input remapper
+.It Xr htagml 1
+tags HTMLizer
 .It Xr modem 1
 fixed baud rate wrapper
 .It Xr nudge 1
diff --git a/bin/htagml.c b/bin/htagml.c
new file mode 100644
index 00000000..22d921a4
--- /dev/null
+++ b/bin/htagml.c
@@ -0,0 +1,182 @@
+/* Copyright (C) 2021 C. McEnroe
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <err.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+struct Tag {
+	char *tag;
+	regex_t regex;
+};
+
+struct Match {
+	struct Tag *tag;
+	regmatch_t match;
+};
+
+static int compar(const void *_a, const void *_b) { // qsort order: ascending match start offset
+	const struct Match *a = _a;
+	const struct Match *b = _b;
+	return (a->match.rm_so > b->match.rm_so)
+		- (a->match.rm_so < b->match.rm_so);
+}
+
+static char *nomagic(const char *pattern) { // returns a malloc'd copy; caller frees
+	char *buf = malloc(2 * strlen(pattern) + 1); // worst case: every byte gains a backslash
+	if (!buf) err(EX_OSERR, "malloc");
+	char *ptr = buf;
+	for (const char *ch = pattern; *ch; ++ch) {
+		if (strchr(".[*", *ch)) *ptr++ = '\\'; // match these literally, not as regex operators
+		*ptr++ = *ch;
+	}
+	*ptr = '\0';
+	return buf;
+}
+
+static size_t escape(const char *ptr, size_t len) { // print len bytes HTML-escaped; returns len
+	for (size_t i = 0; i < len; ++i) {
+		switch (ptr[i]) {
+			break; case '&': printf("&amp;");
+			break; case '<': printf("&lt;");
+			break; case '"': printf("&quot;");
+			break; default: putchar(ptr[i]);
+		}
+	}
+	return len;
+}
+
+int main(int argc, char *argv[]) {
+	bool pre = false;
+	const char *tagsFile = "tags";
+	for (int opt; 0 < (opt = getopt(argc, argv, "f:p"));) {
+		switch (opt) {
+			break; case 'f': tagsFile = optarg;
+			break; case 'p': pre = true;
+			break; default: return EX_USAGE;
+		}
+	}
+	if (optind == argc) errx(EX_USAGE, "name required");
+	const char *name = argv[optind];
+
+	FILE *file = fopen(tagsFile, "r");
+	if (!file) err(EX_NOINPUT, "%s", tagsFile);
+
+	size_t len = 0;
+	size_t cap = 256;
+	struct Tag *tags = malloc(cap * sizeof(*tags));
+	if (!tags) err(EX_OSERR, "malloc");
+
+	char *buf = NULL;
+	size_t bufCap = 0;
+	while (0 < getline(&buf, &bufCap, file)) { // one tags entry per line: tag, file, /search/
+		char *line = buf;
+		char *tag = strsep(&line, "\t ");
+		char *file = strsep(&line, "\t ");
+		char *search = strsep(&line, "\n");
+		if (!tag || !file || !search) errx(EX_DATAERR, "malformed tags file");
+		if (search[0] != '/' || search[strlen(search)-1] != '/') {
+			errx(EX_DATAERR, "search is not forward regex: %s", search);
+		}
+		search++;
+		search[strlen(search)-1] = '\0'; // drop the closing '/' delimiter
+
+		if (strcmp(file, name)) continue; // keep only tags for the requested file
+		if (len == cap) {
+			tags = realloc(tags, (cap *= 2) * sizeof(*tags));
+			if (!tags) err(EX_OSERR, "realloc");
+		}
+		tags[len].tag = strdup(tag);
+		if (!tags[len].tag) err(EX_OSERR, "strdup");
+		char *pattern = nomagic(search);
+		int error = regcomp(&tags[len].regex, pattern, REG_NEWLINE);
+		if (error) errx(EX_DATAERR, "invalid regex: %s", pattern);
+		free(pattern);
+		len++;
+	}
+	free(buf);
+	fclose(file);
+
+	file = fopen(name, "r");
+	if (!file) err(EX_NOINPUT, "%s", name);
+
+	struct stat stat;
+	int error = fstat(fileno(file), &stat);
+	if (error) err(EX_IOERR, "%s", name);
+	buf = malloc(stat.st_size + 1); // whole file plus a terminating NUL
+	if (!buf) err(EX_OSERR, "malloc");
+
+	size_t size = fread(buf, 1, stat.st_size, file);
+	if (size < (size_t)stat.st_size && ferror(file)) err(EX_IOERR, "%s", name);
+	buf[size] = '\0';
+	fclose(file);
+
+	struct Match *matches = calloc(len, sizeof(*matches));
+	if (!matches) err(EX_OSERR, "calloc");
+	for (size_t i = 0; i < len; ++i) {
+		matches[i].tag = &tags[i];
+		regexec(&tags[i].regex, buf, 1, &matches[i].match, 0); // result unchecked; the no-match test below relies on calloc's zeroed offsets
+	}
+	qsort(matches, len, sizeof(*matches), compar);
+
+	char *main;
+	const char *base = strrchr(name, '/');
+	int n = asprintf(&main, "M%s", (base ? &base[1] : name)); // "M" + basename without extension
+	if (n < 0) err(EX_OSERR, "asprintf");
+	if (strrchr(main, '.')) *strrchr(main, '.') = '\0';
+
+	regoff_t pos = 0; // offset of the first byte of buf not yet printed
+	if (pre) printf("<pre>");
+	for (size_t i = 0; i < len; ++i) {
+		if (matches[i].match.rm_so == matches[i].match.rm_eo) {
+			warnx("no match for tag %s", matches[i].tag->tag);
+			continue;
+		}
+		if (matches[i].match.rm_so < pos) { // strictly before pos only: a match starting exactly at pos (e.g. offset 0) does not overlap
+			warnx("overlapping match for tag %s", matches[i].tag->tag);
+			continue;
+		}
+
+		pos += escape(&buf[pos], matches[i].match.rm_so - pos);
+		const char *text = matches[i].tag->tag;
+		if (!strcmp(text, main)) text = "main"; // the "M<basename>" tag is looked up as the text "main"
+		if (!strcmp(text, "yyparse") || !strcmp(text, "yylex")) text = "%%";
+		char *tag = strstr(&buf[pos], text);
+		if (!tag || tag >= &buf[matches[i].match.rm_eo]) {
+			warnx("tag %s does not occur in match", matches[i].tag->tag);
+			continue;
+		}
+
+		pos += escape(&buf[pos], tag - &buf[pos]);
+		printf("<a class=\"tag\" id=\"");
+		escape(matches[i].tag->tag, strlen(matches[i].tag->tag));
+		printf("\" href=\"#");
+		escape(matches[i].tag->tag, strlen(matches[i].tag->tag));
+		printf("\">");
+		pos += escape(&buf[pos], strlen(text));
+		printf("</a>");
+
+		pos += escape(&buf[pos], matches[i].match.rm_eo - pos);
+	}
+	escape(&buf[pos], strlen(&buf[pos])); // remainder of the file after the last tag
+	if (pre) printf("</pre>");
+}
diff --git a/bin/man1/htagml.1 b/bin/man1/htagml.1
new file mode 100644
index 00000000..96379416
--- /dev/null
+++ b/bin/man1/htagml.1
@@ -0,0 +1,47 @@
+.Dd January 12, 2021
+.Dt HTAGML 1
+.Os
+.
+.Sh NAME
+.Nm htagml
+.Nd format tagged file as HTML
+.
+.Sh SYNOPSIS
+.Nm
+.Op Fl p
+.Op Fl f Ar tags_file
+.Ar name
+.
+.Sh DESCRIPTION
+The
+.Nm
+utility formats a file tagged with
+.Xr ctags 1
+as HTML.
+Tags are output as fragment hyperlinks
+with the class
+.Qq tag .
+.
+.Pp
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl f Ar tags_file
+Read the tag descriptions from a file called
+.Ar tags_file .
+The default behavior is
+to read them from a file called
+.Pa tags .
+.It Fl p
+Wrap the output in a
+.Sy pre
+tag.
+.El
+.
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa tags
+default input tags file
+.El
+.
+.Sh SEE ALSO
+.Xr ctags 1
-- 
cgit 1.4.1