summary refs log tree commit diff
diff options
context:
space:
mode:
author: June McEnroe <june@causal.agency> 2021-09-12 21:02:19 -0400
committer: June McEnroe <june@causal.agency> 2021-09-12 21:02:19 -0400
commit: e7b4d18510cf26a0a72fac72604f930d62b4fa7d (patch)
tree: 238ecad4e34893d3bc7aa0f3c03deb6e7c92c200
parent: Also defer printing comment for lone close-parens (diff)
download: src-e7b4d18510cf26a0a72fac72604f930d62b4fa7d.tar.gz
src-e7b4d18510cf26a0a72fac72604f930d62b4fa7d.zip
Replace htagml regex with strncmp
Since ctags only ever produces regular expressions of the form
/^re$/ or /^re/ with no other special characters, unescape the
pattern and simply use strncmp instead of compiling a regex.

Running on a sqlite3.c amalgamation, the regex version takes ~37s
while the strncmp version takes ~1s, producing identical output.
Big win!
Diffstat (limited to '')
-rw-r--r-- bin/htagml.c 33
1 files changed, 17 insertions, 16 deletions
diff --git a/bin/htagml.c b/bin/htagml.c
index a3a4101d..d8d73a86 100644
--- a/bin/htagml.c
+++ b/bin/htagml.c
@@ -16,7 +16,6 @@
 
 #include <ctype.h>
 #include <err.h>
-#include <regex.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -24,13 +23,18 @@
 #include <sysexits.h>
 #include <unistd.h>
 
-static char *nomagic(const char *pattern) {
-	char *buf = malloc(2 * strlen(pattern) + 1);
+static char *deregex(const char *patt) {
+	char *buf = malloc(strlen(patt) + 1);
 	if (!buf) err(EX_OSERR, "malloc");
 	char *ptr = buf;
-	for (const char *ch = pattern; *ch; ++ch) {
-		if (strchr(".[*", *ch)) *ptr++ = '\\';
-		*ptr++ = *ch;
+	if (*patt == '^') patt++;
+	for (; *patt; ++patt) {
+		if (patt[0] == '$' && !patt[1]) {
+			*ptr++ = '\n';
+			break;
+		}
+		if (patt[0] == '\\' && patt[1]) patt++;
+		*ptr++ = *patt;
 	}
 	*ptr = '\0';
 	return buf;
@@ -98,7 +102,8 @@ int main(int argc, char *argv[]) {
 	struct Tag {
 		char *tag;
 		int num;
-		regex_t regex;
+		char *str;
+		size_t len;
 	} *tags = malloc(cap * sizeof(*tags));
 	if (!tags) err(EX_OSERR, "malloc");
 
@@ -123,15 +128,11 @@ int main(int argc, char *argv[]) {
 		if (def[0] == '/' || def[0] == '?') {
 			def++;
 			def[strlen(def)-1] = '\0';
-			char *search = nomagic(def);
-			int error = regcomp(
-				&tags[len].regex, search, REG_NEWLINE | REG_NOSUB
-			);
-			free(search);
-			if (error) {
-				warnx("invalid regex for tag %s: %s", tag, def);
-				continue;
+			if (def[0] != '^') {
+				warnx("unanchored regex for tag %s: %s", tag, def);
 			}
+			tags[len].str = deregex(def);
+			tags[len].len = strlen(tags[len].str);
 		} else {
 			tags[len].num = strtol(def, &def, 10);
 			if (*def) {
@@ -154,7 +155,7 @@ int main(int argc, char *argv[]) {
 			if (tags[i].num) {
 				if (num != tags[i].num) continue;
 			} else {
-				if (regexec(&tags[i].regex, buf, 0, NULL, 0)) continue;
+				if (strncmp(tags[i].str, buf, tags[i].len)) continue;
 			}
 			tag = &tags[i];
 			tag->num = num;