From 865ee640ace97962cb9918b9a17a9bbb87e2695d Mon Sep 17 00:00:00 2001
From: "C. McEnroe"
Date: Sun, 12 Sep 2021 21:02:19 -0400
Subject: Replace htagml regex with strncmp

Since ctags only ever produces regular expressions of the form /^re$/
or /^re/ with no other special characters, instead unescape the pattern
and simply use strncmp.

Running on a sqlite3.c amalgamation, the regex version takes ~37s while
the strncmp version takes ~1s, producing identical output. Big win!
---
 bin/htagml.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'bin')

diff --git a/bin/htagml.c b/bin/htagml.c
index eb5128d1..8ad6c0fa 100644
--- a/bin/htagml.c
+++ b/bin/htagml.c
@@ -16,7 +16,6 @@
 
 #include
 #include
-#include
 #include
 #include
 #include
@@ -24,13 +23,18 @@
 #include
 #include
 
-static char *nomagic(const char *pattern) {
-	char *buf = malloc(2 * strlen(pattern) + 1);
+static char *deregex(const char *patt) {
+	char *buf = malloc(strlen(patt) + 1);
 	if (!buf) err(EX_OSERR, "malloc");
 	char *ptr = buf;
-	for (const char *ch = pattern; *ch; ++ch) {
-		if (strchr(".[*", *ch)) *ptr++ = '\\';
-		*ptr++ = *ch;
+	if (*patt == '^') patt++;
+	for (; *patt; ++patt) {
+		if (patt[0] == '$' && !patt[1]) {
+			*ptr++ = '\n';
+			break;
+		}
+		if (patt[0] == '\\' && patt[1]) patt++;
+		*ptr++ = *patt;
 	}
 	*ptr = '\0';
 	return buf;
@@ -98,7 +102,8 @@ int main(int argc, char *argv[]) {
 	struct Tag {
 		char *tag;
 		int num;
-		regex_t regex;
+		char *str;
+		size_t len;
 	} *tags = malloc(cap * sizeof(*tags));
 	if (!tags) err(EX_OSERR, "malloc");
 
@@ -123,15 +128,11 @@ int main(int argc, char *argv[]) {
 		if (def[0] == '/' || def[0] == '?') {
 			def++;
 			def[strlen(def)-1] = '\0';
-			char *search = nomagic(def);
-			int error = regcomp(
-				&tags[len].regex, search, REG_NEWLINE | REG_NOSUB
-			);
-			free(search);
-			if (error) {
-				warnx("invalid regex for tag %s: %s", tag, def);
-				continue;
+			if (def[0] != '^') {
+				warnx("unanchored regex for tag %s: %s", tag, def);
 			}
+			tags[len].str = deregex(def);
+			tags[len].len = strlen(tags[len].str);
 		} else {
 			tags[len].num = strtol(def, &def, 10);
 			if (*def) {
@@ -154,7 +155,7 @@ int main(int argc, char *argv[]) {
 		if (tags[i].num) {
 			if (num != tags[i].num) continue;
 		} else {
-			if (regexec(&tags[i].regex, buf, 0, NULL, 0)) continue;
+			if (strncmp(tags[i].str, buf, tags[i].len)) continue;
 		}
 		tag = &tags[i];
 		tag->num = num;
-- 
cgit 1.4.1