diff options
author | June McEnroe <june@causal.agency> | 2021-04-09 15:12:56 -0400 |
---|---|---|
committer | June McEnroe <june@causal.agency> | 2021-04-09 15:12:56 -0400 |
commit | 5edf949cb743e8da5e73aa2f98695c2dd370657f (patch) | |
tree | dc05d06d293a2af23857b8ab5e6da0b03240340f | |
parent | Exit on getopt failure (diff) | |
download | bubger-5edf949cb743e8da5e73aa2f98695c2dd370657f.tar.gz bubger-5edf949cb743e8da5e73aa2f98695c2dd370657f.zip |
Remove unnecessary capture group in URL regex
That capture group apparently makes the regex ambiguous: some implementations seem to backtrack to match the . branch rather than the $ branch, leaving off a trailing character from the second capture group. Regardless, that trailing capture group is totally unnecessary and I don't know why it was there.
-rw-r--r-- | html.c | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/html.c b/html.c index 77438a0..34ffb16 100644 --- a/html.c +++ b/html.c @@ -228,12 +228,12 @@ static void swap(char *a, char *b) { static int htmlMarkupURLs(FILE *file, char *buf) { static regex_t regex; - compile(&regex, "(^|[[:space:]<])(https?:[^[:space:]>]+)(.|$)"); + compile(&regex, "(^|[[:space:]<])(https?:[^[:space:]>]+)"); int error; char *ptr; - regmatch_t match[4]; - for (ptr = buf; !regexec(&regex, ptr, 4, match, 0); ptr += match[2].rm_eo) { + regmatch_t match[3]; + for (ptr = buf; !regexec(&regex, ptr, 3, match, 0); ptr += match[2].rm_eo) { char nul = '\0'; swap(&ptr[match[2].rm_so], &nul); @@ -242,14 +242,14 @@ static int htmlMarkupURLs(FILE *file, char *buf) { swap(&ptr[match[2].rm_so], &nul); const char *template = Q(<a href="[url]">[url]</a>); - swap(&ptr[match[3].rm_so], &nul); + swap(&ptr[match[2].rm_eo], &nul); struct Variable vars[] = { { "url", &ptr[match[2].rm_so] }, {0}, }; error = templateRender(file, template, vars, escapeXML); if (error) return error; - swap(&ptr[match[3].rm_so], &nul); + swap(&ptr[match[2].rm_eo], &nul); } return escapeXML(file, ptr); } |