From 5edf949cb743e8da5e73aa2f98695c2dd370657f Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Fri, 9 Apr 2021 15:12:56 -0400 Subject: Remove unnecessary capture group in URL regex That capture group actually makes the regex ambiguous, it seems, since apparently some implementations backtrack to match the . branch rather than the $ branch, leaving off a trailing character from the second capture group. Regardless, that trailing capture group is totally unnecessary and I don't know why it was there. --- html.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/html.c b/html.c index 77438a0..34ffb16 100644 --- a/html.c +++ b/html.c @@ -228,12 +228,12 @@ static void swap(char *a, char *b) { static int htmlMarkupURLs(FILE *file, char *buf) { static regex_t regex; - compile(&regex, "(^|[[:space:]<])(https?:[^[:space:]>]+)(.|$)"); + compile(&regex, "(^|[[:space:]<])(https?:[^[:space:]>]+)"); int error; char *ptr; - regmatch_t match[4]; - for (ptr = buf; !regexec(&regex, ptr, 4, match, 0); ptr += match[2].rm_eo) { + regmatch_t match[3]; + for (ptr = buf; !regexec(&regex, ptr, 3, match, 0); ptr += match[2].rm_eo) { char nul = '\0'; swap(&ptr[match[2].rm_so], &nul); @@ -242,14 +242,14 @@ static int htmlMarkupURLs(FILE *file, char *buf) { swap(&ptr[match[2].rm_so], &nul); const char *template = Q([url]); - swap(&ptr[match[3].rm_so], &nul); + swap(&ptr[match[2].rm_eo], &nul); struct Variable vars[] = { { "url", &ptr[match[2].rm_so] }, {0}, }; error = templateRender(file, template, vars, escapeXML); if (error) return error; - swap(&ptr[match[3].rm_so], &nul); + swap(&ptr[match[2].rm_eo], &nul); } return escapeXML(file, ptr); } -- cgit 1.4.1