/*
 * Recovered from a patch against bin/htagml.c whose line breaks were lost:
 *
 *   From 339a3191c8ced69f3339297a88d8b4503c78d86e
 *   From: "C. McEnroe"
 *   Date: Tue, 12 Jan 2021 23:12:22 -0500
 *   Subject: Avoid matching tag text inside HTML elements
 *
 * The patch adds hstrstr() after escape() and switches two searches in
 * main() over to it (see the excerpt after the function).
 */

/*
 * Like strstr(3), but skips occurrences of needle that lie inside an HTML
 * element, i.e. at or after a '<' and before the '>' that follows it.
 *
 * haystack: NUL-terminated text to search; may be NULL, in which case the
 *           result is NULL.
 * needle:   NUL-terminated text to look for.
 *
 * Returns a pointer into haystack at the first occurrence that starts
 * before the next '<', or NULL when there is no such occurrence. An
 * element that is opened but never closed ends the search with NULL.
 *
 * Both parameters are const-qualified because nothing is written through
 * them; this lets callers pass string literals without a qualifier
 * mismatch. As with strstr, the returned pointer is non-const and aliases
 * haystack.
 */
static char *hstrstr(const char *haystack, const char *needle) {
	while (haystack) {
		const char *match = strstr(haystack, needle);
		if (!match) {
			return NULL;
		}

		const char *elem = strchr(haystack, '<');
		if (!elem || match < elem) {
			/* The match begins in plain text, before any markup. */
			return (char *)match;
		}

		/*
		 * The nearest match is at or beyond an opening '<': resume the
		 * search from that element's closing '>'. If there is none,
		 * strchr yields NULL and the loop terminates.
		 */
		haystack = strchr(elem, '>');
	}
	return NULL;
}

/*
 * Call sites changed by the same patch, inside main() (only partially
 * visible in the patch). When `pipe` is set the search goes through
 * hstrstr(); otherwise plain strstr() is used as before:
 *
 *	char *text = tag->tag;
 *	char *match = (pipe ? hstrstr(buf, text) : strstr(buf, text));
 *	if (!match && tag->tag[0] == 'M') {
 *		text = "main";
 *		match = (pipe ? hstrstr(buf, text) : strstr(buf, text));
 *	}
 *	if (match) escape(!pipe, buf, match - buf);
 */