diff options
author | June McEnroe <june@causal.agency> | 2019-09-05 13:51:08 -0400 |
---|---|---|
committer | June McEnroe <june@causal.agency> | 2019-09-05 13:51:08 -0400 |
commit | aabfe37483233382e3a823af5202ad29d2ef3868 (patch) | |
tree | 76d66b62ba802b85a1e1c84cea4986b466145117 /bin | |
parent | Print title as soon as it's available (diff) | |
download | src-aabfe37483233382e3a823af5202ad29d2ef3868.tar.gz src-aabfe37483233382e3a823af5202ad29d2ef3868.zip |
Decode entities in titles
Diffstat (limited to 'bin')
-rw-r--r-- | bin/title.c | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/bin/title.c b/bin/title.c
index 84f4f382..6f882223 100644
--- a/bin/title.c
+++ b/bin/title.c
@@ -36,7 +36,48 @@ static regex_t regex(const char *pattern) {
 	errx(EX_SOFTWARE, "regcomp: %s: %s", buf, pattern);
 }
 
+static const struct Entity {
+	wchar_t ch;
+	const char *name;
+} Entities[] = {
+	{ L'"', "&quot;" },
+	{ L'&', "&amp;" },
+	{ L'<', "&lt;" },
+	{ L'>', "&gt;" },
+	{ L'\u00A0', "&nbsp;" },
+};
+
+static wchar_t entity(const char *name) {
+	for (size_t i = 0; i < sizeof(Entities) / sizeof(Entities[0]); ++i) {
+		struct Entity entity = Entities[i];
+		if (strncmp(name, entity.name, strlen(entity.name))) continue;
+		return entity.ch;
+	}
+	if (!strncmp(name, "&#x", 3)) return strtoul(&name[3], NULL, 16);
+	if (!strncmp(name, "&#", 2)) return strtoul(&name[2], NULL, 10);
+	return 0;
+}
+
+static const char EntityPattern[] = {
+	"[[:space:]]+|&([[:alpha:]]+|#([[:digit:]]+|x[[:xdigit:]]+));"
+};
+static regex_t EntityRegex;
+
 static void showTitle(const char *title) {
+	regmatch_t match = {0};
+	for (; *title; title += match.rm_eo) {
+		if (regexec(&EntityRegex, title, 1, &match, 0)) break;
+		if (title[match.rm_so] != '&') {
+			printf("%.*s ", (int)match.rm_so, title);
+			continue;
+		}
+		wchar_t ch = entity(&title[match.rm_so]);
+		if (ch) {
+			printf("%.*s%lc", (int)match.rm_so, title, (wint_t)ch);
+		} else {
+			printf("%.*s", (int)match.rm_eo, title);
+		}
+	}
 	printf("%s\n", title);
 }
 
@@ -98,6 +139,7 @@ static CURLcode fetchTitle(const char *url) {
 }
 
 int main(int argc, char *argv[]) {
+	EntityRegex = regex(EntityPattern);
 	TitleRegex = regex(TitlePattern);
 	setlocale(LC_CTYPE, "");
 