From aadee09d292ced6d04ace5b22e5214ad70f5365a Mon Sep 17 00:00:00 2001
From: June McEnroe
Date: Thu, 5 Sep 2019 13:36:58 -0400
Subject: Print title as soon as it's available

---
 bin/title.c | 94 +++++++++++++++++++++++++++++++------------------------------
 1 file changed, 48 insertions(+), 46 deletions(-)

diff --git a/bin/title.c b/bin/title.c
index ed9eeccc..37d1d8f5 100644
--- a/bin/title.c
+++ b/bin/title.c
@@ -16,6 +16,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -23,16 +24,22 @@
 #include
 #include
 #include
+#include
 
 static regex_t regex(const char *pattern) {
 	regex_t regex;
 	int error = regcomp(&regex, pattern, REG_EXTENDED);
 	if (!error) return regex;
+
 	char buf[256];
 	regerror(error, &regex, buf, sizeof(buf));
 	errx(EX_SOFTWARE, "regcomp: %s: %s", buf, pattern);
 }
 
+static void showTitle(const char *title) {
+	printf("%s\n", title);
+}
+
 static CURL *curl;
 static bool html;
 static struct {
@@ -40,66 +47,62 @@ static struct {
 	size_t len;
 } body;
 
+static const char ContentType[] = "Content-Type: text/html";
+
 static size_t handleHeader(char *buf, size_t size, size_t nitems, void *user) {
 	(void)user;
 	size_t len = size * nitems;
-	const char ContentType[] = "Content-Type: text/html";
 	if (sizeof(ContentType) - 1 < len) len = sizeof(ContentType) - 1;
 	if (!strncasecmp(buf, ContentType, len)) html = true;
 	return size * nitems;
 }
 
+// HE COMES
+static const char TitlePattern[] = "([^<]*)";
+static regex_t TitleRegex;
+
 static size_t handleBody(char *buf, size_t size, size_t nitems, void *user) {
 	(void)user;
+
 	size_t len = size * nitems;
-	size_t cap = sizeof(body.buf) - body.len;
-	size_t cpy = (len < cap ? len : cap);
-	memcpy(&body.buf[body.len], buf, cpy);
-	body.len += cpy;
-	return len;
-}
+	size_t cap = sizeof(body.buf) - body.len - 1;
+	size_t new = (len < cap ? len : cap);
+	if (!new) return 0;
 
-static const char *TitlePattern = "([^<]*)";
-static regex_t TitleRegex;
+	memcpy(&body.buf[body.len], buf, new);
+	body.len += new;
+	body.buf[body.len] = '\0';
+
+	regmatch_t match[2];
+	if (regexec(&TitleRegex, body.buf, 2, match, 0)) return len;
+	body.buf[match[1].rm_eo] = '\0';
+	showTitle(&body.buf[match[1].rm_so]);
+	return 0;
+}
 
-static bool getTitle(const char *url) {
+static CURLcode fetchTitle(const char *url) {
 	CURLcode code = curl_easy_setopt(curl, CURLOPT_URL, url);
-	if (code) {
-		warnx("CURLOPT_URL: %s", curl_easy_strerror(code));
-		return false;
-	}
+	if (code) return code;
 
 	html = false;
 	curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
-	if ((code = curl_easy_perform(curl))) {
-		warnx("curl_easy_perform: %s", curl_easy_strerror(code));
-		return false;
-	}
-	if (!html) return false;
+	code = curl_easy_perform(curl);
+	if (code) return code;
+	if (!html) return CURLE_OK;
 
 	body.len = 0;
 	curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
-	if ((code = curl_easy_perform(curl))) {
-		warnx("curl_easy_perform: %s", curl_easy_strerror(code));
-		return false;
-	}
-	body.buf[body.len - 1] = '\0';
-
-	regmatch_t match[2];
-	int error = regexec(&TitleRegex, body.buf, 2, match, 0);
-	if (error == REG_NOMATCH) return false;
-	if (error) errx(EX_SOFTWARE, "regexec: %d", error);
-
-	body.buf[match[1].rm_eo] = '\0';
-	char *title = &body.buf[match[1].rm_so];
-
-	printf("%s\n", title);
-	return true;
+	code = curl_easy_perform(curl);
+	if (code == CURLE_WRITE_ERROR) return CURLE_OK;
+	return code;
 }
 
 int main(int argc, char *argv[]) {
 	TitleRegex = regex(TitlePattern);
 
+	setlocale(LC_CTYPE, "");
+	setlinebuf(stdout);
+
 	CURLcode code = curl_global_init(CURL_GLOBAL_ALL);
 	if (code) errx(EX_OSERR, "curl_global_init: %s", curl_easy_strerror(code));
 
@@ -108,31 +111,30 @@ int main(int argc, char *argv[]) {
 
 	curl_easy_setopt(curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
 	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-	curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
+	curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 3L);
 
 	curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, handleHeader);
 	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, handleBody);
 
-	setlinebuf(stdout);
-
 	if (argc > 1) {
-		return (getTitle(argv[1]) ? EX_OK : EX_DATAERR);
+		code = fetchTitle(argv[1]);
+		if (!code) return EX_OK;
+		errx(EX_DATAERR, "curl_easy_perform: %s", curl_easy_strerror(code));
 	}
 
-	regex_t urlRegex = regex("https?://[^[:space:]>\"]+");
-
 	char *buf = NULL;
 	size_t cap = 0;
+
+	regex_t urlRegex = regex("https?://[^[:space:]>\"]+");
 	while (0 < getline(&buf, &cap, stdin)) {
 		regmatch_t match = {0};
 		for (char *url = buf; *url; url += match.rm_eo) {
-			int error = regexec(&urlRegex, url, 1, &match, 0);
-			if (error == REG_NOMATCH) break;
-			if (error) errx(EX_SOFTWARE, "regexec: %d", error);
-
+			if (regexec(&urlRegex, url, 1, &match, 0)) break;
 			url[match.rm_eo] = '\0';
-			getTitle(&url[match.rm_so]);
+			code = fetchTitle(&url[match.rm_so]);
+			if (code) warnx("curl_easy_perform: %s", curl_easy_strerror(code));
 			url[match.rm_eo] = ' ';
 		}
 	}
+	if (ferror(stdin)) err(EX_IOERR, "getline");
 }
-- 
cgit 1.4.1