summary refs log tree commit diff
diff options
context:
space:
mode:
authorJune McEnroe <june@causal.agency>2019-09-05 01:49:01 -0400
committerJune McEnroe <june@causal.agency>2019-09-05 01:49:01 -0400
commit8dddcdeb41e01736b14f415a0adf60819ed8594f (patch)
treeb3a3abff9fd6b12cb8bbac12421035673532f103
parentAdd Avorter n'est pas tuer (diff)
downloadsrc-8dddcdeb41e01736b14f415a0adf60819ed8594f.tar.gz
src-8dddcdeb41e01736b14f415a0adf60819ed8594f.zip
Add title
Diffstat (limited to '')
-rw-r--r--bin/.gitignore1
-rw-r--r--bin/Makefile9
-rw-r--r--bin/README3
-rw-r--r--bin/bin.74
-rw-r--r--bin/man1/title.134
-rw-r--r--bin/title.c138
6 files changed, 186 insertions, 3 deletions
diff --git a/bin/.gitignore b/bin/.gitignore
index b31a9d08..78e94de3 100644
--- a/bin/.gitignore
+++ b/bin/.gitignore
@@ -30,6 +30,7 @@ scheme.png
 setopt
 shotty
 tags
+title
 ttpre
 up
 when
diff --git a/bin/Makefile b/bin/Makefile
index 3eb71c62..1b038ea6 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -44,9 +44,11 @@ BINS_LINUX += fbatt
 BINS_LINUX += fbclock
 BINS_LINUX += psfed
 
+BINS_CURL += title
+
 BINS_TLS += relay
 
-BINS = ${BINS_ANY} ${BINS_BSD} ${BINS_LINUX} ${BINS_TLS}
+BINS = ${BINS_ANY} ${BINS_BSD} ${BINS_LINUX} ${BINS_CURL} ${BINS_TLS}
 MANS = ${BINS:%=man1/%.1}
 LINKS = ${LINKS_ANY}
 
@@ -58,6 +60,8 @@ bsd: meta ${BINS_BSD}
 
 linux: meta ${BINS_LINUX}
 
+curl: meta ${BINS_CURL}
+
 tls: meta ${BINS_TLS}
 
 meta: tags .gitignore README
@@ -74,6 +78,9 @@ open pbcopy pbpaste: pbd
 relay: relay.c
 	${CC} ${CFLAGS_tls} ${LDFLAGS_tls} $@.c ${LDLIBS_tls} -o $@
 
+title: title.c
+	${CC} ${CFLAGS} ${LDFLAGS} $@.c ${LDLIBS} -lcurl -o $@
+
 # Headers
 
 fbatt.o fbclock.o: scheme.h
diff --git a/bin/README b/bin/README
index c534e7dc..bfbfa203 100644
--- a/bin/README
+++ b/bin/README
@@ -30,6 +30,7 @@ DESCRIPTION
      scheme(1)   color scheme
      setopt(1)   quoting getopt
      shotty(1)   terminal capture
+     title(1)    page titles
      ttpre(1)    man output to HTML
      up(1)       upload file
      when(1)     date calculator
@@ -39,4 +40,4 @@ DESCRIPTION
 
      png(3)  basic PNG output
 
-Causal Agency                   August 29, 2019                  Causal Agency
+Causal Agency                  September 3, 2019                 Causal Agency
diff --git a/bin/bin.7 b/bin/bin.7
index 4d2d5ac7..98717614 100644
--- a/bin/bin.7
+++ b/bin/bin.7
@@ -1,4 +1,4 @@
-.Dd August 29, 2019
+.Dd September 3, 2019
 .Dt BIN 7
 .Os "Causal Agency"
 .
@@ -62,6 +62,8 @@ color scheme
 quoting getopt
 .It Xr shotty 1
 terminal capture
+.It Xr title 1
+page titles
 .It Xr ttpre 1
 man output to HTML
 .It Xr up 1
diff --git a/bin/man1/title.1 b/bin/man1/title.1
new file mode 100644
index 00000000..927163fa
--- /dev/null
+++ b/bin/man1/title.1
@@ -0,0 +1,34 @@
+.Dd September 5, 2019
+.Dt TITLE 1
+.Os
+.
+.Sh NAME
+.Nm title
+.Nd page titles
+.
+.Sh SYNOPSIS
+.Nm
+.Op Ar url
+.
+.Sh DESCRIPTION
+.Nm
+fetches HTML page titles
+over HTTP and HTTPS.
+.Nm
+scans standard input for URLs
+and writes their titles to standard output.
+If a
+.Ar url
+argument is given,
+.Nm
+exits after fetching its title.
+.
+.Sh EXAMPLES
+.Bd -literal -offset indent
+mkfifo snarf titles
+relay irc.example.org 6697 snarf '#example' <>titles >snarf
+title <snarf >titles
+.Ed
+.
+.Sh SEE ALSO
+.Xr relay 1
diff --git a/bin/title.c b/bin/title.c
new file mode 100644
index 00000000..ed9eeccc
--- /dev/null
+++ b/bin/title.c
@@ -0,0 +1,138 @@
+/* Copyright (C) 2019  June McEnroe <june@causal.agency>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <curl/curl.h>
+#include <err.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sysexits.h>
+
+/*
+ * Compile pattern as a POSIX extended regular expression and return the
+ * compiled object by value. If compilation fails, the regcomp diagnostic
+ * and the pattern are reported and the program exits with EX_SOFTWARE.
+ */
+static regex_t regex(const char *pattern) {
+	regex_t compiled;
+	int status = regcomp(&compiled, pattern, REG_EXTENDED);
+	if (status != 0) {
+		char message[256];
+		regerror(status, &compiled, message, sizeof(message));
+		errx(EX_SOFTWARE, "regcomp: %s: %s", message, pattern);
+	}
+	return compiled;
+}
+
+/* The single libcurl easy handle; created in main and reused by getTitle. */
+static CURL *curl;
+/*
+ * Set by handleHeader when a response header declares Content-Type
+ * text/html; cleared by getTitle before its header-only request.
+ */
+static bool html;
+/*
+ * Fixed-size capture of the response body. handleBody appends to buf and
+ * advances len, which counts the bytes stored so far; getTitle resets len
+ * to zero before downloading a page.
+ */
+static struct {
+	char buf[8192];
+	size_t len;
+} body;
+
+/*
+ * libcurl header callback. buf holds one header line of size * nitems
+ * bytes; it is treated as a counted buffer, never as a C string. Sets html
+ * when the line begins with "Content-Type: text/html", compared without
+ * regard to case.
+ *
+ * Always returns the full line length, i.e. the line is reported as
+ * consumed.
+ */
+static size_t handleHeader(char *buf, size_t size, size_t nitems, void *user) {
+	(void)user;
+	size_t len = size * nitems;
+
+	// When redirects are followed, the headers of every response in the
+	// chain are delivered here. A status line starts a new response, so
+	// forget the content type of any earlier one.
+	static const char Status[] = "HTTP/";
+	if (len >= sizeof(Status) - 1 && !strncmp(buf, Status, sizeof(Status) - 1)) {
+		html = false;
+		return len;
+	}
+
+	// Require the whole prefix to be present. Comparing only len bytes
+	// would let a shorter line that is merely a prefix of it match.
+	static const char ContentType[] = "Content-Type: text/html";
+	size_t prefix = sizeof(ContentType) - 1;
+	if (len >= prefix && !strncasecmp(buf, ContentType, prefix)) html = true;
+	return len;
+}
+
+/*
+ * libcurl write callback. Appends as much of the incoming chunk as still
+ * fits into body.buf and silently drops the rest.
+ *
+ * Returns the full chunk size even when bytes were dropped, so the amount
+ * reported back never differs from the amount delivered.
+ */
+static size_t handleBody(char *buf, size_t size, size_t nitems, void *user) {
+	(void)user;
+	const size_t received = size * nitems;
+	const size_t room = sizeof(body.buf) - body.len;
+	size_t kept = received;
+	if (kept > room) kept = room;
+	memcpy(body.buf + body.len, buf, kept);
+	body.len += kept;
+	return received;
+}
+
+/*
+ * Extended regular expression for a title element. Capture group 1 is the
+ * title text, which may not contain '<'. The tags are matched exactly as
+ * written: lowercase and without attributes.
+ */
+static const char *TitlePattern = "<title>([^<]*)</title>";
+/* Compiled form of TitlePattern; assigned in main before any lookup. */
+static regex_t TitleRegex;
+
+/*
+ * Fetch url and print the text of its first <title> element, followed by a
+ * newline, to standard output.
+ *
+ * A header-only request is made first; the body is downloaded only if the
+ * html flag was set while that request ran. Only the part of the body that
+ * fits in body.buf is searched.
+ *
+ * Returns true if a title was printed. Returns false if the URL could not
+ * be set or a transfer failed (a warning is printed), if the page was not
+ * flagged as HTML, or if no title was found.
+ */
+static bool getTitle(const char *url) {
+	CURLcode code = curl_easy_setopt(curl, CURLOPT_URL, url);
+	if (code) {
+		warnx("CURLOPT_URL: %s", curl_easy_strerror(code));
+		return false;
+	}
+
+	// First pass: headers only, to learn the content type.
+	html = false;
+	curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
+	if ((code = curl_easy_perform(curl))) {
+		warnx("curl_easy_perform: %s", curl_easy_strerror(code));
+		return false;
+	}
+	if (!html) return false;
+
+	// Second pass: download the body into body.buf.
+	body.len = 0;
+	curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
+	if ((code = curl_easy_perform(curl))) {
+		warnx("curl_easy_perform: %s", curl_easy_strerror(code));
+		return false;
+	}
+
+	// Terminate directly after the stored data. The index is clamped so
+	// that an empty body cannot underflow it, and only a completely full
+	// buffer gives up its final byte to the terminator.
+	size_t end = body.len;
+	if (end > sizeof(body.buf) - 1) end = sizeof(body.buf) - 1;
+	body.buf[end] = '\0';
+
+	regmatch_t match[2];
+	int error = regexec(&TitleRegex, body.buf, 2, match, 0);
+	if (error == REG_NOMATCH) return false;
+	if (error) errx(EX_SOFTWARE, "regexec: %d", error);
+
+	// Cut the buffer at the end of capture group 1 and print from its start.
+	body.buf[match[1].rm_eo] = '\0';
+	char *title = &body.buf[match[1].rm_so];
+
+	printf("%s\n", title);
+	return true;
+}
+
+/*
+ * With a url argument, print that page's title and exit with EX_OK if one
+ * was printed or EX_DATAERR otherwise. Without arguments, read lines from
+ * standard input and look up the title of every http:// or https:// URL
+ * found in them until input ends.
+ */
+int main(int argc, char *argv[]) {
+	TitleRegex = regex(TitlePattern);
+
+	CURLcode code = curl_global_init(CURL_GLOBAL_ALL);
+	if (code) errx(EX_OSERR, "curl_global_init: %s", curl_easy_strerror(code));
+
+	curl = curl_easy_init();
+	if (!curl) errx(EX_SOFTWARE, "curl_easy_init");
+
+	// curl_easy_setopt is variadic and this option is read as a long, so
+	// the int protocol mask has to be converted explicitly.
+	curl_easy_setopt(
+		curl, CURLOPT_PROTOCOLS, (long)(CURLPROTO_HTTP | CURLPROTO_HTTPS)
+	);
+	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+	curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
+
+	curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, handleHeader);
+	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, handleBody);
+
+	// Line-buffer stdout so each title is written out as soon as it is
+	// printed, even when output is not a terminal.
+	setlinebuf(stdout);
+
+	if (argc > 1) {
+		return (getTitle(argv[1]) ? EX_OK : EX_DATAERR);
+	}
+
+	regex_t urlRegex = regex("https?://[^[:space:]>\"]+");
+
+	char *buf = NULL;
+	size_t cap = 0;
+	while (0 < getline(&buf, &cap, stdin)) {
+		regmatch_t match = {0};
+		// Each pass resumes the search just after the previous match.
+		for (char *url = buf; *url; url += match.rm_eo) {
+			int error = regexec(&urlRegex, url, 1, &match, 0);
+			if (error == REG_NOMATCH) break;
+			if (error) errx(EX_SOFTWARE, "regexec: %d", error);
+
+			// Terminate the URL in place for getTitle, then put back the
+			// byte that followed it. That byte is the line's own
+			// terminator when the URL ends input that has no trailing
+			// newline, so it must be restored rather than replaced.
+			char *end = &url[match.rm_eo];
+			char saved = *end;
+			*end = '\0';
+			getTitle(&url[match.rm_so]);
+			*end = saved;
+		}
+	}
+}