From 44957b0c7006183c7c956cc5d3be824e386cd002 Mon Sep 17 00:00:00 2001 From: June McEnroe Date: Sat, 9 Feb 2019 02:48:42 -0500 Subject: Add output format options to hi The monospace option for IRC output doesn't really work, since IRCCloud puts a box around each span this way. --- bin/hi.c | 211 +++++++++++++++++++++++++++++++++++++--------------------- bin/man1/hi.1 | 47 +++++++++++-- 2 files changed, 174 insertions(+), 84 deletions(-) diff --git a/bin/hi.c b/bin/hi.c index 6ceb5330..3087fb92 100644 --- a/bin/hi.c +++ b/bin/hi.c @@ -306,8 +306,27 @@ static void check(void) { } } -typedef void HeaderFn(const char *name); -typedef void OutputFn(enum Class class, const char *str, size_t len); +#define ENUM_OPTION \ + X(Monospace, "monospace") \ + X(Document, "document") \ + X(Title, "title") + +enum Option { +#define X(option, _) option, + ENUM_OPTION +#undef X + OptionLen, +}; + +static const char *OptionKey[OptionLen] = { +#define X(option, key) [option] = key, + ENUM_OPTION +#undef X +}; + +typedef void HeaderFn(const char *opts[]); +typedef void +OutputFn(const char *opts[], enum Class class, const char *str, size_t len); // ANSI format {{{ @@ -337,7 +356,9 @@ static const enum SGR ANSIStyle[ClassLen][3] = { [Todo] = { SGRBlue, SGRBoldOn, SGRBoldOff }, }; -static void ansiOutput(enum Class class, const char *str, size_t len) { +static void +ansiOutput(const char *opts[], enum Class class, const char *str, size_t len) { + (void)opts; // Style each line separately, otherwise less -R won't look right. 
while (len) { size_t line = strcspn(str, "\n"); @@ -384,6 +405,7 @@ enum IRC { IRCLightGray, IRCBold = 0x02, IRCColor = 0x03, + IRCMonospace = 0x11, }; static const enum IRC SGRIRC[] = { @@ -400,27 +422,32 @@ static const enum IRC SGRIRC[] = { [SGRDefault] = 0, }; -static void ircOutput(enum Class class, const char *str, size_t len) { +static void +ircOutput(const char *opts[], enum Class class, const char *str, size_t len) { + char mono[2] = ""; + if (opts[Monospace]) mono[0] = IRCMonospace; + + char cc[3] = ""; + if (ANSIStyle[class][0] != SGRDefault) { + snprintf(cc, sizeof(cc), "%d", SGRIRC[ANSIStyle[class][0]]); + } + // Style each line separately, for multiple IRC messages. while (len) { size_t line = strcspn(str, "\n"); if (line > len) line = len; - char cc[3] = ""; - if (ANSIStyle[class][0] != SGRDefault) { - snprintf(cc, sizeof(cc), "%d", SGRIRC[ANSIStyle[class][0]]); - } if (ANSIStyle[class][1]) { printf( - "%c%s%c%.*s%c", + "%c%s%c%s%.*s%s%c", IRCColor, cc, SGRIRC[ANSIStyle[class][1]], - (int)line, str, + mono, (int)line, str, mono, SGRIRC[ANSIStyle[class][2]] ); } else { // Double-toggle bold to prevent str being interpreted as color. printf( - "%c%s%c%c%.*s", - IRCColor, cc, IRCBold, IRCBold, (int)line, str + "%c%s%c%c%s%.*s%s", + IRCColor, cc, IRCBold, IRCBold, mono, (int)line, str, mono ); } if (line < len) { @@ -436,15 +463,6 @@ static void ircOutput(enum Class class, const char *str, size_t len) { // HTML format {{{ -static void htmlHeader(const char *name) { - (void)name; - printf("<pre>");
-}
-static void htmlFooter(const char *name) {
-	(void)name;
-	printf("</pre>\n");
-} - static void htmlEscape(const char *str, size_t len) { while (len) { size_t run = strcspn(str, "&<>"); @@ -460,37 +478,46 @@ static void htmlEscape(const char *str, size_t len) { } } +static void htmlHeader(const char *opts[]) { + if (opts[Document]) { + printf("\n"); + if (opts[Title]) htmlEscape(opts[Title], strlen(opts[Title])); + printf("\n"); + printf( + "\n" + ); + } + printf("<pre>");
+}
+
+static void htmlFooter(const char *opts[]) {
+	(void)opts;
+	printf("</pre>\n");
+} + static const char *ClassName[ClassLen] = { #define X(class) [class] = #class, ENUM_CLASS #undef X }; -static void htmlOutput(enum Class class, const char *str, size_t len) { +static void +htmlOutput(const char *opts[], enum Class class, const char *str, size_t len) { + (void)opts; printf("", ClassName[class]); htmlEscape(str, len); printf(""); } -static void htmlDocumentHeader(const char *name) { - printf("\n"); - htmlEscape(name, strlen(name)); - printf( - "\n" - "\n" - ); - htmlHeader(name); -} - // }}} static const struct Format { @@ -500,40 +527,80 @@ static const struct Format { HeaderFn *footer; } Formats[] = { { "ansi", ansiOutput, NULL, NULL }, - { "irc", ircOutput, NULL, NULL }, + { "irc", ircOutput, NULL, NULL }, { "html", htmlOutput, htmlHeader, htmlFooter }, - { "html-document", htmlOutput, htmlDocumentHeader, htmlFooter }, }; +static bool findLanguage(struct Language *lang, const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { + if (strcmp(name, Languages[i].name)) continue; + *lang = Languages[i]; + return true; + } + return false; +} + +static bool matchLanguage(struct Language *lang, const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { + regex_t regex = compile(Languages[i].pattern, REG_NOSUB); + int error = regexec(&regex, name, 0, NULL, 0); + regfree(&regex); + if (error == REG_NOMATCH) continue; + if (error) errx(EX_SOFTWARE, "regexec: %d", error); + *lang = Languages[i]; + return true; + } + return false; +} + +static bool findFormat(struct Format *format, const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Formats); ++i) { + if (strcmp(name, Formats[i].name)) continue; + *format = Formats[i]; + return true; + } + return false; +} + +static bool findOption(enum Option *opt, const char *key) { + for (*opt = 0; *opt < OptionLen; ++*opt) { + if (!strcmp(key, OptionKey[*opt])) return true; + } + return false; +} + int main(int argc, char *argv[]) { const char *name = NULL; - const struct Language 
*lang = NULL; - const struct Format *format = NULL; + struct Language lang = {0}; + struct Format format = Formats[0]; + const char *opts[OptionLen] = {0}; int opt; - while (0 < (opt = getopt(argc, argv, "cf:l:n:"))) { + while (0 < (opt = getopt(argc, argv, "cf:l:n:o:"))) { switch (opt) { - break; case 'c': { - check(); - return EX_OK; - } + break; case 'c': check(); return EX_OK; break; case 'f': { - for (size_t i = 0; i < ARRAY_LEN(Formats); ++i) { - if (strcmp(optarg, Formats[i].name)) continue; - format = &Formats[i]; - break; + if (!findFormat(&format, optarg)) { + errx(EX_USAGE, "no such format %s", optarg); } - if (!format) errx(EX_USAGE, "no such format %s", optarg); } break; case 'l': { - for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { - if (strcmp(optarg, Languages[i].name)) continue; - lang = &Languages[i]; - break; + if (!findLanguage(&lang, optarg)) { + errx(EX_USAGE, "no such language %s", optarg); } - if (!lang) errx(EX_USAGE, "no such language %s", optarg); } break; case 'n': name = optarg; + break; case 'o': { + enum Option key; + char *keystr, *valstr; + while (NULL != (valstr = strsep(&optarg, ","))) { + keystr = strsep(&valstr, "="); + if (!findOption(&key, keystr)) { + errx(EX_USAGE, "no such option %s", keystr); + } + opts[key] = (valstr ? valstr : keystr); + } + } break; default: return EX_USAGE; } } @@ -550,20 +617,10 @@ int main(int argc, char *argv[]) { name = strrchr(path, '/'); name = (name ? 
&name[1] : path); } - - if (!lang) { - for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { - regex_t regex = compile(Languages[i].pattern, REG_NOSUB); - bool match = !regexec(&regex, name, 0, NULL, 0); - regfree(&regex); - if (match) { - lang = &Languages[i]; - break; - } - } - if (!lang) errx(EX_USAGE, "cannot infer language for %s", name); + if (!lang.syntax && !matchLanguage(&lang, name)) { + errx(EX_USAGE, "cannot infer language for %s", name); } - if (!format) format = &Formats[0]; + if (!opts[Title]) opts[Title] = name; size_t len = 32 * 1024; if (file != stdin) { @@ -583,15 +640,15 @@ int main(int argc, char *argv[]) { enum Class *hi = calloc(len, sizeof(*hi)); if (!hi) err(EX_OSERR, "calloc"); - highlight(*lang, hi, str); + highlight(lang, hi, str); - if (format->header) format->header(name); + if (format.header) format.header(opts); size_t run = 0; for (size_t i = 0; i < len; i += run) { for (run = 0; i + run < len; ++run) { if (hi[i + run] != hi[i]) break; } - format->output(hi[i], &str[i], run); + format.output(opts, hi[i], &str[i], run); } - if (format->footer) format->footer(name); + if (format.footer) format.footer(opts); } diff --git a/bin/man1/hi.1 b/bin/man1/hi.1 index 77bb1c46..e1ab5b5b 100644 --- a/bin/man1/hi.1 +++ b/bin/man1/hi.1 @@ -11,6 +11,7 @@ .Op Fl f Ar format .Op Fl l Ar lang .Op Fl n Ar name +.Op Fl o Ar opts .Op Ar file .Nm .Fl c @@ -37,20 +38,48 @@ Set the input language. .It Fl n Ar name Override the name used to infer the input language. +.It Fl o Ar opts +Set output format options. +.Ar opts +is a comma-separated list of options. .El . .Ss Output Formats The default output format is .Cm ansi . -.Bl -tag -width "html-document" +. +.Bl -tag -width Ds .It Cm ansi -ANSI terminal escape codes. +Output ANSI terminal escape codes. +. .It Cm irc -IRC formatting codes. +Output IRC formatting codes. +.Pp +The options are as follows: +.Bl -tag -width "monospace" +.It Cm monospace +Use the monospace formatting code +introduced by IRCCloud. +.El +. 
.It Cm html -HTML fragment. -.It Cm html-document -HTML document. +Output HTML +.Sy <pre>
+with
+.Sy <span>
+classes.
+.Pp
+The options are as follows:
+.Bl -tag -width "title=..."
+.It Cm document
+Output an HTML document
+with inline CSS.
+.It Cm title Ns = Ns Ar ...
+Set the title of the HTML document.
+The default title is the
+.Ar file
+name.
+.El
 .El
 .
 .Ss Input Languages
@@ -61,18 +90,22 @@ it may be inferred from the name set by
 or from the provided
 .Ar file
 name.
-.Bl -tag -width "make"
+.
+.Bl -tag -width Ds
 .It Cm c
 The C11 language.
+.
 .It Cm make
 The portable subset of
 .Xr make 1 .
 Variable substitution supports
 one level of nesting with the same delimiter.
+.
 .It Cm mdoc
 The
 .Xr mdoc 7
 language.
+.
 .It Cm sh
 The POSIX
 .Xr sh 1
-- 
cgit 1.4.1