From f5235d92eeb7477fa459b7f5665873c0c23452ff Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Tue, 7 Sep 2021 16:53:43 -0400 Subject: Add dehtml --- bin/man1/dehtml.1 | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 bin/man1/dehtml.1 (limited to 'bin/man1') diff --git a/bin/man1/dehtml.1 b/bin/man1/dehtml.1 new file mode 100644 index 00000000..a0c5a8c4 --- /dev/null +++ b/bin/man1/dehtml.1 @@ -0,0 +1,35 @@ +.Dd September 7, 2021 +.Dt DEHTML 1 +.Os +. +.Sh NAME +.Nm dehtml +.Nd extract text from HTML +. +.Sh SYNOPSIS +.Nm +.Op Fl s +.Op Ar +. +.Sh DESCRIPTION +The +.Nm +utility extracts text +from HTML documents. +Text inside +.Sy <title> , +.Sy <style> +and +.Sy <script> +tags is discarded. +Numeric and common named HTML entities +are converted. +. +.Pp +The arguments are as follows: +.Bl -tag -width Ds +.It Fl s +Collapse whitespace outside of +.Sy <pre> +tags. +.El -- cgit 1.4.1