From 1ba049268ba27c22e168cd033b2e9c9f66c04eb8 Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Wed, 7 Dec 2011 00:23:04 +0000 Subject: Add cat2html functionality. This keeps track of italic/bold mode per line and properly handles some funny troff-isms we've exposed. I originally wanted to use man2html.c (found on W3's website with no known author) but the code is dodgy. This will need some more work (links, etc.) but does a decent job thus far. Note: I think it's better style NOT to use <pre>, and instead have each line employ <br> afterward. This allows browsers to break the lines if necessary. This can be changed trivially (replacing the newline and pre tags with the <br>
and new tag). --- cgi.c | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 184 insertions(+), 57 deletions(-) diff --git a/cgi.c b/cgi.c index 1d88c434..17bc6ad7 100644 --- a/cgi.c +++ b/cgi.c @@ -1,4 +1,4 @@ -/* $Id: cgi.c,v 1.9 2011/12/04 22:52:50 kristaps Exp $ */ +/* $Id: cgi.c,v 1.10 2011/12/07 00:23:04 kristaps Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons * @@ -69,6 +69,7 @@ static int atou(const char *, unsigned *); static void catman(const char *); static void format(const char *); static void html_print(const char *); +static void html_putchar(char); static int kval_decode(char *); static void kval_parse(struct kval **, size_t *, char *); static void kval_free(struct kval *, size_t); @@ -80,9 +81,8 @@ static void pg_show(const struct manpaths *, const struct req *, char *); static void resp_bad(void); static void resp_baddb(void); -static void resp_badexpr(const struct req *); -static void resp_badmanual(void); -static void resp_badpage(void); +static void resp_error400(void); +static void resp_error404(const char *); static void resp_begin_html(int, const char *); static void resp_begin_http(int, const char *); static void resp_end_html(void); @@ -123,6 +123,29 @@ atou(const char *buf, unsigned *v) return(1); } +static void +html_putchar(char c) +{ + + switch (c) { + case ('"'): + printf(""e;"); + break; + case ('&'): + printf("&"); + break; + case ('>'): + printf(">"); + break; + case ('<'): + printf("<"); + break; + default: + putchar((unsigned char)c); + break; + } +} + /* * Print a word, escaping HTML along the way. * This will pass non-ASCII straight to output: be warned! 
@@ -130,29 +153,11 @@ atou(const char *buf, unsigned *v) static void html_print(const char *p) { - char c; if (NULL == p) return; - while ('\0' != *p) - switch ((c = *p++)) { - case ('"'): - printf(""e;"); - break; - case ('&'): - printf("&"); - break; - case ('>'): - printf(">"); - break; - case ('<'): - printf("<"); - break; - default: - putchar((unsigned char)c); - break; - } + html_putchar(*p++); } static void @@ -290,6 +295,8 @@ resp_begin_html(int code, const char *msg) " \"http://www.w3.org/TR/html4/strict.dtd\">" "\n" "" "\n" " " "\n" + " " "\n" " System Manpage Reference" "\n" " " "\n" " " "\n" @@ -323,7 +330,7 @@ resp_searchform(const struct req *req) printf("
\n"); - puts("
" "\n" + puts("
\n" " "); printf(" Terms: Page not found.

"); + resp_begin_html(400, "Query Malformed"); + puts("

Malformed Query

\n" + "

\n" + " The query your entered was malformed.\n" + " Try again from the\n" + " main page\n" + "

"); resp_end_html(); } static void -resp_badmanual(void) +resp_error404(const char *page) { resp_begin_html(404, "Not Found"); - puts("

Requested manual not found.

"); - resp_end_html(); -} - -static void -resp_badexpr(const struct req *req) -{ - - resp_begin_html(200, NULL); - resp_searchform(req); - puts("

Your search didn't work.

"); + puts("

Page Not Found

\n" + "

\n" + " The page you're looking for, "); + printf(" "); + html_print(page); + puts(",\n" + " could not be found.\n" + " Try searching from the\n" + " main page\n" + "

"); resp_end_html(); } @@ -448,24 +459,134 @@ pg_index(const struct manpaths *ps, const struct req *req, char *path) static void catman(const char *file) { - int fd; - char buf[BUFSIZ]; - ssize_t ssz; + FILE *f; + size_t len; + int i; + char *p; + int italic, bold; - if (-1 == (fd = open(file, O_RDONLY, 0))) { + if (NULL == (f = fopen(file, "r"))) { resp_baddb(); return; } - resp_begin_http(200, NULL); + resp_begin_html(200, NULL); - while ((ssz = read(fd, buf, BUFSIZ)) > 0) - write(STDOUT_FILENO, buf, (size_t)ssz); + puts("
");
+	while (NULL != (p = fgetln(f, &len))) {
+		bold = italic = 0;
+		for (i = 0; i < (int)len - 1; i++) {
+			/*
+			 * A backspace here has nothing to overstrike, so
+			 * the catpage is out of state (bogus).  Skip it,
+			 * and any stray newline, and keep going.
+			 */
+
+			if ('\b' == p[i] || '\n' == p[i])
+				continue;
+
+			/*
+			 * Not followed by a backspace: a plain character.
+			 * Close any open bold/italic scope and print it.
+			 * Otherwise this starts a "c\bX" overstrike, which
+			 * needs the X: drop it if the line ends too soon.
+			 */
+
+			if ('\b' != p[i + 1]) {
+				if (italic)
+					printf("</i>");
+				if (bold)
+					printf("</b>");
+				italic = bold = 0;
+				html_putchar(p[i]);
+				continue;
+			} else if (i + 2 >= (int)len)
+				continue;
+
+			/* Italic mode: "_\bX" prints X in italics. */
+
+			if ('_' == p[i]) {
+				if (bold)
+					printf("</b>");
+				if ( ! italic)
+					printf("<i>");
+				bold = 0;
+				italic = 1;
+				i += 2;
+				html_putchar(p[i]);
+				continue;
+			}
 
-	if (ssz < 0)
-		perror(file);
+			/*
+			 * Odd troff overstrikes (partner at p[i + 2], after the
+			 * backspace) become one '*' or '+'.  From man2html.c.
+			 */
+
+			if (('+' == p[i] && 'o' == p[i + 2]) ||
+					('o' == p[i] && '+' == p[i + 2]) ||
+					('|' == p[i] && '=' == p[i + 2]) ||
+					('=' == p[i] && '|' == p[i + 2]) ||
+					('*' == p[i] && '=' == p[i + 2]) ||
+					('=' == p[i] && '*' == p[i + 2]) ||
+					('*' == p[i] && '|' == p[i + 2]) ||
+					('|' == p[i] && '*' == p[i + 2]))  {
+				if (italic)
+					printf("</i>");
+				if (bold)
+					printf("</b>");
+				italic = bold = 0;
+				putchar('*');
+				i += 2;
+				continue;
+			} else if (('|' == p[i] && '-' == p[i + 2]) ||
+					('-' == p[i] && '|' == p[i + 2]) ||
+					('+' == p[i] && '-' == p[i + 2]) ||
+					('-' == p[i] && '+' == p[i + 2]) ||
+					('+' == p[i] && '|' == p[i + 2]) ||
+					('|' == p[i] && '+' == p[i + 2]))  {
+				if (italic)
+					printf("</i>");
+				if (bold)
+					printf("</b>");
+				italic = bold = 0;
+				putchar('+');
+				i += 2;
+				continue;
+			}
 
-	close(fd);
+			/* Bold mode: any other overstrike; print its 2nd char. */
+			
+			if (italic)
+				printf("</i>");
+			if ( ! bold)
+				printf("<b>");
+			bold = 1;
+			italic = 0;
+			i += 2;
+			html_putchar(p[i]);
+		}
+
+		/*
+		 * Close scopes left open at end of line, then print
+		 * the final byte unless it is the newline itself.
+		 */
+
+		if (italic)
+			printf("</i>");
+		if (bold)
+			printf("</b>");
+
+		if (i == (int)len - 1 && '\n' != p[i])
+			html_putchar(p[i]);
+
+		putchar('\n');
+	}
+
+	puts("
\n" + "\n" + ""); + + fclose(f); } static void @@ -477,6 +598,7 @@ format(const char *file) struct man *man; void *vp; enum mandoclevel rc; + char opts[MAXPATHLEN + 128]; if (-1 == (fd = open(file, O_RDONLY, 0))) { resp_baddb(); @@ -492,8 +614,13 @@ format(const char *file) return; } + snprintf(opts, sizeof(opts), "style=/style.css," + "man=%s/search.html?sec=%%S&expr=%%N," + "includes=/cgi-bin/man.cgi/usr/include/%%I", + progname); + mparse_result(mp, &mdoc, &man); - vp = html_alloc(NULL); + vp = html_alloc(opts); if (NULL != mdoc) { resp_begin_http(200, NULL); @@ -520,19 +647,19 @@ pg_show(const struct manpaths *ps, const struct req *req, char *path) DBT key, val; if (NULL == path) { - resp_badmanual(); + resp_error400(); return; } else if (NULL == (sub = strrchr(path, '/'))) { - resp_badmanual(); + resp_error400(); return; } else *sub++ = '\0'; if ( ! (atou(path, &vol) && atou(sub, &rec))) { - resp_badmanual(); + resp_error400(); return; } else if (vol >= (unsigned int)ps->sz) { - resp_badmanual(); + resp_error400(); return; } @@ -551,7 +678,7 @@ pg_show(const struct manpaths *ps, const struct req *req, char *path) key.size = 4; if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) { - rc < 0 ? resp_baddb() : resp_badmanual(); + rc < 0 ? resp_baddb() : resp_error400(); goto out; } @@ -639,7 +766,7 @@ pg_search(const struct manpaths *ps, const struct req *req, char *path) if (0 == rc) resp_baddb(); else if (-1 == rc) - resp_badexpr(req); + resp_search(NULL, 0, (void *)req); for (i = 0; i < sz; i++) free(cp[i]); @@ -729,7 +856,7 @@ main(void) pg_show(&paths, &req, subpath); break; default: - resp_badpage(); + resp_error404(path); break; } -- cgit v1.2.3