From 202b7e8f93f2d7b52432ac7b359f9df32d46f683 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Thu, 31 May 2012 22:29:13 +0000 Subject: Implement the roff \z escape sequence, intended to output the next character without advancing the cursor position; implement it to simply skip the next character, as it will usually be overwritten. With this change, the pod2man(1) preamble user-defined string \*:, intended to render as a diaeresis or umlaut diacritic above the preceding character, is rendered in a slightly less ugly way, though still not correctly. It was rendered as "z.." and is now rendered as ".". Given that the definition of \*: uses elaborate manual \h positioning, there is little chance for mandoc(1) to ever render it correctly, but at least we can refrain from printing out a spurious "z", and we can make the \z do something semi-reasonable for easier cases. "just commit" kristaps@ --- html.c | 83 +++++++++++++++++++++++++++++++++++++++++++++------------------- html.h | 3 ++- mandoc.c | 14 +++++++++-- mandoc.h | 5 ++-- term.c | 68 +++++++++++++++++++++++++++++++++++++++++----------- term.h | 3 ++- 6 files changed, 131 insertions(+), 45 deletions(-) diff --git a/html.c b/html.c index 326df035..115b79a0 100644 --- a/html.c +++ b/html.c @@ -1,7 +1,7 @@ -/* $Id: html.c,v 1.150 2011/10/05 21:35:17 kristaps Exp $ */ +/* $Id: html.c,v 1.151 2012/05/31 22:29:13 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2011 Ingo Schwarze + * Copyright (c) 2011, 2012 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -262,8 +262,8 @@ print_metaf(struct html *h, enum mandoc_esc deco) int html_strlen(const char *cp) { - int ssz, sz; - const char *seq, *p; + size_t rsz; + int skip, sz; /* * Account for escaped sequences within string length @@ -274,10 +274,21 @@ html_strlen(const char *cp) */ sz = 0; - while (NULL != (p 
= strchr(cp, '\\'))) { - sz += (int)(p - cp); - ++cp; - switch (mandoc_escape(&cp, &seq, &ssz)) { + skip = 0; + while (1) { + rsz = strcspn(cp, "\\"); + if (rsz) { + cp += rsz; + if (skip) { + skip = 0; + rsz--; + } + sz += rsz; + } + if ('\0' == *cp) + break; + cp++; + switch (mandoc_escape(&cp, NULL, NULL)) { case (ESCAPE_ERROR): return(sz); case (ESCAPE_UNICODE): @@ -285,15 +296,19 @@ html_strlen(const char *cp) case (ESCAPE_NUMBERED): /* FALLTHROUGH */ case (ESCAPE_SPECIAL): - sz++; + if (skip) + skip = 0; + else + sz++; + break; + case (ESCAPE_SKIPCHAR): + skip = 1; break; default: break; } } - - assert(sz >= 0); - return(sz + strlen(cp)); + return(sz); } static int @@ -308,6 +323,12 @@ print_encode(struct html *h, const char *p, int norecurse) nospace = 0; while ('\0' != *p) { + if (HTML_SKIPCHAR & h->flags && '\\' != *p) { + h->flags &= ~HTML_SKIPCHAR; + p++; + continue; + } + sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); @@ -337,6 +358,31 @@ print_encode(struct html *h, const char *p, int norecurse) if (ESCAPE_ERROR == esc) break; + switch (esc) { + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTPREV): + /* FALLTHROUGH */ + case (ESCAPE_FONTBOLD): + /* FALLTHROUGH */ + case (ESCAPE_FONTITALIC): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): + if (0 == norecurse) + print_metaf(h, esc); + continue; + case (ESCAPE_SKIPCHAR): + h->flags |= HTML_SKIPCHAR; + continue; + default: + break; + } + + if (h->flags & HTML_SKIPCHAR) { + h->flags &= ~HTML_SKIPCHAR; + continue; + } + switch (esc) { case (ESCAPE_UNICODE): /* Skip passed "u" header. 
*/ @@ -356,19 +402,6 @@ print_encode(struct html *h, const char *p, int norecurse) else if (-1 == c && 1 == len) putchar((int)*seq); break; - case (ESCAPE_FONT): - /* FALLTHROUGH */ - case (ESCAPE_FONTPREV): - /* FALLTHROUGH */ - case (ESCAPE_FONTBOLD): - /* FALLTHROUGH */ - case (ESCAPE_FONTITALIC): - /* FALLTHROUGH */ - case (ESCAPE_FONTROMAN): - if (norecurse) - break; - print_metaf(h, esc); - break; case (ESCAPE_NOSPACE): if ('\0' == *p) nospace = 1; diff --git a/html.h b/html.h index 60960702..9bb5cb42 100644 --- a/html.h +++ b/html.h @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.47 2011/10/05 21:35:17 kristaps Exp $ */ +/* $Id: html.h,v 1.48 2012/05/31 22:29:13 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * @@ -117,6 +117,7 @@ struct html { #define HTML_PREKEEP (1 << 3) #define HTML_NONOSPACE (1 << 4) /* never add spaces */ #define HTML_LITERAL (1 << 5) /* literal (e.g.,
<PRE>) context */
+#define	HTML_SKIPCHAR	 (1 << 6) /* skip the next character */
 	struct tagq	  tags; /* stack of open tags */
 	struct rofftbl	  tbl; /* current table */
 	struct tag	 *tblt; /* current open table scope */
diff --git a/mandoc.c b/mandoc.c
index 604bb67e..bde6e559 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,7 +1,7 @@
-/*	$Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */
+/*	$Id: mandoc.c,v 1.63 2012/05/31 22:29:13 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons 
- * Copyright (c) 2011 Ingo Schwarze 
+ * Copyright (c) 2011, 2012 Ingo Schwarze 
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -142,6 +142,16 @@ mandoc_escape(const char **end, const char **start, int *sz)
 		term = '\'';
 		break;
 
+	/*
+	 * The \z escape is supposed to output the following
+	 * character without advancing the cursor position.  
+	 * Since we are mostly dealing with terminal mode,
+	 * let us just skip the next character.
+	 */
+	case ('z'):
+		(*end)++;
+		return(ESCAPE_SKIPCHAR);
+
 	/*
 	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
 	 * 'X' is the trigger.  These have opaque sub-strings.
diff --git a/mandoc.h b/mandoc.h
index 2665371f..d0560cfc 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/*	$Id: mandoc.h,v 1.101 2012/05/27 17:54:54 schwarze Exp $ */
+/*	$Id: mandoc.h,v 1.102 2012/05/31 22:29:13 schwarze Exp $ */
 /*
  * Copyright (c) 2010, 2011 Kristaps Dzonsons 
  *
@@ -379,7 +379,8 @@ enum	mandoc_esc {
 	ESCAPE_FONTPREV, /* previous font mode */
 	ESCAPE_NUMBERED, /* a numbered glyph */
 	ESCAPE_UNICODE, /* a unicode codepoint */
-	ESCAPE_NOSPACE /* suppress space if the last on a line */
+	ESCAPE_NOSPACE, /* suppress space if the last on a line */
+	ESCAPE_SKIPCHAR /* skip the next character */
 };
 
 typedef	void	(*mandocmsg)(enum mandocerr, enum mandoclevel,
diff --git a/term.c b/term.c
index 3dadb7e4..a10fce7a 100644
--- a/term.c
+++ b/term.c
@@ -1,4 +1,4 @@
-/*	$Id: term.c,v 1.202 2012/05/27 18:02:49 schwarze Exp $ */
+/*	$Id: term.c,v 1.203 2012/05/31 22:29:13 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons 
  * Copyright (c) 2010, 2011, 2012 Ingo Schwarze 
@@ -33,6 +33,7 @@
 #include "term.h"
 #include "main.h"
 
+static	size_t		 cond_width(const struct termp *, int, int *);
 static	void		 adjbuf(struct termp *p, int);
 static	void		 bufferc(struct termp *, char);
 static	void		 encode(struct termp *, const char *, size_t);
@@ -419,12 +420,17 @@ term_word(struct termp *p, const char *word)
 	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
 
 	while ('\0' != *word) {
-		if ((ssz = strcspn(word, "\\")) > 0)
+		if ('\\' != *word) {
+			if (TERMP_SKIPCHAR & p->flags) {
+				p->flags &= ~TERMP_SKIPCHAR;
+				word++;
+				continue;
+			}
+			ssz = strcspn(word, "\\");
 			encode(p, word, ssz);
-
-		word += (int)ssz;
-		if ('\\' != *word)
+			word += (int)ssz;
 			continue;
+		}
 
 		word++;
 		esc = mandoc_escape(&word, &seq, &sz);
@@ -480,9 +486,14 @@ term_word(struct termp *p, const char *word)
 			term_fontlast(p);
 			break;
 		case (ESCAPE_NOSPACE):
-			if ('\0' == *word)
+			if (TERMP_SKIPCHAR & p->flags)
+				p->flags &= ~TERMP_SKIPCHAR;
+			else if ('\0' == *word)
 				p->flags |= TERMP_NOSPACE;
 			break;
+		case (ESCAPE_SKIPCHAR):
+			p->flags |= TERMP_SKIPCHAR;
+			break;
 		default:
 			break;
 		}
@@ -522,6 +533,11 @@ encode1(struct termp *p, int c)
 {
 	enum termfont	  f;
 
+	if (TERMP_SKIPCHAR & p->flags) {
+		p->flags &= ~TERMP_SKIPCHAR;
+		return;
+	}
+
 	if (p->col + 4 >= p->maxcols)
 		adjbuf(p, p->col + 4);
 
@@ -545,6 +561,11 @@ encode(struct termp *p, const char *word, size_t sz)
 	enum termfont	  f;
 	int		  i, len;
 
+	if (TERMP_SKIPCHAR & p->flags) {
+		p->flags &= ~TERMP_SKIPCHAR;
+		return;
+	}
+
 	/* LINTED */
 	len = sz;
 
@@ -593,12 +614,22 @@ term_len(const struct termp *p, size_t sz)
 	return((*p->width)(p, ' ') * sz);
 }
 
+static size_t
+cond_width(const struct termp *p, int c, int *skip)
+{
+
+	if (*skip) {
+		(*skip) = 0;
+		return(0);
+	} else
+		return((*p->width)(p, c));
+}
 
 size_t
 term_strlen(const struct termp *p, const char *cp)
 {
 	size_t		 sz, rsz, i;
-	int		 ssz, c;
+	int		 ssz, skip, c;
 	const char	*seq, *rhs;
 	enum mandoc_esc	 esc;
 	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
@@ -610,10 +641,11 @@ term_strlen(const struct termp *p, const char *cp)
 	 */
 
 	sz = 0;
+	skip = 0;
 	while ('\0' != *cp) {
 		rsz = strcspn(cp, rej);
 		for (i = 0; i < rsz; i++)
-			sz += (*p->width)(p, *cp++);
+			sz += cond_width(p, *cp++, &skip);
 
 		c = 0;
 		switch (*cp) {
@@ -630,14 +662,14 @@ term_strlen(const struct termp *p, const char *cp)
 						(seq + 1, ssz - 1);
 					if ('\0' == c)
 						break;
-					sz += (*p->width)(p, c);
+					sz += cond_width(p, c, &skip);
 					continue;
 				case (ESCAPE_SPECIAL):
 					c = mchars_spec2cp
 						(p->symtab, seq, ssz);
 					if (c <= 0)
 						break;
-					sz += (*p->width)(p, c);
+					sz += cond_width(p, c, &skip);
 					continue;
 				default:
 					break;
@@ -647,12 +679,12 @@ term_strlen(const struct termp *p, const char *cp)
 
 			switch (esc) {
 			case (ESCAPE_UNICODE):
-				sz += (*p->width)(p, '?');
+				sz += cond_width(p, '?', &skip);
 				break;
 			case (ESCAPE_NUMBERED):
 				c = mchars_num2char(seq, ssz);
 				if ('\0' != c)
-					sz += (*p->width)(p, c);
+					sz += cond_width(p, c, &skip);
 				break;
 			case (ESCAPE_SPECIAL):
 				rhs = mchars_spec2str
@@ -664,6 +696,9 @@ term_strlen(const struct termp *p, const char *cp)
 				rhs = seq;
 				rsz = ssz;
 				break;
+			case (ESCAPE_SKIPCHAR):
+				skip = 1;
+				break;
 			default:
 				break;
 			}
@@ -671,15 +706,20 @@ term_strlen(const struct termp *p, const char *cp)
 			if (NULL == rhs)
 				break;
 
+			if (skip) {
+				skip = 0;
+				break;
+			}
+
 			for (i = 0; i < rsz; i++)
 				sz += (*p->width)(p, *rhs++);
 			break;
 		case (ASCII_NBRSP):
-			sz += (*p->width)(p, ' ');
+			sz += cond_width(p, ' ', &skip);
 			cp++;
 			break;
 		case (ASCII_HYPH):
-			sz += (*p->width)(p, '-');
+			sz += cond_width(p, '-', &skip);
 			cp++;
 			break;
 		default:
diff --git a/term.h b/term.h
index 1745a341..3da11394 100644
--- a/term.h
+++ b/term.h
@@ -1,4 +1,4 @@
-/*	$Id: term.h,v 1.91 2012/05/27 18:02:49 schwarze Exp $ */
+/*	$Id: term.h,v 1.92 2012/05/31 22:29:13 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons 
  *
@@ -78,6 +78,7 @@ struct	termp {
 #define	TERMP_ANPREC	 (1 << 13)	/* See termp_an_pre(). */
 #define	TERMP_KEEP	 (1 << 14)	/* Keep words together. */
 #define	TERMP_PREKEEP	 (1 << 15)	/* ...starting with the next one. */
+#define	TERMP_SKIPCHAR	 (1 << 16)	/* Skip the next character. */
 	int		 *buf;		/* Output buffer. */
 	enum termenc	  enc;		/* Type of encoding. */
 	struct mchars	 *symtab;	/* Encoded-symbol table. */
-- 
cgit v1.2.3