fixed all the issues that started from the split refactor - txt2html

commit d689bdd2d88dc5372d89196240f8a4153a76e8cd
parent b76435c83bed126c9bab3767692545004659f469
Author: gearsix <gearsix@tuta.io>
Date:   Mon, 15 Nov 2021 15:25:03 +0000

fixed all the issues that started from the split refactor

Diffstat:
M node.c  | 53 +++++++++++++++++++++++------------------------------
M parse.c  | 73 +++++++++++++++++++++++++++++++++++++++++++------------------------------
M rule.c  | 6 +++---
M txt2html.c  | 65 +++++++++++++++++++++++++++++++++--------------------------------
M txt2html.h  | 14 +++++++-------

5 files changed, 109 insertions(+), 102 deletions(-)
diff --git a/node.c b/node.c
@@ -4,8 +4,6 @@ struct node *node_create(struct node *prev, NodeType t)
 {
 	struct node *n = calloc(1, sizeof(struct node));
 
-	n->next = 0;
-	
 	if (prev) {
 		n->prev = prev;
 		prev->next = n;
@@ -16,22 +14,26 @@ struct node *node_create(struct node *prev, NodeType t)
 	if (t == OPEN+BR+CLOSE) {
 		n->buf = "<br/>\n\0";
 	} else if (t & OPEN) {
-		if      (t & H1)  n->buf = "<h1>\0";
-		else if (t & H2)  n->buf = "<h2>\0";
-		else if (t & PRE) n->buf = "<pre>\0";
-		else if (t & P)   n->buf = "<p>\0";
-		else if (t & LI)  n->buf = "<li>\0"; // must come before OL/UL
-		else if (t & OL)  n->buf = "<ol>\0";
-		else if (t & UL)  n->buf = "<ul>\0";
+		t &= 0x0F;
+		if      (t == H1)  n->buf = "<h1>\0";
+		else if (t == H2)  n->buf = "<h2>\0";
+		else if (t == PRE) n->buf = "<pre>\0";
+		else if (t == P)   n->buf = "<p>\0";
+		else if (t == OL)  n->buf = "<ol>\n\0";
+		else if (t == UL)  n->buf = "<ul>\n\0";
+		else if (t == OL+LI || t == UL+LI)
+			n->buf = "<li>\0";
 	} else if (t & CLOSE) {
-		node_writec(prev, EOF);
-		if      (t & H1)  n->buf = "</h1>\n\0";
-		else if (t & H2)  n->buf = "</h2>\n\0";
-		else if (t & PRE) n->buf = "</pre>\n\0";
-		else if (t & P)   n->buf = "</p>\n\0";
-		else if (t & LI)  n->buf = "</li>\n\0"; // must come before OL/UL
-		else if (t & OL)  n->buf = "</ol>\n\0";
-		else if (t & UL)  n->buf = "</ul>\n\0";
+		t &= 0x0F;
+		node_writec(&prev, EOF);
+		if      (t == H1)  n->buf = "</h1>\n\0";
+		else if (t == H2)  n->buf = "</h2>\n\0";
+		else if (t == PRE) n->buf = "</pre>\n\0";
+		else if (t == P)   n->buf = "</p>\n\0";
+		else if (t == OL)  n->buf = "</ol>\n\0";
+		else if (t == UL)  n->buf = "</ul>\n\0";
+		else if (t == UL+LI || t == OL+LI)
+			n->buf = "</li>\n\0";
 	}
 
 	return n;
@@ -40,28 +42,21 @@ struct node *node_create(struct node *prev, NodeType t)
 // writebuf has an internal static buffer (`buf`) that it writes `c` to.
 // if `c == EOF` or `buf` reaches `BUFSIZ`, then `buf` it's written to n->buf.
 // `n->buf` will only be allocated required memory.
-void node_writec(struct node *n, int c)
+void node_writec(struct node **n, int c)
 {
 	assert(n);
 
-	static struct node *last_n;
-
 	static int pg = 0;
 	static int len = 0;
 	static char buf[BUFSIZ+1];
 
-	if ((last_n && last_n != n) || len+2 == BUFSIZ || (c == EOF && len > 0)) {
+	if (len+2 == BUFSIZ || (c == EOF && len > 0)) {
 		if (c == EOF) {
 			buf[len++] = '\0';
 			buf[len++] = '$'; // signal malloc not assigned const
 		}
-		if (last_n != n) {
-			struct node *tmp = last_n;
-			last_n = n;
-			n = tmp;
-		}
-		n->buf = (pg == 0) ? malloc(len) : realloc(n->buf, strlen(n->buf) + len);
-		memmove(n->buf, buf, len);
+		(*n)->buf = (pg == 0) ? malloc(len) : realloc((*n)->buf, strlen((*n)->buf) + len);
+		memmove((*n)->buf, buf, len);
 		++pg;
 		len = 0;
 		memset(buf, '\0', BUFSIZ); 
@@ -80,8 +75,6 @@ void node_writec(struct node *n, int c)
 			len += 1;
 			break;
 	}
-
-	last_n = n;
 }
 
 size_t node_next(const char *str, struct node **n)
diff --git a/parse.c b/parse.c
@@ -1,54 +1,64 @@
 #include "txt2html.h"
 
-struct node *parse_buf(const char *buf, struct node **queue, uint8_t opts)
+struct node *parse_buf(const char *buf, struct node **n, uint8_t opts)
 {
-	struct node *n = *queue;
 	size_t i = 0;
 	size_t len = (buf) ? strlen(buf) : 0;
 
-	if (!buf && queue)
-		return node_create(*queue, CLOSE+n->type);
+	if (!buf && (*n)) {
+		*n = node_create(*n, CLOSE+(*n)->type);
+		i = len;
+	}
 
 	while (i < len && buf) {
 		while (buf[i] == '\n') ++i;
-		if (!n) i += node_next(&buf[i], &n);
-		switch (n->type) {
+		if (!(*n) || ((*n)->type & CLOSE))
+			i += node_next(&buf[i], n);
+		switch ((*n)->type) {
 			case H1:
-			case H2:    i += parse_heading(&buf[i], n);   break;
+			case H2:
+				i += parse_heading(&buf[i], n);
+				break;
 			case P:     i += parse_p(&buf[i], n, opts);   break;
 			case OL+LI: i += parse_oli(&buf[i], n, opts); break;
 			case UL+LI: i += parse_uli(&buf[i], n, opts); break;
 			case PRE:
 				i += parse_textblock(&buf[i], n, opts & OPT_BR);
+				*n = node_create(*n, CLOSE+PRE);
+				break;
+			default:
+				i += node_next(&buf[i], n);
 				break;
-			default:    i += node_next(&buf[i], &n);       break;
 		}
 	}
-	return n;
+	
+	return *n;
 }
 
-size_t parse_textblock(const char *str, struct node *n, bool softbreaks)
+size_t parse_textblock(const char *str, struct node **n, bool softbreaks)
 {
 	size_t ret = 0;
 
-	while (str[ret] != '\0' && (isprint(str[ret]) || str[ret] == '\n')) {
+	while (str[ret] != '\0' && (isprint(str[ret]) || str[ret] == '\n' || str[ret] == '\t')) {
 		if (str[ret] == '\n' && str[ret+1] == '\n')
 			break;
+		else if ((*n)->type == PRE && str[ret] == '\t')
+			++ret;
 		else if (str[ret] == '\n') {
-			if (((n)->type & (OL+LI) && rule_match(&str[ret+1], OPEN+OL+LI)) ||
-				((n)->type & (UL+LI) && rule_match(&str[ret+1], OPEN+UL+LI))) {
+			if (((*n)->type & (OL+LI) && rule_match(&str[ret+1], OPEN+OL+LI)) ||
+				((*n)->type & (UL+LI) && rule_match(&str[ret+1], OPEN+UL+LI))) {
 				++ret;
 				break;
 			}
 
-			if (n->type == PRE && str[ret+1] == '\t') {
+			if ((*n)->type == PRE && str[ret+1] == '\t') {
 				node_writec(n, '\n');
 				++ret;
-			} else if (n->type == PRE && str[ret+1] != '\t') {
+			} else if ((*n)->type == PRE && str[ret+1] != '\t') {
 				break;
 			} else if (softbreaks) {
-				n = node_create(n, OPEN+BR+CLOSE);
-				n = node_create(n, (n)->prev->type);
+				*n = node_create(*n, OPEN+BR+CLOSE);
+				*n = node_create(*n, (*n)->prev->type);
 			} else {
 				node_writec(n, str[ret]);
 			}
@@ -57,66 +67,69 @@ size_t parse_textblock(const char *str, struct node *n, bool softbreaks)
 		}
 		++ret;
 	}
+	node_writec(n, EOF);
 
 	return ret;
 }
 
-size_t parse_heading(const char *str, struct node *n)
+size_t parse_heading(const char *str, struct node **n)
 {
 	assert(str);
 	size_t i = 0;
 	while(str[i] && str[i] != '\n')
 		node_writec(n, str[i++]);
+	node_writec(n, EOF);
 	do { ++i; } while (str[i] == '-' || str[i] == '=');
+	*n = node_create(*n, CLOSE+(*n)->type);
 	return i;
 }
 
-size_t parse_oli(const char *str, struct node *n, uint8_t opts)
+size_t parse_oli(const char *str, struct node **n, uint8_t opts)
 {
 	assert(str);
 	size_t i = 0, len = strlen(str);
 	while(i < len) {
 		i += parse_textblock(&str[i], n, opts & OPT_BR);
-		n = node_create(n, CLOSE+OL+LI);
+		*n = node_create(*n, CLOSE+OL+LI);
 
 		if (str[i] == '\0' || rule_match(&str[i], CLOSE+OL)) {
 			i += rule_len(CLOSE+OL);
-			n = node_create(n, CLOSE+OL);
+			*n = node_create(*n, CLOSE+OL);
 			break;
 		} else if (rule_match(&str[i], OPEN+OL+LI)) {
 			i += rule_len(OPEN+OL+LI);
-			n = node_create(n, OPEN+OL+LI);
-			n = node_create(n, OL+LI);
+			*n = node_create(*n, OPEN+OL+LI);
+			*n = node_create(*n, OL+LI);
 		}
 	}
 	return i;
 }
 
-size_t parse_p(const char *str, struct node *n, uint8_t opts)
+size_t parse_p(const char *str, struct node **n, uint8_t opts)
 {
 	size_t i = parse_textblock(str, n, opts & OPT_BR);
 	if (str[i] == '\n' && str[i+1] == '\n')
-		n = node_create(n, CLOSE+P);
+		*n = node_create(*n, CLOSE+P);
 	return i;
 }
 
-size_t parse_uli(const char *str, struct node *n, uint8_t opts)
+size_t parse_uli(const char *str, struct node **n, uint8_t opts)
 {
 	size_t ret = 0;
 	size_t len = strlen(str);
 
 	while (ret < len) {
 		ret += parse_textblock(&str[ret], n, opts & OPT_BR);
-		n = node_create(n, CLOSE+UL+LI);
+		*n = node_create(*n, CLOSE+UL+LI);
 
 		if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+UL)) {
 			ret += rule_len(CLOSE+UL);
-			n = node_create(n, CLOSE+UL);
+			*n = node_create(*n, CLOSE+UL);
 			break;
 		} else if (rule_match(&str[ret], OPEN+UL+LI)) {
 			ret += rule_len(OPEN+UL+LI);
-			n = node_create(n, OPEN+UL+LI);
-			n = node_create(n, UL+LI);
+			*n = node_create(*n, OPEN+UL+LI);
+			*n = node_create(*n, UL+LI);
 		} else break;
 	}
 
diff --git a/rule.c b/rule.c
@@ -23,7 +23,7 @@ bool rule_match(const char *str, NodeType type)
 	if ((type & CLOSE) && strlen(str) >= 2)
 		return (str[0] == '\n' && str[1] == '\n');
 
-	bool match;
+	bool match = false;
 	switch (type) {
 		case H1:
 			match = (rule_match_heading(str) == H1);
@@ -57,9 +57,9 @@ NodeType rule_match_heading(const char *str)
 	NodeType heading = 0;
 	while (*str && *str++ != '\n'); // skip line
 	if (strlen(str) >= 3) {
-		if (*str == '=' && *str+1 == '=' && *str+2 == '=')
+		if (*str == '=' && *(str+1) == '=' && *(str+2) == '=')
 			heading = H1;
-		else if (*str == '-' && *str+1 == '-' && *str+2 == '-')
+		else if (*str == '-' && *(str+1) == '-' && *(str+2) == '-')
 			heading = H2;
 	}
 	return heading;	
diff --git a/txt2html.c b/txt2html.c
@@ -1,9 +1,9 @@
 #include "txt2html.h"
 
-struct node *parsef(FILE *f);
-int readq(struct node *queue);
+struct node *convf(FILE *f);
+int readn(struct node *n);
 
-uint8_t opts;
+static uint8_t opts; // make extern if passing it about becomes a pain
 
 void help()
 {
@@ -27,6 +27,7 @@ void verbose(const char *fmt, ...)
 		va_start(args, fmt);
 		vprintf(fmt, args);
 		va_end(args);
+		fflush(stdout);
 	}
 }
 
@@ -61,7 +62,7 @@ int main(int argc, char **argv)
 
 	int a;
 	FILE *f;
-	struct node *queue;
+	struct node *n;
 	for (a = 1; a < argc; ++a) {
 		if (strlen(argv[a]) == 0)
 			continue;
@@ -72,19 +73,19 @@ int main(int argc, char **argv)
 			continue;
 		}
 
-		queue = parsef(f);
-		verbose("counted %d nodes\n", readq(queue));
+		n = convf(f);
+		verbose("counted %d nodes\n", readn(n));
 		verbose("closing %s\n", argv[a]);
 		if (fclose(f) == EOF) perror("fclose failed");
 
-		while (!queue) {
-			if (queue->buf && queue->buf[strlen(queue->buf)+1] == '$')
-				free(queue->buf);
-			if (queue->next) free(queue->next);
-			if (queue->prev) {
-				queue = queue->prev;
+		while (!n) {
+			if (n->buf && n->buf[strlen(n->buf)+1] == '$')
+				free(n->buf);
+			if (n->next) free(n->next);
+			if (n->prev) {
+				n = n->prev;
 			} else {
-				free(queue);
+				free(n);
 				break;
 			}
 		}
@@ -94,30 +95,30 @@ int main(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
-struct node *parsef(FILE *f)
+struct node *convf(FILE *f)
 {
-	int n;
-	struct node *queue = 0;
-	do {
-		verbose("reading block...\r");
-		char buf[BUFSIZ] = {'\0'};
-		n = fread(buf, BUFSIZ-1, sizeof(char), f);
-		queue = parse_buf(buf, &queue, opts);
-		verbose("                \r");
-	} while (n > 0);
-	parse_buf(NULL, &queue, opts);
-	return queue;
+	int siz;
+	struct node *n = 0;
+	char buf[BUFSIZ] = {'\0'};
+	while (true) {
+		siz = fread(buf, sizeof(char), BUFSIZ-1, f);
+		if (siz == 0) break;
+		buf[siz+1] = '\0';
+		verbose("read %d bytes\n", siz);
+		n = parse_buf(buf, &n, opts);
+	}
+	n = parse_buf(NULL, &n, opts);
+	return n;
 }
 
-int readq(struct node *q)
+int readn(struct node *n)
 {
-	while (q->prev)
-		q = q->prev; // rewind
+	while (n->prev)
+		n = n->prev; // rewind
 	int cnt = 0;
-	while (q) {
-		if (q->buf != NULL)
-			printf("%s", q->buf);
-		q = q->next;
+	while (n) {
+		if (n->buf) printf("%s", n->buf);
+		n = n->next;
 		++cnt;
 	}
 	return cnt;
diff --git a/txt2html.h b/txt2html.h
@@ -60,7 +60,7 @@ struct node *node_create(struct node *prev, NodeType t);
 // If `c == EOF` or `buf` reaches `BUFSIZ` or `n` does not match
 // `n` from the previous call, then `buf` is written to the previous
 // `n` and reset for a new set of data.
-void node_writec(struct node *n, int c);
+void node_writec(struct node **n, int c);
 
 // rule `str` against a set of rules and determine the next node type.
 // `n` will be updated to a newly created node of the determined type.
@@ -70,31 +70,31 @@ size_t node_next(const char *str, struct node **n);
   parse.c
 ---------*/
 // main parsing function
-struct node *parse_buf(const char *buf, struct node **out, uint8_t opts);
+struct node *parse_buf(const char *buf, struct node **n, uint8_t opts);
 
 // parse `str` into `n` until *\0* or *\n\n* is found.
 // If `opts & OPT_BR` then `\n` will be parsed as a `<br/>` node.
 // If `n->type` is *PRE*, then parsing will also stop after the first
 // `\n` that is not followed by a `\t`.
 // The number of parsed bytes is returned
-size_t parse_textblock(const char *str, struct node *n, bool softbreaks);
+size_t parse_textblock(const char *str, struct node **n, bool softbreaks);
 
 // parse a line of text from `str` into `n` and skip the line after
 // aslong as it contains *=* or *-*.
 // The number of parsed bytes is returned.
-size_t parse_heading(const char *str, struct node *n);
+size_t parse_heading(const char *str, struct node **n);
 
 // parse `str` into `n` for *OL+LI* until *CLOSE+OL*.
 // The number of parsed bytes is returned.
-size_t parse_oli(const char *str, struct node *n, uint8_t opts);
+size_t parse_oli(const char *str, struct node **n, uint8_t opts);
 
 // parse `str` into`n` until *\0* or *\n\n*. After this, assign
 // a new node to `n` of CLOSE+P.
 // The number of parsed bytes is returned.
-size_t parse_p(const char *str, struct node *n, uint8_t opts);
+size_t parse_p(const char *str, struct node **n, uint8_t opts);
 
 // parse `str` into `n` for *UL+LI* until *CLOSE+UL*.
 // The number of parsed bytes is returned.
-size_t parse_uli(const char *str, struct node *n, uint8_t opts);
+size_t parse_uli(const char *str, struct node **n, uint8_t opts);
 
 #endif

	txt2html Converts plaintext to HTML
	git clone git://src.gearsix.net/txt2html	txt2html.zip
	Log | Files | Refs | Atom | README

M node.c  | 53 +++++++++++++++++++++++------------------------------
M parse.c  | 73 +++++++++++++++++++++++++++++++++++++++++++------------------------------
M rule.c  | 6 +++---
M txt2html.c  | 65 +++++++++++++++++++++++++++++++++--------------------------------
M txt2html.h  | 14 +++++++-------