swapped newnode params; added parsing for unordered & ordered lists. - txt2html

commit 4fb782045034e10871abd6e9dc915574ff7c1036
parent 75a996dcf73392b66ccf7ea34b3dff9654e464e1
Author: gearsix <gearsix@tuta.io>
Date:   Sat, 10 Jul 2021 00:30:49 +0100

swapped newnode params; added parsing for unordered & ordered lists.

- added a tidy way to catch the NUL terminator ('\0') in places, to stop it
from causing parse errors. This is also a tidy way to make sure nodes are
properly closed. This is what the new _EXIT:_ label is used for.
- also tidied up the formatting in newnode().

Diffstat:
M txt2html.c  | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------

1 file changed, 124 insertions(+), 44 deletions(-)
diff --git a/txt2html.c b/txt2html.c
@@ -31,20 +31,21 @@ int readp(struct node *n, char *txt, int txti);
 int isheading(char *txt, int txti);
 void writebuf(struct node *n, int c);
 struct node *txt2html(char *txt);
-struct node *newnode(struct node *prev, uint8_t tag, struct node *n);
+struct node *newnode(struct node *prev, struct node *next, uint8_t tag);
 struct node *closenode(struct node *n);
 
 const uint8_t opts = OPT_HB;
 
 int main(int argc, char **argv)
 {
-	char *text = "aaaaaaaaa\n====\n\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\naaaaaaa\n\naaaaaa\n---";
+	char *text = "12345\n====\n\n12345\n67890\n\n123\n---\n\n- 1\n- 2\n- 3\n";
 	char *html = malloc(4062);
 
 	struct node *n = txt2html(text);
 	while(n != NULL) {
 		if (n->buf != NULL)
-			printf("%02x='%s'\n", n->type, n->buf);
+			printf("%s", n->buf);
+			//printf("%02x='%s'\n", n->type, n->buf);
 		n = n->next;
 	}
 
@@ -62,14 +63,61 @@ struct node *txt2html(char *txt)
 
 		switch (n->type) {
 			case UL+OPEN+LI:
+				n = newnode(n, n+1, UL+LI);
+			case UL+LI:
+				while (i <= len && isprint(txt[i]))
+					writebuf(n, txt[i++]);
+				if (txt[i] == '\n' && (txt[i+1] == '\n' || txt[i+1] == '\0')) {
+					++i;
+					n = closenode(n);
+					n = newnode(n, n+1, CLOSE+UL);
+					if (txt[i+1] == '\0') goto EXIT;
+				} else if (txt[i] == '\n' && (txt[i+1] == '-' || txt[i+1] == '*') && txt[i+2] == ' ') {
+					i += 2;
+					n = closenode(n);
+					n = newnode(n, n+1, UL+OPEN+LI);
+					n = newnode(n, n+1, UL+LI);
+				} else if (txt[i] == '\n' && (opts & OPT_HB)) {
+					n = newnode(n, n+1, OPEN+BR+CLOSE);
+					n = newnode(n, n+1, UL+OPEN+LI);
+				} else if (txt[i] == '\n') {
+					writebuf(n, ' ');
+				} else {
+					writebuf(n, txt[i]);
+				}
+				++i;
+				break;
 			case OL+OPEN+LI:
+				n = newnode(n, n+1, OL+LI);
+			case OL+LI:
+				while (i <= len && isprint(txt[i]))
+					writebuf(n, txt[i++]);
+				if (txt[i] == '\n' && (txt[i+1] == '\n' || txt[i+1] == '\0')) {
+					++i;
+					n = closenode(n);
+					n = newnode(n, n+1, CLOSE+OL);
+					if (txt[i+1] == '\0') goto EXIT;
+				} else if (txt[i] == '\n' && (isalnum(txt[i]) && txt[i+1] == '.' && txt[i+2] == ' ')) {
+					i += 2;
+					n = closenode(n);
+					n = newnode(n, n+1, OL+OPEN+LI);
+					n = newnode(n, n+1, OL+LI);
+				} else if (txt[i] == '\n' && (opts & OPT_HB)) {
+					n = newnode(n, n+1, OPEN+BR+CLOSE);
+					n = newnode(n, n+1, OL+LI);
+				} else if (txt[i] == '\n') {
+					writebuf(n, ' ');
+				} else {
+					writebuf(n, txt[i]);
+				}
+				++i;
 				break;
 			case H1:
 			case H2:
 				while (txt[i] != '\n')
 					writebuf(n, txt[i++]);
 				do { ++i; } while (txt[i] == '-' || txt[i] == '=');
-				n = newnode(n, CLOSE+n->type, n+1);
+				n = newnode(n, n+1, CLOSE+n->type);
 				break;
 			case P:
 				while (i <= len && isprint(txt[i]))
@@ -78,8 +126,8 @@ struct node *txt2html(char *txt)
 					++i;
 					n = closenode(n);
 				} else if (txt[i] == '\n' && (opts & OPT_HB)) {
-					n = newnode(n, OPEN+BR+CLOSE, n+1);
-					n = newnode(n, P, n+1);
+					n = newnode(n, n+1, OPEN+BR+CLOSE);
+					n = newnode(n, n+1, P);
 				} else if (txt[i] == '\n') {
 					writebuf(n, ' ');
 				} else {
@@ -89,30 +137,30 @@ struct node *txt2html(char *txt)
 				break;
 			default:
 				if (isalnum(txt[i]) && txt[i+1] == '.' && txt[i+2] == ' ') {
-					n = newnode(n, OPEN+OL, n+1);
-					n = newnode(n, OL+OPEN+LI, n+1);
-					i += 2;
+					n = newnode(n, n+1, OPEN+OL);
+					n = newnode(n, n+1, OL+OPEN+LI);
+					i += 3;
 				} else if ((txt[i] == '*' || txt[i] == '-') && txt[i+1] == ' ') {
-					n = newnode(n, OPEN+UL, n+1);
-					n = newnode(n, UL+OPEN+LI, n+1);
-					i++;
+					n = newnode(n, n+1, OPEN+UL);
+					n = newnode(n, n+1, UL+OPEN+LI);
+					i += 2;
 				} else if (txt[i] == '\t' && isprint(txt[i+1])) {
-					n = newnode(n, OPEN+PRE, n+1);
-					n = newnode(n, PRE, n+1);
+					n = newnode(n, n+1, OPEN+PRE);
+					n = newnode(n, n+1, PRE);
 					++i;
 				} else if (isprint(txt[i])) {
 					switch (isheading(txt, i)) {
 						case H1:
-							n = newnode(n, OPEN+H1, n+1);
-							n = newnode(n, H1, n+1);
+							n = newnode(n, n+1, OPEN+H1);
+							n = newnode(n, n+1, H1);
 							break;
 						case H2:
-							n = newnode(n, OPEN+H2, n+1);
-							n = newnode(n, H2, n+1);
+							n = newnode(n, n+1, OPEN+H2);
+							n = newnode(n, n+1, H2);
 							break;
 						default:
-							n = newnode(n, OPEN+P, n+1);
-							n = newnode(n, P, n+1);
+							n = newnode(n, n+1, OPEN+P);
+							n = newnode(n, n+1, P);
 							break;
 					}
 					writebuf(n, txt[i++]);
@@ -121,6 +169,7 @@ struct node *txt2html(char *txt)
 		}
 
 		if (i >= len) {
+EXIT:
 			i = EOF;
 			n = closenode(n);
 			continue;
@@ -136,22 +185,25 @@ struct node *closenode(struct node *n)
 {
 	switch (n->type) {
 		case UL+OPEN+LI:
-			n = newnode(n, CLOSE+UL+LI, n+1);
-			n = newnode(n, CLOSE+UL, n+1);
+		case UL+LI:
+			n = newnode(n, n+1, CLOSE+UL+LI);
 			break;
-		case UL:
-			n = newnode(n, CLOSE+UL, n+1);
+		case OPEN+UL:
+		case CLOSE+UL+LI:
+			n = newnode(n, n+1, CLOSE+UL);
 			break;
 		case OL+OPEN+LI:
-			n = newnode(n, CLOSE+OL+LI, n+1);
-			n = newnode(n, CLOSE+OL, n+1);
+		case OL+LI:
+			n = newnode(n, n+1, CLOSE+OL+LI);
+			n = newnode(n, n+1, CLOSE+OL);
 			break;
-		case OL:
-			n = newnode(n, CLOSE+OL, n+1);
+		case OPEN+OL:
+		case CLOSE+OL+LI:
+			n = newnode(n, n+1, CLOSE+OL);
 			break;
-		case P:
 		case OPEN+P:
-			n = newnode(n, CLOSE+P, n+1);
+		case P:
+			n = newnode(n, n+1, CLOSE+P);
 			break;
 		default:
 			break;
@@ -161,42 +213,70 @@ struct node *closenode(struct node *n)
 
 // malloc node `n` and set its values according to `tag`.
 // a pointer to `n` is returned.
-struct node *newnode(struct node *prev, uint8_t tag, struct node *n)
+struct node *newnode(struct node *prev, struct node *next, uint8_t tag)
 {
-	if (n == NULL)
-		perror("newnode, n cannot be NULL");
+	if (next == NULL)
+		perror("newnode, next cannot be NULL");
 	if (prev != NULL)
-		prev->next = n;
-	n->prev = prev;
-	n->type = tag;
+		prev->next = next;
+	next->prev = prev;
+	next->type = tag;
 	switch(tag) {
-		case OPEN+H1: n->buf = "<h1>\0"; break;
-		case OPEN+H2: n->buf = "<h2>\0"; break;
-		case OPEN+P:  n->buf = "<p>\0"; break;
+		case OPEN+H1:
+			next->buf = "<h1>\0";
+			break;
+		case OPEN+H2:
+			next->buf = "<h2>\0";
+			break;
+		case OPEN+P:
+			next->buf = "<p>\0";
+			break;
+		case OPEN+OL:
+			next->buf = "<ol>\n\0";
+			break;
+		case OPEN+UL:
+			next->buf = "<ul>\n\0";
+			break;
+		case OL+OPEN+LI:
+		case UL+OPEN+LI:
+			next->buf = "&emsp;<li>\0";
+			break;
 		case CLOSE+H1:
 			if (prev != NULL && prev->type == H1)
 				writebuf(prev, EOF);
-			n->buf = "</h1>\n\0";
+			next->buf = "</h1>\n\0";
 			break;
 		case CLOSE+H2:
 			if (prev != NULL && prev->type == H2)
 				writebuf(prev, EOF);
-			n->buf = "</h2>\n\0";
+			next->buf = "</h2>\n\0";
 			break;
 		case CLOSE+P:
 			if (prev != NULL && prev->type == P)
 				writebuf(prev, EOF);
-			n->buf = "</p>\n\0";
+			next->buf = "</p>\n\0";
+			break;
+		case CLOSE+OL:
+			next->buf = "</ol>\n\0";
+			break;
+		case CLOSE+UL:
+			next->buf = "</ul>\n\0";
+			break;
+		case UL+CLOSE+LI:
+		case OL+CLOSE+LI:
+			if (prev != NULL && (prev->type & OPEN+LI) != 0)
+				writebuf(prev, EOF);
+			next->buf = "</li>\n\0";
 			break;
 		case OPEN+BR+CLOSE:
 			if (prev != NULL && prev->type == P)
 				writebuf(prev, EOF);
-			n->buf = "<br/>\n\0";
+			next->buf = "<br/>\n\0";
 			break;
 		default:
 			break;
 	}
-	return n;
+	return next;
 }
 
 // writebuf has an internal static buffer (`buf`) that it writes `c` to.

	txt2html Converts plaintext to HTML
	git clone git://src.gearsix.net/txt2html	txt2html.zip
	Log \| Files \| Refs \| Atom \| README