large re-work; added node.c parse.c, moved a lot of code from txt2html.c - txt2html

commit 7bb5c18e2563cdca6cb2e9a72e72e186bede309b
parent 6639d97ad7cd6dcc547717c7cb9ff5875e3ae898
Author: gearsix <gearsix@tuta.io>
Date:   Tue,  2 Nov 2021 22:30:28 +0000

large re-work; added node.c parse.c, moved a lot of code from txt2html.c

Diffstat:
A node.c  | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A parse.c  | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M rule.c  | 45 ++++++++++++++++++++++++++++++++++-----------
M txt2html.c  | 339 +++++++------------------------------------------------------------------------
M txt2html.h  | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------

5 files changed, 387 insertions(+), 340 deletions(-)
diff --git a/node.c b/node.c
@@ -0,0 +1,121 @@
+#include "txt2html.h"
+
+// Allocate a node of type `t` and link it after `prev` (`prev` may be NULL).
+// OPEN/CLOSE tag types get a constant tag string as `buf`; every other type
+// leaves `buf` NULL so that node_writec() can fill it with text.
+// Creating a closing tag first flushes the text staged for `prev`.
+// Exits with an error message if the allocation fails.
+struct node *node_create(struct node *prev, NodeType t)
+{
+	// calloc zeroes the node, so prev, next and buf all start out NULL
+	struct node *n = calloc(1, sizeof *n);
+	if (!n) {
+		perror("txt2html: calloc");
+		exit(EXIT_FAILURE);
+	}
+
+	if (prev) {
+		n->prev = prev;
+		prev->next = n;
+	}
+	n->type = t;
+
+	// <br/> carries both OPEN and CLOSE and must not trigger a flush
+	if ((t & CLOSE) && !(t & OPEN) && prev)
+		node_writec(prev, EOF);
+
+	// The tag values (H1..UL) are consecutive numbers, not bit flags
+	// (e.g. P == H1|H2), so the type has to be compared as a whole.
+	// Every literal ends in an explicit "\0": readers test the byte after
+	// the terminator for '$' to tell heap buffers from these constants.
+	switch (t) {
+		case OPEN+BR+CLOSE: n->buf = "<br/>\n\0";  break;
+
+		case OPEN+H1:       n->buf = "<h1>\0";     break;
+		case OPEN+H2:       n->buf = "<h2>\0";     break;
+		case OPEN+PRE:      n->buf = "<pre>\0";    break;
+		case OPEN+P:        n->buf = "<p>\0";      break;
+		case OPEN+OL+LI:
+		case OPEN+UL+LI:    n->buf = "<li>\0";     break;
+		case OPEN+OL:       n->buf = "<ol>\0";     break;
+		case OPEN+UL:       n->buf = "<ul>\0";     break;
+
+		case CLOSE+H1:      n->buf = "</h1>\n\0";  break;
+		case CLOSE+H2:      n->buf = "</h2>\n\0";  break;
+		case CLOSE+PRE:     n->buf = "</pre>\n\0"; break;
+		case CLOSE+P:       n->buf = "</p>\n\0";   break;
+		case CLOSE+OL+LI:
+		case CLOSE+UL+LI:   n->buf = "</li>\n\0";  break;
+		case CLOSE+OL:      n->buf = "</ol>\n\0";  break;
+		case CLOSE+UL:      n->buf = "</ul>\n\0";  break;
+
+		default:            break; // text node: no tag string
+	}
+
+	return n;
+}
+
+// Staging area shared by all node_writec() calls.
+static struct {
+	struct node *owner; // node the staged text belongs to, NULL if none
+	size_t len;         // number of bytes staged in `text`
+	char text[BUFSIZ];
+} staged;
+
+// Append the staged text to the end of `staged.owner->buf` and empty the
+// staging area. The buffer is left NUL-terminated and followed by a '$'
+// byte, which marks it as heap memory rather than a constant tag string.
+static void staged_flush(void)
+{
+	struct node *n = staged.owner;
+	if (!n || staged.len == 0)
+		return;
+
+	// only extend a buffer that an earlier flush allocated ('$' marker);
+	// anything else (NULL or a constant) is replaced by a fresh buffer
+	char *old = NULL;
+	size_t used = 0;
+	if (n->buf && n->buf[strlen(n->buf) + 1] == '$') {
+		old = n->buf;
+		used = strlen(old);
+	}
+
+	char *grown = realloc(old, used + staged.len + 2);
+	if (!grown) {
+		perror("txt2html: realloc");
+		exit(EXIT_FAILURE);
+	}
+	memcpy(grown + used, staged.text, staged.len);
+	grown[used + staged.len] = '\0';
+	grown[used + staged.len + 1] = '$';
+	n->buf = grown;
+	staged.len = 0;
+}
+
+// Append the character `c` to the text of node `n`.
+// Characters are staged in an internal static buffer and moved into `n->buf`
+// when that buffer is full, when a different node is written to, or when
+// `c == EOF`, which ends the text of `n` without adding a character.
+// '\t' is written as "&emsp;".
+void node_writec(struct node *n, int c)
+{
+	assert(n);
+
+	// text staged for another node has to reach that node first
+	if (staged.owner != n) {
+		staged_flush();
+		staged.owner = n;
+	}
+
+	if (c == EOF) {
+		staged_flush();
+		staged.owner = NULL;
+		return;
+	}
+
+	// copy through a real char: reading the first byte of the int `c`
+	// would depend on the machine's byte order
+	const char ch = (char)c;
+	const char *src = &ch;
+	size_t srclen = 1;
+	if (c == '\t') {
+		src = "&emsp;";
+		srclen = 6;
+	}
+
+	if (staged.len + srclen > sizeof staged.text)
+		staged_flush();
+	memcpy(staged.text + staged.len, src, srclen);
+	staged.len += srclen;
+}
+
+// Match the start of `str` against the block rules and append the nodes
+// that open the matching block: the opening tag node(s) followed by an empty
+// text node, which `*n` is left pointing at.
+// Rules are tried in this order: ordered list item, unordered list item,
+// preformatted text, then - for printable text - heading or paragraph.
+// Returns the number of bytes of `str` taken up by the rule's marker. A byte
+// that no rule accepts is skipped (1 is returned, `*n` is unchanged) so that
+// callers looping over a buffer always advance; at the end of `str` 0 is
+// returned.
+size_t node_next(const char *str, struct node **n)
+{
+	assert(str && n);
+
+	if (rule_match(str, OPEN+OL+LI)) {
+		*n = node_create(*n, OPEN+OL);
+		*n = node_create(*n, OPEN+OL+LI);
+		*n = node_create(*n, OL+LI);
+		return rule_len(OPEN+OL+LI);
+	}
+	if (rule_match(str, OPEN+UL+LI)) {
+		*n = node_create(*n, OPEN+UL);
+		*n = node_create(*n, OPEN+UL+LI);
+		*n = node_create(*n, UL+LI);
+		return rule_len(OPEN+UL+LI);
+	}
+	if (rule_match(str, OPEN+PRE)) {
+		*n = node_create(*n, OPEN+PRE);
+		*n = node_create(*n, PRE);
+		return rule_len(OPEN+PRE);
+	}
+	// <ctype.h> functions need a value in unsigned char range
+	if (isprint((unsigned char)str[0])) {
+		// headings and paragraphs have no marker, so nothing is consumed
+		NodeType text = rule_match_heading(str);
+		if (text != H1 && text != H2)
+			text = P;
+		*n = node_create(*n, OPEN+text);
+		*n = node_create(*n, text);
+		return 0;
+	}
+	return (str[0] != '\0') ? 1 : 0;
+}
diff --git a/parse.c b/parse.c
@@ -0,0 +1,124 @@
+#include "txt2html.h"
+
+// Parse the text in `buf` into nodes appended to the list whose current tail
+// is `*queue` (NULL for an empty list). Of `opts`, OPT_BR is honoured here.
+// Passing `buf == NULL` ends the input: a CLOSE node for the type of
+// `*queue` is appended.
+// Returns the new tail of the list, or NULL if the list is still empty.
+struct node *parse_buf(const char *buf, struct node **queue, uint8_t opts)
+{
+	struct node *n = queue ? *queue : NULL;
+
+	if (!buf) // end of input: close whatever is still open
+		return n ? node_create(n, CLOSE+n->type) : NULL;
+
+	const size_t len = strlen(buf);
+	size_t i = 0;
+	while (i < len) {
+		while (buf[i] == '\n') ++i;
+		if (i >= len) break;
+
+		const size_t start = i;
+		struct node *before = n;
+
+		if (!n) {
+			i += node_next(&buf[i], &n);
+			if (!n) { // nothing could be opened at this byte
+				if (i == start) ++i;
+				continue;
+			}
+		}
+		switch (n->type) {
+			case H1:
+			case H2:    i += parse_heading(&buf[i], n);   break;
+			case P:     i += parse_p(&buf[i], n, opts);   break;
+			case OL+LI: i += parse_oli(&buf[i], n, opts); break;
+			case UL+LI: i += parse_uli(&buf[i], n, opts); break;
+			case PRE:
+				i += parse_textblock(&buf[i], n, opts & OPT_BR);
+				break;
+			default:    i += node_next(&buf[i], &n);       break;
+		}
+
+		// the parse_* functions receive `n` by value: move on to the last
+		// node they appended so the next round sees the current state
+		while (n->next) n = n->next;
+
+		// neither text consumed nor a node added: skip the byte instead of
+		// retrying it forever
+		if (i == start && n == before) ++i;
+	}
+	return n;
+}
+
+// Copy text from `str` into `n` until the end of the string, a blank line
+// ("\n\n") or a byte that is neither printable nor '\n'.
+// A single '\n' is handled according to the node type:
+//  - for list item text it ends the item when the next line starts a new
+//    item of the same list kind (the '\n' is consumed);
+//  - inside PRE it is kept when the next line starts with '\t' (that tab is
+//    stepped over, not copied), otherwise it ends the block;
+//  - elsewhere it becomes a <br/> node followed by a fresh text node of the
+//    same type when `softbreaks` is set, and is copied verbatim otherwise.
+// Returns the number of bytes consumed.
+size_t parse_textblock(const char *str, struct node *n, bool softbreaks)
+{
+	assert(str && n);
+
+	size_t ret = 0;
+
+	// <ctype.h> functions need a value in unsigned char range
+	while (str[ret] != '\0' &&
+			(isprint((unsigned char)str[ret]) || str[ret] == '\n')) {
+		if (str[ret] != '\n') {
+			node_writec(n, str[ret++]);
+			continue;
+		}
+
+		if (str[ret+1] == '\n')
+			break;
+
+		// NOTE(review): `&` tests for shared bits, not equality, and the
+		// tag values are not flags (P also shares a bit with OL+LI) -
+		// confirm whether `==` was intended.
+		if ((n->type & (OL+LI) && rule_match(&str[ret+1], OPEN+OL+LI)) ||
+			(n->type & (UL+LI) && rule_match(&str[ret+1], OPEN+UL+LI))) {
+			++ret;
+			break;
+		}
+
+		if (n->type == PRE) {
+			if (str[ret+1] != '\t')
+				break;
+			node_writec(n, '\n');
+			++ret; // together with the ++ret below this steps over the tab
+		} else if (softbreaks) {
+			n = node_create(n, OPEN+BR+CLOSE);
+			n = node_create(n, n->prev->type); // continue with the same type
+		} else {
+			node_writec(n, '\n');
+		}
+		++ret;
+	}
+
+	return ret;
+}
+
+// Copy one line of `str` into the heading node `n`, skip the newline and the
+// run of '=' / '-' that follows it, then close the heading with a CLOSE node
+// of the same heading type.
+// Returns the number of bytes consumed.
+size_t parse_heading(const char *str, struct node *n)
+{
+	assert(str);
+	assert(n);
+
+	size_t i = 0;
+	while (str[i] != '\0' && str[i] != '\n')
+		node_writec(n, str[i++]);
+
+	// only step over a newline that is really there, never the terminator
+	if (str[i] == '\n') {
+		++i;
+		while (str[i] == '-' || str[i] == '=')
+			++i;
+	}
+
+	// like parse_p/parse_oli/parse_uli, emit the closing node here
+	node_create(n, CLOSE+n->type);
+	return i;
+}
+
+// Parse ordered-list items from `str`, starting with the text of the item
+// node `n`. Each item is closed with CLOSE+OL+LI; a following item marker
+// opens the next item, while a blank line or the end of `str` closes the
+// list with CLOSE+OL. Any other input stops the parse with the list open.
+// Returns the number of bytes consumed.
+size_t parse_oli(const char *str, struct node *n, uint8_t opts)
+{
+	assert(str);
+	assert(n);
+
+	size_t i = 0;
+	const size_t len = strlen(str);
+	while (i < len) {
+		i += parse_textblock(&str[i], n, opts & OPT_BR);
+		// parse_textblock may have appended <br/> nodes after `n`
+		while (n->next) n = n->next;
+		n = node_create(n, CLOSE+OL+LI);
+
+		if (str[i] == '\0') {
+			// nothing left to consume: do not count bytes past the end
+			n = node_create(n, CLOSE+OL);
+			break;
+		} else if (rule_match(&str[i], CLOSE+OL)) {
+			i += rule_len(CLOSE+OL);
+			n = node_create(n, CLOSE+OL);
+			break;
+		} else if (rule_match(&str[i], OPEN+OL+LI)) {
+			i += rule_len(OPEN+OL+LI);
+			n = node_create(n, OPEN+OL+LI);
+			n = node_create(n, OL+LI);
+		} else {
+			break; // same as parse_uli: never retry an unparseable byte
+		}
+	}
+	return i;
+}
+
+// Parse paragraph text from `str` into the text node `n`; if the text ends
+// at a blank line ("\n\n") the paragraph is closed with a CLOSE+P node.
+// Returns the number of bytes consumed (the blank line is not included).
+size_t parse_p(const char *str, struct node *n, uint8_t opts)
+{
+	assert(str);
+	assert(n);
+
+	const size_t i = parse_textblock(str, n, opts & OPT_BR);
+	if (str[i] == '\n' && str[i+1] == '\n') {
+		// close after the last node parse_textblock appended; linking the
+		// close node to a stale `n` would cut <br/> nodes out of the list
+		while (n->next) n = n->next;
+		node_create(n, CLOSE+P);
+	}
+	return i;
+}
+
+// Parse unordered-list items from `str`, starting with the text of the item
+// node `n`. Each item is closed with CLOSE+UL+LI; a following item marker
+// opens the next item, while a blank line or the end of `str` closes the
+// list with CLOSE+UL. Any other input stops the parse with the list open.
+// Returns the number of bytes consumed.
+size_t parse_uli(const char *str, struct node *n, uint8_t opts)
+{
+	assert(str);
+	assert(n);
+
+	size_t ret = 0;
+	const size_t len = strlen(str);
+
+	while (ret < len) {
+		ret += parse_textblock(&str[ret], n, opts & OPT_BR);
+		// parse_textblock may have appended <br/> nodes after `n`
+		while (n->next) n = n->next;
+		n = node_create(n, CLOSE+UL+LI);
+
+		if (str[ret] == '\0') {
+			// nothing left to consume: do not count bytes past the end
+			n = node_create(n, CLOSE+UL);
+			break;
+		} else if (rule_match(&str[ret], CLOSE+UL)) {
+			ret += rule_len(CLOSE+UL);
+			n = node_create(n, CLOSE+UL);
+			break;
+		} else if (rule_match(&str[ret], OPEN+UL+LI)) {
+			ret += rule_len(OPEN+UL+LI);
+			n = node_create(n, OPEN+UL+LI);
+			n = node_create(n, UL+LI);
+		} else {
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/rule.c b/rule.c
@@ -1,12 +1,36 @@
 #include "txt2html.h"
 
+// Length in bytes of the source-text marker that the rule for `type`
+// consumes: 2 for every type with the CLOSE bit set, 3 / 2 / 1 for
+// OPEN+OL+LI / OPEN+UL+LI / OPEN+PRE, and 0 for everything else.
+size_t rule_len(NodeType type)
+{
+	if ((type & CLOSE) != 0)
+		return 2;
+	if (type == OPEN+OL+LI)
+		return 3;
+	if (type == OPEN+UL+LI)
+		return 2;
+	return (type == OPEN+PRE) ? 1 : 0;
+}
+
 bool rule_match(const char *str, NodeType type)
 {
+	assert(str);
+
 	if ((type & CLOSE) && strlen(str) >= 2)
 		return (str[0] == '\n' && str[1] == '\n');
 
 	bool match;
 	switch (type) {
+		case H1:
+			match = (rule_match_heading(str) == H1);
+			break;
+		case H2:
+			match = (rule_match_heading(str) == H2);
+			break;
 		case OPEN+OL+LI:
 			if (strlen(str) >= rule_len(OPEN+OL+LI))
 				match = (isalnum(str[0]) && str[1] == '.' && str[2] == ' ');
@@ -26,18 +50,17 @@ bool rule_match(const char *str, NodeType type)
 	return match;
 }
 
-size_t rule_len(NodeType type)
+// Decide whether the first line of `str` is a heading, i.e. whether the
+// line after it starts with "===" (H1) or "---" (H2).
+// Returns H1, H2, or 0 if the second line starts with neither.
+NodeType rule_match_heading(const char *str)
 {
-	if (type & CLOSE) return 2;
+	assert(str);
 
-	int len = 0;
-	switch (type) {
-		case OPEN+OL+LI:
-			return 3;
-		case OPEN+UL+LI:
-			return 2;
-		case OPEN+PRE:
-			return 1;
+	NodeType heading = 0;
+	while (*str && *str++ != '\n'); // skip the first line and its '\n'
+	// index the bytes: `*str+1` would add 1 to the first byte instead of
+	// reading the second one
+	if (strlen(str) >= 3) {
+		if (str[0] == '=' && str[1] == '=' && str[2] == '=')
+			heading = H1;
+		else if (str[0] == '-' && str[1] == '-' && str[2] == '-')
+			heading = H2;
 	}
-	return len;
+	return heading;
 }
diff --git a/txt2html.c b/txt2html.c
@@ -1,31 +1,9 @@
 #include "txt2html.h"
 
-#define MEMLIMIT 100000000
+struct node *parsef(FILE *f);
+int readq(struct node *queue);
 
-#define OPT_V  0x10 // print verbose logs
-#define OPT_NM 0x20 // no memory limit
-#define OPT_BR 0x01 // newlines as <br/> nodes within <p> (not ' ')
-
-struct node {
-	struct node *prev, *next;
-	uint8_t type;
-	char *buf;
-};
-
-struct node *parsef(FILE **f);
-int readast(struct node *ast);
-struct node *buf2ast(const char *buf, struct node *ast);
-struct node *newnode(struct node *prev, const int type);
-size_t nextnode(const char *str, struct node **n);
-size_t parseh(const char *str, struct node **n);
-size_t parseoli(const char *str, struct node **n);
-size_t parseuli(const char *str, struct node **n);
-size_t parsep(const char *str, struct node **n);
-size_t parsetxt(const char *str, struct node **n);
-void writebuf(struct node *n, int c);
-int isheading(const char *txt);
-
-uint8_t opts = 0;
+uint8_t opts;
 
 void help()
 {
@@ -83,7 +61,7 @@ int main(int argc, char **argv)
 
 	int a;
 	FILE *f;
-	struct node *ast;
+	struct node *queue;
 	for (a = 1; a < argc; ++a) {
 		if (strlen(argv[a]) == 0)
 			continue;
@@ -94,316 +72,53 @@ int main(int argc, char **argv)
 			continue;
 		}
 
-		ast = parsef(&f);
-		verbose("counted %d nodes\n", readast(ast));
+		queue = parsef(f);
+		verbose("counted %d nodes\n", readq(queue));
 		verbose("closing %s\n", argv[a]);
 		if (fclose(f) == EOF) perror("fclose failed");
 
-		while (ast != NULL) {
-			if (ast->buf && ast->buf[strlen(ast->buf)+1] == '$')
-				free(ast->buf);
-			if (ast->next) free(ast->next);
-			if (ast->prev) {
-				ast = ast->prev;
+		while (!queue) {
+			if (queue->buf && queue->buf[strlen(queue->buf)+1] == '$')
+				free(queue->buf);
+			if (queue->next) free(queue->next);
+			if (queue->prev) {
+				queue = queue->prev;
 			} else {
-				free(ast);
+				free(queue);
 				break;
 			}
 		}
-		newnode(NULL, 0); // reset node count
+		node_create(NULL, 0); // reset node count
 	}
 
 	return EXIT_SUCCESS;
 }
 
-struct node *parsef(FILE **f)
+// Read `f` block by block and pass every block to parse_buf(), then call
+// parse_buf() once more with a NULL buffer to signal the end of the input.
+// Returns the node returned by parse_buf() for the last block read.
+struct node *parsef(FILE *f)
 {
 	int n;
-	struct node *ast = NULL;
+	struct node *queue = 0;
 	do {
 		verbose("reading block...\r");
 		char buf[BUFSIZ] = {'\0'};
-		n = fread(buf, BUFSIZ-1, sizeof(char), *f);
-		ast = buf2ast(buf, ast);
+		// one item of BUFSIZ-1 bytes is requested, so `n` is 1 for a full
+		// block and 0 for a short (last) block, which is still parsed
+		n = fread(buf, BUFSIZ-1, sizeof(char), f);
+		queue = parse_buf(buf, &queue, opts);
 		verbose("                \r");
 	} while (n > 0);
-	buf2ast(NULL, ast);
-	return ast;
+	parse_buf(NULL, &queue, opts);
+	return queue;
 }
 
-int readast(struct node *ast)
+// Print the `buf` of every node of the list that `q` belongs to, from the
+// first node to the last, to stdout.
+// Returns the number of nodes in the list (0 if `q` is NULL).
+int readq(struct node *q)
 {
-	while (ast->prev != NULL) ast = ast->prev; // rewind
+	// `q` is NULL when nothing was parsed, e.g. for an empty file
+	while (q && q->prev)
+		q = q->prev; // rewind
 	int cnt = 0;
-	while (ast != NULL) {
-		if (ast->buf != NULL)
-			printf("%s", ast->buf);
-		ast = ast->next;
+	while (q) {
+		if (q->buf != NULL)
+			printf("%s", q->buf);
+		q = q->next;
 		++cnt;
 	}
 	return cnt;
 }
-
-struct node *buf2ast(const char *buf, struct node *ast)
-{
-	struct node *n = ast;
-	size_t i = 0;
-	size_t len = (buf != NULL) ? strlen(buf) : 0;
-
-	if (buf == NULL && ast != NULL)
-		n = newnode(n, CLOSE+n->type);
-
-	while (i < len && buf != NULL) {
-		while (buf[i] == '\n') ++i;
-		if (n == NULL)
-			i += nextnode(&buf[i], &n);
-		switch (n->type) {
-			case H1:
-			case H2:    i += parseh(&buf[i], &n);   break;
-			case P:     i += parsep(&buf[i], &n);   break;
-			case PRE:   i += parsetxt(&buf[i], &n); break;
-			case OL+LI: i += parseoli(&buf[i], &n); break;
-			case UL+LI: i += parseuli(&buf[i], &n); break;
-			default:    i += nextnode(&buf[i], &n); break;
-		}
-	}
-	return n;
-}
-
-struct node *newnode(struct node *prev, const int type)
-{
-	static size_t cnt;
-
-	if (prev == NULL && type == 0) {
-		cnt = 0;
-		return NULL;
-	}
-
-	if (!(opts & OPT_NM) && (sizeof(struct node) * cnt > MEMLIMIT)) {
-		printf("txt2html: reached memory limit\n");
-		abort();
-	}
-
-	struct node *n = calloc(1, sizeof(struct node));
-	n->type = type;
-
-	if (prev != NULL) {
-		n->prev = prev;
-		prev->next = n;
-		if (type & CLOSE) writebuf(prev, EOF);
-	}
-
-	switch(type) {
-		case OPEN+H1:       n->buf = "<h1>\0";   break;
-		case OPEN+H2:       n->buf = "<h2>\0";   break;
-		case OPEN+PRE:      n->buf = "<pre>\0";  break;
-		case OPEN+P:        n->buf = "<p>\0";    break;
-		case OPEN+OL:       n->buf = "<ol>\n\0"; break;
-		case OPEN+UL:       n->buf = "<ul>\n\0"; break;
-		case OL+OPEN+LI:
-		case UL+OPEN+LI:    n->buf = "  <li>\0"; break;
-		
-		case CLOSE+H1:      n->buf = "</h1>\n\0";  break;
-		case CLOSE+H2:      n->buf = "</h2>\n\0";  break;
-		case CLOSE+PRE:     n->buf = "</pre>\n\0"; break;
-		case CLOSE+P:       n->buf = "</p>\n\0";   break;
-		case CLOSE+OL:      n->buf = "</ol>\n\0";  break;
-		case CLOSE+UL:      n->buf = "</ul>\n\0";  break;
-		case UL+CLOSE+LI:
-		case OL+CLOSE+LI:   n->buf = "</li>\n\0"; break;
-	
-		case OPEN+BR+CLOSE: n->buf = "<br/>\n\0"; break;
-		
-		default:
-			--cnt;
-			break;
-	}
-
-	++cnt;
-	return n;
-}
-
-size_t nextnode(const char *str, struct node **n)
-{
-	size_t ret = 0;
-	if (rule_match(&str[ret], OPEN+OL+LI)) {
-		ret += rule_len(OPEN+OL+LI);
-		*n = newnode(*n, OPEN+OL);
-		*n = newnode(*n, OPEN+OL+LI);
-		*n = newnode(*n, OL+LI);
-	} else if (rule_match(&str[ret], OPEN+UL+LI)) {
-		ret += rule_len(OPEN+UL+LI);
-		*n = newnode(*n, OPEN+UL);
-		*n = newnode(*n, OPEN+UL+LI);
-		*n = newnode(*n, UL+LI);
-	} else if (rule_match(&str[ret], OPEN+PRE)) {
-		ret += rule_len(OPEN+PRE);
-		*n = newnode(*n, OPEN+PRE);
-		*n = newnode(*n, PRE);
-	} else if (isprint(str[ret])) {
-		switch (isheading(&str[ret])) {
-			case H1:
-				*n = newnode(*n, OPEN+H1);
-				*n = newnode(*n, H1);
-				break;
-			case H2:
-				*n = newnode(*n, OPEN+H2);
-				*n = newnode(*n, H2);
-				break;
-			default:
-				*n = newnode(*n, OPEN+P);
-				*n = newnode(*n, P);
-				break;
-		}
-	}
-	return ret;
-}
-
-size_t parseh(const char *str, struct node **n)
-{
-	size_t ret = 0;
-	while(str[ret] != '\n' && str[ret] != '\0')
-		writebuf(*n, str[ret++]);
-	do { ++ret; } while (str[ret] == '-' || str[ret] == '=');
-	*n = newnode(*n, CLOSE+(*n)->type);
-	return ret;
-}
-
-size_t parsep(const char *str, struct node **n)
-{
-	size_t i = parsetxt(str, n);
-	if (str[i] == '\n' && str[i+1] == '\n')
-		*n = newnode(*n, CLOSE+P);
-	return i;
-}
-
-size_t parseoli(const char *str, struct node **n)
-{
-	size_t ret = 0;
-	size_t len = strlen(str);
-
-	while (ret < len) {
-		ret += parsetxt(&str[ret], n);
-		*n = newnode(*n, CLOSE+OL+LI);
-
-		if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+OL)) {
-			ret += rule_len(CLOSE+OL);
-			*n = newnode(*n, CLOSE+OL);
-			break;
-		} else if (rule_match(&str[ret], OPEN+OL+LI)) {
-			ret += rule_len(OPEN+OL+LI);
-			*n = newnode(*n, OPEN+OL+LI);
-			*n = newnode(*n, OL+LI);
-		}
-	}
-
-	return ret;
-}
-
-size_t parseuli(const char *str, struct node **n)
-{
-	size_t ret = 0;
-	size_t len = strlen(str);
-
-	while (ret < len) {
-		ret += parsetxt(&str[ret], n);
-		*n = newnode(*n, CLOSE+UL+LI);
-
-		if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+UL)) {
-			ret += rule_len(CLOSE+UL);
-			*n = newnode(*n, CLOSE+UL);
-			break;
-		} else if (rule_match(&str[ret], OPEN+UL+LI)) {
-			ret += rule_len(OPEN+UL+LI);
-			*n = newnode(*n, OPEN+UL+LI);
-			*n = newnode(*n, UL+LI);
-		} else break;
-	}
-
-	return ret;
-}
-
-size_t parsetxt(const char *str, struct node **n)
-{
-	size_t ret = 0;
-
-	while (str[ret] != '\0' && (isprint(str[ret]) || str[ret] == '\n')) {
-		if (str[ret] == '\n' && str[ret+1] == '\n')
-			break;
-		else if (str[ret] == '\n') {
-			if (((*n)->type & OL+LI && rule_match(&str[ret+1], OPEN+OL+LI)) ||
-				((*n)->type & UL+LI && rule_match(&str[ret+1], OPEN+UL+LI))) {
-				++ret;
-				break;
-			}
-
-			if ((*n)->type == PRE && str[ret+1] == '\t') {
-				writebuf(*n, '\n');
-				++ret;
-			} else if ((*n)->type == PRE && str[ret+1] != '\t') {
-				break;
-			} else if (opts & OPT_BR) {
-				*n = newnode(*n, OPEN+BR+CLOSE);
-				*n = newnode(*n, (*n)->prev->type);
-			} else {
-				writebuf(*n, str[ret]);
-			}
-		} else {
-			writebuf(*n, str[ret]);
-		}
-		++ret;
-	}
-
-	return ret;
-}
-
-// writebuf has an internal static buffer (`buf`) that it writes `c` to.
-// if `c == EOF` or `buf` reaches `BUFSIZ`, then `buf` it's written to n->buf.
-// `n->buf` will only be allocated required memory.
-void writebuf(struct node *n, int c)
-{
-	assert(n != NULL);
-	static int pg = 0;
-	static int len = 0;
-	static char buf[BUFSIZ+1];
-
-	if (len+2 == BUFSIZ || c == EOF && len > 0) {
-		if (c == EOF) {
-			buf[len++] = '\0';
-			buf[len++] = '$'; // signal malloc'd (not assigned)
-		}
-		n->buf = (pg == 0) ? malloc(len) : realloc(n->buf, strlen(n->buf) + len);
-		memmove(n->buf, buf, len);
-		++pg;
-		len = 0;
-		memset(buf, '\0', BUFSIZ); 
-	}
-
-	switch (c) {
-		case EOF:
-			pg = 0;
-			break;
-		case '\t':
-			strncat(buf, "&emsp;", 7);
-			len += 6;
-			break;
-		default:
-			strncat(buf, (char *)&c, 2);
-			len += 1;
-			break;
-	}
-}
-
-int isheading(const char *txt)
-{
-	assert(txt != NULL);
-	while (*txt++ != '\n' && *txt != '\0'); // skip to next line
-	if (*txt == '\0' || strlen(txt) < 3)
-		return 0;
-	if (*txt == '=' && *(txt+1) == '=' && *(txt+2) == '=')
-		return H1;
-	if (*txt == '-' && *(txt+1) == '-' && *(txt+2) == '-')
-		return H2;
-	else
-		return 0;
-}
diff --git a/txt2html.h b/txt2html.h
@@ -1,5 +1,5 @@
-#ifndef _TXT2HTML
-#define _TXT2HTML
+#ifndef _TXT2HTML_H_
+#define _TXT2HTML_H_
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -7,30 +7,94 @@
 #include <stdint.h>
 #include <string.h>
 #include <stdbool.h>
-#include <ctype.h> // replace with utf8 support
+#include <ctype.h> // TODO replace with utf8 support
 #include <assert.h>
 
+#define OPT_V  0x10 // print verbose logs
+#define OPT_NM 0x20 // no memory limit
+#define OPT_BR 0x01 // newlines as <br/> nodes within <p> (not ' ')
+
 typedef uint8_t NodeType;
 
-// node tags
-enum NodeTypes {
+// Values a NodeType is built from. OPEN and CLOSE are flag bits in the high
+// nibble; the tags below are consecutive numbers in the low nibble and are
+// combined by addition (e.g. OPEN+OL+LI). The tags are not bit flags
+// (P == H1|H2), so a single tag cannot be tested for with `&`.
+enum {
+	NONE  = 0x00,
 	OPEN  = 0x10,
 	CLOSE = 0x20,
-	H1    = 0x01,
-	H2    = 0x02,
-	P     = 0x03,
-	PRE   = 0x04,
-	LI    = 0x05,
-	BR    = 0x06,
-	OL    = 0x07,
-	UL    = 0x08
+	
+	H1  = 0x01,
+	H2  = 0x02,
+	P   = 0x03,
+	PRE = 0x04,
+	BR  = 0x05,
+	LI  = 0x06,
+	OL  = 0x07,
+	UL  = 0x08
+};
+
+// One element of the doubly linked list of output nodes.
+struct node {
+	struct node *prev, *next; // neighbours in the list, NULL at the ends
+	NodeType type;            // sum of the enum values above
+	char *buf;                // constant tag string, heap text, or NULL
 };
 
-/* rule.c */
-// check if `str` matches the rule for `NodeType t`
+/*--------
+  rule.c
+--------*/
+// get the length of a rule for `NodeType t`.
+size_t rule_len(NodeType t);
+
+// check if `str` matches the rule for `NodeType t`.
 bool rule_match(const char *str, NodeType t);
 
-// get the length of a rule for `NodeType t`
-size_t  rule_len(NodeType t);
+// return H1 or H2 if `str` matches said NodeType.
+// If it matches neither, 0 will be returned.
+NodeType rule_match_heading(const char *str);
+
+/*--------
+  node.c
+--------*/
+struct node *node_create(struct node *prev, NodeType t);
+
+// write a character to `n->buf`.
+// Has an internal static buffer (`buf`) that `c` is written to.
+// If `c == EOF` or `buf` reaches `BUFSIZ` or `n` does not match
+// `n` from the previous call, then `buf` is written to the previous
+// `n` and reset for a new set of data.
+void node_writec(struct node *n, int c);
+
+// match `str` against a set of rules and determine the next node type.
+// `n` will be updated to a newly created node of the determined type.
+size_t node_next(const char *str, struct node **n);
+
+/*---------
+  parse.c
+---------*/
+// main parsing function
+struct node *parse_buf(const char *buf, struct node **out, uint8_t opts);
+
+// parse `str` into `n` until *\0* or *\n\n* is found.
+// If `opts & OPT_BR` then `\n` will be parsed as a `<br/>` node.
+// If `n->type` is *PRE*, then parsing will also stop after the first
+// `\n` that is not followed by a `\t`.
+// The number of parsed bytes is returned
+size_t parse_textblock(const char *str, struct node *n, bool softbreaks);
+
+// parse a line of text from `str` into `n` and skip the line after
+// as long as it contains *=* or *-*.
+// The number of parsed bytes is returned.
+size_t parse_heading(const char *str, struct node *n);
+
+// parse `str` into `n` for *OL+LI* until *CLOSE+OL*.
+// The number of parsed bytes is returned.
+size_t parse_oli(const char *str, struct node *n, uint8_t opts);
+
+// parse `str` into `n` until *\0* or *\n\n*. After this, assign
+// a new node to `n` of CLOSE+P.
+// The number of parsed bytes is returned.
+size_t parse_p(const char *str, struct node *n, uint8_t opts);
+
+// parse `str` into `n` for *UL+LI* until *CLOSE+UL*.
+// The number of parsed bytes is returned.
+size_t parse_uli(const char *str, struct node *n, uint8_t opts);
 
 #endif

	txt2html Converts plaintext to HTML
	git clone git://src.gearsix.net/txt2html	txt2html.zip
	Log \| Files \| Refs \| Atom \| README

A	node.c	\|	121	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	parse.c	\|	124	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	rule.c	\|	45	++++++++++++++++++++++++++++++++++-----------
M	txt2html.c	\|	339	+++++++------------------------------------------------------------------------
M	txt2html.h	\|	98	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------