started splitting txt2html.c up; added rule.c, txt2html.h. - txt2html

commit 6639d97ad7cd6dcc547717c7cb9ff5875e3ae898
parent fbbe7cc381afbc36143dd10737b68e1339d9deba
Author: gearsix <gearsix@tuta.io>
Date:   Tue,  2 Nov 2021 16:34:59 +0000

started splitting txt2html.c up; added rule.c, txt2html.h.

Should've done this a while ago but I was being stubborn (thinking it
would be easier in a single file).

txt2html.h has a NodeTypes enum with the previously #define'd values.

I've removed the #define RULE_* macros because they were ugly and a little
dangerous; doing the checks in functions allows me to make sure str is long
enough before running them. It also makes the code cleaner since
everything is a combination of NodeTypes.

Diffstat:
A rule.c  | 43 +++++++++++++++++++++++++++++++++++++++++++
M txt2html.c  | 70 +++++++++++++++++++++-------------------------------------------------
A txt2html.h  | 36 ++++++++++++++++++++++++++++++++++++

3 files changed, 100 insertions(+), 49 deletions(-)
diff --git a/rule.c b/rule.c
@@ -0,0 +1,43 @@
+#include "txt2html.h"
+
+// Check whether NUL-terminated `str` starts with the rule for `type`.
+// A `str` shorter than the rule, or a `type` without a rule, never matches.
+bool rule_match(const char *str, NodeType type)
+{
+	if (type & CLOSE) // every CLOSE rule is a blank line: "\n\n"
+		return (strlen(str) >= 2 && str[0] == '\n' && str[1] == '\n');
+
+	bool match = false; // must start false: cases skip it when `str` is too short
+	switch (type) {
+		case OPEN+OL+LI: // "1. ", "a. ", ...
+			if (strlen(str) >= rule_len(OPEN+OL+LI))
+				match = (isalnum((unsigned char)str[0]) && str[1] == '.' && str[2] == ' ');
+			break;
+		case OPEN+UL+LI: // "- " or "* "
+			if (strlen(str) >= rule_len(OPEN+UL+LI))
+				match = ((str[0] == '-' || str[0] == '*') && str[1] == ' ');
+			break;
+		case OPEN+PRE: // "\t"; +1 to peek and make sure next char is print
+			if (strlen(str) >= rule_len(OPEN+PRE)+1)
+				match = (str[0] == '\t' && isprint((unsigned char)str[1]));
+			break;
+		default: break;
+	}
+	return match;
+}
+
+// Get the number of characters the rule for `type` consumes from the input.
+// Returns 0 for a `type` that has no rule.
+size_t rule_len(NodeType type)
+{
+	if (type & CLOSE) // "\n\n"
+		return 2;
+
+	switch (type) {
+		case OPEN+OL+LI: return 3; // "1. "
+		case OPEN+UL+LI: return 2; // "- "
+		case OPEN+PRE:   return 1; // "\t"
+		default:         break;
+	}
+	return 0;
+}
diff --git a/txt2html.c b/txt2html.c
@@ -1,10 +1,4 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <string.h>
-#include <ctype.h> // replace with utf8 support
-#include <assert.h>
+#include "txt2html.h"
 
 #define MEMLIMIT 100000000
 
@@ -12,30 +6,6 @@
 #define OPT_NM 0x20 // no memory limit
 #define OPT_BR 0x01 // newlines as <br/> nodes within <p> (not ' ')
 
-// node tags
-#define OPEN      0x10
-#define CLOSE     0x20
-#define H1        0x01
-#define H2        0x02
-#define P         0x03
-#define PRE       0x04
-#define LI        0x05
-#define BR        0x06
-#define OL        0x07
-#define UL        0x08
-
-// rules for detecting tags
-#define RULE_CLOSE_OL(str) (*str == '\n' && (*(str+1) == '\n' || *(str+1) == '\0'))
-#define LEN_CLOSE_OL       2
-#define RULE_OPEN_OLI(str) (isalnum(*str) && *(str+1) == '.' && *(str+2) == ' ')
-#define LEN_OPEN_OLI       3
-#define RULE_CLOSE_UL(str) (*str == '\n' && (*(str+1) == '\n' || *(str+1) == '\0'))
-#define LEN_CLOSE_UL       2
-#define RULE_OPEN_ULI(str) ((*str == '-' || *str == '*') && *(str+1) == ' ')
-#define LEN_OPEN_ULI       2
-#define RULE_OPEN_PRE(str) (*str == '\t' && isprint(*(str+1)))
-#define LEN_OPEN_PRE       1
-
 struct node {
 	struct node *prev, *next;
 	uint8_t type;
@@ -256,18 +226,18 @@ struct node *newnode(struct node *prev, const int type)
 size_t nextnode(const char *str, struct node **n)
 {
 	size_t ret = 0;
-	if (RULE_OPEN_OLI(&str[ret])) {
-		ret += LEN_OPEN_OLI;
+	if (rule_match(&str[ret], OPEN+OL+LI)) {
+		ret += rule_len(OPEN+OL+LI);
 		*n = newnode(*n, OPEN+OL);
 		*n = newnode(*n, OPEN+OL+LI);
 		*n = newnode(*n, OL+LI);
-	} else if (RULE_OPEN_ULI(&str[ret])) {
-		ret += LEN_OPEN_ULI;
+	} else if (rule_match(&str[ret], OPEN+UL+LI)) {
+		ret += rule_len(OPEN+UL+LI);
 		*n = newnode(*n, OPEN+UL);
 		*n = newnode(*n, OPEN+UL+LI);
 		*n = newnode(*n, UL+LI);
-	} else if (RULE_OPEN_PRE(&str[ret])) {
-		ret += LEN_OPEN_PRE;
+	} else if (rule_match(&str[ret], OPEN+PRE)) {
+		ret += rule_len(OPEN+PRE);
 		*n = newnode(*n, OPEN+PRE);
 		*n = newnode(*n, PRE);
 	} else if (isprint(str[ret])) {
@@ -292,7 +262,8 @@ size_t nextnode(const char *str, struct node **n)
 size_t parseh(const char *str, struct node **n)
 {
 	size_t ret = 0;
-	while(str[ret] != '\n' && str[ret] != '\0') writebuf(*n, str[ret++]);
+	while(str[ret] != '\n' && str[ret] != '\0')
+		writebuf(*n, str[ret++]);
 	do { ++ret; } while (str[ret] == '-' || str[ret] == '=');
 	*n = newnode(*n, CLOSE+(*n)->type);
 	return ret;
@@ -301,7 +272,8 @@ size_t parseh(const char *str, struct node **n)
 size_t parsep(const char *str, struct node **n)
 {
 	size_t i = parsetxt(str, n);
-	if (str[i] == '\n' && str[i+1] == '\n') *n = newnode(*n, CLOSE+P);
+	if (str[i] == '\n' && str[i+1] == '\n')
+		*n = newnode(*n, CLOSE+P);
 	return i;
 }
 
@@ -314,12 +286,12 @@ size_t parseoli(const char *str, struct node **n)
 		ret += parsetxt(&str[ret], n);
 		*n = newnode(*n, CLOSE+OL+LI);
 
-		if (str[ret] == '\0' || RULE_CLOSE_OL(&str[ret])) {
-			ret += LEN_CLOSE_OL;
+		if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+OL)) {
+			ret += rule_len(CLOSE+OL);
 			*n = newnode(*n, CLOSE+OL);
 			break;
-		} else if (RULE_OPEN_OLI(&str[ret])) {
-			ret += LEN_OPEN_OLI;
+		} else if (rule_match(&str[ret], OPEN+OL+LI)) {
+			ret += rule_len(OPEN+OL+LI);
 			*n = newnode(*n, OPEN+OL+LI);
 			*n = newnode(*n, OL+LI);
 		}
@@ -337,12 +309,12 @@ size_t parseuli(const char *str, struct node **n)
 		ret += parsetxt(&str[ret], n);
 		*n = newnode(*n, CLOSE+UL+LI);
 
-		if (str[ret] == '\0' || RULE_CLOSE_UL(&str[ret])) {
-			ret += LEN_CLOSE_UL;
+		if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+UL)) {
+			ret += rule_len(CLOSE+UL);
 			*n = newnode(*n, CLOSE+UL);
 			break;
-		} else if (RULE_OPEN_ULI(&str[ret])) {
-			ret += LEN_OPEN_ULI;
+		} else if (rule_match(&str[ret], OPEN+UL+LI)) {
+			ret += rule_len(OPEN+UL+LI);
 			*n = newnode(*n, OPEN+UL+LI);
 			*n = newnode(*n, UL+LI);
 		} else break;
@@ -359,8 +331,8 @@ size_t parsetxt(const char *str, struct node **n)
 		if (str[ret] == '\n' && str[ret+1] == '\n')
 			break;
 		else if (str[ret] == '\n') {
-			if (((*n)->type & OL+LI && RULE_OPEN_OLI(&str[ret+1])) ||
-				((*n)->type & UL+LI && RULE_OPEN_ULI(&str[ret+1]))) {
+			if (((*n)->type & OL+LI && rule_match(&str[ret+1], OPEN+OL+LI)) ||
+				((*n)->type & UL+LI && rule_match(&str[ret+1], OPEN+UL+LI))) {
 				++ret;
 				break;
 			}
diff --git a/txt2html.h b/txt2html.h
@@ -0,0 +1,36 @@
+#ifndef _TXT2HTML
+#define _TXT2HTML
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <ctype.h> // replace with utf8 support
+#include <assert.h>
+
+typedef uint8_t NodeType;
+
+// node tags; a node type is one of these or a sum, e.g. OPEN+OL+LI or CLOSE+P
+enum NodeTypes {
+	OPEN  = 0x10, // added to mark the node that opens a tag
+	CLOSE = 0x20, // added to mark the node that closes a tag
+	H1    = 0x01, // tag ids: consecutive values, not individual bit flags
+	H2    = 0x02,
+	P     = 0x03,
+	PRE   = 0x04,
+	LI    = 0x05,
+	BR    = 0x06,
+	OL    = 0x07,
+	UL    = 0x08
+};
+
+/* rule.c */
+// check if `str` matches the rule for `NodeType t`
+bool rule_match(const char *str, NodeType t);
+
+// get the length of a rule for `NodeType t`
+size_t  rule_len(NodeType t);
+
+#endif

	txt2html Converts plaintext to HTML
	git clone git://src.gearsix.net/txt2html	txt2html.zip
	Log | Files | Refs | Atom | README

A	rule.c	|	43	+++++++++++++++++++++++++++++++++++++++++++
M	txt2html.c	|	70	+++++++++++++++++++++-------------------------------------------------
A	txt2html.h	|	36	++++++++++++++++++++++++++++++++++++