txt2html

Converts plaintext to HTML
git clone git://src.gearsix.net/txt2html
Log | Files | Refs | Atom | README

commit 6639d97ad7cd6dcc547717c7cb9ff5875e3ae898
parent fbbe7cc381afbc36143dd10737b68e1339d9deba
Author: gearsix <gearsix@tuta.io>
Date:   Tue,  2 Nov 2021 16:34:59 +0000

started splitting txt2html.c up; added rule.c, txt2html.h.

Should've done this a while ago but I was being stubborn (thinking it
would be easier in a single file).

txt2html.h has a NodeTypes enum with the previously #define'd values.

I've removed the RULE_ #defines because they were ugly and a little
dangerous; doing the checks in functions lets me make sure str is long
enough before running them. It also makes the code cleaner, since
everything is a combination of NodeTypes.

Diffstat:
A rule.c     | 43 +++++++++++++++++++++++++++++++++++++++++++
M txt2html.c | 70 +++++++++++++++++++++-------------------------------------------------
A txt2html.h | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 100 insertions(+), 49 deletions(-)

diff --git a/rule.c b/rule.c @@ -0,0 +1,43 @@ +#include "txt2html.h" + +bool rule_match(const char *str, NodeType type) +{ + if ((type & CLOSE) && strlen(str) >= 2) + return (str[0] == '\n' && str[1] == '\n'); + + bool match; + switch (type) { + case OPEN+OL+LI: + if (strlen(str) >= rule_len(OPEN+OL+LI)) + match = (isalnum(str[0]) && str[1] == '.' && str[2] == ' '); + break; + case OPEN+UL+LI: + if (strlen(str) >= rule_len(OPEN+UL+LI)) + match = ((str[0] == '-' || str[0] == '*') && str[1] == ' '); + break; + case OPEN+PRE: + // +1 to peek and make sure next char is print + if (strlen(str) >= rule_len(OPEN+PRE)+1) + match = (str[0] == '\t' && isprint(str[1])); + break; + default: + match = false; + } + return match; +} + +size_t rule_len(NodeType type) +{ + if (type & CLOSE) return 2; + + int len = 0; + switch (type) { + case OPEN+OL+LI: + return 3; + case OPEN+UL+LI: + return 2; + case OPEN+PRE: + return 1; + } + return len; +} diff --git a/txt2html.c b/txt2html.c @@ -1,10 +1,4 @@ -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <stdint.h> -#include <string.h> -#include <ctype.h> // replace with utf8 support -#include <assert.h> +#include "txt2html.h" #define MEMLIMIT 100000000 @@ -12,30 +6,6 @@ #define OPT_NM 0x20 // no memory limit #define OPT_BR 0x01 // newlines as <br/> nodes within <p> (not ' ') -// node tags -#define OPEN 0x10 -#define CLOSE 0x20 -#define H1 0x01 -#define H2 0x02 -#define P 0x03 -#define PRE 0x04 -#define LI 0x05 -#define BR 0x06 -#define OL 0x07 -#define UL 0x08 - -// rules for detecting tags -#define RULE_CLOSE_OL(str) (*str == '\n' && (*(str+1) == '\n' || *(str+1) == '\0')) -#define LEN_CLOSE_OL 2 -#define RULE_OPEN_OLI(str) (isalnum(*str) && *(str+1) == '.' 
&& *(str+2) == ' ') -#define LEN_OPEN_OLI 3 -#define RULE_CLOSE_UL(str) (*str == '\n' && (*(str+1) == '\n' || *(str+1) == '\0')) -#define LEN_CLOSE_UL 2 -#define RULE_OPEN_ULI(str) ((*str == '-' || *str == '*') && *(str+1) == ' ') -#define LEN_OPEN_ULI 2 -#define RULE_OPEN_PRE(str) (*str == '\t' && isprint(*(str+1))) -#define LEN_OPEN_PRE 1 - struct node { struct node *prev, *next; uint8_t type; @@ -256,18 +226,18 @@ struct node *newnode(struct node *prev, const int type) size_t nextnode(const char *str, struct node **n) { size_t ret = 0; - if (RULE_OPEN_OLI(&str[ret])) { - ret += LEN_OPEN_OLI; + if (rule_match(&str[ret], OPEN+OL+LI)) { + ret += rule_len(OPEN+OL+LI); *n = newnode(*n, OPEN+OL); *n = newnode(*n, OPEN+OL+LI); *n = newnode(*n, OL+LI); - } else if (RULE_OPEN_ULI(&str[ret])) { - ret += LEN_OPEN_ULI; + } else if (rule_match(&str[ret], OPEN+UL+LI)) { + ret += rule_len(OPEN+UL+LI); *n = newnode(*n, OPEN+UL); *n = newnode(*n, OPEN+UL+LI); *n = newnode(*n, UL+LI); - } else if (RULE_OPEN_PRE(&str[ret])) { - ret += LEN_OPEN_PRE; + } else if (rule_match(&str[ret], OPEN+PRE)) { + ret += rule_len(OPEN+PRE); *n = newnode(*n, OPEN+PRE); *n = newnode(*n, PRE); } else if (isprint(str[ret])) { @@ -292,7 +262,8 @@ size_t nextnode(const char *str, struct node **n) size_t parseh(const char *str, struct node **n) { size_t ret = 0; - while(str[ret] != '\n' && str[ret] != '\0') writebuf(*n, str[ret++]); + while(str[ret] != '\n' && str[ret] != '\0') + writebuf(*n, str[ret++]); do { ++ret; } while (str[ret] == '-' || str[ret] == '='); *n = newnode(*n, CLOSE+(*n)->type); return ret; @@ -301,7 +272,8 @@ size_t parseh(const char *str, struct node **n) size_t parsep(const char *str, struct node **n) { size_t i = parsetxt(str, n); - if (str[i] == '\n' && str[i+1] == '\n') *n = newnode(*n, CLOSE+P); + if (str[i] == '\n' && str[i+1] == '\n') + *n = newnode(*n, CLOSE+P); return i; } @@ -314,12 +286,12 @@ size_t parseoli(const char *str, struct node **n) ret += parsetxt(&str[ret], n); 
*n = newnode(*n, CLOSE+OL+LI); - if (str[ret] == '\0' || RULE_CLOSE_OL(&str[ret])) { - ret += LEN_CLOSE_OL; + if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+OL)) { + ret += rule_len(CLOSE+OL); *n = newnode(*n, CLOSE+OL); break; - } else if (RULE_OPEN_OLI(&str[ret])) { - ret += LEN_OPEN_OLI; + } else if (rule_match(&str[ret], OPEN+OL+LI)) { + ret += rule_len(OPEN+OL+LI); *n = newnode(*n, OPEN+OL+LI); *n = newnode(*n, OL+LI); } @@ -337,12 +309,12 @@ size_t parseuli(const char *str, struct node **n) ret += parsetxt(&str[ret], n); *n = newnode(*n, CLOSE+UL+LI); - if (str[ret] == '\0' || RULE_CLOSE_UL(&str[ret])) { - ret += LEN_CLOSE_UL; + if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+UL)) { + ret += rule_len(CLOSE+UL); *n = newnode(*n, CLOSE+UL); break; - } else if (RULE_OPEN_ULI(&str[ret])) { - ret += LEN_OPEN_ULI; + } else if (rule_match(&str[ret], OPEN+UL+LI)) { + ret += rule_len(OPEN+UL+LI); *n = newnode(*n, OPEN+UL+LI); *n = newnode(*n, UL+LI); } else break; @@ -359,8 +331,8 @@ size_t parsetxt(const char *str, struct node **n) if (str[ret] == '\n' && str[ret+1] == '\n') break; else if (str[ret] == '\n') { - if (((*n)->type & OL+LI && RULE_OPEN_OLI(&str[ret+1])) || - ((*n)->type & UL+LI && RULE_OPEN_ULI(&str[ret+1]))) { + if (((*n)->type & OL+LI && rule_match(&str[ret+1], OPEN+OL+LI)) || + ((*n)->type & UL+LI && rule_match(&str[ret+1], OPEN+UL+LI))) { ++ret; break; } diff --git a/txt2html.h b/txt2html.h @@ -0,0 +1,36 @@ +#ifndef _TXT2HTML +#define _TXT2HTML + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdint.h> +#include <string.h> +#include <stdbool.h> +#include <ctype.h> // replace with utf8 support +#include <assert.h> + +typedef uint8_t NodeType; + +// node tags +enum NodeTypes { + OPEN = 0x10, + CLOSE = 0x20, + H1 = 0x01, + H2 = 0x02, + P = 0x03, + PRE = 0x04, + LI = 0x05, + BR = 0x06, + OL = 0x07, + UL = 0x08 +}; + +/* rule.c */ +// check if `str` matches the rule for `NodeType t` +bool rule_match(const char *str, 
NodeType t); + +// get the length of a rule for `NodeType t` +size_t rule_len(NodeType t); + +#endif