txt2html

Converts plaintext to HTML
git clone git://src.gearsix.net/txt2html
Log | Files | Refs | Atom | README

commit d689bdd2d88dc5372d89196240f8a4153a76e8cd
parent b76435c83bed126c9bab3767692545004659f469
Author: gearsix <gearsix@tuta.io>
Date:   Mon, 15 Nov 2021 15:25:03 +0000

fixed all the issues that started from the split refactor

Diffstat:
M node.c | 53 +++++++++++++++++++++++------------------------------
M parse.c | 73 +++++++++++++++++++++++++++++++++++++++++++------------------------------
M rule.c | 6 +++---
M txt2html.c | 65 +++++++++++++++++++++++++++++++++--------------------------------
M txt2html.h | 14 +++++++-------
5 files changed, 109 insertions(+), 102 deletions(-)

diff --git a/node.c b/node.c @@ -4,8 +4,6 @@ struct node *node_create(struct node *prev, NodeType t) { struct node *n = calloc(1, sizeof(struct node)); - n->next = 0; - if (prev) { n->prev = prev; prev->next = n; @@ -16,22 +14,26 @@ struct node *node_create(struct node *prev, NodeType t) if (t == OPEN+BR+CLOSE) { n->buf = "<br/>\n\0"; } else if (t & OPEN) { - if (t & H1) n->buf = "<h1>\0"; - else if (t & H2) n->buf = "<h2>\0"; - else if (t & PRE) n->buf = "<pre>\0"; - else if (t & P) n->buf = "<p>\0"; - else if (t & LI) n->buf = "<li>\0"; // must come before OL/UL - else if (t & OL) n->buf = "<ol>\0"; - else if (t & UL) n->buf = "<ul>\0"; + t &= 0x0F; + if (t == H1) n->buf = "<h1>\0"; + else if (t == H2) n->buf = "<h2>\0"; + else if (t == PRE) n->buf = "<pre>\0"; + else if (t == P) n->buf = "<p>\0"; + else if (t == OL) n->buf = "<ol>\n\0"; + else if (t == UL) n->buf = "<ul>\n\0"; + else if (t == OL+LI || t == UL+LI) + n->buf = "<li>\0"; } else if (t & CLOSE) { - node_writec(prev, EOF); - if (t & H1) n->buf = "</h1>\n\0"; - else if (t & H2) n->buf = "</h2>\n\0"; - else if (t & PRE) n->buf = "</pre>\n\0"; - else if (t & P) n->buf = "</p>\n\0"; - else if (t & LI) n->buf = "</li>\n\0"; // must come before OL/UL - else if (t & OL) n->buf = "</ol>\n\0"; - else if (t & UL) n->buf = "</ul>\n\0"; + t &= 0x0F; + node_writec(&prev, EOF); + if (t == H1) n->buf = "</h1>\n\0"; + else if (t == H2) n->buf = "</h2>\n\0"; + else if (t == PRE) n->buf = "</pre>\n\0"; + else if (t == P) n->buf = "</p>\n\0"; + else if (t == OL) n->buf = "</ol>\n\0"; + else if (t == UL) n->buf = "</ul>\n\0"; + else if (t == UL+LI || t == OL+LI) + n->buf = "</li>\n\0"; } return n; @@ -40,28 +42,21 @@ struct node *node_create(struct node *prev, NodeType t) // writebuf has an internal static buffer (`buf`) that it writes `c` to. // if `c == EOF` or `buf` reaches `BUFSIZ`, then `buf` it's written to n->buf. // `n->buf` will only be allocated required memory. 
-void node_writec(struct node *n, int c) +void node_writec(struct node **n, int c) { assert(n); - static struct node *last_n; - static int pg = 0; static int len = 0; static char buf[BUFSIZ+1]; - if ((last_n && last_n != n) || len+2 == BUFSIZ || (c == EOF && len > 0)) { + if (len+2 == BUFSIZ || (c == EOF && len > 0)) { if (c == EOF) { buf[len++] = '\0'; buf[len++] = '$'; // signal malloc not assigned const } - if (last_n != n) { - struct node *tmp = last_n; - last_n = n; - n = tmp; - } - n->buf = (pg == 0) ? malloc(len) : realloc(n->buf, strlen(n->buf) + len); - memmove(n->buf, buf, len); + (*n)->buf = (pg == 0) ? malloc(len) : realloc((*n)->buf, strlen((*n)->buf) + len); + memmove((*n)->buf, buf, len); ++pg; len = 0; memset(buf, '\0', BUFSIZ); @@ -80,8 +75,6 @@ void node_writec(struct node *n, int c) len += 1; break; } - - last_n = n; } size_t node_next(const char *str, struct node **n) diff --git a/parse.c b/parse.c @@ -1,54 +1,64 @@ #include "txt2html.h" -struct node *parse_buf(const char *buf, struct node **queue, uint8_t opts) +struct node *parse_buf(const char *buf, struct node **n, uint8_t opts) { - struct node *n = *queue; size_t i = 0; size_t len = (buf) ? 
strlen(buf) : 0; - if (!buf && queue) - return node_create(*queue, CLOSE+n->type); + if (!buf && (*n)) { + *n = node_create(*n, CLOSE+(*n)->type); + i = len; + } while (i < len && buf) { while (buf[i] == '\n') ++i; - if (!n) i += node_next(&buf[i], &n); - switch (n->type) { + if (!(*n) || ((*n)->type & CLOSE)) + i += node_next(&buf[i], n); + switch ((*n)->type) { case H1: - case H2: i += parse_heading(&buf[i], n); break; + case H2: + i += parse_heading(&buf[i], n); + break; case P: i += parse_p(&buf[i], n, opts); break; case OL+LI: i += parse_oli(&buf[i], n, opts); break; case UL+LI: i += parse_uli(&buf[i], n, opts); break; case PRE: i += parse_textblock(&buf[i], n, opts & OPT_BR); + *n = node_create(*n, CLOSE+PRE); + break; + default: + i += node_next(&buf[i], n); break; - default: i += node_next(&buf[i], &n); break; } } - return n; + + return *n; } -size_t parse_textblock(const char *str, struct node *n, bool softbreaks) +size_t parse_textblock(const char *str, struct node **n, bool softbreaks) { size_t ret = 0; - while (str[ret] != '\0' && (isprint(str[ret]) || str[ret] == '\n')) { + while (str[ret] != '\0' && (isprint(str[ret]) || str[ret] == '\n' || str[ret] == '\t')) { if (str[ret] == '\n' && str[ret+1] == '\n') break; + else if ((*n)->type == PRE && str[ret] == '\t') + ++ret; else if (str[ret] == '\n') { - if (((n)->type & (OL+LI) && rule_match(&str[ret+1], OPEN+OL+LI)) || - ((n)->type & (UL+LI) && rule_match(&str[ret+1], OPEN+UL+LI))) { + if (((*n)->type & (OL+LI) && rule_match(&str[ret+1], OPEN+OL+LI)) || + ((*n)->type & (UL+LI) && rule_match(&str[ret+1], OPEN+UL+LI))) { ++ret; break; } - if (n->type == PRE && str[ret+1] == '\t') { + if ((*n)->type == PRE && str[ret+1] == '\t') { node_writec(n, '\n'); ++ret; - } else if (n->type == PRE && str[ret+1] != '\t') { + } else if ((*n)->type == PRE && str[ret+1] != '\t') { break; } else if (softbreaks) { - n = node_create(n, OPEN+BR+CLOSE); - n = node_create(n, (n)->prev->type); + *n = node_create(*n, 
OPEN+BR+CLOSE); + *n = node_create(*n, (*n)->prev->type); } else { node_writec(n, str[ret]); } @@ -57,66 +67,69 @@ size_t parse_textblock(const char *str, struct node *n, bool softbreaks) } ++ret; } + node_writec(n, EOF); return ret; } -size_t parse_heading(const char *str, struct node *n) +size_t parse_heading(const char *str, struct node **n) { assert(str); size_t i = 0; while(str[i] && str[i] != '\n') node_writec(n, str[i++]); + node_writec(n, EOF); do { ++i; } while (str[i] == '-' || str[i] == '='); + *n = node_create(*n, CLOSE+(*n)->type); return i; } -size_t parse_oli(const char *str, struct node *n, uint8_t opts) +size_t parse_oli(const char *str, struct node **n, uint8_t opts) { assert(str); size_t i = 0, len = strlen(str); while(i < len) { i += parse_textblock(&str[i], n, opts & OPT_BR); - n = node_create(n, CLOSE+OL+LI); + *n = node_create(*n, CLOSE+OL+LI); if (str[i] == '\0' || rule_match(&str[i], CLOSE+OL)) { i += rule_len(CLOSE+OL); - n = node_create(n, CLOSE+OL); + *n = node_create(*n, CLOSE+OL); break; } else if (rule_match(&str[i], OPEN+OL+LI)) { i += rule_len(OPEN+OL+LI); - n = node_create(n, OPEN+OL+LI); - n = node_create(n, OL+LI); + *n = node_create(*n, OPEN+OL+LI); + *n = node_create(*n, OL+LI); } } return i; } -size_t parse_p(const char *str, struct node *n, uint8_t opts) +size_t parse_p(const char *str, struct node **n, uint8_t opts) { size_t i = parse_textblock(str, n, opts & OPT_BR); if (str[i] == '\n' && str[i+1] == '\n') - n = node_create(n, CLOSE+P); + *n = node_create(*n, CLOSE+P); return i; } -size_t parse_uli(const char *str, struct node *n, uint8_t opts) +size_t parse_uli(const char *str, struct node **n, uint8_t opts) { size_t ret = 0; size_t len = strlen(str); while (ret < len) { ret += parse_textblock(&str[ret], n, opts & OPT_BR); - n = node_create(n, CLOSE+UL+LI); + *n = node_create(*n, CLOSE+UL+LI); if (str[ret] == '\0' || rule_match(&str[ret], CLOSE+UL)) { ret += rule_len(CLOSE+UL); - n = node_create(n, CLOSE+UL); + *n = 
node_create(*n, CLOSE+UL); break; } else if (rule_match(&str[ret], OPEN+UL+LI)) { ret += rule_len(OPEN+UL+LI); - n = node_create(n, OPEN+UL+LI); - n = node_create(n, UL+LI); + *n = node_create(*n, OPEN+UL+LI); + *n = node_create(*n, UL+LI); } else break; } diff --git a/rule.c b/rule.c @@ -23,7 +23,7 @@ bool rule_match(const char *str, NodeType type) if ((type & CLOSE) && strlen(str) >= 2) return (str[0] == '\n' && str[1] == '\n'); - bool match; + bool match = false; switch (type) { case H1: match = (rule_match_heading(str) == H1); @@ -57,9 +57,9 @@ NodeType rule_match_heading(const char *str) NodeType heading = 0; while (*str && *str++ != '\n'); // skip line if (strlen(str) >= 3) { - if (*str == '=' && *str+1 == '=' && *str+2 == '=') + if (*str == '=' && *(str+1) == '=' && *(str+2) == '=') heading = H1; - else if (*str == '-' && *str+1 == '-' && *str+2 == '-') + else if (*str == '-' && *(str+1) == '-' && *(str+2) == '-') heading = H2; } return heading; diff --git a/txt2html.c b/txt2html.c @@ -1,9 +1,9 @@ #include "txt2html.h" -struct node *parsef(FILE *f); -int readq(struct node *queue); +struct node *convf(FILE *f); +int readn(struct node *n); -uint8_t opts; +static uint8_t opts; // make extern if passing it about becomes a pain void help() { @@ -27,6 +27,7 @@ void verbose(const char *fmt, ...) 
va_start(args, fmt); vprintf(fmt, args); va_end(args); + fflush(stdout); } } @@ -61,7 +62,7 @@ int main(int argc, char **argv) int a; FILE *f; - struct node *queue; + struct node *n; for (a = 1; a < argc; ++a) { if (strlen(argv[a]) == 0) continue; @@ -72,19 +73,19 @@ int main(int argc, char **argv) continue; } - queue = parsef(f); - verbose("counted %d nodes\n", readq(queue)); + n = convf(f); + verbose("counted %d nodes\n", readn(n)); verbose("closing %s\n", argv[a]); if (fclose(f) == EOF) perror("fclose failed"); - while (!queue) { - if (queue->buf && queue->buf[strlen(queue->buf)+1] == '$') - free(queue->buf); - if (queue->next) free(queue->next); - if (queue->prev) { - queue = queue->prev; + while (!n) { + if (n->buf && n->buf[strlen(n->buf)+1] == '$') + free(n->buf); + if (n->next) free(n->next); + if (n->prev) { + n = n->prev; } else { - free(queue); + free(n); break; } } @@ -94,30 +95,30 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } -struct node *parsef(FILE *f) +struct node *convf(FILE *f) { - int n; - struct node *queue = 0; - do { - verbose("reading block...\r"); - char buf[BUFSIZ] = {'\0'}; - n = fread(buf, BUFSIZ-1, sizeof(char), f); - queue = parse_buf(buf, &queue, opts); - verbose(" \r"); - } while (n > 0); - parse_buf(NULL, &queue, opts); - return queue; + int siz; + struct node *n = 0; + char buf[BUFSIZ] = {'\0'}; + while (true) { + siz = fread(buf, sizeof(char), BUFSIZ-1, f); + if (siz == 0) break; + buf[siz+1] = '\0'; + verbose("read %d bytes\n", siz); + n = parse_buf(buf, &n, opts); + } + n = parse_buf(NULL, &n, opts); + return n; } -int readq(struct node *q) +int readn(struct node *n) { - while (q->prev) - q = q->prev; // rewind + while (n->prev) + n = n->prev; // rewind int cnt = 0; - while (q) { - if (q->buf != NULL) - printf("%s", q->buf); - q = q->next; + while (n) { + if (n->buf) printf("%s", n->buf); + n = n->next; ++cnt; } return cnt; diff --git a/txt2html.h b/txt2html.h @@ -60,7 +60,7 @@ struct node *node_create(struct node 
*prev, NodeType t); // If `c == EOF` or `buf` reaches `BUFSIZ` or `n` does not match // `n` from the previous call, then `buf` is written to the previous // `n` and reset for a new set of data. -void node_writec(struct node *n, int c); +void node_writec(struct node **n, int c); // rule `str` against a set of rules and determine the next node type. // `n` will be updated to a newly created node of the determined type. @@ -70,31 +70,31 @@ size_t node_next(const char *str, struct node **n); parse.c ---------*/ // main parsing function -struct node *parse_buf(const char *buf, struct node **out, uint8_t opts); +struct node *parse_buf(const char *buf, struct node **n, uint8_t opts); // parse `str` into `n` until *\0* or *\n\n* is found. // If `opts & OPT_BR` then `\n` will be parsed as a `<br/>` node. // If `n->type` is *PRE*, then parsing will also stop after the first // `\n` that is not followed by a `\t`. // The number of parsed bytes is returned -size_t parse_textblock(const char *str, struct node *n, bool softbreaks); +size_t parse_textblock(const char *str, struct node **n, bool softbreaks); // parse a line of text from `str` into `n` and skip the line after // aslong as it contains *=* or *-*. // The number of parsed bytes is returned. -size_t parse_heading(const char *str, struct node *n); +size_t parse_heading(const char *str, struct node **n); // parse `str` into `n` for *OL+LI* until *CLOSE+OL*. // The number of parsed bytes is returned. -size_t parse_oli(const char *str, struct node *n, uint8_t opts); +size_t parse_oli(const char *str, struct node **n, uint8_t opts); // parse `str` into`n` until *\0* or *\n\n*. After this, assign // a new node to `n` of CLOSE+P. // The number of parsed bytes is returned. -size_t parse_p(const char *str, struct node *n, uint8_t opts); +size_t parse_p(const char *str, struct node **n, uint8_t opts); // parse `str` into `n` for *UL+LI* until *CLOSE+UL*. // The number of parsed bytes is returned. 
-size_t parse_uli(const char *str, struct node *n, uint8_t opts); +size_t parse_uli(const char *str, struct node **n, uint8_t opts); #endif