pagr

A 'static site generator', built using dati.
Log | Files | Refs | Atom

content.go (7161B)


      1 package main
      2 
      3 import (
      4 	"bufio"
      5 	"bytes"
      6 	"fmt"
      7 	"io"
      8 	"io/ioutil"
      9 	"mime"
     10 	"os"
     11 	"os/exec"
     12 	"path/filepath"
     13 	"sort"
     14 	"strings"
     15 	"time"
     16 
     17 	"github.com/yuin/goldmark"
     18 	goldmarkext "github.com/yuin/goldmark/extension"
     19 	goldmarkparse "github.com/yuin/goldmark/parser"
     20 	goldmarkhtml "github.com/yuin/goldmark/renderer/html"
     21 	"notabug.org/gearsix/suti"
     22 )
     23 
// Content is the HTML string produced by converting a single content file
// (see NewContentFromFile).
type Content string
     26 
     27 var contentExts = [4]string{
     28 	"",      // pre-formatted text
     29 	".txt",  // plain-text
     30 	".html", // HTML
     31 	".md",   // commonmark + extensions (linkify, auto-heading id, unsafe HTML)
     32 }
     33 
     34 func isContentExt(ext string) int {
     35 	for i, supported := range contentExts {
     36 		if ext == supported {
     37 			return i
     38 		}
     39 	}
     40 	return -1
     41 }
     42 
     43 // FIX kills performance on windows
     44 func gitModTime(fpath string) (mod time.Time, err error) {
     45 	if gitBin == "" {
     46 		err = fmt.Errorf("git binary not found")
     47 		return
     48 	}
     49 
     50 	if fpath, err = filepath.Abs(fpath); err != nil {
     51 		return
     52 	}
     53 
     54 	git := exec.Command(gitBin, "-C", filepath.Dir(fpath), "log", "-1", "--format='%ad'", "--", fpath)
     55 	var out []byte
     56 	if out, err = git.Output(); err == nil {
     57 		outstr := strings.ReplaceAll(string(out), "'", "")
     58 		outstr = strings.TrimSuffix(outstr, "\n")
     59 		mod, err = time.Parse("Mon Jan 2 15:04:05 2006 -0700", outstr)
     60 	}
     61 	return
     62 }
     63 
     64 func lastPageMod(fpath string) (t time.Time) {
     65 	if fd, err := os.Stat(fpath); err != nil {
     66 		if t, err = gitModTime(fpath); err != nil {
     67 			t = time.Now()
     68 		}
     69 	} else {
     70 		if t, err = gitModTime(fpath); err != nil {
     71 			t = fd.ModTime()
     72 		}
     73 
     74 		if fd.IsDir() { // find last modified file in directory (depth 1)
     75 			var dir []os.FileInfo
     76 			if dir, err = ioutil.ReadDir(fpath); err == nil {
     77 				for i, f := range dir {
     78 					if f.IsDir() {
     79 						continue
     80 					}
     81 
     82 					var ft time.Time
     83 					if ft, err = gitModTime(filepath.Join(fpath, f.Name())); err != nil {
     84 						ft = fd.ModTime()
     85 					}
     86 
     87 					if i == 0 || ft.After(t) {
     88 						t = ft
     89 					}
     90 				}
     91 			}
     92 		}
     93 	}
     94 	return
     95 }
     96 
     97 // LoadContentsDir parses all files/directories in `dir` into a `Content`.
     98 // For each directory, a new `Page` element will be generated, any file with a
     99 // filetype found in `contentExts`, will be parsed into a string of HTML
    100 // and appended to the `.Content` of the `Page` generated for it's parent
    101 // directory.
    102 func LoadContentDir(dir string) (p []Page, e error) {
    103 	if _, e = os.Stat(dir); e != nil {
    104 		return
    105 	}
    106 	dir = filepath.Clean(dir)
    107 
    108 	pages := make(map[string]Page)
    109 	dmeta := make(map[string]Meta)
    110 
    111 	e = filepath.Walk(dir, func(fpath string, info os.FileInfo, err error) error {
    112 		if err != nil || ignoreFile(fpath) {
    113 			return err
    114 		}
    115 
    116 		if info.IsDir() {
    117 			path := pagePath(dir, fpath)
    118 			pages[path] = NewPage(path, lastPageMod(fpath))
    119 		} else {
    120 			path := pagePath(dir, filepath.Dir(fpath))
    121 			pages[path], dmeta, err = loadContentFile(pages[path], dmeta, fpath, path)
    122 		}
    123 		return err
    124 	})
    125 
    126 	for _, page := range pages {
    127 		page.applyDefaults(dmeta)
    128 		p = append(p, page)
    129 	}
    130 
    131 	sort.SliceStable(p, func(i, j int) bool {
    132 		if it, err := time.Parse(timefmt, p[i].Updated); err == nil {
    133 			if jt, err := time.Parse(timefmt, p[j].Updated); err == nil {
    134 				return it.After(jt)
    135 			}
    136 		}
    137 		return false
    138 	})
    139 
    140 	p = BuildSitemap(p)
    141 
    142 	return
    143 }
    144 
    145 func loadContentFile(p Page, def map[string]Meta, fpath string, ppath string) (Page, map[string]Meta, error) {
    146 	var err error
    147 	fname := strings.TrimSuffix(filepath.Base(fpath), filepath.Ext(fpath))
    148 
    149 	if suti.IsSupportedDataLang(filepath.Ext(fpath)) != -1 &&
    150 		(fname == "defaults" || fname == "meta") {
    151 		var m Meta
    152 		if err = suti.LoadDataFilepath(fpath, &m); err == nil {
    153 			if fname == "defaults" || fname == "default" {
    154 				if meta, ok := def[ppath]; ok {
    155 					m.MergeMeta(meta, false)
    156 				}
    157 				def[ppath] = m
    158 			} else if fname == "meta" {
    159 				p.Meta.MergeMeta(m, true)
    160 			}
    161 		}
    162 	} else if isContentExt(filepath.Ext(fpath)) != -1 {
    163 		err = p.NewContentFromFile(fpath)
    164 	} else {
    165 		a := filepath.Join(ppath, filepath.Base(fpath))
    166 		p.Assets.All = append(p.Assets.All, a)
    167 		ref := &p.Assets.All[len(p.Assets.All)-1]
    168 		mimetype := mime.TypeByExtension(filepath.Ext(fpath))
    169 		if strings.Contains(mimetype, "image") {
    170 			p.Assets.Image = append(p.Assets.Image, ref)
    171 		} else if strings.Contains(mimetype, "video") {
    172 			p.Assets.Video = append(p.Assets.Video, ref)
    173 		} else if strings.Contains(mimetype, "audio") {
    174 			p.Assets.Audio = append(p.Assets.Audio, ref)
    175 		} else {
    176 			p.Assets.Misc = append(p.Assets.Misc, ref)
    177 		}
    178 	}
    179 	return p, def, err
    180 }
    181 
    182 // NewContentFromFile loads the file from `fpath` and converts it to HTML
    183 // from the language matching it's file extension (see below).
    184 // - ".txt" = plain-text
    185 // - ".md", ".gfm", ".cm" = various flavours of markdown
    186 // - ".html" = parsed as-is
    187 // Successful conversions are appended to `p.Contents`
    188 func NewContentFromFile(fpath string) (c Content, err error) {
    189 	var buf []byte
    190 	if f, err := os.Open(fpath); err == nil {
    191 		buf, err = ioutil.ReadAll(f)
    192 		f.Close()
    193 	}
    194 	if err != nil {
    195 		return
    196 	}
    197 
    198 	var body string
    199 	for _, lang := range contentExts {
    200 		if filepath.Ext(fpath) == lang {
    201 			switch lang {
    202 			case "":
    203 				body = "<pre>" + string(buf) + "</pre>"
    204 			case ".txt":
    205 				body = convertTextToHTML(bytes.NewReader(buf))
    206 			case ".md":
    207 				body, err = convertMarkdownToHTML(buf)
    208 			case ".html":
    209 				body = string(buf)
    210 			default:
    211 				break
    212 			}
    213 		}
    214 	}
    215 	if len(body) == 0 {
    216 		err = fmt.Errorf("invalid filetype (%s) passed to NewContentFromFile",
    217 			filepath.Ext(fpath))
    218 	}
    219 	c = Content(body)
    220 	return
    221 }
    222 
    223 // convertTextToHTML parses textual data from `in` and line-by-line converts
    224 // it to HTML. Conversion rules are as follows:
    225 // - Blank lines (with escape characters trimmed) will close any opon tags
    226 // - If a text line is prefixed with a tab and no tag is open, it will open a <pre> tag
    227 // - Otherwise any line of text will open a <p> tag
    228 func convertTextToHTML(in io.Reader) (html string) {
    229 	var tag int
    230 	const p = 1
    231 	const pre = 2
    232 
    233 	fscan := bufio.NewScanner(in)
    234 	for fscan.Scan() {
    235 		line := fscan.Text()
    236 		if len(strings.TrimSpace(line)) == 0 {
    237 			switch tag {
    238 			case p:
    239 				html += "</p>\n"
    240 			case pre:
    241 				html += "</pre>\n"
    242 			}
    243 			tag = 0
    244 		} else if tag == 0 && line[0] == '\t' {
    245 			tag = pre
    246 			html += "<pre>" + line[1:] + "\n"
    247 		} else if tag == 0 || (tag == pre && line[0] != '\t') {
    248 			if tag == pre {
    249 				html += "</pre>\n"
    250 			}
    251 			tag = p
    252 			html += "<p>" + line
    253 		} else if tag == p {
    254 			html += " " + line
    255 		} else if tag == pre {
    256 			html += line[1:] + "\n"
    257 		}
    258 	}
    259 	if tag == p {
    260 		html += "</p>"
    261 	} else if tag == pre {
    262 		html += "</pre>"
    263 	}
    264 
    265 	return html
    266 }
    267 
    268 // convertMarkdownToHTML initialises a `goldmark.Markdown` based on `lang` and
    269 // returns values from calling it's `Convert` function on `in`.
    270 // ".md" (and anything else) = commonmark + extensions (linkify, auto-heading id, unsafe HTML)
    271 func convertMarkdownToHTML(buf []byte) (md string, err error) {
    272 	markdown := goldmark.New(
    273 		goldmark.WithExtensions(goldmarkext.Linkify),
    274 		goldmark.WithParserOptions(goldmarkparse.WithAutoHeadingID()),
    275 		goldmark.WithRendererOptions(goldmarkhtml.WithUnsafe()),
    276 	)
    277 	var out bytes.Buffer
    278 	err = markdown.Convert(buf, &out)
    279 	return out.String(), err
    280 }