commit ace0f818d172c81e23783b9c7e571464dcd0f604
parent 6446070da557bf8b56fa44b2bbdc4690edf490a9
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Wed, 22 Aug 2018 16:29:20 +0200
sfeed_tail: improvements
- sfeed_tail only reads from files now, not from stdin anymore. This had too many
caveats.
- Instead of a timer it now detects changes faster and uses memory more
efficiently.
- Improve documentation of its behaviour.
Diffstat:
M | sfeed_tail.1 | | | 35 | ++++++++++++++--------------------- |
M | sfeed_tail.c | | | 62 | +++++++++++++++++++++++++++++++------------------------------- |
2 files changed, 45 insertions(+), 52 deletions(-)
diff --git a/sfeed_tail.1 b/sfeed_tail.1
@@ -1,4 +1,4 @@
-.Dd January 29, 2016
+.Dd August 22, 2018
.Dt SFEED_TAIL 1
.Os
.Sh NAME
@@ -6,40 +6,33 @@
.Nd format unseen feed data to a plain-text list
.Sh SYNOPSIS
.Nm
-.Op Ar file...
+.Ar file...
.Sh DESCRIPTION
.Nm
-formats unseen feed data (TSV) from
-.Xr sfeed 1
-from stdin or
+formats only new and unseen feed data (TSV) from
+one or more
.Ar file
to stdout as a plain-text list.
-If one or more
-.Ar file
-are specified, the basename of the
+The basename of the
.Ar file
is used as the feed name in the output.
-If no
-.Ar file
-parameters are specified and so the data is read from stdin the feed name
-is empty.
-.Pp
-.Nm
-will mark the initial items on the first run as seen.
-Then it will print the initial items if data is read from stdin, if the data
-is read on the first run by specifying the argument(s)
-.Ar file
-it will not show the initial seen items.
.Pp
Unseen items are printed per line in a similar format to
.Xr sfeed_plain 1 ,
duplicate items are ignored.
-The list of unique items is determined by the fields: feedname, item id and
+The list of unique items is determined by the fields: item id, item link and
UNIX timestamp of the item date.
+.Pp
+.Nm
+will also only process and show items that are considered new: items whose
+timestamp is no more than one day old.
.Sh IMPLEMENTATION NOTES
+.Nm
+checks for file modifications each second by checking the filesize and
+modification time.
Keep in mind that because
.Nm
-keeps a list of all the items it can consume much memory.
+keeps a list of items, it can potentially consume a lot of memory.
.Sh SEE ALSO
.Xr sfeed 1 ,
.Xr sfeed_plain 1 ,
diff --git a/sfeed_tail.c b/sfeed_tail.c
@@ -1,3 +1,4 @@
+#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
@@ -11,11 +12,10 @@
#include "tree.h"
#include "util.h"
-static int firsttime;
-static int sleepsecs;
static char *line;
static size_t linesize;
-time_t comparetime;
+static int changed;
+static time_t comparetime;
struct line {
char *id;
@@ -47,8 +47,6 @@ gc(void)
RB_FOREACH_SAFE(line, linetree, &head, tmp) {
if (line->timestamp < comparetime) {
-/* printf("DEBUG: gc: removing: %s %s\n",
- line->id, line->title);*/
free(line->id);
free(line->link);
free(line->title);
@@ -90,8 +88,7 @@ printfeed(FILE *fp, const char *feedname)
if (RB_FIND(linetree, &head, &search))
continue;
-/* printf("DEBUG: new: id: %s, link: %s, title: %s\n",
- fields[FieldId], fields[FieldLink], fields[FieldTitle]);*/
+ changed = 1;
if (!(add = calloc(1, sizeof(*add))))
err(1, "calloc");
@@ -104,9 +101,6 @@ printfeed(FILE *fp, const char *feedname)
add->timestamp = parsedtime;
RB_INSERT(linetree, &head, add);
- if (firsttime)
- continue;
-
if (feedname[0]) {
printutf8pad(stdout, feedname, 15, ' ');
fputs(" ", stdout);
@@ -123,6 +117,7 @@ printfeed(FILE *fp, const char *feedname)
int
main(int argc, char *argv[])
{
+ struct stat *stfiles, st;
char *name;
FILE *fp;
int i, slept = 0;
@@ -130,45 +125,50 @@ main(int argc, char *argv[])
if (pledge("stdio rpath", NULL) == -1)
err(1, "pledge");
+ if (argc <= 1) {
+ fprintf(stderr, "usage: %s <file>...\n", argv[0]);
+ return 1;
+ }
+
setlocale(LC_CTYPE, "");
- if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
- err(1, "pledge");
+ if (!(stfiles = calloc(argc - 1, sizeof(*stfiles))))
+ err(1, "calloc");
- if (argc == 1)
- sleepsecs = 1;
- else
- sleepsecs = 300;
+ while (1) {
+ changed = 0;
- for (firsttime = (argc > 1); ; firsttime = 0) {
if ((comparetime = time(NULL)) == -1)
err(1, "time");
/* 1 day is old news */
comparetime -= 86400;
- if (argc == 1) {
- printfeed(stdin, "");
- } else {
- for (i = 1; i < argc; i++) {
- if (!(fp = fopen(argv[i], "r")))
- err(1, "fopen: %s", argv[i]);
+
+ for (i = 1; i < argc; i++) {
+ if (!(fp = fopen(argv[i], "r")))
+ err(1, "fopen: %s", argv[i]);
+ if (fstat(fileno(fp), &st) == -1)
+ err(1, "fstat: %s", argv[i]);
+
+ /* did the file change? compare size and modification time */
+ if (stfiles[i - 1].st_size != st.st_size ||
+ stfiles[i - 1].st_mtime != st.st_mtime) {
name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i];
printfeed(fp, name);
if (ferror(fp))
err(1, "ferror: %s", argv[i]);
- fclose(fp);
}
+ memcpy(&stfiles[i - 1], &st, sizeof(st));
+ fclose(fp);
}
- /* DEBUG: TODO: gc first run. */
- gc();
-
- sleep(sleepsecs);
- slept += sleepsecs;
- /* gc once every hour (excluding run-time) */
- if (slept >= 3600) {
+ /* "garbage collect" on a change or after sleeping more than 10 seconds (NOTE(review): 5 minutes would be slept > 300) */
+ if (changed || slept > 10) {
gc();
+ changed = 0;
slept = 0;
}
+ sleep(1);
+ slept++;
}
return 0;
}