filed
Job queue using FUSE
git clone git://mccd.space/filed
| Log | Files | Refs | README | LICENSE |
commit d0992dccfc635d431446e06b65e5d5b412f3b360 parent 8e3db53500da9e2a8ddeb2950552744d3b5fe5f7 Author: Marc Coquand <marc@coquand.email> Date: Tue, 16 Dec 2025 15:53:21 +0100 * Diffstat:
| M | README.md | | | 23 | ++++++++++++++--------- |
| M | filed.1.scd | | | 15 | ++++++++++----- |
| M | jobdir.go | | | 4 | ++-- |
| M | main.go | | | 1 | + |
| M | manager.go | | | 2 | +- |
| M | pendingdir.go | | | 2 | +- |
| M | store/jobs.go | | | 5 | +++-- |
7 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/README.md b/README.md
@@ -36,36 +36,41 @@ $ mkdir /tmp/filed-jobs
$ filed /tmp/filed-jobs
```
-Once run, `filed` will set up a directory in `filed-jobs` that contains a few files and directories.
+`filed` mounts the directory `filed-jobs` and exposes a few files and directories.
-A job can easily be added by just creating a file in the pending directory:
+A job can then be added by creating a file in the pending directory:
```
$ printf "echo 'hello world'" > /tmp/filed-jobs/pending/1
```
-If all went well, you can see the job output:
+If all went well, you can see the job output in `/complete`:
```
$ cat /tmp/filed-jobs/complete/1
```
-By default, a job retries 3 times, and if unsuccessful is moved to the `failed` directory. You can inspect the logs to see what went wrong:
+By default, a job retries 3 times, and if unsuccessful, it is moved to the `failed` directory. You can inspect the logs to see what went wrong:
```
-$ cat /tmp/filed-jobs/failed/1
+$ cat /tmp/filed-jobs/failed/2
+>>> ech this-will-fail
+sh: 1: ech: not found
+
+
+[System Error]: exit status 127
```
And you can restart a job by moving the job back to pending:
```
-$ mv /tmp/filed-jobs/failed/1 /tmp/filed-jobs/pending
+$ mv /tmp/filed-jobs/failed/2 /tmp/filed-jobs/pending
```
Finally, if you want to remove a completed or failed job:
```
-$ rm /tmp/filed-jobs/failed/1
+$ rm /tmp/filed-jobs/failed/2
```
## Design & Motivation
@@ -76,9 +81,9 @@ Often these jobs can fail, whether that's due to network errors, memory issues o
I wanted a tool that I could incorporate and use with whatever programming language I desired, and that makes it easy to understand when a job fails and to rerun jobs if there is an error. `filed` is very intuitive to build an integration for: just write a file telling it what to execute.
-I also wanted a tool that made it simple to inspect, without needing to expose a web portal or set up separate auth system. `filed` allows you to inspect and operate the queue just by SSHing into the server, and reuses the decades old proven identity system already built into Linux.
+I also wanted a tool that made it simple to inspect, without needing to expose an admin portal with a separate sign-in. `filed` allows you to inspect and operate the queue just by SSHing into the server, and reuses the decades-old identity system already built into Linux.
-The simple file-based API of File d'attente, inspired by plan9, also allows me to slim down the amount of code needed considerably, while still exposing a very scriptable and easy-to-understand interface.
+The simple file-based API of File d'attente also allows me to considerably slim down the amount of code I need to write, while still exposing a very scriptable and easy-to-understand interface.
I've tried a few other queue tools: sqs/sns, rabbitmq, bull, systemd-run. The first two felt heavyweight, and required setting up a lot of infrastructure, especially if you want to rerun and inspect jobs. It felt like far too much work for a simple app. Bull was more in line with what I wanted, but I think operating on files is simpler for building custom automation, and easier to secure. Systemd-run lacked the retry functionality and the interface was rather clunky.
diff --git a/filed.1.scd b/filed.1.scd
@@ -11,11 +11,12 @@ filed - queue jobs utility
# DESCRIPTION
filed (file d'attente) is an inspectable job queue that operates on files
-with retries. It mounts a directory to _mountpoint_ that is used to inspect
-and run jobs.
+with retries. It mounts a directory to _mountpoint_, which is where the user
+can add and inspect jobs.
-filed exposes 4 directories, where each directory contains zero or more _jobs_.
-Job names must be unique across all four directories. The directories are:
+filed exposes 4 directories to _mountpoint_, where each directory contains
+zero or more _jobs_. Job names must be unique across all four directories. The
+directories are:
*pending* - jobs to be run. To create a new job, create a file
here with the command to run.
@@ -54,6 +55,9 @@ has access to the state, and is thus able to rewrite access rights. It is
recommended for the running scripts to use _namespaces(7)_ or _Landlock(7)_
to drop privileges. More security features are coming in the future.
+Another aspect to be aware of is that File d'attente stores logs of all jobs.
+Care should be taken to ensure that no secrets are printed.
+
Access rights can be modified using _CHOWN(1)_ and _CHMOD(1)_.
# MAINTENANCE
@@ -74,7 +78,8 @@ Maximum amount of retries before moving the job to failed.
## Max job count
-Maximum amount of concurrent jobs
+Maximum number of concurrent jobs. It is recommended not to set this much
+higher than 20.
## Backoff mult and backoff base
diff --git a/jobdir.go b/jobdir.go
@@ -104,14 +104,14 @@ func (d JobDir) Rename(ctx context.Context, req *fuse.RenameRequest, newDir fs.N
}
func (jd JobDir) Lookup(ctx context.Context, name string) (fs.Node, error) {
- slog.Info("FUSE: Jobdir Lookup", "name", name)
+ slog.Debug("FUSE: Jobdir Lookup", "name", name)
job, err := jd.manager.store.GetJob(name)
if err != nil {
slog.Warn("FUSE: Not found", "name", name)
return nil, syscall.ENOENT
}
if job.State == jd.state {
- slog.Info("FUSE: Found job", "id", job.ID)
+ slog.Debug("FUSE: Found job", "id", job.ID)
return &File{job, jd.manager}, nil
} else {
return nil, syscall.ENOENT
diff --git a/main.go b/main.go
@@ -32,6 +32,7 @@ func main() {
xdg_home := os.Getenv("XDG_DATA_HOME")
if xdg_home == "" {
fmt.Fprintf(os.Stderr, "FILED_STATE_FILE environment variable needs to be set.\n")
+ fmt.Fprintf(os.Stderr, "For example: export FILED_STATE_FILE=$HOME/.local/share/filed.db")
usage()
os.Exit(1)
}
diff --git a/manager.go b/manager.go
@@ -129,7 +129,7 @@ func (jm *JobManager) runJob(id, commandStr string) {
jobOutput = append(jobOutput, []byte(errMsg)...)
jm.store.RestartJob(id, jobOutput)
} else {
- slog.Info("Worker: Job completed", "id", id, "Output", jobOutput, "exitCode", exitCode)
+ slog.Info("Worker: Job completed", "id", id, "exitCode", exitCode)
jm.store.CompleteJob(id, jobOutput)
}
}
diff --git a/pendingdir.go b/pendingdir.go
@@ -102,7 +102,7 @@ type File struct {
func (f File) Attr(ctx context.Context, a *fuse.Attr) error {
// Add 20 to avoid collision with static files
a.Inode = uint64(f.job.INode + 20)
- slog.Info("FUSE", "inode", a.Inode)
+ slog.Debug("FUSE", "inode", a.Inode)
a.Mode = 0o775
a.Gid = uint32(os.Getgid())
a.Uid = uint32(os.Getuid())
diff --git a/store/jobs.go b/store/jobs.go
@@ -40,7 +40,7 @@ func NewStore(filepath string) (*Store, error) {
return nil, err
}
- if _, err := db.Exec("PRAGMA journal_mode=WAL;"); err != nil {
+ if _, err := db.Exec("PRAGMA journal_mode=WAL;PRAGMA busy_timeout=5000;"); err != nil {
return nil, err
}
@@ -147,7 +147,8 @@ func (s *Store) DeleteJob(id string) error {
}
func (s *Store) ListJobsByState(state string) ([]Job, error) {
- rows, err := s.db.Query("SELECT id, command, attempts, created_at,updated_at FROM jobs WHERE state = ?", state)
+ // Since it's a queue, it should be first in first out.
+ rows, err := s.db.Query("SELECT id, command, attempts, created_at,updated_at FROM jobs WHERE state = ? ORDER BY created_at", state)
if err != nil {
return nil, err
}