TL;DR: a simple but effective mutex for cron jobs

Often you need to run a job periodically, e.g. backing up files, but the job could take more time than the interval allotted between runs, and you do not want multiple instances of the process to be running at the same time. For instance, bad things happen when multiple rsync processes are trying to synchronize the same folders to the same destination. Thus you want a mutex, something that ensures only one copy of the process can run at any given time.

There are approaches using lock files, but if the computer reboots or the job crashes, the lock file will not be deleted and all subsequent runs of the job will fail. Some advocate using flock() or fcntl(), but those calls are finicky with strange semantics, e.g. a lock taken with fcntl() is released as soon as the process closes any file descriptor referring to the locked file.

My solution to deal with this is to bind an IPv6 localhost ::1 socket to a given port. Only one process can do this, and thus it’s a very effective mutex. No lock files to cause havoc, no dealing with the dark and buggy corners of advisory file locking.

For shell scripts, simply replace the #!/bin/sh line with #!/somewhere/bin/lock 2048, where 2048 is the port number you will use to enforce the lock (1024 or higher if you do not want to deal with the hassles of privileged ports). If you want the jobs to wait rather than exit immediately when they fail to acquire the lock, just change the line to #!/somewhere/bin/lock w2048

The code is in lock.c. Just compile using:

gcc -O2 -o lock lock.c

or

clang -O2 -o lock lock.c

#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <netinet/in.h>
#include <inttypes.h>
#include <sys/time.h>
#include <string.h>

extern char **environ;

/*
 * lock: run /bin/sh while holding a port-based mutex.
 *
 * Binds a UDP socket to the given port on the IPv6 loopback address (::1)
 * and then replaces itself with /bin/sh, passing along every argument after
 * the port.  Only one process can hold the port at a time, so the port acts
 * as a mutex that is held for as long as the socket stays open.
 *
 * Usage:
 *   lock [w]<port> <script>         (what "#!/path/to/lock <port>" expands to)
 *   lock [w]<port> -c "cmd [args]"
 *
 * A leading 'w' on the port makes the program poll once per second until
 * the port becomes free instead of exiting immediately.
 *
 * Return values (on failure; on success this process becomes /bin/sh):
 *   -1  too few arguments
 *   -2  port argument has trailing non-numeric characters
 *   -3  port outside 1..65535
 *   -4  socket() failed
 *   -5  bind() failed (silently when the port is merely busy)
 *   -6  /bin/sh could not be executed
 */
int main(int argc, char **argv) {
  int sock, exit_on_fail;
  long port;
  char *port_start, *port_end = NULL;
  const char *prog;
  struct sockaddr_in6 sin6;
  struct timeval timeout;

  if (argc < 3) {
    /* argv[0] may be NULL when argc is 0; never hand NULL to %s */
    prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "lock";
    fprintf(
      stderr,
      "Usage:\n"
      "\t#!%s [w]<port:1-65535> (first line of script instead of #!/bin/sh)\n"
      "\t\tor\n"
      "\t%s [w]<port:1-65535> -c \"cmd [args...]\"\n\n"
      "\tw: wait if we could not get the port\n",
      prog, prog);
    return -1;
  }

  exit_on_fail = 1;
  port_start = argv[1];
  if (port_start[0] == 'w') {
    exit_on_fail = 0;
    port_start++;
  }

  /* Keep the value as a long until it has been range-checked: narrowing to
     int first could wrap a huge number into the valid port range. */
  port = strtol(port_start, &port_end, 10);
  if (*port_end != '\0') {
    fprintf(stderr,
            "port %s invalid format, must be integer between 1 and 65535\n",
            port_start);
    return -2;
  }
  if (port < 1 || port > 65535) {
    fprintf(stderr, "port %ld invalid, must be between 1 and 65535\n", port);
    return -3;
  }

  /* No close-on-exec flag is set on this socket, so it stays open across
     the exec below and the port remains held while the shell runs. */
  sock = socket(PF_INET6, SOCK_DGRAM, IPPROTO_UDP);
  if (sock == -1) {
    perror("could not create socket");
    return -4;
  }

  /* Zero the whole address first so sin6_flowinfo and sin6_scope_id do not
     carry stack garbage into bind(). */
  memset(&sin6, 0, sizeof(sin6));
  sin6.sin6_family = AF_INET6;
  sin6.sin6_port = htons((uint16_t) port);
  sin6.sin6_addr = in6addr_loopback;

  while (bind(sock, (const struct sockaddr *) &sin6, sizeof(sin6)) < 0) {
    if (errno != EADDRINUSE) {
      /* Not a "someone else holds the lock" condition; retrying would
         spin forever, so report it and give up. */
      perror("could not bind socket");
      return -5;
    }
    if (exit_on_fail) {
      /* Port is busy: another instance is running.  Exit quietly. */
      return -5;
    }
    /* Wait mode: sleep one second, then try again. */
    timeout.tv_sec = 1;
    timeout.tv_usec = 0;
    select(0, NULL, NULL, NULL, &timeout);
  }

  /* Overwrite the port argument with the shell's name so that argv[1..]
     becomes the shell's argv:
       lock 2048 script     ->  /bin/sh script
       lock 2048 -c "cmd"   ->  /bin/sh -c "cmd"
  */
  argv[1] = "/bin/sh";
  execvp("/bin/sh", &argv[1]);

  /* execvp only returns on failure */
  perror("could not exec /bin/sh");
  return -6;
}