From 917ff8354d77242323a48ac31eef0ea73f60e1d1 Mon Sep 17 00:00:00 2001 From: nanos Date: Tue, 21 Mar 2023 08:26:48 +0000 Subject: [PATCH] Document locking for #18 --- README.md | 3 ++- find_posts.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 49e79e5..4a3cb51 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ If you want to, you can of course also run this script locally as a cron job: 1. To get started, clone this repository. (If you'd rather not clone the full repository, you can simply download the `find_posts.py` file, but don't forget to create a directory called `artifacts` in the same directory: The script expects this directory to be present, and stores information about posts it has already pushed into your instance in that directory, to avoid pushing the same posts over and over again.) 2. Then simply run this script like so: `python find_posts.py --access-token= --server=` etc. (run `python find_posts.py -h` to get a list of all options) -When setting up your cronjob, do make sure you are setting the interval long enough that two runs of the script don't overlap though! Running this script with overlapping will have unpleasant results ... +The script uses file-based locking to prevent multiple overlapping executions when it is run as a cron job. The timeout period for the lock can be configured using `--lock-hours`. If you are running this script locally, my recommendation is to run it manually once, before turning on the cron job: The first run will be significantly slower than subsequent runs, and that will help you prevent overlapping during that first run. @@ -87,6 +87,7 @@ Please see below for a list of configuration options. | `MAX_FOLLOWERS` | `--max-followers` | No | Provide to backfill profiles for your most recent followers. Determines how many of your last followers you want to backfill. (An integer number, e.g. `80`. Ensure you also provide `USER`). 
| `MAX_FOLLOW_REQUESTS` | `--max-follow-requests` | No | Provide to backfill profiles for the API key owner's most recent pending follow requests. Determines how many of your last follow requests you want to backfill. (An integer number, e.g. `80`.). Requires an access token with `read:follows` scope. | `HTTP_TIMEOUT` | `--http-timeout` | No | The timeout for any HTTP requests to the Mastodon API in seconds. Defaults to `5`. +| -- | `--lock-hours` | No | Determines after how many hours a lock file should be discarded. Defaults to `24`. Not required when running the script as a GitHub Action, as concurrency is prevented using a different mechanism. #### Required Access Token Scopes diff --git a/find_posts.py b/find_posts.py index c03c568..6390987 100644 --- a/find_posts.py +++ b/find_posts.py @@ -22,7 +22,7 @@ argparser.add_argument('--max-followings', required = False, type=int, default=0 argparser.add_argument('--max-followers', required = False, type=int, default=0, help="Backfill posts for new accounts following --user. We'll backfill at most this many followers' posts") argparser.add_argument('--max-follow-requests', required = False, type=int, default=0, help="Backfill posts of the API key owners pending follow requests. We'll backfill at most this many requester's posts") argparser.add_argument('--http-timeout', required = False, type=int, default=5, help="The timeout for any HTTP requests to your own, or other instances.") -argparser.add_argument('--lock-hours', required = False, type=int, default=0, help="The lock timeout in hours.") +argparser.add_argument('--lock-hours', required = False, type=int, default=24, help="The lock timeout in hours.") def pull_context( server,