Compare commits
99 commits
Author | SHA1 | Date | |
---|---|---|---|
|
205b0731db | ||
|
bf0ed943ec | ||
|
f5c1033fc9 | ||
|
ca302bb8db | ||
|
34d07a4fa1 | ||
|
e86863a8ae | ||
|
e4fca0d67e | ||
|
fe1c69f3ba | ||
|
0416cc159a | ||
|
52d3b8d9e9 | ||
|
3d8ab95f11 | ||
|
a8dc809787 | ||
|
099ef7d37a | ||
|
f69eaed5a6 | ||
|
7be5dfb9b1 | ||
|
95b644d431 | ||
|
bed11e83f1 | ||
|
dafaf93d50 | ||
|
31f475dcdd | ||
|
a76b52642d | ||
|
0744caad6f | ||
|
adc0d4ec4e | ||
|
253c7c4f2b | ||
|
db2dcce2ff | ||
|
712d88cf0d | ||
|
ffa6617fff | ||
|
e207bb6435 | ||
|
c90a7e42ab | ||
|
3294a44f76 | ||
|
174448a2b0 | ||
|
ae55c96506 | ||
|
5a2b2c2311 | ||
|
179bb65253 | ||
|
a7611c6e6f | ||
|
80ee1387f7 | ||
|
c92d4e1c2c | ||
|
c169b2ae30 | ||
|
213ef57abe | ||
|
4dc41ee02c | ||
|
47e8b485a5 | ||
|
93d5b503af | ||
|
f7d015004e | ||
|
6f7392cfaa | ||
|
0472fe6e0c | ||
|
8edfbc030c | ||
|
d212e7a8a3 | ||
|
b04664f9d5 | ||
|
4751d96a1d | ||
|
4011883ef2 | ||
|
e290f2c05f | ||
|
b7ef2be02e | ||
|
c1f0e8ac61 | ||
|
535bf1f404 | ||
|
12fbd0ed72 | ||
|
9fec312b38 | ||
|
9edbee7285 | ||
|
3620b4944b | ||
|
8168aa8036 | ||
|
bf1b73bc04 | ||
|
46a5be98df | ||
|
5a3db443cb | ||
|
d4dfa1e315 | ||
|
c7e0555394 | ||
|
5f6ef2646a | ||
|
1fffddcb23 | ||
|
311353348c | ||
|
080c5dfb78 | ||
|
3ae3be9184 | ||
|
8235cda859 | ||
|
d0cb212315 | ||
|
bfbffc0773 | ||
|
b9d82dc073 | ||
|
886a0cecd2 | ||
|
12ba458563 | ||
|
a3f2b23022 | ||
|
9f28ba2333 | ||
|
2b707e7807 | ||
|
73a296c310 | ||
|
56039cfdea | ||
|
f51d19730b | ||
|
9ea4ba05fb | ||
|
785499ab82 | ||
|
a357470328 | ||
|
5f438ee873 | ||
|
dc0d94a274 | ||
|
0c87cd6727 | ||
|
44f15de367 | ||
|
871cfdeab6 | ||
|
e40a5447ee | ||
|
9b7093e478 | ||
|
c5208568b5 | ||
|
87fd32eb9d | ||
|
1e7aafa6b2 | ||
|
a8cf5d3eef | ||
|
ee045ab493 | ||
|
3b9eecce08 | ||
|
b129c9445c | ||
|
58c064db0f | ||
|
0c73962fb4 |
9 changed files with 1071 additions and 265 deletions
14
.github/workflows/build-container.yaml
vendored
14
.github/workflows/build-container.yaml
vendored
|
@ -9,11 +9,12 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to GHCR
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
if: github.event_name != 'pull_request'
|
||||
with:
|
||||
registry: ghcr.io
|
||||
|
@ -21,9 +22,10 @@ jobs:
|
|||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Build and push
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository_owner }}/mastodon_get_replies:${{ github.ref_name }}
|
||||
ghcr.io/${{ github.repository_owner }}/mastodon_get_replies:latest
|
||||
ghcr.io/${{ github.repository_owner }}/fedifetcher:${{ github.ref_name }}
|
||||
ghcr.io/${{ github.repository_owner }}/fedifetcher:latest
|
||||
|
|
22
.github/workflows/get_context.yml
vendored
22
.github/workflows/get_context.yml
vendored
|
@ -11,20 +11,18 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
environment: mastodon
|
||||
steps:
|
||||
- name: Get latest release
|
||||
run: |
|
||||
curl --retry 20 -s https://api.github.com/repos/nanos/mastodon_get_replies/releases/latest | jq .zipball_url | xargs wget -O download.zip
|
||||
unzip -j download.zip
|
||||
mkdir artifacts
|
||||
ls -lR
|
||||
- name: Checkout original repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip' # caching pip dependencies
|
||||
- run: pip install -r requirements.txt
|
||||
- name: Download all workflow run artifacts
|
||||
uses: dawidd6/action-download-artifact@v2
|
||||
uses: dawidd6/action-download-artifact@v3
|
||||
with:
|
||||
name: artifacts
|
||||
workflow: get_context.yml
|
||||
|
@ -32,14 +30,14 @@ jobs:
|
|||
path: artifacts
|
||||
- name: Get Directory structure
|
||||
run: ls -lR
|
||||
- run: python find_posts.py --lock-hours=0 --access-token=${{ secrets.ACCESS_TOKEN }} --server=${{ vars.MASTODON_SERVER }} --reply-interval-in-hours=${{ vars.REPLY_INTERVAL_IN_HOURS || 0 }} --home-timeline-length=${{ vars.HOME_TIMELINE_LENGTH || 0 }} --max-followings=${{ vars.MAX_FOLLOWINGS || 0 }} --user=${{ vars.USER }} --max-followers=${{ vars.MAX_FOLLOWERS || 0 }} --http-timeout=${{ vars.HTTP_TIMEOUT || 5 }} --max-follow-requests=${{ vars.MAX_FOLLOW_REQUESTS || 0 }} --on-fail=${{ vars.ON_FAIL }} --on-start=${{ vars.ON_START }} --on-done=${{ vars.ON_DONE }} --max-bookmarks=${{ vars.MAX_BOOKMARKS || 0 }}
|
||||
- run: python find_posts.py --lock-hours=0 --access-token=${{ secrets.ACCESS_TOKEN }} -c="./config.json"
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: artifacts
|
||||
path: |
|
||||
artifacts
|
||||
- name: Check out, so that we can keep the workflow alive
|
||||
uses: actions/checkout@v3
|
||||
- name: Checkout user's forked repository for keeping workflow alive
|
||||
uses: actions/checkout@v4
|
||||
- name: Keep workflow alive
|
||||
uses: gautamkrishnar/keepalive-workflow@v1
|
||||
|
|
185
README.md
185
README.md
|
@ -1,114 +1,175 @@
|
|||
# Pull missing responses into Mastodon
|
||||
# FediFetcher for Mastodon
|
||||
|
||||
This GitHub repository provides a GitHub action that runs every 10 mins, doing the following:
|
||||
This GitHub repository provides a simple script that can pull missing posts into Mastodon using the Mastodon API. FediFetcher has no further dependencies, and can be run as either a GitHub Action, as a scheduled cron job, or a pre-packaged container. Here is what FediFetcher can do:
|
||||
|
||||
1. It can [pull remote replies into your instance](https://blog.thms.uk/2023/03/pull-missing-responses-into-mastodon?utm_source=github), using the Mastodon API. That part itself has two parts:
|
||||
1. It gets remote replies to posts that users on your instance have already replied to during the last `REPLY_INTERVAL_IN_HOURS` hours, and adds them to your own server.
|
||||
2. It gets remote replies to the last `HOME_TIMELINE_LENGTH` posts from your home timeline, and adds them to your own server.
|
||||
3. It gets remote replies to the last `MAX_BOOKMARKS` of your bookmarks, and adds them to your own server.
|
||||
2. It can also [backfill posts](https://blog.thms.uk/2023/03/backfill-recently-followed-accounts?utm_source=github):
|
||||
1. from the last `MAX_FOLLOWINGS` users that you have followed.
|
||||
2. form the last `MAX_FOLLOWERS` users that have followed you.
|
||||
3. form the last `MAX_FOLLOW_REQUESTS` users that have sent you a follow request.
|
||||
1. It can pull missing remote replies to posts that are already on your server into your server. Specifically, it can
|
||||
1. fetch missing replies to posts that users on your instance have already replied to,
|
||||
2. fetch missing replies to the most recent posts in your home timeline,
|
||||
3. fetch missing replies to your bookmarks.
|
||||
4. fetch missing replies to your favourites.
|
||||
2. It can also backfill profiles on your instance. In particular it can
|
||||
1. fetch missing posts from users that have recently appeared in your notifications,
|
||||
1. fetch missing posts from users that you have recently followed,
|
||||
2. fetch missing posts from users that have recently followed you,
|
||||
3. fetch missing posts from users that have recently sent you a follow request.
|
||||
|
||||
Each part can be disabled completely, and all of the parameters are configurable.
|
||||
Each part of this script is fully configurable, and you can completely disable parts that you are not interested in.
|
||||
|
||||
**Be aware, that this script may run for a long time, if these values are too high.** Experiment a bit with what works for you, by starting with fairly small numbers (maybe `HOME_TIMELINE_LENGTH = 200`, `REPLY_INTERVAL_IN_HOURS = 12`) and increase the numbers as you see fit.
|
||||
FediFetcher will store posts and profiles it has already pulled in on disk, to prevent re-fetching the same info in subsequent executions.
|
||||
|
||||
For full context and discussion on why this is needed, read the following two blog posts:
|
||||
**Be aware that this script may run for a *very* long time.** This is particularly true the first time this script runs, and/or if you enable all parts of this script. You should ensure that you take steps to prevent multiple overlapping executions of this script, as that will lead to unpleasant results. There are detailed instructions for this below.
|
||||
|
||||
- The original announcement post: [Pull missing responses into Mastodon](https://blog.thms.uk/2023/03/pull-missing-responses-into-mastodon?utm_source=github)
|
||||
- The announcement for v3.0.0: [Pull missing posts from recently followed accounts into Mastodon](https://blog.thms.uk/2023/03/backfill-recently-followed-accounts?utm_source=github)
|
||||
For detailed information on the how and why, please read the [FediFetcher for Mastodon page](https://blog.thms.uk/fedifetcher?utm_source=github).
|
||||
|
||||
## Supported servers
|
||||
|
||||
FediFetcher makes use of the Mastodon API. It'll run against any instance implementing this API, and whilst it was built for Mastodon, it's been [confirmed working against Pleroma](https://fed.xnor.in/objects/6bd47928-704a-4cb8-82d6-87471d1b632f) as well.
|
||||
|
||||
FediFetcher will pull in posts and profiles from any servers running the following software: Mastodon, Pleroma, Akkoma, Pixelfed, Hometown, Misskey, Firefish (Calckey), Foundkey, and Lemmy.
|
||||
|
||||
## Setup
|
||||
|
||||
You can run this script either as a GitHub Action, as a scheduled cron job on your local machine, or from a pre-packed container.
|
||||
You can run FediFetcher either as a GitHub Action, as a scheduled cron job on your local machine/server, or from a pre-packaged container.
|
||||
|
||||
### 1) Get the required access token:
|
||||
|
||||
Regardless of how you want to run this script, you must first get an access token:
|
||||
Regardless of how you want to run FediFetcher, you must first get an access token:
|
||||
|
||||
#### If you are an Admin on your instance
|
||||
|
||||
1. In Mastodon go to Preferences > Development > New Application
|
||||
1. give it a nice name
|
||||
2. Enable the required scopes for your options. See below for details, but if you want to use all parts of this script, you'll need these scopes: `read:search`, `read:statuses`, `read:follows`, `read:bookmarks`, and `admin:read:accounts`
|
||||
1. Give it a nice name
|
||||
2. Enable the required scopes for your options. You could tick `read` and `admin:read:accounts`, or see below for a list of which scopes are required for which options.
|
||||
3. Save
|
||||
4. Copy the value of `Your access token`
|
||||
|
||||
### 2.1) Configure and run the GitHub Action
|
||||
#### If you are not an Admin on your Instance
|
||||
|
||||
To run this script as a GitHub Action:
|
||||
1. Go to [GetAuth for Mastodon](https://getauth.thms.uk?scopes=read&client_name=FediFetcher)
|
||||
2. Type in your Mastodon instance's domain
|
||||
3. Copy the token.
|
||||
|
||||
### 2) Configure and run FediFetcher
|
||||
|
||||
Run FediFetcher as a GitHub Action, a cron job, or a container:
|
||||
|
||||
#### To run FediFetcher as a GitHub Action:
|
||||
|
||||
1. Fork this repository
|
||||
2. Add your access token:
|
||||
1. Go to Settings > Secrets and Variables > Actions
|
||||
2. Click New Repository Secret
|
||||
3. Supply the Name `ACCESS_TOKEN` and provide the Token generated above as Secret
|
||||
3. Provide the required environment variables, to configure your Action:
|
||||
1. Go to Settings > Environments
|
||||
2. Click New Environment
|
||||
3. Provide the name `Mastodon`
|
||||
4. Add environment variables to configure your action as described below.
|
||||
3. Create a file called `config.json` with your [configuration options](#configuration-options) in the repository root. **Do NOT include the Access Token in your `config.json`!**
|
||||
4. Finally go to the Actions tab and enable the action. The action should now automatically run approximately once every 10 min.
|
||||
|
||||
Keep in mind that [the schedule event can be delayed during periods of high loads of GitHub Actions workflow runs](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule), and that [scheduled workflows are automatically disabled when no repository activity has occurred in 60 days](https://github.com/nanos/mastodon_get_replies/issues/17).
|
||||
> **Note**
|
||||
>
|
||||
> Keep in mind that [the schedule event can be delayed during periods of high loads of GitHub Actions workflow runs](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule).
|
||||
|
||||
### 2.2) Run this script locally as a cron job
|
||||
#### To run FediFetcher as a cron job:
|
||||
|
||||
If you want to, you can of course also run this script locally as a cron job:
|
||||
|
||||
1. To get started, clone this repository. (If you'd rather not clone the full repository, you can simply download the `find_posts.py` file, but don't forget to create a directory called `artifacts` in the same directory: The script expects this directory to be present, and stores information about posts it has already pushed into your instance in that directory, to avoid pushing the same posts over and over again.)
|
||||
1. Clone this repository.
|
||||
2. Install requirements: `pip install -r requirements.txt`
|
||||
3. Then simply run this script like so: `python find_posts.py --access-token=<TOKEN> --server=<SERVER>` etc. (run `python find_posts.py -h` to get a list of all options)
|
||||
3. Create a `json` file with [your configuration options](#configuration-options). You may wish to store this in the `./artifacts` directory, as that directory is `.gitignore`d
|
||||
4. Then simply run this script like so: `python find_posts.py -c=./artifacts/config.json`.
|
||||
|
||||
When setting up your cronjob, we are using file based locking to avoid multiple overlapping executions of the script. The timeout period for the lock can be configured using `--lock-hours`.
|
||||
If desired, all configuration options can be provided as command line flags, instead of through a JSON file. An [example script](./examples/FediFetcher.sh) can be found in the `examples` folder.
|
||||
|
||||
If you are running this script locally, my recommendation is to run it manually once, before turning on the cron job: The first run will be significantly slower than subsequent runs, and that will help you prevent overlapping during that first run.
|
||||
When running as a cron job, FediFetcher uses file-based locking to avoid multiple overlapping executions of the script. The timeout period for the lock can be configured using `lock-hours`.
|
||||
|
||||
### 2.3) Run this script from a container
|
||||
> **Note**
|
||||
>
|
||||
> If you are running FediFetcher locally, my recommendation is to run it manually once, before turning on the cron job: The first run will be significantly slower than subsequent runs, and that will help you prevent overlapping during that first run.
|
||||
|
||||
This script is also available in a pre-packaged container, [mastodon_get_replies](https://github.com/nanos/mastodon_get_replies/pkgs/container/mastodon_get_replies) - Thank you [@nikdoof](https://github.com/nikdoof).
|
||||
#### To run FediFetcher from a container:
|
||||
|
||||
1. Pull the container from `ghcr.io`, using Docker or your container tool of choice: `docker pull ghcr.io/nanos/mastodon_get_replies:latest`
|
||||
2. Run the container, passing the command line arguments like running the script directly: `docker run -it ghcr.io/nanos/mastodon_get_replies:latest --access-token=<TOKEN> --server=<SERVER>`
|
||||
FediFetcher is also available in a pre-packaged container, [FediFetcher](https://github.com/nanos/FediFetcher/pkgs/container/fedifetcher) - Thank you [@nikdoof](https://github.com/nikdoof).
|
||||
|
||||
The same rules for running this as a cron job apply to running the container, don't overlap any executions.
|
||||
1. Pull the container from `ghcr.io`, using Docker or your container tool of choice: `docker pull ghcr.io/nanos/fedifetcher:latest`
|
||||
2. Run the container, passing the configuration options as command line arguments: `docker run -it ghcr.io/nanos/fedifetcher:latest --access-token=<TOKEN> --server=<SERVER>`
|
||||
|
||||
> **Note**
|
||||
>
|
||||
> The same rules for running this as a cron job apply to running the container: don't overlap any executions.
|
||||
|
||||
Persistent files are stored in `/app/artifacts` within the container, so you may want to map this to a local folder on your system.
|
||||
|
||||
An example Kubernetes CronJob for running the container is included in the [`examples`](https://github.com/nanos/mastodon_get_replies/tree/main/examples) folder.
|
||||
An [example Kubernetes CronJob](./examples/k8s-cronjob.yaml) for running the container is included in the `examples` folder.
|
||||
|
||||
An [example Docker Compose Script](./examples/docker-compose.yaml) for running the container periodically is included in the `examples` folder.
|
||||
|
||||
### Configuration options
|
||||
|
||||
Please see below for a list of configuration options.
|
||||
FediFetcher has quite a few configuration options, so here is my quick configuration advice, which should work for most people:
|
||||
|
||||
| Environment Variable Name (if using GitHub Action) | Command line flag (if using cron, or the container) | Required? | Notes |
|
||||
|:---------------------------------------------------|:----------------------------------------------------|-----------|:------|
|
||||
| -- | `--access-token` | Yes | The access token. If using GitHub action, this needs to be provided as a Secret called `ACCESS_TOKEN` |
|
||||
|`MASTODON_SERVER`|`--server`|Yes|The domain only of your mastodon server (without `https://` prefix) e.g. `mstdn.thms.uk`. |
|
||||
| `HOME_TIMELINE_LENGTH` | `--home-timeline-length` | No | Provide to fetch remote replies to posts in the API-Key owner's home timeline. Determines how many posts we'll fetch replies for. (An integer number, e.g. `200`)
|
||||
| `REPLY_INTERVAL_IN_HOURS` | `--reply-interval-in-hours` | No | Provide to fetch remote replies to posts that have received replies from users on your own instance. Determines how far back in time we'll go to find posts that have received replies. (An integer number, e.g. `24`.) Requires an access token with `admin:read:accounts`
|
||||
| `USER` | `--user` | See Notes | Required together with `MAX_FOLLOWERS` or `MAX_FOLLOWINGS`: The username of the user whose followers or followings you want to backfill (e.g. `michael` for the user `@michael@thms.uk`).
|
||||
| `MAX_FOLLOWINGS` | `--max-followings` | No | Provide to backfill profiles for your most recent followings. Determines how many of your last followings you want to backfill. (An integer number, e.g. `80`. Ensure you also provide `USER`).
|
||||
| `MAX_FOLLOWERS` | `--max-followers` | No | Provide to backfill profiles for your most recent followers. Determines how many of your last followers you want to backfill. (An integer number, e.g. `80`. Ensure you also provide `USER`).
|
||||
| `MAX_FOLLOW_REQUESTS` | `--max-follow-requests` | No | Provide to backfill profiles for the API key owner's most recent pending follow requests. Determines how many of your last follow requests you want to backfill. (An integer number, e.g. `80`.). Requires an access token with `read:follows` scope.
|
||||
| `MAX_BOOKMARKS` | `--max-bookmarks` | No | Provide to fetch remote replies to any posts you have bookmarked. Determines how many of your bookmarks you want to get replies to. (An integer number, e.g. `80`.). Requires an access token with `read:bookmarks` scope.
|
||||
| `HTTP_TIMEOUT` | `--http-timeout` | No | The timeout for any HTTP requests to the Mastodon API in seconds. Defaults to `5`.
|
||||
| -- | `--lock-hours` | No | Determines after how many hours a lock file should be discarded. Not relevant when running the script as GitHub Action, as concurrency is prevented using a different mechanism.
|
||||
| `ON_START` | `--on-start` | No | Optionally provide a callback URL that will be pinged when processing is starting. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
|
||||
| `ON_DONE` | `--on-done` | No | Optionally provide a callback URL that will be called when processing is finished. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
|
||||
| `ON_FAIL` | `--on-fail` | No | Optionally provide a callback URL that will be called when processing has failed. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
|
||||
> **Warning**
|
||||
>
|
||||
> **Do NOT** include your `access-token` in the `config.json` when running FediFetcher as GitHub Action. When running FediFetcher as GitHub Action **ALWAYS** [set the Access Token as an Action Secret](#to-run-fedifetcher-as-a-github-action).
|
||||
|
||||
#### Required Access Token Scopes
|
||||
```json
|
||||
{
|
||||
"access-token": "Your access token",
|
||||
"server": "your.mastodon.server",
|
||||
"home-timeline-length": 200,
|
||||
"max-followings": 80,
|
||||
"from-notifications": 1
|
||||
}
|
||||
```
|
||||
|
||||
If you configure FediFetcher this way, it'll fetch missing remote replies to the last 200 posts in your home timeline. It'll additionally backfill profiles of the last 80 people you followed, and of every account who appeared in your notifications during the past hour.
|
||||
|
||||
#### Advanced Options
|
||||
|
||||
Please find the list of all configuration options, including descriptions, below:
|
||||
|
||||
Option | Required? | Notes |
|
||||
|:----------------------------------------------------|-----------|:------|
|
||||
|`access-token` | Yes | The access token. If using GitHub action, this needs to be provided as a Secret called `ACCESS_TOKEN`. If running as a cron job or a container, you can supply this option as array, to [fetch posts for multiple users](https://blog.thms.uk/2023/04/muli-user-support-for-fedifetcher) on your instance. |
|
||||
|`server`|Yes|The domain only of your mastodon server (without `https://` prefix) e.g. `mstdn.thms.uk`. |
|
||||
|`home-timeline-length` | No | Provide to fetch remote replies to posts in the API-Key owner's home timeline. Determines how many posts we'll fetch replies for. Recommended value: `200`.
|
||||
| `max-bookmarks` | No | Provide to fetch remote replies to any posts you have bookmarked. Determines how many of your bookmarks you want to get replies to. Recommended value: `80`. Requires an access token with `read:bookmarks` scope.
|
||||
| `max-favourites` | No | Provide to fetch remote replies to any posts you have favourited. Determines how many of your favourites you want to get replies to. Recommended value: `40`. Requires an access token with `read:favourites` scope.
|
||||
| `max-followings` | No | Provide to backfill profiles for your most recent followings. Determines how many of your last followings you want to backfill. Recommended value: `80`.
|
||||
| `max-followers` | No | Provide to backfill profiles for your most recent followers. Determines how many of your last followers you want to backfill. Recommended value: `80`.
|
||||
| `max-follow-requests` | No | Provide to backfill profiles for the API key owner's most recent pending follow requests. Determines how many of your last follow requests you want to backfill. Recommended value: `80`.
|
||||
| `from-notifications` | No | Provide to backfill profiles of anyone mentioned in your recent notifications. Determines how many hours of notifications you want to look at. Requires an access token with `read:notifications` scope. Recommended value: `1`, unless you run FediFetcher less than once per hour.
|
||||
| `reply-interval-in-hours` | No | Provide to fetch remote replies to posts that have received replies from users on your own instance. Determines how far back in time we'll go to find posts that have received replies. You must be administrator on your instance to use this option, and this option is not supported on Pleroma / Akkoma and its forks. Recommended value: `0` (disabled). Requires an access token with `admin:read:accounts`.
|
||||
|`backfill-with-context` | No | Set to `0` to disable fetching remote replies while backfilling profiles. This is enabled by default, but you can disable it, if it's too slow for you.
|
||||
|`backfill-mentioned-users` | No | Set to `0` to disable backfilling any mentioned users when fetching the home timeline. This is enabled by default, but you can disable it, if it's too slow for you.
|
||||
| `remember-users-for-hours` | No | How long between back-filling attempts for non-followed accounts? Defaults to `168`, i.e. one week.
|
||||
| `remember-hosts-for-days` | No | How long should FediFetcher cache host info for? Defaults to `30`.
|
||||
| `http-timeout` | No | The timeout for any HTTP requests to the Mastodon API in seconds. Defaults to `5`.
|
||||
| `lock-hours` | No | Determines after how many hours a lock file should be discarded. Not relevant when running the script as GitHub Action, as concurrency is prevented using a different mechanism. Recommended value: `24`.
|
||||
| `lock-file` | No | Location for the lock file. If not specified, will use `lock.lock` under the state directory. Not relevant when running the script as GitHub Action.
|
||||
| `state-dir` | No | Directory storing persistent files, and the default location for lock file. Not relevant when running the script as GitHub Action.
|
||||
| `on-start` | No | Optionally provide a callback URL that will be pinged when processing is starting. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
|
||||
| `on-done` | No | Optionally provide a callback URL that will be called when processing is finished. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
|
||||
| `on-fail` | No | Optionally provide a callback URL that will be called when processing has failed. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
|
||||
|
||||
### Multi User support
|
||||
|
||||
If you wish to [run FediFetcher for multiple users on your instance](https://blog.thms.uk/2023/04/muli-user-support-for-fedifetcher?utm_source=github), you can supply the `access-token` as an array, with different access tokens for different users. That will allow you to fetch replies and/or backfill profiles for multiple users on your instance.
|
||||
|
||||
This is only supported when running FediFetcher as cron job, or container. Multi-user support is not available when running FediFetcher as GitHub Action.
|
||||
|
||||
### Required Access Token Scopes
|
||||
|
||||
- For all actions, your access token must include these scopes:
|
||||
- `read:search`
|
||||
- `read:statuses`
|
||||
- If you are supplying `REPLY_INTERVAL_IN_HOURS` / `--reply-interval-in-hours` you must additionally enable this scope:
|
||||
- `read:accounts`
|
||||
- If you are supplying `reply-interval-in-hours` you must additionally enable this scope:
|
||||
- `admin:read:accounts`
|
||||
- If you are supplying `MAX_FOLLOW_REQUESTS` / `--max-follow-requests` you must additionally enable this scope:
|
||||
- If you are supplying `max-follow-requests` you must additionally enable this scope:
|
||||
- `read:follows`
|
||||
- If you are supplying `MAX_BOOKMARKS` / `--max-bookmarks` you must additionally enable this scope:
|
||||
- If you are supplying `max-bookmarks` you must additionally enable this scope:
|
||||
- `read:bookmarks`
|
||||
- If you are supplying `max-favourites` you must additionally enable this scope:
|
||||
- `read:favourites`
|
||||
- If you are supplying `from-notifications` you must additionally enable this scope:
|
||||
- `read:notifications`
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
This script is mostly taken from [Abhinav Sarkar](https://notes.abhinavsarkar.net/2023/mastodon-context), with just some additions and alterations. Thank you Abhinav!
|
||||
The original inspiration for this script, as well as parts of its implementation, is taken from [Abhinav Sarkar](https://notes.abhinavsarkar.net/2023/mastodon-context). Thank you Abhinav!
|
||||
|
|
33
examples/FediFetcher-multiple-users.sh
Normal file
33
examples/FediFetcher-multiple-users.sh
Normal file
|
@ -0,0 +1,33 @@
|
|||
# This script is a sample script that you can schedule
|
||||
# to run every 10 minutes from your cron job.
|
||||
# Supply any other arguments, as you see fit.
|
||||
|
||||
|
||||
# In this script, FediFetcher will fetch remote replies for multiple
|
||||
# users on your instance
|
||||
|
||||
# TOKEN1, TOKEN2, and TOKEN3 belong to 3 different users here.
|
||||
|
||||
|
||||
# Sample schedule:
|
||||
# */10 * * * * /usr/bin/bash /path/to/FediFetcher-multiple-users.sh
|
||||
|
||||
###################### IMPORTANT ######################
|
||||
# #
|
||||
# YOU SHOULD RUN THIS SCRIPT MANUALLY AT LEAST ONCE #
|
||||
# WITH YOUR CHOSEN ARGUMENTS, TO AVOID CONCURRENT #
|
||||
# EXECUTIONS OF FEDIFETCHER! #
|
||||
# #
|
||||
###################### IMPORTANT ######################
|
||||
|
||||
|
||||
cd /path/to/FediFetcher
|
||||
python3 find_posts.py \
|
||||
--access-token=TOKEN1 \
|
||||
--access-token=TOKEN2 \
|
||||
--access-token=TOKEN3 \
|
||||
--server=your.server.social \
|
||||
--home-timeline-length=200 \
|
||||
--max-followings=80 \
|
||||
--from-notifications=1 \
|
||||
--lock-hours=1
|
25
examples/FediFetcher.sh
Normal file
25
examples/FediFetcher.sh
Normal file
|
@ -0,0 +1,25 @@
|
|||
# This script is a sample script that you can schedule
|
||||
# to run every 10 minutes from your cron job.
|
||||
# Supply any other arguments, as you see fit.
|
||||
|
||||
|
||||
# Sample schedule:
|
||||
# */10 * * * * /usr/bin/bash /path/to/FediFetcher.sh
|
||||
|
||||
###################### IMPORTANT ######################
|
||||
# #
|
||||
# YOU SHOULD RUN THIS SCRIPT MANUALLY AT LEAST ONCE #
|
||||
# WITH YOUR CHOSEN ARGUMENTS, TO AVOID CONCURRENT #
|
||||
# EXECUTIONS OF FEDIFETCHER! #
|
||||
# #
|
||||
###################### IMPORTANT ######################
|
||||
|
||||
|
||||
cd /path/to/FediFetcher
|
||||
python find_posts.py \
|
||||
--access-token=TOKEN \
|
||||
--server=your.server.social \
|
||||
--home-timeline-length=200 \
|
||||
--max-followings=80 \
|
||||
--from-notifications=1 \
|
||||
--lock-hours=1
|
19
examples/docker-compose.yaml
Normal file
19
examples/docker-compose.yaml
Normal file
|
@ -0,0 +1,19 @@
|
|||
name: fedifetcher
|
||||
services:
|
||||
fedifetcher:
|
||||
stdin_open: true
|
||||
tty: true
|
||||
image: ghcr.io/nanos/fedifetcher:latest
|
||||
command: "--access-token=<TOKEN> --server=<SERVER>"
|
||||
# Persist our data
|
||||
volumes:
|
||||
- ./data:/app/artifacts
|
||||
# Use the `deploy` option to enable `restart_policy`
|
||||
deploy:
|
||||
# Don't go above 1 replica to avoid multiple overlapping executions of the script
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
# The `any` condition means even after successful runs, we'll restart the script
|
||||
condition: any
|
||||
# Specify how long to wait before the script is run again - for example, 1 hour.
|
||||
delay: 1h
|
|
@ -2,7 +2,7 @@
|
|||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: mastodon-get-replies-pvc
|
||||
name: fedifetcher-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
|
@ -14,7 +14,7 @@ spec:
|
|||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: mastodon-get-replies
|
||||
name: fedifetcher
|
||||
spec:
|
||||
# Run every 2 hours
|
||||
schedule: "0 */2 * * *"
|
||||
|
@ -28,21 +28,19 @@ spec:
|
|||
volumes:
|
||||
- name: artifacts
|
||||
persistentVolumeClaim:
|
||||
claimName: mastodon-get-replies-pvc
|
||||
claimName: fedifetcher-pvc
|
||||
containers:
|
||||
- name: mastodon-get-replies
|
||||
image: ghcr.io/nanos/mastodon_get_replies:latest
|
||||
- name: fedifetcher
|
||||
image: ghcr.io/nanos/fedifetcher:latest
|
||||
args:
|
||||
- --server=your.server.social
|
||||
- --access-token=TOKEN
|
||||
- --home-timeline-length
|
||||
- "200"
|
||||
- --reply-interval-in-hours
|
||||
- "24"
|
||||
- --max-followings
|
||||
- "80"
|
||||
- --max-followers
|
||||
- "80"
|
||||
- --from-notification
|
||||
- "4"
|
||||
volumeMounts:
|
||||
- name: artifacts
|
||||
mountPath: /app/artifacts
|
||||
|
|
976
find_posts.py
976
find_posts.py
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,9 @@ certifi==2022.12.7
|
|||
charset-normalizer==3.0.1
|
||||
docutils==0.19
|
||||
idna==3.4
|
||||
python-dateutil==2.8.2
|
||||
requests==2.28.2
|
||||
six==1.16.0
|
||||
smmap==5.0.0
|
||||
urllib3==1.26.14
|
||||
python-dateutil==2.8.2
|
||||
defusedxml==0.7.1
|
||||
|
|
Loading…
Add table
Reference in a new issue