Compare commits
23 commits
205b0731db
bf0ed943ec
f5c1033fc9
ca302bb8db
34d07a4fa1
e86863a8ae
e4fca0d67e
fe1c69f3ba
0416cc159a
52d3b8d9e9
3d8ab95f11
a8dc809787
099ef7d37a
f69eaed5a6
7be5dfb9b1
95b644d431
bed11e83f1
dafaf93d50
31f475dcdd
a76b52642d
0744caad6f
adc0d4ec4e
db2dcce2ff
6 changed files with 55 additions and 38 deletions
.github/workflows/build-container.yaml (vendored, 10 changes)

```diff
@@ -9,11 +9,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
       - name: Login to GHCR
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
         if: github.event_name != 'pull_request'
         with:
           registry: ghcr.io
@@ -21,9 +22,10 @@ jobs:
           password: ${{ secrets.GITHUB_TOKEN }}
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v5
         with:
           push: true
           platforms: linux/amd64,linux/arm64
           tags: |
             ghcr.io/${{ github.repository_owner }}/fedifetcher:${{ github.ref_name }}
             ghcr.io/${{ github.repository_owner }}/fedifetcher:latest
```
.github/workflows/get_context.yml (vendored, 12 changes)

```diff
@@ -12,17 +12,17 @@ jobs:
     environment: mastodon
     steps:
       - name: Checkout original repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
        with:
           python-version: '3.10'
           cache: 'pip' # caching pip dependencies
       - run: pip install -r requirements.txt
       - name: Download all workflow run artifacts
-        uses: dawidd6/action-download-artifact@v2
+        uses: dawidd6/action-download-artifact@v3
        with:
           name: artifacts
           workflow: get_context.yml
@@ -32,12 +32,12 @@ jobs:
         run: ls -lR
       - run: python find_posts.py --lock-hours=0 --access-token=${{ secrets.ACCESS_TOKEN }} -c="./config.json"
       - name: Upload artifacts
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
        with:
           name: artifacts
           path: |
             artifacts
       - name: Checkout user's forked repository for keeping workflow alive
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Keep workflow alive
         uses: gautamkrishnar/keepalive-workflow@v1
```
README.md

```diff
@@ -97,6 +97,8 @@ Persistent files are stored in `/app/artifacts` within the container, so you may
 
 An [example Kubernetes CronJob](./examples/k8s-cronjob.yaml) for running the container is included in the `examples` folder.
 
+An [example Docker Compose Script](./examples/docker-compose.yaml) for running the container periodically is included in the `examples` folder.
+
 ### Configuration options
 
 FediFetcher has quite a few configuration options, so here is my quick configuration advice, that should probably work for most people:
```
examples/docker-compose.yaml (new file, 19 additions)

```diff
@@ -0,0 +1,19 @@
+name: fedifetcher
+services:
+  fedifetcher:
+    stdin_open: true
+    tty: true
+    image: ghcr.io/nanos/fedifetcher:latest
+    command: "--access-token=<TOKEN> --server=<SERVER>"
+    # Persist our data
+    volumes:
+      - ./data:/app/artifacts
+    # Use the `deploy` option to enable `restart_policy`
+    deploy:
+      # Don't go above 1 replica to avoid multiple overlapping executions of the script
+      replicas: 1
+      restart_policy:
+        # The `any` condition means even after successful runs, we'll restart the script
+        condition: any
+        # Specify how often the script should run - for example; after 1 hour.
+        delay: 1h
```
examples/k8s-cronjob.yaml

```diff
@@ -14,7 +14,7 @@ spec:
 apiVersion: batch/v1
 kind: CronJob
 metadata:
-  name: FediFetcher
+  name: fedifetcher
 spec:
   # Run every 2 hours
   schedule: "0 */2 * * *"
@@ -30,7 +30,7 @@ spec:
               persistentVolumeClaim:
                 claimName: fedifetcher-pvc
           containers:
-            - name: FediFetcher
+            - name: fedifetcher
              image: ghcr.io/nanos/fedifetcher:latest
              args:
                - --server=your.server.social
```
find_posts.py

```diff
@@ -137,7 +137,7 @@ def get_user_posts_mastodon(userName, webserver):
     try:
         user_id = get_user_id(webserver, userName)
     except Exception as ex:
-        log(f"Error getting user ID for user {user['acct']}: {ex}")
+        log(f"Error getting user ID for user {userName}: {ex}")
         return None
 
     try:
@@ -148,14 +148,14 @@ def get_user_posts_mastodon(userName, webserver):
             return response.json()
         elif response.status_code == 404:
             raise Exception(
-                f"User {user['acct']} was not found on server {webserver}"
+                f"User {userName} was not found on server {webserver}"
             )
         else:
             raise Exception(
                 f"Error getting URL {url}. Status code: {response.status_code}"
             )
     except Exception as ex:
-        log(f"Error getting posts for user {user['acct']}: {ex}")
+        log(f"Error getting posts for user {userName}: {ex}")
         return None
 
 def get_user_posts_lemmy(userName, userUrl, webserver):
```
```diff
@@ -556,6 +556,11 @@ def parse_url(url, parsed_urls):
         match = parse_mastodon_url(url)
         if match is not None:
             parsed_urls[url] = match
 
+    if url not in parsed_urls:
+        match = parse_mastodon_uri(url)
+        if match is not None:
+            parsed_urls[url] = match
+
     if url not in parsed_urls:
         match = parse_pleroma_url(url)
```
```diff
@@ -601,6 +606,14 @@ def parse_mastodon_url(url):
         return (match.group("server"), match.group("toot_id"))
     return None
 
+def parse_mastodon_uri(uri):
+    """parse a Mastodon URI and return the server and ID"""
+    match = re.match(
+        r"https://(?P<server>[^/]+)/users/(?P<username>[^/]+)/statuses/(?P<toot_id>[^/]+)", uri
+    )
+    if match is not None:
+        return (match.group("server"), match.group("toot_id"))
+    return None
+
 def parse_pleroma_url(url):
     """parse a Pleroma URL and return the server and ID"""
```
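The new `parse_mastodon_uri` helper covers ActivityPub-style status URIs (`/users/<name>/statuses/<id>`), which `parse_mastodon_url` does not match. A minimal standalone sketch of the regex from the hunk above, run against a made-up URI (the server name and status ID are hypothetical):

```python
import re

# Regex taken from the parse_mastodon_uri hunk above
MASTODON_URI_RE = re.compile(
    r"https://(?P<server>[^/]+)/users/(?P<username>[^/]+)/statuses/(?P<toot_id>[^/]+)"
)

def parse_mastodon_uri(uri):
    """Return (server, toot_id) for an ActivityPub status URI, or None."""
    match = MASTODON_URI_RE.match(uri)
    if match is not None:
        return (match.group("server"), match.group("toot_id"))
    return None

# Hypothetical example URI
print(parse_mastodon_uri("https://example.social/users/alice/statuses/111111111111111111"))
# -> ('example.social', '111111111111111111')
```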
```diff
@@ -733,11 +746,6 @@ def get_mastodon_urls(webserver, toot_id, toot_url):
         except Exception as ex:
             log(f"Error parsing context for toot {toot_url}. Exception: {ex}")
             return []
-    elif resp.status_code == 429:
-        reset = datetime.strptime(resp.headers['x-ratelimit-reset'], '%Y-%m-%dT%H:%M:%S.%fZ')
-        log(f"Rate Limit hit when getting context for {toot_url}. Waiting to retry at {resp.headers['x-ratelimit-reset']}")
-        time.sleep((reset - datetime.now()).total_seconds() + 1)
-        return get_mastodon_urls(webserver, toot_id, toot_url)
 
     log(
         f"Error getting context for toot {toot_url}. Status code: {resp.status_code}"
@@ -770,11 +778,6 @@ def get_lemmy_comment_context(webserver, toot_id, toot_url):
         except Exception as ex:
             log(f"Error parsing context for comment {toot_url}. Exception: {ex}")
             return []
-    elif resp.status_code == 429:
-        reset = datetime.strptime(resp.headers['x-ratelimit-reset'], '%Y-%m-%dT%H:%M:%S.%fZ')
-        log(f"Rate Limit hit when getting context for {toot_url}. Waiting to retry at {resp.headers['x-ratelimit-reset']}")
-        time.sleep((reset - datetime.now()).total_seconds() + 1)
-        return get_lemmy_comment_context(webserver, toot_id, toot_url)
 
 def get_lemmy_comments_urls(webserver, post_id, toot_url):
     """get the URLs of the comments of the given post"""
@@ -811,11 +814,6 @@ def get_lemmy_comments_urls(webserver, post_id, toot_url):
             return urls
         except Exception as ex:
             log(f"Error parsing comments for post {toot_url}. Exception: {ex}")
-    elif resp.status_code == 429:
-        reset = datetime.strptime(resp.headers['x-ratelimit-reset'], '%Y-%m-%dT%H:%M:%S.%fZ')
-        log(f"Rate Limit hit when getting comments for {toot_url}. Waiting to retry at {resp.headers['x-ratelimit-reset']}")
-        time.sleep((reset - datetime.now()).total_seconds() + 1)
-        return get_lemmy_comments_urls(webserver, post_id, toot_url)
 
     log(f"Error getting comments for post {toot_url}. Status code: {resp.status_code}")
     return []
@@ -901,11 +899,6 @@ def add_context_url(url, server, access_token):
             "Make sure you have the read:search scope enabled for your access token."
         )
         return False
-    elif resp.status_code == 429:
-        reset = datetime.strptime(resp.headers['x-ratelimit-reset'], '%Y-%m-%dT%H:%M:%S.%fZ')
-        log(f"Rate Limit hit when adding url {search_url}. Waiting to retry at {resp.headers['x-ratelimit-reset']}")
-        time.sleep((reset - datetime.now()).total_seconds() + 1)
-        return add_context_url(url, server, access_token)
     else:
         log(
             f"Error adding url {search_url} to server {server}. Status code: {resp.status_code}"
```
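The four hunks above all remove the same inline 429 retry block. For reference, a minimal standalone sketch of what that removed pattern does with the `x-ratelimit-reset` header (the header value below is made up, and `print` stands in for the file's `log` helper):

```python
from datetime import datetime
import time

# Hypothetical value of resp.headers['x-ratelimit-reset']
reset_header = "2024-01-01T12:30:00.000Z"

# Same format string the removed blocks used
reset = datetime.strptime(reset_header, "%Y-%m-%dT%H:%M:%S.%fZ")

# Sleep until just past the reset time, after which the caller retries the request.
# (The removed code adds 1 second and does not clamp at zero; max() keeps this sketch runnable.)
wait_seconds = max((reset - datetime.now()).total_seconds() + 1, 0)
print(f"Rate limit hit, waiting {wait_seconds:.0f}s before retrying")
time.sleep(wait_seconds)
```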
```diff
@@ -1142,6 +1135,7 @@ def get_nodeinfo(server, seen_hosts, host_meta_fallback = False):
         return None
 
+    if resp.status_code == 200:
     nodeLoc = None
     try:
         nodeInfo = resp.json()
         for link in nodeInfo['links']:
```
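`get_nodeinfo` follows the standard NodeInfo discovery flow: the well-known endpoint returns a `links` list, and the entry whose `rel` names a NodeInfo schema points at the actual metadata document. A minimal sketch of that flow using plain `requests` (the function name and URL handling are illustrative, not the author's implementation):

```python
import requests

def discover_nodeinfo(server, timeout=30):
    """Fetch a server's NodeInfo document and return the software name, or None."""
    resp = requests.get(f"https://{server}/.well-known/nodeinfo", timeout=timeout)
    if resp.status_code != 200:
        return None
    node_loc = None
    for link in resp.json().get("links", []):
        # NodeInfo schema rels look like ".../nodeinfo.diaspora.software/ns/schema/2.0"
        if "nodeinfo.diaspora.software/ns/schema" in link.get("rel", ""):
            node_loc = link.get("href")
    if node_loc is None:
        return None
    resp = requests.get(node_loc, timeout=timeout)
    if resp.status_code != 200:
        return None
    # e.g. {'software': {'name': 'mastodon', 'version': '4.2.0'}, ...}
    return resp.json().get("software", {}).get("name")

# Hypothetical usage:
# print(discover_nodeinfo("example.social"))
```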
```diff
@@ -1174,7 +1168,7 @@ def get_nodeinfo(server, seen_hosts, host_meta_fallback = False):
     # return early if the web domain has been seen previously (in cases with host-meta lookups)
     if server in seen_hosts:
-        return seen_hosts[server]
+        return seen_hosts.get(server)
 
     try:
         resp = get(nodeLoc, timeout = 30)
```
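The switch from indexing to `.get()` changes what happens when the key is missing: indexing raises `KeyError`, while `.get()` returns `None`. A tiny illustration with a plain dict (the real `seen_hosts` object may be a custom container, so this is only an analogy):

```python
seen_hosts = {"mastodon.social": {"software": "mastodon"}}

print(seen_hosts.get("mastodon.social"))  # {'software': 'mastodon'}
print(seen_hosts.get("unknown.example"))  # None, where seen_hosts["unknown.example"] would raise KeyError
```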
```diff
@@ -1224,8 +1218,8 @@ def get_server_info(server, seen_hosts):
 def set_server_apis(server):
     # support for new server software should be added here
     software_apis = {
-        'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown'],
-        'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey'],
+        'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown', 'iceshrimp'],
+        'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey', 'sharkey'],
         'lemmyApiSupport': ['lemmy']
     }
```
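The hunk above extends the software-to-API map with `iceshrimp` (served via the Mastodon-compatible API) and `sharkey` (served via the Misskey-compatible API). A minimal sketch of how such a map can be consulted to derive API-support flags for a server (the helper below is illustrative; the rest of `set_server_apis` is not shown in this diff):

```python
SOFTWARE_APIS = {
    "mastodonApiSupport": ["mastodon", "pleroma", "akkoma", "pixelfed", "hometown", "iceshrimp"],
    "misskeyApiSupport": ["misskey", "calckey", "firefish", "foundkey", "sharkey"],
    "lemmyApiSupport": ["lemmy"],
}

def api_flags_for(software_name):
    """Return a dict of API-support flags for a given server software name."""
    return {api: software_name in names for api, names in SOFTWARE_APIS.items()}

# Hypothetical usage
print(api_flags_for("sharkey"))
# -> {'mastodonApiSupport': False, 'misskeyApiSupport': True, 'lemmyApiSupport': False}
```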