Add hacky support for Misskey and Calckey/Firefish
This commit is contained in:
parent
213ef57abe
commit
c169b2ae30
1 changed files with 136 additions and 3 deletions
139
find_posts.py
139
find_posts.py
|
@ -75,7 +75,7 @@ def add_user_posts(server, access_token, followings, know_followings, all_known_
|
||||||
count = 0
|
count = 0
|
||||||
failed = 0
|
failed = 0
|
||||||
for post in posts:
|
for post in posts:
|
||||||
if post.get('reblog') is None and post.get('url') is not None and post.get('url') not in seen_urls:
|
if post.get('reblog') is None and post.get('renoteId') is None and post.get('url') is not None and post.get('url') not in seen_urls:
|
||||||
added = add_post_with_context(post, server, access_token, seen_urls)
|
added = add_post_with_context(post, server, access_token, seen_urls)
|
||||||
if added is True:
|
if added is True:
|
||||||
seen_urls.add(post['url'])
|
seen_urls.add(post['url'])
|
||||||
|
@ -150,7 +150,11 @@ def get_user_posts(user, know_followings, server):
|
||||||
user_id = get_user_id(parsed_url[0], parsed_url[1])
|
user_id = get_user_id(parsed_url[0], parsed_url[1])
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
log(f"Error getting user ID for user {user['acct']}: {ex}")
|
log(f"Error getting user ID for user {user['acct']}: {ex}")
|
||||||
return None
|
|
||||||
|
## HACK HACK HACK: in the future actually try to detect the correct APIs to use at runtime
|
||||||
|
log('trying misskey')
|
||||||
|
|
||||||
|
return get_user_posts_misskey(user, know_followings, parsed_url[0])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
url = f"https://{parsed_url[0]}/api/v1/accounts/{user_id}/statuses?limit=40"
|
url = f"https://{parsed_url[0]}/api/v1/accounts/{user_id}/statuses?limit=40"
|
||||||
|
@ -169,7 +173,57 @@ def get_user_posts(user, know_followings, server):
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
log(f"Error getting posts for user {user['acct']}: {ex}")
|
log(f"Error getting posts for user {user['acct']}: {ex}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_user_posts_misskey(user, know_followings, server):
    """Get the most recent notes of a user from a Misskey or Calckey/Firefish server.

    The user is first looked up through the username search API to obtain the
    server-local user ID; that ID is then used to request up to 40 notes.

    Args:
        user: account dict; only ``user['username']`` is read.
        know_followings: not used by this function; kept so the signature
            matches ``get_user_posts``.
        server: host name of the server to query.

    Returns:
        The list of note dicts returned by the server, each carrying a
        ``url`` key, or ``None`` if any step fails.
    """
    username = user['username']

    # query user info via search api
    # we could filter by host but there's no way to limit that to just the main host on firefish currently
    # on misskey it works if you supply '.' as the host but firefish does not
    url = f'https://{server}/api/users/search-by-username-and-host'
    try:
        resp = post(url, { 'username': username })
    except Exception as ex:
        log(f"Error finding user {username} from {server}. Exception: {ex}")
        return None

    if resp.status_code != 200:
        log(f"Error finding user {username} from {server}. Status Code: {resp.status_code}")
        return None

    # Must start out as None: the search may return no (local) match at all.
    user_id = None
    try:
        wanted = username.lower()
        # Use a distinct loop name so the `user` argument is not clobbered.
        for candidate in resp.json():
            # host is None for accounts local to the queried server.
            if candidate['host'] is not None:
                continue
            # The search endpoint matches by prefix, so insist on the exact
            # username instead of accepting the first local hit.
            if str(candidate.get('username', '')).lower() != wanted:
                continue
            user_id = candidate['id']
            break
        if user_id is None:
            raise Exception('user not found on server in search')
    except Exception as ex:
        log(f"Error finding user {username} from {server}. Exception: {ex}")
        return None

    url = f'https://{server}/api/users/notes'
    try:
        resp = post(url, { 'userId': user_id, 'limit': 40 })
    except Exception as ex:
        log(f"Error getting posts by user {username} from {server}. Exception: {ex}")
        return None

    if resp.status_code != 200:
        log(f"Error getting posts by user {username} from {server}. Status Code: {resp.status_code}")
        return None

    try:
        notes = resp.json()
        for note in notes:
            if note.get('url') is None:
                # add this to make it look like Mastodon status objects
                note.update({ 'url': f"https://{server}/notes/{note['id']}" })
        return notes
    except Exception as ex:
        log(f"Error getting posts by user {username} from {server}. Exception: {ex}")
        return None
|
||||||
|
|
||||||
|
|
||||||
def get_new_follow_requests(server, access_token, max, known_followings):
|
def get_new_follow_requests(server, access_token, max, known_followings):
|
||||||
"""Get any new follow requests for the specified user, up to the max number provided"""
|
"""Get any new follow requests for the specified user, up to the max number provided"""
|
||||||
|
|
||||||
|
@ -504,6 +558,11 @@ def parse_url(url, parsed_urls):
|
||||||
if match is not None:
|
if match is not None:
|
||||||
parsed_urls[url] = match
|
parsed_urls[url] = match
|
||||||
|
|
||||||
|
if url not in parsed_urls:
|
||||||
|
match = parse_misskey_url(url)
|
||||||
|
if match is not None:
|
||||||
|
parsed_urls[url] = match
|
||||||
|
|
||||||
if url not in parsed_urls:
|
if url not in parsed_urls:
|
||||||
log(f"Error parsing toot URL {url}")
|
log(f"Error parsing toot URL {url}")
|
||||||
parsed_urls[url] = None
|
parsed_urls[url] = None
|
||||||
|
@ -560,6 +619,15 @@ def parse_pixelfed_url(url):
|
||||||
return (match.group("server"), match.group("toot_id"))
|
return (match.group("server"), match.group("toot_id"))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def parse_misskey_url(url):
    """split a Misskey note URL into a (server, note ID) tuple, or return None if it does not match"""
    found = re.match(
        r"https://(?P<server>[^/]+)/notes/(?P<toot_id>[^/]+)", url
    )
    if found is None:
        return None
    # Requesting several groups at once yields them as a tuple.
    return found.group("server", "toot_id")
|
||||||
|
|
||||||
def parse_pixelfed_profile_url(url):
|
def parse_pixelfed_profile_url(url):
|
||||||
"""parse a Pixelfed Profile URL and return the server and username"""
|
"""parse a Pixelfed Profile URL and return the server and username"""
|
||||||
match = re.match(r"https://(?P<server>[^/]+)/(?P<username>[^/]+)", url)
|
match = re.match(r"https://(?P<server>[^/]+)/(?P<username>[^/]+)", url)
|
||||||
|
@ -623,6 +691,8 @@ def get_toot_context(server, toot_id, toot_url):
|
||||||
return get_comment_context(server, toot_id, toot_url)
|
return get_comment_context(server, toot_id, toot_url)
|
||||||
if toot_url.find("/post/") != -1:
|
if toot_url.find("/post/") != -1:
|
||||||
return get_comments_urls(server, toot_id, toot_url)
|
return get_comments_urls(server, toot_id, toot_url)
|
||||||
|
if toot_url.find("/notes/") != -1:
|
||||||
|
return get_misskey_urls(server, toot_id, toot_url)
|
||||||
url = f"https://{server}/api/v1/statuses/{toot_id}/context"
|
url = f"https://{server}/api/v1/statuses/{toot_id}/context"
|
||||||
try:
|
try:
|
||||||
resp = get(url)
|
resp = get(url)
|
||||||
|
@ -716,6 +786,47 @@ def get_comments_urls(server, post_id, toot_url):
|
||||||
log(f"Error getting comments for post {toot_url}. Status code: {resp.status_code}")
|
log(f"Error getting comments for post {toot_url}. Status code: {resp.status_code}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def get_misskey_urls(server, post_id, toot_url):
    """get the URLs of the notes related to a given misskey post

    Queries the server's ``notes/children`` and ``notes/conversation``
    endpoints for ``post_id`` and builds a ``https://{server}/notes/{id}``
    URL for every note returned.

    Args:
        server: host name of the Misskey-compatible server.
        post_id: ID of the note on that server.
        toot_url: URL of the note; only used in log messages.

    Returns:
        A list of note URLs. It holds whatever was collected before a failure
        and is empty when nothing could be fetched.
    """
    urls = []

    for kind in ('children', 'conversation'):
        url = f"https://{server}/api/notes/{kind}"
        try:
            resp = post(url, { 'noteId': post_id })
        except Exception as ex:
            log(f"Error getting post {post_id} from {toot_url}. Exception: {ex}")
            # Give up on further requests, but keep what was already
            # collected rather than throwing it away.
            return urls

        if resp.status_code != 200:
            log(f"Error getting post {post_id} from {toot_url}. Status Code: {resp.status_code}")
            continue

        try:
            res = resp.json()
            log(f"Got {kind} for misskey post {toot_url}")
            # Build the full list first so a malformed entry adds nothing
            # from this response.
            list_of_urls = [f'https://{server}/notes/{comment_info["id"]}' for comment_info in res]
            urls.extend(list_of_urls)
        except Exception as ex:
            log(f"Error parsing post {post_id} from {toot_url}. Exception: {ex}")

    return urls
|
||||||
|
|
||||||
def add_context_urls(server, access_token, context_urls, seen_urls):
|
def add_context_urls(server, access_token, context_urls, seen_urls):
|
||||||
"""add the given toot URLs to the server"""
|
"""add the given toot URLs to the server"""
|
||||||
count = 0
|
count = 0
|
||||||
|
@ -845,6 +956,28 @@ def get(url, headers = {}, timeout = 0, max_tries = 5):
|
||||||
raise Exception(f"Maximum number of retries exceeded for rate limited request {url}")
|
raise Exception(f"Maximum number of retries exceeded for rate limited request {url}")
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def post(url, json, headers = {}, timeout = 0, max_tries = 5):
    """A simple wrapper to make a post request while providing our user agent, and respecting rate limits

    Args:
        url: URL to send the request to.
        json: payload passed to ``requests.post`` as the JSON body.
        headers: extra request headers; copied, never modified. A default
            User-Agent is added when none is supplied.
        timeout: request timeout; 0 means use ``arguments.http_timeout``.
        max_tries: how many more times to retry after a 429 response.

    Returns:
        The response object of the first request that was not answered
        with status 429.

    Raises:
        Exception: when the request is still rate limited after all retries.
    """
    h = headers.copy()
    if 'User-Agent' not in h:
        h['User-Agent'] = 'FediFetcher (https://go.thms.uk/mgr)'

    if timeout == 0:
        timeout = arguments.http_timeout

    response = requests.post( url, json=json, headers= h, timeout=timeout)
    if response.status_code == 429:
        if max_tries > 0:
            reset = parser.parse(response.headers['x-ratelimit-reset'])
            now = datetime.now(datetime.now().astimezone().tzinfo)
            # The reset time may already have passed (clock skew, slow
            # response); time.sleep() rejects negative values, so clamp to 0.
            wait = max((reset - now).total_seconds() + 1, 0)
            log(f"Rate Limit hit requesting {url}. Waiting {wait} sec to retry at {response.headers['x-ratelimit-reset']}")
            time.sleep(wait)
            return post(url, json, headers, timeout, max_tries - 1)

        raise Exception(f"Maximum number of retries exceeded for rate limited request {url}")
    return response
|
||||||
|
|
||||||
def log(text):
|
def log(text):
|
||||||
print(f"{datetime.now()} {datetime.now().astimezone().tzinfo}: {text}")
|
print(f"{datetime.now()} {datetime.now().astimezone().tzinfo}: {text}")
|
||||||
|
|
||||||
|
@ -1123,4 +1256,4 @@ if __name__ == "__main__":
|
||||||
get(f"{arguments.on_fail}?rid={runId}")
|
get(f"{arguments.on_fail}?rid={runId}")
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
log(f"Error getting callback url: {ex}")
|
log(f"Error getting callback url: {ex}")
|
||||||
raise
|
raise
|
||||||
|
|
Loading…
Reference in a new issue