improved error handling

Fixes #2
This commit is contained in:
Michael Thomas 2023-03-09 09:01:33 +00:00
parent dee0804188
commit f1dad28084

View file

@ -18,6 +18,8 @@ def pull_context(
reply_interval_hours, reply_interval_hours,
max_home_timeline_length, max_home_timeline_length,
): ):
parsed_urls = {}
if reply_interval_hours > 0: if reply_interval_hours > 0:
"""pull the context toots of toots user replied to, from their """pull the context toots of toots user replied to, from their
@ -26,10 +28,10 @@ def pull_context(
reply_toots = get_all_reply_toots( reply_toots = get_all_reply_toots(
server, user_ids, access_token, seen_urls, reply_interval_hours server, user_ids, access_token, seen_urls, reply_interval_hours
) )
known_context_urls = get_all_known_context_urls(server, reply_toots) known_context_urls = get_all_known_context_urls(server, reply_toots,parsed_urls)
seen_urls.update(known_context_urls) seen_urls.update(known_context_urls)
replied_toot_ids = get_all_replied_toot_server_ids( replied_toot_ids = get_all_replied_toot_server_ids(
server, reply_toots, replied_toot_server_ids server, reply_toots, replied_toot_server_ids, parsed_urls
) )
context_urls = get_all_context_urls(server, replied_toot_ids) context_urls = get_all_context_urls(server, replied_toot_ids)
add_context_urls(server, access_token, context_urls, seen_urls) add_context_urls(server, access_token, context_urls, seen_urls)
@ -38,7 +40,7 @@ def pull_context(
if max_home_timeline_length > 0: if max_home_timeline_length > 0:
"""Do the same with any toots on the key owner's home timeline """ """Do the same with any toots on the key owner's home timeline """
timeline_toots = get_timeline(server, access_token, max_home_timeline_length) timeline_toots = get_timeline(server, access_token, max_home_timeline_length)
known_context_urls = get_all_known_context_urls(server, timeline_toots) known_context_urls = get_all_known_context_urls(server, timeline_toots,parsed_urls)
add_context_urls(server, access_token, known_context_urls, seen_urls) add_context_urls(server, access_token, known_context_urls, seen_urls)
def get_timeline(server, access_token, max): def get_timeline(server, access_token, max):
@ -187,15 +189,15 @@ def get_reply_toots(user_id, server, access_token, seen_urls, reply_since):
) )
def get_all_known_context_urls(server, reply_toots): def get_all_known_context_urls(server, reply_toots,parsed_urls):
"""get the context toots of the given toots from their original server""" """get the context toots of the given toots from their original server"""
known_context_urls = set( known_context_urls = set(
filter( filter(
lambda url: not url.startswith(f"https://{server}/"), lambda url: not url.startswith(f"https://{server}/"),
itertools.chain.from_iterable( itertools.chain.from_iterable(
get_toot_context(*parse_url(toot["url"] if toot["reblog"] is None else toot["reblog"]["url"]), toot["url"]) get_toot_context(*parse_url(toot["url"] if toot["reblog"] is None else toot["reblog"]["url"],parsed_urls), toot["url"])
for toot in filter( for toot in filter(
toot_has_parseable_url, lambda toot: toot_has_parseable_url(toot,parsed_urls),
reply_toots reply_toots
) )
), ),
@ -205,27 +207,27 @@ def get_all_known_context_urls(server, reply_toots):
return known_context_urls return known_context_urls
def toot_has_parseable_url(toot): def toot_has_parseable_url(toot,parsed_urls):
parsed = parse_url(toot["url"] if toot["reblog"] is None else toot["reblog"]["url"]) parsed = parse_url(toot["url"] if toot["reblog"] is None else toot["reblog"]["url"],parsed_urls)
if(parsed is None) : if(parsed is None) :
return False return False
return True return True
def get_all_replied_toot_server_ids( def get_all_replied_toot_server_ids(
server, reply_toots, replied_toot_server_ids server, reply_toots, replied_toot_server_ids, parsed_urls
): ):
"""get the server and ID of the toots the given toots replied to""" """get the server and ID of the toots the given toots replied to"""
return filter( return filter(
lambda x: x is not None, lambda x: x is not None,
( (
get_replied_toot_server_id(server, toot, replied_toot_server_ids) get_replied_toot_server_id(server, toot, replied_toot_server_ids, parsed_urls)
for toot in reply_toots for toot in reply_toots
), ),
) )
def get_replied_toot_server_id(server, toot, replied_toot_server_ids): def get_replied_toot_server_id(server, toot, replied_toot_server_ids,parsed_urls):
"""get the server and ID of the toot the given toot replied to""" """get the server and ID of the toot the given toot replied to"""
in_reply_to_id = toot["in_reply_to_id"] in_reply_to_id = toot["in_reply_to_id"]
in_reply_to_account_id = toot["in_reply_to_account_id"] in_reply_to_account_id = toot["in_reply_to_account_id"]
@ -248,7 +250,7 @@ def get_replied_toot_server_id(server, toot, replied_toot_server_ids):
if url is None: if url is None:
return None return None
match = parse_url(url) match = parse_url(url,parsed_urls)
if match is not None: if match is not None:
replied_toot_server_ids[o_url] = (url, match) replied_toot_server_ids[o_url] = (url, match)
return (url, match) return (url, match)
@ -257,17 +259,22 @@ def get_replied_toot_server_id(server, toot, replied_toot_server_ids):
replied_toot_server_ids[o_url] = None replied_toot_server_ids[o_url] = None
return None return None
def parse_url(url): def parse_url(url, parsed_urls):
match = parse_mastodon_url(url) if url not in parsed_urls:
if match is not None: match = parse_mastodon_url(url)
return match if match is not None:
parsed_urls[url] = match
if url not in parsed_urls:
match = parse_pleroma_url(url)
if match is not None:
parsed_urls[url] = match
match = parse_pleroma_url(url) if url not in parsed_urls:
if match is not None: print(f"Error parsing toot URL {url}")
return match parsed_urls[url] = None
print(f"Error parsing toot URL {url}") return parsed_urls[url]
return None
def parse_mastodon_url(url): def parse_mastodon_url(url):
"""parse a Mastodon URL and return the server and ID""" """parse a Mastodon URL and return the server and ID"""