inoreader2readwise/job/main.py

184 lines
5.8 KiB
Python
Raw Permalink Normal View History

2024-01-17 11:14:23 +01:00
import os
import time
import json
import requests
import logging
2024-01-17 11:14:23 +01:00
# Path of the state file holding the sync high-water mark: it is read as an
# integer at the start of every sync cycle and overwritten with the largest
# ``added_on`` value after a successful push.
DATA_STORE_PATH = "/data/last_update_time.txt"
2024-01-24 12:29:35 +01:00
# Base URL of the service the Inoreader OAuth tokens are fetched from
# (``/token/latest``) and saved to (``/token``). Read from the environment,
# so it is None when DATABASE_URL is unset.
DATABASE_URL = os.getenv("DATABASE_URL")
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
class APIHandler:
    """Minimal JSON-over-HTTP client bound to one base URL and header set.

    Attributes:
        base_url: prefix prepended verbatim to every endpoint.
        headers: headers sent with every request.
        timeout: timeout in seconds handed to ``requests`` for each call.
    """

    # ``requests`` waits indefinitely when no timeout is given, which would
    # hang the long-running sync loop on a stalled connection.
    DEFAULT_TIMEOUT = 30

    def __init__(self, base_url, headers=None, timeout=DEFAULT_TIMEOUT):
        """Bind the handler to ``base_url``.

        Args:
            base_url: URL prefix; endpoints are appended to it as-is.
            headers: optional dict of headers for every request. The dict is
                stored by reference, not copied.
            timeout: per-request timeout in seconds (``None`` disables it).
        """
        self.base_url = base_url
        # A ``{}`` default argument would be one dict shared by every
        # instance created without headers; build a fresh one instead.
        self.headers = {} if headers is None else headers
        self.timeout = timeout

    def get(self, endpoint, params=None):
        """GET ``base_url + endpoint`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the response status is 4xx/5xx.
        """
        response = requests.get(
            self.base_url + endpoint,
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def post(self, endpoint, data=None):
        """POST ``data`` serialized as JSON and return the HTTP status code.

        Raises:
            requests.HTTPError: if the response status is 4xx/5xx.
        """
        response = requests.post(
            self.base_url + endpoint,
            data=json.dumps(data),
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.status_code
2024-01-17 11:14:23 +01:00
def get_last_update_time():
    """Return the ``added_on`` value of the newest annotation already synced.

    Reads the integer stored in ``DATA_STORE_PATH``. A missing or empty state
    file means nothing has been synced yet, so ``0`` is returned and the
    first run can bootstrap itself instead of failing forever.

    Returns:
        int: the stored timestamp, or ``0`` when no state exists yet.

    Raises:
        ValueError: if the file contains something other than an integer.
    """
    try:
        with open(DATA_STORE_PATH, 'r') as file:
            stored = file.read().strip()
    except FileNotFoundError:
        return 0
    # An empty file can be left behind by an interrupted write.
    return int(stored) if stored else 0
2024-01-17 11:14:23 +01:00
def update_last_update_time(new_time):
    """Persist ``new_time`` as the new sync high-water mark.

    The state file at ``DATA_STORE_PATH`` is overwritten with the string
    form of ``new_time``.
    """
    with open(DATA_STORE_PATH, mode='w') as state_file:
        serialized = str(new_time)
        state_file.write(serialized)
2024-01-17 11:14:23 +01:00
2024-01-17 13:01:17 +01:00
def get_new_annotations(last_annotation_time):
    """Fetch Inoreader annotations added after ``last_annotation_time``.

    Pages through the ``annotated`` stream 100 items at a time. Every
    annotation found on an item is enriched with that item's ``title``,
    ``author`` and ``sources`` (the item's ``canonical`` entry) before it is
    collected.

    Args:
        last_annotation_time: only annotations whose ``added_on`` is strictly
            greater than this value are returned.

    Returns:
        list[dict]: the enriched annotation dicts that are newer than
        ``last_annotation_time``.

    Raises:
        requests.HTTPError: if Inoreader answers with an error status.
    """
    inoreader = APIHandler(
        "https://www.inoreader.com/reader/api/0/stream/contents",
        headers={
            'Authorization': 'Bearer ' + get_inoreader_access_token()
        }
    )

    all_annotations = []
    continuation = None

    while True:
        params = {
            "annotations": 1,
            "n": 100,
        }
        if continuation:
            params["c"] = continuation

        # APIHandler.get already returns the decoded JSON body; running it
        # through json.loads again would raise TypeError.
        data = inoreader.get(
            "/user/-/state/com.google/annotated",
            params=params
        )

        for item in data.get("items", []):
            for annotation in item.get("annotations", []):
                # Use .get so one item lacking a field does not abort the
                # whole sync with a KeyError.
                annotation['title'] = item.get('title')
                annotation['author'] = item.get('author')
                annotation['sources'] = item.get('canonical', [])
                all_annotations.append(annotation)

        # Stop on a missing *or empty* continuation token: an empty token
        # would otherwise re-request the first page forever.
        continuation = data.get('continuation')
        if not continuation:
            break
        time.sleep(900)  # Sleep for 15 minutes between pages

    return [
        annotation
        for annotation in all_annotations
        if annotation['added_on'] > last_annotation_time
    ]
2024-01-17 12:25:43 +01:00
2024-01-17 13:01:17 +01:00
def push_annotations_to_readwise(annotations):
    """Create one Readwise highlight per annotation with a single API call.

    Args:
        annotations: enriched annotation dicts. Each needs ``text`` and
            ``added_on``; ``title``, ``author``, ``note`` and ``sources``
            are optional.

    Raises:
        RuntimeError: if ``READWISE_ACCESS_TOKEN`` is not set.
        requests.HTTPError: if Readwise answers with an error status.
    """
    if not annotations:
        # Nothing to send; skip the HTTP round trip.
        return

    token = os.getenv("READWISE_ACCESS_TOKEN")
    if not token:
        # Fail with a clear message instead of a TypeError from
        # concatenating 'Token ' with None.
        raise RuntimeError("READWISE_ACCESS_TOKEN is not set")

    readwise = APIHandler(
        "https://readwise.io",
        headers={
            'Authorization': 'Token ' + token,
            'Content-Type': 'application/json'
        }
    )

    readwise.post(
        "/api/v2/highlights/",
        data={
            'highlights': [
                _to_readwise_highlight(annotation)
                for annotation in annotations
            ]
        }
    )


def _to_readwise_highlight(annotation):
    """Map one enriched Inoreader annotation onto a Readwise highlight dict."""
    added_on = annotation['added_on']
    if isinstance(added_on, (int, float)):
        # Readwise documents ``highlighted_at`` as an ISO 8601 datetime.
        # NOTE(review): assumes ``added_on`` is a Unix timestamp in seconds
        # (UTC) -- confirm against the Inoreader annotation schema.
        added_on = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(added_on))

    sources = annotation.get('sources')
    highlight = {
        'text': annotation['text'],
        'title': annotation.get('title'),
        'author': annotation.get('author'),
        'note': annotation.get('note'),
        'highlighted_at': added_on,
        'category': 'articles',
        'source_url': sources[0]['href'] if sources else None,
    }
    # Leave out optional fields that have no value rather than sending null.
    return {key: value for key, value in highlight.items() if value is not None}
2024-01-24 12:29:35 +01:00
def get_inoreader_access_token():
    """Return a currently valid Inoreader OAuth access token.

    Asks the token service at ``DATABASE_URL`` for the latest stored token.
    If that token has not expired yet it is returned as is; otherwise it is
    exchanged for a new one via ``refresh_inoreader_access_token``.

    Returns:
        str: the access token.

    Raises:
        requests.HTTPError: if the token service answers with an error status.
        Exception: if no token is stored (HTTP 204) or the service answers
            with an unexpected success status.
    """
    response = requests.get(f'{DATABASE_URL}/token/latest')
    response.raise_for_status()

    if response.status_code == 204:
        # The service has no token stored: the user never logged in.
        raise Exception("Not logged in. Please log in first through the web app")
    if response.status_code != 200:
        raise Exception("Unable to get access token. Try logging in again through the web app")

    token = response.json()['token']
    # The original comparison used ``datetime.now().timestamp()`` without
    # importing ``datetime``; ``time.time()`` yields the same epoch seconds
    # and ``time`` is already imported by this module.
    expires_at = token['expiration_seconds'] + token['timestamp']
    if expires_at > time.time():
        return token['access_token']
    return refresh_inoreader_access_token(token['refresh_token'])
def refresh_inoreader_access_token(refresh_token):
    """Exchange ``refresh_token`` for a new Inoreader token pair.

    Posts a ``refresh_token`` grant to the Inoreader OAuth endpoint using the
    client credentials from ``INOREADER_CLIENT_ID`` and
    ``INOREADER_CLIENT_SECRET``, stores the returned tokens through
    ``save_tokens`` and returns the new access token.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    form_fields = {
        'refresh_token': refresh_token,
        'client_id': os.getenv("INOREADER_CLIENT_ID"),
        'client_secret': os.getenv("INOREADER_CLIENT_SECRET"),
        'grant_type': 'refresh_token'
    }
    reply = requests.post(
        'https://www.inoreader.com/oauth2/token',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        data=form_fields,
    )
    reply.raise_for_status()

    granted = reply.json()
    new_access_token = granted['access_token']
    # Persist the new pair so later calls can reuse it.
    save_tokens(new_access_token, granted['refresh_token'], granted['expires_in'])
    return new_access_token
def save_tokens(access_token, refresh_token, expiration_seconds):
    """Send a token triple to the token service at ``DATABASE_URL``.

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    payload = {
        'access_token': access_token,
        'refresh_token': refresh_token,
        'expiration_seconds': expiration_seconds
    }
    endpoint = f'{DATABASE_URL}/token'
    reply = requests.post(
        endpoint,
        headers={'Content-Type': 'application/json'},
        json=payload,
    )
    reply.raise_for_status()
2024-01-17 13:01:17 +01:00
def main():
    """Run the Inoreader-to-Readwise sync loop forever.

    Each cycle loads the stored high-water mark, fetches the annotations
    newer than it, pushes them to Readwise and then advances the mark. A
    successful cycle is followed by a 24 hour sleep; a failed one is logged
    with its traceback and retried after 1 hour.
    """
    while True:
        try:
            last_annotation_time = get_last_update_time()
            new_annotations = get_new_annotations(last_annotation_time)

            if new_annotations:
                latest_added_on = max(annotation['added_on'] for annotation in new_annotations)
                push_annotations_to_readwise(new_annotations)
                # Advance the mark only after the push succeeded, so a failed
                # push is retried with the same annotations.
                update_last_update_time(latest_added_on)
                logging.info("Pushed %d new annotations", len(new_annotations))
            else:
                logging.info("No new annotations found")

            time.sleep(86400)  # Sleep for 24 hours

        except Exception as e:
            # This handler swallows every error, so keep the traceback:
            # logging.exception logs at ERROR level and appends it.
            logging.exception("An error occurred: %s", e)
            time.sleep(3600)  # Sleep for 1 hour in case of error
2024-01-17 11:14:23 +01:00
# Start the sync loop only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()