"""Background job that copies new Inoreader annotations into Readwise.

For every active token returned by the database service the job:

1. refreshes the Inoreader access token when it has expired,
2. downloads the user's annotated items from Inoreader,
3. pushes the annotations newer than the stored watermark to Readwise,
4. stores the newest ``added_on`` value as the new watermark.

Source file: ``job/main.py``.
Third-party dependency: ``requests==2.31.0`` (pinned in
``job/requirements.txt``).
"""

import json
import logging
import os
import time
from datetime import datetime, timezone

import requests

DATA_STORE_PATH = "/data/last_update_time.txt"

DATABASE_URL = os.getenv("DATABASE_URL")

# ``requests`` never times out unless told to; without this a stalled
# connection would block the job forever.
REQUEST_TIMEOUT_SECONDS = 30

# Pause between two Inoreader result pages (15 minutes).
PAGE_DELAY_SECONDS = 900
# Pause after a fully successful pass over all users (24 hours).
SUCCESS_SLEEP_SECONDS = 86400
# Pause before retrying after a pass in which something failed (1 hour).
ERROR_SLEEP_SECONDS = 3600

logging.basicConfig(level=logging.INFO)


class APIHandler:
    """Small HTTP helper bound to one base URL and one set of headers."""

    def __init__(self, base_url, headers=None, timeout=REQUEST_TIMEOUT_SECONDS):
        """Store the connection settings.

        Args:
            base_url: Prefix prepended to every endpoint.
            headers: Optional headers sent with every request.
            timeout: Per-request timeout in seconds.
        """
        self.base_url = base_url
        # Copy instead of sharing: a mutable default (or the caller's dict)
        # must not leak state between instances.
        self.headers = dict(headers) if headers else {}
        self.timeout = timeout

    def get(self, endpoint, params=None):
        """GET ``base_url + endpoint`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: On a 4xx/5xx response.
        """
        response = requests.get(
            self.base_url + endpoint,
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def post(self, endpoint, data=None):
        """POST *data* as a JSON string and return the HTTP status code.

        Raises:
            requests.HTTPError: On a 4xx/5xx response.
        """
        response = requests.post(
            self.base_url + endpoint,
            data=json.dumps(data),
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.status_code


def get_last_update_time(email):
    """Return the stored ``last_update_time`` for *email*, or 0 if none.

    A 200 response yields the stored value. Any other successful status
    (including 204) yields 0, so the caller always gets a comparable value
    rather than ``None``.

    Raises:
        requests.HTTPError: On a 4xx/5xx response.
    """
    response = requests.get(
        f"{DATABASE_URL}/annotation_last_update/{email}",
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()

    if response.status_code == 200:
        return response.json()["last_update_time"]
    return 0


def update_last_update_time(email, new_time):
    """Persist *new_time* as the ``last_update_time`` for *email*.

    Raises:
        requests.HTTPError: On a 4xx/5xx response.
    """
    response = requests.post(
        f"{DATABASE_URL}/annotation_last_update",
        headers={"Content-Type": "application/json"},
        json={"email": email, "last_update_time": new_time},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()


def get_new_annotations(
    last_annotation_time,
    inoreader_token,
    page_delay_seconds=PAGE_DELAY_SECONDS,
):
    """Fetch annotations whose ``added_on`` is after *last_annotation_time*.

    Every page of the user's annotated stream is downloaded. Each annotation
    is enriched with its item's ``title``, ``author`` and ``canonical`` links
    (stored under ``sources``).

    Args:
        last_annotation_time: Watermark; only annotations with a strictly
            greater ``added_on`` are returned.
        inoreader_token: The Inoreader access token, either as a string or as
            a zero-argument callable returning it.
        page_delay_seconds: Pause between two result pages.

    Returns:
        A list of annotation dicts.

    Raises:
        requests.HTTPError: On a 4xx/5xx response from Inoreader.
    """
    # ``main`` passes a plain string; calling it unconditionally raised
    # TypeError. Callables stay supported for backward compatibility.
    access_token = inoreader_token() if callable(inoreader_token) else inoreader_token

    inoreader = APIHandler(
        "https://www.inoreader.com/reader/api/0/stream/contents",
        headers={"Authorization": "Bearer " + access_token},
    )

    all_annotations = []
    continuation = None

    while True:
        params = {"annotations": 1, "n": 100}
        if continuation:
            params["c"] = continuation

        # ``APIHandler.get`` already returns decoded JSON; running
        # ``json.loads`` on that dict raised TypeError.
        data = inoreader.get("/user/-/state/com.google/annotated", params=params)

        for item in data.get("items", []):
            for annotation in item.get("annotations", []):
                # ``.get`` tolerates items that lack one of these fields.
                annotation["title"] = item.get("title")
                annotation["author"] = item.get("author")
                annotation["sources"] = item.get("canonical", [])
                all_annotations.append(annotation)

        # A missing or empty continuation both mean "last page"; an empty one
        # would otherwise re-request the first page forever.
        continuation = data.get("continuation")
        if not continuation:
            break
        time.sleep(page_delay_seconds)

    return [
        annotation
        for annotation in all_annotations
        if annotation["added_on"] > last_annotation_time
    ]


def _to_iso8601(timestamp):
    """Return a numeric *timestamp* as an ISO 8601 UTC string.

    Non-numeric values (for example an already formatted string) are
    returned unchanged.
    """
    # NOTE(review): assumes numeric ``added_on`` values are Unix epoch
    # seconds - confirm against the Inoreader API documentation.
    if isinstance(timestamp, (int, float)) and not isinstance(timestamp, bool):
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()
    return timestamp


def push_annotations_to_readwise(annotations, readwise_token):
    """Create one Readwise highlight per annotation in a single request.

    Args:
        annotations: Annotation dicts as produced by ``get_new_annotations``.
        readwise_token: Readwise API token.

    Raises:
        requests.HTTPError: On a 4xx/5xx response from Readwise.
    """
    readwise = APIHandler(
        "https://readwise.io",
        headers={
            "Authorization": "Token " + readwise_token,
            "Content-Type": "application/json",
        },
    )

    highlights = []
    for annotation in annotations:
        sources = annotation.get("sources")
        highlights.append(
            {
                "text": annotation["text"],
                "title": annotation.get("title"),
                "author": annotation.get("author"),
                # Not every annotation carries a note.
                "note": annotation.get("note"),
                "highlighted_at": _to_iso8601(annotation["added_on"]),
                "category": "articles",
                "source_url": sources[0].get("href") if sources else None,
            }
        )

    readwise.post("/api/v2/highlights/", data={"highlights": highlights})


def refresh_inoreader_access_token(refresh_token, readwise_api_key):
    """Exchange *refresh_token* for a new access token and persist it.

    The new token set is saved through ``save_token`` under the e-mail
    address reported by Inoreader's user-info endpoint.

    Returns:
        ``(access_token, readwise_api_key)``.

    Raises:
        requests.HTTPError: On a 4xx/5xx response from Inoreader or the
            database service.
    """
    response = requests.post(
        "https://www.inoreader.com/oauth2/token",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={
            "refresh_token": refresh_token,
            "client_id": os.getenv("INOREADER_CLIENT_ID"),
            "client_secret": os.getenv("INOREADER_CLIENT_SECRET"),
            "grant_type": "refresh_token",
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    token = response.json()

    user_info_response = requests.get(
        "https://www.inoreader.com/reader/api/0/user-info",
        headers={"Authorization": f'Bearer {token.get("access_token")}'},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Without this an error payload would be saved as if it were user info.
    user_info_response.raise_for_status()
    user_info = user_info_response.json()

    # Save tokens for later use
    save_token(
        user_info.get("userEmail"),
        token["access_token"],
        token["refresh_token"],
        token["expires_in"],
        readwise_api_key,
    )

    return token["access_token"], readwise_api_key


def save_token(email, access_token, refresh_token, expiration_seconds, readwise_api_key):
    """Store a token set for *email* in the database service.

    Raises:
        requests.HTTPError: On a 4xx/5xx response.
    """
    response = requests.post(
        f"{DATABASE_URL}/token",
        headers={"Content-Type": "application/json"},
        json={
            "email": email,
            "access_token": access_token,
            "refresh_token": refresh_token,
            "expiration_seconds": expiration_seconds,
            "readwise_api_key": readwise_api_key,
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()


def get_all_active_tokens():
    """Return the active tokens known to the database service.

    Returns:
        The ``tokens`` list of a 200 response, otherwise an empty list.

    Raises:
        requests.HTTPError: On a 4xx/5xx response.
    """
    response = requests.get(
        f"{DATABASE_URL}/token/all?only_active=true",
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()

    if response.status_code == 200:
        return response.json()["tokens"]
    return []


def deactivate_token(token_id):
    """Mark the token with *token_id* as inactive in the database service.

    Raises:
        requests.HTTPError: On a 4xx/5xx response.
    """
    response = requests.post(
        f"{DATABASE_URL}/token/{token_id}/deactivate",
        headers={"Content-Type": "application/json"},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()


def check_and_refresh_access_token(token):
    """Return usable credentials for *token*, refreshing them when expired.

    Args:
        token: Token record with ``id``, ``access_token``, ``refresh_token``,
            ``readwise_api_key``, ``expiration_seconds`` and ``timestamp``.

    Returns:
        ``(inoreader_access_token, readwise_api_key)``.
    """
    # ``time.time()`` replaces ``datetime.now().timestamp()``: ``datetime``
    # was never imported, so every call raised NameError.
    expires_at = token["expiration_seconds"] + token["timestamp"]
    if expires_at > time.time():
        return token["access_token"], token["readwise_api_key"]

    # NOTE(review): the old token is deactivated before the refresh is
    # attempted, so a failed refresh leaves the user without an active
    # token - confirm that this is intended.
    deactivate_token(token["id"])
    return refresh_inoreader_access_token(
        token["refresh_token"], token["readwise_api_key"]
    )


def _sync_user(token):
    """Push one user's new Inoreader annotations to Readwise."""
    email = token["email"]
    logging.info("Checking for new annotations for user with email: %s", email)

    inoreader_token, readwise_api_key = check_and_refresh_access_token(token)

    last_annotation_time = get_last_update_time(email)
    new_annotations = get_new_annotations(last_annotation_time, inoreader_token)

    if not new_annotations:
        logging.info("No new annotations found for user with email: %s", email)
        return

    latest_added_on = max(annotation["added_on"] for annotation in new_annotations)
    push_annotations_to_readwise(new_annotations, readwise_api_key)
    # Advance the watermark only after Readwise accepted the batch.
    update_last_update_time(email, latest_added_on)
    logging.info(
        "Successfully pushed %d new annotations to Readwise for user with email: %s",
        len(new_annotations),
        email,
    )


def main():
    """Run the sync loop forever.

    A pass in which every user succeeds is followed by a 24 hour pause; a
    pass in which anything failed is retried after 1 hour.

    Raises:
        RuntimeError: If the ``DATABASE_URL`` environment variable is unset.
    """
    if not DATABASE_URL:
        # Fail fast instead of looping forever on "None/..." URLs.
        raise RuntimeError("DATABASE_URL environment variable is not set")

    while True:
        try:
            all_tokens = get_all_active_tokens()
        except Exception:
            logging.exception("An error occurred while fetching active tokens")
            time.sleep(ERROR_SLEEP_SECONDS)
            continue

        had_error = False
        for token in all_tokens:
            # Isolate failures so one broken account cannot block the rest.
            try:
                _sync_user(token)
            except Exception:
                had_error = True
                logging.exception(
                    "An error occurred for user with email: %s",
                    token.get("email") if isinstance(token, dict) else None,
                )

        time.sleep(ERROR_SLEEP_SECONDS if had_error else SUCCESS_SLEEP_SECONDS)


if __name__ == "__main__":
    main()