2024-01-17 11:14:23 +01:00
|
|
|
import os
|
|
|
|
import time
|
|
|
|
import json
|
|
|
|
import requests
|
2024-01-17 11:44:50 +01:00
|
|
|
import logging
|
2024-01-17 11:14:23 +01:00
|
|
|
|
|
|
|
# Path of a local file for the last update time.
# NOTE(review): not referenced anywhere else in the visible code; confirm
# whether it is still needed.
DATA_STORE_PATH = "/data/last_update_time.txt"

# Base URL of the database service, read from the environment. os.getenv
# returns None when the variable is unset, in which case every f-string URL
# built from this value starts with the literal text "None".
DATABASE_URL = os.getenv("DATABASE_URL")

# Configure the root logger to emit INFO-level messages and above.
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
class APIHandler:
    """Small HTTP helper bound to one base URL and one set of headers."""

    def __init__(self, base_url, headers=None):
        """Remember the base URL and the headers sent with every request.

        Args:
            base_url: Prefix that is concatenated verbatim with each endpoint.
            headers: Optional dict of HTTP headers. When omitted, a fresh
                empty dict is created for this instance.
        """
        self.base_url = base_url
        # A `headers={}` default would be one dict shared by every instance
        # created without headers; build a new one per instance instead.
        self.headers = {} if headers is None else headers

    def get(self, endpoint, params=None):
        """GET `base_url + endpoint` and return the decoded JSON body.

        Args:
            endpoint: Path appended to the base URL.
            params: Optional query-string parameters.

        Returns:
            Whatever `response.json()` yields for the response body.

        Raises:
            requests.HTTPError: If the response status is 4xx or 5xx.
        """
        response = requests.get(self.base_url + endpoint, params=params, headers=self.headers)
        response.raise_for_status()
        return response.json()

    def post(self, endpoint, data=None):
        """POST `data` (serialized with json.dumps) and return the status code.

        Args:
            endpoint: Path appended to the base URL.
            data: JSON-serializable payload; None is sent as the text "null".

        Returns:
            The integer HTTP status code of the response.

        Raises:
            requests.HTTPError: If the response status is 4xx or 5xx.
        """
        response = requests.post(self.base_url + endpoint, data=json.dumps(data), headers=self.headers)
        response.raise_for_status()
        return response.status_code
|
|
|
|
|
2024-02-01 11:54:16 +01:00
|
|
|
def get_last_update_time(email):
    """Look up the stored last-update time for `email` in the database service.

    Returns:
        0 when the service answers 204, the 'last_update_time' field of the
        JSON body when it answers 200, and None for any other non-error
        status.

    Raises:
        requests.HTTPError: If the service answers with a 4xx or 5xx status.
    """
    url = f'{DATABASE_URL}/annotation_last_update/{email}'
    resp = requests.get(url)
    resp.raise_for_status()

    if resp.status_code == 204:
        # No record stored for this user yet.
        return 0
    if resp.status_code == 200:
        body = resp.json()
        return body['last_update_time']
    return None
|
2024-01-17 11:14:23 +01:00
|
|
|
|
2024-02-01 11:54:16 +01:00
|
|
|
def update_last_update_time(email, new_time):
    """Store `new_time` as the last-update time for `email`.

    Sends a JSON POST to the database service's annotation_last_update
    endpoint.

    Raises:
        requests.HTTPError: If the service answers with a 4xx or 5xx status.
    """
    endpoint = f'{DATABASE_URL}/annotation_last_update'
    request_headers = {'Content-Type': 'application/json'}
    payload = {'email': email, 'last_update_time': new_time}

    result = requests.post(endpoint, headers=request_headers, json=payload)
    result.raise_for_status()
|
2024-01-17 11:14:23 +01:00
|
|
|
|
2024-01-31 07:19:27 +01:00
|
|
|
def get_new_annotations(last_annotation_time, inoreader_token):
    """Fetch the Inoreader annotations added after `last_annotation_time`.

    Pages through the annotated-items stream 100 items at a time, following
    the 'continuation' value of each response until none is returned. Every
    annotation is augmented with the 'title', 'author' and 'sources' (the
    item's 'canonical' entry) of the item it belongs to.

    Args:
        last_annotation_time: Exclusive lower bound compared against each
            annotation's 'added_on' value.
        inoreader_token: The OAuth access token as a string, or a zero-argument
            callable returning it.

    Returns:
        A list of annotation dicts whose 'added_on' is greater than
        `last_annotation_time`.

    Raises:
        requests.HTTPError: If any page request answers with 4xx or 5xx.
        KeyError: If a response lacks 'items', or an annotated item lacks
            'title', 'author' or 'canonical'.
    """
    # The caller in this file passes the token string itself; calling it
    # unconditionally raised TypeError. Callables are still accepted.
    access_token = inoreader_token() if callable(inoreader_token) else inoreader_token

    inoreader = APIHandler(
        "https://www.inoreader.com/reader/api/0/stream/contents",
        headers={
            'Authorization': 'Bearer ' + access_token
        }
    )

    all_annotations = []
    continuation = None

    while True:
        params = {
            "annotations": 1,
            "n": 100,
        }
        if continuation:
            params["c"] = continuation

        # APIHandler.get already returns the decoded JSON body, so it must not
        # be passed through json.loads a second time.
        data = inoreader.get(
            "/user/-/state/com.google/annotated",
            params=params
        )

        for item in data["items"]:
            for annotation in item.get("annotations", []):
                annotation['title'] = item['title']
                annotation['author'] = item['author']
                annotation['sources'] = item['canonical']
                all_annotations.append(annotation)

        # Stop on a missing OR empty continuation: an empty value would omit
        # the "c" parameter and refetch the first page forever.
        continuation = data.get('continuation')
        if not continuation:
            break
        time.sleep(900)  # Sleep for 15 minutes between pages

    return [
        annotation
        for annotation in all_annotations
        if annotation['added_on'] > last_annotation_time
    ]
|
2024-01-17 12:25:43 +01:00
|
|
|
|
2024-01-31 07:19:27 +01:00
|
|
|
def push_annotations_to_readwise(annotations, readwise_token):
    """Send `annotations` to the Readwise highlights endpoint in one POST.

    Each annotation dict must provide 'text', 'title', 'author', 'note',
    'added_on' and 'sources'; the first source's 'href' becomes the
    highlight's source URL, or None when 'sources' is empty.

    Args:
        annotations: Iterable of annotation dicts.
        readwise_token: Readwise API token placed in the Authorization header.

    Raises:
        requests.HTTPError: If Readwise answers with a 4xx or 5xx status.
    """
    auth_headers = {
        'Authorization': 'Token ' + readwise_token,
        'Content-Type': 'application/json',
    }
    client = APIHandler("https://readwise.io", headers=auth_headers)

    highlights = []
    for entry in annotations:
        highlight = {
            'text': entry['text'],
            'title': entry['title'],
            'author': entry['author'],
            'note': entry['note'],
            'highlighted_at': entry['added_on'],
            'category': 'articles',
            'source_url': entry['sources'][0]['href'] if entry['sources'] else None,
        }
        highlights.append(highlight)

    client.post("/api/v2/highlights/", data={'highlights': highlights})
|
|
|
|
|
2024-01-31 07:19:27 +01:00
|
|
|
# def get_inoreader_access_token():
|
|
|
|
# response = requests.get(f'{DATABASE_URL}/token/latest')
|
|
|
|
# response.raise_for_status()
|
|
|
|
|
|
|
|
# if response.status_code == 204:
|
|
|
|
# # throw error - not logged in. Please log in first through the web app
|
|
|
|
# raise Exception("Not logged in. Please log in first through the web app")
|
|
|
|
# elif response.status_code == 200:
|
|
|
|
# resp_json = response.json()
|
|
|
|
# if resp_json['token']['expiration_seconds'] + resp_json['token']['timestamp'] > datetime.now().timestamp():
|
|
|
|
# return resp_json['token']['access_token']
|
|
|
|
# else:
|
|
|
|
# return refresh_inoreader_access_token(resp_json['token']['refresh_token'])
|
|
|
|
|
|
|
|
# access_token = get_token_from_database()
|
|
|
|
# if not access_token:
|
|
|
|
# access_token = refresh_inoreader_access_token()
|
|
|
|
|
|
|
|
# if not access_token:
|
|
|
|
# raise Exception("Unable to get access token. Try logging in again through the web app")
|
|
|
|
# return access_token
|
|
|
|
|
|
|
|
def refresh_inoreader_access_token(refresh_token, readwise_api_key):
    """Exchange a refresh token for a new Inoreader access token and save it.

    Posts a refresh_token grant to the Inoreader OAuth endpoint using the
    INOREADER_CLIENT_ID / INOREADER_CLIENT_SECRET environment variables, looks
    up the user's email through the user-info endpoint, and persists the new
    token set with `save_token`.

    Args:
        refresh_token: The refresh token issued with the expired access token.
        readwise_api_key: Stored alongside the new token and returned
            unchanged.

    Returns:
        A tuple `(access_token, readwise_api_key)`.

    Raises:
        requests.HTTPError: If the token request, the user-info request, or
            the save request answers with a 4xx or 5xx status.
        KeyError: If the token response lacks 'access_token',
            'refresh_token' or 'expires_in'.
    """
    response = requests.post(
        'https://www.inoreader.com/oauth2/token',
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        data={
            'refresh_token': refresh_token,
            'client_id': os.getenv("INOREADER_CLIENT_ID"),
            'client_secret': os.getenv("INOREADER_CLIENT_SECRET"),
            'grant_type': 'refresh_token'
        }
    )
    response.raise_for_status()

    token = response.json()

    user_info_response = requests.get(
        'https://www.inoreader.com/reader/api/0/user-info',
        headers={
            'Authorization': f'Bearer {token["access_token"]}'
        }
    )
    # Every other request in this file checks the status; without this check
    # a failed lookup could save the token under a missing email.
    user_info_response.raise_for_status()
    user_info = user_info_response.json()

    # Save tokens for later use
    save_token(
        user_info.get('userEmail'),
        token['access_token'],
        token['refresh_token'],
        token['expires_in'],
        readwise_api_key
    )

    return token['access_token'], readwise_api_key
|
2024-01-24 12:29:35 +01:00
|
|
|
|
2024-01-31 07:19:27 +01:00
|
|
|
def save_token(email, access_token, refresh_token, expiration_seconds, readwise_api_key):
    """Persist a token record for `email` through the database service.

    All five arguments are sent as a JSON object to the service's token
    endpoint under keys of the same names.

    Raises:
        requests.HTTPError: If the service answers with a 4xx or 5xx status.
    """
    record = {
        'email': email,
        'access_token': access_token,
        'refresh_token': refresh_token,
        'expiration_seconds': expiration_seconds,
        'readwise_api_key': readwise_api_key,
    }
    target = f'{DATABASE_URL}/token'
    json_headers = {'Content-Type': 'application/json'}

    reply = requests.post(target, headers=json_headers, json=record)
    reply.raise_for_status()
|
|
|
|
|
2024-01-31 07:19:27 +01:00
|
|
|
def get_all_active_tokens():
    """Return the active token records held by the database service.

    Returns:
        The 'tokens' entry of the JSON body when the service answers 200,
        otherwise an empty list.

    Raises:
        requests.HTTPError: If the service answers with a 4xx or 5xx status.
    """
    reply = requests.get(f'{DATABASE_URL}/token/all?only_active=true')
    reply.raise_for_status()

    # Any successful status other than 200 is treated as "no tokens".
    if reply.status_code != 200:
        return []
    return reply.json()['tokens']
|
|
|
|
|
|
|
|
def deactivate_token(token_id):
    """Ask the database service to deactivate the token with id `token_id`.

    Raises:
        requests.HTTPError: If the service answers with a 4xx or 5xx status.
    """
    url = f'{DATABASE_URL}/token/{token_id}/deactivate'
    json_headers = {'Content-Type': 'application/json'}
    reply = requests.post(url, headers=json_headers)
    reply.raise_for_status()
|
|
|
|
|
|
|
|
def check_and_refresh_access_token(token):
    """Return usable credentials for `token`, refreshing them when expired.

    Args:
        token: Token record providing 'expiration_seconds', 'timestamp',
            'access_token', 'readwise_api_key', and, for the refresh path,
            'id' and 'refresh_token'.
            NOTE(review): assumes 'timestamp' is in epoch seconds; confirm
            against the database service.

    Returns:
        A tuple `(access_token, readwise_api_key)`.

    Raises:
        requests.HTTPError: If deactivating or refreshing the token fails.
    """
    # The file imports `time` but never `datetime`, so the previous
    # `datetime.now().timestamp()` raised NameError; time.time() gives the
    # same epoch-seconds value.
    expires_at = token['expiration_seconds'] + token['timestamp']
    if expires_at > time.time():
        return token['access_token'], token['readwise_api_key']

    # Expired: retire the stored record, then obtain and save a fresh token.
    deactivate_token(token['id'])
    return refresh_inoreader_access_token(token['refresh_token'], token['readwise_api_key'])
|
|
|
|
|
2024-01-17 13:01:17 +01:00
|
|
|
def _sync_user(token):
    """Push one user's new Inoreader annotations to Readwise."""
    email = token['email']
    logging.info("Checking for new annotations for user with email: %s", email)

    inoreader_token, readwise_api_key = check_and_refresh_access_token(token)

    last_annotation_time = get_last_update_time(email)
    new_annotations = get_new_annotations(last_annotation_time, inoreader_token)

    if not new_annotations:
        logging.info("No new annotations found for user with email: %s", email)
        return

    latest_added_on = max(annotation['added_on'] for annotation in new_annotations)
    push_annotations_to_readwise(new_annotations, readwise_api_key)
    # Record progress only after the push succeeded, so a failed push is
    # retried on the next cycle.
    update_last_update_time(email, latest_added_on)
    logging.info(
        "Successfully pushed %s new annotations to Readwise for user with email: %s",
        len(new_annotations),
        email,
    )


def main():
    """Run the sync loop forever.

    Each cycle fetches all active tokens and syncs every user in turn. A
    failure for one user is logged with its traceback and does not stop the
    remaining users from being processed. The loop sleeps 24 hours after a
    clean cycle and 1 hour after a cycle in which anything failed.
    """
    while True:
        had_error = False

        try:
            all_tokens = get_all_active_tokens()
        except Exception as e:
            logging.error(f"An error occurred: {e}")
            all_tokens = []
            had_error = True

        for token in all_tokens:
            # Isolate each user: previously the first failing user aborted the
            # loop, so every user after it was skipped on each retry.
            try:
                _sync_user(token)
            except Exception:
                logging.exception("An error occurred while syncing a user; continuing with the next one")
                had_error = True

        if had_error:
            time.sleep(3600)  # Sleep for 1 hour in case of error
        else:
            time.sleep(86400)  # Sleep for 24 hours
|
2024-01-17 11:14:23 +01:00
|
|
|
|
|
|
|
# Start the sync loop only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|