"""Flask web front end: OAuth login and Readwise API key management.

Reconstructed from ``app/main.py`` (patch 1/3, "url encoding with python").
Runtime requirements (``app/requirements.txt``): Flask==3.0.1, requests==2.31.0.

OAuth tokens are persisted through the HTTP token service reachable at
``DATABASE_URL``; the browser session only stores the id of the token row.
"""
import os
import secrets
from datetime import datetime
from urllib.parse import urlencode

import requests
from flask import Flask, render_template, request, redirect, abort, url_for, session

# Seconds to wait on any outbound HTTP call. Without a timeout a stalled
# upstream would block the request worker indefinitely.
REQUEST_TIMEOUT = 10
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def get_env_variable(var_name):
    """Return the value of environment variable *var_name*.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    value = os.environ.get(var_name)
    if not value:
        raise ValueError(f"Missing required environment variable: {var_name}")
    return value


app = Flask(__name__)

# Read environment variables outside the route functions so that a missing
# setting fails at start-up rather than on the first request.
client_id = get_env_variable('CLIENT_ID')
client_secret = get_env_variable('CLIENT_SECRET')
redirect_uri = get_env_variable('REDIRECT_URI')
optional_scopes = get_env_variable('OPTIONAL_SCOPES')
database_url = get_env_variable('DATABASE_URL')
secret_key = get_env_variable('APP_SECRET_KEY')

# Set secret key to enable sessions
app.secret_key = secret_key

# TEST: Github OAuth - for Inoreader use https://www.inoreader.com/oauth2/auth
AUTH_URL = 'https://github.com/login/oauth/authorize'


def _fetch_token(token_id):
    """Return the token dict stored under *token_id*, or ``None`` on 404."""
    resp = requests.get(f'{database_url}/token/{token_id}', timeout=REQUEST_TIMEOUT)
    if resp.status_code == 404:
        # A stale session id must not turn every page load into a 500.
        return None
    raise_for_status(resp)
    return resp.json()['token']


def _get_session_token():
    """Return the active token referenced by the session, else ``None``."""
    token_id = session.get('token_id')
    if not token_id:
        return None
    token = _fetch_token(token_id)
    if token is None or not token.get('active', False):
        return None
    return token


def _format_timestamp(epoch_seconds):
    """Format Unix *epoch_seconds* as local time using TIMESTAMP_FORMAT."""
    return datetime.fromtimestamp(epoch_seconds).strftime(TIMESTAMP_FORMAT)


@app.route('/')
def home():
    """Render the dashboard when logged in, otherwise the login page."""
    token = _get_session_token()
    if token is not None:
        # Best effort: a failed profile lookup still renders the page, just
        # with empty user fields.
        user_info = requests.get(
            'https://api.github.com/user',
            headers={'Authorization': f'Bearer {token.get("access_token")}'},
            timeout=REQUEST_TIMEOUT,
        ).json()

        updated_at = token.get('updated_at')
        return render_template(
            'home.html',
            user_login=user_info.get('login'),
            user_email=user_info.get('email'),  # for inoreader it's userName and userEmail
            readwise_api_key=token.get('readwise_api_key') or '',
            last_synced=_format_timestamp(updated_at),
            next_sync=_format_timestamp(updated_at + token.get('expiration_seconds')),
        )

    # Generate a CSRF protection string; it is echoed back by the provider
    # in the ``state`` parameter and verified in oauth_redirect().
    session['csrf_protection_string'] = secrets.token_hex(16)

    # urlencode() escapes every parameter (redirect URI, scopes) properly.
    oauth_params = {
        'client_id': client_id,
        'redirect_uri': redirect_uri,
        'response_type': 'code',
        'scope': optional_scopes,
        'state': session['csrf_protection_string'],
    }
    oauth_url = f'{AUTH_URL}?{urlencode(oauth_params)}'

    return render_template('login.html', oauth_url=oauth_url)


@app.route('/oauth-redirect')
def oauth_redirect():
    """Handle the provider callback: verify state, exchange code, store token."""
    auth_code = request.args.get('code')
    csrf_token = request.args.get('state')

    # Verify the CSRF protection string. Both values must be present: a plain
    # ``!=`` comparison would accept a request with no ``state`` on a session
    # that never started a login (None == None). The value is single-use.
    expected = session.pop('csrf_protection_string', None)
    if not csrf_token or not expected or not secrets.compare_digest(
        csrf_token.encode('utf-8'), expected.encode('utf-8')
    ):
        abort(403, 'Invalid CSRF token. Please try again.')

    if not auth_code:
        abort(400, 'Missing authorization code.')

    # Exchange authorization code for access and refresh tokens.
    # For Inoreader: POST https://www.inoreader.com/oauth2/token with the same
    # fields plus scope='' and grant_type='authorization_code', form-encoded.
    # TEST: Github OAuth - REMOVE
    response = requests.post(
        'https://github.com/login/oauth/access_token',
        headers={'Accept': 'application/json'},
        data={
            'code': auth_code,
            'redirect_uri': redirect_uri,
            'client_id': client_id,
            'client_secret': client_secret,
        },
        timeout=REQUEST_TIMEOUT,
    )
    raise_for_status(response)

    token = response.json()
    if not token.get('access_token'):
        # GitHub reports a failed exchange with HTTP 200 and an error body.
        reason = token.get('error_description') or token.get('error') or 'no access token returned'
        abort(400, f'OAuth token exchange failed: {reason}')

    # TEST: Github OAuth - REMOVE
    token['refresh_token'] = 'N/A'
    token['expires_in'] = 3600

    # REPLACE user API call with inoreader API call
    # https://www.inoreader.com/reader/api/0/user-info
    user_resp = requests.get(
        'https://api.github.com/user',
        headers={'Authorization': f'Bearer {token.get("access_token")}'},
        timeout=REQUEST_TIMEOUT,
    )
    raise_for_status(user_resp)
    user_info = user_resp.json()

    email = user_info.get('email')  # for inoreader it's userEmail
    if not email:
        # save_token() looks the token up by email, so it cannot proceed without one.
        abort(400, 'The OAuth provider did not return an email address for this account.')

    # Save tokens for later use
    token_id = save_token(
        email,
        token.get('access_token'),
        token.get('refresh_token'),
        token.get('expires_in'),
    )

    set_session_token_id(token_id)
    return redirect(url_for('home'))


@app.route('/logout', methods=['POST'])
def logout():
    """Forget the token id held in the session and return to the home page."""
    # NOTE: only the session is cleared; the stored token is not modified.
    session.pop('token_id', None)
    return redirect(url_for('home'))


@app.route('/readwise', methods=['POST'])
def submit_readwise_api():
    """Store the submitted Readwise API key on the session's token."""
    token_id = session.get('token_id')
    if not token_id:
        return redirect(url_for('home'))

    response = requests.put(
        f'{database_url}/token/{token_id}',
        headers={'Content-Type': 'application/json'},
        json={'readwise_api_key': request.form.get('readwise_api_key')},
        timeout=REQUEST_TIMEOUT,
    )
    raise_for_status(response)

    return redirect(url_for('home'))


def is_logged_in():
    """Return True when the session references a token marked active."""
    return _get_session_token() is not None


def save_token(email, access_token, refresh_token, expiration_seconds):
    """Create or update the active token for *email* and return its id.

    The token service answers the email lookup with 200 and the token when an
    active one exists; any other success status is treated as "none yet".
    """
    # ``params`` lets requests percent-encode the address ('+', '&', ...).
    lookup = requests.get(
        f'{database_url}/token',
        params={'email': email},
        timeout=REQUEST_TIMEOUT,
    )
    raise_for_status(lookup)

    credentials = {
        'access_token': access_token,
        'refresh_token': refresh_token,
        'expiration_seconds': expiration_seconds,
    }
    json_headers = {'Content-Type': 'application/json'}

    if lookup.status_code != 200:
        response = requests.post(
            f'{database_url}/token',
            headers=json_headers,
            json={'email': email, **credentials},
            timeout=REQUEST_TIMEOUT,
        )
        raise_for_status(response)
        return response.json().get('id')

    token = lookup.json()['token']
    response = requests.put(
        f'{database_url}/token/{token["id"]}',
        headers=json_headers,
        json=credentials,
        timeout=REQUEST_TIMEOUT,
    )
    raise_for_status(response)
    return token['id']


def set_session_token_id(token_id):
    """Remember *token_id* in the signed session cookie."""
    session['token_id'] = token_id


def raise_for_status(response):
    """Raise ``Exception`` when *response* does not carry a 2xx status.

    The message includes the ``error`` field of a JSON body when there is
    one, otherwise the raw response text.
    """
    if 200 <= response.status_code < 300:
        return
    try:
        msg = response.json().get('error', '')
    except (ValueError, AttributeError):
        # Body is not JSON (ValueError) or not a JSON object (AttributeError).
        msg = response.text
    raise Exception(f'HTTPError: {response.status_code} \n Message: {msg}')


if __name__ == '__main__':
    # The interactive debugger executes arbitrary code, so it is opt-in
    # (FLASK_DEBUG=1) rather than always on while listening on 0.0.0.0.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', debug=debug_enabled, port=5000)
"""Token storage service: a small Flask + SQLAlchemy HTTP API.

Reconstructed from ``database/main.py`` (patch 2/3, "Added database").
Runtime requirements (``database/requirements.txt``): Flask==3.0.1,
Flask-SQLAlchemy==3.1.1.

All datetimes are stored as naive UTC and exposed as Unix epoch seconds.
"""
import os
import uuid
from datetime import datetime, timezone

from flask import Flask, abort, jsonify, make_response, request
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///tokens.db'  # Use SQLite for simplicity
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)


def _new_id():
    """Return a fresh UUID4 string; used as a per-row column default."""
    return str(uuid.uuid4())


def _utcnow():
    """Return the current time as a naive UTC datetime."""
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _to_epoch(value):
    """Convert a naive UTC datetime to integer Unix seconds.

    ``naive.timestamp()`` would interpret the value as local time and shift
    the result by the server's UTC offset, so the zone is attached first.
    """
    return int(value.replace(tzinfo=timezone.utc).timestamp())


def _from_epoch(seconds):
    """Convert Unix *seconds* to the naive UTC datetime stored in the DB."""
    return datetime.fromtimestamp(seconds, tz=timezone.utc).replace(tzinfo=None)


def _json_object():
    """Return the request's JSON body, aborting with 400 unless it is an object."""
    data = request.get_json()
    if not isinstance(data, dict):
        abort(make_response(jsonify({'error': 'Request body must be a JSON object'}), 400))
    return data


def _missing_fields(data, required_fields):
    """Return the names in *required_fields* that are absent or falsy in *data*."""
    return [field for field in required_fields if not data.get(field)]


class Token(db.Model):
    # ``default`` must be a callable: a plain ``str(uuid.uuid4())`` is
    # evaluated once at import time and gives every row the same primary key.
    id = db.Column(db.String(36), primary_key=True, default=_new_id)
    email = db.Column(db.String(255), nullable=False)
    access_token = db.Column(db.String(255), nullable=False)
    refresh_token = db.Column(db.String(255), nullable=False)
    expiration_seconds = db.Column(db.Integer, nullable=False)
    readwise_api_key = db.Column(db.String(255))
    active = db.Column(db.Boolean, default=True)
    created_at = db.Column(db.DateTime, default=_utcnow)
    updated_at = db.Column(db.DateTime, default=_utcnow)

    def __repr__(self):
        return f'<Token {self.id}>'


# This table stores email-wise last annotation timestamp
# only one entry per email
class AnnotationLastUpdate(db.Model):
    id = db.Column(db.String(36), primary_key=True, default=_new_id)
    email = db.Column(db.String(255), nullable=False)
    last_update_time = db.Column(db.DateTime, nullable=False)
    created_at = db.Column(db.DateTime, default=_utcnow)
    updated_at = db.Column(db.DateTime, default=_utcnow)

    def __repr__(self):
        return f'<AnnotationLastUpdate {self.email}>'


# Create the tables inside an application context
with app.app_context():
    db.create_all()


def _serialize_token(token):
    """Return the JSON-ready dict shared by all token endpoints."""
    return {
        'id': token.id,
        'email': token.email,
        'access_token': token.access_token,
        'refresh_token': token.refresh_token,
        'expiration_seconds': int(token.expiration_seconds),
        'readwise_api_key': token.readwise_api_key,
        'active': token.active,
        'created_at': _to_epoch(token.created_at),
        'updated_at': _to_epoch(token.updated_at),
    }


# API to create a new token entry
@app.route('/token', methods=['POST'])
def create_token():
    """Create a token; 400 on missing fields or an existing active token."""
    data = _json_object()

    missing_fields = _missing_fields(
        data, ['email', 'access_token', 'refresh_token', 'expiration_seconds']
    )
    if missing_fields:
        return jsonify({'error': f'Missing required fields: {", ".join(missing_fields)}'}), 400

    email = data.get('email')

    # unique email when active is true
    if Token.query.filter_by(email=email, active=True).first():
        return jsonify({'error': 'An active token with this email already exists'}), 400

    new_token = Token(
        email=email,
        access_token=data.get('access_token'),
        refresh_token=data.get('refresh_token'),
        expiration_seconds=data.get('expiration_seconds'),
        readwise_api_key=data.get('readwise_api_key'),
    )
    db.session.add(new_token)
    db.session.commit()

    return jsonify({'id': new_token.id}), 201


# API to get the token based on the id
@app.route('/token/<token_id>', methods=['GET'])
def get_token_by_id(token_id):
    """Return the token with primary key *token_id*, or 404."""
    token = Token.query.get(token_id)
    if not token:
        return jsonify({'error': 'Token not found'}), 404
    return jsonify({'token': _serialize_token(token)}), 200


# API to get the token based on the email
@app.route('/token', methods=['GET'])
def get_token_by_email():
    """Return the active token for ``?email=``; 204 when there is none."""
    email = request.args.get('email')
    if not email:
        return jsonify({'error': 'Missing email query parameter'}), 400
    token = Token.query.filter_by(email=email, active=True).first()
    if not token:
        return '', 204
    return jsonify({'token': _serialize_token(token)}), 200


# API to update the token based on the id
@app.route('/token/<token_id>', methods=['PUT'])
def update_token_by_id(token_id):
    """Overwrite the supplied credential fields and bump ``updated_at``."""
    token = Token.query.get_or_404(token_id)
    data = _json_object()
    # Fields that are not part of the payload keep their stored value.
    for field in ('access_token', 'refresh_token', 'expiration_seconds', 'readwise_api_key'):
        setattr(token, field, data.get(field, getattr(token, field)))
    token.updated_at = _utcnow()
    db.session.commit()
    return '', 204


# deactivate token
@app.route('/token/<token_id>/deactivate', methods=['POST'])
def deactivate_token_by_id(token_id):
    """Mark the token inactive; 404 when it does not exist."""
    token = Token.query.get_or_404(token_id)
    token.active = False
    db.session.commit()
    return '', 204


# get all tokens
@app.route('/token/all', methods=['GET'])
def get_all_tokens():
    """List tokens; ``?only_active=true`` restricts the list to active ones."""
    # Parse the flag explicitly: any non-empty string, including "false",
    # is truthy when tested directly.
    only_active = request.args.get('only_active', '').strip().lower() in ('1', 'true', 'yes')
    query = Token.query.filter_by(active=True) if only_active else Token.query
    return jsonify({'tokens': [_serialize_token(token) for token in query.all()]}), 200


# API to create or update the last annotation timestamp
@app.route('/annotation_last_update', methods=['POST'])
def create_or_update_annotation_last_update():
    """Upsert the last-annotation time (Unix seconds) for an email."""
    data = _json_object()

    missing_fields = _missing_fields(data, ['email', 'last_update_time'])
    if missing_fields:
        return jsonify({'error': f'Missing required fields: {", ".join(missing_fields)}'}), 400

    # The column is a DateTime, but the GET endpoint (and its client) speak
    # epoch seconds, so convert at the boundary instead of storing the raw
    # JSON value, which the database layer would reject.
    try:
        last_update_time = _from_epoch(float(data.get('last_update_time')))
    except (TypeError, ValueError, OverflowError, OSError):
        return jsonify({'error': 'last_update_time must be a Unix timestamp in seconds'}), 400

    email = data.get('email')
    record = AnnotationLastUpdate.query.filter_by(email=email).first()
    if record:
        record.last_update_time = last_update_time
        record.updated_at = _utcnow()
    else:
        db.session.add(AnnotationLastUpdate(email=email, last_update_time=last_update_time))
    db.session.commit()
    return '', 204


# API to get the last annotation timestamp based on the email
@app.route('/annotation_last_update/<email>', methods=['GET'])
def get_annotation_last_update_by_email(email):
    """Return the stored last-annotation record for *email*; 204 if none."""
    record = AnnotationLastUpdate.query.filter_by(email=email).first()
    if not record:
        return '', 204
    return jsonify({
        'id': record.id,
        'email': record.email,
        'last_update_time': _to_epoch(record.last_update_time),
        'created_at': _to_epoch(record.created_at),
        'updated_at': _to_epoch(record.updated_at),
    }), 200


if __name__ == '__main__':
    # The interactive debugger executes arbitrary code, so it is opt-in
    # (FLASK_DEBUG=1) rather than always on while listening on 0.0.0.0.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
"""Background job: copy new Inoreader annotations to Readwise.

Reconstructed from ``job/main.py`` (patch 3/3, "Added jobs").
Runtime requirements (``job/requirements.txt``): requests==2.31.0.

For every active token returned by the token service at ``DATABASE_URL`` the
job refreshes the Inoreader access token when needed, fetches annotations
newer than the stored watermark, pushes them to Readwise and advances the
watermark.
"""
import json
import logging
import os
import time
from datetime import datetime, timezone
from urllib.parse import quote

import requests

DATA_STORE_PATH = "/data/last_update_time.txt"

DATABASE_URL = os.getenv("DATABASE_URL")

# Seconds to wait on any outbound HTTP call; without a timeout a stalled
# upstream would hang the whole job.
REQUEST_TIMEOUT = 30
PAGE_DELAY_SECONDS = 900        # pause between Inoreader result pages (15 minutes)
SYNC_INTERVAL_SECONDS = 86400   # pause after a clean run (24 hours)
RETRY_INTERVAL_SECONDS = 3600   # pause after a run with errors (1 hour)

logging.basicConfig(level=logging.INFO)


class APIHandler:
    """Minimal JSON client bound to a base URL and a fixed set of headers."""

    def __init__(self, base_url, headers=None):
        self.base_url = base_url
        # Copy instead of sharing a mutable default between instances.
        self.headers = dict(headers) if headers else {}

    def get(self, endpoint, params=None):
        """GET ``base_url + endpoint`` and return the decoded JSON body."""
        response = requests.get(
            self.base_url + endpoint,
            params=params,
            headers=self.headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def post(self, endpoint, data=None):
        """POST *data* as JSON text and return the HTTP status code."""
        response = requests.post(
            self.base_url + endpoint,
            data=json.dumps(data),
            headers=self.headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.status_code


def get_last_update_time(email):
    """Return the stored annotation watermark for *email* (0 when none)."""
    response = requests.get(
        f'{DATABASE_URL}/annotation_last_update/{quote(email, safe="@")}',
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    # The service answers 204 (no body) when nothing is stored yet.
    if response.status_code == 200:
        return response.json()['last_update_time']
    return 0


def update_last_update_time(email, new_time):
    """Store *new_time* as the annotation watermark for *email*."""
    response = requests.post(
        f'{DATABASE_URL}/annotation_last_update',
        headers={'Content-Type': 'application/json'},
        json={'email': email, 'last_update_time': new_time},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()


def get_new_annotations(last_annotation_time, inoreader_token):
    """Return annotations whose ``added_on`` is after *last_annotation_time*.

    Walks every page of the annotated-items stream; each returned annotation
    is enriched with the ``title``, ``author`` and ``sources`` of its item.
    """
    inoreader = APIHandler(
        "https://www.inoreader.com/reader/api/0/stream/contents",
        headers={'Authorization': 'Bearer ' + inoreader_token},
    )

    all_annotations = []
    continuation = None

    while True:
        params = {"annotations": 1, "n": 100}
        if continuation:
            params["c"] = continuation

        # APIHandler.get() already returns the decoded JSON document.
        data = inoreader.get("/user/-/state/com.google/annotated", params=params)

        for item in data.get("items", []):
            for annotation in item.get("annotations", []):
                annotation['title'] = item.get('title')
                annotation['author'] = item.get('author')
                annotation['sources'] = item.get('canonical')
                all_annotations.append(annotation)

        continuation = data.get('continuation')
        if not continuation:
            break
        time.sleep(PAGE_DELAY_SECONDS)

    return [
        annotation for annotation in all_annotations
        if annotation.get('added_on', 0) > last_annotation_time
    ]


def _to_iso8601(value):
    """Render numeric epoch seconds as an ISO 8601 UTC string.

    Non-numeric values are passed through unchanged.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return datetime.fromtimestamp(value, tz=timezone.utc).isoformat()
    return value


def push_annotations_to_readwise(annotations, readwise_token):
    """Send *annotations* to the Readwise highlights endpoint in one request."""
    if not annotations:
        return

    readwise = APIHandler(
        "https://readwise.io",
        headers={
            'Authorization': 'Token ' + readwise_token,
            'Content-Type': 'application/json',
        },
    )

    readwise.post(
        "/api/v2/highlights/",
        data={
            'highlights': [
                {
                    'text': annotation.get('text'),
                    'title': annotation.get('title'),
                    'author': annotation.get('author'),
                    'note': annotation.get('note'),
                    # NOTE(review): Readwise documents this field as ISO 8601;
                    # assumes Inoreader's added_on is epoch seconds - confirm.
                    'highlighted_at': _to_iso8601(annotation.get('added_on')),
                    'category': 'articles',
                    'source_url': annotation['sources'][0]['href'] if annotation.get('sources') else None,
                }
                for annotation in annotations
            ]
        },
    )


def refresh_inoreader_access_token(refresh_token, readwise_api_key, old_token_id=None):
    """Exchange *refresh_token* for a new token set and persist it.

    Args:
        refresh_token: the refresh token currently on file.
        readwise_api_key: carried over onto the newly saved token.
        old_token_id: when given, that token row is deactivated after the
            refresh succeeded and before the replacement is saved.

    Returns:
        ``(access_token, readwise_api_key)``.
    """
    response = requests.post(
        'https://www.inoreader.com/oauth2/token',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        data={
            'refresh_token': refresh_token,
            'client_id': os.getenv("INOREADER_CLIENT_ID"),
            'client_secret': os.getenv("INOREADER_CLIENT_SECRET"),
            'grant_type': 'refresh_token',
        },
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    token = response.json()

    user_response = requests.get(
        'https://www.inoreader.com/reader/api/0/user-info',
        headers={'Authorization': f'Bearer {token.get("access_token")}'},
        timeout=REQUEST_TIMEOUT,
    )
    user_response.raise_for_status()
    user_info = user_response.json()

    # Retire the old row only now: doing it before the refresh would leave
    # the user without any active token if the refresh request fails.
    if old_token_id is not None:
        deactivate_token(old_token_id)

    # Save tokens for later use
    save_token(
        user_info.get('userEmail'),
        token['access_token'],
        # Fall back to the current refresh token if none was reissued.
        token.get('refresh_token', refresh_token),
        token['expires_in'],
        readwise_api_key,
    )

    return token['access_token'], readwise_api_key


def save_token(email, access_token, refresh_token, expiration_seconds, readwise_api_key):
    """Create a new token row in the token service."""
    response = requests.post(
        f'{DATABASE_URL}/token',
        headers={'Content-Type': 'application/json'},
        json={
            'email': email,
            'access_token': access_token,
            'refresh_token': refresh_token,
            'expiration_seconds': expiration_seconds,
            'readwise_api_key': readwise_api_key,
        },
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()


def get_all_active_tokens():
    """Return the list of active tokens (empty when the service has none)."""
    response = requests.get(
        f'{DATABASE_URL}/token/all',
        params={'only_active': 'true'},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    if response.status_code == 200:
        return response.json()['tokens']
    return []


def deactivate_token(token_id):
    """Mark the token row *token_id* inactive in the token service."""
    response = requests.post(
        f'{DATABASE_URL}/token/{token_id}/deactivate',
        headers={'Content-Type': 'application/json'},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()


def check_and_refresh_access_token(token):
    """Return ``(access_token, readwise_api_key)``, refreshing when expired.

    *token* is a dict as returned by the token service; its ``updated_at``
    (epoch seconds) plus ``expiration_seconds`` gives the expiry time.
    """
    expires_at = token['updated_at'] + token['expiration_seconds']
    if expires_at > time.time():
        return token['access_token'], token['readwise_api_key']
    return refresh_inoreader_access_token(
        token['refresh_token'],
        token['readwise_api_key'],
        old_token_id=token['id'],
    )


def _sync_user(token):
    """Run one fetch-and-push cycle for the user owning *token*."""
    email = token['email']
    logging.info("Checking for new annotations for user with email: {}".format(email))

    if not token.get('readwise_api_key'):
        # Nothing can be pushed without a key; skip rather than crash.
        logging.info("Skipping user with email: {} (no Readwise API key stored)".format(email))
        return

    inoreader_token, readwise_api_key = check_and_refresh_access_token(token)

    last_annotation_time = get_last_update_time(email)
    new_annotations = get_new_annotations(last_annotation_time, inoreader_token)

    if not new_annotations:
        logging.info("No new annotations found for user with email: {}".format(email))
        return

    latest_added_on = max(annotation['added_on'] for annotation in new_annotations)
    push_annotations_to_readwise(new_annotations, readwise_api_key)
    # Advance the watermark only after the push succeeded.
    update_last_update_time(email, latest_added_on)
    logging.info(
        "Successfully pushed {} new annotations to Readwise for user with email: {}".format(
            len(new_annotations), email
        )
    )


def _sync_all_users():
    """Sync every active user; return True when no user failed."""
    all_succeeded = True
    for token in get_all_active_tokens():
        try:
            _sync_user(token)
        except Exception:
            # Isolate failures so one broken account does not block the rest.
            logging.exception("An error occurred for user with email: %s", token.get('email'))
            all_succeeded = False
    return all_succeeded


def main():
    """Run the sync loop forever: daily when clean, hourly after errors."""
    if not DATABASE_URL:
        raise RuntimeError("Missing required environment variable: DATABASE_URL")

    while True:
        try:
            all_succeeded = _sync_all_users()
        except Exception as e:
            logging.error(f"An error occurred: {e}")
            all_succeeded = False

        time.sleep(SYNC_INTERVAL_SECONDS if all_succeeded else RETRY_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()