feat: convert to self-hosted version

This commit is contained in:
Jayden Pyles
2024-07-06 22:40:47 -05:00
parent 8808b493e6
commit 2bfc751d01
24 changed files with 1073 additions and 222 deletions

View File

@@ -1,49 +0,0 @@
# STL
import os
import logging
from typing import Any
# PDM
import boto3
from mypy_boto3_dynamodb.service_resource import Table, DynamoDBServiceResource
LOG = logging.getLogger(__name__)
def connect_to_dynamo() -> Table:
region_name = os.getenv("AWS_REGION")
dynamodb: DynamoDBServiceResource = boto3.resource(
"dynamodb", region_name=region_name
)
return dynamodb.Table("scrape")
def insert(table: Table, item: dict[str, Any]) -> None:
i = table.put_item(Item=item)
LOG.info(f"Inserted item: {i}")
def query(table: Table, index_name: str, key_condition: Any) -> list[Any]:
try:
response = table.query(
IndexName=index_name, KeyConditionExpression=key_condition
)
items = response.get("Items", [])
for item in items:
LOG.info(f"Queried item: {item}")
return items
except Exception as e:
LOG.error(f"Failed to query table: {e}")
raise
def query_by_id(table: Table, key_condition: Any) -> list[Any]:
try:
response = table.query(KeyConditionExpression=key_condition)
items = response.get("Items", [])
for item in items:
LOG.info(f"Queried item: {item}")
return items
except Exception as e:
LOG.error(f"Failed to query table: {e}")
raise

View File

@@ -5,27 +5,28 @@ from io import StringIO
# PDM
import pandas as pd
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI
from fastapi.encoders import jsonable_encoder
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from boto3.dynamodb.conditions import Key
# LOCAL
from api.backend.amazon import query, insert, query_by_id, connect_to_dynamo
from api.backend.job import query, insert
from api.backend.models import DownloadJob, SubmitScrapeJob, RetrieveScrapeJobs
from api.backend.scraping import scrape
from api.backend.auth.auth_router import auth_router
logging.basicConfig(
level=logging.INFO,
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
format="%(levelname)s: %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()],
)
LOG = logging.getLogger(__name__)
app = FastAPI(title="api")
app.include_router(auth_router)
app.add_middleware(
CORSMiddleware,
@@ -54,10 +55,9 @@ async def submit_scrape_job(job: SubmitScrapeJob):
)
json_scraped = jsonable_encoder(scraped)
table = connect_to_dynamo()
job.result = json_scraped
job.id = uuid.uuid4().hex
insert(table, jsonable_encoder(job))
await insert(jsonable_encoder(job))
return JSONResponse(content=json_scraped)
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=500)
@@ -67,8 +67,7 @@ async def submit_scrape_job(job: SubmitScrapeJob):
async def retrieve_scrape_jobs(retrieve: RetrieveScrapeJobs):
LOG.info(f"Retrieving jobs for account: {retrieve.user}")
try:
table = connect_to_dynamo()
results = query(table, "user", Key("user").eq(retrieve.user))
results = await query({"user": retrieve.user})
return JSONResponse(content=results)
except Exception as e:
LOG.error(f"Exception occurred: {e}")
@@ -79,8 +78,7 @@ async def retrieve_scrape_jobs(retrieve: RetrieveScrapeJobs):
async def download(download_job: DownloadJob):
LOG.info(f"Downloading job with id: {download_job.id}")
try:
table = connect_to_dynamo()
results = query_by_id(table, Key("id").eq(download_job.id))
results = await query({"id": download_job.id})
df = pd.DataFrame(results)

View File

View File

@@ -0,0 +1,57 @@
# STL
from datetime import timedelta
# PDM
from fastapi import Depends, APIRouter, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
# LOCAL
from api.backend.schemas import User, Token, UserCreate
from api.backend.database import get_user_collection
from api.backend.auth.auth_utils import (
ACCESS_TOKEN_EXPIRE_MINUTES,
get_current_user,
authenticate_user,
get_password_hash,
create_access_token,
)
auth_router = APIRouter()
@auth_router.post("/api/auth/token", response_model=Token)
async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
user = await authenticate_user(form_data.username, form_data.password)
if not user:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Incorrect username or password",
headers={"WWW-Authenticate": "Bearer"},
)
expire_minutes = (
int(ACCESS_TOKEN_EXPIRE_MINUTES) if ACCESS_TOKEN_EXPIRE_MINUTES else 60
)
access_token_expires = timedelta(minutes=expire_minutes)
access_token = create_access_token(
data={"sub": user.email}, expires_delta=access_token_expires
)
return {"access_token": access_token, "token_type": "bearer"}
@auth_router.post("/api/auth/signup", response_model=User)
async def create_user(user: UserCreate):
users_collection = get_user_collection()
hashed_password = get_password_hash(user.password)
user_dict = user.model_dump()
user_dict["hashed_password"] = hashed_password
del user_dict["password"]
_ = await users_collection.insert_one(user_dict)
return user_dict
@auth_router.get("/api/auth/users/me", response_model=User)
async def read_users_me(current_user: User = Depends(get_current_user)):
return current_user

View File

@@ -0,0 +1,101 @@
# STL
import os
from typing import Any, Optional
from datetime import datetime, timedelta
# PDM
from jose import JWTError, jwt
from dotenv import load_dotenv
from fastapi import Depends, HTTPException, status
from passlib.context import CryptContext
from fastapi.security import OAuth2PasswordBearer
# LOCAL
from api.backend.schemas import User, UserInDB, TokenData
from api.backend.database import get_user_collection
_ = load_dotenv()
SECRET_KEY = os.getenv("SECRET_KEY") or ""
ALGORITHM = os.getenv("ALGORITHM") or ""
ACCESS_TOKEN_EXPIRE_MINUTES = os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES")
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="auth/token")
def verify_password(plain_password: str, hashed_password: str):
return pwd_context.verify(plain_password, hashed_password)
def get_password_hash(password: str):
return pwd_context.hash(password)
async def get_user(email: str):
user_collection = get_user_collection()
user = await user_collection.find_one({"email": email})
if not user:
return
return UserInDB(**user)
async def authenticate_user(email: str, password: str):
user = await get_user(email)
if not user:
return False
if not verify_password(password, user.hashed_password):
return False
return user
def create_access_token(
data: dict[str, Any], expires_delta: Optional[timedelta] = None
):
to_encode = data.copy()
if expires_delta:
expire = datetime.now() + expires_delta
else:
expire = datetime.now() + timedelta(minutes=15)
to_encode.update({"exp": expire})
encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
return encoded_jwt
async def get_current_user(token: str = Depends(oauth2_scheme)):
credentials_exception = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Could not validate credentials",
headers={"WWW-Authenticate": "Bearer"},
)
try:
payload: Optional[dict[str, Any]] = jwt.decode(
token, SECRET_KEY, algorithms=[ALGORITHM]
)
if not payload:
raise credentials_exception
email = payload.get("sub")
if email is None:
raise credentials_exception
token_data = TokenData(email=email)
except JWTError:
raise credentials_exception
user = await get_user(email=token_data.email)
if user is None:
raise credentials_exception
return user

23
api/backend/database.py Normal file
View File

@@ -0,0 +1,23 @@
# STL
import os
from typing import Any
# PDM
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient
_ = load_dotenv()
MONGODB_URI = os.getenv("MONGODB_URI")
def get_user_collection():
client: AsyncIOMotorClient[dict[str, Any]] = AsyncIOMotorClient(MONGODB_URI)
db = client["scrape"]
return db["users"]
def get_job_collection():
client: AsyncIOMotorClient[dict[str, Any]] = AsyncIOMotorClient(MONGODB_URI)
db = client["scrape"]
return db["jobs"]

26
api/backend/job.py Normal file
View File

@@ -0,0 +1,26 @@
# STL
import logging
from typing import Any
# LOCAL
from api.backend.database import get_job_collection
LOG = logging.getLogger(__name__)
async def insert(item: dict[str, Any]) -> None:
collection = get_job_collection()
i = await collection.insert_one(item)
LOG.info(f"Inserted item: {i}")
async def query(filter: dict[str, Any]) -> list[dict[str, Any]]:
collection = get_job_collection()
cursor = collection.find(filter)
results: list[dict[str, Any]] = []
async for document in cursor:
del document["_id"]
results.append(document)
return results

30
api/backend/schemas.py Normal file
View File

@@ -0,0 +1,30 @@
# STL
from typing import Optional
# PDM
from pydantic import EmailStr, BaseModel
class Token(BaseModel):
access_token: str
token_type: str
class TokenData(BaseModel):
email: Optional[str] = None
class User(BaseModel):
email: EmailStr
full_name: Optional[str] = None
disabled: Optional[bool] = None
class UserInDB(User):
hashed_password: str
class UserCreate(BaseModel):
email: EmailStr
password: str
full_name: Optional[str] = None