mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-12-14 11:46:17 +00:00
Feat: Site Mapping (#46)
Some checks failed
Unit Tests / unit-tests (push) Has been cancelled
Some checks failed
Unit Tests / unit-tests (push) Has been cancelled
* wip: add site mapping * chore: cleanup
This commit is contained in:
19
api/backend/job/__init__.py
Normal file
19
api/backend/job/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
from .job import (
|
||||||
|
query,
|
||||||
|
insert,
|
||||||
|
update_job,
|
||||||
|
delete_jobs,
|
||||||
|
get_jobs_per_day,
|
||||||
|
get_queued_job,
|
||||||
|
average_elements_per_link,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"query",
|
||||||
|
"insert",
|
||||||
|
"update_job",
|
||||||
|
"delete_jobs",
|
||||||
|
"get_jobs_per_day",
|
||||||
|
"get_queued_job",
|
||||||
|
"average_elements_per_link",
|
||||||
|
]
|
||||||
@@ -6,8 +6,8 @@ from typing import Any, Optional
|
|||||||
from pymongo import DESCENDING
|
from pymongo import DESCENDING
|
||||||
|
|
||||||
# LOCAL
|
# LOCAL
|
||||||
from api.backend.models import FetchOptions
|
|
||||||
from api.backend.database import get_job_collection
|
from api.backend.database import get_job_collection
|
||||||
|
from api.backend.job.models.job_options import FetchOptions
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
0
api/backend/job/models/__init__.py
Normal file
0
api/backend/job/models/__init__.py
Normal file
14
api/backend/job/models/job_options.py
Normal file
14
api/backend/job/models/job_options.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import Any, Optional
|
||||||
|
from api.backend.job.models.site_map import SiteMap
|
||||||
|
|
||||||
|
|
||||||
|
class FetchOptions(BaseModel):
    """Optional flags accepted when fetching a job."""

    # Tri-state flag: True / False, or None when the caller did not specify it.
    chat: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
|
class JobOptions(BaseModel):
    """Per-job scraping settings submitted alongside a scrape request."""

    # Whether the scrape should go beyond the submitted page.
    multi_page_scrape: bool = False

    # Extra request headers, keyed by header name.
    # NOTE(review): this field and ``proxies`` are not Optional, so an explicit
    # null is rejected by validation — confirm no client or stored job sends null.
    custom_headers: dict[str, Any] = {}

    # Proxy addresses available to the scraper.
    proxies: list[str] = []

    # Scripted click/input actions to run on the page; None disables site mapping.
    site_map: Optional[SiteMap] = None
|
||||||
14
api/backend/job/models/site_map.py
Normal file
14
api/backend/job/models/site_map.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
|
||||||
|
class Action(BaseModel):
    """A single scripted browser interaction within a site map."""

    # Kind of interaction; selects the handler that performs it.
    type: Literal["click", "input"]

    # XPath locating the element to interact with.
    xpath: str

    # Label for the action.
    name: str

    # Text to type; only meaningful for "input" actions.
    input: str = ""

    # When True the action is dropped after the first pass instead of repeating.
    do_once: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
class SiteMap(BaseModel):
    """Ordered list of actions to perform on a page before scraping it."""

    # Actions are executed in list order.
    actions: list[Action]
|
||||||
30
api/backend/job/scraping/scraping_utils.py
Normal file
30
api/backend/job/scraping/scraping_utils.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import time
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_content(driver: webdriver.Chrome, pages: set[tuple[str, str]]):
    """Scroll the loaded page until it stops growing, then record its HTML.

    Waits up to 10 seconds for a ``<body>`` element, then repeatedly scrolls
    to the bottom of the document (pausing 3 seconds after each scroll) until
    ``document.body.scrollHeight`` no longer changes.

    Args:
        driver: Browser that has already navigated to the page to capture.
        pages: Accumulator that receives the ``(page_source, current_url)``
            pair for the fully scrolled page.

    Returns:
        The page source that was added to ``pages``.
    """
    _ = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    # scrollHeight is a pixel count, so the driver hands back a number.
    last_height = cast(
        int, driver.execute_script("return document.body.scrollHeight")
    )

    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        time.sleep(3)  # Wait for the page to load
        new_height = cast(
            int, driver.execute_script("return document.body.scrollHeight")
        )

        if new_height == last_height:
            break

        last_height = new_height

    # Snapshot the DOM once so the stored and the returned HTML are identical
    # even if the page mutates between two reads.
    page_source = driver.page_source
    pages.add((page_source, driver.current_url))
    return page_source
|
||||||
0
api/backend/job/site_mapping/__init__.py
Normal file
0
api/backend/job/site_mapping/__init__.py
Normal file
94
api/backend/job/site_mapping/site_mapping.py
Normal file
94
api/backend/job/site_mapping/site_mapping.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
from api.backend.job.models.site_map import Action, SiteMap
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.common.exceptions import NoSuchElementException
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from typing import Any
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from api.backend.job.scraping.scraping_utils import scrape_content
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from seleniumwire.inspect import TimeoutException
|
||||||
|
from seleniumwire.webdriver import Chrome
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_done_actions(site_map: dict[str, Any]):
    """Return a deep copy of ``site_map`` without its run-once actions.

    An action is kept only when its ``do_once`` flag is explicitly false.
    A raw action dict that omits ``do_once`` is treated as run-once, matching
    the ``Action`` model's default of ``True``.

    Args:
        site_map: Raw site-map dict with an ``"actions"`` list of action dicts.
            It is not modified.

    Returns:
        A new dict whose ``"actions"`` list holds only the repeating actions.

    Raises:
        KeyError: If ``site_map`` has no ``"actions"`` key.
    """
    cleared_site_map = deepcopy(site_map)

    cleared_site_map["actions"] = [
        action
        for action in cleared_site_map["actions"]
        # Missing flag == model default (True), so the action is dropped.
        if not action.get("do_once", True)
    ]

    return cleared_site_map
|
||||||
|
|
||||||
|
|
||||||
|
def handle_input(action: Action, driver: webdriver.Chrome):
    """Type ``action.input`` into the element located by ``action.xpath``.

    Waits up to 10 seconds for the element to become clickable before sending
    the keys. This is best-effort: every failure is logged and reported through
    the return value instead of being raised.

    Args:
        action: The input action; ``xpath`` and ``input`` are read.
        driver: Browser showing the page to interact with.

    Returns:
        True when the keys were sent, False on any failure.
    """
    try:
        element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, action.xpath))
        )
        # NOTE(review): this writes the typed value to the log — confirm that
        # site maps are never used to enter secrets such as passwords.
        LOG.info("Sending keys: %s to element: %s", action.input, element)

        element.send_keys(action.input)

    except NoSuchElementException:
        LOG.warning("Element not found: %s", action.xpath)
        return False

    except TimeoutException:
        LOG.warning("Timeout waiting for element: %s", action.xpath)
        return False

    except Exception as e:
        # Deliberate catch-all so one bad action cannot crash the scrape;
        # LOG.exception keeps the traceback that a plain message would lose.
        LOG.exception("Error handling input: %s", e)
        return False

    return True
|
||||||
|
|
||||||
|
|
||||||
|
def handle_click(action: Action, driver: webdriver.Chrome):
    """Click the element located by ``action.xpath``.

    The element is looked up directly, without an explicit wait. Like
    ``handle_input``, this is best-effort: a missing element or a failed click
    is logged and reported through the return value instead of being raised,
    so a single bad action does not abort the whole scrape.

    Args:
        action: The click action; only ``xpath`` is read.
        driver: Browser showing the page to interact with.

    Returns:
        True when the click was performed, False on any failure.
    """
    try:
        element = driver.find_element(By.XPATH, action.xpath)
        LOG.info("Clicking element: %s", element)

        element.click()

    except NoSuchElementException:
        # Expected end condition for repeating actions (e.g. no "next" button).
        LOG.info("Element not found: %s", action.xpath)
        return False

    except Exception as e:
        # The click itself can fail (element covered, stale, not interactable);
        # treat that as an unsuccessful action rather than crashing the job.
        LOG.exception("Error handling click: %s", e)
        return False

    return True
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table from an action's ``type`` to the handler that performs it.
# Each handler takes ``(action, driver)`` and returns True on success.
ACTION_MAP = {"click": handle_click, "input": handle_input}
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_site_mapping(
    site_map_dict: dict[str, Any],
    driver: Chrome,
    pages: set[tuple[str, str]],
):
    """Run a site map's actions against the current page and scrape the result.

    Each pass validates the site map, performs its actions in order (pausing
    2 seconds after each one), scrapes the resulting page into ``pages`` and
    then drops the run-once actions. Passes repeat while repeating actions
    remain. Processing stops as soon as any action handler reports failure.

    Args:
        site_map_dict: Raw site-map dict; validated through ``SiteMap``.
        driver: Browser showing the page to interact with.
        pages: Accumulator that receives the scraped pages.
    """
    # Local import keeps this block self-contained.
    import asyncio

    remaining = site_map_dict

    # Iterate instead of recursing: a repeating action (e.g. a "next page"
    # click) may run for many passes, and one stack frame per pass would
    # eventually hit the recursion limit.
    while True:
        site_map = SiteMap(**remaining)
        LOG.info(f"Handling site map: {site_map}")

        for action in site_map.actions:
            action_handler = ACTION_MAP[action.type]
            if not action_handler(action, driver):
                return

            # Yield to the event loop while the page settles; time.sleep
            # would stall every other coroutine for the full pause.
            await asyncio.sleep(2)

        _ = scrape_content(driver, pages)

        remaining = clear_done_actions(remaining)

        if not remaining["actions"]:
            return
|
||||||
@@ -2,12 +2,14 @@
|
|||||||
from typing import Any, Optional, Union
|
from typing import Any, Optional, Union
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
# LOCAL
|
||||||
|
from api.backend.job.models.job_options import JobOptions
|
||||||
|
|
||||||
# PDM
|
# PDM
|
||||||
import pydantic
|
import pydantic
|
||||||
|
|
||||||
|
|
||||||
class FetchOptions(pydantic.BaseModel):
|
|
||||||
chat: Optional[bool] = None
|
|
||||||
|
|
||||||
|
|
||||||
class Element(pydantic.BaseModel):
|
class Element(pydantic.BaseModel):
|
||||||
@@ -22,12 +24,6 @@ class CapturedElement(pydantic.BaseModel):
|
|||||||
name: str
|
name: str
|
||||||
|
|
||||||
|
|
||||||
class JobOptions(pydantic.BaseModel):
|
|
||||||
multi_page_scrape: bool = False
|
|
||||||
custom_headers: Optional[dict[str, Any]] = {}
|
|
||||||
proxies: Optional[list[str]] = []
|
|
||||||
|
|
||||||
|
|
||||||
class RetrieveScrapeJobs(pydantic.BaseModel):
|
class RetrieveScrapeJobs(pydantic.BaseModel):
|
||||||
user: str
|
user: str
|
||||||
|
|
||||||
|
|||||||
@@ -12,22 +12,17 @@ from fastapi.encoders import jsonable_encoder
|
|||||||
from fastapi.responses import JSONResponse, StreamingResponse
|
from fastapi.responses import JSONResponse, StreamingResponse
|
||||||
|
|
||||||
# LOCAL
|
# LOCAL
|
||||||
from api.backend.job import (
|
from api.backend.job import query, insert, update_job, delete_jobs
|
||||||
query,
|
|
||||||
insert,
|
|
||||||
update_job,
|
|
||||||
delete_jobs,
|
|
||||||
)
|
|
||||||
from api.backend.models import (
|
from api.backend.models import (
|
||||||
UpdateJobs,
|
UpdateJobs,
|
||||||
DownloadJob,
|
DownloadJob,
|
||||||
FetchOptions,
|
|
||||||
DeleteScrapeJobs,
|
DeleteScrapeJobs,
|
||||||
Job,
|
Job,
|
||||||
)
|
)
|
||||||
from api.backend.schemas import User
|
from api.backend.schemas import User
|
||||||
from api.backend.auth.auth_utils import get_current_user
|
from api.backend.auth.auth_utils import get_current_user
|
||||||
from api.backend.utils import clean_text
|
from api.backend.utils import clean_text
|
||||||
|
from api.backend.job.models.job_options import FetchOptions
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -1,19 +1,20 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
import time
|
|
||||||
import random
|
import random
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from seleniumwire import webdriver
|
from seleniumwire import webdriver
|
||||||
from lxml.etree import _Element # type: ignore [reportPrivateImport]
|
from lxml.etree import _Element # pyright: ignore [reportPrivateUsage]
|
||||||
from fake_useragent import UserAgent
|
from fake_useragent import UserAgent
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
|
||||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||||
from urllib.parse import urlparse, urljoin
|
from urllib.parse import urlparse, urljoin
|
||||||
from api.backend.models import Element, CapturedElement
|
from api.backend.models import Element, CapturedElement
|
||||||
|
from api.backend.job.site_mapping.site_mapping import (
|
||||||
|
handle_site_mapping,
|
||||||
|
)
|
||||||
|
from api.backend.job.scraping.scraping_utils import scrape_content
|
||||||
|
from api.backend.job.models.site_map import SiteMap
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -95,6 +96,7 @@ async def make_site_request(
|
|||||||
pages: set[tuple[str, str]] = set(),
|
pages: set[tuple[str, str]] = set(),
|
||||||
original_url: str = "",
|
original_url: str = "",
|
||||||
proxies: Optional[list[str]] = [],
|
proxies: Optional[list[str]] = [],
|
||||||
|
site_map: Optional[dict[str, Any]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Make basic `GET` request to site using Selenium."""
|
"""Make basic `GET` request to site using Selenium."""
|
||||||
# Check if URL has already been visited
|
# Check if URL has already been visited
|
||||||
@@ -114,27 +116,16 @@ async def make_site_request(
|
|||||||
final_url = driver.current_url
|
final_url = driver.current_url
|
||||||
visited_urls.add(url)
|
visited_urls.add(url)
|
||||||
visited_urls.add(final_url)
|
visited_urls.add(final_url)
|
||||||
_ = WebDriverWait(driver, 10).until(
|
|
||||||
EC.presence_of_element_located((By.TAG_NAME, "body"))
|
|
||||||
)
|
|
||||||
|
|
||||||
last_height = driver.execute_script("return document.body.scrollHeight")
|
page_source = scrape_content(driver, pages)
|
||||||
while True:
|
|
||||||
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
|
||||||
|
|
||||||
time.sleep(3) # Wait for the page to load
|
if site_map:
|
||||||
new_height = driver.execute_script("return document.body.scrollHeight")
|
LOG.info("Site map: %s", site_map)
|
||||||
|
_ = await handle_site_mapping(
|
||||||
if new_height == last_height:
|
site_map,
|
||||||
break
|
driver,
|
||||||
|
pages,
|
||||||
last_height = new_height
|
)
|
||||||
|
|
||||||
final_height = driver.execute_script("return document.body.scrollHeight")
|
|
||||||
|
|
||||||
page_source = driver.page_source
|
|
||||||
LOG.debug(f"Page source for url: {url}\n{page_source}")
|
|
||||||
pages.add((page_source, final_url))
|
|
||||||
finally:
|
finally:
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
@@ -192,6 +183,7 @@ async def scrape(
|
|||||||
headers: Optional[dict[str, Any]],
|
headers: Optional[dict[str, Any]],
|
||||||
multi_page_scrape: bool = False,
|
multi_page_scrape: bool = False,
|
||||||
proxies: Optional[list[str]] = [],
|
proxies: Optional[list[str]] = [],
|
||||||
|
site_map: Optional[SiteMap] = None,
|
||||||
):
|
):
|
||||||
visited_urls: set[str] = set()
|
visited_urls: set[str] = set()
|
||||||
pages: set[tuple[str, str]] = set()
|
pages: set[tuple[str, str]] = set()
|
||||||
@@ -204,6 +196,7 @@ async def scrape(
|
|||||||
pages=pages,
|
pages=pages,
|
||||||
original_url=url,
|
original_url=url,
|
||||||
proxies=proxies,
|
proxies=proxies,
|
||||||
|
site_map=site_map,
|
||||||
)
|
)
|
||||||
|
|
||||||
elements: list[dict[str, dict[str, list[CapturedElement]]]] = list()
|
elements: list[dict[str, dict[str, list[CapturedElement]]]] = list()
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ async def process_job():
|
|||||||
job["job_options"]["custom_headers"],
|
job["job_options"]["custom_headers"],
|
||||||
job["job_options"]["multi_page_scrape"],
|
job["job_options"]["multi_page_scrape"],
|
||||||
job["job_options"]["proxies"],
|
job["job_options"]["proxies"],
|
||||||
|
job["job_options"]["site_map"],
|
||||||
)
|
)
|
||||||
LOG.info(
|
LOG.info(
|
||||||
f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
|
f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
|
||||||
|
|||||||
@@ -10,5 +10,8 @@ services:
|
|||||||
- "$PWD/package-lock.json:/app/package-lock.json"
|
- "$PWD/package-lock.json:/app/package-lock.json"
|
||||||
- "$PWD/tsconfig.json:/app/tsconfig.json"
|
- "$PWD/tsconfig.json:/app/tsconfig.json"
|
||||||
scraperr_api:
|
scraperr_api:
|
||||||
|
environment:
|
||||||
|
- LOG_LEVEL=INFO
|
||||||
volumes:
|
volumes:
|
||||||
- "$PWD/api:/project/api"
|
- "$PWD/api:/project/api"
|
||||||
|
- "$PWD/scraping:/project/scraping"
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import {
|
|||||||
Button,
|
Button,
|
||||||
Tooltip,
|
Tooltip,
|
||||||
IconButton,
|
IconButton,
|
||||||
|
TableContainer,
|
||||||
} from "@mui/material";
|
} from "@mui/material";
|
||||||
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
|
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
|
||||||
import StarIcon from "@mui/icons-material/Star";
|
import StarIcon from "@mui/icons-material/Star";
|
||||||
@@ -52,145 +53,147 @@ export const JobQueue = ({
|
|||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Table sx={{ tableLayout: "fixed", width: "100%" }}>
|
<TableContainer component={Box} sx={{ maxHeight: "90dvh" }}>
|
||||||
<TableHead>
|
<Table sx={{ tableLayout: "fixed", width: "100%" }}>
|
||||||
<TableRow>
|
<TableHead>
|
||||||
<TableCell>Select</TableCell>
|
<TableRow>
|
||||||
<TableCell>Id</TableCell>
|
<TableCell>Select</TableCell>
|
||||||
<TableCell>Url</TableCell>
|
<TableCell>Id</TableCell>
|
||||||
<TableCell>Elements</TableCell>
|
<TableCell>Url</TableCell>
|
||||||
<TableCell>Result</TableCell>
|
<TableCell>Elements</TableCell>
|
||||||
<TableCell>Time Created</TableCell>
|
<TableCell>Result</TableCell>
|
||||||
<TableCell>Status</TableCell>
|
<TableCell>Time Created</TableCell>
|
||||||
<TableCell>Actions</TableCell>
|
<TableCell>Status</TableCell>
|
||||||
</TableRow>
|
<TableCell>Actions</TableCell>
|
||||||
</TableHead>
|
</TableRow>
|
||||||
<TableBody>
|
</TableHead>
|
||||||
{filteredJobs.map((row, index) => (
|
<TableBody sx={{ overflow: "auto" }}>
|
||||||
<TableRow key={index}>
|
{filteredJobs.map((row, index) => (
|
||||||
<TableCell padding="checkbox">
|
<TableRow key={index}>
|
||||||
<Checkbox
|
<TableCell padding="checkbox">
|
||||||
checked={selectedJobs.has(row.id)}
|
<Checkbox
|
||||||
onChange={() => onSelectJob(row.id)}
|
checked={selectedJobs.has(row.id)}
|
||||||
/>
|
onChange={() => onSelectJob(row.id)}
|
||||||
<Tooltip title="Chat with AI">
|
/>
|
||||||
<span>
|
<Tooltip title="Chat with AI">
|
||||||
<IconButton
|
<span>
|
||||||
onClick={() => {
|
<IconButton
|
||||||
router.push({
|
onClick={() => {
|
||||||
pathname: "/chat",
|
router.push({
|
||||||
query: {
|
pathname: "/chat",
|
||||||
job: row.id,
|
query: {
|
||||||
},
|
job: row.id,
|
||||||
});
|
},
|
||||||
}}
|
});
|
||||||
>
|
|
||||||
<AutoAwesome />
|
|
||||||
</IconButton>
|
|
||||||
</span>
|
|
||||||
</Tooltip>
|
|
||||||
<Tooltip title="Favorite Job">
|
|
||||||
<span>
|
|
||||||
<IconButton
|
|
||||||
color={row.favorite ? "warning" : "default"}
|
|
||||||
onClick={() => {
|
|
||||||
onFavorite([row.id], "favorite", !row.favorite);
|
|
||||||
row.favorite = !row.favorite;
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<StarIcon />
|
|
||||||
</IconButton>
|
|
||||||
</span>
|
|
||||||
</Tooltip>
|
|
||||||
</TableCell>
|
|
||||||
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
|
|
||||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.id}</Box>
|
|
||||||
</TableCell>
|
|
||||||
<TableCell sx={{ maxWidth: 200, overflow: "auto" }}>
|
|
||||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.url}</Box>
|
|
||||||
</TableCell>
|
|
||||||
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
|
||||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
|
||||||
{JSON.stringify(row.elements)}
|
|
||||||
</Box>
|
|
||||||
</TableCell>
|
|
||||||
<TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}>
|
|
||||||
<Accordion sx={{ margin: 0, padding: 0.5 }}>
|
|
||||||
<AccordionSummary
|
|
||||||
expandIcon={<ExpandMoreIcon />}
|
|
||||||
aria-controls="panel1a-content"
|
|
||||||
id="panel1a-header"
|
|
||||||
sx={{
|
|
||||||
minHeight: 0,
|
|
||||||
"&.Mui-expanded": { minHeight: 0 },
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<Box
|
|
||||||
sx={{
|
|
||||||
maxHeight: 150,
|
|
||||||
overflow: "auto",
|
|
||||||
width: "100%",
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<Typography sx={{ fontSize: "0.875rem" }}>
|
|
||||||
Show Result
|
|
||||||
</Typography>
|
|
||||||
</Box>
|
|
||||||
</AccordionSummary>
|
|
||||||
<AccordionDetails sx={{ padding: 1 }}>
|
|
||||||
<Box sx={{ maxHeight: 200, overflow: "auto" }}>
|
|
||||||
<Typography
|
|
||||||
sx={{
|
|
||||||
fontSize: "0.875rem",
|
|
||||||
whiteSpace: "pre-wrap",
|
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{JSON.stringify(row.result, null, 2)}
|
<AutoAwesome />
|
||||||
</Typography>
|
</IconButton>
|
||||||
</Box>
|
</span>
|
||||||
</AccordionDetails>
|
</Tooltip>
|
||||||
</Accordion>
|
<Tooltip title="Favorite Job">
|
||||||
</TableCell>
|
<span>
|
||||||
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
<IconButton
|
||||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
color={row.favorite ? "warning" : "default"}
|
||||||
{new Date(row.time_created).toLocaleString()}
|
onClick={() => {
|
||||||
</Box>
|
onFavorite([row.id], "favorite", !row.favorite);
|
||||||
</TableCell>
|
row.favorite = !row.favorite;
|
||||||
<TableCell sx={{ maxWidth: 50, overflow: "auto" }}>
|
}}
|
||||||
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
>
|
||||||
<Box
|
<StarIcon />
|
||||||
className="rounded-md p-2 text-center"
|
</IconButton>
|
||||||
sx={{ bgcolor: colors[row.status] }}
|
</span>
|
||||||
>
|
</Tooltip>
|
||||||
{row.status}
|
</TableCell>
|
||||||
|
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
|
||||||
|
<Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.id}</Box>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ maxWidth: 200, overflow: "auto" }}>
|
||||||
|
<Box sx={{ maxHeight: 100, overflow: "auto" }}>{row.url}</Box>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
||||||
|
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
||||||
|
{JSON.stringify(row.elements)}
|
||||||
</Box>
|
</Box>
|
||||||
</Box>
|
</TableCell>
|
||||||
</TableCell>
|
<TableCell sx={{ maxWidth: 150, overflow: "auto", padding: 0 }}>
|
||||||
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
<Accordion sx={{ margin: 0, padding: 0.5 }}>
|
||||||
<Box sx={{ display: "flex", gap: 1 }}>
|
<AccordionSummary
|
||||||
<Button
|
expandIcon={<ExpandMoreIcon />}
|
||||||
onClick={() => {
|
aria-controls="panel1a-content"
|
||||||
onDownload([row.id]);
|
id="panel1a-header"
|
||||||
}}
|
sx={{
|
||||||
size="small"
|
minHeight: 0,
|
||||||
sx={{ minWidth: 0, padding: "4px 8px" }}
|
"&.Mui-expanded": { minHeight: 0 },
|
||||||
>
|
}}
|
||||||
Download
|
>
|
||||||
</Button>
|
<Box
|
||||||
<Button
|
sx={{
|
||||||
onClick={() =>
|
maxHeight: 150,
|
||||||
onNavigate(row.elements, row.url, row.job_options)
|
overflow: "auto",
|
||||||
}
|
width: "100%",
|
||||||
size="small"
|
}}
|
||||||
sx={{ minWidth: 0, padding: "4px 8px" }}
|
>
|
||||||
>
|
<Typography sx={{ fontSize: "0.875rem" }}>
|
||||||
Rerun
|
Show Result
|
||||||
</Button>
|
</Typography>
|
||||||
</Box>
|
</Box>
|
||||||
</TableCell>
|
</AccordionSummary>
|
||||||
</TableRow>
|
<AccordionDetails sx={{ padding: 1 }}>
|
||||||
))}
|
<Box sx={{ maxHeight: 200, overflow: "auto" }}>
|
||||||
</TableBody>
|
<Typography
|
||||||
</Table>
|
sx={{
|
||||||
|
fontSize: "0.875rem",
|
||||||
|
whiteSpace: "pre-wrap",
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{JSON.stringify(row.result, null, 2)}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
||||||
|
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
||||||
|
{new Date(row.time_created).toLocaleString()}
|
||||||
|
</Box>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ maxWidth: 50, overflow: "auto" }}>
|
||||||
|
<Box sx={{ maxHeight: 100, overflow: "auto" }}>
|
||||||
|
<Box
|
||||||
|
className="rounded-md p-2 text-center"
|
||||||
|
sx={{ bgcolor: colors[row.status] }}
|
||||||
|
>
|
||||||
|
{row.status}
|
||||||
|
</Box>
|
||||||
|
</Box>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ maxWidth: 150, overflow: "auto" }}>
|
||||||
|
<Box sx={{ display: "flex", gap: 1 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
onDownload([row.id]);
|
||||||
|
}}
|
||||||
|
size="small"
|
||||||
|
sx={{ minWidth: 0, padding: "4px 8px" }}
|
||||||
|
>
|
||||||
|
Download
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
onClick={() =>
|
||||||
|
onNavigate(row.elements, row.url, row.job_options)
|
||||||
|
}
|
||||||
|
size="small"
|
||||||
|
sx={{ minWidth: 0, padding: "4px 8px" }}
|
||||||
|
>
|
||||||
|
Rerun
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
))}
|
||||||
|
</TableBody>
|
||||||
|
</Table>
|
||||||
|
</TableContainer>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
107
src/components/pages/home/home.tsx
Normal file
107
src/components/pages/home/home.tsx
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import React, { useState, useEffect, useRef } from "react";
|
||||||
|
import { Button, Container, Box, Snackbar, Alert } from "@mui/material";
|
||||||
|
import { useRouter } from "next/router";
|
||||||
|
import { Element, Result } from "@/types";
|
||||||
|
import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
|
||||||
|
import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
|
||||||
|
|
||||||
|
/**
 * Return the first value of a Next.js query parameter, which may be a single
 * string, an array of strings (repeated parameter) or absent.
 */
const firstQueryValue = (value: string | string[] | undefined) =>
  Array.isArray(value) ? value[0] : value;

/**
 * Home page: renders the job submitter, the element table once a URL has been
 * submitted, and a snackbar reflecting the provider's notification state.
 * `elements` and `url` query parameters pre-fill the form (used by "Rerun").
 */
export const Home = () => {
  const {
    submittedURL,
    setSubmittedURL,
    rows,
    setRows,
    results,
    snackbarOpen,
    setSnackbarOpen,
    snackbarMessage,
    snackbarSeverity,
  } = useJobSubmitterProvider();
  const router = useRouter();
  const { elements, url } = router.query;

  const resultsRef = useRef<HTMLTableElement | null>(null);

  useEffect(() => {
    const rawElements = firstQueryValue(elements);
    const rawUrl = firstQueryValue(url);

    if (rawElements) {
      // The query string is user-controlled; malformed JSON must not crash the page.
      try {
        setRows(JSON.parse(rawElements));
      } catch (err) {
        console.error("Ignoring malformed `elements` query parameter", err);
      }
    }
    if (rawUrl) {
      setSubmittedURL(rawUrl);
    }
  }, [elements, url]);

  useEffect(() => {
    // NOTE(review): resultsRef is not attached to any element in this
    // component, so this effect currently never scrolls — confirm intent.
    if (results && resultsRef.current) {
      resultsRef.current.scrollIntoView({ behavior: "smooth" });
    }
  }, [results]);

  const handleCloseSnackbar = () => {
    setSnackbarOpen(false);
  };

  const goToJobs = () => {
    router.push("/jobs");
  };

  // Built as plain elements rather than components declared inside render:
  // a component defined here would get a new identity on every render, which
  // remounts the Snackbar and resets its auto-hide timer.
  const errorSnackbar = (
    <Snackbar
      open={snackbarOpen}
      autoHideDuration={6000}
      onClose={handleCloseSnackbar}
    >
      <Alert onClose={handleCloseSnackbar} severity="error">
        {snackbarMessage}
      </Alert>
    </Snackbar>
  );

  const notifySnackbar = (
    <Snackbar
      open={snackbarOpen}
      autoHideDuration={6000}
      onClose={handleCloseSnackbar}
    >
      <Alert
        onClose={handleCloseSnackbar}
        severity="info"
        action={
          <Button color="inherit" size="small" onClick={goToJobs}>
            Go To Job
          </Button>
        }
      >
        {snackbarMessage}
      </Alert>
    </Snackbar>
  );

  return (
    <Box
      bgcolor="background.default"
      display="flex"
      flexDirection="column"
      justifyContent="center"
      alignItems="center"
      height="100%"
      py={4}
    >
      <Container maxWidth="lg" className="overflow-y-auto max-h-full">
        <JobSubmitter />
        {submittedURL.length ? (
          <ElementTable
            rows={rows}
            setRows={setRows}
            submittedURL={submittedURL}
          />
        ) : null}
      </Container>
      {snackbarSeverity === "info" ? notifySnackbar : errorSnackbar}
    </Box>
  );
};
|
||||||
1
src/components/pages/home/index.ts
Normal file
1
src/components/pages/home/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export * from "./home";
|
||||||
@@ -1,2 +1 @@
|
|||||||
export * from "./ElementTable";
|
|
||||||
export * from "./job-submitter";
|
export * from "./job-submitter";
|
||||||
|
|||||||
@@ -15,9 +15,11 @@ import {
|
|||||||
IconButton,
|
IconButton,
|
||||||
Tooltip,
|
Tooltip,
|
||||||
useTheme,
|
useTheme,
|
||||||
|
Divider,
|
||||||
} from "@mui/material";
|
} from "@mui/material";
|
||||||
import AddIcon from "@mui/icons-material/Add";
|
import AddIcon from "@mui/icons-material/Add";
|
||||||
import { Element } from "../../types";
|
import { Element } from "@/types";
|
||||||
|
import { SiteMap } from "../site-map";
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
rows: Element[];
|
rows: Element[];
|
||||||
@@ -169,6 +171,13 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
|
|||||||
</div>
|
</div>
|
||||||
</TableContainer>
|
</TableContainer>
|
||||||
</Box>
|
</Box>
|
||||||
|
<Divider
|
||||||
|
sx={{
|
||||||
|
borderColor: theme.palette.mode === "dark" ? "#ffffff" : "0000000",
|
||||||
|
marginBottom: 2,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<SiteMap />
|
||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
export { ElementTable } from "./element-table";
|
||||||
@@ -1 +1,2 @@
|
|||||||
export { JobSubmitter } from "./job-submitter";
|
export { JobSubmitter } from "./job-submitter";
|
||||||
|
export { ElementTable } from "./element-table";
|
||||||
|
|||||||
@@ -1,26 +1,20 @@
|
|||||||
import React, { Dispatch } from "react";
|
import React from "react";
|
||||||
import { TextField, Button, CircularProgress } from "@mui/material";
|
import { TextField, Button, CircularProgress } from "@mui/material";
|
||||||
import { Element } from "@/types";
|
import { useJobSubmitterProvider } from "../provider";
|
||||||
|
|
||||||
export type JobSubmitterInputProps = {
|
export type JobSubmitterInputProps = {
|
||||||
submittedURL: string;
|
|
||||||
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
|
|
||||||
isValidURL: boolean;
|
|
||||||
urlError: string | null;
|
urlError: string | null;
|
||||||
handleSubmit: () => void;
|
handleSubmit: () => void;
|
||||||
loading: boolean;
|
loading: boolean;
|
||||||
rows: Element[];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export const JobSubmitterInput = ({
|
export const JobSubmitterInput = ({
|
||||||
submittedURL,
|
|
||||||
setSubmittedURL,
|
|
||||||
isValidURL,
|
|
||||||
urlError,
|
|
||||||
handleSubmit,
|
handleSubmit,
|
||||||
loading,
|
loading,
|
||||||
rows,
|
urlError,
|
||||||
}: JobSubmitterInputProps) => {
|
}: JobSubmitterInputProps) => {
|
||||||
|
const { submittedURL, setSubmittedURL, isValidURL, rows } =
|
||||||
|
useJobSubmitterProvider();
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-row space-x-4 items-center mb-2">
|
<div className="flex flex-row space-x-4 items-center mb-2">
|
||||||
<TextField
|
<TextField
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { RawJobOptions } from "@/types/job";
|
import { RawJobOptions } from "@/types/job";
|
||||||
import { Box, FormControlLabel, Checkbox, TextField } from "@mui/material";
|
import { Box, FormControlLabel, Checkbox, TextField } from "@mui/material";
|
||||||
import { Dispatch, SetStateAction } from "react";
|
import { Dispatch, SetStateAction } from "react";
|
||||||
|
import { useJobSubmitterProvider } from "../provider";
|
||||||
|
|
||||||
export type JobSubmitterOptionsProps = {
|
export type JobSubmitterOptionsProps = {
|
||||||
jobOptions: RawJobOptions;
|
jobOptions: RawJobOptions;
|
||||||
@@ -14,9 +15,9 @@ export type JobSubmitterOptionsProps = {
|
|||||||
export const JobSubmitterOptions = ({
|
export const JobSubmitterOptions = ({
|
||||||
jobOptions,
|
jobOptions,
|
||||||
setJobOptions,
|
setJobOptions,
|
||||||
|
handleSelectProxies,
|
||||||
customJSONSelected,
|
customJSONSelected,
|
||||||
setCustomJSONSelected,
|
setCustomJSONSelected,
|
||||||
handleSelectProxies,
|
|
||||||
proxiesSelected,
|
proxiesSelected,
|
||||||
}: JobSubmitterOptionsProps) => {
|
}: JobSubmitterOptionsProps) => {
|
||||||
const handleMultiPageScrapeChange = () => {
|
const handleMultiPageScrapeChange = () => {
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import React, { useEffect, useState, Dispatch } from "react";
|
import React, { useEffect, useState } from "react";
|
||||||
import { Element } from "@/types";
|
|
||||||
import { useAuth } from "@/contexts/AuthContext";
|
import { useAuth } from "@/contexts/AuthContext";
|
||||||
import { useRouter } from "next/router";
|
import { useRouter } from "next/router";
|
||||||
import { RawJobOptions } from "@/types/job";
|
import { RawJobOptions } from "@/types/job";
|
||||||
@@ -10,21 +9,7 @@ import { JobSubmitterHeader } from "./job-submitter-header";
|
|||||||
import { JobSubmitterInput } from "./job-submitter-input";
|
import { JobSubmitterInput } from "./job-submitter-input";
|
||||||
import { JobSubmitterOptions } from "./job-submitter-options";
|
import { JobSubmitterOptions } from "./job-submitter-options";
|
||||||
import { ApiService } from "@/services";
|
import { ApiService } from "@/services";
|
||||||
|
import { useJobSubmitterProvider } from "./provider";
|
||||||
interface StateProps {
|
|
||||||
submittedURL: string;
|
|
||||||
setSubmittedURL: Dispatch<React.SetStateAction<string>>;
|
|
||||||
rows: Element[];
|
|
||||||
isValidURL: boolean;
|
|
||||||
setIsValidUrl: Dispatch<React.SetStateAction<boolean>>;
|
|
||||||
setSnackbarMessage: Dispatch<React.SetStateAction<string>>;
|
|
||||||
setSnackbarOpen: Dispatch<React.SetStateAction<boolean>>;
|
|
||||||
setSnackbarSeverity: Dispatch<React.SetStateAction<string>>;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Props {
|
|
||||||
stateProps: StateProps;
|
|
||||||
}
|
|
||||||
|
|
||||||
const initialJobOptions: RawJobOptions = {
|
const initialJobOptions: RawJobOptions = {
|
||||||
multi_page_scrape: false,
|
multi_page_scrape: false,
|
||||||
@@ -32,7 +17,7 @@ const initialJobOptions: RawJobOptions = {
|
|||||||
proxies: null,
|
proxies: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
export const JobSubmitter = ({ stateProps }: Props) => {
|
export const JobSubmitter = () => {
|
||||||
const { user } = useAuth();
|
const { user } = useAuth();
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const { job_options } = router.query;
|
const { job_options } = router.query;
|
||||||
@@ -40,11 +25,13 @@ export const JobSubmitter = ({ stateProps }: Props) => {
|
|||||||
const {
|
const {
|
||||||
submittedURL,
|
submittedURL,
|
||||||
rows,
|
rows,
|
||||||
|
siteMap,
|
||||||
setIsValidUrl,
|
setIsValidUrl,
|
||||||
setSnackbarMessage,
|
setSnackbarMessage,
|
||||||
setSnackbarOpen,
|
setSnackbarOpen,
|
||||||
setSnackbarSeverity,
|
setSnackbarSeverity,
|
||||||
} = stateProps;
|
setSiteMap,
|
||||||
|
} = useJobSubmitterProvider();
|
||||||
|
|
||||||
const [urlError, setUrlError] = useState<string | null>(null);
|
const [urlError, setUrlError] = useState<string | null>(null);
|
||||||
const [loading, setLoading] = useState<boolean>(false);
|
const [loading, setLoading] = useState<boolean>(false);
|
||||||
@@ -87,7 +74,8 @@ export const JobSubmitter = ({ stateProps }: Props) => {
|
|||||||
rows,
|
rows,
|
||||||
user,
|
user,
|
||||||
jobOptions,
|
jobOptions,
|
||||||
customHeaders
|
customHeaders,
|
||||||
|
siteMap
|
||||||
)
|
)
|
||||||
.then(async (response) => {
|
.then(async (response) => {
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
@@ -120,31 +108,28 @@ export const JobSubmitter = ({ stateProps }: Props) => {
|
|||||||
job_options as string,
|
job_options as string,
|
||||||
setCustomJSONSelected,
|
setCustomJSONSelected,
|
||||||
setProxiesSelected,
|
setProxiesSelected,
|
||||||
setJobOptions
|
setJobOptions,
|
||||||
|
setSiteMap
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}, [job_options]);
|
}, [job_options]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<div>
|
||||||
<div>
|
<JobSubmitterHeader />
|
||||||
<JobSubmitterHeader />
|
<JobSubmitterInput
|
||||||
<JobSubmitterInput
|
urlError={urlError}
|
||||||
{...stateProps}
|
handleSubmit={handleSubmit}
|
||||||
urlError={urlError}
|
loading={loading}
|
||||||
handleSubmit={handleSubmit}
|
/>
|
||||||
loading={loading}
|
<JobSubmitterOptions
|
||||||
/>
|
jobOptions={jobOptions}
|
||||||
<JobSubmitterOptions
|
setJobOptions={setJobOptions}
|
||||||
{...stateProps}
|
customJSONSelected={customJSONSelected}
|
||||||
jobOptions={jobOptions}
|
setCustomJSONSelected={setCustomJSONSelected}
|
||||||
setJobOptions={setJobOptions}
|
handleSelectProxies={handleSelectProxies}
|
||||||
customJSONSelected={customJSONSelected}
|
proxiesSelected={proxiesSelected}
|
||||||
setCustomJSONSelected={setCustomJSONSelected}
|
/>
|
||||||
handleSelectProxies={handleSelectProxies}
|
</div>
|
||||||
proxiesSelected={proxiesSelected}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</>
|
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
84
src/components/submit/job-submitter/provider.tsx
Normal file
84
src/components/submit/job-submitter/provider.tsx
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
import React, {
|
||||||
|
createContext,
|
||||||
|
PropsWithChildren,
|
||||||
|
useContext,
|
||||||
|
useState,
|
||||||
|
Dispatch,
|
||||||
|
useMemo,
|
||||||
|
} from "react";
|
||||||
|
import { Element, Result, SiteMap } from "@/types";
|
||||||
|
|
||||||
|
/** Setter half of a `useState` pair for a value of type `T`. */
type StateSetter<T> = Dispatch<React.SetStateAction<T>>;

/**
 * Shape of the value shared through the job-submitter context: every piece
 * of state is exposed together with its setter.
 */
type JobSubmitterProviderType = {
  // URL entered in the submit form and its validity flag.
  submittedURL: string;
  setSubmittedURL: StateSetter<string>;
  isValidURL: boolean;
  setIsValidUrl: StateSetter<boolean>;

  // Elements selected for scraping and the results object.
  rows: Element[];
  setRows: StateSetter<Element[]>;
  results: Result;
  setResults: StateSetter<Result>;

  // Snackbar state (open flag, message text, severity string).
  snackbarOpen: boolean;
  setSnackbarOpen: StateSetter<boolean>;
  snackbarMessage: string;
  setSnackbarMessage: StateSetter<string>;
  snackbarSeverity: string;
  setSnackbarSeverity: StateSetter<string>;

  // Optional site map; `null` means no site map has been created.
  siteMap: SiteMap | null;
  setSiteMap: StateSetter<SiteMap | null>;
};
|
||||||
|
|
||||||
|
// React context carrying the job-submitter state.
// NOTE(review): the default value is an empty object cast to the context
// type, so a consumer rendered outside <Provider> reads `undefined` for every
// field instead of failing loudly — confirm all consumers are wrapped.
const JobSubmitterProvider = createContext({} as JobSubmitterProviderType);
|
||||||
|
|
||||||
|
/**
 * Owns the state of the job-submission UI and publishes it, together with
 * the matching setters, to every descendant through the job-submitter
 * context.
 */
export const Provider = ({ children }: PropsWithChildren) => {
  // Hook order matches the field order of JobSubmitterProviderType.
  const [url, updateUrl] = useState<string>("");
  const [elementRows, updateElementRows] = useState<Element[]>([]);
  const [scrapeResults, updateScrapeResults] = useState<Result>({});
  const [isSnackbarOpen, updateSnackbarOpen] = useState<boolean>(false);
  const [snackbarText, updateSnackbarText] = useState<string>("");
  const [snackbarLevel, updateSnackbarLevel] = useState<string>("error");
  const [urlIsValid, updateUrlIsValid] = useState<boolean>(true);
  const [currentSiteMap, updateSiteMap] = useState<SiteMap | null>(null);

  // Memoised so consumers only see a new object when a state value changes.
  // The setters returned by useState are stable and therefore not listed as
  // dependencies.
  const contextValue = useMemo<JobSubmitterProviderType>(() => {
    return {
      submittedURL: url,
      setSubmittedURL: updateUrl,
      rows: elementRows,
      setRows: updateElementRows,
      results: scrapeResults,
      setResults: updateScrapeResults,
      snackbarOpen: isSnackbarOpen,
      setSnackbarOpen: updateSnackbarOpen,
      snackbarMessage: snackbarText,
      setSnackbarMessage: updateSnackbarText,
      snackbarSeverity: snackbarLevel,
      setSnackbarSeverity: updateSnackbarLevel,
      isValidURL: urlIsValid,
      setIsValidUrl: updateUrlIsValid,
      siteMap: currentSiteMap,
      setSiteMap: updateSiteMap,
    };
  }, [
    url,
    elementRows,
    scrapeResults,
    isSnackbarOpen,
    snackbarText,
    snackbarLevel,
    urlIsValid,
    currentSiteMap,
  ]);

  return (
    <JobSubmitterProvider.Provider value={contextValue}>
      {children}
    </JobSubmitterProvider.Provider>
  );
};
|
||||||
|
|
||||||
|
/** Returns the current job-submitter context value (state plus setters). */
export const useJobSubmitterProvider = () => useContext(JobSubmitterProvider);
|
||||||
1
src/components/submit/job-submitter/site-map/index.ts
Normal file
1
src/components/submit/job-submitter/site-map/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export * from "./site-map";
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
export * from "./site-map-input";
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
/* Shared sizing and hover animation for the Add / Delete buttons. */
.button {
  height: 3rem;
  width: 2rem;

  color: #ffffff;
  font-weight: 600;
  border-radius: 0.375rem;
  transition: transform 0.2s ease-in-out;
  transform: scale(1);
}

/*
 * Flat selector instead of a nested `&:hover`: nested rules in a plain
 * .module.css file only work with native CSS nesting or a nesting PostCSS
 * plugin. The flat form has the same specificity and works everywhere.
 */
.button:hover {
  transform: scale(1.05);
}

/* Delete button colours; !important is kept from the original rules. */
.remove {
  background-color: var(--delete-red) !important;
}

.remove:hover {
  background-color: var(--delete-red-hover) !important;
}
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
import { useState } from "react";
|
||||||
|
import { useJobSubmitterProvider } from "../../provider";
|
||||||
|
import {
|
||||||
|
MenuItem,
|
||||||
|
Select,
|
||||||
|
TextField,
|
||||||
|
FormControl,
|
||||||
|
Button,
|
||||||
|
Checkbox,
|
||||||
|
FormControlLabel,
|
||||||
|
} from "@mui/material";
|
||||||
|
import { ActionOption } from "@/types/job";
|
||||||
|
import classes from "./site-map-input.module.css";
|
||||||
|
import { clsx } from "clsx";
|
||||||
|
|
||||||
|
/**
 * Props for one site-map row. All are optional: with none supplied the row
 * is an empty, editable form with an "Add" button.
 */
export type SiteMapInputProps = {
  /** When true the fields are read-only and the button is "Delete". */
  disabled?: boolean;

  /** Initial action type; the component falls back to "click". */
  option?: ActionOption;
  /** Initial value of the "XPath Selector" field. */
  xpath?: string;
  /** Initial value of the "Input Text" field (shown for "input" actions). */
  input?: string;
  /** Initial state of the "Do Once" checkbox. */
  clickOnce?: boolean;
};
|
||||||
|
|
||||||
|
/**
 * One row of the site-map editor.
 *
 * Without `disabled` the row is an editable form whose "Add" button prepends
 * a new action to the shared site map. With `disabled` the row displays an
 * existing action read-only and offers a "Delete" button that removes that
 * action.
 *
 * The props only seed the local field state; later prop changes are not
 * synced, so the parent should remount the row (via `key`) when the action
 * it represents changes.
 */
export const SiteMapInput = ({
  disabled,
  xpath,
  option,
  clickOnce,
  input,
}: SiteMapInputProps) => {
  const [optionState, setOptionState] = useState<ActionOption>(
    option || "click"
  );
  const [xpathState, setXpathState] = useState<string>(xpath || "");
  const [clickOnceState, setClickOnceState] = useState<boolean>(
    clickOnce || false
  );
  const [inputState, setInputState] = useState<string>(input || "");

  const { siteMap, setSiteMap } = useJobSubmitterProvider();

  // Prepend the action described by the form to the site map, then clear the
  // XPath field so the next action can be entered.
  const handleAdd = () => {
    if (!siteMap) return;

    setSiteMap((prevSiteMap) => ({
      ...prevSiteMap,
      actions: [
        {
          type: optionState,
          xpath: xpathState,
          name: "",
          do_once: clickOnceState,
          input: inputState,
        },
        ...(prevSiteMap?.actions || []),
      ],
    }));

    setXpathState("");
  };

  // Remove the action this row represents. A read-only row cannot be edited,
  // so its local state still equals the action it was created from; the
  // first stored action matching that state is the one to delete.
  const handleRemove = () => {
    if (!siteMap) return;

    setSiteMap((prevSiteMap) => {
      const actions = prevSiteMap?.actions || [];
      const target = actions.findIndex(
        (action) =>
          action.xpath === xpathState &&
          action.type === optionState &&
          Boolean(action.do_once) === clickOnceState &&
          (action.input || "") === inputState
      );

      // Nothing matches: leave the site map untouched rather than deleting
      // an unrelated action.
      if (target === -1) return prevSiteMap;

      return {
        ...prevSiteMap,
        actions: actions.filter((_, position) => position !== target),
      };
    });
  };

  return (
    <div className="flex flex-col gap-2 w-full">
      <div className="flex gap-2 items-center">
        <FormControl className="w-1/4">
          <Select
            disabled={disabled}
            displayEmpty
            value={optionState}
            onChange={(e) => setOptionState(e.target.value as ActionOption)}
          >
            <MenuItem value="click">Click</MenuItem>
            <MenuItem value="input">Input</MenuItem>
          </Select>
        </FormControl>
        {/* The text to type is only relevant for "input" actions. */}
        {optionState === "input" && (
          <TextField
            label="Input Text"
            fullWidth
            value={inputState}
            onChange={(e) => setInputState(e.target.value)}
            disabled={disabled}
          />
        )}
        <TextField
          label="XPath Selector"
          fullWidth
          value={xpathState}
          onChange={(e) => setXpathState(e.target.value)}
          disabled={disabled}
        />
        {disabled ? (
          <Button
            onClick={handleRemove}
            className={clsx(classes.button, classes.remove)}
          >
            Delete
          </Button>
        ) : (
          <Button
            onClick={handleAdd}
            disabled={!xpathState}
            className={clsx(classes.button, classes.add)}
          >
            Add
          </Button>
        )}
      </div>
      {!disabled && (
        <FormControlLabel
          label="Do Once"
          control={
            <Checkbox
              checked={clickOnceState}
              disabled={disabled}
              onChange={() => setClickOnceState((checked) => !checked)}
            />
          }
        />
      )}
    </div>
  );
};
|
||||||
70
src/components/submit/job-submitter/site-map/site-map.tsx
Normal file
70
src/components/submit/job-submitter/site-map/site-map.tsx
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import { useEffect, useState } from "react";
|
||||||
|
import { useJobSubmitterProvider } from "../provider";
|
||||||
|
import { Button, Divider, Typography, useTheme } from "@mui/material";
|
||||||
|
import { SiteMapInput } from "./site-map-input";
|
||||||
|
|
||||||
|
/**
 * Site-map editor: a create/clear toggle, one editable row for adding a new
 * action, and a read-only list of the actions already in the site map.
 */
export const SiteMap = () => {
  const { siteMap, setSiteMap } = useJobSubmitterProvider();
  const [showSiteMap, setShowSiteMap] = useState<boolean>(false);
  const theme = useTheme();

  const handleCreateSiteMap = () => {
    setSiteMap({ actions: [] });
    setShowSiteMap(true);
  };

  const handleClearSiteMap = () => {
    setSiteMap(null);
    setShowSiteMap(false);
  };

  // Open the editor when a site map arrives from outside this component
  // (for example one restored from previously saved job options).
  useEffect(() => {
    if (siteMap) {
      setShowSiteMap(true);
    }
  }, [siteMap]);

  // New actions are prepended to the stored list, so reverse a COPY to show
  // the oldest first. Array.prototype.reverse() mutates its receiver; calling
  // it on the state array directly would flip the stored order on every
  // render.
  // NOTE(review): the stored (and submitted) order stays newest-first —
  // confirm that is the order the backend expects.
  const displayedActions = [...(siteMap?.actions || [])].reverse();

  return (
    <div className="flex flex-col gap-4">
      {siteMap ? (
        <Button onClick={handleClearSiteMap}>Clear Site Map</Button>
      ) : (
        <Button onClick={handleCreateSiteMap}>Create Site Map</Button>
      )}
      {showSiteMap && (
        <div className="flex flex-col gap-4">
          <SiteMapInput />
          {displayedActions.length > 0 && (
            <>
              <Divider
                sx={{
                  borderColor:
                    theme.palette.mode === "dark" ? "#ffffff" : "#000000",
                }}
              />
              <Typography className="w-full text-center" variant="h5">
                Site Map Actions
              </Typography>
            </>
          )}
          <ul className="flex flex-col gap-4">
            {displayedActions.map((action, index) => (
              // The key includes the position and every displayed field: the
              // same XPath may appear in several actions, and a row must be
              // remounted when a different action moves into its position
              // because SiteMapInput only reads its props on mount.
              <li
                key={`${index}:${action.type}:${action.xpath}:${
                  action.input || ""
                }:${Boolean(action.do_once)}`}
                className="flex w-full items-center"
              >
                <Typography variant="h6" className="w-[10%] mr-2">
                  Action {index + 1}:
                </Typography>
                <SiteMapInput
                  disabled
                  xpath={action.xpath}
                  option={action.type}
                  clickOnce={action.do_once}
                  input={action.input}
                />
              </li>
            ))}
          </ul>
        </div>
      )}
    </div>
  );
};
|
||||||
@@ -1,15 +1,17 @@
|
|||||||
import { Dispatch, SetStateAction } from "react";
|
import { Dispatch, SetStateAction } from "react";
|
||||||
|
|
||||||
import { RawJobOptions } from "@/types";
|
import { RawJobOptions, SiteMap } from "@/types";
|
||||||
|
|
||||||
export const parseJobOptions = (
|
export const parseJobOptions = (
|
||||||
job_options: string,
|
job_options: string,
|
||||||
setCustomJSONSelected: Dispatch<SetStateAction<boolean>>,
|
setCustomJSONSelected: Dispatch<SetStateAction<boolean>>,
|
||||||
setProxiesSelected: Dispatch<SetStateAction<boolean>>,
|
setProxiesSelected: Dispatch<SetStateAction<boolean>>,
|
||||||
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>
|
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
|
||||||
|
setSiteMap: Dispatch<SetStateAction<any>>
|
||||||
) => {
|
) => {
|
||||||
if (job_options) {
|
if (job_options) {
|
||||||
const jsonOptions = JSON.parse(job_options as string);
|
const jsonOptions = JSON.parse(job_options as string);
|
||||||
|
console.log(jsonOptions);
|
||||||
const newJobOptions: RawJobOptions = {
|
const newJobOptions: RawJobOptions = {
|
||||||
multi_page_scrape: false,
|
multi_page_scrape: false,
|
||||||
custom_headers: null,
|
custom_headers: null,
|
||||||
@@ -31,6 +33,10 @@ export const parseJobOptions = (
|
|||||||
newJobOptions.proxies = jsonOptions.proxies.join(",");
|
newJobOptions.proxies = jsonOptions.proxies.join(",");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (jsonOptions.site_map) {
|
||||||
|
setSiteMap(jsonOptions.site_map);
|
||||||
|
}
|
||||||
|
|
||||||
setJobOptions(newJobOptions);
|
setJobOptions(newJobOptions);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,117 +1,10 @@
|
|||||||
"use client";
|
import { Provider as JobSubmitterProvider } from "@/components/submit/job-submitter/provider";
|
||||||
|
import { Home } from "@/components/pages/home/home";
|
||||||
import React, { useState, useEffect, useRef } from "react";
|
|
||||||
import { Button, Container, Box, Snackbar, Alert } from "@mui/material";
|
|
||||||
import { useRouter } from "next/router";
|
|
||||||
import { Element, Result } from "@/types";
|
|
||||||
import { ElementTable } from "@/components/submit";
|
|
||||||
import { JobSubmitter } from "@/components/submit/job-submitter";
|
|
||||||
|
|
||||||
const Home = () => {
|
|
||||||
const router = useRouter();
|
|
||||||
const { elements, url } = router.query;
|
|
||||||
|
|
||||||
const [submittedURL, setSubmittedURL] = useState<string>("");
|
|
||||||
const [rows, setRows] = useState<Element[]>([]);
|
|
||||||
const [results, setResults] = useState<Result>({});
|
|
||||||
const [snackbarOpen, setSnackbarOpen] = useState<boolean>(false);
|
|
||||||
const [snackbarMessage, setSnackbarMessage] = useState<string>("");
|
|
||||||
const [snackbarSeverity, setSnackbarSeverity] = useState<string>("error");
|
|
||||||
const [isValidURL, setIsValidUrl] = useState<boolean>(true);
|
|
||||||
|
|
||||||
const resultsRef = useRef<HTMLTableElement | null>(null);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (elements) {
|
|
||||||
setRows(JSON.parse(elements as string));
|
|
||||||
}
|
|
||||||
if (url) {
|
|
||||||
setSubmittedURL(url as string);
|
|
||||||
}
|
|
||||||
}, [elements, url]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (results && resultsRef.current) {
|
|
||||||
resultsRef.current.scrollIntoView({ behavior: "smooth" });
|
|
||||||
}
|
|
||||||
}, [results]);
|
|
||||||
|
|
||||||
const handleCloseSnackbar = () => {
|
|
||||||
setSnackbarOpen(false);
|
|
||||||
};
|
|
||||||
|
|
||||||
const ErrorSnackbar = () => {
|
|
||||||
return (
|
|
||||||
<Snackbar
|
|
||||||
open={snackbarOpen}
|
|
||||||
autoHideDuration={6000}
|
|
||||||
onClose={handleCloseSnackbar}
|
|
||||||
>
|
|
||||||
<Alert onClose={handleCloseSnackbar} severity="error">
|
|
||||||
{snackbarMessage}
|
|
||||||
</Alert>
|
|
||||||
</Snackbar>
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
const NotifySnackbar = () => {
|
|
||||||
const goTo = () => {
|
|
||||||
router.push("/jobs");
|
|
||||||
};
|
|
||||||
|
|
||||||
const action = (
|
|
||||||
<Button color="inherit" size="small" onClick={goTo}>
|
|
||||||
Go To Job
|
|
||||||
</Button>
|
|
||||||
);
|
|
||||||
|
|
||||||
return (
|
|
||||||
<Snackbar
|
|
||||||
open={snackbarOpen}
|
|
||||||
autoHideDuration={6000}
|
|
||||||
onClose={handleCloseSnackbar}
|
|
||||||
>
|
|
||||||
<Alert onClose={handleCloseSnackbar} severity="info" action={action}>
|
|
||||||
{snackbarMessage}
|
|
||||||
</Alert>
|
|
||||||
</Snackbar>
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
export default function Main() {
|
||||||
return (
|
return (
|
||||||
<Box
|
<JobSubmitterProvider>
|
||||||
bgcolor="background.default"
|
<Home />
|
||||||
display="flex"
|
</JobSubmitterProvider>
|
||||||
flexDirection="column"
|
|
||||||
justifyContent="center"
|
|
||||||
alignItems="center"
|
|
||||||
height="100%"
|
|
||||||
py={4}
|
|
||||||
>
|
|
||||||
<Container maxWidth="lg">
|
|
||||||
<JobSubmitter
|
|
||||||
stateProps={{
|
|
||||||
submittedURL,
|
|
||||||
setSubmittedURL,
|
|
||||||
rows,
|
|
||||||
isValidURL,
|
|
||||||
setIsValidUrl,
|
|
||||||
setSnackbarMessage,
|
|
||||||
setSnackbarOpen,
|
|
||||||
setSnackbarSeverity,
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
{submittedURL.length ? (
|
|
||||||
<ElementTable
|
|
||||||
rows={rows}
|
|
||||||
setRows={setRows}
|
|
||||||
submittedURL={submittedURL}
|
|
||||||
/>
|
|
||||||
) : null}
|
|
||||||
</Container>
|
|
||||||
{snackbarSeverity === "info" ? <NotifySnackbar /> : <ErrorSnackbar />}
|
|
||||||
</Box>
|
|
||||||
);
|
);
|
||||||
};
|
}
|
||||||
|
|
||||||
export default Home;
|
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
|
import { SiteMap } from "@/types/job";
|
||||||
|
|
||||||
export const submitJob = async (
|
export const submitJob = async (
|
||||||
submittedURL: string,
|
submittedURL: string,
|
||||||
rows: any[],
|
rows: any[],
|
||||||
user: any,
|
user: any,
|
||||||
jobOptions: any,
|
jobOptions: any,
|
||||||
customHeaders: any
|
customHeaders: any,
|
||||||
|
siteMap: SiteMap | null
|
||||||
) => {
|
) => {
|
||||||
return await fetch(`/api/submit-scrape-job`, {
|
return await fetch(`/api/submit-scrape-job`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
@@ -18,6 +21,7 @@ export const submitJob = async (
|
|||||||
...jobOptions,
|
...jobOptions,
|
||||||
custom_headers: customHeaders || {},
|
custom_headers: customHeaders || {},
|
||||||
proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [],
|
proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [],
|
||||||
|
site_map: siteMap,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -2,6 +2,11 @@
|
|||||||
@tailwind components;
|
@tailwind components;
|
||||||
@tailwind utilities;
|
@tailwind utilities;
|
||||||
|
|
||||||
|
:root {
|
||||||
|
--delete-red: #ef4444;
|
||||||
|
--delete-red-hover: #ff6969;
|
||||||
|
}
|
||||||
|
|
||||||
#__next {
|
#__next {
|
||||||
height: 100%;
|
height: 100%;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,6 +34,12 @@ const commonThemeOptions = {
|
|||||||
h4: {
|
h4: {
|
||||||
fontWeight: 500,
|
fontWeight: 500,
|
||||||
},
|
},
|
||||||
|
h5: {
|
||||||
|
fontWeight: 500,
|
||||||
|
},
|
||||||
|
h6: {
|
||||||
|
fontWeight: 500,
|
||||||
|
},
|
||||||
body1: {
|
body1: {
|
||||||
fontFamily: '"Schibsted Grotesk", sans-serif',
|
fontFamily: '"Schibsted Grotesk", sans-serif',
|
||||||
},
|
},
|
||||||
@@ -175,6 +181,9 @@ const darkTheme = createTheme({
|
|||||||
h5: {
|
h5: {
|
||||||
color: "#ffffff",
|
color: "#ffffff",
|
||||||
},
|
},
|
||||||
|
h6: {
|
||||||
|
color: "#ffffff",
|
||||||
|
},
|
||||||
body1: {
|
body1: {
|
||||||
...commonThemeOptions.typography.body1,
|
...commonThemeOptions.typography.body1,
|
||||||
color: "#ffffff",
|
color: "#ffffff",
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ export type JobOptions = {
|
|||||||
multi_page_scrape: boolean;
|
multi_page_scrape: boolean;
|
||||||
custom_headers: null | string;
|
custom_headers: null | string;
|
||||||
proxies: string[];
|
proxies: string[];
|
||||||
|
site_map?: SiteMap;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type RawJobOptions = {
|
export type RawJobOptions = {
|
||||||
@@ -23,3 +24,17 @@ export type RawJobOptions = {
|
|||||||
custom_headers: string | null;
|
custom_headers: string | null;
|
||||||
proxies: string | null;
|
proxies: string | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Kinds of site-map action that can be selected in the site-map form. */
export type ActionOption =
  | "click"
  | "input";
|
||||||
|
|
||||||
|
/** A single step of a site map. */
export type Action = {
  /** Whether the step clicks an element or types into it. */
  type: ActionOption;
  /** XPath selector of the element the step targets. */
  xpath: string;
  /** Label for the step; the site-map form currently submits "". */
  name: string;

  /** Set from the form's "Do Once" checkbox. */
  do_once?: boolean;
  /** Text from the form's "Input Text" field (shown for "input" steps). */
  input?: string;
};
|
||||||
|
|
||||||
|
/** A site map: the list of actions submitted with a scrape job. */
export type SiteMap = {
  actions: Array<Action>;
};
|
||||||
|
|||||||
Reference in New Issue
Block a user