mirror of
https://github.com/jaypyles/Scraperr.git
synced 2025-12-13 19:25:58 +00:00
feat: allow custom cookies (#77)
* feat: working new advanced job options * feat: working new advanced job options * feat: add tests for adding custom cookies/headers
This commit is contained in:
@@ -13,3 +13,4 @@ class JobOptions(BaseModel):
|
||||
proxies: list[str] = []
|
||||
site_map: Optional[SiteMap] = None
|
||||
collect_media: bool = False
|
||||
custom_cookies: list[dict[str, Any]] = []
|
||||
|
||||
48
api/backend/job/scraping/add_custom.py
Normal file
48
api/backend/job/scraping/add_custom.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from playwright.async_api import Page, BrowserContext
|
||||
|
||||
import logging
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def add_custom_cookies(
    custom_cookies: list[dict[str, Any]],
    url: str,
    context: BrowserContext,
) -> None:
    """Install user-supplied cookies on a browser context for the host of ``url``.

    Only the ``"name"`` and ``"value"`` keys of each entry are used; any other
    keys are ignored. Every cookie is scoped to the host of ``url`` with
    path ``/``.

    Args:
        custom_cookies: Cookie mappings. A missing ``"name"`` or ``"value"``
            key falls back to ``"default_name"`` / ``"default_value"``.
        url: Page URL whose host becomes the cookie domain.
        context: Browser context that receives the cookies.
    """
    if not custom_cookies:
        return

    parsed_url = urlparse(url)
    # ``netloc`` can carry userinfo and a port ("user@host:8080"), neither of
    # which belongs in a cookie domain; ``hostname`` is the bare host. Fall
    # back to ``netloc`` when no host can be extracted.
    domain = parsed_url.hostname or parsed_url.netloc

    cookie_dicts = [
        {
            "name": cookie.get("name", "default_name"),
            "value": cookie.get("value", "default_value"),
            "domain": domain,
            "path": "/",
        }
        for cookie in custom_cookies
    ]

    # Cookie values are frequently session secrets, so only names are logged.
    LOG.info(
        "Adding %d cookie(s) for domain %s: %s",
        len(cookie_dicts),
        domain,
        [cookie["name"] for cookie in cookie_dicts],
    )

    # add_cookies takes a list, so a single call installs every cookie.
    await context.add_cookies(cookie_dicts)  # type: ignore
|
||||
|
||||
|
||||
async def add_custom_headers(
    custom_headers: dict[str, Any],
    page: Page,
) -> None:
    """Attach extra HTTP headers to the requests issued by ``page``.

    Args:
        custom_headers: Header names mapped to values. Names and values are
            converted with ``str`` before being handed to the page.
        page: Page on which ``set_extra_http_headers`` is called.
    """
    # Header values must be strings, but user-supplied JSON can carry numbers
    # or booleans; normalise them instead of failing the whole request.
    normalized_headers = {
        str(name): str(value) for name, value in custom_headers.items()
    }
    await page.set_extra_http_headers(normalized_headers)
|
||||
|
||||
|
||||
async def add_custom_items(
    url: str,
    page: Page,
    cookies: Optional[list[dict[str, Any]]] = None,
    headers: Optional[dict[str, Any]] = None,
) -> None:
    """Apply optional custom cookies, then optional custom headers, to ``page``.

    Cookies are installed on ``page.context`` through ``add_custom_cookies``;
    headers are set on the page through ``add_custom_headers``. A ``None`` or
    empty argument skips the corresponding step.

    Args:
        url: URL passed through to ``add_custom_cookies``.
        page: Page that receives the headers and whose context receives the
            cookies.
        cookies: Cookie mappings, or ``None``.
        headers: Header mapping, or ``None``.
    """
    if cookies:
        target_context = page.context
        await add_custom_cookies(
            custom_cookies=cookies,
            url=url,
            context=target_context,
        )

    if headers:
        await add_custom_headers(custom_headers=headers, page=page)
|
||||
@@ -12,6 +12,8 @@ from api.backend.models import Element, CapturedElement
|
||||
from api.backend.job.scraping.scraping_utils import scrape_content
|
||||
from api.backend.job.site_mapping.site_mapping import handle_site_mapping
|
||||
|
||||
from api.backend.job.scraping.add_custom import add_custom_items
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -44,11 +46,13 @@ async def make_site_request(
|
||||
proxies: Optional[list[str]] = None,
|
||||
site_map: Optional[dict[str, Any]] = None,
|
||||
collect_media: bool = False,
|
||||
custom_cookies: Optional[list[dict[str, Any]]] = None,
|
||||
):
|
||||
if url in visited_urls:
|
||||
return
|
||||
|
||||
proxy = None
|
||||
|
||||
if proxies:
|
||||
proxy = random.choice(proxies)
|
||||
LOG.info(f"Using proxy: {proxy}")
|
||||
@@ -56,8 +60,8 @@ async def make_site_request(
|
||||
async with AsyncCamoufox(headless=True, proxy=proxy) as browser:
|
||||
page: Page = await browser.new_page()
|
||||
|
||||
if headers:
|
||||
await page.set_extra_http_headers(headers)
|
||||
# Add cookies and headers
|
||||
await add_custom_items(url, page, custom_cookies, headers)
|
||||
|
||||
LOG.info(f"Visiting URL: {url}")
|
||||
|
||||
@@ -113,6 +117,7 @@ async def make_site_request(
|
||||
proxies=proxies,
|
||||
site_map=site_map,
|
||||
collect_media=collect_media,
|
||||
custom_cookies=custom_cookies,
|
||||
)
|
||||
|
||||
|
||||
@@ -152,6 +157,7 @@ async def scrape(
|
||||
proxies: Optional[list[str]] = None,
|
||||
site_map: Optional[dict[str, Any]] = None,
|
||||
collect_media: bool = False,
|
||||
custom_cookies: Optional[list[dict[str, Any]]] = None,
|
||||
):
|
||||
visited_urls: set[str] = set()
|
||||
pages: set[tuple[str, str]] = set()
|
||||
@@ -166,6 +172,7 @@ async def scrape(
|
||||
proxies=proxies,
|
||||
site_map=site_map,
|
||||
collect_media=collect_media,
|
||||
custom_cookies=custom_cookies,
|
||||
)
|
||||
|
||||
elements: list[dict[str, dict[str, list[CapturedElement]]]] = []
|
||||
|
||||
@@ -1,25 +1,53 @@
|
||||
import pytest
|
||||
import logging
|
||||
from playwright.async_api import async_playwright, Error
|
||||
from typing import Dict
|
||||
from playwright.async_api import async_playwright, Cookie, Route
|
||||
from api.backend.job.scraping.add_custom import add_custom_items
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_proxy():
|
||||
proxy = "127.0.0.1:8080"
|
||||
async def test_add_custom_items():
|
||||
test_cookies = [{"name": "big", "value": "cookie"}]
|
||||
test_headers = {"User-Agent": "test-agent", "Accept": "application/json"}
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await p.firefox.launch(
|
||||
headless=True, proxy={"server": f"http://{proxy}"}
|
||||
)
|
||||
browser = await p.chromium.launch(headless=True)
|
||||
context = await browser.new_context()
|
||||
page = await context.new_page()
|
||||
|
||||
with pytest.raises(Error) as excinfo:
|
||||
await page.goto("http://example.com")
|
||||
# Set up request interception
|
||||
captured_headers: Dict[str, str] = {}
|
||||
|
||||
assert "NS_ERROR_PROXY_CONNECTION_REFUSED" in str(excinfo.value)
|
||||
async def handle_route(route: Route) -> None:
|
||||
nonlocal captured_headers
|
||||
captured_headers = route.request.headers
|
||||
await route.continue_()
|
||||
|
||||
await page.route("**/*", handle_route)
|
||||
|
||||
await add_custom_items(
|
||||
url="http://example.com",
|
||||
page=page,
|
||||
cookies=test_cookies,
|
||||
headers=test_headers,
|
||||
)
|
||||
|
||||
# Navigate to example.com
|
||||
await page.goto("http://example.com")
|
||||
|
||||
# Verify cookies were added
|
||||
cookies: list[Cookie] = await page.context.cookies()
|
||||
test_cookie = next((c for c in cookies if c.get("name") == "big"), None)
|
||||
|
||||
assert test_cookie is not None
|
||||
assert test_cookie.get("value") == "cookie"
|
||||
assert test_cookie.get("path") == "/" # Default path should be set
|
||||
assert test_cookie.get("sameSite") == "Lax" # Default sameSite should be set
|
||||
|
||||
# Verify headers were added
|
||||
assert captured_headers.get("user-agent") == "test-agent"
|
||||
|
||||
await browser.close()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import json
|
||||
|
||||
from api.backend.job import get_queued_job, update_job
|
||||
from api.backend.scraping import scrape
|
||||
@@ -34,14 +35,25 @@ async def process_job():
|
||||
LOG.info(f"Beginning processing job: {job}.")
|
||||
try:
|
||||
_ = await update_job([job["id"]], field="status", value="Scraping")
|
||||
|
||||
proxies = job["job_options"]["proxies"]
|
||||
|
||||
if proxies and isinstance(proxies[0], str) and proxies[0].startswith("{"):
|
||||
try:
|
||||
proxies = [json.loads(p) for p in proxies]
|
||||
except json.JSONDecodeError:
|
||||
LOG.error(f"Failed to parse proxy JSON: {proxies}")
|
||||
proxies = []
|
||||
|
||||
scraped = await scrape(
|
||||
job["url"],
|
||||
[Element(**j) for j in job["elements"]],
|
||||
job["job_options"]["custom_headers"],
|
||||
job["job_options"]["multi_page_scrape"],
|
||||
job["job_options"]["proxies"],
|
||||
proxies,
|
||||
job["job_options"]["site_map"],
|
||||
job["job_options"]["collect_media"],
|
||||
job["job_options"]["custom_cookies"],
|
||||
)
|
||||
LOG.info(
|
||||
f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
import { Box, Link, Typography } from "@mui/material";
|
||||
import { SetStateAction, Dispatch, useState } from "react";
|
||||
import { AdvancedJobOptionsDialog } from "./dialog/advanced-job-options-dialog";
|
||||
import { RawJobOptions } from "@/types";
|
||||
|
||||
/** Props for {@link AdvancedJobOptions}. */
export type AdvancedJobOptionsProps = {
  jobOptions: RawJobOptions;
  setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
};

/**
 * Link-styled button that opens the advanced job options dialog.
 *
 * The open/closed flag is local state; the job options and their setter are
 * passed through to the dialog unchanged.
 */
export const AdvancedJobOptions = (props: AdvancedJobOptionsProps) => {
  const { jobOptions, setJobOptions } = props;
  const [open, setOpen] = useState(false);

  const showDialog = () => setOpen(true);
  const hideDialog = () => setOpen(false);

  return (
    <Box sx={{ mb: 2 }}>
      <Link
        component="button"
        variant="body2"
        onClick={showDialog}
        sx={{
          textDecoration: "none",
          color: "primary.main",
          "&:hover": {
            color: "primary.dark",
            textDecoration: "underline",
          },
          paddingLeft: 1,
          display: "inline-flex",
          alignItems: "center",
          gap: 0.5,
        }}
      >
        <Typography variant="body2">Advanced Job Options</Typography>
      </Link>
      <AdvancedJobOptionsDialog
        open={open}
        onClose={hideDialog}
        jobOptions={jobOptions}
        setJobOptions={setJobOptions}
      />
    </Box>
  );
};
|
||||
@@ -0,0 +1,269 @@
|
||||
import {
|
||||
Accordion,
|
||||
AccordionDetails,
|
||||
AccordionSummary,
|
||||
Box,
|
||||
Checkbox,
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogTitle,
|
||||
Divider,
|
||||
FormControl,
|
||||
FormControlLabel,
|
||||
FormGroup,
|
||||
IconButton,
|
||||
TextField,
|
||||
Tooltip,
|
||||
Typography,
|
||||
useTheme,
|
||||
} from "@mui/material";
|
||||
import {
|
||||
ExpandMore as ExpandMoreIcon,
|
||||
InfoOutlined,
|
||||
Code as CodeIcon,
|
||||
Settings,
|
||||
} from "@mui/icons-material";
|
||||
import { Dispatch, SetStateAction } from "react";
|
||||
import { RawJobOptions } from "@/types";
|
||||
import { ExpandedTableInput } from "../../expanded-table-input";
|
||||
|
||||
/** Props for {@link AdvancedJobOptionsDialog}. */
export type AdvancedJobOptionsDialogProps = {
  open: boolean;
  onClose: () => void;
  jobOptions: RawJobOptions;
  setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
};

/**
 * Modal dialog for editing the advanced options of a scrape job:
 * the multi-page and collect-media toggles, the proxies text field, and the
 * custom headers / custom cookies JSON inputs.
 *
 * All edits are written straight into the parent's job options through
 * `setJobOptions`; the dialog keeps no state of its own.
 */
export const AdvancedJobOptionsDialog = ({
  open,
  onClose,
  jobOptions,
  setJobOptions,
}: AdvancedJobOptionsDialogProps) => {
  const theme = useTheme();

  const handleMultiPageScrapeChange = () => {
    setJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      multi_page_scrape: !prevJobOptions.multi_page_scrape,
    }));
  };

  const handleProxiesChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      proxies: e.target.value,
    }));
  };

  const handleCollectMediaChange = () => {
    setJobOptions((prevJobOptions) => ({
      ...prevJobOptions,
      collect_media: !prevJobOptions.collect_media,
    }));
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          borderRadius: 2,
          boxShadow: "0 8px 32px rgba(0, 0, 0, 0.1)",
        },
      }}
    >
      <DialogTitle
        sx={{
          borderBottom: `1px solid ${theme.palette.divider}`,
          backgroundColor: theme.palette.background.default,
          color: theme.palette.primary.contrastText,
          borderRadius: 2,
          display: "flex",
          alignItems: "center",
          justifyContent: "space-between",
          padding: "1rem 2rem",
          marginRight: 2,
          marginLeft: 2,
        }}
      >
        <Typography variant="h6" component="div">
          Advanced Job Options
        </Typography>
        <Settings
          sx={{
            color: theme.palette.primary.contrastText,
          }}
        />
      </DialogTitle>

      <DialogContent
        sx={{ padding: 3, overflowY: "auto", marginTop: 2, height: "60rem" }}
      >
        <FormControl fullWidth>
          <Box sx={{ mb: 3 }}>
            <Typography
              variant="subtitle1"
              sx={{
                mb: 1,
                fontWeight: "bold",
                color: theme.palette.text.primary,
              }}
            >
              Collection Options
            </Typography>
            <Divider sx={{ mb: 2, backgroundColor: theme.palette.divider }} />

            <FormGroup row sx={{ gap: 4, mb: 1 }}>
              <FormControlLabel
                control={
                  <Checkbox
                    checked={jobOptions.multi_page_scrape}
                    onChange={handleMultiPageScrapeChange}
                  />
                }
                label={
                  <Box sx={{ display: "flex", alignItems: "center" }}>
                    <Typography>Multi Page Scrape</Typography>
                    <Tooltip title="Enable crawling through multiple pages">
                      <IconButton size="small">
                        <InfoOutlined fontSize="small" />
                      </IconButton>
                    </Tooltip>
                  </Box>
                }
              />
              <FormControlLabel
                control={
                  <Checkbox
                    checked={jobOptions.collect_media}
                    onChange={handleCollectMediaChange}
                  />
                }
                label={
                  <Box sx={{ display: "flex", alignItems: "center" }}>
                    <Typography>Collect Media</Typography>
                    <Tooltip title="Download images and other media">
                      <IconButton size="small">
                        <InfoOutlined fontSize="small" />
                      </IconButton>
                    </Tooltip>
                  </Box>
                }
              />
            </FormGroup>
          </Box>

          <Box sx={{ mb: 3 }}>
            <Typography
              variant="subtitle1"
              sx={{
                mb: 1,
                fontWeight: "bold",
                color: theme.palette.text.primary,
              }}
            >
              Custom Options
            </Typography>
            <Divider sx={{ mb: 2, backgroundColor: theme.palette.divider }} />

            {/* Proxies Section */}
            <Accordion
              defaultExpanded
              elevation={0}
              sx={{
                mb: 2,
                border: `1px solid ${theme.palette.divider}`,
                "&:before": { display: "none" },
                borderRadius: 1,
                overflow: "hidden",
                padding: 1,
              }}
            >
              <AccordionSummary
                expandIcon={<ExpandMoreIcon />}
                sx={{
                  backgroundColor: theme.palette.background.paper,
                  borderBottom: `1px solid ${theme.palette.divider}`,
                  "&.Mui-expanded": {
                    borderBottom: `1px solid ${theme.palette.divider}`,
                  },
                }}
              >
                <Box sx={{ display: "flex", alignItems: "center" }}>
                  <div
                    style={{
                      display: "flex",
                      alignItems: "center",
                      gap: "0.5rem",
                    }}
                  >
                    <Typography
                      sx={{
                        fontWeight: 500,
                        color: theme.palette.text.primary,
                      }}
                    >
                      Proxies
                    </Typography>

                    <Tooltip title="Comma separated list of proxies that should follow Playwright proxy format">
                      <InfoOutlined fontSize="small" />
                    </Tooltip>
                  </div>
                </Box>
              </AccordionSummary>
              <AccordionDetails
                sx={{ p: 2, backgroundColor: theme.palette.background.default }}
              >
                <TextField
                  placeholder='Proxies ([{"server": "proxy.example.com:8080", "username": "username", "password": "password"}])'
                  fullWidth
                  variant="outlined"
                  size="small"
                  // `proxies` starts out as null; a null value would make the
                  // input uncontrolled, so fall back to an empty string.
                  value={jobOptions.proxies ?? ""}
                  onChange={handleProxiesChange}
                  InputProps={{
                    startAdornment: (
                      <CodeIcon
                        sx={{ color: theme.palette.text.secondary, mr: 1 }}
                      />
                    ),
                  }}
                />
              </AccordionDetails>
            </Accordion>

            {/* Custom Headers Section */}
            <ExpandedTableInput
              label="Custom Headers"
              placeholder='{"User-Agent": "CustomAgent", "Accept": "*/*"}'
              urlParam="custom_headers"
              onChange={(value) => {
                setJobOptions((prevJobOptions) => ({
                  ...prevJobOptions,
                  custom_headers: value,
                }));
              }}
            />

            {/* Custom Cookies Section */}
            {/* Each cookie is an object with "name" and "value" keys, so the
                placeholder shows that shape. */}
            <ExpandedTableInput
              label="Custom Cookies"
              placeholder='[{"name": "session_id", "value": "abc123"}]'
              urlParam="custom_cookies"
              onChange={(value) => {
                setJobOptions((prevJobOptions) => ({
                  ...prevJobOptions,
                  custom_cookies: value,
                }));
              }}
            />
          </Box>
        </FormControl>
      </DialogContent>
    </Dialog>
  );
};
|
||||
@@ -0,0 +1 @@
|
||||
export * from "./advanced-job-options-dialog";
|
||||
1
src/components/common/advanced-job-options/index.ts
Normal file
1
src/components/common/advanced-job-options/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "./advanced-job-options";
|
||||
@@ -0,0 +1,204 @@
|
||||
import {
|
||||
Accordion,
|
||||
AccordionSummary,
|
||||
TableCell,
|
||||
TableRow,
|
||||
Paper,
|
||||
TableBody,
|
||||
useTheme,
|
||||
TextField,
|
||||
Box,
|
||||
Typography,
|
||||
AccordionDetails,
|
||||
TableHead,
|
||||
TableContainer,
|
||||
Table,
|
||||
} from "@mui/material";
|
||||
import { useEffect, useState } from "react";
|
||||
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
|
||||
import { parseJsonToEntries } from "@/lib/helpers/parse-json-to-entries";
|
||||
|
||||
/** Props for {@link ExpandedTableInput}. */
export type ExpandedTableInputProps = {
  /** Heading shown in the accordion summary. */
  label: string;
  /** Receives the parsed JSON value, or null when the text is empty or invalid. */
  onChange: (value: any) => void;
  placeholder: string;
  /** Key inside the `job_options` query parameter used to pre-fill the input. */
  urlParam: string;
};

/**
 * Accordion holding a JSON text field plus a read-only table preview of the
 * key/value entries parsed from it.
 *
 * On mount (and whenever `urlParam` changes) the field is pre-filled from
 * `job_options[urlParam]` in the page's query string, when present.
 */
export const ExpandedTableInput = ({
  label,
  onChange,
  placeholder,
  urlParam,
}: ExpandedTableInputProps) => {
  const theme = useTheme();
  const [value, setValue] = useState("");
  const [parsedHeaders, setParsedHeaders] = useState<[string, string][] | null>(
    null
  );

  const [jsonError, setJsonError] = useState<string | null>(null);

  // Validates the raw text, updates the preview/error state, and returns the
  // parsed JSON value (null for empty or invalid input).
  const validateAndParse = (val: string) => {
    if (val.trim() === "") {
      setParsedHeaders(null);
      setJsonError(null);
      return null;
    }

    try {
      const parsed = JSON.parse(val);
      const entries = parseJsonToEntries(val);

      if (entries === null) {
        setParsedHeaders(null);
        setJsonError("Invalid JSON object");
        return null;
      } else {
        setParsedHeaders(entries);
        setJsonError(null);
        return parsed;
      }
    } catch (e) {
      setParsedHeaders(null);
      setJsonError("Invalid JSON format");
      return null;
    }
  };

  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const val = e.target.value;
    setValue(val);
    const parsed = validateAndParse(val);
    onChange(parsed);
  };

  useEffect(() => {
    const clearParsedState = () => {
      setParsedHeaders(null);
      setJsonError(null);
    };

    // Read the query string inside the effect rather than during render, so
    // rendering never touches `window` (which is absent outside a browser).
    const jobOptions = new URLSearchParams(window.location.search).get(
      "job_options"
    );

    if (!jobOptions) {
      clearParsedState();
      return;
    }

    // The query string is user-controlled; malformed JSON must not crash the
    // component.
    let jobOptionsObject: any;
    try {
      jobOptionsObject = JSON.parse(jobOptions);
    } catch {
      clearParsedState();
      return;
    }

    let val = jobOptionsObject?.[urlParam];

    // `val` is undefined when job_options has no entry for this key, e.g. a
    // job saved before the option existed.
    if (val == null || val.length === 0 || Object.keys(val).length === 0) {
      clearParsedState();
      return;
    }

    if (typeof val === "string") {
      try {
        val = JSON.parse(val);
      } catch {}
    }

    const finalVal =
      typeof val === "string" ? val : val != null ? JSON.stringify(val) : "";

    setValue(finalVal);
    const parsed = validateAndParse(finalVal);
    onChange(parsed);
  }, [urlParam]);

  return (
    <Accordion
      defaultExpanded
      elevation={0}
      sx={{
        mb: 2,
        border: `1px solid ${theme.palette.divider}`,
        "&:before": { display: "none" },
        borderRadius: 1,
        overflow: "hidden",
        padding: 1,
      }}
    >
      <AccordionSummary
        expandIcon={<ExpandMoreIcon />}
        sx={{
          backgroundColor: theme.palette.background.paper,
          borderBottom: `1px solid ${theme.palette.divider}`,
          "&.Mui-expanded": {
            borderBottom: `1px solid ${theme.palette.divider}`,
          },
        }}
      >
        <Box sx={{ display: "flex", alignItems: "center" }}>
          <Typography
            sx={{ fontWeight: 500, color: theme.palette.text.primary }}
          >
            {label}
          </Typography>
        </Box>
      </AccordionSummary>
      <AccordionDetails
        sx={{ p: 2, backgroundColor: theme.palette.background.default }}
      >
        <TextField
          placeholder={placeholder}
          value={value}
          onChange={handleChange}
          fullWidth
          variant="outlined"
          size="small"
          error={jsonError !== null}
          helperText={jsonError ?? ""}
        />

        {parsedHeaders && parsedHeaders.length > 0 && (
          <Paper
            variant="outlined"
            sx={{
              marginTop: 1,
              border: `1px solid ${theme.palette.divider}`,
              borderRadius: 1,
              overflow: "hidden",
              padding: 0,
            }}
          >
            <TableContainer sx={{ maxHeight: 200 }}>
              <Table size="small" stickyHeader>
                <TableHead>
                  <TableRow
                    sx={{
                      backgroundColor: theme.palette.background.paper,
                    }}
                  >
                    <TableCell sx={{ fontWeight: "bold" }}>Header</TableCell>
                    <TableCell sx={{ fontWeight: "bold" }}>Value</TableCell>
                  </TableRow>
                </TableHead>
                <TableBody>
                  {parsedHeaders.map(([key, val], index) => (
                    <TableRow
                      // Entries flattened from an array of objects repeat the
                      // same key names, so the index keeps row keys unique.
                      key={`${index}-${key}`}
                      hover
                      sx={{
                        "&:nth-of-type(odd)": {
                          backgroundColor:
                            theme.palette.mode === "light"
                              ? "rgba(0, 0, 0, 0.02)"
                              : "rgba(255, 255, 255, 0.02)",
                        },
                      }}
                    >
                      <TableCell sx={{ fontWeight: 500 }}>{key}</TableCell>
                      <TableCell>{val}</TableCell>
                    </TableRow>
                  ))}
                </TableBody>
              </Table>
            </TableContainer>
          </Paper>
        )}
      </AccordionDetails>
    </Accordion>
  );
};
|
||||
1
src/components/common/expanded-table-input/index.ts
Normal file
1
src/components/common/expanded-table-input/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "./expanded-table-input";
|
||||
@@ -10,12 +10,14 @@ import { JobSubmitterInput } from "./job-submitter-input";
|
||||
import { JobSubmitterOptions } from "./job-submitter-options";
|
||||
import { ApiService } from "@/services";
|
||||
import { useJobSubmitterProvider } from "./provider";
|
||||
import { AdvancedJobOptions } from "@/components/common/advanced-job-options";
|
||||
|
||||
const initialJobOptions: RawJobOptions = {
|
||||
multi_page_scrape: false,
|
||||
custom_headers: null,
|
||||
proxies: null,
|
||||
collect_media: false,
|
||||
custom_cookies: null,
|
||||
};
|
||||
|
||||
export const JobSubmitter = () => {
|
||||
@@ -38,12 +40,8 @@ export const JobSubmitter = () => {
|
||||
const [loading, setLoading] = useState<boolean>(false);
|
||||
const [jobOptions, setJobOptions] =
|
||||
useState<RawJobOptions>(initialJobOptions);
|
||||
const [customJSONSelected, setCustomJSONSelected] = useState<boolean>(false);
|
||||
const [proxiesSelected, setProxiesSelected] = useState<boolean>(false);
|
||||
|
||||
const handleSelectProxies = () => {
|
||||
setProxiesSelected(!proxiesSelected);
|
||||
};
|
||||
console.log(jobOptions);
|
||||
|
||||
const handleSubmit = async () => {
|
||||
if (!validateURL(submittedURL)) {
|
||||
@@ -57,12 +55,13 @@ export const JobSubmitter = () => {
|
||||
setLoading(true);
|
||||
|
||||
let customHeaders;
|
||||
let customCookies;
|
||||
|
||||
try {
|
||||
customHeaders = jobOptions.custom_headers
|
||||
? JSON.parse(jobOptions.custom_headers)
|
||||
: null;
|
||||
} catch (error) {
|
||||
customHeaders = jobOptions.custom_headers || null;
|
||||
customCookies = jobOptions.custom_cookies || null;
|
||||
} catch (error: any) {
|
||||
console.error(error);
|
||||
setSnackbarMessage("Invalid JSON in custom headers.");
|
||||
setSnackbarOpen(true);
|
||||
setSnackbarSeverity("error");
|
||||
@@ -76,6 +75,7 @@ export const JobSubmitter = () => {
|
||||
user,
|
||||
jobOptions,
|
||||
customHeaders,
|
||||
customCookies,
|
||||
siteMap
|
||||
)
|
||||
.then(async (response) => {
|
||||
@@ -102,16 +102,9 @@ export const JobSubmitter = () => {
|
||||
.finally(() => setLoading(false));
|
||||
};
|
||||
|
||||
// Parse the job options from the query string
|
||||
useEffect(() => {
|
||||
if (job_options) {
|
||||
parseJobOptions(
|
||||
job_options as string,
|
||||
setCustomJSONSelected,
|
||||
setProxiesSelected,
|
||||
setJobOptions,
|
||||
setSiteMap
|
||||
);
|
||||
parseJobOptions(job_options as string, setJobOptions, setSiteMap);
|
||||
}
|
||||
}, [job_options]);
|
||||
|
||||
@@ -123,13 +116,9 @@ export const JobSubmitter = () => {
|
||||
handleSubmit={handleSubmit}
|
||||
loading={loading}
|
||||
/>
|
||||
<JobSubmitterOptions
|
||||
<AdvancedJobOptions
|
||||
jobOptions={jobOptions}
|
||||
setJobOptions={setJobOptions}
|
||||
customJSONSelected={customJSONSelected}
|
||||
setCustomJSONSelected={setCustomJSONSelected}
|
||||
handleSelectProxies={handleSelectProxies}
|
||||
proxiesSelected={proxiesSelected}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -4,10 +4,8 @@ import { RawJobOptions, SiteMap } from "@/types";
|
||||
|
||||
export const parseJobOptions = (
|
||||
job_options: string,
|
||||
setCustomJSONSelected: Dispatch<SetStateAction<boolean>>,
|
||||
setProxiesSelected: Dispatch<SetStateAction<boolean>>,
|
||||
setJobOptions: Dispatch<SetStateAction<RawJobOptions>>,
|
||||
setSiteMap: Dispatch<SetStateAction<any>>
|
||||
setSiteMap: Dispatch<SetStateAction<SiteMap | null>>
|
||||
) => {
|
||||
if (job_options) {
|
||||
const jsonOptions = JSON.parse(job_options as string);
|
||||
@@ -16,20 +14,23 @@ export const parseJobOptions = (
|
||||
custom_headers: null,
|
||||
proxies: null,
|
||||
collect_media: false,
|
||||
custom_cookies: null,
|
||||
};
|
||||
|
||||
if (
|
||||
jsonOptions.custom_headers &&
|
||||
Object.keys(jsonOptions.custom_headers).length
|
||||
) {
|
||||
setCustomJSONSelected(true);
|
||||
newJobOptions.custom_headers = JSON.stringify(jsonOptions.custom_headers);
|
||||
newJobOptions.custom_headers = jsonOptions.custom_headers;
|
||||
}
|
||||
|
||||
if (jsonOptions.custom_cookies && jsonOptions.custom_cookies.length > 0) {
|
||||
newJobOptions.custom_cookies = jsonOptions.custom_cookies;
|
||||
}
|
||||
|
||||
newJobOptions.multi_page_scrape = jsonOptions.multi_page_scrape;
|
||||
|
||||
if (jsonOptions.proxies.length > 0) {
|
||||
setProxiesSelected(true);
|
||||
newJobOptions.proxies = jsonOptions.proxies.join(",");
|
||||
}
|
||||
|
||||
|
||||
37
src/lib/helpers/parse-json-to-entries.ts
Normal file
37
src/lib/helpers/parse-json-to-entries.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
 * Parse a JSON string into a flat list of `[key, value]` string pairs.
 *
 * Accepted shapes:
 * - an object: one pair per property;
 * - an array of objects: the pairs of every object, concatenated in order;
 * - an array whose first element is a `[string, value]` 2-tuple: treated as
 *   already being a list of pairs.
 *
 * Primitive values are converted with `String`; nested objects and arrays are
 * rendered with `JSON.stringify` so they do not collapse to "[object Object]".
 *
 * @returns the pairs, or null when the text is not valid JSON, is not one of
 *   the accepted shapes, or yields no pairs from an array.
 */
export const parseJsonToEntries = (json: string): [string, string][] | null => {
  const toText = (v: unknown): string =>
    typeof v === "object" && v !== null ? JSON.stringify(v) : String(v);

  const toEntries = (obj: object): [string, string][] =>
    Object.entries(obj).map(([k, v]): [string, string] => [k, toText(v)]);

  try {
    const parsed = JSON.parse(json);

    if (Array.isArray(parsed)) {
      if (
        parsed.length > 0 &&
        Array.isArray(parsed[0]) &&
        parsed[0].length === 2 &&
        typeof parsed[0][0] === "string"
      ) {
        // Already an array of [key, val] tuples; just ensure values are
        // strings. A later non-iterable element throws here and is reported
        // as null by the catch below.
        return parsed.map(([k, v]): [string, string] => [k, toText(v)]);
      }

      // Array of objects
      const allEntries: [string, string][] = [];
      for (const item of parsed) {
        if (typeof item !== "object" || item === null) {
          return null;
        }
        allEntries.push(...toEntries(item));
      }
      return allEntries.length > 0 ? allEntries : null;
    }

    if (typeof parsed === "object" && parsed !== null) {
      return toEntries(parsed);
    }

    return null;
  } catch {
    return null;
  }
};
|
||||
@@ -6,6 +6,7 @@ export const submitJob = async (
|
||||
user: any,
|
||||
jobOptions: any,
|
||||
customHeaders: any,
|
||||
customCookies: any,
|
||||
siteMap: SiteMap | null
|
||||
) => {
|
||||
return await fetch(`/api/submit-scrape-job`, {
|
||||
@@ -23,6 +24,7 @@ export const submitJob = async (
|
||||
custom_headers: customHeaders || {},
|
||||
proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [],
|
||||
site_map: siteMap,
|
||||
custom_cookies: customCookies || [],
|
||||
},
|
||||
},
|
||||
}),
|
||||
|
||||
@@ -70,6 +70,16 @@ const commonThemeOptions = {
|
||||
},
|
||||
},
|
||||
},
|
||||
MuiCheckbox: {
|
||||
styleOverrides: {
|
||||
colorPrimary: {
|
||||
color: "#1976d2",
|
||||
"&.Mui-checked": {
|
||||
color: "#034efc",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
MuiPaper: {
|
||||
styleOverrides: {
|
||||
root: {
|
||||
@@ -85,6 +95,7 @@ const lightTheme = createTheme({
|
||||
mode: "light",
|
||||
primary: {
|
||||
main: "#1976d2",
|
||||
contrastText: "#000000",
|
||||
},
|
||||
secondary: {
|
||||
main: "#dc004e",
|
||||
@@ -139,6 +150,7 @@ const darkTheme = createTheme({
|
||||
mode: "dark",
|
||||
primary: {
|
||||
main: "#90caf9",
|
||||
contrastText: "#fff",
|
||||
},
|
||||
secondary: {
|
||||
main: "#f48fb1",
|
||||
|
||||
@@ -24,6 +24,7 @@ export type RawJobOptions = {
|
||||
custom_headers: string | null;
|
||||
proxies: string | null;
|
||||
collect_media: boolean;
|
||||
custom_cookies: string | null;
|
||||
};
|
||||
|
||||
export type ActionOption = "click" | "input";
|
||||
|
||||
Reference in New Issue
Block a user