mirror of https://github.com/jaypyles/Scraperr.git

feat: add proxies (#39)

.github/workflows/docker-image.yml (vendored, 2 lines changed)

@@ -1,4 +1,6 @@
 name: ci
+requires:
+  - unit-tests
 on:
   push:
     branches: ["master"]

.github/workflows/unit-tests.yml (vendored, new file, 25 lines)

@@ -0,0 +1,25 @@
+name: Unit Tests
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  unit-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install pdm
+        run: pip install pdm
+
+      - name: Install project dependencies
+        run: pdm install
+
+      - name: Run tests
+        run: PYTHONPATH=. pdm run pytest api/backend/tests
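
Setting PYTHONPATH=. in the test step makes the api.backend.* imports used by the suite resolvable from the repository root without installing the package itself.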

api/backend/models.py

@@ -23,8 +23,9 @@ class CapturedElement(pydantic.BaseModel):


 class JobOptions(pydantic.BaseModel):
-    multi_page_scrape: bool
-    custom_headers: Optional[dict[str, Any]]
+    multi_page_scrape: bool = False
+    custom_headers: Optional[dict[str, Any]] = {}
+    proxies: Optional[list[str]] = []


 class RetrieveScrapeJobs(pydantic.BaseModel):
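
With every field defaulted, callers can now construct JobOptions piecemeal. A minimal sketch of the new model in use (the proxy address is a placeholder; pydantic deep-copies mutable defaults like {} and [] per instance, so they are safe here):

    from api.backend.models import JobOptions

    opts = JobOptions(proxies=["127.0.0.1:8080"])  # placeholder proxy
    assert opts.multi_page_scrape is False
    assert opts.custom_headers == {}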

@@ -5,7 +5,6 @@ from io import StringIO
 import csv
 import logging
-import random
 from typing import Optional

 # PDM
 from fastapi import Depends, APIRouter

@@ -27,7 +26,7 @@ from api.backend.models import (
     Job,
 )
 from api.backend.schemas import User
-from api.backend.auth.auth_utils import get_current_user, EMPTY_USER
+from api.backend.auth.auth_utils import get_current_user
 from api.backend.utils import clean_text

 LOG = logging.getLogger(__name__)

api/backend/scraping.py

@@ -1,6 +1,7 @@
 import logging
 from typing import Any, Optional
 import time
+import random

 from bs4 import BeautifulSoup
 from lxml import etree

@@ -12,7 +13,6 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.chrome.options import Options as ChromeOptions
 from urllib.parse import urlparse, urljoin
-
 from api.backend.models import Element, CapturedElement

 LOG = logging.getLogger(__name__)

@@ -60,7 +60,7 @@ def interceptor(headers: dict[str, Any]):
     return _interceptor


-def create_driver():
+def create_driver(proxies: Optional[list[str]] = []):
    ua = UserAgent()
    chrome_options = ChromeOptions()
    chrome_options.add_argument("--headless")

@@ -68,7 +68,23 @@ def create_driver():
     chrome_options.add_argument("--disable-dev-shm-usage")
     chrome_options.add_argument(f"user-agent={ua.random}")

-    return webdriver.Chrome(options=chrome_options)
+    sw_options = {}
+    if proxies:
+        selected_proxy = proxies[random.randint(0, len(proxies) - 1)]
+        LOG.info(f"Using proxy: {selected_proxy}")
+
+        sw_options = {
+            "proxy": {
+                "https": f"https://{selected_proxy}",
+                "http": f"http://{selected_proxy}",
+            }
+        }
+
+    driver = webdriver.Chrome(
+        options=chrome_options,
+        seleniumwire_options=sw_options,
+    )
+    return driver


 async def make_site_request(
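
The proxy support comes from selenium-wire rather than plain Selenium: its webdriver.Chrome accepts seleniumwire_options, and the "proxy" mapping routes traffic through the chosen upstream. Indexing with random.randint(0, len(proxies) - 1) is equivalent to random.choice(proxies). A quick way to exercise the new signature, with a placeholder proxy address:

    from api.backend.scraping import create_driver

    driver = create_driver(proxies=["127.0.0.1:8080"])  # placeholder address
    try:
        driver.get("https://example.com")
        print(driver.last_request.url)  # selenium-wire records outgoing requests
    finally:
        driver.quit()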

@@ -78,13 +94,14 @@ async def make_site_request(
     visited_urls: set[str] = set(),
     pages: set[tuple[str, str]] = set(),
     original_url: str = "",
+    proxies: Optional[list[str]] = [],
 ) -> None:
     """Make basic `GET` request to site using Selenium."""
     # Check if URL has already been visited
     if url in visited_urls:
         return

-    driver = create_driver()
+    driver = create_driver(proxies)
     driver.implicitly_wait(10)

     if headers:

@@ -93,6 +110,7 @@ async def make_site_request(
     try:
         LOG.info(f"Visiting URL: {url}")
         driver.get(url)
+
         final_url = driver.current_url
         visited_urls.add(url)
         visited_urls.add(final_url)

@@ -173,6 +191,7 @@ async def scrape(
     xpaths: list[Element],
     headers: Optional[dict[str, Any]],
     multi_page_scrape: bool = False,
+    proxies: Optional[list[str]] = [],
 ):
     visited_urls: set[str] = set()
     pages: set[tuple[str, str]] = set()

@@ -184,6 +203,7 @@ async def scrape(
         visited_urls=visited_urls,
         pages=pages,
         original_url=url,
+        proxies=proxies,
     )

     elements: list[dict[str, dict[str, list[CapturedElement]]]] = list()

api/backend/tests/factories/job_factory.py

@@ -5,12 +5,14 @@ from faker import Faker
 fake = Faker()


-def create_job():
+def create_job(
+    job_options: JobOptions = JobOptions(multi_page_scrape=False, custom_headers={})
+):
     return Job(
         id=uuid.uuid4().hex,
         url="https://example.com",
         elements=[Element(name="test", xpath="xpath")],
-        job_options=JobOptions(multi_page_scrape=False, custom_headers={}),
+        job_options=job_options,
     )

@@ -9,12 +9,18 @@ client = TestClient(app)

 mocked_job = create_completed_job().model_dump()
 mock_results = [mocked_job]
+mocked_random_int = 123456


 @pytest.mark.asyncio
-@patch("api.backend.app.query")
-async def test_download(mock_query: AsyncMock):
+@patch("api.backend.routers.job_router.query")
+@patch("api.backend.routers.job_router.random.randint")
+async def test_download(mock_randint: AsyncMock, mock_query: AsyncMock):
     # Ensure the mock returns immediately
     mock_query.return_value = mock_results
+    mock_randint.return_value = mocked_random_int

     # Create a DownloadJob instance
     download_job = DownloadJob(ids=[mocked_job["id"]])

     # Make a POST request to the /download endpoint

@@ -26,5 +32,9 @@ async def test_download(mock_query: AsyncMock):

     # Check the content of the CSV
     csv_content = response.content.decode("utf-8")
-    expected_csv = f"id,url,element_name,xpath,text,user,time_created\r\n{mocked_job['id']},https://example.com,element_name,//div,example,{mocked_job['user']},{mocked_job['time_created']}\r\n"
+    expected_csv = (
+        f'"id","url","element_name","xpath","text","user","time_created"\r\n'
+        f'"{mocked_job["id"]}-{mocked_random_int}","https://example.com","element_name","//div","example",'
+        f'"{mocked_job["user"]}","{mocked_job["time_created"]}"\r\n'
+    )
     assert csv_content == expected_csv
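
The new expected value implies the download endpoint now quotes every CSV field and suffixes each row id with the random integer. For reference, csv.writer with quoting=csv.QUOTE_ALL produces exactly this shape; a sketch under that assumption, not the endpoint's actual code:

    import csv
    from io import StringIO

    buf = StringIO()
    writer = csv.writer(buf, quoting=csv.QUOTE_ALL)  # quote every field; rows end in \r\n
    writer.writerow(["id", "url", "element_name", "xpath", "text", "user", "time_created"])
    writer.writerow(["abc123-123456", "https://example.com", "element_name", "//div",
                     "example", "user@example.com", "2024-01-01T00:00:00"])
    assert buf.getvalue().startswith('"id","url","element_name"')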

api/backend/tests/scraping/__init__.py (new file, empty)

api/backend/tests/scraping/test_scraping.py (new file, 33 lines)

@@ -0,0 +1,33 @@
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+from api.backend.tests.factories.job_factory import create_job
+from api.backend.models import JobOptions
+from api.backend.scraping import create_driver
+
+
+mocked_job = create_job(
+    job_options=JobOptions(
+        multi_page_scrape=False, custom_headers={}, proxies=["127.0.0.1:8080"]
+    )
+).model_dump()
+
+
+@pytest.mark.asyncio
+@patch("seleniumwire.webdriver.Chrome.get")
+async def test_proxy(mock_get: AsyncMock):
+    # Mock the response of the requests.get call
+    mock_response = MagicMock()
+    mock_get.return_value = mock_response
+
+    driver = create_driver(proxies=["127.0.0.1:8080"])
+    assert driver is not None
+
+    # Simulate a request
+    driver.get("http://example.com")
+    response = driver.last_request
+
+    # Check if the proxy header is set correctly
+    if response:
+        assert response.headers["Proxy"] == "127.0.0.1:8080"
+
+    driver.quit()
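
Since Chrome.get is patched out, no real request is made and driver.last_request may be None; the guarded assertion only runs when selenium-wire actually recorded a request, so in practice the test mainly verifies that create_driver accepts a proxy list without raising.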

@@ -23,6 +23,7 @@ async def process_job():
         [Element(**j) for j in job["elements"]],
         job["job_options"]["custom_headers"],
         job["job_options"]["multi_page_scrape"],
+        job["job_options"]["proxies"],
     )
     LOG.info(
         f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
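
These positional arguments follow the new scrape(...) signature above (url, xpaths, headers, multi_page_scrape, proxies). A queued job's options now deserialize to a mapping shaped like this (values are placeholders):

    job_options = {
        "multi_page_scrape": False,
        "custom_headers": {},
        "proxies": ["127.0.0.1:8080"],  # empty list when the user sets none
    }

Note that the direct job["job_options"]["proxies"] lookup assumes the key is present; jobs persisted before this change would presumably need the field backfilled or read with .get("proxies", []).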

@@ -19,4 +19,4 @@ services:
     ports:
       - "8000:8000"
     volumes:
-      - "$PWD/api:/project/app/api"
+      - "$PWD/api:/project/api"

job-submitter-options.tsx

@@ -1,13 +1,14 @@
+import { RawJobOptions } from "@/types/job";
 import { Box, FormControlLabel, Checkbox, TextField } from "@mui/material";
 import { Dispatch, SetStateAction } from "react";

-import { JobOptions } from "@/types/job";
-
 export type JobSubmitterOptionsProps = {
-  jobOptions: JobOptions;
-  setJobOptions: Dispatch<SetStateAction<JobOptions>>;
+  jobOptions: RawJobOptions;
+  setJobOptions: Dispatch<SetStateAction<RawJobOptions>>;
   customJSONSelected: boolean;
   setCustomJSONSelected: Dispatch<SetStateAction<boolean>>;
+  handleSelectProxies: () => void;
+  proxiesSelected: boolean;
 };

 export const JobSubmitterOptions = ({

@@ -15,24 +16,69 @@ export const JobSubmitterOptions = ({
   setJobOptions,
   customJSONSelected,
   setCustomJSONSelected,
+  handleSelectProxies,
+  proxiesSelected,
 }: JobSubmitterOptionsProps) => {
+  const handleMultiPageScrapeChange = () => {
+    setJobOptions((prevJobOptions) => ({
+      ...prevJobOptions,
+      multi_page_scrape: !prevJobOptions.multi_page_scrape,
+    }));
+  };
+
+  const handleProxiesChange = (e: React.ChangeEvent<HTMLInputElement>) => {
+    setJobOptions((prevJobOptions) => ({
+      ...prevJobOptions,
+      proxies: e.target.value,
+    }));
+  };
+
+  const handleCustomHeadersChange = (
+    e: React.ChangeEvent<HTMLInputElement>
+  ) => {
+    setJobOptions((prevJobOptions) => ({
+      ...prevJobOptions,
+      custom_headers: e.target.value,
+    }));
+  };
+
   return (
     <Box bgcolor="background.paper" className="flex flex-col mb-2 rounded-md">
       <div id="options" className="p-2 flex flex-row space-x-2">
         <FormControlLabel
           label="Multi-Page Scrape"
           className="mr-0"
           control={
             <Checkbox
               checked={jobOptions.multi_page_scrape}
-              onChange={() =>
-                setJobOptions((prevJobOptions) => ({
-                  ...prevJobOptions,
-                  multi_page_scrape: !prevJobOptions.multi_page_scrape,
-                }))
-              }
+              onChange={handleMultiPageScrapeChange}
             />
           }
         ></FormControlLabel>
+        <FormControlLabel
+          label="Proxies"
+          control={
+            <Checkbox
+              checked={proxiesSelected}
+              onChange={handleSelectProxies}
+            />
+          }
+        ></FormControlLabel>
+        {proxiesSelected ? (
+          <div id="proxies">
+            <TextField
+              InputLabelProps={{ shrink: false }}
+              fullWidth
+              multiline={false}
+              variant="outlined"
+              value={jobOptions.proxies || ""}
+              onChange={handleProxiesChange}
+              inputProps={{
+                style: { whiteSpace: "nowrap", overflowX: "auto" },
+              }}
+            />
+          </div>
+        ) : null}
         <FormControlLabel
           label="Custom Headers (JSON)"
           control={

@@ -58,14 +104,8 @@ export const JobSubmitterOptions = ({
             minRows={4}
             variant="outlined"
             value={jobOptions.custom_headers || ""}
-            onChange={(e) =>
-              setJobOptions((prevJobOptions) => ({
-                ...prevJobOptions,
-                custom_headers: e.target.value,
-              }))
-            }
+            onChange={handleCustomHeadersChange}
             style={{ maxHeight: "20vh", overflow: "auto" }}
             className="mt-2"
           />
         </div>
       ) : null}

job-submitter.tsx

@@ -4,11 +4,12 @@ import React, { useEffect, useState, Dispatch } from "react";
 import { Element } from "@/types";
 import { useAuth } from "@/contexts/AuthContext";
 import { useRouter } from "next/router";
-import { Constants } from "@/lib";

+import { RawJobOptions } from "@/types/job";
+import { parseJobOptions, validateURL } from "@/lib";
 import { JobSubmitterHeader } from "./job-submitter-header";
 import { JobSubmitterInput } from "./job-submitter-input";
 import { JobSubmitterOptions } from "./job-submitter-options";
+import { ApiService } from "@/services";

 interface StateProps {
   submittedURL: string;

@@ -25,22 +26,20 @@ interface Props {
   stateProps: StateProps;
 }

-interface JobOptions {
-  multi_page_scrape: boolean;
-  custom_headers: null | string;
-}
+const initialJobOptions: RawJobOptions = {
+  multi_page_scrape: false,
+  custom_headers: null,
+  proxies: null,
+};

 export const JobSubmitter = ({ stateProps }: Props) => {
   const { user } = useAuth();
   const router = useRouter();

   const { job_options } = router.query;

   const {
     submittedURL,
     setSubmittedURL,
     rows,
     isValidURL,
     setIsValidUrl,
     setSnackbarMessage,
     setSnackbarOpen,

@@ -49,22 +48,16 @@ export const JobSubmitter = ({ stateProps }: Props) => {

   const [urlError, setUrlError] = useState<string | null>(null);
   const [loading, setLoading] = useState<boolean>(false);
-  const [jobOptions, setJobOptions] = useState<JobOptions>({
-    multi_page_scrape: false,
-    custom_headers: null,
-  });
+  const [jobOptions, setJobOptions] =
+    useState<RawJobOptions>(initialJobOptions);
   const [customJSONSelected, setCustomJSONSelected] = useState<boolean>(false);
+  const [proxiesSelected, setProxiesSelected] = useState<boolean>(false);

-  function validateURL(url: string): boolean {
-    try {
-      new URL(url);
-      return true;
-    } catch (_) {
-      return false;
-    }
-  }
+  const handleSelectProxies = () => {
+    setProxiesSelected(!proxiesSelected);
+  };

-  const handleSubmit = () => {
+  const handleSubmit = async () => {
     if (!validateURL(submittedURL)) {
       setIsValidUrl(false);
       setUrlError("Please enter a valid URL.");

@@ -76,6 +69,7 @@ export const JobSubmitter = ({ stateProps }: Props) => {
     setLoading(true);

     let customHeaders;
+
     try {
       customHeaders = jobOptions.custom_headers
         ? JSON.parse(jobOptions.custom_headers)

@@ -88,21 +82,14 @@ export const JobSubmitter = ({ stateProps }: Props) => {
       return;
     }

-    fetch(`${Constants.DOMAIN}/api/submit-scrape-job`, {
-      method: "POST",
-      headers: { "content-type": "application/json" },
-      body: JSON.stringify({
-        url: submittedURL,
-        elements: rows,
-        user: user?.email,
-        time_created: new Date().toISOString(),
-        job_options: {
-          ...jobOptions,
-          custom_headers: customHeaders,
-        },
-      }),
-    })
-      .then((response) => {
+    await ApiService.submitJob(
+      submittedURL,
+      rows,
+      user,
+      jobOptions,
+      customHeaders
+    )
+      .then(async (response) => {
         if (!response.ok) {
           return response.json().then((error) => {
             throw new Error(error.error);

@@ -126,27 +113,16 @@ export const JobSubmitter = ({ stateProps }: Props) => {
       .finally(() => setLoading(false));
   };

+  // Parse the job options from the query string
   useEffect(() => {
     if (job_options) {
-      const jsonOptions = JSON.parse(job_options as string);
-      const newJobOptions: JobOptions = {
-        multi_page_scrape: false,
-        custom_headers: null,
-      };
-
-      if (
-        jsonOptions.custom_headers &&
-        Object.keys(jsonOptions.custom_headers).length
-      ) {
-        setCustomJSONSelected(true);
-        newJobOptions.custom_headers = JSON.stringify(
-          jsonOptions.custom_headers
-        );
-      }
-
-      newJobOptions.multi_page_scrape = jsonOptions.multi_page_scrape;
-      setJobOptions(newJobOptions);
+      parseJobOptions(
+        job_options as string,
+        setCustomJSONSelected,
+        setProxiesSelected,
+        setJobOptions
+      );
     }
   }, [job_options]);

   return (

@@ -165,6 +141,8 @@ export const JobSubmitter = ({ stateProps }: Props) => {
           setJobOptions={setJobOptions}
           customJSONSelected={customJSONSelected}
           setCustomJSONSelected={setCustomJSONSelected}
+          handleSelectProxies={handleSelectProxies}
+          proxiesSelected={proxiesSelected}
         />
       </div>
     </>

src/lib/helpers/index.ts (new file, 2 lines)

@@ -0,0 +1,2 @@
+export * from "./parse-job-options";
+export * from "./validate-url";

src/lib/helpers/parse-job-options.ts (new file, 36 lines)

@@ -0,0 +1,36 @@
+import { Dispatch, SetStateAction } from "react";
+
+import { RawJobOptions } from "@/types";
+
+export const parseJobOptions = (
+  job_options: string,
+  setCustomJSONSelected: Dispatch<SetStateAction<boolean>>,
+  setProxiesSelected: Dispatch<SetStateAction<boolean>>,
+  setJobOptions: Dispatch<SetStateAction<RawJobOptions>>
+) => {
+  if (job_options) {
+    const jsonOptions = JSON.parse(job_options as string);
+    const newJobOptions: RawJobOptions = {
+      multi_page_scrape: false,
+      custom_headers: null,
+      proxies: null,
+    };
+
+    if (
+      jsonOptions.custom_headers &&
+      Object.keys(jsonOptions.custom_headers).length
+    ) {
+      setCustomJSONSelected(true);
+      newJobOptions.custom_headers = JSON.stringify(jsonOptions.custom_headers);
+    }
+
+    newJobOptions.multi_page_scrape = jsonOptions.multi_page_scrape;
+
+    if (jsonOptions.proxies) {
+      setProxiesSelected(true);
+      newJobOptions.proxies = jsonOptions.proxies.join(",");
+    }
+
+    setJobOptions(newJobOptions);
+  }
+};

src/lib/helpers/validate-url.ts (new file, 8 lines)

@@ -0,0 +1,8 @@
+export function validateURL(url: string): boolean {
+  try {
+    new URL(url);
+    return true;
+  } catch (_) {
+    return false;
+  }
+}

src/lib/index.ts

@@ -1,2 +1,3 @@
 export * from "./constants";
 export * from "./utils";
+export * from "./helpers";

src/services/api-service/api-service.ts (new file, 5 lines)

@@ -0,0 +1,5 @@
+import * as functions from "./functions";
+
+export const ApiService = {
+  ...functions,
+};

src/services/api-service/functions/index.ts (new file, 1 line)

@@ -0,0 +1 @@
+export * from "./submit-job";

src/services/api-service/functions/submit-job.ts (new file, 25 lines)

@@ -0,0 +1,25 @@
+import { Constants } from "@/lib";
+
+export const submitJob = async (
+  submittedURL: string,
+  rows: any[],
+  user: any,
+  jobOptions: any,
+  customHeaders: any
+) => {
+  return await fetch(`${Constants.DOMAIN}/api/submit-scrape-job`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      url: submittedURL,
+      elements: rows,
+      user: user?.email,
+      time_created: new Date().toISOString(),
+      job_options: {
+        ...jobOptions,
+        custom_headers: customHeaders,
+        proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [],
+      },
+    }),
+  });
+};

src/services/api-service/index.ts (new file, 1 line)

@@ -0,0 +1 @@
+export * from "./api-service";

src/services/index.ts (new file, 1 line)

@@ -0,0 +1 @@
+export * from "./api-service";

src/types/job.ts

@@ -15,4 +15,11 @@ export interface Job {
 export type JobOptions = {
   multi_page_scrape: boolean;
   custom_headers: null | string;
+  proxies: string[];
 };
+
+export type RawJobOptions = {
+  multi_page_scrape: boolean;
+  custom_headers: string | null;
+  proxies: string | null;
+};
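
RawJobOptions mirrors the form state, where proxies is a single comma-separated string (or null), while JobOptions is the parsed shape with proxies: string[]. parse-job-options.ts joins the stored array into that string for editing, and submit-job.ts splits it back into an array for the backend, so the two types stay in round-trip agreement.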