import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, AutoscaledPoolOptions, BaseHttpClient, CrawlingContext, DatasetExportOptions, EnqueueLinksOptions, EventManager, FinalStatistics, GetUserDataFromRequest, IRequestList, IRequestManager, LoadedContext, ProxyInfo, Request, RequestsLike, RestrictedCrawlingContext, RouterHandler, RouterRoutes, Session, SessionPoolOptions, SkippedRequestCallback, StatisticsOptions, StatisticState } from '@crawlee/core';
import { AutoscaledPool, Configuration, Dataset, RequestProvider, SessionPool, Statistics } from '@crawlee/core';
import type { Awaitable, BatchAddRequestsResult, Dictionary, SetStatusMessageOptions } from '@crawlee/types';
import { RobotsTxtFile } from '@crawlee/utils';
import type { SetRequired } from 'type-fest';
import type { Log } from '@apify/log';
import { TimeoutError } from '@apify/timeout';
/**
 * Crawling context used by {@link BasicCrawler}. It extends the generic {@link CrawlingContext}
 * (bound to `BasicCrawler` and the given `UserData`) with an `enqueueLinks()` helper.
 *
 * @typeParam UserData Shape of the user data dictionary forwarded to {@link CrawlingContext}.
 */
export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> extends CrawlingContext<BasicCrawler, UserData> {
    /**
     * This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
     * currently used by the crawler.
     *
     * Optionally, the function allows you to filter the target links' URLs using an array of globs or regular expressions
     * and override settings of the enqueued {@link Request} objects.
     *
     * Note that in this context the options type is `SetRequired<EnqueueLinksOptions, 'urls'>`:
     * whenever an options object is passed, its `urls` property must be provided (as in the example below).
     *
     * Check out the [Crawl a website with relative links](https://crawlee.dev/js/docs/examples/crawl-relative-links) example
     * for more details regarding its usage.
     *
     * **Example usage**
     *
     * ```ts
     * async requestHandler({ enqueueLinks }) {
     *     await enqueueLinks({
     *       urls: [...],
     *     });
     * },
     * ```
     *
     * @param [options] All `enqueueLinks()` parameters are passed via an options object.
     * @returns Promise that resolves to a {@link BatchAddRequestsResult} object.
     */
    enqueueLinks(options?: SetRequired<EnqueueLinksOptions, 'urls'>): Promise<BatchAddRequestsResult>;
}
/**
 * Signature of a request handler function.
 *
 * The handler receives the crawling context wrapped in {@link LoadedContext} and may be either
 * synchronous or asynchronous (its return type is `Awaitable<void>`).
 *
 * @typeParam Context Crawling context type; defaults to the loaded combination of
 * {@link BasicCrawlingContext} and {@link RestrictedCrawlingContext}.
 */
export type RequestHandler<Context extends CrawlingContext = LoadedContext<BasicCrawlingContext & RestrictedCrawlingContext>> = (inputs: LoadedContext<Context>) => Awaitable<void>;
/**
 * Signature of an error handling function, used as the type of the `errorHandler` and
 * `failedRequestHandler` crawler options.
 *
 * The handler receives the crawling context wrapped in {@link LoadedContext} as the first argument
 * and an `Error` instance as the second one. It may be either synchronous or asynchronous
 * (its return type is `Awaitable<void>`).
 *
 * @typeParam Context Crawling context type; defaults to the loaded combination of
 * {@link BasicCrawlingContext} and {@link RestrictedCrawlingContext}.
 */
export type ErrorHandler<Context extends CrawlingContext = LoadedContext<BasicCrawlingContext & RestrictedCrawlingContext>> = (inputs: LoadedContext<Context>, error: Error) => Awaitable<void>;
/**
 * Parameters object passed to a `StatusMessageCallback`.
 *
 * @typeParam Context Crawling context type of the crawler.
 * @typeParam Crawler Concrete crawler type exposed via the `crawler` property.
 */
export interface StatusMessageCallbackParams<Context extends CrawlingContext = BasicCrawlingContext, Crawler extends BasicCrawler<any> = BasicCrawler<Context>> {
    /** Statistics state; presumably the snapshot current at the time of the call - confirm against the implementation. */
    state: StatisticState;
    /** The crawler instance; the callback can use it to call `crawler.setStatusMessage()`. */
    crawler: Crawler;
    /** Statistics state; presumably the snapshot from the previous invocation - confirm against the implementation. */
    previousState: StatisticState;
    /** The default status message, which the callback may use or replace. */
    message: string;
}
/**
 * Signature of the function accepted by the `statusMessageCallback` crawler option.
 *
 * It receives a single {@link StatusMessageCallbackParams} object and may be either synchronous
 * or asynchronous (its return type is `Awaitable<void>`).
 *
 * @typeParam Context Crawling context type of the crawler.
 * @typeParam Crawler Concrete crawler type exposed via `params.crawler`.
 */
export type StatusMessageCallback<Context extends CrawlingContext = BasicCrawlingContext, Crawler extends BasicCrawler<any> = BasicCrawler<Context>> = (params: StatusMessageCallbackParams<Context, Crawler>) => Awaitable<void>;
/**
 * Options object accepted by the {@link BasicCrawler} constructor.
 * All properties are optional; documented defaults are listed on the individual properties.
 *
 * @typeParam Context Crawling context type passed to the user-provided handlers.
 */
export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCrawlingContext> {
    /**
     * User-provided function that performs the logic of the crawler. It is called for each URL to crawl.
     *
     * The function receives the {@link BasicCrawlingContext} as an argument,
     * where the {@link BasicCrawlingContext.request|`request`} represents the URL to crawl.
     *
     * The function must return a promise, which is then awaited by the crawler.
     *
     * If the function throws an exception, the crawler will try to re-crawl the
     * request later, up to the {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
     * If all the retries fail, the crawler calls the function
     * provided to the {@link BasicCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
     * To make this work, we should **always**
     * let our function throw exceptions rather than catch them.
     * The exceptions are logged to the request using the
     * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
     */
    requestHandler?: RequestHandler<LoadedContext<Context>>;
    /**
     * User-provided function that performs the logic of the crawler. It is called for each URL to crawl.
     *
     * The function receives the {@link BasicCrawlingContext} as an argument,
     * where the {@link BasicCrawlingContext.request|`request`} represents the URL to crawl.
     *
     * The function must return a promise, which is then awaited by the crawler.
     *
     * If the function throws an exception, the crawler will try to re-crawl the
     * request later, up to the {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
     * If all the retries fail, the crawler calls the function
     * provided to the {@link BasicCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
     * To make this work, we should **always**
     * let our function throw exceptions rather than catch them.
     * The exceptions are logged to the request using the
     * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
     *
     * @deprecated `handleRequestFunction` has been renamed to `requestHandler` and will be removed in a future version.
     * @ignore
     */
    handleRequestFunction?: RequestHandler<Context>;
    /**
     * Static list of URLs to be processed.
     * If not provided, the crawler will open the default request queue when the {@link BasicCrawler.addRequests|`crawler.addRequests()`} function is called.
     * > Alternatively, the `requests` parameter of {@link BasicCrawler.run|`crawler.run()`} can be used to enqueue the initial requests -
     * it is a shortcut for running `crawler.addRequests()` before the `crawler.run()`.
     */
    requestList?: IRequestList;
    /**
     * Dynamic queue of URLs to be processed. This is useful for recursive crawling of websites.
     * If not provided, the crawler will open the default request queue when the {@link BasicCrawler.addRequests|`crawler.addRequests()`} function is called.
     * > Alternatively, the `requests` parameter of {@link BasicCrawler.run|`crawler.run()`} can be used to enqueue the initial requests -
     * it is a shortcut for running `crawler.addRequests()` before the `crawler.run()`.
     */
    requestQueue?: RequestProvider;
    /**
     * Allows explicitly configuring a request manager. Mutually exclusive with the `requestQueue` and `requestList` options.
     *
     * This enables explicitly configuring the crawler to use `RequestManagerTandem`, for instance.
     * If using this, the type of `BasicCrawler.requestQueue` may not be fully compatible with the `RequestProvider` class.
     */
    requestManager?: IRequestManager;
    /**
     * Timeout in which the function passed as {@link BasicCrawlerOptions.requestHandler|`requestHandler`} needs to finish, in seconds.
     * @default 60
     */
    requestHandlerTimeoutSecs?: number;
    /**
     * Timeout in which the function passed as {@link BasicCrawlerOptions.requestHandler|`requestHandler`} needs to finish, in seconds.
     * @default 60
     * @deprecated `handleRequestTimeoutSecs` has been renamed to `requestHandlerTimeoutSecs` and will be removed in a future version.
     * @ignore
     */
    handleRequestTimeoutSecs?: number;
    /**
     * User-provided function that allows modifying the request object before it gets retried by the crawler.
     * It's executed before each retry for the requests that failed fewer than {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
     *
     * The function receives the {@link BasicCrawlingContext} as the first argument,
     * where the {@link BasicCrawlingContext.request|`request`} corresponds to the request to be retried.
     * The second argument is the `Error` instance that
     * represents the last error thrown during processing of the request.
     */
    errorHandler?: ErrorHandler<Context>;
    /**
     * A function to handle requests that failed more than {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
     *
     * The function receives the {@link BasicCrawlingContext} as the first argument,
     * where the {@link BasicCrawlingContext.request|`request`} corresponds to the failed request.
     * The second argument is the `Error` instance that
     * represents the last error thrown during processing of the request.
     */
    failedRequestHandler?: ErrorHandler<Context>;
    /**
     * A function to handle requests that failed more than {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
     *
     * The function receives the {@link BasicCrawlingContext} as the first argument,
     * where the {@link BasicCrawlingContext.request|`request`} corresponds to the failed request.
     * The second argument is the `Error` instance that
     * represents the last error thrown during processing of the request.
     *
     * @deprecated `handleFailedRequestFunction` has been renamed to `failedRequestHandler` and will be removed in a future version.
     * @ignore
     */
    handleFailedRequestFunction?: ErrorHandler<Context>;
    /**
     * Specifies the maximum number of retries allowed for a request if its processing fails.
     * This includes retries due to navigation errors or errors thrown from user-supplied functions
     * (`requestHandler`, `preNavigationHooks`, `postNavigationHooks`).
     *
     * This limit does not apply to retries triggered by session rotation
     * (see {@link BasicCrawlerOptions.maxSessionRotations|`maxSessionRotations`}).
     * @default 3
     */
    maxRequestRetries?: number;
    /**
     * Indicates how much time (in seconds) to wait before crawling another request to the same domain.
     * @default 0
     */
    sameDomainDelaySecs?: number;
    /**
     * Maximum number of session rotations per request.
     * The crawler will automatically rotate the session in case of a proxy error or if it gets blocked by the website.
     *
     * The session rotations are not counted towards the {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} limit.
     * @default 10
     */
    maxSessionRotations?: number;
    /**
     * Maximum number of pages that the crawler will open. The crawl will stop when this limit is reached.
     * This value should always be set in order to prevent infinite loops in misconfigured crawlers.
     * > *NOTE:* In cases of parallel crawling, the actual number of pages visited might be slightly higher than this value.
     */
    maxRequestsPerCrawl?: number;
    /**
     * Maximum depth of the crawl. If not set, the crawl will continue until all requests are processed.
     * Setting this to `0` will only process the initial requests, skipping all links enqueued by `crawlingContext.enqueueLinks` and `crawlingContext.addRequests`.
     * Passing `1` will process the initial requests and all links enqueued by `crawlingContext.enqueueLinks` and `crawlingContext.addRequests` in the handler for initial requests.
     */
    maxCrawlDepth?: number;
    /**
     * Custom options passed to the underlying {@link AutoscaledPool} constructor.
     * > *NOTE:* The {@link AutoscaledPoolOptions.runTaskFunction|`runTaskFunction`}
     * option is provided by the crawler and cannot be overridden.
     * However, we can provide custom implementations of {@link AutoscaledPoolOptions.isFinishedFunction|`isFinishedFunction`}
     * and {@link AutoscaledPoolOptions.isTaskReadyFunction|`isTaskReadyFunction`}.
     */
    autoscaledPoolOptions?: AutoscaledPoolOptions;
    /**
     * Sets the minimum concurrency (parallelism) for the crawl. Shortcut for the
     * AutoscaledPool {@link AutoscaledPoolOptions.minConcurrency|`minConcurrency`} option.
     * > *WARNING:* If we set this value too high with respect to the available system memory and CPU, our crawler will run extremely slowly or crash.
     * If not sure, it's better to keep the default value and the concurrency will scale up automatically.
     */
    minConcurrency?: number;
    /**
     * Sets the maximum concurrency (parallelism) for the crawl. Shortcut for the
     * AutoscaledPool {@link AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} option.
     */
    maxConcurrency?: number;
    /**
     * The maximum number of requests per minute the crawler should run.
     * By default, this is set to `Infinity`, but we can pass any positive, non-zero integer.
     * Shortcut for the AutoscaledPool {@link AutoscaledPoolOptions.maxTasksPerMinute|`maxTasksPerMinute`} option.
     */
    maxRequestsPerMinute?: number;
    /**
     * Allows keeping the crawler alive even if the {@link RequestQueue} gets empty.
     * By default, the `crawler.run()` will resolve once the queue is empty. With `keepAlive: true` it will keep running,
     * waiting for more requests to come. Use `crawler.stop()` to exit the crawler gracefully, or `crawler.teardown()` to stop it immediately.
     */
    keepAlive?: boolean;
    /**
     * Basic crawler will initialize the {@link SessionPool} with the corresponding {@link SessionPoolOptions|`sessionPoolOptions`}.
     * The session instance will then be available in the {@link BasicCrawlerOptions.requestHandler|`requestHandler`}.
     */
    useSessionPool?: boolean;
    /**
     * The configuration options for {@link SessionPool} to use.
     */
    sessionPoolOptions?: SessionPoolOptions;
    /**
     * Defines the length of the interval for calling the `setStatusMessage`, in seconds.
     * @default 10
     */
    statusMessageLoggingInterval?: number;
    /**
     * Allows overriding the default status message. The callback needs to call `crawler.setStatusMessage()` explicitly.
     * The default status message is provided in the parameters.
     *
     * ```ts
     * const crawler = new CheerioCrawler({
     *     statusMessageCallback: async (ctx) => {
     *         return ctx.crawler.setStatusMessage(`this is status message from ${new Date().toISOString()}`, { level: 'INFO' }); // log level defaults to 'DEBUG'
     *     },
     *     statusMessageLoggingInterval: 1, // defaults to 10s
     *     async requestHandler({ $, enqueueLinks, request, log }) {
     *         // ...
     *     },
     * });
     * ```
     */
    statusMessageCallback?: StatusMessageCallback;
    /**
     * If set to `true`, the crawler will automatically try to bypass any detected bot protection.
     *
     * Currently supports:
     * - [**Cloudflare** Bot Management](https://www.cloudflare.com/products/bot-management/)
     * - [**Google Search** Rate Limiting](https://www.google.com/sorry/)
     */
    retryOnBlocked?: boolean;
    /**
     * If set to `true`, the crawler will automatically try to fetch the robots.txt file for each domain,
     * and skip those that are not allowed. This also prevents disallowed URLs from being added via `enqueueLinks`.
     */
    respectRobotsTxtFile?: boolean;
    /**
     * When a request is skipped for some reason, you can use this callback to act on it.
     * This is currently fired for requests skipped
     * 1. based on robots.txt file,
     * 2. because they don't match enqueueLinks filters,
     * 3. because they are redirected to a URL that doesn't match the enqueueLinks strategy,
     * 4. or because the {@link BasicCrawlerOptions.maxRequestsPerCrawl|`maxRequestsPerCrawl`} limit has been reached.
     */
    onSkippedRequest?: SkippedRequestCallback;
    /** @internal */
    log?: Log;
    /**
     * Enables experimental features of Crawlee, which can alter the behavior of the crawler.
     * WARNING: these options are not guaranteed to be stable and may change or be removed at any time.
     */
    experiments?: CrawlerExperiments;
    /**
     * Customize the way statistics collecting works, such as logging interval or
     * whether to output them to the Key-Value store.
     */
    statisticsOptions?: StatisticsOptions;
    /**
     * HTTP client implementation for the `sendRequest` context helper and for plain HTTP crawling.
     * Defaults to a new instance of {@link GotScrapingHttpClient}.
     */
    httpClient?: BaseHttpClient;
}
/**
 * A set of options that you can toggle to enable experimental features in Crawlee.
 *
 * NOTE: These options do not follow semantic versioning and may be removed or changed at any time. Use at your own risk.
 * If you do use these and encounter issues, please report them to us.
 */
export interface CrawlerExperiments {
    /**
     * @deprecated This experiment is now enabled by default, and this flag will be removed in a future release.
     * If you encounter issues due to this change, please:
     * - report them to us: https://github.com/apify/crawlee
     * - set `requestLocking` to `false` in the `experiments` option of the crawler
     */
    requestLocking?: boolean;
}
/**
 * Provides a simple framework for parallel crawling of web pages.
 * The URLs to crawl are fed either from a static list of URLs
 * or from a dynamic queue of URLs enabling recursive crawling of websites.
 *
 * `BasicCrawler` is a low-level tool that requires the user to implement the page
 * download and data extraction functionality themselves.
 * If we want a crawler that already facilitates this functionality,
 * we should consider using {@link CheerioCrawler}, {@link PuppeteerCrawler} or {@link PlaywrightCrawler}.
 *
 * `BasicCrawler` invokes the user-provided {@link BasicCrawlerOptions.requestHandler|`requestHandler`}
 * for each {@link Request} object, which represents a single URL to crawl.
 * The {@link Request} objects are fed from the {@link RequestList} or {@link RequestQueue}
 * instances provided by the {@link BasicCrawlerOptions.requestList|`requestList`} or {@link BasicCrawlerOptions.requestQueue|`requestQueue`}
 * constructor options, respectively. If neither `requestList` nor `requestQueue` options are provided,
 * the crawler will open the default request queue either when the {@link BasicCrawler.addRequests|`crawler.addRequests()`} function is called,
 * or if `requests` parameter (representing the initial requests) of the {@link BasicCrawler.run|`crawler.run()`} function is provided.
 *
 * If both {@link BasicCrawlerOptions.requestList|`requestList`} and {@link BasicCrawlerOptions.requestQueue|`requestQueue`} options are used,
 * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
 * to the {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
 *
 * The crawler finishes if there are no more {@link Request} objects to crawl.
 *
 * New requests are only dispatched when there is enough free CPU and memory available,
 * using the functionality provided by the {@link AutoscaledPool} class.
 * All {@link AutoscaledPool} configuration options can be passed to the {@link BasicCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
 * parameter of the `BasicCrawler` constructor.
 * For user convenience, the {@link AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
 * {@link AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
 * underlying {@link AutoscaledPool} constructor are available directly in the `BasicCrawler` constructor.
 *
 * **Example usage:**
 *
 * ```javascript
 * import { BasicCrawler, Dataset } from 'crawlee';
 *
 * // Create a crawler instance
 * const crawler = new BasicCrawler({
 *     async requestHandler({ request, sendRequest }) {
 *         // 'request' contains an instance of the Request class
 *         // Here we simply fetch the HTML of the page and store it to a dataset
 *         const { body } = await sendRequest({
 *             url: request.url,
 *             method: request.method,
 *             body: request.payload,
 *             headers: request.headers,
 *         });
 *
 *         await Dataset.pushData({
 *             url: request.url,
 *             html: body,
 *         })
 *     },
 * });
 *
 * // Enqueue the initial requests and run the crawler
 * await crawler.run([
 *     'http://www.example.com/page-1',
 *     'http://www.example.com/page-2',
 * ]);
 * ```
 * @category Crawlers
 */
export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext> {
    readonly config: Configuration;
    protected static readonly CRAWLEE_STATE_KEY = "CRAWLEE_STATE";
    /**
     * A reference to the underlying {@link Statistics} class that collects and logs run statistics for requests.
     */
    readonly stats: Statistics;
    /**
     * A reference to the underlying {@link RequestList} class that manages the crawler's {@link Request|requests}.
     * Only available if used by the crawler.
     */
    requestList?: IRequestList;
    /**
     * Dynamic queue of URLs to be processed. This is useful for recursive crawling of websites.
     * A reference to the underlying {@link RequestQueue} class that manages the crawler's {@link Request|requests}.
     * Only available if used by the crawler.
     */
    requestQueue?: RequestProvider;
    /**
     * The main request-handling component of the crawler. It's initialized during the crawler startup.
     */
    protected requestManager?: IRequestManager;
    /**
     * A reference to the underlying {@link SessionPool} class that manages the crawler's {@link Session|sessions}.
     * Only available if used by the crawler.
     */
    sessionPool?: SessionPool;
    /**
     * A reference to the underlying {@link AutoscaledPool} class that manages the concurrency of the crawler.
     * > *NOTE:* This property is only initialized after calling the {@link BasicCrawler.run|`crawler.run()`} function.
     * We can use it to change the concurrency settings on the fly,
     * to pause the crawler by calling {@link AutoscaledPool.pause|`autoscaledPool.pause()`}
     * or to abort it by calling {@link AutoscaledPool.abort|`autoscaledPool.abort()`}.
     */
    autoscaledPool?: AutoscaledPool;
    /**
     * Default {@link Router} instance that will be used if we don't specify any {@link BasicCrawlerOptions.requestHandler|`requestHandler`}.
     * See {@link Router.addHandler|`router.addHandler()`} and {@link Router.addDefaultHandler|`router.addDefaultHandler()`}.
     */
    readonly router: RouterHandler<LoadedContext<Context>>;
    running: boolean;
    hasFinishedBefore: boolean;
    readonly log: Log;
    protected requestHandler: RequestHandler<Context>;
    protected errorHandler?: ErrorHandler<Context>;
    protected failedRequestHandler?: ErrorHandler<Context>;
    protected requestHandlerTimeoutMillis: number;
    protected internalTimeoutMillis: number;
    protected maxRequestRetries: number;
    protected maxCrawlDepth?: number;
    protected sameDomainDelayMillis: number;
    protected domainAccessedTime: Map<string, number>;
    protected maxSessionRotations: number;
    protected maxRequestsPerCrawl?: number;
    protected handledRequestsCount: number;
    protected statusMessageLoggingInterval: number;
    protected statusMessageCallback?: StatusMessageCallback;
    protected sessionPoolOptions: SessionPoolOptions;
    protected useSessionPool: boolean;
    protected crawlingContexts: Map<string, Context>;
    protected autoscaledPoolOptions: AutoscaledPoolOptions;
    protected events: EventManager;
    protected httpClient: BaseHttpClient;
    protected retryOnBlocked: boolean;
    protected respectRobotsTxtFile: boolean;
    protected onSkippedRequest?: SkippedRequestCallback;
    private _closeEvents?;
    private shouldLogMaxProcessedRequestsExceeded;
    private shouldLogMaxEnqueuedRequestsExceeded;
    private experiments;
    private readonly robotsTxtFileCache;
    private _experimentWarnings;
    protected static optionsShape: {
// @ts-ignore optional peer dependency or compatibility with es2022
        requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        requestQueue: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        requestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        handleRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        requestHandlerTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        handleRequestTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        errorHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        failedRequestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        handleFailedRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        maxRequestRetries: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        sameDomainDelaySecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        maxSessionRotations: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        maxRequestsPerCrawl: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        maxCrawlDepth: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        autoscaledPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        statusMessageCallback: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        retryOnBlocked: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        respectRobotsTxtFile: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        onSkippedRequest: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        httpClient: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        maxConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        maxRequestsPerMinute: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        keepAlive: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        log: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        experiments: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
        statisticsOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
    };
    /**
     * All `BasicCrawler` parameters are passed via an options object.
     *
     * @param [options] Crawler options; the argument itself is optional in this signature.
     * @param [config] Optional {@link Configuration} instance.
     *   NOTE(review): presumably a default/global configuration is used when omitted — confirm in the implementation.
     */
    constructor(options?: BasicCrawlerOptions<Context>, config?: Configuration);
    /**
     * Checks if the given error is a proxy error by comparing its message to a list of known proxy error messages.
     * Used for retrying requests that failed due to proxy errors.
     *
     * @param error The error to check.
     * @returns `true` when the error is recognized as a proxy error, `false` otherwise.
     */
    protected isProxyError(error: Error): boolean;
    /**
     * Checks whether the given crawling context is getting blocked by anti-bot protection using several heuristics.
     * Returns `false` if the request is not blocked, otherwise returns a string with a description of the block reason.
     *
     * @param _crawlingContext The crawling context to check.
     * @returns Promise resolving to `false` (not blocked) or to a string describing the block reason.
     */
    protected isRequestBlocked(_crawlingContext: Context): Promise<string | false>;
    /**
     * This method is periodically called by the crawler, every `statusMessageLoggingInterval` seconds.
     *
     * @param message The status message text.
     * @param [options] Optional {@link SetStatusMessageOptions} accompanying the message.
     * @returns Promise that resolves with no value.
     *   NOTE(review): the declaration does not show where the message is delivered — confirm in the implementation.
     */
    setStatusMessage(message: string, options?: SetStatusMessageOptions): Promise<void>;
    /**
     * Private member. TypeScript strips the types of private members from declaration files,
     * so neither its parameters nor its return type are visible here.
     * NOTE(review): by its name, presumably creates the periodic status logger — confirm.
     */
    private getPeriodicLogger;
    /**
     * Runs the crawler. Returns a promise that resolves once all the requests are processed
     * and `autoscaledPool.isFinished` returns `true`.
     *
     * We can use the `requests` parameter to enqueue the initial requests — it is a shortcut for
     * running {@link BasicCrawler.addRequests|`crawler.addRequests()`} before {@link BasicCrawler.run|`crawler.run()`}.
     *
     * @param [requests] The requests to add.
     * @param [options] Options for the request queue; see {@link CrawlerRunOptions}.
     * @returns Promise resolving to the {@link FinalStatistics} of the run.
     */
    run(requests?: RequestsLike, options?: CrawlerRunOptions): Promise<FinalStatistics>;
    /**
     * Gracefully stops the current run of the crawler.
     *
     * All the tasks active at the time of calling this method will be allowed to finish.
     *
     * To stop the crawler immediately, use {@link BasicCrawler.teardown|`crawler.teardown()`} instead.
     *
     * This method is synchronous (returns `void`), so it does not itself wait for the active tasks.
     *
     * @param [message] Optional message.
     *   NOTE(review): presumably logged as the reason for stopping — confirm.
     */
    stop(message?: string): void;
    /**
     * Asynchronously provides a {@link RequestProvider}.
     * NOTE(review): presumably the request queue used by this crawler, opened on demand — confirm.
     *
     * @returns Promise resolving to the {@link RequestProvider}.
     */
    getRequestQueue(): Promise<RequestProvider>;
    /**
     * Asynchronously provides a state object of type `State` (a {@link Dictionary} by default).
     * NOTE(review): presumably a persisted, crawler-wide state initialized with `defaultValue` — confirm.
     *
     * @param [defaultValue] Optional initial value of the state.
     * @returns Promise resolving to the state object.
     */
    useState<State extends Dictionary = Dictionary>(defaultValue?: State): Promise<State>;
    /**
     * Read-only numeric accessor (only a getter is declared).
     * NOTE(review): by its name, an approximate count of requests still pending — confirm how it is computed.
     */
    protected get pendingRequestCountApproximation(): number;
    /**
     * Computes a numeric limit for enqueued requests.
     *
     * @param [explicitLimit] Optional limit supplied by the caller.
     * @returns A number, or `undefined`.
     *   NOTE(review): `undefined` presumably means "no limit" — confirm.
     */
    protected calculateEnqueuedRequestLimit(explicitLimit?: number): number | undefined;
    /**
     * Handles a skipped request.
     *
     * @param options Same shape as the first argument of {@link SkippedRequestCallback}.
     * @returns Promise that resolves with no value.
     *   NOTE(review): presumably forwards to the user-provided `onSkippedRequest` callback — confirm.
     */
    protected handleSkippedRequest(options: Parameters<SkippedRequestCallback>[0]): Promise<void>;
    /**
     * Adds requests to the queue in batches. By default, it will resolve after the initial batch is added, and continue
     * adding the rest in background. You can configure the batch size via `batchSize` option and the sleep time in between
     * the batches via `waitBetweenBatchesMillis`. If you want to wait for all batches to be added to the queue, you can use
     * the `waitForAllRequestsToBeAdded` promise you get in the response object.
     *
     * This is an alias for calling `addRequestsBatched()` on the implicit `RequestQueue` for this crawler instance.
     *
     * @param requests The requests to add
     * @param [options] Options for the request queue
     * @returns Promise resolving to a {@link CrawlerAddRequestsResult}.
     */
    addRequests(requests: RequestsLike, options?: CrawlerAddRequestsOptions): Promise<CrawlerAddRequestsResult>;
    /**
     * Pushes data to the specified {@link Dataset}, or the default crawler {@link Dataset} by calling {@link Dataset.pushData}.
     *
     * @param data The data to push; accepts whatever the first parameter of {@link Dataset.pushData} accepts.
     * @param [datasetIdOrName] ID or name of the target dataset; the default crawler dataset is used when omitted.
     * @returns Promise that resolves with no value.
     */
    pushData(data: Parameters<Dataset['pushData']>[0], datasetIdOrName?: string): Promise<void>;
    /**
     * Retrieves the specified {@link Dataset}, or the default crawler {@link Dataset}.
     *
     * @param [idOrName] ID or name of the dataset; the default crawler dataset is retrieved when omitted.
     * @returns Promise resolving to the {@link Dataset}.
     */
    getDataset(idOrName?: string): Promise<Dataset>;
    /**
     * Retrieves data from the default crawler {@link Dataset} by calling {@link Dataset.getData}.
     *
     * @param args The same arguments that {@link Dataset.getData} accepts.
     * @returns The same type that {@link Dataset.getData} returns.
     */
    getData(...args: Parameters<Dataset['getData']>): ReturnType<Dataset['getData']>;
    /**
     * Retrieves all the data from the default crawler {@link Dataset} and exports them to the specified format.
     * Supported formats are currently 'json' and 'csv', and will be inferred from the `path` automatically.
     *
     * @param path Path of the export target; also used to infer the format.
     * @param [format] Explicit format, `'json'` or `'csv'`.
     *   NOTE(review): presumably takes precedence over the format inferred from `path` — confirm.
     * @param [options] Optional {@link DatasetExportOptions}.
     * @returns Promise resolving to an array of `Data`.
     *   NOTE(review): presumably the exported items — confirm.
     */
    exportData<Data>(path: string, format?: 'json' | 'csv', options?: DatasetExportOptions): Promise<Data[]>;
    /**
     * Initializes the crawler.
     *
     * @returns Promise that resolves with no value once initialization completes.
     */
    protected _init(): Promise<void>;
    /**
     * Runs the request handler for the given crawling context.
     * NOTE(review): presumably invokes the user-provided `requestHandler` — confirm.
     *
     * @param crawlingContext The crawling context to process.
     * @returns Promise that resolves with no value.
     */
    protected _runRequestHandler(crawlingContext: Context): Promise<void>;
    /**
     * Handles blocked request
     *
     * Synchronous; returns nothing.
     * NOTE(review): by its name, presumably throws when `statusCode` indicates blocking — confirm which codes.
     *
     * @param session The session the request was made with.
     * @param statusCode The numeric status code to evaluate.
     */
    protected _throwOnBlockedRequest(session: Session, statusCode: number): void;
    /**
     * Private member. TypeScript strips the types of private members from declaration files,
     * so its signature is not visible here.
     * NOTE(review): by its name, presumably checks a URL against the robots.txt rules — confirm.
     */
    private isAllowedBasedOnRobotsTxtFile;
    /**
     * Asynchronously provides the {@link RobotsTxtFile} applicable to the given URL.
     *
     * @param url The URL to look up.
     * @returns Promise resolving to a {@link RobotsTxtFile}, or to `undefined`.
     *   NOTE(review): `undefined` presumably means robots.txt handling is disabled or no file is available — confirm.
     */
    protected getRobotsTxtFileForUrl(url: string): Promise<RobotsTxtFile | undefined>;
    /**
     * NOTE(review): by its name, presumably pauses the crawler when a migration event occurs — confirm.
     *
     * @returns Promise that resolves with no value.
     */
    protected _pauseOnMigration(): Promise<void>;
    /**
     * Initializes the RequestManager based on the configured requestList and requestQueue.
     *
     * Private member: its signature is not emitted in this declaration file.
     */
    private initializeRequestManager;
    /**
     * Fetches the next request to process from the underlying request provider.
     *
     * @returns Promise resolving to the next {@link Request}, or to `null`.
     *   NOTE(review): `null` presumably means no request is currently available — confirm.
     */
    protected _fetchNextRequest(): Promise<Request<Dictionary> | null>;
    /**
     * Executed when `errorHandler` finishes or the request is successful.
     * Can be used to clean up orphaned browser pages.
     *
     * @param _crawlingContext The crawling context that has finished processing.
     * @returns Promise that resolves with no value.
     */
    protected _cleanupContext(_crawlingContext: Context): Promise<void>;
    /**
     * Delays processing of the request based on the `sameDomainDelaySecs` option,
     * adding it back to the queue after the timeout passes. Returns `true` if the request
     * should be ignored and will be reclaimed to the queue once ready.
     *
     * @param request The request to possibly delay.
     * @param source The request list, provider or manager the request came from.
     * @returns `true` if the request should be ignored for now; `false` otherwise.
     */
    protected delayRequest(request: Request, source: IRequestList | RequestProvider | IRequestManager): boolean;
    /**
     * Wrapper around requestHandler that fetches requests from RequestList/RequestQueue
     * then retries them in case of an error, etc.
     *
     * @returns Promise that resolves with no value.
     */
    protected _runTaskFunction(): Promise<void>;
    /**
     * Run async callback with given timeout and retry.
     *
     * @param handler The async callback to run.
     * @param timeout The timeout applied to the callback.
     *   NOTE(review): unit is not visible in the declaration (presumably milliseconds) — confirm.
     * @param error An `Error` instance or a message string.
     *   NOTE(review): presumably what is raised when the timeout elapses — confirm.
     * @param [maxRetries] NOTE(review): presumably the maximum number of retries; default not visible here — confirm.
     * @param [retried] NOTE(review): presumably the number of attempts already made (internal counter) — confirm.
     * @returns Promise that resolves with no value.
     * @ignore
     */
    protected _timeoutAndRetry(handler: () => Promise<unknown>, timeout: number, error: Error | string, maxRetries?: number, retried?: number): Promise<void>;
    /**
     * Returns true if either RequestList or RequestQueue have a request ready for processing.
     *
     * @returns Promise resolving to the boolean described above.
     */
    protected _isTaskReadyFunction(): Promise<boolean>;
    /**
     * Returns true if both RequestList and RequestQueue have all requests finished.
     *
     * @returns Promise resolving to the boolean described above.
     */
    protected _defaultIsFinishedFunction(): Promise<boolean>;
    /**
     * Private member. TypeScript strips the types of private members from declaration files,
     * so its signature is not visible here.
     * NOTE(review): by its name, presumably retires/replaces the session of a request — confirm.
     */
    private _rotateSession;
    /**
     * Handles errors thrown by user provided requestHandler()
     *
     * @param error The error that was thrown.
     * @param crawlingContext The crawling context of the failed request.
     * @param source The request list or request manager the request came from.
     * @returns Promise that resolves with no value.
     */
    protected _requestFunctionErrorHandler(error: Error, crawlingContext: Context, source: IRequestList | IRequestManager): Promise<void>;
    /**
     * Runs the given callback and returns a promise typed as `T`.
     *
     * `T` appears only in the return type (the callback is typed as returning `unknown`),
     * so it cannot be inferred from the argument and must be supplied or inferred from the call site.
     * NOTE(review): by its name, presumably marks errors thrown by `cb` as originating from user code — confirm.
     *
     * @param cb The callback to invoke.
     * @returns Promise typed as `T`.
     */
    protected _tagUserHandlerError<T>(cb: () => unknown): Promise<T>;
    /**
     * NOTE(review): by its name, presumably invokes the user-provided `failedRequestHandler`
     * for a request that will not be retried — confirm.
     *
     * @param crawlingContext The crawling context of the failed request.
     * @param error The error associated with the failure.
     * @returns Promise that resolves with no value.
     */
    protected _handleFailedRequestHandler(crawlingContext: Context, error: Error): Promise<void>;
    /**
     * Resolves the most verbose error message from a thrown error
     *
     * Note that the declared return type is wider than a plain string: callers must also
     * handle a {@link TimeoutError} instance and `undefined`.
     *
     * @param error The error received
     * @param [forceStack] NOTE(review): presumably forces the stack trace to be included — confirm.
     * @returns The message to be logged
     */
    protected _getMessageFromError(error: Error, forceStack?: boolean): string | TimeoutError | undefined;
    /**
     * Decides, synchronously, whether the given request may be retried after the given error.
     * NOTE(review): the criteria (retry counters, error types, `noRetry` flags) are not visible here — confirm.
     *
     * @param request The request that failed.
     * @param error The error it failed with.
     * @returns A boolean; by the method name, `true` means the request can be retried.
     */
    protected _canRequestBeRetried(request: Request, error: Error): boolean;
    /**
     * Produces a {@link LoadedContext} from the given context and error.
     * NOTE(review): by its name, presumably attaches `error` to the context under a deprecated property — confirm.
     *
     * @param context The crawling context to augment.
     * @param error The error to associate with the context.
     * @returns The context typed as `LoadedContext<Context>`.
     */
    protected _augmentContextWithDeprecatedError(context: Context, error: Error): LoadedContext<Context>;
    /**
     * Updates handledRequestsCount from possibly stored counts, usually after worker migration.
     *
     * @returns Promise that resolves with no value.
     */
    protected _loadHandledRequestCount(): Promise<void>;
    /**
     * Executes the given hooks with the given arguments.
     * NOTE(review): execution order (sequential vs. parallel) is not visible in the declaration — confirm.
     *
     * @param hooks Array of hook functions; each may be sync or async ({@link Awaitable}).
     * @param args Arguments matching the hooks' parameter list.
     * @returns Promise that resolves with no value.
     */
    protected _executeHooks<HookLike extends (...args: any[]) => Awaitable<void>>(hooks: HookLike[], ...args: Parameters<HookLike>): Promise<void>;
    /**
     * Stops the crawler immediately.
     *
     * This method doesn't wait for currently active requests to finish.
     *
     * To stop the crawler gracefully (waiting for all running requests to finish), use {@link BasicCrawler.stop|`crawler.stop()`} instead.
     *
     * @returns Promise that resolves with no value.
     */
    teardown(): Promise<void>;
    /**
     * Takes a single `HandlePropertyNameChangeData` object describing an old/new property pair.
     * Synchronous; returns nothing.
     * NOTE(review): presumably supports renamed options by mapping the old property onto the new one
     * (and warning about the old name) — confirm.
     */
    protected _handlePropertyNameChange<New, Old>({ newProperty, newName, oldProperty, oldName, propertyKey, allowUndefined, }: HandlePropertyNameChangeData<New, Old>): void;
    /**
     * Derives a cookie header string from the given request.
     * NOTE(review): the header casing looked up and the value returned when no cookie is present are not visible here — confirm.
     *
     * @param request The request to read from.
     * @returns A string (never `undefined` per this signature).
     */
    protected _getCookieHeaderFromRequest(request: Request): string;
    /**
     * Private member. TypeScript strips the types of private members from declaration files,
     * so its signature is not visible here.
     * NOTE(review): presumably the internal counterpart of the public `getRequestQueue()` — confirm.
     */
    private _getRequestQueue;
    /**
     * Checks, synchronously, whether the given request matches the enqueue strategy.
     * NOTE(review): where the strategy is read from (presumably the request itself) is not visible here — confirm.
     *
     * @param request The request to check.
     * @returns A boolean; by the method name, `true` means the request matches.
     */
    protected requestMatchesEnqueueStrategy(request: Request): boolean;
}
/**
 * Inputs for creating a crawling context: a mandatory request plus an optional session and proxy info.
 */
export interface CreateContextOptions {
    /** The request the context is created for (required). */
    request: Request;
    /** Optional session associated with the request. */
    session?: Session;
    /** Optional proxy information associated with the request. */
    proxyInfo?: ProxyInfo;
}
/**
 * Options accepted by `BasicCrawler.addRequests()`.
 * Currently declares no members of its own beyond those of {@link AddRequestsBatchedOptions}.
 */
export interface CrawlerAddRequestsOptions extends AddRequestsBatchedOptions {
}
/**
 * Result type resolved by `BasicCrawler.addRequests()`.
 * Currently declares no members of its own beyond those of {@link AddRequestsBatchedResult}.
 */
export interface CrawlerAddRequestsResult extends AddRequestsBatchedResult {
}
/**
 * Options accepted by `BasicCrawler.run()`: everything from {@link CrawlerAddRequestsOptions}
 * plus control over purging the request queue.
 */
export interface CrawlerRunOptions extends CrawlerAddRequestsOptions {
    /**
     * Whether to purge the RequestQueue before running the crawler again. Defaults to true, so it is possible to reprocess failed requests.
     * When disabled, only new requests will be considered. Note that even a failed request is considered as handled.
     * @default true
     */
    purgeRequestQueue?: boolean;
}
/**
 * Argument shape of `BasicCrawler._handlePropertyNameChange()`. Not exported from this module.
 *
 * NOTE(review): the member descriptions below are inferred from the member names only — confirm against the implementation.
 */
interface HandlePropertyNameChangeData<New, Old> {
    /** Value of the property under its old name, if any. */
    oldProperty?: Old;
    /** Value of the property under its new name, if any. */
    newProperty?: New;
    /** The old property name. */
    oldName: string;
    /** The new property name. */
    newName: string;
    /** Key of the property. NOTE(review): presumably the instance member the resolved value is stored under — confirm. */
    propertyKey: string;
    /** Optional flag. NOTE(review): presumably permits both values to be `undefined`; default not visible here — confirm. */
    allowUndefined?: boolean;
}
/**
 * Creates new {@link Router} instance that works based on request labels.
 * This instance can then serve as a {@link BasicCrawlerOptions.requestHandler|`requestHandler`} of our {@link BasicCrawler}.
 * Defaults to the {@link BasicCrawlingContext}.
 *
 * > Serves as a shortcut for using `Router.create<BasicCrawlingContext>()`.
 *
 * ```ts
 * import { BasicCrawler, createBasicRouter } from 'crawlee';
 *
 * const router = createBasicRouter();
 * router.addHandler('label-a', async (ctx) => {
 *    ctx.log.info('...');
 * });
 * router.addDefaultHandler(async (ctx) => {
 *    ctx.log.info('...');
 * });
 *
 * const crawler = new BasicCrawler({
 *     requestHandler: router,
 * });
 * await crawler.run();
 * ```
 *
 * @param [routes] Optional {@link RouterRoutes} to create the router with.
 *   NOTE(review): presumably an initial label-to-handler mapping — confirm.
 * @returns A {@link RouterHandler} for the given `Context`.
 */
export declare function createBasicRouter<Context extends BasicCrawlingContext = BasicCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): RouterHandler<Context>;
export {};
//# sourceMappingURL=basic-crawler.d.ts.map