Skip to content

Commit

Permalink
Crawler
Browse the repository at this point in the history
  • Loading branch information
cofob committed Nov 17, 2024
1 parent 6f06e69 commit a376d64
Show file tree
Hide file tree
Showing 10 changed files with 512 additions and 360 deletions.
275 changes: 253 additions & 22 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions fastside-cloudflare/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ tower-service = "0.3.3"
console_error_panic_hook = "0.1.7"
getrandom = { version = "0.2.15", features = ["js"] }
tokio = { version = "1.41.1", default-features = false, features = ["sync"] }
serde = "1.0.215"
serde_json = "1.0.132"
reqwest = "0.12.9"

# [features]
# default = []
Expand Down
110 changes: 98 additions & 12 deletions fastside-cloudflare/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,57 @@ use std::{collections::HashMap, sync::Arc};

use axum::Router;
use fastside_core::{
crawler::Crawler,
crawler::{CrawledData, Crawler},
routes::main_router,
types::{AppState, LoadedData},
};
use fastside_shared::config::{AppConfig, UserConfig};
use fastside_shared::{
config::AppConfig,
serde_types::{ServicesData, StoredData},
};
use tokio::sync::RwLock;
use tower_service::Service;
use worker::*;

fn router() -> Router {
let config = Arc::new(AppConfig::default());
let loaded_data = Arc::new(RwLock::new(LoadedData {
services: HashMap::new(),
proxies: HashMap::new(),
default_user_config: UserConfig::default(),
}));
/// Reads the worker's `config` environment variable and deserializes it
/// from JSON into an [`AppConfig`].
///
/// Panics if the variable is unset or does not parse as a valid config.
fn load_config(env: &Env) -> AppConfig {
    let raw = env
        .var("config")
        .expect("config variable is not set")
        .to_string();
    serde_json::from_str(&raw).expect("failed to parse config")
}

async fn router(env: &Env) -> Router {
let config = Arc::new(load_config(&env));
let loaded_data: Arc<RwLock<LoadedData>> = Arc::new(RwLock::new(
serde_json::from_str(
&env.kv("fastside")
.expect("failed to get kv")
.get("loaded_data")
.text()
.await
.expect("failed to get loaded_data from kv")
.expect("loaded_data not found"),
)
.expect("failed to parse loaded_data"),
));
let crawled_data: CrawledData = serde_json::from_str(
&env.kv("fastside")
.expect("failed to get kv")
.get("crawled_data")
.text()
.await
.expect("failed to get crawled_data from kv")
.expect("crawled_data not found"),
)
.expect("failed to parse data");
let shared_state = Arc::new(AppState {
config: config.clone(),
crawler: Arc::new(Crawler::new(
crawler: Arc::new(Crawler::with_data(
loaded_data.clone(),
config.clone().crawler.clone(),
crawled_data,
)),
loaded_data: loaded_data.clone(),
regexes: HashMap::new(),
Expand All @@ -35,10 +65,66 @@ fn router() -> Router {
/// Worker HTTP entrypoint: builds the axum router from the worker
/// environment and delegates the incoming request to it.
#[event(fetch)]
async fn fetch(
    req: HttpRequest,
    env: Env,
    _ctx: Context,
) -> Result<axum::http::Response<axum::body::Body>> {
    console_error_panic_hook::set_once();

    let mut app = router(&env).await;
    let response = app.call(req).await?;
    Ok(response)
}

/// Worker cron entrypoint: downloads the services definition from
/// `services_url`, stores the resulting `LoadedData` snapshot in the
/// `fastside` KV namespace, runs a crawl over it, and stores the
/// serialized crawl results in KV for the fetch handler to read.
///
/// Panics on any failure; there is no caller to propagate errors to.
#[event(scheduled)]
async fn scheduled(_event: ScheduledEvent, env: Env, _ctx: ScheduleContext) {
    console_error_panic_hook::set_once();

    let config = load_config(&env);

    // Fetch the services definition over HTTP from the configured URL.
    let services_url = env
        .var("services_url")
        .expect("services_url variable is not set")
        .to_string();
    let body = reqwest::get(services_url)
        .await
        .expect("request to services failed")
        .text()
        .await
        .expect("failed to get services text");
    let stored_data: StoredData =
        serde_json::from_str(&body).expect("failed to parse services");

    // Index services by name and build the snapshot the fetch path consumes.
    let services: ServicesData = stored_data
        .services
        .into_iter()
        .map(|service| (service.name.clone(), service))
        .collect();
    let loaded_data = LoadedData {
        services,
        proxies: config.proxies.clone(),
        default_user_config: config.default_user_config.clone(),
    };

    // Persist the snapshot to KV before crawling.
    env.kv("fastside")
        .expect("failed to get kv")
        .put(
            "loaded_data",
            serde_json::to_string(&loaded_data).expect("failed to serialize loaded_data"),
        )
        .expect("failed to put loaded_data to kv (builder)")
        .execute()
        .await
        .expect("failed to put loaded_data to kv (request)");

    // Run the crawler over the freshly loaded data and persist its
    // serialized state to KV.
    let crawler = Crawler::new(Arc::new(RwLock::new(loaded_data)), config.crawler.clone());
    crawler.crawl(None).await.expect("failed to crawl");

    let crawled_json =
        serde_json::to_string(&*crawler.read().await).expect("failed to serialize data");
    env.kv("fastside")
        .expect("failed to get kv")
        .put("crawled_data", crawled_json)
        .expect("failed to put crawled_data to kv (builder)")
        .execute()
        .await
        .expect("failed to put crawled_data to kv (request)");
}
11 changes: 11 additions & 0 deletions fastside-cloudflare/wrangler.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,16 @@ name = "fastside"
main = "build/worker/shim.mjs"
compatibility_date = "2023-03-22"

kv_namespaces = [
{ binding = "fastside", id = "989f6480799a4d7c8e2890ac01118fc7" }
]

[build]
command = "cargo install worker-build --version 0.1.0 && worker-build --release"

[vars]
services_url = "https://raw.githubusercontent.com/cofob/fastside/refs/heads/master/services.json"
config = '{"crawler":{"request_timeout":{"secs":1,"nanos":0},"max_concurrent_requests":200},"default_user_config":{"required_tags":["clearnet","https","ipv4"]}}'

[observability]
enabled = true
2 changes: 1 addition & 1 deletion fastside-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ thiserror = "1.0.60" #
tokio = { version = "1.37.0", default-features = false, features = ["sync"] }
futures = "0.3.30" # async
rand = "0.8.5" # random
chrono = "0.4.38" # datetime
chrono = { version = "0.4.38", features = ["serde"] }
time = "0.3.36" # time offsets
regex = "1.10.5" # regex
base64 = "0.22.1" # base64
Expand Down
Loading

0 comments on commit a376d64

Please sign in to comment.