From 378bb5577f002f008d4a90eda2671da026f69d1b Mon Sep 17 00:00:00 2001 From: Brad Culwell Date: Sun, 2 Jun 2024 20:43:06 -0500 Subject: [PATCH] fix: make captcha solver stable --- src/app.rs | 8 +- src/source/torrent_galaxy.rs | 234 +++++++++++++++++++---------------- src/widget/captcha.rs | 19 ++- src/widget/input.rs | 8 +- src/widget/page.rs | 2 +- 5 files changed, 144 insertions(+), 127 deletions(-) diff --git a/src/app.rs b/src/app.rs index e7e25ea..efa5894 100644 --- a/src/app.rs +++ b/src/app.rs @@ -8,7 +8,6 @@ use ratatui::{ Frame, Terminal, }; use reqwest::cookie::Jar; -use reqwest::{cookie::CookieStore, Url}; use tokio::{sync::mpsc, task::AbortHandle}; use crate::{ @@ -384,13 +383,8 @@ impl App { Ok(SourceResults::Results(rt)) => ctx.results = rt, Ok(SourceResults::Captcha(c)) => { ctx.mode = Mode::Captcha; - // self.widgets.captcha.ses_id = Some(ses_id); self.widgets.captcha.image = Some(c); - let cookies = jar.cookies(&Url::parse("https://torrentgalaxy.to/")?); - let x = cookies.map(|c| c.to_str().unwrap_or("").to_owned()).unwrap_or_default(); - ctx.notify(format!("Cookies:\n{}", x)); - // jar.add_cookie_str("", &Url::parse("https://torrentgalaxy.to/")?) - // jar.add_cookies_str(ses_id, Url::parse("")); + self.widgets.captcha.input.clear(); } Err(e) => { // Clear results on error diff --git a/src/source/torrent_galaxy.rs b/src/source/torrent_galaxy.rs index 9448803..26fbde9 100644 --- a/src/source/torrent_galaxy.rs +++ b/src/source/torrent_galaxy.rs @@ -1,11 +1,16 @@ -use std::{cmp::max, collections::HashMap, error::Error, time::Duration}; +use std::{ + cmp::max, + collections::HashMap, + error::Error, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; use ratatui::{ layout::{Alignment, Constraint}, style::{Color, Stylize}, }; use reqwest::{StatusCode, Url}; -use scraper::{Html, Selector}; +use scraper::{selectable::Selectable, Html, Selector}; use serde::{Deserialize, Serialize}; use urlencoding::encode; @@ -101,6 +106,72 @@ popup_enum! { pub struct TorrentGalaxyHtmlSource; +fn get_url( + base_url: String, + search: &SearchQuery, +) -> Result<(Url, Url), Box> { + let base_url = Url::parse(&add_protocol(base_url, true))?.join("torrents.php")?; + + let query = encode(&search.query); + + let sort = match TgxSort::try_from(search.sort.sort) { + Ok(TgxSort::Date) => "&sort=id", + Ok(TgxSort::Seeders) => "&sort=seeders", + Ok(TgxSort::Leechers) => "&sort=leechers", + Ok(TgxSort::Size) => "&sort=size", + Ok(TgxSort::Name) => "&sort=name", + _ => "", + }; + let ord = format!("&order={}", search.sort.dir.to_url()); + let filter = match TgxFilter::try_from(search.filter) { + Ok(TgxFilter::OnlineStreams) => "&filterstream=1", + Ok(TgxFilter::ExcludeXXX) => "&nox=2&nox=1", + Ok(TgxFilter::NoWildcard) => "&nowildcard=1", + _ => "", + }; + let cat = match search.category { + 0 => "".to_owned(), + x => format!("&c{}=1", x), + }; + + let q = format!( + "search={}&page={}{}{}{}{}", + query, + search.page - 1, + filter, + cat, + sort, + ord + ); + let mut url = base_url.clone(); + url.set_query(Some(&q)); + Ok((base_url, url)) +} + +async fn try_get_content( + client: &reqwest::Client, + timeout: Option, + url: &Url, +) -> Result> { + let mut request = client.get(url.to_owned()); + if let Some(timeout) = timeout { + request = request.timeout(Duration::from_secs(timeout)); + } + let response = request + .header( + "User-Agent", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0", + ) + .send() + .await?; + if response.status() != StatusCode::OK { + // Throw error if response code is not OK + let code = response.status().as_u16(); + return Err(format!("{}\nInvalid response code: {}", url, code).into()); + } + Ok(response.text().await?) +} + fn get_lang(full_name: String) -> String { match full_name.as_str() { "English" => "en", @@ -180,61 +251,21 @@ impl Source for TorrentGalaxyHtmlSource { _date_format: Option, ) -> Result> { let tgx = config.tgx.to_owned().unwrap_or_default(); - let base_url = Url::parse(&add_protocol(tgx.base_url, true))?.join("torrents.php")?; - let query = encode(&search.query); - - let sort = match TgxSort::try_from(search.sort.sort) { - Ok(TgxSort::Date) => "&sort=id", - Ok(TgxSort::Seeders) => "&sort=seeders", - Ok(TgxSort::Leechers) => "&sort=leechers", - Ok(TgxSort::Size) => "&sort=size", - Ok(TgxSort::Name) => "&sort=name", - _ => "", - }; - let ord = format!("&order={}", search.sort.dir.to_url()); - let filter = match TgxFilter::try_from(search.filter) { - Ok(TgxFilter::OnlineStreams) => "&filterstream=1", - Ok(TgxFilter::ExcludeXXX) => "&nox=2&nox=1", - Ok(TgxFilter::NoWildcard) => "&nowildcard=1", - _ => "", - }; - let cat = match search.category { - 0 => "".to_owned(), - x => format!("&c{}=1", x), - }; - - let q = format!( - "search={}&page={}{}{}{}{}", - query, - search.page - 1, - filter, - cat, - sort, - ord - ); - let mut url = base_url.clone(); - url.set_query(Some(&q)); - - let mut request = client.get(url.to_owned()); - if let Some(timeout) = tgx.timeout { - request = request.timeout(Duration::from_secs(timeout)); - } - let response = request.send().await?; - if response.status() != StatusCode::OK { - // Throw error if response code is not OK - let code = response.status().as_u16(); - return Err(format!("{}\nInvalid response code: {}", url, code).into()); - } - let content = response.text().await?; + let (base_url, url) = get_url(tgx.base_url, search)?; let table_sel = &sel!(".tgxtable")?; - #[cfg(feature = "unstable-captcha")] + + // If that doesn't work, try making the user solve a captcha + let content = try_get_content(client, tgx.timeout, &url).await?; if Html::parse_document(&content).select(table_sel).count() == 0 { let mut request = client.get("https://torrentgalaxy.to/captcha/cpt_show.pnp?v=txlight&63fd4c746843c74b53ca60277192fb48"); if let Some(timeout) = tgx.timeout { request = request.timeout(Duration::from_secs(timeout)); } - let response = request.send().await?; + let response = request + .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0") + .send() + .await?; let bytes = response.bytes().await?; let mut picker = ratatui_image::picker::Picker::new((1, 2)); picker.protocol_type = ratatui_image::picker::ProtocolType::Halfblocks; @@ -243,14 +274,10 @@ impl Source for TorrentGalaxyHtmlSource { return Ok(SourceResponse::Captcha(image)); } + + // Results table found, can start parsing let doc = Html::parse_document(&content); - if doc.select(table_sel).count() == 0 { - return Err(format!( - "{}\nNo results table found:\nMost likely due to captcha or rate limit\n\nWait a bit before searching again...", - url, - ) - .into()); - } + let item_sel = &sel!("div.tgxtablerow")?; let title_sel = &sel!("div.tgxtablecell:nth-of-type(4) > div > a.txlight:first-of-type")?; let imdb_sel = &sel!("div.tgxtablecell:nth-of-type(4) > div > a:last-of-type")?; @@ -398,67 +425,56 @@ impl Source for TorrentGalaxyHtmlSource { date_format: Option, ) -> Result> { let tgx = config.tgx.to_owned().unwrap_or_default(); - // let jar = Jar::default(); - // jar.add_cookie_str(cookie, url) - // let client = ClientBuilder::new() - // .cookie_provider(true) - // .timeout(Duration::from_secs(60)) - // .build()?; - // - // let mut headers = HeaderMap::new(); - // headers.insert( - // "Cookie", - // HeaderValue::from_str(&format!("PHPSESSID={}", ses_id))?, - // ); - - let base_url = Url::parse(&add_protocol(tgx.base_url, true))?.join("torrents.php")?; - let query = encode(&search.query); - - let sort = match TgxSort::try_from(search.sort.sort) { - Ok(TgxSort::Date) => "&sort=id", - Ok(TgxSort::Seeders) => "&sort=seeders", - Ok(TgxSort::Leechers) => "&sort=leechers", - Ok(TgxSort::Size) => "&sort=size", - Ok(TgxSort::Name) => "&sort=name", - _ => "", - }; - let ord = format!("&order={}", search.sort.dir.to_url()); - let filter = match TgxFilter::try_from(search.filter) { - Ok(TgxFilter::OnlineStreams) => "&filterstream=1", - Ok(TgxFilter::ExcludeXXX) => "&nox=2&nox=1", - Ok(TgxFilter::NoWildcard) => "&nowildcard=1", - _ => "", - }; - let cat = match search.category { - 0 => "".to_owned(), - x => format!("&c{}=1", x), - }; + let time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_millis(); + + let hash = "4578678889c4b42ae37b543434c81d85"; + // let hash = "ff9df5a6db0ebe6bd636296da767a587"; + let base_url = Url::parse(&tgx.base_url)?; + let mut hash_url = base_url.clone().join("hub.php")?; + hash_url.set_query(Some(&format!("a=vlad&u={}", time))); + // let hash_url = format!("https://torrentgalaxy.to/hub.php?a=vlad&u={}", time); + client + .post(hash_url.clone()) + .body(format!("fash={}", hash)) + .header("Content-Type", "application/x-www-form-urlencoded") + .header( + "User-Agent", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0", + ) + .send() + .await?; - let q = format!( - "search={}&page={}{}{}{}{}", - query, - search.page - 1, - filter, - cat, - sort, - ord - ); - let mut url = base_url.clone(); - url.set_query(Some(&q)); - - let mut request = client.post(format!( - "https://torrentgalaxy.to/galaxyfence.php?captcha={}&dropoff={}", + let (_base_url, url) = get_url(tgx.base_url, search)?; + let mut full_url = base_url.clone().join("galaxyfence.php")?; + full_url.set_query(Some(&format!( + "captcha={}&dropoff={}", solution, encode(&format!( "{}?{}", url.path(), url.query().unwrap_or_default() - )), - )); + )) + ))); + let mut request = client.post(full_url.clone()); if let Some(timeout) = tgx.timeout { request = request.timeout(Duration::from_secs(timeout)); } - request.send().await?.text().await?; + request = request.header( + "Accept", + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + ) + .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0") + .header("Content-Type", "application/x-www-form-urlencoded"); + + let response = request.send().await?; + if response.status() != StatusCode::OK { + return Err(format!( + "Captcha solution returned HTTP status {}", + response.status() + ) + .into()); + } + TorrentGalaxyHtmlSource::search(client, search, config, date_format).await } diff --git a/src/widget/captcha.rs b/src/widget/captcha.rs index 1269f27..97e1a23 100644 --- a/src/widget/captcha.rs +++ b/src/widget/captcha.rs @@ -12,7 +12,6 @@ use super::{input::InputWidget, Widget}; pub struct CaptchaPopup { pub image: Option>, - pub ses_id: Option, pub input: InputWidget, } @@ -20,12 +19,18 @@ impl Default for CaptchaPopup { fn default() -> Self { Self { image: Default::default(), - ses_id: Default::default(), - input: InputWidget::new(5, None), + input: InputWidget::new(32, None), } } } +impl InputWidget { + pub fn clear(&mut self) { + self.input.clear(); + self.cursor = 0; + } +} + impl Widget for CaptchaPopup { fn draw(&mut self, f: &mut Frame, ctx: &Context, area: Rect) { let center = area.inner(&Margin { @@ -39,9 +44,8 @@ impl Widget for CaptchaPopup { ) .split(center); if let Some(img) = self.image.as_mut() { - let sess_id = self.ses_id.clone().unwrap_or_default(); f.render_widget( - super::border_block(&ctx.theme, true).title(sess_id), + super::border_block(&ctx.theme, true).title("Captcha"), layout[0], ); StatefulImage::new(None).render( @@ -53,7 +57,10 @@ impl Widget for CaptchaPopup { img, ); } - f.render_widget(super::border_block(&ctx.theme, true), layout[1]); + f.render_widget( + super::border_block(&ctx.theme, true).title("Enter Captcha solution"), + layout[1], + ); let input_area = layout[1].inner(&Margin { horizontal: 1, diff --git a/src/widget/input.rs b/src/widget/input.rs index 59067c5..571b013 100644 --- a/src/widget/input.rs +++ b/src/widget/input.rs @@ -14,11 +14,11 @@ pub struct InputWidget { pub input: String, pub cursor: usize, pub max_len: usize, - pub validator: Option bool>, + pub validator: Option bool>, } impl InputWidget { - pub fn new(max_len: usize, validator: Option bool>) -> Self { + pub fn new(max_len: usize, validator: Option bool>) -> Self { InputWidget { input: "".to_owned(), cursor: 0, @@ -65,7 +65,7 @@ impl super::Widget for InputWidget { match (code, modifiers) { (Char(c), &KeyModifiers::NONE | &KeyModifiers::SHIFT) => { if let Some(validator) = &self.validator { - if !validator(*c) { + if !validator(c) { return; // If character is invalid, ignore it } } @@ -142,7 +142,7 @@ impl super::Widget for InputWidget { if let Event::Paste(mut p) = evt.to_owned() { if let Some(validator) = self.validator { // Remove invalid chars - p = p.chars().filter(|c| validator(*c)).collect(); + p = p.chars().filter(validator).collect(); } self.input = format!( "{}{}{}", diff --git a/src/widget/page.rs b/src/widget/page.rs index 95755d0..588e21b 100644 --- a/src/widget/page.rs +++ b/src/widget/page.rs @@ -24,7 +24,7 @@ pub struct PagePopup { impl Default for PagePopup { fn default() -> Self { PagePopup { - input: InputWidget::new(3, Some(|e| e.is_numeric())), + input: InputWidget::new(3, Some(char::is_ascii_digit)), } } }