From 63112bfc502abb50976fab62d49f2ec5a9aff360 Mon Sep 17 00:00:00 2001 From: andyfcx Date: Thu, 21 Nov 2024 14:54:38 +0800 Subject: [PATCH] fix: make sure to default utf-8 as encoding and strip unwanted single or double quotes --- scrapling/engines/camo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scrapling/engines/camo.py b/scrapling/engines/camo.py index e55d951..596ddb9 100644 --- a/scrapling/engines/camo.py +++ b/scrapling/engines/camo.py @@ -109,8 +109,11 @@ def fetch(self, url: str) -> Response: content_type = res.headers.get('content-type', '') # Parse charset from content-type encoding = 'utf-8' # default encoding - if 'charset=' in content_type.lower(): - encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip() + content_type_lower = content_type.lower() + if 'charset=' in content_type_lower: + encoding = content_type_lower.split('charset=')[-1].split(';')[0].strip().strip('"').strip("'") + if 'utf-8' in encoding: + encoding = 'utf-8' status_text = res.status_text # PlayWright API sometimes give empty status text for some reason!