From a02547e5c2c88db8f6a9969da5f52896681c8099 Mon Sep 17 00:00:00 2001 From: josh! Date: Tue, 5 Dec 2023 12:15:17 +1300 Subject: [PATCH] add incransom clearnet & backdate fix urlencode parser --- parsers.py | 3 +- posts.json | 48 ++++++++++++++++---------------- source/incransom-incapt.html | 0 source/incransom-incbackend.html | 0 4 files changed, 26 insertions(+), 25 deletions(-) create mode 100644 source/incransom-incapt.html create mode 100644 source/incransom-incbackend.html diff --git a/parsers.py b/parsers.py index ed110943c2b..48b81394e48 100644 --- a/parsers.py +++ b/parsers.py @@ -1117,8 +1117,9 @@ def knight(): def incransom(): stdlog('parser: ' + 'incransom') + # jq -r '.payload[].title' source/incransom-incback*.html | sed -e 's/%20/ /' || true parser = ''' - jq -r '.payload[].title' source/incransom-incback*.html | sed -e 's/%20/ /' || true + jq -r '.payload[].title' source/incransom-incback*.html | perl -MURI::Escape -ne 'print uri_unescape($_)' | sort | uniq || true ''' posts = runshellcmd(parser) if len(posts) == 1: diff --git a/posts.json b/posts.json index 027bcd7f295..933beed859f 100644 --- a/posts.json +++ b/posts.json @@ -38350,7 +38350,7 @@ "discovered": "2023-08-31 10:26:18.647424" }, { - "post_title": "Pifer's Auction%20%26%20Realty", + "post_title": "Pifer's Auction & Realty", "group_name": "incransom", "discovered": "2023-08-31 10:26:40.469477" }, @@ -39085,12 +39085,12 @@ "discovered": "2023-09-06 19:28:17.540502" }, { - "post_title": "It4 Solutions%20Robras", + "post_title": "It4 Solutions Robras", "group_name": "incransom", "discovered": "2023-09-06 19:28:51.475674" }, { - "post_title": "I Keating%20Furniture%20World", + "post_title": "I Keating Furniture World", "group_name": "incransom", "discovered": "2023-09-06 19:28:52.249661" }, @@ -40165,7 +40165,7 @@ "discovered": "2023-09-20 13:31:11.805471" }, { - "post_title": "ENTRUST Solutions%20Group", + "post_title": "ENTRUST Solutions Group", "group_name": "incransom", "discovered": "2023-09-20 13:31:12.647570" }, @@ -40180,7 +40180,7 @@ "discovered": "2023-09-20 14:33:29.359171" }, { - "post_title": "Federal Labor%20Relations%20Authority", + "post_title": "Federal Labor Relations Authority", "group_name": "incransom", "discovered": "2023-09-20 14:33:29.958511" }, @@ -41900,7 +41900,7 @@ "discovered": "2023-10-11 11:30:01.875294" }, { - "post_title": "Village Building%20Co.", + "post_title": "Village Building Co.", "group_name": "incransom", "discovered": "2023-10-11 15:29:27.934509" }, @@ -41995,7 +41995,7 @@ "discovered": "2023-10-12 16:33:32.583886" }, { - "post_title": "Vicon industries%20inc.", + "post_title": "Vicon industries inc.", "group_name": "incransom", "discovered": "2023-10-12 17:31:59.254562" }, @@ -42035,7 +42035,7 @@ "discovered": "2023-10-13 13:31:37.753007" }, { - "post_title": "Cleveland City%20Schools", + "post_title": "Cleveland City Schools", "group_name": "incransom", "discovered": "2023-10-13 16:32:50.784065" }, @@ -42695,7 +42695,7 @@ "discovered": "2023-10-25 11:57:38.141649" }, { - "post_title": "Direct Mail%20Corporation", + "post_title": "Direct Mail Corporation", "group_name": "incransom", "discovered": "2023-10-25 12:52:20.281658" }, @@ -42805,7 +42805,7 @@ "discovered": "2023-10-27 17:56:00.631408" }, { - "post_title": "Alam Flora%20Sdn%20Bhd", + "post_title": "Alam Flora Sdn Bhd", "group_name": "incransom", "discovered": "2023-10-27 18:42:12.542013" }, @@ -42890,7 +42890,7 @@ "discovered": "2023-10-29 19:16:22.531973" }, { - "post_title": "Global Export%20Marketing%20Co.%20Ltd.", + "post_title": "Global Export Marketing Co. Ltd.", "group_name": "incransom", "discovered": "2023-10-29 23:03:21.943309" }, @@ -43050,7 +43050,7 @@ "discovered": "2023-10-31 19:11:05.779793" }, { - "post_title": "Universal Sewing%20Supply", + "post_title": "Universal Sewing Supply", "group_name": "incransom", "discovered": "2023-10-31 19:11:14.030716" }, @@ -43595,7 +43595,7 @@ "discovered": "2023-11-06 18:33:36.442135" }, { - "post_title": "EFU Life%20Assurance", + "post_title": "EFU Life Assurance", "group_name": "incransom", "discovered": "2023-11-06 18:33:47.350358" }, @@ -44250,12 +44250,12 @@ "discovered": "2023-11-15 18:40:32.885120" }, { - "post_title": "Decatur Independent%20School%20District", + "post_title": "Decatur Independent School District", "group_name": "incransom", "discovered": "2023-11-15 18:40:33.615224" }, { - "post_title": "Yamaha Motor%20Philippines%2CInc.", + "post_title": "Yamaha Motor Philippines,Inc.", "group_name": "incransom", "discovered": "2023-11-15 18:40:34.615669" }, @@ -44355,7 +44355,7 @@ "discovered": "2023-11-16 22:35:13.046442" }, { - "post_title": "Consilium staffing%20llc", + "post_title": "Consilium staffing llc", "group_name": "incransom", "discovered": "2023-11-17 00:51:21.633302" }, @@ -44375,7 +44375,7 @@ "discovered": "2023-11-17 14:36:12.021190" }, { - "post_title": "WellLife Network%20Inc.", + "post_title": "WellLife Network Inc.", "group_name": "incransom", "discovered": "2023-11-17 14:36:16.242293" }, @@ -44635,17 +44635,17 @@ "discovered": "2023-11-23 00:49:37.589121" }, { - "post_title": "Trylon TSF%20Inc.", + "post_title": "Trylon TSF Inc.", "group_name": "incransom", "discovered": "2023-11-23 08:37:18.090454" }, { - "post_title": "Springfield Area%20Chamber%20of%20Commerce", + "post_title": "Springfield Area Chamber of Commerce", "group_name": "incransom", "discovered": "2023-11-23 08:37:18.543157" }, { - "post_title": "Pro Metals%20LLC", + "post_title": "Pro Metals LLC", "group_name": "incransom", "discovered": "2023-11-23 08:37:19.319594" }, @@ -44655,17 +44655,17 @@ "discovered": "2023-11-23 08:37:19.973810" }, { - "post_title": "Ingo Money%20Inc", + "post_title": "Ingo Money Inc", "group_name": "incransom", "discovered": "2023-11-23 08:37:20.877853" }, { - "post_title": "DM Civil%20Co.", + "post_title": "DM Civil Co.", "group_name": "incransom", "discovered": "2023-11-23 08:37:21.911371" }, { - "post_title": "B%2BP Ger%C3%BCstbau%20GmbH", + "post_title": "B+P Gerüstbau GmbH", "group_name": "incransom", "discovered": "2023-11-23 08:37:22.493917" }, @@ -45315,7 +45315,7 @@ "discovered": "2023-12-01 14:36:28.318750" }, { - "post_title": "Kellett %26%20Bartholow%20PLLC", + "post_title": "Kellett & Bartholow PLLC", "group_name": "incransom", "discovered": "2023-12-01 14:36:29.683011" }, diff --git a/source/incransom-incapt.html b/source/incransom-incapt.html new file mode 100644 index 00000000000..e69de29bb2d diff --git a/source/incransom-incbackend.html b/source/incransom-incbackend.html new file mode 100644 index 00000000000..e69de29bb2d