From a1b746f41ae97f5d1ca6b9404b1db516e96e9f5f Mon Sep 17 00:00:00 2001 From: "Th. Ma" Date: Fri, 14 Jul 2023 17:40:00 +0200 Subject: [PATCH] Fixing highlights saving (#67) Fixing highlights saving Adapting worker to source Graphql API Updating sources before installing packages bugfix: Fixing status popularity saving Revised publication endpoint and access key rotation Restored total partitions Updated test configuration file Revised assignment --- .env.dist | 3 +- .env.test.dist | 4 + .github/workflows/tests.yml | 5 +- Makefile | 2 +- src/command/save_highlights.clj | 17 ++-- src/twitter/api_client.clj | 143 +++++++++++++++++++++----------- src/twitter/status.clj | 5 +- 7 files changed, 118 insertions(+), 61 deletions(-) diff --git a/.env.dist b/.env.dist index 171423c..f8c1880 100644 --- a/.env.dist +++ b/.env.dist @@ -1,11 +1,12 @@ BEARER_TOKEN='' +FALLBACK_ENDPOINT='' +PUBLICATION_ENDPOINT='' COMPOSE_PROJECT_NAME='org_example_highlights' DD_ENV=prod DD_AGENT_HOST='' DD_SERVICE='highlights.example.org' DD_VERSION='__VERSION__' DEBUG= -FALLBACK_ENDPOINT='' LIST_NAME='' WORKER='highlights.example.org' WORKER_UID=1000 diff --git a/.env.test.dist b/.env.test.dist index 2d29855..b3af8a7 100644 --- a/.env.test.dist +++ b/.env.test.dist @@ -1,3 +1,7 @@ +BEARER_TOKEN='' +FALLBACK_ENDPOINT='' +PUBLICATION_ENDPOINT='' +COMPOSE_PROJECT_NAME='org_example_highlights' DEBUG=1 WORKER='highlights.example.org' WORKER_UID=1000 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cfabe53..86e3959 100755 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,8 +17,11 @@ jobs: - 5433:5432 steps: + - name: Update sources + run: sudo apt update --assume-yes + - name: Install packages - run: sudo apt-get install bash gzip postgresql-client tar wget + run: sudo apt install bash gzip postgresql-client tar wget --assume-yes - name: Checkout project uses: actions/checkout@v3 diff --git a/Makefile b/Makefile index 07ab8e3..6984b04 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL:=/bin/bash .PHONY: restart start stop test -COMPOSE_PROJECT_NAME = ?= 'org_example_highlights' +COMPOSE_PROJECT_NAME ?= 'org_example_highlights' WORKER ?= 'highlights.example.org' TMP_DIR ?= '/tmp/tmp_${WORKER}' diff --git a/src/command/save_highlights.clj b/src/command/save_highlights.clj index 78d254a..5ce285a 100644 --- a/src/command/save_highlights.clj +++ b/src/command/save_highlights.clj @@ -55,12 +55,12 @@ (try (assoc {:status-id (:id tweet)} - :total-retweets (:retweet_count tweet) - :checked-at checked-at - :total-favorites (:favorite_count tweet)) + :total-retweets (get tweet :retweet_count) + :checked-at checked-at + :total-favorites (get tweet :favorite_count)) (catch Exception e - (error-handler/log-error e) - nil)))) + (let [error-message (.getMessage e)] + (error-handler/log-error error-message)))))) (defn record-popularity-of-highlights-batch [highlights checked-at {status-popularity :status-popularity @@ -71,8 +71,11 @@ statuses (remove #(nil? %) statuses) status-popularity-props (doall (map (try-assoc checked-at) statuses)) - status-popularity-props (remove #(nil? %) status-popularity-props) - status-popularities (bulk-insert-of-status-popularity-props status-popularity-props checked-at status-popularity)] + status-popularity-props (remove #(nil? (:total-retweets %)) status-popularity-props) + status-popularities (try + (bulk-insert-of-status-popularity-props status-popularity-props checked-at status-popularity) + (catch Exception e + (error-handler/log-error e (str "Could not bulk insert status popularity"))))] (doall (map #(timbre/info (str "Saved popularity of status #" (:status-id %))) diff --git a/src/twitter/api_client.clj b/src/twitter/api_client.clj index 674b7c3..4bda5ad 100644 --- a/src/twitter/api_client.clj +++ b/src/twitter/api_client.clj @@ -42,6 +42,7 @@ (def error-unauthorized-favorites-list-access "Twitter responded to request '/1.1/favorites/list.json' with error 401: Not authorized.") (def error-no-status "Twitter responded to request with error 144: No status found with that ID.") (def error-bad-authentication-data "Twitter responded to request with error 215: Bad Authentication data.") +(def error-empty-body "The response body is empty.") ; @see https://clojuredocs.org/clojure.core/declare about making forward declaration (declare find-next-token) @@ -129,13 +130,9 @@ guest-token)) (defn find-first-available-token-when - [endpoint context token-model token-type-model] + [context] (let [selected-token (next-fallback-token) - excluded-access-token @current-access-token - ;excluded-access-tokens (frozen-access-tokens) - ;token-candidate (find-first-available-tokens-other-than excluded-access-tokens token-model token-type-model) - ;selected-token (find-token endpoint token-candidate context token-model token-type-model) - ] + excluded-access-token @current-access-token] (when *api-client-enabled-logging* (timbre/info (str "About to replace access token \"" excluded-access-token "\" with \"" (:token selected-token) "\" when " context))) @@ -236,7 +233,7 @@ (if it-is-frozen (do (set-next-token - (find-first-available-token-when endpoint context token-model token-type-model) + (find-first-available-token-when context) context) (swap! remaining-calls #(assoc % (keyword endpoint) ((keyword endpoint) @call-limits)))) @@ -412,22 +409,52 @@ :total-subscriptions 0} member-model)))))) (defn get-twitter-status-by-id - [props token-model token-type-model] + [props token-model token-type-model & [retry]] (let [status-id (:status-id props)] (do (try (let [fallback-token @next-token + shall-retry (nil? retry) bearer-token (str "Bearer " (:bearer-token env)) - ;response (with-open [client (ac/create-client)] - ; (statuses-show-id - ; :client client - ; :oauth-creds (twitter-credentials @next-token) - ; :params {:id status-id})) - endpoint (str "https://api.twitter.com/1.1/statuses/show.json?id=" status-id "&tweet_mode=extended&include_entities=true") + variables (json/write-str {"focalTweetId" status-id + "with_rux_injections" false + "includePromotedContent" true + "withCommunity" true + "withQuickPromoteEligibilityTweetFields" true + "withBirdwatchNotes" true + "withVoice" true + "withV2Timeline" true}) + features (json/write-str {"rweb_lists_timeline_redesign_enabled" true + "responsive_web_graphql_exclude_directive_enabled" true + "verified_phone_label_enabled" false + "creator_subscriptions_tweet_preview_api_enabled" true + "responsive_web_graphql_timeline_navigation_enabled" true + "responsive_web_graphql_skip_user_profile_image_extensions_enabled" false + "tweetypie_unmention_optimization_enabled" true + "responsive_web_edit_tweet_api_enabled" true + "graphql_is_translatable_rweb_tweet_is_translatable_enabled" true + "view_counts_everywhere_api_enabled" true + "longform_notetweets_consumption_enabled" true + "responsive_web_twitter_article_tweet_consumption_enabled" false + "tweet_awards_web_tipping_enabled" false + "freedom_of_speech_not_reach_fetch_enabled" true + "standardized_nudges_misinfo" true + "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled" true + "longform_notetweets_rich_text_read_enabled" true + "longform_notetweets_inline_media_enabled" true + "responsive_web_media_download_video_enabled" false, + "responsive_web_enhance_cards_enabled" false}) + fieldToggles (json/write-str {"withAuxiliaryUserLabels" false + "withArticleRichContentState" false}) + vars (http-client/generate-query-string {"variables" variables}) + feats (http-client/generate-query-string {"features" features}) + fieldToggles (http-client/generate-query-string {"fieldToggles" fieldToggles}) + publication-endpoint (str (:publication-endpoint env)) + endpoint (str publication-endpoint "?" vars "&" feats "&" fieldToggles) response (try (http-client/get endpoint - {:content-type :json - :accept :json + {:accept :json + :content-type :json :cookie-spec (fn [http-context] (proxy [org.apache.http.impl.cookie.CookieSpecBase] [] ;; Version and version header @@ -443,28 +470,53 @@ (match [cookie cookie-origin] true) ;; Format a list of cookies into a list of headers (formatCookies [cookies] (java.util.ArrayList.)))) - :headers { - :authorization bearer-token, - :accept-language "fr-FR,en;q=0.5", - :connection "keep-alive", - :x-guest-token fallback-token, - :x-twitter-active-user "yes", - :authority "api.twitter.com", - :DNT "1"}}) - (catch Exception e (error-handler/log-error e - (str "An error occurred when fetching response from API: ")))) - _ (update-remaining-calls (:headers response) "statuses/show/:id") + :headers { + :authorization bearer-token, + :accept-language "fr-FR,en;q=0.5", + :connection "keep-alive", + :x-guest-token fallback-token, + :x-twitter-active-user "yes", + :x-twitter-client-language "fr", + :authority "twitter.com", + :DNT "1"}}) + (catch Exception e + (if (nil? retry) + (do + (find-next-token token-model token-type-model "statuses/show/:id" "trying to call \"statuses/show\" with an id") + (timbre/info (str "Rotated access tokens before accessing publication having id #" status-id)) + (get-twitter-status-by-id props token-model token-type-model :retry)) + (error-handler/log-error e + (str "An error occurred when fetching tweet having id \"" status-id "\" from API: "))))) + body (if + (nil? (:body response)) + (throw (Exception. (str error-empty-body))) + (try + (if + (nil? (:full_text (:body response))) + (-> (json/read-json + (:body response)) + :data + :threaded_conversation_with_injections_v2 + :instructions + (get 0) + :entries + (get 0) + :content + :itemContent + :tweet_results + :result + :legacy) + (:body response)) + (catch Exception e + (error-handler/log-error e)))) response (if (nil? response) - (throw (Exception. (str error-page-not-found))) - (assoc response :body (json/read-str (:body response))))] + '() + (assoc response :body body))] (timbre/info (str - "Fetched status having id #" status-id " with consumer key " - ;(subs (:token (deref next-token)) 0 20) - ))subs + "Fetched status having id #" status-id " with consumer key")) response) (catch Exception e - ;(timbre/info (str "{\"token\": \"" (subs (:token (deref next-token)) 0 20) "\"}")) (timbre/warn (.getMessage e)) (cond (page-not-found-exception? e) (make-not-found-statuses-response @@ -478,7 +530,9 @@ (get-twitter-status-by-id props token-model token-type-model)) (string/includes? (.getMessage e) error-no-status) {:error error-no-status} (string/includes? (.getMessage e) error-missing-status-id) {:error error-missing-status-id} - :else (error-handler e))))))) + :else (do + (error-handler e) + {:error error-page-not-found}))))))) (defn know-all-about-remaining-calls-and-limit [] @@ -509,19 +563,9 @@ twitter-user)))) (defn status-by-prop - [props token-model token-type-model context] - ;(if - ; (and - ; (know-all-about-remaining-calls-and-limit) - ; (is-rate-limit-exceeded)) - ; (do - ; (freeze-current-token) - ; (find-next-token token-model token-type-model "statuses/show/:id" context) - ; (status-by-prop props token-model token-type-model context)) + [props token-model token-type-model] (let [twitter-status (get-twitter-status-by-id props token-model token-type-model)] - twitter-status) - ;) - ) + twitter-status)) (defn get-member-by-screen-name [screen-name token-model token-type-model member-model] @@ -542,16 +586,17 @@ (defn get-status-by-id [{id :id status-id :status-id} token-model token-type-model] - (let [status (status-by-prop {:status-id status-id :id id} token-model token-type-model "a call to \"statuses/show\" with an id") + (let [status (status-by-prop {:status-id status-id :id id} token-model token-type-model) headers (:headers status)] (if (and (some? headers) (nil? (:error status))) (do - ;(guard-against-api-rate-limit headers "statuses/show/:id" nil token-model token-type-model) (assoc (:body status) :id id)) - (timbre/info (str "Could not find status having id #" status-id))))) + (do + (timbre/info (str "Could not find status having id #" status-id)) + '())))) (defn get-id-of-member-having-username [screen-name member-model token-model token-type-model] diff --git a/src/twitter/status.clj b/src/twitter/status.clj index 3dec5e2..f77c4db 100644 --- a/src/twitter/status.clj +++ b/src/twitter/status.clj @@ -233,8 +233,9 @@ _ (if (pos? total-statuses) (timbre/info (str "About to fetch " total-statuses " statuse(s).")) (timbre/info (str "No need to find some status."))) - mapped-tweets (doall (map (pmap-by-id token token-type) filtered-statuses-chunk))] - (flatten mapped-tweets) + mapped-tweets (doall (map (pmap-by-id token token-type) filtered-statuses-chunk)) + fmapped-tweets (flatten mapped-tweets)] + fmapped-tweets ;(if ; (and ; (not (nil? remaining-calls))