Skip to content

Commit

Permalink
api - training.data - optionally include text
Browse files Browse the repository at this point in the history
  • Loading branch information
DamianZaremba committed Feb 11, 2022
1 parent 55045a8 commit fa77051
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions api/modules/training_data.module.php
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,37 @@ public function content()
),
);

/*
* Optionally include the revision text from Wikipedia
*/
if (isset($_REQUEST['include_text'])) {
$mh = curl_multi_init();

$curl_worker_previous = curl_init();
curl_setopt($curl_worker_previous, CURLOPT_URL, 'https://en.wikipedia.org/w/index.php?action=raw&diff=' . (int)$previous_revision_row['rev_id']);
curl_setopt($curl_worker_previous, CURLOPT_RETURNTRANSFER, true);
curl_multi_add_handle($mh, $curl_worker_previous);

$curl_worker_current = curl_init();
curl_setopt($curl_worker_current, CURLOPT_URL, 'https://en.wikipedia.org/w/index.php?action=raw&diff=' . (int)$revision_row['rev_id']);
curl_setopt($curl_worker_current, CURLOPT_RETURNTRANSFER, true);
curl_multi_add_handle($mh, $curl_worker_current);

do {
$status = curl_multi_exec($mh, $active);
if ($active) {
curl_multi_select($mh);
}
} while ($active && $status == CURLM_OK);

curl_multi_remove_handle($mh, $curl_worker_previous);
curl_multi_remove_handle($mh, $curl_worker_current);
curl_multi_close($mh);

$data['previous']['text'] = curl_multi_getcontent($curl_worker_previous);
$data['current']['text'] = curl_multi_getcontent($curl_worker_current);
}

/*
* The following logic is similar to https://github.com/cluebotng/bot/blob/main/mysql_functions.php
* However it differs in using an explict diff id & time spans
Expand Down

0 comments on commit fa77051

Please sign in to comment.