From c2a49eeaae9827920b8cc23a2e91e20f28414153 Mon Sep 17 00:00:00 2001 From: Matti Harjula Date: Wed, 25 Oct 2023 10:52:40 +0300 Subject: [PATCH] Move the logic for inclusion of remote content to be a VLE specific part so that should a VLE want to apply some sort of proxy settings they now can. In preparation for future also include special paths for shared CASText2 templates in addition to the current contrib for shared CAS content. --- stack/cas/castext2/blocks/include.block.php | 15 +--- stack/maximaparser/utils.php | 19 +---- vle_specific.php | 83 +++++++++++++++++++++ 3 files changed, 89 insertions(+), 28 deletions(-) diff --git a/stack/cas/castext2/blocks/include.block.php b/stack/cas/castext2/blocks/include.block.php index 92ba03e151b..c84073a8a49 100644 --- a/stack/cas/castext2/blocks/include.block.php +++ b/stack/cas/castext2/blocks/include.block.php @@ -40,19 +40,8 @@ */ class stack_cas_castext2_include extends stack_cas_castext2_block { - // Avoid retrieving the same file multiple times during the same request. - private static $extcache = []; - - private static function file_get_contents($url) { - if (isset(self::$extcache[$url])) { - return self::$extcache[$url]; - } - self::$extcache[$url] = file_get_contents($url); - return self::$extcache[$url]; - } - public function compile($format, $options): ?MP_Node { - $src = self::file_get_contents($this->params['src']); + $src = stack_fetch_included_content($this->params['src']); if (isset($options['in include'])) { // We will need to rethink the validate_extract_attributes()-logic // to extract casstrings from nested inclusions. Also loops... @@ -79,7 +68,7 @@ public function is_flat(): bool { public function validate_extract_attributes(): array { // This is tricky, we need to validate the attributes of the included content. // To do that we need to retrieve it and process it again, luckily this gets cached. 
- $src = self::file_get_contents($this->params['src']); + $src = stack_fetch_included_content($this->params['src']); if ($src === false) { throw new stack_exception('Include block source not accessible: ' . $this->params['src']); } diff --git a/stack/maximaparser/utils.php b/stack/maximaparser/utils.php index 0247f7234ba..b1ca6a8dd02 100644 --- a/stack/maximaparser/utils.php +++ b/stack/maximaparser/utils.php @@ -24,6 +24,7 @@ require_once(__DIR__ . '/autogen/parser.mbstring.php'); // Also needs stack_string(). require_once(__DIR__ . '/../../locallib.php'); +require_once(__DIR__ . '/../../vle_specific.php'); require_once(__DIR__ . '/../utils.class.php'); require_once(__DIR__ . '/MP_classes.php'); @@ -247,8 +248,6 @@ public static function parse_and_insert_missing_semicolons($str, $lastfix = -1) // Generates errors if inclusions within inclusions or inclusions in unexpected places. // Returns either the AST or some form of an exception. public static function parse_and_insert_missing_semicolons_with_includes($str) { - static $remotes = []; - $root = self::parse_and_insert_missing_semicolons($str); if ($root instanceof MP_Root) { if (isset($root->position['fixedsemicolons'])) { @@ -259,7 +258,7 @@ public static function parse_and_insert_missing_semicolons_with_includes($str) { // Ok now seek for the inclusions if any are there. $includecount = 0; $errors = []; - $include = function($node) use (&$includecount, &$errors, &$remotes) { + $include = function($node) use (&$includecount, &$errors) { if ($node instanceof MP_FunctionCall && $node->name instanceof MP_Atom && ($node->name->value === 'stack_include' || $node->name->value === 'stack_include_contrib')) { // Now the first requirement for this is that this must be a top level item @@ -275,19 +274,9 @@ public static function parse_and_insert_missing_semicolons_with_includes($str) { // such stuff. 
$remoteurl = $node->arguments[0]->value; if ($node->name->value === 'stack_include_contrib') { - $remoteurl = 'https://raw.githubusercontent.com/maths/moodle-qtype_stack/' . - 'master/stack/maxima/contrib/' . $remoteurl; - } - if (isset($remotes[$remoteurl])) { - $srccode = $remotes[$remoteurl]; - } else { - $fileheaders = get_headers($remoteurl); - $srccode = false; - if (strpos($fileheaders[0], '404') === false) { - $srccode = file_get_contents($remoteurl); - $remotes[$remoteurl] = $srccode; - } + $remoteurl = 'contrib://' . $remoteurl; } + $srccode = stack_fetch_included_content($remoteurl); if ($srccode === false) { // Do not give the address in the output. $errors[] = 'stack_include or stack_include_contrib, could not retrieve: ' . $remoteurl; diff --git a/vle_specific.php b/vle_specific.php index d4db739788a..30bd61fd764 100644 --- a/vle_specific.php +++ b/vle_specific.php @@ -210,3 +210,86 @@ function question_display_options() { $options->suppressruntestslink = true; return $options; } + + +/* + * This uses whatever methods the VLE wants to use to fetch included urls + * for the inclusion methods and can do caching at the request level. + * + * The requirements are as follows: + * 1. Must not cache, over multiple requests, the inclusion must use + * remote version at the time of inclusion. + * 2. Supports inclusion from http(s)://, contrib(l):// and template(l):// + * URLs. + * 3. contrib:// is special shorthand for fetching a file from a particular + * GitHub side folder. If the "l" suffix is there then the file will be read + * from a matching local folder, if fetching from GitHub fails we do not + * automatically fall-back to the local version. + * 4. template:// is similar but has a different folder. + * + * contrib:// is for CAS side stuff and template:// is for CASText side stuff. + * + * Returns the string content of the URL/file. On failure returns false.
+ */
+function stack_fetch_included_content(string $url): string | bool {
+    // Per-request memo of fetched content (or false), keyed by the resolved address.
+    static $cache = [];
+    // Shorthand schemes mapped to [remote base URL, local base directory].
+    // A trailing "l" on the scheme (e.g. contribl://) selects the local directory.
+    $roots = [
+        'contrib' => [
+            'https://raw.githubusercontent.com/maths/moodle-qtype_stack/' .
+                'master/stack/maxima/contrib/',
+            __DIR__ . '/stack/maxima/contrib/',
+        ],
+        'template' => [
+            'https://raw.githubusercontent.com/maths/moodle-qtype_stack/' .
+                'master/stack/cas/castext2/template/',
+            __DIR__ . '/stack/cas/castext2/template/',
+        ],
+    ];
+    $lc = trim(strtolower($url));
+    $good = false;
+    $islocalfile = false;
+    // Not actually passing the $error out now, it is here for documentation
+    // and possible future use.
+    $error = 'Not a fetchable URL type.';
+    $translated = $url;
+    if (strpos($lc, 'http://') === 0 || strpos($lc, 'https://') === 0) {
+        $good = true;
+    } else {
+        foreach ($roots as $scheme => [$remotebase, $localbase]) {
+            $wantslocal = strpos($lc, $scheme . 'l://') === 0;
+            if (!$wantslocal && strpos($lc, $scheme . '://') !== 0) {
+                continue;
+            }
+            // Take the path from the original $url so its letter case is preserved.
+            $path = explode('://', $url, 2)[1];
+            // Note that strpos() takes the haystack first: search inside $path for
+            // '..' anywhere and for a leading '/'.
+            if (strpos($path, '..') !== false || strpos($path, '/') === 0) {
+                $error = 'Traversing the directory tree is forbidden.';
+            } else {
+                $good = true;
+                $islocalfile = $wantslocal;
+                $translated = ($wantslocal ? $localbase : $remotebase) . $path;
+            }
+            break;
+        }
+    }
+
+    if (!$good) {
+        // Unsupported scheme or rejected path, remember the failure for this request.
+        $cache[$translated] = false;
+        return false;
+    }
+
+    // The isset() is also true for a cached false, so failed fetches are not retried.
+    if (!isset($cache[$translated])) {
+        // Feel free to apply any proxying here if you want.
+        // Just remember that $islocalfile might be true and you might do
+        // something else then.
+        if ($islocalfile) {
+            $cache[$translated] = file_get_contents($translated);
+        } else {
+            $headers = get_headers($translated);
+            // The get_headers() call returns false when the request itself fails,
+            // treat that like a 404 instead of indexing into a non-array.
+            if ($headers === false || strpos($headers[0], '404') !== false) {
+                $cache[$translated] = false;
+            } else {
+                $cache[$translated] = file_get_contents($translated);
+            }
+        }
+    }
+    return $cache[$translated];
+}
\ No newline at end of file