## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated + ## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated %if library:

${Text(_("Be sure you want to import a library before continuing. The contents of the imported library will replace the contents of the existing library. {em_start}You cannot undo a library import{em_end}. Before you proceed, we recommend that you export the current library, so that you have a backup copy of it.")).format(em_start=HTML(''), em_end=HTML(""))}

${_("The library that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a library.xml file. It may also contain other files.")}

${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your library until the import operation has completed.")}

%else:

${Text(_("Be sure you want to import a course before continuing. The contents of the imported course will replace the contents of the existing course. {em_start}You cannot undo a course import{em_end}. Before you proceed, we recommend that you export the current course, so that you have a backup copy of it.")).format(em_start=HTML(''), em_end=HTML(""))}

-

${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a course.xml file. It may also contain other files.")}

+

${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip) or .zip (that is a compressed file). This .tar.gz or .zip file must contain a course.xml file. It may also contain other files.")}

${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your course until the import operation has completed.")}

%endif @@ -60,11 +61,12 @@

## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated + ## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated

%if library: ${_("Select a .tar.gz File to Replace Your Library Content")} %else: - ${_("Select a .tar.gz File to Replace Your Course Content")} + ${_("Select a .tar.gz or .zip File to Replace Your Course Content")} %endif

diff --git a/openedx/core/lib/extract_archives.py b/openedx/core/lib/extract_archives.py
new file mode 100644
index 000000000000..7fcfb136ace7
--- /dev/null
+++ b/openedx/core/lib/extract_archives.py
@@ -0,0 +1,145 @@
+"""
+Safe version of extractall which does not extract any files that would
+be, or symlink to a file that is, outside of the directory extracted in.
+
+Adapted from:
+http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
+"""
+
+import logging
+import tarfile
+from contextlib import contextmanager
+from os.path import join as joinpath
+from os.path import abspath, dirname, realpath, sep
+from zipfile import ZipFile
+
+from django.conf import settings
+from django.core.exceptions import SuspiciousOperation
+
+log = logging.getLogger(__name__)
+
+
+def _resolved(rpath):
+    """
+    Returns the canonical absolute path of `rpath`.
+    """
+    return realpath(abspath(rpath))
+
+
+def _is_inside(path, base):
+    """
+    Is the canonical path `path` equal to, or located underneath, `base`?
+
+    The comparison is made on whole path components, so that a sibling such
+    as `/data/course_evil` is not treated as being inside `/data/course`.
+    """
+    return (path + sep).startswith(base.rstrip(sep) + sep)
+
+
+def _is_bad_path(path, base):
+    """
+    Is (the canonical absolute path of) `path` outside `base`?
+    """
+    return not _is_inside(_resolved(joinpath(base, path)), base)
+
+
+def _is_bad_link(info, base):
+    """
+    Does the file sym- or hard-link to files outside `base`?
+    """
+    # Links are interpreted relative to the directory containing the link
+    tip = _resolved(joinpath(base, dirname(info.name)))
+    return _is_bad_path(info.linkname, base=tip)
+
+
+def _checkmembers(members, base):
+    """
+    Check that all elements of the archive file are safe.
+
+    Raises SuspiciousOperation if `base` is outside
+    settings.GITHUB_REPO_ROOT, or for the first member that has an illegal
+    path, an unsafe symbolic or hard link, or is a device file.
+    """
+    base = _resolved(base)
+
+    # check that we're not trying to import outside of the github_repo_root
+    if not _is_inside(base, _resolved(settings.GITHUB_REPO_ROOT)):
+        raise SuspiciousOperation(
+            "Attempted to import course outside of data dir")
+
+    for finfo in members:
+        if _is_bad_path(finfo.name, base):
+            log.debug("File %r is blocked (illegal path)", finfo.name)
+            raise SuspiciousOperation("Illegal path")
+        # In tarfile, issym() means symbolic link and islnk() means hard link
+        if finfo.issym() and _is_bad_link(finfo, base):
+            log.debug("File %r is blocked: Symlink to %r", finfo.name,
+                      finfo.linkname)
+            raise SuspiciousOperation("Symlink")
+        if finfo.islnk() and _is_bad_link(finfo, base):
+            log.debug("File %r is blocked: Hard link to %r", finfo.name,
+                      finfo.linkname)
+            raise SuspiciousOperation("Hard link")
+        if finfo.isdev():
+            log.debug("File %r is blocked: FIFO, device or character file",
+                      finfo.name)
+            raise SuspiciousOperation("Dev file")
+
+
+class ZipMemberAdapter:
+    """
+    Adapts a ZipInfo to the member interface used by `_checkmembers`
+    (`name`, `issym()`, `islnk()` and `isdev()`).
+
+    from the stack overflow link above:
+    Starting with python 2.7.4, this is a non-issue for ZIP archives:
+    The method zipfile.extract() prohibits the creation of files
+    outside the sandbox
+    """
+    def __init__(self, zipinfo):
+        self.name = zipinfo.filename
+        self.issym = lambda: False
+        self.islnk = lambda: False
+        self.isdev = lambda: False
+
+
+@contextmanager
+def safe_open_archive(file_name, output_path):
+    """
+    Safely open a Zip or Tar file
+
+    Yields the open archive once all of its members have passed
+    `_checkmembers` against `output_path`, and closes it on exit.
+
+    Raises ValueError if `file_name` is neither a .zip nor a .tar.gz file.
+    """
+    if file_name.endswith('.zip'):
+        archive = ZipFile(file_name, 'r')
+    elif file_name.endswith('.tar.gz'):
+        archive = tarfile.open(file_name)
+    else:
+        raise ValueError("Unsupported archive type: %r" % file_name)
+
+    # `archive` is always bound from here on, so closing it cannot fail with
+    # an UnboundLocalError that would hide the real error.
+    try:
+        if isinstance(archive, ZipFile):
+            members = [
+                ZipMemberAdapter(zipinfo)
+                for zipinfo in archive.infolist()
+            ]
+        else:
+            members = archive.getmembers()
+
+        _checkmembers(members, output_path)
+        yield archive
+    finally:
+        archive.close()
+
+
+def safe_extractall(file_name, output_path):
+    """
+    Extract Zip or Tar files
+    """
+    with safe_open_archive(file_name, output_path) as archive:
+        archive.extractall(output_path)
diff --git a/openedx/core/lib/extract_tar.py b/openedx/core/lib/extract_tar.py
deleted file mode 100644
index e8780efb51b3..000000000000
--- a/openedx/core/lib/extract_tar.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""
-Safe version of tarfile.extractall which does not extract any files that would
-be, or symlink to a file that is, outside of the directory extracted in.
-
-Adapted from:
-http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
-"""
-
-import logging
-from os.path import join as joinpath
-from os.path import abspath, dirname, realpath
-
-from django.conf import settings
-from django.core.exceptions import SuspiciousOperation
-
-log = logging.getLogger(__name__)
-
-
-def resolved(rpath):
-    """
-    Returns the canonical absolute path of `rpath`.
-    """
-    return realpath(abspath(rpath))
-
-
-def _is_bad_path(path, base):
-    """
-    Is (the canonical absolute path of) `path` outside `base`?
-    """
-    return not resolved(joinpath(base, path)).startswith(base)
-
-
-def _is_bad_link(info, base):
-    """
-    Does the file sym- or hard-link to files outside `base`?
-    """
-    # Links are interpreted relative to the directory containing the link
-    tip = resolved(joinpath(base, dirname(info.name)))
-    return _is_bad_path(info.linkname, base=tip)
-
-
-def safemembers(members, base):
-    """
-    Check that all elements of a tar file are safe.
-    """
-
-    base = resolved(base)
-
-    # check that we're not trying to import outside of the github_repo_root
-    if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)):
-        raise SuspiciousOperation("Attempted to import course outside of data dir")
-
-    for finfo in members:
-        if _is_bad_path(finfo.name, base):  # lint-amnesty, pylint: disable=no-else-raise
-            log.debug("File %r is blocked (illegal path)", finfo.name)
-            raise SuspiciousOperation("Illegal path")
-        elif finfo.issym() and _is_bad_link(finfo, base):
-            log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
-            raise SuspiciousOperation("Hard link")
-        elif finfo.islnk() and _is_bad_link(finfo, base):
-            log.debug("File %r is blocked: Symlink to %r", finfo.name,
-                      finfo.linkname)
-            raise SuspiciousOperation("Symlink")
-        elif finfo.isdev():
-            log.debug("File %r is blocked: FIFO, device or character file",
-                      finfo.name)
-            raise SuspiciousOperation("Dev file")
-
-    return members
-
-
-def safetar_extractall(tar_file, path=".", members=None):  # pylint: disable=unused-argument
-    """
-    Safe version of `tar_file.extractall()`.
-    """
-    path = str(path)
-    return tar_file.extractall(path, safemembers(tar_file, path))