-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #23068 from open-craft/olx-rest-api
REST API to export modulestore XBlocks as OLX
- Loading branch information
Showing
9 changed files
with
625 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
""" | ||
Helpers required to adapt to differing APIs | ||
""" | ||
from contextlib import contextmanager | ||
import logging | ||
import re | ||
|
||
from opaque_keys import InvalidKeyError | ||
from opaque_keys.edx.keys import AssetKey, CourseKey | ||
from fs.memoryfs import MemoryFS | ||
from fs.wrapfs import WrapFS | ||
|
||
from static_replace import replace_static_urls | ||
from xmodule.contentstore.content import StaticContent | ||
from xmodule.assetstore.assetmgr import AssetManager | ||
from xmodule.modulestore.django import modulestore as store | ||
from xmodule.modulestore.exceptions import ItemNotFoundError | ||
from xmodule.exceptions import NotFoundError | ||
from xmodule.xml_module import XmlParserMixin | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
def get_block(usage_key): | ||
""" | ||
Return an XBlock from modulestore. | ||
""" | ||
return store().get_item(usage_key) | ||
|
||
|
||
def get_asset_content_from_path(course_key, asset_path): | ||
""" | ||
Locate the given asset content, load it into memory, and return it. | ||
Returns None if the asset is not found. | ||
""" | ||
try: | ||
asset_key = StaticContent.get_asset_key_from_path(course_key, asset_path) | ||
return AssetManager.find(asset_key) | ||
except (ItemNotFoundError, NotFoundError): | ||
return None | ||
|
||
|
||
def rewrite_absolute_static_urls(text, course_id): | ||
""" | ||
Convert absolute URLs like | ||
https://studio-site.opencraft.hosting/asset-v1:LabXchange+101+2019+type@asset+block@SCI_1.2_Image_.png | ||
to the proper | ||
/static/SCI_1.2_Image_.png | ||
format for consistency and portability. | ||
""" | ||
assert isinstance(course_id, CourseKey) | ||
asset_full_url_re = r'https?://[^/]+/(?P<maybe_asset_key>[^\s\'"&]+)' | ||
|
||
def check_asset_key(match_obj): | ||
""" | ||
If this URL's path part is an AssetKey from the same course, rewrite it. | ||
""" | ||
try: | ||
asset_key = AssetKey.from_string(match_obj.group('maybe_asset_key')) | ||
except InvalidKeyError: | ||
return match_obj.group(0) # Not an asset key; do not rewrite | ||
if asset_key.course_key == course_id: | ||
return '/static/' + asset_key.path # Rewrite this to portable form | ||
else: | ||
return match_obj.group(0) # From a different course; do not rewrite | ||
|
||
return re.sub(asset_full_url_re, check_asset_key, text) | ||
|
||
|
||
def collect_assets_from_text(text, course_id, include_content=False): | ||
""" | ||
Yield dicts of asset content and path from static asset paths found in the given text. | ||
Make sure to have replaced the URLs with rewrite_absolute_static_urls first. | ||
If include_content is True, the result will include a contentstore | ||
StaticContent file object which wraps the actual binary content of the file. | ||
""" | ||
# Replace static urls like '/static/foo.png' | ||
static_paths = [] | ||
# Drag-and-drop-v2 has | ||
# "/static/blah.png" | ||
# which must be changed to "/static/blah.png" for replace_static_urls to work: | ||
text2 = text.replace(""", '"') | ||
replace_static_urls(text=text2, course_id=course_id, static_paths_out=static_paths) | ||
for (path, uri) in static_paths: | ||
if path.startswith('/static/'): | ||
path = path[8:] | ||
info = { | ||
'path': path, | ||
'url': '/' + str(course_id.make_asset_key("asset", path)), | ||
} | ||
if include_content: | ||
content = get_asset_content_from_path(course_id, path) | ||
if content is None: | ||
log.error("Static asset not found: (%s, %s)", path, uri) | ||
else: | ||
info['content'] = content | ||
yield info | ||
|
||
|
||
@contextmanager | ||
def override_export_fs(block): | ||
""" | ||
Hack required for some legacy XBlocks which inherit | ||
XModuleDescriptor.add_xml_to_node() instead of the usual | ||
XmlSerializationMixin.add_xml_to_node() method. | ||
This method temporarily replaces a block's runtime's | ||
'export_fs' system with an in-memory filesystem. | ||
This method also abuses the XmlParserMixin.export_to_file() | ||
API to prevent the XModule export code from exporting each | ||
block as two files (one .olx pointing to one .xml file). | ||
The export_to_file was meant to be used only by the | ||
customtag XModule but it makes our lives here much easier. | ||
""" | ||
fs = WrapFS(MemoryFS()) | ||
fs.makedir('course') | ||
fs.makedir('course/static') # Video XBlock requires this directory to exists, to put srt files etc. | ||
|
||
old_export_fs = block.runtime.export_fs | ||
block.runtime.export_fs = fs | ||
if hasattr(block, 'export_to_file'): | ||
old_export_to_file = block.export_to_file | ||
block.export_to_file = lambda: False | ||
old_global_export_to_file = XmlParserMixin.export_to_file | ||
XmlParserMixin.export_to_file = lambda _: False # So this applies to child blocks that get loaded during export | ||
yield fs | ||
block.runtime.export_fs = old_export_fs | ||
if hasattr(block, 'export_to_file'): | ||
block.export_to_file = old_export_to_file | ||
XmlParserMixin.export_to_file = old_global_export_to_file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
olx_rest_api Django application initialization. | ||
""" | ||
from django.apps import AppConfig | ||
|
||
from openedx.core.djangoapps.plugins.constants import PluginURLs, ProjectType | ||
|
||
|
||
class OlxRestApiAppConfig(AppConfig): | ||
""" | ||
Configuration for the olx_rest_api Django plugin application. | ||
See: https://github.com/edx/edx-platform/blob/master/openedx/core/djangoapps/plugins/README.rst | ||
""" | ||
|
||
name = 'openedx.core.djangoapps.olx_rest_api' | ||
verbose_name = 'Modulestore OLX REST API' | ||
plugin_app = { | ||
PluginURLs.CONFIG: { | ||
ProjectType.CMS: { | ||
# The namespace to provide to django's urls.include. | ||
PluginURLs.NAMESPACE: 'olx_rest_api', | ||
}, | ||
}, | ||
} |
163 changes: 163 additions & 0 deletions
163
openedx/core/djangoapps/olx_rest_api/block_serializer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
""" | ||
Code for serializing a modulestore XBlock to OLX suitable for import into | ||
Blockstore. | ||
""" | ||
import logging | ||
import os | ||
from collections import namedtuple | ||
|
||
from lxml import etree | ||
|
||
from . import adapters | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
# A static file required by an XBlock | ||
StaticFile = namedtuple('StaticFile', ['name', 'url', 'data']) | ||
|
||
|
||
def blockstore_def_key_from_modulestore_usage_key(usage_key): | ||
""" | ||
In modulestore, the "definition key" is a MongoDB ObjectID kept in split's | ||
definitions table, which theoretically allows the same block to be used in | ||
many places (each with a unique usage key). However, that functionality is | ||
not exposed in Studio (other than via content libraries). So when we import | ||
into Blockstore, we assume that each usage is unique, don't generate a usage | ||
key, and create a new "definition key" from the original usage key. | ||
So modulestore usage key | ||
block-v1:A+B+C+type@html+block@introduction | ||
will become Blockstore definition key | ||
html/introduction | ||
""" | ||
block_type = usage_key.block_type | ||
if block_type == 'vertical': | ||
# We transform <vertical> to <unit> | ||
block_type = "unit" | ||
return block_type + "/" + usage_key.block_id | ||
|
||
|
||
class XBlockSerializer(object): | ||
""" | ||
This class will serialize an XBlock, producing: | ||
(1) A new definition ID for use in Blockstore | ||
(2) an XML string defining the XBlock and referencing the IDs of its | ||
children (but not containing the actual XML of its children) | ||
(3) a list of any static files required by the XBlock and their URL | ||
""" | ||
|
||
def __init__(self, block): | ||
""" | ||
Serialize an XBlock to an OLX string + supporting files, and store the | ||
resulting data in this object. | ||
""" | ||
self.orig_block_key = block.scope_ids.usage_id | ||
self.static_files = [] | ||
self.def_id = blockstore_def_key_from_modulestore_usage_key(self.orig_block_key) | ||
|
||
# Special cases: | ||
if self.orig_block_key.block_type == 'html': | ||
self.serialize_html_block(block) | ||
else: | ||
self.serialize_normal_block(block) | ||
|
||
course_key = self.orig_block_key.course_key | ||
# Search the OLX for references to files stored in the course's | ||
# "Files & Uploads" (contentstore): | ||
self.olx_str = adapters.rewrite_absolute_static_urls(self.olx_str, course_key) | ||
for asset in adapters.collect_assets_from_text(self.olx_str, course_key): | ||
path = asset['path'] | ||
if path not in [sf.name for sf in self.static_files]: | ||
self.static_files.append(StaticFile(name=path, url=asset['url'], data=None)) | ||
|
||
def serialize_normal_block(self, block): | ||
""" | ||
Serialize an XBlock to XML. | ||
This method is used for every block type except HTML, which uses | ||
serialize_html_block() instead. | ||
""" | ||
# Create an XML node to hold the exported data | ||
olx_node = etree.Element("root") # The node name doesn't matter: add_xml_to_node will change it | ||
# ^ Note: We could pass nsmap=xblock.core.XML_NAMESPACES here, but the | ||
# resulting XML namespace attributes don't seem that useful? | ||
with adapters.override_export_fs(block) as filesystem: # Needed for XBlocks that inherit XModuleDescriptor | ||
# Tell the block to serialize itself as XML/OLX: | ||
if not block.has_children: | ||
block.add_xml_to_node(olx_node) | ||
else: | ||
# We don't want the children serialized at this time, because | ||
# otherwise we can't tell which files in 'filesystem' belong to | ||
# this block and which belong to its children. So, temporarily | ||
# disable any children: | ||
children = block.children | ||
block.children = [] | ||
block.add_xml_to_node(olx_node) | ||
block.children = children | ||
|
||
# Now the block/module may have exported addtional data as files in | ||
# 'filesystem'. If so, store them: | ||
for item in filesystem.walk(): # pylint: disable=not-callable | ||
for unit_file in item.files: | ||
file_path = os.path.join(item.path, unit_file.name) | ||
with filesystem.open(file_path, 'rb') as fh: | ||
data = fh.read() | ||
self.static_files.append(StaticFile(name=unit_file.name, data=data, url=None)) | ||
# Apply some transformations to the OLX: | ||
self.transform_olx(olx_node, usage_id=block.scope_ids.usage_id) | ||
# Add <xblock-include /> tags for each child (XBlock XML export | ||
# normally puts children inline as e.g. <html> tags, but we want | ||
# references to them only.) | ||
if block.has_children: | ||
for child_id in block.children: | ||
# In modulestore, the "definition key" is a MongoDB ObjectID | ||
# kept in split's definitions table, which theoretically allows | ||
# the same block to be used in many places (each with a unique | ||
# usage key). However, that functionality is not exposed in | ||
# Studio (other than via content libraries). So when we import | ||
# into Blockstore, we assume that each usage is unique, don't | ||
# generate a usage key, and create a new "definition key" from | ||
# the original usage key. | ||
# So modulestore usage key | ||
# block-v1:A+B+C+type@html+block@introduction | ||
# will become Blockstore definition key | ||
# html+introduction | ||
# | ||
# If we needed the real definition key, we could get it via | ||
# child = block.runtime.get_block(child_id) | ||
# child_def_id = str(child.scope_ids.def_id) | ||
# and then use | ||
# <xblock-include definition={child_def_id} usage={child_id.block_id} /> | ||
def_id = blockstore_def_key_from_modulestore_usage_key(child_id) | ||
olx_node.append(olx_node.makeelement("xblock-include", {"definition": def_id})) | ||
# Store the resulting XML as a string: | ||
self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True) | ||
|
||
def serialize_html_block(self, block): | ||
""" | ||
Special case handling for HTML blocks | ||
""" | ||
olx_node = etree.Element("html") | ||
if block.display_name: | ||
olx_node.attrib["display_name"] = block.display_name | ||
olx_node.text = etree.CDATA("\n" + block.data + "\n") | ||
self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True) | ||
|
||
def transform_olx(self, olx_node, usage_id): | ||
""" | ||
Apply transformations to the given OLX etree Node. | ||
""" | ||
# Remove 'url_name' - we store the definition key in the folder name | ||
# that holds the OLX and the usage key elsewhere, so specifying it | ||
# within the OLX file is redundant and can lead to issues if the file is | ||
# copied and pasted elsewhere in the bundle with a new definition key. | ||
olx_node.attrib.pop('url_name', None) | ||
# Convert <vertical> to the new <unit> tag/block | ||
if olx_node.tag == 'vertical': | ||
olx_node.tag = 'unit' | ||
for key in olx_node.attrib.keys(): | ||
if key not in ('display_name', 'url_name'): | ||
log.warning( | ||
'<vertical> tag attribute "%s" will be ignored after conversion to <unit> (in %s)', | ||
key, | ||
str(usage_id) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
""" | ||
Test the OLX REST API adapters code | ||
""" | ||
import unittest | ||
|
||
from opaque_keys.edx.keys import CourseKey | ||
|
||
from openedx.core.djangoapps.olx_rest_api import adapters | ||
|
||
|
||
class TestAdapters(unittest.TestCase): | ||
""" | ||
Test the OLX REST API adapters code | ||
""" | ||
|
||
def test_rewrite_absolute_static_urls(self): | ||
""" | ||
Test that rewrite_absolute_static_urls() can find and replace all uses | ||
of absolute Studio URLs in a course. | ||
Some criteria: | ||
- Rewriting only happens if the course ID is the same. If the absolute | ||
URL points to a different course, the new /static/foo.png form won't | ||
work. | ||
""" | ||
# Note that this doesn't have to be well-formed OLX | ||
course_id = CourseKey.from_string("course-v1:TestCourse+101+2020") | ||
olx_in = """ | ||
<problem> | ||
<img src="https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@SCI_1.2_Image_.png"> | ||
<a href='https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@Québec.html'> | ||
View a file with accented characters in the filename. | ||
</a> | ||
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>. | ||
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png"> | ||
</problem> | ||
""" | ||
olx_expected = """ | ||
<problem> | ||
<img src="/static/SCI_1.2_Image_.png"> | ||
<a href='/static/Québec.html'> | ||
View a file with accented characters in the filename. | ||
</a> | ||
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>. | ||
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png"> | ||
</problem> | ||
""" | ||
olx_out = adapters.rewrite_absolute_static_urls(olx_in, course_id) | ||
self.assertEqual(olx_out, olx_expected) |
Oops, something went wrong.