Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REST API to export modulestore XBlocks as OLX #23068

Merged
merged 1 commit into from
Feb 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
129 changes: 129 additions & 0 deletions openedx/core/djangoapps/olx_rest_api/adapters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""
Helpers required to adapt to differing APIs
"""
from contextlib import contextmanager
import logging
import re

from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import AssetKey, CourseKey
from fs.memoryfs import MemoryFS
from fs.wrapfs import WrapFS

from static_replace import replace_static_urls
from xmodule.contentstore.content import StaticContent
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.modulestore.django import modulestore as store
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.exceptions import NotFoundError
from xmodule.xml_module import XmlParserMixin

log = logging.getLogger(__name__)


def get_block(usage_key):
"""
Return an XBlock from modulestore.
"""
return store().get_item(usage_key)


def get_asset_content_from_path(course_key, asset_path):
"""
Locate the given asset content, load it into memory, and return it.
Returns None if the asset is not found.
"""
try:
asset_key = StaticContent.get_asset_key_from_path(course_key, asset_path)
return AssetManager.find(asset_key)
except (ItemNotFoundError, NotFoundError):
return None


def rewrite_absolute_static_urls(text, course_id):
"""
Convert absolute URLs like
https://studio-site.opencraft.hosting/asset-v1:LabXchange+101+2019+type@asset+block@SCI_1.2_Image_.png
to the proper
/static/SCI_1.2_Image_.png
format for consistency and portability.
"""
assert isinstance(course_id, CourseKey)
asset_full_url_re = r'https?://[^/]+/(?P<maybe_asset_key>[^\s\'"&]+)'

def check_asset_key(match_obj):
"""
If this URL's path part is an AssetKey from the same course, rewrite it.
"""
try:
asset_key = AssetKey.from_string(match_obj.group('maybe_asset_key'))
except InvalidKeyError:
return match_obj.group(0) # Not an asset key; do not rewrite
if asset_key.course_key == course_id:
return '/static/' + asset_key.path # Rewrite this to portable form
else:
return match_obj.group(0) # From a different course; do not rewrite

return re.sub(asset_full_url_re, check_asset_key, text)


def collect_assets_from_text(text, course_id, include_content=False):
"""
Yield dicts of asset content and path from static asset paths found in the given text.
Make sure to have replaced the URLs with rewrite_absolute_static_urls first.
If include_content is True, the result will include a contentstore
StaticContent file object which wraps the actual binary content of the file.
"""
# Replace static urls like '/static/foo.png'
static_paths = []
# Drag-and-drop-v2 has
# &quot;/static/blah.png&quot;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(sigh)... totally legit comment... but (sigh)

# which must be changed to "/static/blah.png" for replace_static_urls to work:
text2 = text.replace("&quot;", '"')
replace_static_urls(text=text2, course_id=course_id, static_paths_out=static_paths)
for (path, uri) in static_paths:
if path.startswith('/static/'):
path = path[8:]
info = {
'path': path,
'url': '/' + str(course_id.make_asset_key("asset", path)),
}
if include_content:
content = get_asset_content_from_path(course_id, path)
if content is None:
log.error("Static asset not found: (%s, %s)", path, uri)
else:
info['content'] = content
yield info


@contextmanager
def override_export_fs(block):
"""
Hack required for some legacy XBlocks which inherit
XModuleDescriptor.add_xml_to_node() instead of the usual
XmlSerializationMixin.add_xml_to_node() method.
This method temporarily replaces a block's runtime's
'export_fs' system with an in-memory filesystem.
This method also abuses the XmlParserMixin.export_to_file()
API to prevent the XModule export code from exporting each
block as two files (one .olx pointing to one .xml file).
The export_to_file was meant to be used only by the
customtag XModule but it makes our lives here much easier.
"""
fs = WrapFS(MemoryFS())
fs.makedir('course')
fs.makedir('course/static') # Video XBlock requires this directory to exists, to put srt files etc.

old_export_fs = block.runtime.export_fs
block.runtime.export_fs = fs
if hasattr(block, 'export_to_file'):
old_export_to_file = block.export_to_file
block.export_to_file = lambda: False
old_global_export_to_file = XmlParserMixin.export_to_file
XmlParserMixin.export_to_file = lambda _: False # So this applies to child blocks that get loaded during export
yield fs
block.runtime.export_fs = old_export_fs
if hasattr(block, 'export_to_file'):
block.export_to_file = old_export_to_file
XmlParserMixin.export_to_file = old_global_export_to_file
25 changes: 25 additions & 0 deletions openedx/core/djangoapps/olx_rest_api/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
"""
olx_rest_api Django application initialization.
"""
from django.apps import AppConfig

from openedx.core.djangoapps.plugins.constants import PluginURLs, ProjectType


class OlxRestApiAppConfig(AppConfig):
"""
Configuration for the olx_rest_api Django plugin application.
See: https://github.com/edx/edx-platform/blob/master/openedx/core/djangoapps/plugins/README.rst
"""

name = 'openedx.core.djangoapps.olx_rest_api'
verbose_name = 'Modulestore OLX REST API'
plugin_app = {
PluginURLs.CONFIG: {
ProjectType.CMS: {
# The namespace to provide to django's urls.include.
PluginURLs.NAMESPACE: 'olx_rest_api',
},
},
}
163 changes: 163 additions & 0 deletions openedx/core/djangoapps/olx_rest_api/block_serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""
Code for serializing a modulestore XBlock to OLX suitable for import into
Blockstore.
"""
import logging
import os
from collections import namedtuple

from lxml import etree

from . import adapters

log = logging.getLogger(__name__)

# A static file required by an XBlock
StaticFile = namedtuple('StaticFile', ['name', 'url', 'data'])


def blockstore_def_key_from_modulestore_usage_key(usage_key):
"""
In modulestore, the "definition key" is a MongoDB ObjectID kept in split's
definitions table, which theoretically allows the same block to be used in
many places (each with a unique usage key). However, that functionality is
not exposed in Studio (other than via content libraries). So when we import
into Blockstore, we assume that each usage is unique, don't generate a usage
key, and create a new "definition key" from the original usage key.
So modulestore usage key
block-v1:A+B+C+type@html+block@introduction
will become Blockstore definition key
html/introduction
"""
block_type = usage_key.block_type
if block_type == 'vertical':
# We transform <vertical> to <unit>
block_type = "unit"
return block_type + "/" + usage_key.block_id


class XBlockSerializer(object):
"""
This class will serialize an XBlock, producing:
(1) A new definition ID for use in Blockstore
(2) an XML string defining the XBlock and referencing the IDs of its
children (but not containing the actual XML of its children)
(3) a list of any static files required by the XBlock and their URL
"""

def __init__(self, block):
"""
Serialize an XBlock to an OLX string + supporting files, and store the
resulting data in this object.
"""
self.orig_block_key = block.scope_ids.usage_id
self.static_files = []
self.def_id = blockstore_def_key_from_modulestore_usage_key(self.orig_block_key)

# Special cases:
if self.orig_block_key.block_type == 'html':
self.serialize_html_block(block)
else:
self.serialize_normal_block(block)

course_key = self.orig_block_key.course_key
# Search the OLX for references to files stored in the course's
# "Files & Uploads" (contentstore):
self.olx_str = adapters.rewrite_absolute_static_urls(self.olx_str, course_key)
for asset in adapters.collect_assets_from_text(self.olx_str, course_key):
path = asset['path']
if path not in [sf.name for sf in self.static_files]:
self.static_files.append(StaticFile(name=path, url=asset['url'], data=None))

def serialize_normal_block(self, block):
"""
Serialize an XBlock to XML.

This method is used for every block type except HTML, which uses
serialize_html_block() instead.
"""
# Create an XML node to hold the exported data
olx_node = etree.Element("root") # The node name doesn't matter: add_xml_to_node will change it
# ^ Note: We could pass nsmap=xblock.core.XML_NAMESPACES here, but the
# resulting XML namespace attributes don't seem that useful?
with adapters.override_export_fs(block) as filesystem: # Needed for XBlocks that inherit XModuleDescriptor
# Tell the block to serialize itself as XML/OLX:
if not block.has_children:
block.add_xml_to_node(olx_node)
else:
# We don't want the children serialized at this time, because
# otherwise we can't tell which files in 'filesystem' belong to
# this block and which belong to its children. So, temporarily
# disable any children:
children = block.children
block.children = []
block.add_xml_to_node(olx_node)
block.children = children

# Now the block/module may have exported addtional data as files in
# 'filesystem'. If so, store them:
for item in filesystem.walk(): # pylint: disable=not-callable
for unit_file in item.files:
file_path = os.path.join(item.path, unit_file.name)
with filesystem.open(file_path, 'rb') as fh:
data = fh.read()
self.static_files.append(StaticFile(name=unit_file.name, data=data, url=None))
# Apply some transformations to the OLX:
self.transform_olx(olx_node, usage_id=block.scope_ids.usage_id)
# Add <xblock-include /> tags for each child (XBlock XML export
# normally puts children inline as e.g. <html> tags, but we want
# references to them only.)
if block.has_children:
for child_id in block.children:
# In modulestore, the "definition key" is a MongoDB ObjectID
# kept in split's definitions table, which theoretically allows
# the same block to be used in many places (each with a unique
# usage key). However, that functionality is not exposed in
# Studio (other than via content libraries). So when we import
# into Blockstore, we assume that each usage is unique, don't
# generate a usage key, and create a new "definition key" from
# the original usage key.
# So modulestore usage key
# block-v1:A+B+C+type@html+block@introduction
# will become Blockstore definition key
# html+introduction
#
# If we needed the real definition key, we could get it via
# child = block.runtime.get_block(child_id)
# child_def_id = str(child.scope_ids.def_id)
# and then use
# <xblock-include definition={child_def_id} usage={child_id.block_id} />
def_id = blockstore_def_key_from_modulestore_usage_key(child_id)
olx_node.append(olx_node.makeelement("xblock-include", {"definition": def_id}))
# Store the resulting XML as a string:
self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True)

def serialize_html_block(self, block):
"""
Special case handling for HTML blocks
"""
olx_node = etree.Element("html")
if block.display_name:
olx_node.attrib["display_name"] = block.display_name
olx_node.text = etree.CDATA("\n" + block.data + "\n")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bradenmacdonald Something I stumbled into while unit testing upstream_sync: this line causes html_block.data to be wrapped in newlines, if it's not wrapped already. Does that sound right? It doesn't change the meaning of the HTML, so I think it's fine?

Minimal repro:

# Boilerplate...
from organizations.api import ensure_organization
from organizations.models import Organization
from openedx.core.djangoapps.content_libraries import api as libs
from openedx.core.djangoapps.xblock import api as xblock
from django.contrib.auth.models import User
user = User.objects.get(username="openedx")
ensure_organization("TestX")
library = libs.create_library(
	org=Organization.objects.get(short_name="TestX"), slug="TestLib", title="Test Library"
)
block_key = libs.create_library_block(library.key, "html", "test-newlines").usage_key

# Initial block data: No newlines
block = xblock.load_block(block_key, user)
block.data = "<html><body>Hello</body></html>"
print(repr(block.data))  # '<html><body>Hello</body></html>'
block.save()

# First round-trip: data is wrapped in newlines
block = xblock.load_block(block_key, user)
print(repr(block.data))  # '\n<html><body>Hello</body></html>\n'
block.save()

# Second round-trip: data is stilled wrapped in newlines
# Fortunately, it didn't add another pair of newlines.
block = xblock.load_block(block_key, user)
print(repr(block.data))  # '\n<html><body>Hello</body></html>\n'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, yeah I don't think we need the newlines.

Copy link
Member

@kdmccormick kdmccormick Sep 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bradenmacdonald Actually, it's a little worse than I thought. As long as any block fields are edited, then it always adds a pair of newlines to .data, regardless of whether it's already wrapped in a pair.

print(repr(block.data))  # '\n<html><body>Hello</body></html>\n'
block.display_name = "blah"
block.save()

block = xblock.load_block(block_key, user)
print(repr(block.data))  # '\n\n<html><body>Hello</body></html>\n\n'

This still wouldn't break anything for end users AFAICT, but it'll make unit tests awkward, and it'll probably bother folks who edit OLX directly. I'll open a bug ticket. (EDIT: #35525)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll remove the newlines and see if any unit tests break.

self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True)

def transform_olx(self, olx_node, usage_id):
"""
Apply transformations to the given OLX etree Node.
"""
# Remove 'url_name' - we store the definition key in the folder name
# that holds the OLX and the usage key elsewhere, so specifying it
# within the OLX file is redundant and can lead to issues if the file is
# copied and pasted elsewhere in the bundle with a new definition key.
olx_node.attrib.pop('url_name', None)
# Convert <vertical> to the new <unit> tag/block
if olx_node.tag == 'vertical':
olx_node.tag = 'unit'
for key in olx_node.attrib.keys():
if key not in ('display_name', 'url_name'):
log.warning(
'<vertical> tag attribute "%s" will be ignored after conversion to <unit> (in %s)',
key,
str(usage_id)
)
49 changes: 49 additions & 0 deletions openedx/core/djangoapps/olx_rest_api/test_adapters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Test the OLX REST API adapters code
"""
import unittest

from opaque_keys.edx.keys import CourseKey

from openedx.core.djangoapps.olx_rest_api import adapters


class TestAdapters(unittest.TestCase):
"""
Test the OLX REST API adapters code
"""

def test_rewrite_absolute_static_urls(self):
"""
Test that rewrite_absolute_static_urls() can find and replace all uses
of absolute Studio URLs in a course.

Some criteria:
- Rewriting only happens if the course ID is the same. If the absolute
URL points to a different course, the new /static/foo.png form won't
work.
"""
# Note that this doesn't have to be well-formed OLX
course_id = CourseKey.from_string("course-v1:TestCourse+101+2020")
olx_in = """
<problem>
<img src="https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@SCI_1.2_Image_.png">
<a href='https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@Québec.html'>
View a file with accented characters in the filename.
</a>
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>.
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png">
</problem>
"""
olx_expected = """
<problem>
<img src="/static/SCI_1.2_Image_.png">
<a href='/static/Québec.html'>
View a file with accented characters in the filename.
</a>
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>.
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png">
</problem>
"""
olx_out = adapters.rewrite_absolute_static_urls(olx_in, course_id)
self.assertEqual(olx_out, olx_expected)
Loading