From d149f3dbaee44c52c434a5f9b0583a6f3f77b227 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?=
Date: Wed, 9 Oct 2024 16:01:44 +0200
Subject: [PATCH] feat: improve executor capabilities

---
NOTE(review): two fixes were folded into this patch during review:
  * executor/server.rs: the dependency-guessing step was guarded by
    `interpreter == "python"`, but the match above only ever yields "xonsh"
    or "bash", so `upm guess` / `pip install` could never run. The guard
    now tests `file_extension == "py"`.
  * executor/sitecustomize.py: the write_videofile wrapper passed
    verbose=False and logger=None next to **kwargs, which raises TypeError
    as soon as a caller supplies either keyword itself. The overrides are
    now merged into kwargs.
NOTE(review): a few spans look like they lost `<...>` text before this copy
was made: the From: address, Rust generics such as `Option,` and
`web::Json)`, and the server.rs hunk header at -176/+187, which runs
straight into `std::io::Result<()>` (the body of that hunk and the header
of the following one appear to be missing). They are reproduced as found;
confirm against commit d149f3d before applying.

 executor/.dockerignore         |  3 +-
 executor/Dockerfile            | 38 ++++++++++++++++++--
 executor/ffmpeg-wrapper        |  2 ++
 executor/requirements-skip.txt |  1 +
 executor/requirements.txt      |  7 ++--
 executor/server.rs             | 64 ++++++++++++++++++++--------------
 executor/sitecustomize.py      | 13 +++++--
 7 files changed, 93 insertions(+), 35 deletions(-)
 create mode 100755 executor/ffmpeg-wrapper

diff --git a/executor/.dockerignore b/executor/.dockerignore
index e52a897..d0eff70 100644
--- a/executor/.dockerignore
+++ b/executor/.dockerignore
@@ -6,4 +6,5 @@
 !sitecustomize.py
 !requirements.txt
 !requirements-skip.txt
-!pandoc-wrapper
\ No newline at end of file
+!pandoc-wrapper
+!ffmpeg-wrapper
\ No newline at end of file
diff --git a/executor/Dockerfile b/executor/Dockerfile
index 51568a0..9a1af35 100644
--- a/executor/Dockerfile
+++ b/executor/Dockerfile
@@ -35,24 +35,48 @@ RUN apk add --no-cache git make gcc g++ musl-dev && \
     make install
 
 FROM docker.io/alpine:${ALPINE_VERSION} AS runtime
-RUN apk add --no-cache \
+RUN apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/testing \
+    bash \
     coreutils \
     ffmpeg \
     font-dejavu \
+    ghostscript-fonts \
     imagemagick \
+    opencv \
     pandoc-cli \
     python3 \
+    py3-ffmpeg \
+    py3-ffmpeg-pyc \
     py3-jinja2 \
+    py3-jinja2-pyc \
     py3-matplotlib \
+    py3-matplotlib-pyc \
     py3-matplotlib-venn \
+    py3-matplotlib-venn-pyc \
+    py3-moviepy \
+    py3-moviepy-pyc \
     py3-numpy \
+    py3-numpy-pyc \
+    py3-opencv \
+    py3-opencv-pyc \
     py3-pandas \
+    py3-pandas-pyc \
+    py3-pikepdf \
+    py3-pikepdf-pyc \
     py3-pillow \
+    py3-pillow-pyc \
     py3-pip \
+    py3-pip-pyc \
+    py3-pypandoc \
+    py3-pypandoc-pyc \
     py3-scipy \
+    py3-scipy-pyc \
     py3-sympy \
+    py3-sympy-pyc \
     py3-tabulate \
+    py3-tabulate-pyc \
     py3-xarray \
+    py3-xarray-pyc \
     weasyprint
 RUN mkdir /workspace && chmod 777 /workspace
 WORKDIR /workspace
@@ -61,13 +85,21 @@ USER 1001050000
 # Python
 COPY requirements.txt requirements-skip.txt /
 RUN python -m venv --system-site-packages /tmp/venv && /tmp/venv/bin/pip install --no-cache-dir -r /requirements.txt
-COPY sitecustomize.py /venv/lib/python3.12/site-packages
+COPY sitecustomize.py /tmp/venv/lib/python3.12/site-packages
 ENV PATH="/tmp/venv/bin:$PATH" \
-    MPLCONFIGDIR="/tmp/mplconfigdir"
+    MPLCONFIGDIR="/tmp/mplconfigdir" \
+    XDG_CACHE_HOME="/tmp/.cache" \
+    HOME="/tmp/home"
 
 # Pandoc
 COPY pandoc-wrapper /usr/local/bin/pandoc
 
+# ffmpeg
+COPY ffmpeg-wrapper /usr/local/bin/ffmpeg
+
+# xonsh
+ENV XONSH_SHOW_TRACEBACK=True
+
 # UPM
 COPY --from=build-upm /go/bin/upm /usr/local/bin/upm
 COPY --from=build-upm /workdir/upm/internal/backends/python/pypi_map.sqlite /pypi_map.sqlite
diff --git a/executor/ffmpeg-wrapper b/executor/ffmpeg-wrapper
new file mode 100755
index 0000000..25d9bae
--- /dev/null
+++ b/executor/ffmpeg-wrapper
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec /usr/bin/ffmpeg -hide_banner -loglevel error "$@"
diff --git a/executor/requirements-skip.txt b/executor/requirements-skip.txt
index 9bc3c51..3ffb867 100644
--- a/executor/requirements-skip.txt
+++ b/executor/requirements-skip.txt
@@ -1 +1,2 @@
 ffmpeg # actually ffmpeg-python
+opencv-python # installed through OS package
\ No newline at end of file
diff --git a/executor/requirements.txt b/executor/requirements.txt
index 1cab161..0c66314 100644
--- a/executor/requirements.txt
+++ b/executor/requirements.txt
@@ -3,14 +3,15 @@ jinja2
 matplotlib
 matplotlib-venn
 numpy
+moviepy
 pandas[excel,plot,output-formatting,computation]
 pillow
-pip
 pypandoc
-PyPDF2
+pikepdf
 python-docx
 python-pptx
 scipy
 sympy
 tabulate
-xarray
\ No newline at end of file
+xarray
+xonsh
diff --git a/executor/server.rs b/executor/server.rs
index bbd8e6c..445febb 100644
--- a/executor/server.rs
+++ b/executor/server.rs
@@ -30,6 +30,7 @@ use tokio::process::Command;
 
 #[derive(Serialize, Deserialize)]
 struct ExecuteRequest {
+    language: String,
     source_code: String,
     timeout: Option,
 }
@@ -129,40 +130,50 @@ async fn get_file_hashes(dir: &str) -> HashMap {
     hashes
 }
 
-async fn execute_python(payload: web::Json) -> Result {
+async fn execute(payload: web::Json) -> Result {
     let workspace = env::var("APP_WORKSPACE").unwrap_or_else(|_| "/workspace".to_string());
     let before_hashes = get_file_hashes(&workspace).await;
     let source_dir = TempDir::new()?;
 
-    tokio::fs::write(source_dir.path().join("script.py"), &payload.source_code).await?;
-    let guessed_deps = String::from_utf8_lossy(
-        &Command::new("upm")
-            .arg("guess")
-            .current_dir(source_dir.path())
-            .output()
-            .await?
-            .stdout,
-    ).trim().to_string();
-
-    let new_deps: Vec<&str> = guessed_deps
-        .split_whitespace()
-        .filter(|dep| !REQUIREMENTS.contains(*dep))
-        .collect();
-
-    if !new_deps.is_empty() {
-        Command::new("pip")
-            .arg("install")
-            .arg("--no-cache-dir")
-            .args(&new_deps)
-            .output()
-            .await?;
+    let (interpreter, file_extension) = match payload.language.to_lowercase().as_str() {
+        "python" => ("xonsh", "py"), // TODO: replace xonsh with python once not needed
+        "bash" => ("bash", "sh"),
+        _ => return Err(actix_web::error::ErrorBadRequest("Unsupported language")),
+    };
+
+    let script_path = source_dir.path().join(format!("script.{}", file_extension));
+    tokio::fs::write(&script_path, &payload.source_code).await?;
+
+    if file_extension == "py" { // Python scripts run under xonsh, so key off the extension
+        let guessed_deps = String::from_utf8_lossy(
+            &Command::new("upm")
+                .arg("guess")
+                .current_dir(source_dir.path())
+                .output()
+                .await?
+                .stdout,
+        ).trim().to_string();
+
+        let new_deps: Vec<&str> = guessed_deps
+            .split_whitespace()
+            .filter(|dep| !REQUIREMENTS.contains(*dep))
+            .collect();
+
+        if !new_deps.is_empty() {
+            Command::new("pip")
+                .arg("install")
+                .arg("--no-cache-dir")
+                .args(&new_deps)
+                .output()
+                .await?;
+        }
     }
 
     let timeout = Duration::from_secs(payload.timeout.unwrap_or(60));
     let (stdout, stderr, exit_code) = tokio::time::timeout(
         timeout,
-        Command::new("python")
-            .arg(source_dir.path().join("script.py"))
+        Command::new(interpreter)
+            .arg(&script_path)
             .output(),
     )
     .await
@@ -176,6 +187,7 @@ async fn execute_python(payload: web::Json) -> Result std::io::Result<()> {
             .wrap(Logger::default())
             .route("/workspace/{path:.*}", web::put().to(upload_file))
             .route("/workspace/{path:.*}", web::get().to(download_file))
-            .route("/execute", web::post().to(execute_python))
+            .route("/execute", web::post().to(execute))
     })
     .bind(&listen_addr)?
     .run()
diff --git a/executor/sitecustomize.py b/executor/sitecustomize.py
index 06e9d63..304164b 100644
--- a/executor/sitecustomize.py
+++ b/executor/sitecustomize.py
@@ -1,4 +1,13 @@
-import matplotlib.pyplot
-
 # LLM tends to generate `.show()` which does not work in a headless environment
+import matplotlib.pyplot
 matplotlib.pyplot.show = lambda *_args, **_kwargs: matplotlib.pyplot.savefig("plot.png")
+
+# Disable progressbar for MoviePy which fills up the context window
+import moviepy.editor
+
+old_moviepy_editor_VideoClip_write_videofile = moviepy.editor.VideoClip.write_videofile
+moviepy.editor.VideoClip.write_videofile = (
+    lambda self, *args, **kwargs: old_moviepy_editor_VideoClip_write_videofile(
+        self, *args, **{**kwargs, "verbose": False, "logger": None}
+    )
+)
\ No newline at end of file