From 8466026d7af08cec7cce0d81b3b699b4bf02b6e1 Mon Sep 17 00:00:00 2001 From: weqian Date: Thu, 25 Oct 2018 10:25:53 -0700 Subject: [PATCH] # This is a combination of 6 commits. # This is the 1st commit message: Init # This is the commit message #2: Init # This is the commit message #3: [#34] Updated documentation and added CONTRIBUTING.md (#45) # This is the commit message #4: Update scheduler.py # This is the commit message #5: Initialize Github and Scheduler Extensions # This is the commit message #6: g --- build/extension_init.sh | 14 + docs/CONTRIBUTING.md | 45 ++ docs/getting-started/build-ppextensions.md | 9 +- docs/index.md | 10 +- docs/ppextensions-config-ui/config-ui.md | 3 + .../github-integration.md | 103 ++++- docs/ppextensions-scheduler/scheduler.md | 101 ++++- .../extensions/extension_logger/__init__.py | 0 .../extension_logger/extension_logger.py | 21 + ppextensions/extensions/github/__init__.py | 0 ppextensions/extensions/github/github.py | 230 ++++++++++ .../extensions/github/static/github.js | 204 +++++++++ .../extensions/github/static/githubcommit.js | 74 ++++ .../extensions/github/static/githubmain.js | 43 ++ ppextensions/extensions/scheduler/__init__.py | 0 .../extensions/scheduler/scheduler.py | 226 ++++++++++ .../extensions/scheduler/static/daginfo.html | 66 +++ .../extensions/scheduler/static/editdag.html | 147 +++++++ .../extensions/scheduler/static/scheduler.js | 204 +++++++++ .../scheduler/static/schedulermain.js | 67 +++ .../scheduler/template/dag_template.py | 82 ++++ .../scheduler/template/var_template.conf | 9 + .../__pycache__/github.cpython-35.pyc | Bin 0 -> 11325 bytes .../nbextension/extension/scheduler.py | 413 ++++++++++++++++++ .../nbextension/extension/template.html | 12 + .../nbextension/nbextension.egg-info/PKG-INFO | 15 + .../nbextension.egg-info/SOURCES.txt | 7 + .../nbextension.egg-info/dependency_links.txt | 1 + .../nbextension.egg-info/requires.txt | 3 + .../nbextension.egg-info/top_level.txt | 1 + 
ppextensions/nbextension/static/daginfo.html | 71 +++ ppextensions/nbextension/static/editdag.html | 167 +++++++ ppextensions/nbextension/static/scheduler.js | 241 ++++++++++ .../nbextension/static/schedulermain.js | 87 ++++ ppextensions/nbextension/static/template.html | 12 + setup.py | 6 +- 36 files changed, 2680 insertions(+), 14 deletions(-) create mode 100644 build/extension_init.sh create mode 100644 docs/CONTRIBUTING.md create mode 100644 docs/ppextensions-config-ui/config-ui.md create mode 100644 ppextensions/extensions/extension_logger/__init__.py create mode 100644 ppextensions/extensions/extension_logger/extension_logger.py create mode 100644 ppextensions/extensions/github/__init__.py create mode 100644 ppextensions/extensions/github/github.py create mode 100644 ppextensions/extensions/github/static/github.js create mode 100644 ppextensions/extensions/github/static/githubcommit.js create mode 100644 ppextensions/extensions/github/static/githubmain.js create mode 100644 ppextensions/extensions/scheduler/__init__.py create mode 100644 ppextensions/extensions/scheduler/scheduler.py create mode 100644 ppextensions/extensions/scheduler/static/daginfo.html create mode 100644 ppextensions/extensions/scheduler/static/editdag.html create mode 100644 ppextensions/extensions/scheduler/static/scheduler.js create mode 100644 ppextensions/extensions/scheduler/static/schedulermain.js create mode 100644 ppextensions/extensions/scheduler/template/dag_template.py create mode 100644 ppextensions/extensions/scheduler/template/var_template.conf create mode 100644 ppextensions/nbextension/__pycache__/github.cpython-35.pyc create mode 100644 ppextensions/nbextension/extension/scheduler.py create mode 100644 ppextensions/nbextension/extension/template.html create mode 100644 ppextensions/nbextension/nbextension.egg-info/PKG-INFO create mode 100644 ppextensions/nbextension/nbextension.egg-info/SOURCES.txt create mode 100644 
ppextensions/nbextension/nbextension.egg-info/dependency_links.txt create mode 100644 ppextensions/nbextension/nbextension.egg-info/requires.txt create mode 100644 ppextensions/nbextension/nbextension.egg-info/top_level.txt create mode 100644 ppextensions/nbextension/static/daginfo.html create mode 100644 ppextensions/nbextension/static/editdag.html create mode 100644 ppextensions/nbextension/static/scheduler.js create mode 100644 ppextensions/nbextension/static/schedulermain.js create mode 100644 ppextensions/nbextension/static/template.html diff --git a/build/extension_init.sh b/build/extension_init.sh new file mode 100644 index 0000000..c03e879 --- /dev/null +++ b/build/extension_init.sh @@ -0,0 +1,14 @@ +#!/bin/sh +cd ppextensions/extensions/github +jupyter nbextension install static +cd ../scheduler +jupyter nbextension install static + +jupyter nbextension enable static/github --section='tree' +jupyter nbextension enable static/githubmain --section='tree' +jupyter nbextension enable static/githubcommit --section='notebook' +jupyter nbextension enable static/schedulermain --section='tree' +jupyter nbextension enable static/scheduler --section='tree' + +jupyter serverextension enable --user ppextensions.extensions.github.github +jupyter serverextension enable --user ppextensions.extensions.scheduler.scheduler diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 0000000..2b2b4b9 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,45 @@ + +# Contributing to PPExtensions + +## Discussions + +Our recommendation is to start a slack discussion as soon as you have an idea for contributing to PPExtensions. +This will help you, other interested contributors & the committers get to common grounds early in time. +Contact the PPExtenions community on [slack](https://join.slack.com/t/ppextensions/shared_invite/enQtNDIyODk5NzYzMzEyLTIwOGM3MWE0OGZlNjFkYTUxZTJiN2NjOWFlNmUxNDRiY2U3MzE0Nzg5NDRjZjE2M2VmZGI4NWJhOGVjYTRiMTk). 
+ +-------------------------------------------------------------------------------------------------------------------- + +## How can you help + +### Code +Look for open issues in the categories below, pick an issue, and start a discussion on the slack channel. Once we get to common ground on the solution approach, open a PR with your implementation. +* Bug Fixes +* Enhancements & Improvements (Jira Details) +* Add a new extension that you think would be useful to the Jupyter community + +### Documentation +You can also improve our documentation +* readme.md +* docs/*.md +* Any other valuable comment you'd like to add to the code that will simplify other developers' lives. + +-------------------------------------------------------------------------------------------------------------------- + + +## Contribution Process + +* Get your github account. +* Fork the PPExtensions repo into your account. +* Create an issue branch using the master branch. +* Make modifications to the code. +* Ensure code coverage by adding test cases. +* All commits must have the issue ID & summary. Say "[#32] Add Codacy Integration and Badge". +* Ensure all your commits are squashed. +* Make a Pull Request to develop branch. +* If there are code review related changes - ensure those commits are also squashed. +* DO NOT include changes that are not directly related to the issue you are trying to resolve. +* Once PR is approved, code will be merged to Development branch. +* Once all regression test cases have passed - changes will be merged to master branch. 
+ +-------------------------------------------------------------------------------------------------------------------- + diff --git a/docs/getting-started/build-ppextensions.md b/docs/getting-started/build-ppextensions.md index ee02985..333e457 100644 --- a/docs/getting-started/build-ppextensions.md +++ b/docs/getting-started/build-ppextensions.md @@ -1,15 +1,16 @@ ## Install -```buildoutcfg +``` pip install ppextensions ``` + ## Try ppextensions ``` %load_ext ppextensions.ppmagics ``` Try help to see all the available options -```buildoutcfg -%%help? +``` +%help ``` | Magic | Usage | Explanation | @@ -24,4 +25,4 @@ Try help to see all the available options For more info: -[Github Link](git@github.com:paypal/PPExtensions.git) +[Github Link](https://github.com/paypal/PPExtensions/) diff --git a/docs/index.md b/docs/index.md index eaad922..12025d1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,10 +5,10 @@ PPExtensions is a suite of ipython and jupyter extensions built to improve user # Features -- **PPMagics** - Set of magics to simplify access to different storage systems and tableau. -- **Github Integration** - A jupyter extension to integrate notebooks with github. This extension simplifies version controlling, sharing and resolving merge conflicts of notebooks. -- **Notebooks Scheduling** - A jupyter extension to productionalize the notebooks development environment. This extension enables scheduling notebooks with help of [airflow](https://airflow.apache.org/). -- **Config UI** - A simple UI built to change the configurations of different extensions like PPMagic, [sparkmagic](https://github.com/jupyter-incubator/sparkmagic) ..etc. +- **[PPMagics](ppmagics/csv.md)** - Set of magics to simplify access to different storage systems and tableau. +- **[Github Integration](ppextensions-github-integration/github-integration.md)** - A jupyter extension to integrate notebooks with github. 
This extension simplifies version controlling, sharing and resolving merge conflicts of notebooks. +- **[Notebooks Scheduling](ppextensions-scheduler/scheduler.md)** - A jupyter extension to productionalize the notebooks development environment. This extension enables scheduling notebooks with help of [airflow](https://airflow.apache.org/). +- **[Config UI](ppextensions-config-ui/config-ui.md)** - A simple UI built to change the configurations of different extensions like PPMagic, [sparkmagic](https://github.com/jupyter-incubator/sparkmagic) ..etc. # Installation @@ -33,6 +33,6 @@ PPExtensions is a suite of ipython and jupyter extensions built to improve user # Questions -* [Slack](https://ppextensions.slack.com) +* [Slack](https://join.slack.com/t/ppextensions/shared_invite/enQtNDIyODk5NzYzMzEyLTIwOGM3MWE0OGZlNjFkYTUxZTJiN2NjOWFlNmUxNDRiY2U3MzE0Nzg5NDRjZjE2M2VmZGI4NWJhOGVjYTRiMTk) * [User Forum](https://groups.google.com/d/forum/ppextensions) * [Developer Forum](https://groups.google.com/d/forum/ppextensions) diff --git a/docs/ppextensions-config-ui/config-ui.md b/docs/ppextensions-config-ui/config-ui.md new file mode 100644 index 0000000..471928d --- /dev/null +++ b/docs/ppextensions-config-ui/config-ui.md @@ -0,0 +1,3 @@ +## Config UI + +Coming Soon \ No newline at end of file diff --git a/docs/ppextensions-github-integration/github-integration.md b/docs/ppextensions-github-integration/github-integration.md index 8c620ae..87fe989 100644 --- a/docs/ppextensions-github-integration/github-integration.md +++ b/docs/ppextensions-github-integration/github-integration.md @@ -1,3 +1,102 @@ -# GitHub Integration +# Github Integration -Coming soon \ No newline at end of file +# About +A Jupyter extension to integrate notebooks with Github. This extension simplifies version controlling, sharing and resolving merge conflicts of notebooks. 
+ +# Getting Started + +Install Github Extension +--- +~~~ +cd PPExtension/ppextension/extensions/github +jupyter nbextension install static +jupyter nbextension enable static/github --user --section=tree +jupyter nbextension enable static/githubmain --user --section=tree +jupyter nbextension enable static/githubcommit --user --section=notebook +jupyter serverextension enable --user ppextensions.extensions.github.github +~~~ + +Alternatively, if you want to install all extensions in ppextension module +~~~ +bash PPExtension/build/extension_init.sh +~~~ + +This command will automatically install all frontend and backend Jupyter extensions we provide. + +Setup +--- + +**Register private Github token:** Go to Github website, click `Settings` --> `Developer settings` --> `Personal access tokens` --> `Generate new token`, copy the new token and export that as an environment variable. +~~~ +export githubtoken= +~~~ + +Notice if the token is replaced, all local repo will be "unlinked" to remote. +**Enable git merge driver:** +To show conflict in notebook, a nbmerge driver from nbdime module should be enabled as well. +~~~ +git-nbmergedriver config --enable --global +~~~ + +**(Optional) Initialize a Github repo for notebooks** + +If you want to create a separate repo for sharing the notebooks, go to github website and create a new repo, be sure to create a README as well in order to initialize the master branch, otherwise when you pull the repo, there will be a "master branch not found" error. + +**(Optional) Use an existing Github repo for sharing the notebooks** +Either push to or pull from that repo will create a local workspace in Private Sharing folder in the notebook startup folder. + +Push to Github +--- +**Push a single notebook to Github:** Select the notebook to be pushed, click `Sharing` --> `Push to Github`, select the repo, branch and type commit messages in the popup, and click on `Push`. 
+ +When you push a notebook outside the `Sharing` folder, the notebook will be moved under `Sharing//` path, and then be pushed to Github. +When you push a notebook inside the `Sharing` folder, only the "Linked" repo will be displayed in the dropdown. + +In the following situations, the push command will fail. + +***During a merge:*** Cannot do partial commit during a merge, please choose commit all notebooks option and push. Notice: this operation will push all other notebooks in this repo! + +***There is a conflict:*** Updates were rejected because the remote contains work that you do not have locally. Please do git pull and fix the possible conflicts before pushing again! + +**Push a folder to Github:** Select the folder, click on `Sharing` --> `Push to Github`, select the repo, branch and type commit messages in the popup, and click on `Push`. + +When you push a folder outside the `Sharing` folder, that entire folder will be moved under "Sharing/" path, and then be pushed to Github. + + +Pull from Github +--- +Click on `Sharing` --> `Pull from Github`, copy the Github repo url and paste that in the input area, then click on `Pull`. + +In the following situations, the pull command will fail. + +***During a merge:*** You have not concluded your merge (MERGE_HEAD exists). Please, commit your changes before you can merge. + +***There is a conflict:*** Auto-merging **.ipynb. CONFLICT(content): Merge conflict in **.ipynb. Automatic merge failed; fix conflicts and then commit the result. + +***Untracked notebook in local:*** Your local changes to the following files would be overwritten by merge: xx.ipynb. Please, commit your changes or stash them before you can merge. Aborting. + +Commit +--- +Open up a notebook, click on the Github icon in the tool bar. There are two types of commit: + +**Commit one notebook:** This option will be used in most cases. +In the following situations, this command will fail. 
+ +***Worktree clean, nothing to commit*** + +***There are other untracked/uncommitted notebooks:*** Nothing committed but untracked files present. + +***During a merge:*** Cannot do partial commit during a merge, please choose commit all notebooks option and push. Notice: this operation will push all other notebooks in this repo! + +**Commit all notebooks in the same folder:** This option will only be used when a merge conflict is fixed. + + +Conflict Fix +--- +When you pull from Github and your local commit is different from the remote commit, a conflict will be generated. If the conflict cannot be automatically resolved, you should fix the conflicts. + +In the error message, the conflicting files will be displayed. + +Notice: The merge-driver depends on the nbdime module. While it works well in identifying "cell level" conflicts, it does not fully support "notebook level" merging. Therefore, it is not guaranteed that a "notebook level" conflict (such as a deleted cell/added cell) will be identified with 100 percent accuracy. Before the improved nbdime module is released, we would recommend the user to keep the number of cells unchanged in a collaborative circumstance. + +To commit, first click on the Github icon in the notebook toolbar, choose either `Commit this notebook only` or `Commit all notebooks in this folder`, then click on `Commit`. 
\ No newline at end of file diff --git a/docs/ppextensions-scheduler/scheduler.md b/docs/ppextensions-scheduler/scheduler.md index debfab1..e3f3389 100644 --- a/docs/ppextensions-scheduler/scheduler.md +++ b/docs/ppextensions-scheduler/scheduler.md @@ -1,3 +1,102 @@ # Scheduler -Coming soon \ No newline at end of file +# About +A Jupyter extension to productionalize your notebooks by scheduling them to run in the background + +# Getting Started + +Install Scheduler Extension +--- +~~~ +cd PPExtension/ppextension/extensions/scheduler +jupyter nbextension install static +jupyter nbextension enable static/scheduler --user --section=tree +jupyter nbextension enable static/schedulermain --user --section=tree +jupyter serverextension enable --user ppextensions.extensions.scheduler.scheduler +~~~ + +Alternatively, if you want to install all extensions in ppextension module +~~~ +bash PPExtension/build/extension_init.sh +~~~ + +This command will automatically install all frontend and backend Jupyter extensions we provide. + +Pre Requisites +--- +**Configure Airflow** +~~~ +export AIRFLOW_HOME= +~~~ + +Run airflow in the command line; an `airflow.cfg` file will be generated in airflow home. Here is a list of parameters that need to be changed. + +~~~ +dags_folder = /dags +executor = LocalExecutor +sql_alchemy_conn = mysql+mysqlconnector://:@:/airflow +dags_are_paused_at_creation = False (recommended) +load_examples = False (recommended) +~~~ + +Create a `dags` and a `variables` folder in airflow home to store the dag files and their related variable files. + +**Setup MySQL** + +Create a database `airflow` in mysql. This serves as the metadata db for airflow. + +Setup +--- +Here are a few preparations to make scheduler extension work. The Pre-req steps can be skipped if those are already configured. 
+ +**Export Path Variables** +~~~ +export AIRFLOW_METADATA_CONNECTION_STRING='mysql+mysqlconnector://:@:/airflow' +~~~ + +**Start Airflow Scheduler, Webserver** + +In this tutorial, we are using airflow LocalExecutor, hence airflow worker is not required. But if you are using some other executors like CeleryExecutor, then the airflow worker should also be started. + +~~~ +airflow webserver +airflow scheduler +~~~ + +By default, the log files will be generated in airflow_home, you can configure that as well. Refer to https://airflow.apache.org/howto/write-logs.html. + +After everything is settled, source the profile and start Jupyter notebook. + +Schedule Notebook +--- +To schedule a notebook, first select a notebook, click on the `schedule` button that appears in the dynamic tool bar, and a scheduler menu will pop up. + +Currently scheduler extension provides the following configurable dag parameters: + +***Interval:*** Three different scales of frequency are provided: hourly, daily and weekly. + +***Start Time/Date:*** The start time/date can not be earlier than current time. + +***Number of Runs:*** The number of runs the job should be executed. For example, if a job is scheduled to start at `12:00AM 11/11/2018` with an interval of `1 hour`, and the number of runs is set to 5 times, then the job will be ended at `5:00 AM 11/11/2018`. + +***Emails:*** To receive failure email and success email, check the box and input the email address in the input area. + +To receive the email alert, the SMTP server should be set up on the host machine and corresponding parameters in `airflow.cfg` `[smtp]` section need to be configured. + +Click on `Schedule` button, the job will be displayed in `Scheduled Jobs` tab, from which you can see the **Last Run Time**, **Last Run Time**, **Last Run Duration**, **Next Scheduled Run** of each job scheduled. Notice, there will be some delay in the airflow UI to show the job. 
+ +Edit Job +--- +To edit a job, go to the `Scheduled Jobs` tab, click on `Edit` button in `Action` column of the target job, the current configuration of that job except number of runs will be displayed in the configuration menu as default values. Change the configuration and hit on `Confirm Edit` button, the changes will be applied to the job. + + +Delete Job +--- +To delete a job, go to the `Scheduled Jobs` tab, click on `Remove` button in `Action` column of the target job, the dag/vairable file of the related job as well as the records in the metadata db will be removed. + + + + + + + diff --git a/ppextensions/extensions/extension_logger/__init__.py b/ppextensions/extensions/extension_logger/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ppextensions/extensions/extension_logger/extension_logger.py b/ppextensions/extensions/extension_logger/extension_logger.py new file mode 100644 index 0000000..eb747ca --- /dev/null +++ b/ppextensions/extensions/extension_logger/extension_logger.py @@ -0,0 +1,21 @@ +import logging +from pathlib import Path + +logger_name = "extension_logger" +logger = logging.getLogger(logger_name) +logger.setLevel(logging.DEBUG) + +log_path = str(Path.home()) + "/.extensionlog.log" +fh = logging.FileHandler(log_path) +fh.setLevel(logging.DEBUG) + +sh = logging.StreamHandler() +sh.setLevel(logging.DEBUG) + +fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +formatter = logging.Formatter(fmt) + +fh.setFormatter(formatter) +sh.setFormatter(formatter) +logger.addHandler(fh) +logger.addHandler(sh) diff --git a/ppextensions/extensions/github/__init__.py b/ppextensions/extensions/github/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ppextensions/extensions/github/github.py b/ppextensions/extensions/github/github.py new file mode 100644 index 0000000..4835e4a --- /dev/null +++ b/ppextensions/extensions/github/github.py @@ -0,0 +1,230 @@ +from notebook.utils import url_path_join +from 
notebook.base.handlers import IPythonHandler +from git import Repo, exc, GitCommandError +from shutil import move +from urllib.parse import urlparse, unquote +from ppextensions.extensions.extension_logger.extension_logger import logger + +import requests +import json +import os + +GITHUB_URL_PREFIX = "https://github.com/" +GITHUB_API_PREFIX = "https://api.github.com" +GITHUB_TOKEN = os.getenv('githubtoken','') +NOTEBOOK_STARTUP_PATH = os.getcwd() + "/" +LOCAL_REPO_FOLDER = "Sharing" +LOCAL_REPO_PREFIX = NOTEBOOK_STARTUP_PATH + LOCAL_REPO_FOLDER + +class PrivateGitHandler(IPythonHandler): + """ + The base class that has all functions used in private sharing backend handlers. + """ + + def error_handler(self, err, iserr=True): + err = err.replace("\n", "
").replace("\t", " ") + logger.error(err) + if iserr: + self.set_status(500) + self.finish(err) + + @staticmethod + def git_clone(local_repo_path, repo_url): + try: + repo_instance = Repo(local_repo_path) + except exc.NoSuchPathError: + o = urlparse(repo_url) + repo_url_with_token = o.scheme + "://" + GITHUB_TOKEN + "@" + o.hostname + o.path + Repo.clone_from(repo_url_with_token, local_repo_path) + with open(local_repo_path + "/.gitignore", "a") as f: + f.write("\n.*\n.gitignore") + repo_instance = Repo(local_repo_path) + return repo_instance + + def git_commit(self, from_path, to_path, file_name, repo_instance, commit_message): + try: + move(from_path, to_path) + except Exception as e: + self.error_handler(str(e)) + git_instance = repo_instance.git + git_instance.add("--a") + if not os.path.isdir(to_path): + git_instance.commit("-o", "{}".format(file_name), "-m", commit_message) + else: + git_instance.commit("-m", commit_message) + + @staticmethod + def git_commit_inside(file_name, repo_instance, commit_message, option): + git_instance = repo_instance.git + git_instance.add('--a') + if option == "single": + git_instance.commit("-o", "{}".format(file_name), "-m", commit_message) + else: + git_instance.commit("-m", commit_message) + + @staticmethod + def get_repo(file_path): + repos = dict() + headers = {'Authorization': 'token ' + GITHUB_TOKEN} + parts = file_path.split("/") + if parts[0] == LOCAL_REPO_FOLDER: + repo_name = parts[1] + "/" + parts[2] + branch = requests.get(GITHUB_API_PREFIX + '/repos/' + repo_name + '/branches', headers=headers) + if len(branch.json()) == 0: + repos[repo_name] = ['master'] + else: + repos[repo_name] = [br['name'] for br in branch.json()] + else: + params = {'affiliation': "owner", "per_page": 100, "sort": "full_name"} + repo = requests.get(GITHUB_API_PREFIX + '/user/repos', headers=headers, params=params).json() + for rp in repo: + repo_name = rp['full_name'] + branch = requests.get(GITHUB_API_PREFIX + '/repos/' + repo_name + 
'/branches', headers=headers) + if len(branch.json()) == 0: + repos[repo_name] = ['master'] + else: + repos[repo_name] = [br['name'] for br in branch.json()] + return json.dumps(repos) + + +class PrivateGitGetRepoHandler(PrivateGitHandler): + """ + Get the accessible github repos and display them in the dropdown in github push menu + """ + + def get(self): + file_path = self.get_argument("filepath", "") + try: + repos = self.get_repo(file_path) + self.finish(repos) + except Exception as e: + self.error_handler(str(e)) + + +class PrivateGitPushHandler(PrivateGitHandler): + """ + Private sharing handler to push a notebook or a folder to remote repo. + Step1: Git Clone (If necessary) + Step2: Git Commit + Step3: Git Push + """ + + def post(self): + repo_name = unquote(self.get_argument("repo")) + branch = self.get_argument("branch") + commit_message = self.get_argument("msg") + file_path = unquote(self.get_argument("filepath")) + file_name = unquote(self.get_argument("filename")) + repo_url = GITHUB_URL_PREFIX + repo_name + ".git" + local_repo_path = LOCAL_REPO_PREFIX + "/" + repo_name + if not file_path.startswith(LOCAL_REPO_FOLDER): + local_repo_file_path = local_repo_path + "/" + file_path + else: + local_repo_file_path = NOTEBOOK_STARTUP_PATH + file_path + repo_instance = self.git_clone(local_repo_path, repo_url) + try: + self.git_commit(NOTEBOOK_STARTUP_PATH + file_path, local_repo_file_path, file_name, repo_instance, + commit_message) + except GitCommandError as e: + if e.status == 128: + self.error_handler("Cannot do partial commit during a merge, please choose commit all notebooks option " + "and push. Notice: this operation will push all other notebooks in this repo!") + elif e.status == 1: + self.error_handler(e.stdout, iserr=False) + else: + self.error_handler(e.stderr, iserr=False) + try: + push_info = repo_instance.remote().push("master:" + branch) + assert push_info[0].flags in [512, 256, 2, 1] + self.finish(file_name + " has been successfully pushed! 
") + except AssertionError as e: + if push_info[0].flags == 1032: + self.error_handler("Updates were rejected because the remote contains work that you do not have " + "locally. Please do git pull and fix the possible conflicts before pushing again!") + except GitCommandError as e: + self.error_handler(push_info[0].summary) + + +class PrivateGitPullHandler(PrivateGitHandler): + """ + Private Sharing handler to pull a notebook or an entire repo to local. + If there is a conflict, it will show the conflict in notebook and ask the user to fix. + """ + + def post(self): + github_repo_url = unquote(self.get_argument("github_repo_url")) + o = urlparse(github_repo_url) + if o.path.endswith(".git"): + repo = o.path.strip(".git") + branch = "master" + repo_url = github_repo_url + else: + split_word = "/blob/" if "/blob/" in o.path else "/tree/" + if split_word in o.path: + repo, path = o.path.split(split_word) + branch = path.split("/")[0] + repo_url = github_repo_url.split(split_word)[0] + ".git" + else: + repo = o.path + branch = "master" + repo_url = github_repo_url + ".git" + local_repo_path = LOCAL_REPO_PREFIX + repo + try: + repo_instance = self.git_clone(local_repo_path, repo_url) + git_instance = repo_instance.git + git_instance.pull("origin", branch) + self.finish("Successfully pulled to Sharing" + repo) + except GitCommandError as e: + if "conflict" in e.stdout: + self.error_handler(e.stdout) + else: + self.error_handler(e.stderr) + + +class PrivateGitCommitHandler(PrivateGitHandler): + """ + GitCommit handler used by the git commit button in notebook toolBar + """ + + def post(self): + repo_name = unquote(self.get_argument("repo")) + file_name = unquote(self.get_argument("filename")) + option = self.get_argument("option") + commit_message = "Commit from PayPal Notebook" + local_repo_path = NOTEBOOK_STARTUP_PATH + repo_name + try: + repo_instance = Repo(local_repo_path) + except exc.InvalidGitRepositoryError as e: + self.error_handler(str(e)) + try: + 
self.git_commit_inside(file_name, repo_instance, commit_message, option) + self.finish("Commit Success!") + except Exception as e: + if e.status == 128: + self.error_handler("Cannot do partial commit during a merge, please choose commit all notebooks option " + "and push. Notice: this operation will push all other notebooks in this repo!") + else: + err = e.stdout.replace("\n","
") + self.error_handler(err) + + +def load_jupyter_server_extension(nb_server_app): + """ + Called when the extension is loaded. + + Args: + nb_server_app (NotebookWebApplication): handle to the Notebook webserver instance. + """ + web_app = nb_server_app.web_app + handlers = [ + (r'/github/private_github_push', PrivateGitPushHandler), + (r'/github/private_github_pull', PrivateGitPullHandler), + (r'/github/private_github_get_repo', PrivateGitGetRepoHandler), + (r'/github/private_github_commit', PrivateGitCommitHandler), + ] + + base_url = web_app.settings['base_url'] + handlers = [(url_path_join(base_url, h[0]), h[1]) for h in handlers] + + host_pattern = '.*$' + web_app.add_handlers(host_pattern, handlers) diff --git a/ppextensions/extensions/github/static/github.js b/ppextensions/extensions/github/static/github.js new file mode 100644 index 0000000..2a47ddd --- /dev/null +++ b/ppextensions/extensions/github/static/github.js @@ -0,0 +1,204 @@ +define(["base/js/namespace", "base/js/dialog", "tree/js/notebooklist", "base/js/utils", "jquery"], function (Jupyter, dialog, notebooklist, utils, $) { + var GithubOperation = function () { + this.base_url = Jupyter.notebook_list.base_url; + this.bind_events(); + }; + + GithubOperation.prototype = Object.create(notebooklist.NotebookList.prototype); + + GithubOperation.prototype.bind_events = function () { + var that = this; + $(".private-github-push").click($.proxy(that.private_github_push, this)); + $(".private-github-pull").click($.proxy(that.private_github_pull, this)); + }; + + GithubOperation.prototype.private_github_push = function () { + var that = this; + var repo = $(""); + branch.append(""); + + function initializeDropdown(res) { + for (var rp in res) { + repo.append(new Option(rp, rp)); + } + repo.change(function () { + var branches = res[repo.val()]; + branch.empty(); + branch.append(""); + $.each(branches, function (i, el) { + branch.append(new Option(el, el)); + }); + }); + } + + var selected = 
Jupyter.notebook_list.selected[0]; + var settings = { + method: "GET", + data: {"filepath": selected.path}, + success: function(res) { + res = JSON.parse(res); + for (var rp in res) { + repo.append(new Option(rp, rp)); + } + repo.change(function() { + var branches=res[repo.val()]; + branch.empty(); + branch.append(""); + $.each(branches, function(i, el) { + branch.append(new Option(el, el)); + }); + }); + }, + error: function(res) { + console.log(res); + }, + }; + var url = utils.url_path_join(that.base_url, "/github/private_github_get_repo"); + if (sessionStorage.getItem(url) != null) { + var res = JSON.parse(sessionStorage.getItem(url)); + initializeDropdown(res); + } else { + utils.ajax(url, settings); + } + + var commit_msg = $("").css("margin-left", "12px"); + var repo_div = $("
") + .append("") + .append(repo); + var branch_div = $("
") + .append("") + .append(branch); + var text_div = $("
") + .append("") + .append(commit_msg); + var dialog_body=$("

Please notice: if you are pushing one notebook, other notebooks that are already committed in the same folder will also be pushed!

") + .append($("
") + .append(repo_div) + .append(branch_div) + .append(text_div)); + + dialog.modal({ + title: "Push to Github", + body: dialog_body, + buttons: { + Push: { + class: "btn-primary", + click: function () { + var spin = dialog.modal({ + title: "Pushing...", + body: $("
") + .append($("
Notebook is being pushed to github, please wait for a few seconds.
")) + }); + var payload = { + "msg": commit_msg.val(), + "branch": branch.val(), + "repo": repo.val(), + "filepath": selected.path, + "filename": encodeURI(selected.name) + }; + var settings = { + method: "POST", + data: payload, + success: function (res) { + spin.modal("hide"); + dialog.modal({ + title: "Git Push Success", + body: $("
").append(res), + button: { + OK: { + "class": "btn-primary", + click: function () { + Jupyter.notebook_list.load_list(); + } + } + } + }); + }, + error: function (res) { + spin.modal("hide"); + dialog.modal({ + title: "Git Push Failed", + body: $("
").append(res.responseText), + button: { + OK: { + "class": "btn-primary", + click: function () { + Jupyter.notebook_list.load_list(); + } + } + } + }); + } + }; + var url = utils.url_path_join(that.base_url, "/github/private_github_push"); + utils.ajax(url, settings); + } + }, + Cancel: {} + } + }); + }; + GithubOperation.prototype.private_github_pull = function () { + var that = this; + + var dialog_body = $("
") + .append($("")) + .append($("")); + + dialog.modal({ + title: "Pull from Github", + body: dialog_body, + buttons: { + Pull: { + class: "btn-primary", + click: function () { + var spin = dialog.modal({ + title: "Pulling...", + body: $("
") + .append($("
Notebook is being pulled from github, please wait for a few seconds.
")) + }); + var payload = { + "github_repo_url": $("#gru").val() + }; + var settings = { + method: "POST", + data: payload, + success: function (res) { + spin.modal("hide"); + dialog.modal({ + title: "Pull success!", + body: $("
").append(res), + buttons: { + OK: { + class: "btn-primary", + click: function () { + Jupyter.notebook_list.load_list(); + } + } + } + }); + }, + error: function (res) { + spin.modal("hide"); + dialog.modal({ + title: "Pull failed!", + body: $("
").append(res.responseText), + buttons: { + OK: {class: "btn-primary"} + } + }); + } + }; + var url = utils.url_path_join(that.base_url, "/github/private_github_pull"); + utils.ajax(url, settings); + } + }, + Cancel: {} + } + }); + }; + + return {GithubOperation: GithubOperation}; +}); diff --git a/ppextensions/extensions/github/static/githubcommit.js b/ppextensions/extensions/github/static/githubcommit.js new file mode 100644 index 0000000..c2914d6 --- /dev/null +++ b/ppextensions/extensions/github/static/githubcommit.js @@ -0,0 +1,74 @@ +define(["base/js/namespace", "base/js/dialog", "base/js/utils", "jquery"], function (Jupyter, dialog, utils, $) { + + var git_commit = { + help: "Commit current notebook", + icon: "fa-github", + help_index: "", + handler: function (env) { + var re = /^\/notebooks(.*?)$/; + var filepath = window.location.pathname.match(re)[1]; + var repo = filepath.substring(1, filepath.lastIndexOf("/")); + var filename = filepath.substring(filepath.lastIndexOf("/") + 1, filepath.length); + var dialog_body = $("
") + .append("\n" + + ""); + + dialog.modal({ + title: "Commit Notebook", + body: dialog_body, + buttons: { + Commit: { + class: "btn-primary", + click: function () { + var spin = dialog.modal({ + title: "Committing...", + body: $("
") + .append($("
Notebook is being committed to local github repository, please wait for a few seconds.
")) + }); + var payload = { + "repo": repo, + "filename": filename, + "option": $("input[name=optradio]:checked", "#option").val() + }; + if (repo === "/"){ alert("Please commit inside local repo!"); return; } + var settings = { + method: "POST", + data: payload, + success: function (res) { + spin.modal("hide"); + dialog.modal({ + title: "Commit Success!", + body: $("
").append(res), + button: { + OK: { "class": "btn-primary" } + } + }); + }, + error: function (res) { + spin.modal("hide"); + dialog.modal({ + title: "Commit Failed", + body: $("
").append(res.responseText), + button: { + OK: { "class": "btn-primary" } + } + }); + } + }; + var url = utils.url_path_join(Jupyter.notebook.base_url, "/github/private_github_commit"); + utils.ajax(url, settings); + } + }, + Cancel: {} + }, + keyboard_manager: env.notebook.keyboard_manager + }); + } + }; + + function _on_load () { + var action_name = Jupyter.actions.register(git_commit, "commit", "git"); + Jupyter.toolbar.add_buttons_group([action_name]); + } + return { load_ipython_extension: _on_load }; +}); diff --git a/ppextensions/extensions/github/static/githubmain.js b/ppextensions/extensions/github/static/githubmain.js new file mode 100644 index 0000000..96d0fb8 --- /dev/null +++ b/ppextensions/extensions/github/static/githubmain.js @@ -0,0 +1,43 @@ +define(["jquery", + "base/js/namespace", + "base/js/utils", + "./github" +], function ($, Jupyter, utils, githuboperation) { + function load_ipython_extension () { + var github_html = $("
\n" + + " \n" + + " \n" + + "
"); + + $(".tree-buttons > .pull-right").prepend(github_html); + + var _selection_changed = Jupyter.notebook_list.__proto__._selection_changed; + + var gitoperation = new githuboperation.GithubOperation(); + + Jupyter.notebook_list.__proto__._selection_changed = function () { + _selection_changed.apply(this); + var selected = this.selected; + if (selected.length === 1) { + $(".private-github-push").css("display", "block"); + } else { + $(".private-github-push").css("display", "none"); + } + }; + Jupyter.notebook_list._selection_changed(); + } + + return { + load_ipython_extension: load_ipython_extension + }; +}); diff --git a/ppextensions/extensions/scheduler/__init__.py b/ppextensions/extensions/scheduler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ppextensions/extensions/scheduler/scheduler.py b/ppextensions/extensions/scheduler/scheduler.py new file mode 100644 index 0000000..758419b --- /dev/null +++ b/ppextensions/extensions/scheduler/scheduler.py @@ -0,0 +1,226 @@ +from notebook.utils import url_path_join +from notebook.base.handlers import IPythonHandler +from shutil import copyfile +from sqlalchemy import create_engine +from airflow import settings, models +from ppextensions.extensions.extension_logger import extension_logger + +import datetime +import configparser +import getpass +import os + + +CONNECTION_STRING = os.environ["AIRFLOW_METADATA_CONNECTION_STRING"] +AIRFLOW_HOME = os.environ["AIRFLOW_HOME"] +NOTEBOOK_STARTUP_PATH = os.getcwd() + "/" +DAG_TEMPLATE = os.path.dirname(os.path.abspath(__file__)) + "/template/dag_template.py" +VAR_TEMPLATE = os.path.dirname(os.path.abspath(__file__)) + "/template/var_template.conf" +SCHEDULER_STATIC_FILE_PATH = os.path.dirname(os.path.abspath(__file__)) + "/static" + + +class SchedulerHandler(IPythonHandler): + session = settings.Session() + engine = create_engine(CONNECTION_STRING) + cf = configparser.ConfigParser() + + @staticmethod + def get_dag_id(notebook_name): + return 
getpass.getuser() + "_" + notebook_name + + @staticmethod + def get_dag_path(dag_id): + dag_path = AIRFLOW_HOME + "/dags/dag_" + dag_id + ".py" + var_path = AIRFLOW_HOME + "/variables/var_" + dag_id + ".conf" + return dag_path, var_path + + @staticmethod + def get_delta(start, interval): + start = datetime.datetime.strptime(start, '%Y-%m-%d %H:%M:%S') + itv = interval.split(" ") + delta = datetime.timedelta(**dict([(itv[1], int(itv[0]))])) + return start, delta + + def dag_info(self, dag_inst): + interval = dag_inst.schedule_interval + notebook_name = dag_inst.dag_id.split('_')[1] + task = dag_inst.get_task("notebook_task") + start_date = task.start_date + end_date = task.end_date + task_instances = task.get_task_instances(self.session, start_date=start_date, end_date=end_date) + if len(task_instances) != 0: + for ti in task_instances[::-1]: + dag_run = dag_inst.get_dagrun(execution_date=ti.execution_date) + if dag_run.external_trigger is False: + last_run_time = ti.execution_date + interval + last_run_status = ti.state + last_run_duration = ti.duration + next_run_time = last_run_time + interval + return [notebook_name, last_run_time, last_run_status, last_run_duration, next_run_time] + return [notebook_name, 'N/A', 'N/A', 'N/A', task.start_date + interval] + else: + return [notebook_name, 'N/A', 'N/A', 'N/A', task.start_date + interval] + + def get_dag(self, username): + dag_bag = models.DagBag(settings.DAGS_FOLDER) + dag_instances = [dag_inst for (dag_id, dag_inst) in dag_bag.dags.items() if dag_inst.owner == username] + dags = [] + for dag_inst in dag_instances: + dags.append(self.dag_info(dag_inst)) + return dags + + def delete_dag(self, notebook_name): + dag_id = self.get_dag_id(notebook_name) + dag_path, var_path = self.get_dag_path(dag_id) + os.remove(dag_path) + os.remove(var_path) + with self.engine.begin() as con: + for t in ["dag", "xcom", "task_instance", "sla_miss", "log", "job", "dag_run", "task_fail", "dag_stats"]: + query = "delete from {} where 
dag_id='{}'".format(t, dag_id) + con.execute(query) + + def configure(self, dag_id, notebook_path, emails_failure, emails_success, start, runs, interval): + dag_path, var_path = self.get_dag_path(dag_id) + copyfile(DAG_TEMPLATE, dag_path) + copyfile(VAR_TEMPLATE, var_path) + start, delta = self.get_delta(start, interval) + start -= delta + if runs == "None": + end = datetime.datetime.max.replace(microsecond=0) + else: + end = start + int(runs) * delta + self.cf.read(var_path) + self.cf.set("config", "dag_id", dag_id) + self.cf.set("config", "username", getpass.getuser()) + self.cf.set("config", "interval", interval) + self.cf.set("config", "notebook_path", notebook_path) + self.cf.set("config", "start", str(start)) + self.cf.set("config", "end", str(end)) + self.cf.set("config", "emails_failure", emails_failure) + self.cf.set("config", "emails_success", emails_success) + self.cf.write(open(var_path, "w")) + + +class CreateDagHandler(SchedulerHandler): + """ + Backend handler to create a dag and store it in airflow dag folder when the user schedules a job. 
+ """ + + def post(self): + notebook_name = self.get_argument('notebook_name') + notebook_path = self.get_argument('notebook_path') + emails_failure = self.get_argument('emails_failure') + emails_success = self.get_argument('emails_success') + start = self.get_argument('start') + runs = self.get_argument('runs') + interval = self.get_argument('interval') + dag_id = self.get_dag_id(notebook_name) + notebook_path = NOTEBOOK_STARTUP_PATH + notebook_path + self.configure(dag_id, notebook_path, emails_failure, emails_success, start, runs, interval) + self.set_status(204, "") + + +class GetDagHandler(SchedulerHandler): + """ + Backend handler to get dag information and display it scheduled jobs tab + """ + + def get(self): + dag_list = self.get_dag(getpass.getuser()) + base_url = self.get_argument('base_url') + self.render('daginfo.html', base_url=base_url, dag_list=dag_list) + + +class DeleteDagHandler(SchedulerHandler): + """ + Backend handler to delete the dag information includes: + 1. All related dag and task records stored in airflow metadata db + 2. Dag file in dag folder + 3. Var file in variable folder + """ + + def post(self): + notebook_name = self.get_argument("notebook_name") + try: + self.delete_dag(notebook_name) + except Exception as e: + extension_logger.logger.error(str(e)) + self.set_status(400) + self.finish(str(e)) + self.set_status(204, "") + + +class EditDagHandler(SchedulerHandler): + """ + Backend handler required by the edit dag button in scheduled job tab. 
+ For get request: + It will fetch all dag related information from configuration file and display it in edit menu + For post request: + It will update all dag related information in the configuration file based on user's input in edit menu + """ + + def get(self): + notebook_name = self.get_argument("notebook_name") + dag_id = self.get_dag_id(notebook_name) + _, var_path = self.get_dag_path(dag_id) + self.cf.read(var_path) + interval = self.cf.get("config", "interval") + start, delta = self.get_delta(self.cf.get("config", "start"), interval) + emails_failure = self.cf.get("config", "emails_failure") + emails_success = self.cf.get("config", "emails_success") + base_url = self.get_argument("base_url") + start += delta + configuration = [dag_id, start, interval, emails_failure, emails_success, base_url] + self.render("editdag.html", configuration=configuration) + + def post(self): + dag_id = self.cf.get("config", "dag_id") + notebook_path = self.cf.get("config", "notebook_path") + start = self.get_argument('start') + freq = self.get_argument('freq') + unit = self.get_argument('unit') + runs = self.get_argument('runs') + emails_failure = self.get_argument("emails_failure") + emails_success = self.get_argument("emails_success") + interval = freq + ' ' + unit + self.configure(dag_id, notebook_path, emails_failure, emails_success, start, runs, interval) + self.set_status(204, "") + + +class CheckDagHandler(SchedulerHandler): + """ + Backend handler to check whether the dag already exists or not + """ + + def get(self): + dag_bag = models.DagBag(settings.DAGS_FOLDER) + notebook_name = self.get_argument("notebook_name") + dag_id = self.get_dag_id(notebook_name) + if dag_id in dag_bag.dags: + self.finish("True") + else: + self.finish("False") + + +def load_jupyter_server_extension(nb_server_app): + """ + Called when the extension is loaded. + + Args: + nb_server_app (NotebookWebApplication): handle to the Notebook webserver instance. 
+ """ + web_app = nb_server_app.web_app + + handlers = [ + (r'/scheduler/create_dag', CreateDagHandler), + (r'/scheduler/get_dag', GetDagHandler), + (r'/scheduler/delete_dag', DeleteDagHandler), + (r'/scheduler/edit_dag', EditDagHandler), + (r'/scheduler/check_dag', CheckDagHandler) + ] + web_app.settings['template_path'] = SCHEDULER_STATIC_FILE_PATH + base_url = web_app.settings['base_url'] + handlers = [(url_path_join(base_url, h[0]), h[1]) for h in handlers] + + host_pattern = '.*$' + web_app.add_handlers(host_pattern, handlers) diff --git a/ppextensions/extensions/scheduler/static/daginfo.html b/ppextensions/extensions/scheduler/static/daginfo.html new file mode 100644 index 0000000..e2b24a7 --- /dev/null +++ b/ppextensions/extensions/scheduler/static/daginfo.html @@ -0,0 +1,66 @@ +
+ + + + + + + + + + + + {% for dag in dag_list %} + + + + + + + + + + {% end %} + +
Notebook NameLast Run TimeLast Run StatusLast Run DurationNext Scheduled RunAction
{{ dag[0] }}{{ dag[1] }}{{ dag[2] }}{{ dag[3] }}{{ dag[4] }} + + + {% raw xsrf_form_html() %} + + + +
+
+ +
+
+ +
+
+ + diff --git a/ppextensions/extensions/scheduler/static/editdag.html b/ppextensions/extensions/scheduler/static/editdag.html new file mode 100644 index 0000000..38d5b79 --- /dev/null +++ b/ppextensions/extensions/scheduler/static/editdag.html @@ -0,0 +1,147 @@ +Configuration for job: {{ configuration[0] }} +
+
+
+
+ + +
+
+ + +
+
+ + +
+ +
+ + +
+
+ + +
+
+
+ + +
+ +
+ + +
+
+
+
+ +
+
+ diff --git a/ppextensions/extensions/scheduler/static/scheduler.js b/ppextensions/extensions/scheduler/static/scheduler.js new file mode 100644 index 0000000..2e65ae4 --- /dev/null +++ b/ppextensions/extensions/scheduler/static/scheduler.js @@ -0,0 +1,204 @@ +define(["base/js/namespace", "base/js/dialog", "tree/js/notebooklist", "base/js/utils", "jquery"], function (Jupyter, dialog, notebooklist, utils, $) { + var ScheduleOperation = function () { + this.base_url = Jupyter.notebook_list.base_url; + this.bind_events(); + }; + + ScheduleOperation.prototype = Object.create(notebooklist.NotebookList.prototype); + + ScheduleOperation.prototype.bind_events = function () { + var that = this; + $(".schedule-button").click($.proxy(that.schedule_selected, this)); + }; + + ScheduleOperation.prototype.schedule_selected = function () { + var that = this; + var selected = Jupyter.notebook_list.selected; + if (selected.length > 1) { + alert("Cannot schedule more than one notebook at the same time!"); + return false; + } + + // Part1 Schedule + var lst = Array(20).fill(0).map((i, j) => j + 4); + var every_num = $(""); + $.each(lst, function (i, el) { every_num.append(new Option(el, el)); }); + var every_unit = $(""); + var unit_list = ["hours", "days", "weeks"]; + $.each(unit_list, function (i, el) { every_unit.append(new Option(el, el)); }); + + every_unit.change(function () { + switch (every_unit.val()) { + case "hours": + lst = Array(20).fill(0).map((i, j) => j + 1); + break; + case "days": + lst = Array(30).fill(0).map((i, j) => j + 1); + break; + case "weeks": + lst = Array(52).fill(0).map((i, j) => j + 1); + } + every_num.empty(); + $.each(lst, function (i, el) { every_num.append(new Option(el, el)); }); + }); + + var start_time = $("").val("00:00"); + var start_date = $("").val(new Date().toISOString().split("T")[0]); + var runs = $(""); + var runs_list = ["None", "1 time", "2 times", "3 times", "4 times", "5 times", "10 times", "50 times", "100 times"]; + 
$.each(runs_list, function (i, el) { runs.append(new Option(el, el)); }); + + var schedule_part = $("
") + .append("") + .append(every_num) + .append(every_unit) + .append("
") + .append("") + .append(start_time) + .append("
") + .append("") + .append(start_date) + .append("
") + .append("