diff --git a/containers/files/examples/trino_how_to.ipynb b/containers/files/examples/trino_how_to.ipynb new file mode 100644 index 0000000..2fc0a47 --- /dev/null +++ b/containers/files/examples/trino_how_to.ipynb @@ -0,0 +1,1874 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ba23d459-3afd-46a1-9730-bea3dca4b94c", + "metadata": {}, + "source": [ + "# Guide to using Trino\n", + "\n", + "In this example notebook:\n", + "\n", + "* \"SAIL\" is the name of the remote TRE\n", + "\n", + "* \"EPCC\" is the name of the hosting TRE" + ] + }, + { + "cell_type": "markdown", + "id": "9c07d1a9-c17a-4c0f-b808-7f4cb7639d48", + "metadata": {}, + "source": [ + "## Connecting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0534aa75-2af6-4e65-a275-6e8d995b7884", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Your Trino username goes here\n", + "TRINO_USERNAME = \"\"\n", + "# Your Trino password goes here\n", + "TRINO_PASS = \"\"\n", + "# no http, e.g. 
trino.teleport.pk.serp.ac.uk\n", + "TRINO_URL = \"trino.teleport.pk.serp.ac.uk\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ca79e533-7ca7-4686-b88e-9bb88d4b7bc6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from trino.auth import BasicAuthentication\n", + "from sqlalchemy import create_engine, text\n", + "\n", + "engine = create_engine(\n", + " \"trino://{0}:{1}@{2}:443\".format(TRINO_USERNAME, TRINO_PASS, TRINO_URL),\n", + " connect_args={\n", + " \"auth\": BasicAuthentication(TRINO_USERNAME, TRINO_PASS),\n", + " \"http_scheme\": \"https\",\n", + " # You may need to uncomment out the line below if you are getting certificate errors \n", + " # \"verify\": False\n", + " }\n", + ")\n", + "\n", + "def call(q):\n", + " ResultProxy = engine.connect().execute(text(q))\n", + " results = ResultProxy.fetchall()\n", + " dataframe = pd.DataFrame(results)\n", + " dataframe.columns = ResultProxy.keys()\n", + " return dataframe" + ] + }, + { + "cell_type": "markdown", + "id": "99512f77-ccee-4b9d-84d4-2e06158ab148", + "metadata": {}, + "source": [ + "## Basic querying" + ] + }, + { + "cell_type": "markdown", + "id": "98b4118a-50a1-4284-a1ac-0cef44e15ed0", + "metadata": {}, + "source": [ + "### Make sure the Trino connection is working" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6f4e008f-2793-4b7d-8dc8-c6fbbea48b02", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
node_idhttp_urinode_versioncoordinatorstate
07d804d793929http://172.26.0.4:8080420Trueactive
\n", + "
" + ], + "text/plain": [ + " node_id http_uri node_version coordinator state\n", + "0 7d804d793929 http://172.26.0.4:8080 420 True active" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = 'SELECT * FROM system.runtime.nodes LIMIT 3'\n", + "call(q)" + ] + }, + { + "cell_type": "markdown", + "id": "1cbed61f-1757-41fd-987c-6ea29f09af3c", + "metadata": {}, + "source": [ + "### See connectors, schemas, and tables available to you" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "12676def-ef0f-405e-812d-1c1510444efa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Catalog
0epcc
1sail
2system
\n", + "
" + ], + "text/plain": [ + " Catalog\n", + "0 epcc\n", + "1 sail\n", + "2 system" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Schema
0information_schema
1sf1
2sf10
3sf100
4sf1000
5sf10000
6sf100000
7sf300
8sf3000
9sf30000
10tiny
\n", + "
" + ], + "text/plain": [ + " Schema\n", + "0 information_schema\n", + "1 sf1\n", + "2 sf10\n", + "3 sf100\n", + "4 sf1000\n", + "5 sf10000\n", + "6 sf100000\n", + "7 sf300\n", + "8 sf3000\n", + "9 sf30000\n", + "10 tiny" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Table
0call_center
1catalog_page
2catalog_returns
3catalog_sales
4customer
5customer_address
6customer_demographics
7date_dim
8dbgen_version
9household_demographics
10income_band
11inventory
12item
13promotion
14reason
15ship_mode
16store
17store_returns
18store_sales
19time_dim
20warehouse
21web_page
22web_returns
23web_sales
24web_site
\n", + "
" + ], + "text/plain": [ + " Table\n", + "0 call_center\n", + "1 catalog_page\n", + "2 catalog_returns\n", + "3 catalog_sales\n", + "4 customer\n", + "5 customer_address\n", + "6 customer_demographics\n", + "7 date_dim\n", + "8 dbgen_version\n", + "9 household_demographics\n", + "10 income_band\n", + "11 inventory\n", + "12 item\n", + "13 promotion\n", + "14 reason\n", + "15 ship_mode\n", + "16 store\n", + "17 store_returns\n", + "18 store_sales\n", + "19 time_dim\n", + "20 warehouse\n", + "21 web_page\n", + "22 web_returns\n", + "23 web_sales\n", + "24 web_site" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# See connectors\n", + "q = 'SHOW catalogs'\n", + "display(call(q))\n", + "\n", + "# # See schemas in connector/catalog, assuming you have a connector called \"epcc\"\n", + "q = 'SHOW schemas in epcc'\n", + "display(call(q))\n", + "\n", + "# # See tables in a schema, assuming connecter called \"epcc\", schema called \"sf300\"\n", + "q = 'SHOW tables in epcc.sf300'\n", + "display(call(q))" + ] + }, + { + "cell_type": "markdown", + "id": "23bf8ac7-3701-49b8-bcbe-2ae8bb6a7e84", + "metadata": {}, + "source": [ + "### Query a table in your hosting TRE" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "31b64d6f-79cd-4b59-ad51-11540a76d040", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
w_warehouse_skw_warehouse_idw_warehouse_namew_warehouse_sq_ftw_street_numberw_street_namew_street_typew_suite_numberw_cityw_countyw_statew_zipw_countryw_gmt_offset
01AAAAAAAABAAAAAAAConventional childr977787.06516thParkwaySuite 470Pleasant HillSan Miguel CountyNM83604United States-7.00
12AAAAAAAACAAAAAAAImportant issues liv138504.0600View FirstAvenueSuite PSalemHuron CountyMI48048United States-5.00
23AAAAAAAADAAAAAAADoors canno294242.0534Ash LaurelDr.Suite 0SalemZiebach CountySD58048United States-6.00
34AAAAAAAAEAAAAAAABad cards must make.621234.0368Wilson ElmDriveSuite 80SalemRichland CountyOH48048United States-5.00
45AAAAAAAAFAAAAAAANoneNaNNoneNoneNoneNoneFive PointsLuce CountyMI46098United StatesNone
\n", + "
" + ], + "text/plain": [ + " w_warehouse_sk w_warehouse_id w_warehouse_name w_warehouse_sq_ft \\\n", + "0 1 AAAAAAAABAAAAAAA Conventional childr 977787.0 \n", + "1 2 AAAAAAAACAAAAAAA Important issues liv 138504.0 \n", + "2 3 AAAAAAAADAAAAAAA Doors canno 294242.0 \n", + "3 4 AAAAAAAAEAAAAAAA Bad cards must make. 621234.0 \n", + "4 5 AAAAAAAAFAAAAAAA None NaN \n", + "\n", + " w_street_number w_street_name w_street_type w_suite_number \\\n", + "0 651 6th Parkway Suite 470 \n", + "1 600 View First Avenue Suite P \n", + "2 534 Ash Laurel Dr. Suite 0 \n", + "3 368 Wilson Elm Drive Suite 80 \n", + "4 None None None None \n", + "\n", + " w_city w_county w_state w_zip w_country \\\n", + "0 Pleasant Hill San Miguel County NM 83604 United States \n", + "1 Salem Huron County MI 48048 United States \n", + "2 Salem Ziebach County SD 58048 United States \n", + "3 Salem Richland County OH 48048 United States \n", + "4 Five Points Luce County MI 46098 United States \n", + "\n", + " w_gmt_offset \n", + "0 -7.00 \n", + "1 -5.00 \n", + "2 -6.00 \n", + "3 -5.00 \n", + "4 None " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# replace \"tablename\" with the name of the table\n", + "# assuming you're using the connector called \"epcc\" and table is in a schema called \"sf300\"\n", + "q = 'SELECT * FROM epcc.sf300.warehouse LIMIT 5'\n", + "# read the result of this query into a dataframe\n", + "df = call(q)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "7732e884-cb43-41cf-aa2e-1c6368ae512f", + "metadata": {}, + "source": [ + "### Understand what catalogs, schemas, and tables are available in the remote TRE\n", + "By using **query passthrough** to the `system.jdbc` schema, you can get a list of things available in the remote TRE" + ] + }, + { + "cell_type": "markdown", + "id": "226a9a5e-e804-428f-bd7c-a4e9db807ca2", + "metadata": {}, + "source": [ + "#### Catalogs" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "id": "9741961a-7fac-4116-87be-c8cf4f264e65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
table_cat
0jmx
1memory
2system
3tpcds
4tpch
\n", + "
" + ], + "text/plain": [ + " table_cat\n", + "0 jmx\n", + "1 memory\n", + "2 system\n", + "3 tpcds\n", + "4 tpch" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = '''\n", + "SELECT * FROM TABLE(\n", + " sail.system.query(\n", + " query => '\n", + " SELECT * FROM system.jdbc.catalogs\n", + " '\n", + " )\n", + ")'''\n", + "call(q)" + ] + }, + { + "cell_type": "markdown", + "id": "542181e1-864a-4366-a856-9421f4d88ddf", + "metadata": {}, + "source": [ + "#### Schemas" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "dc68a254-4885-40df-b269-ab96873ddecc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
table_schemtable_catalog
0currentjmx
1historyjmx
2information_schemajmx
3defaultmemory
4information_schemamemory
5information_schemasystem
6jdbcsystem
7metadatasystem
8runtimesystem
9information_schematpcds
10sf1tpcds
11sf10tpcds
12sf100tpcds
13sf1000tpcds
14sf10000tpcds
15sf100000tpcds
16sf300tpcds
17sf3000tpcds
18sf30000tpcds
19tinytpcds
20information_schematpch
21sf1tpch
22sf100tpch
23sf1000tpch
24sf10000tpch
25sf100000tpch
26sf300tpch
27sf3000tpch
28sf30000tpch
29tinytpch
\n", + "
" + ], + "text/plain": [ + " table_schem table_catalog\n", + "0 current jmx\n", + "1 history jmx\n", + "2 information_schema jmx\n", + "3 default memory\n", + "4 information_schema memory\n", + "5 information_schema system\n", + "6 jdbc system\n", + "7 metadata system\n", + "8 runtime system\n", + "9 information_schema tpcds\n", + "10 sf1 tpcds\n", + "11 sf10 tpcds\n", + "12 sf100 tpcds\n", + "13 sf1000 tpcds\n", + "14 sf10000 tpcds\n", + "15 sf100000 tpcds\n", + "16 sf300 tpcds\n", + "17 sf3000 tpcds\n", + "18 sf30000 tpcds\n", + "19 tiny tpcds\n", + "20 information_schema tpch\n", + "21 sf1 tpch\n", + "22 sf100 tpch\n", + "23 sf1000 tpch\n", + "24 sf10000 tpch\n", + "25 sf100000 tpch\n", + "26 sf300 tpch\n", + "27 sf3000 tpch\n", + "28 sf30000 tpch\n", + "29 tiny tpch" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = '''\n", + "SELECT * FROM TABLE(\n", + " sail.system.query(\n", + " query => '\n", + " SELECT * FROM system.jdbc.schemas\n", + " '\n", + " )\n", + ")'''\n", + "call(q)" + ] + }, + { + "cell_type": "markdown", + "id": "18a9e2b9-e6e7-46f6-832e-425836f2ad56", + "metadata": {}, + "source": [ + "#### Tables" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "550a5208-b50d-40e7-9936-8bfae754788e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
table_cattable_schemtable_nametable_typeremarkstype_cattype_schemtype_nameself_referencing_col_nameref_generation
0jmxinformation_schemacolumnsTABLENoneNoneNoneNoneNoneNone
1jmxinformation_schematablesTABLENoneNoneNoneNoneNoneNone
2jmxinformation_schemaviewsTABLENoneNoneNoneNoneNoneNone
3jmxinformation_schemaschemataTABLENoneNoneNoneNoneNoneNone
4jmxinformation_schematable_privilegesTABLENoneNoneNoneNoneNoneNone
.................................
724tpchsf100000partTABLENoneNoneNoneNoneNoneNone
725tpchsf100000partsuppTABLENoneNoneNoneNoneNoneNone
726tpchsf100000supplierTABLENoneNoneNoneNoneNoneNone
727tpchsf100000nationTABLENoneNoneNoneNoneNoneNone
728tpchsf100000regionTABLENoneNoneNoneNoneNoneNone
\n", + "

729 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " table_cat table_schem table_name table_type remarks \\\n", + "0 jmx information_schema columns TABLE None \n", + "1 jmx information_schema tables TABLE None \n", + "2 jmx information_schema views TABLE None \n", + "3 jmx information_schema schemata TABLE None \n", + "4 jmx information_schema table_privileges TABLE None \n", + ".. ... ... ... ... ... \n", + "724 tpch sf100000 part TABLE None \n", + "725 tpch sf100000 partsupp TABLE None \n", + "726 tpch sf100000 supplier TABLE None \n", + "727 tpch sf100000 nation TABLE None \n", + "728 tpch sf100000 region TABLE None \n", + "\n", + " type_cat type_schem type_name self_referencing_col_name ref_generation \n", + "0 None None None None None \n", + "1 None None None None None \n", + "2 None None None None None \n", + "3 None None None None None \n", + "4 None None None None None \n", + ".. ... ... ... ... ... \n", + "724 None None None None None \n", + "725 None None None None None \n", + "726 None None None None None \n", + "727 None None None None None \n", + "728 None None None None None \n", + "\n", + "[729 rows x 10 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = '''\n", + "SELECT * FROM TABLE(\n", + " sail.system.query(\n", + " query => '\n", + " SELECT * FROM system.jdbc.tables\n", + " '\n", + " )\n", + ")'''\n", + "call(q)" + ] + }, + { + "cell_type": "markdown", + "id": "751984f3-ea78-4c46-aa99-b854d97ab65f", + "metadata": {}, + "source": [ + "If you want to filter by a text value with query passthrough, it's important that you put the string value inside double apostrophes." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5627ce36-9d93-493f-b13b-f1b299229947", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
table_cattable_schemtable_nametable_typeremarkstype_cattype_schemtype_nameself_referencing_col_nameref_generation
0tpchinformation_schemacolumnsTABLENoneNoneNoneNoneNoneNone
1tpchinformation_schematablesTABLENoneNoneNoneNoneNoneNone
2tpchinformation_schemaviewsTABLENoneNoneNoneNoneNoneNone
3tpchinformation_schemaschemataTABLENoneNoneNoneNoneNoneNone
4tpchinformation_schematable_privilegesTABLENoneNoneNoneNoneNoneNone
.................................
75tpchsf100000partTABLENoneNoneNoneNoneNoneNone
76tpchsf100000partsuppTABLENoneNoneNoneNoneNoneNone
77tpchsf100000supplierTABLENoneNoneNoneNoneNoneNone
78tpchsf100000nationTABLENoneNoneNoneNoneNoneNone
79tpchsf100000regionTABLENoneNoneNoneNoneNoneNone
\n", + "

80 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " table_cat table_schem table_name table_type remarks \\\n", + "0 tpch information_schema columns TABLE None \n", + "1 tpch information_schema tables TABLE None \n", + "2 tpch information_schema views TABLE None \n", + "3 tpch information_schema schemata TABLE None \n", + "4 tpch information_schema table_privileges TABLE None \n", + ".. ... ... ... ... ... \n", + "75 tpch sf100000 part TABLE None \n", + "76 tpch sf100000 partsupp TABLE None \n", + "77 tpch sf100000 supplier TABLE None \n", + "78 tpch sf100000 nation TABLE None \n", + "79 tpch sf100000 region TABLE None \n", + "\n", + " type_cat type_schem type_name self_referencing_col_name ref_generation \n", + "0 None None None None None \n", + "1 None None None None None \n", + "2 None None None None None \n", + "3 None None None None None \n", + "4 None None None None None \n", + ".. ... ... ... ... ... \n", + "75 None None None None None \n", + "76 None None None None None \n", + "77 None None None None None \n", + "78 None None None None None \n", + "79 None None None None None \n", + "\n", + "[80 rows x 10 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = '''\n", + "SELECT * FROM TABLE(\n", + " sail.system.query(\n", + " query => '\n", + " SELECT * FROM system.jdbc.tables WHERE table_cat = ''tpch''\n", + " '\n", + " )\n", + ")'''\n", + "call(q)" + ] + }, + { + "cell_type": "markdown", + "id": "dcd16e28-4384-4e38-9189-17114c2ece83", + "metadata": {}, + "source": [ + "### Query a table in the remote TRE\n", + "When you query a table in the remote TRE, you should always use **query passthrough**. \n", + "\n", + "This will make your queries much faster.\n", + "\n", + "Here is an example of how to use query passthrough to get a subset of data from the 'part' table in the 'sf1' schema using the 'tpch' connector.\n", + "We want only rows where `size` is greater than or equal to 50." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "d6cf0baf-390f-460c-b808-bdb86fb9e59e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
partkeynamemfgrbrandtypesizecontainerretailpricecomment
0150005firebrick drab puff grey lawnManufacturer#5Brand#51LARGE BURNISHED COPPER50JUMBO BOX1055.00nts haggle
1150116yellow almond magenta smoke cornsilkManufacturer#2Brand#23MEDIUM BRUSHED NICKEL50JUMBO CASE1166.11es. even, final p
2150174khaki cornsilk magenta dodger turquoiseManufacturer#4Brand#42ECONOMY BURNISHED BRASS50LG PACK1224.17fully reg
3150177tan red blue chocolate powderManufacturer#1Brand#15STANDARD ANODIZED COPPER50SM DRUM1227.17nts wake
4150224goldenrod puff ivory black oliveManufacturer#1Brand#13ECONOMY BRUSHED STEEL50WRAP CASE1274.22w around the de
..............................
3884149684linen lavender grey brown lemonManufacturer#4Brand#45SMALL PLATED COPPER50JUMBO BAG1733.68fully final pint
3885149748lemon bisque dim chartreuse frostedManufacturer#3Brand#33LARGE BURNISHED COPPER50LG PKG1797.74lose quick
3886149768cream ghost bisque burlywood thistleManufacturer#1Brand#15PROMO BRUSHED BRASS50SM JAR1817.76le carefully slyly
3887149840midnight medium sienna metallic saddleManufacturer#3Brand#32LARGE BRUSHED COPPER50JUMBO CASE1889.84nic dolphins engag
3888149985tomato aquamarine antique black almondManufacturer#4Brand#45STANDARD PLATED COPPER50JUMBO BOX2034.98gular ideas use
\n", + "

3889 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " partkey name mfgr \\\n", + "0 150005 firebrick drab puff grey lawn Manufacturer#5 \n", + "1 150116 yellow almond magenta smoke cornsilk Manufacturer#2 \n", + "2 150174 khaki cornsilk magenta dodger turquoise Manufacturer#4 \n", + "3 150177 tan red blue chocolate powder Manufacturer#1 \n", + "4 150224 goldenrod puff ivory black olive Manufacturer#1 \n", + "... ... ... ... \n", + "3884 149684 linen lavender grey brown lemon Manufacturer#4 \n", + "3885 149748 lemon bisque dim chartreuse frosted Manufacturer#3 \n", + "3886 149768 cream ghost bisque burlywood thistle Manufacturer#1 \n", + "3887 149840 midnight medium sienna metallic saddle Manufacturer#3 \n", + "3888 149985 tomato aquamarine antique black almond Manufacturer#4 \n", + "\n", + " brand type size container retailprice \\\n", + "0 Brand#51 LARGE BURNISHED COPPER 50 JUMBO BOX 1055.00 \n", + "1 Brand#23 MEDIUM BRUSHED NICKEL 50 JUMBO CASE 1166.11 \n", + "2 Brand#42 ECONOMY BURNISHED BRASS 50 LG PACK 1224.17 \n", + "3 Brand#15 STANDARD ANODIZED COPPER 50 SM DRUM 1227.17 \n", + "4 Brand#13 ECONOMY BRUSHED STEEL 50 WRAP CASE 1274.22 \n", + "... ... ... ... ... ... \n", + "3884 Brand#45 SMALL PLATED COPPER 50 JUMBO BAG 1733.68 \n", + "3885 Brand#33 LARGE BURNISHED COPPER 50 LG PKG 1797.74 \n", + "3886 Brand#15 PROMO BRUSHED BRASS 50 SM JAR 1817.76 \n", + "3887 Brand#32 LARGE BRUSHED COPPER 50 JUMBO CASE 1889.84 \n", + "3888 Brand#45 STANDARD PLATED COPPER 50 JUMBO BOX 2034.98 \n", + "\n", + " comment \n", + "0 nts haggle \n", + "1 es. even, final p \n", + "2 fully reg \n", + "3 nts wake \n", + "4 w around the de \n", + "... ... 
\n", + "3884 fully final pint \n", + "3885 lose quick \n", + "3886 le carefully slyly \n", + "3887 nic dolphins engag \n", + "3888 gular ideas use \n", + "\n", + "[3889 rows x 9 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "q = '''\n", + "SELECT * FROM TABLE(\n", + " sail.system.query(\n", + " query => '\n", + " SELECT * FROM tpch.sf1.part \n", + " WHERE size >= 50\n", + " '\n", + " )\n", + ")'''\n", + "result = call(q)\n", + "display(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "8e4d51e7-8f7e-4df3-bfca-066b9f997252", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num
0200000
\n", + "
" + ], + "text/plain": [ + " num\n", + "0 200000" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "q = '''\n", + "SELECT * FROM TABLE(\n", + " sail.system.query(\n", + " query => '\n", + " SELECT COUNT(*) as num FROM tpch.sf1.part\n", + " '\n", + " )\n", + ")'''\n", + "result = call(q)\n", + "display(result)" + ] + }, + { + "cell_type": "markdown", + "id": "994e47e2-f502-4c4e-b3a8-e4ee27d0858a", + "metadata": {}, + "source": [ + "We can see that the filtered query returned 3889 rows, with the entire table having 200,000 rows. \n", + "All of the query filtering was carried out on servers at the remote TRE, so *only* the data from the 3889 rows had to be transported\n", + "across the internet to the hosting TRE.\n", + "\n", + "If we did this query without passthrough:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "91ae6fe4-d7b3-4450-a1c8-feeedd6cd74f", + "metadata": {}, + "outputs": [], + "source": [ + "q = '''\n", + "SELECT * FROM sail.sf1.part\n", + "WHERE size >= 50\n", + "'''" + ] + }, + { + "cell_type": "markdown", + "id": "26ceb983-38ac-4ba9-9b77-019bc82c58e2", + "metadata": {}, + "source": [ + "The remote TRE would have to send all 200k rows to the hosting TRE before any of the filtering could be done. \n", + "\n", + "Those sort of queries will take a very long time to run." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d072db30-d3fd-4a88-9ec8-261ad456fca3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/containers/files/scripts/addcerts.sh b/containers/files/scripts/addcerts.sh old mode 100644 new mode 100755 diff --git a/containers/files/scripts/session_startup.sh b/containers/files/scripts/session_startup.sh index 838e0f0..b09d4c2 100644 --- a/containers/files/scripts/session_startup.sh +++ b/containers/files/scripts/session_startup.sh @@ -153,4 +153,10 @@ if [ ! -f $VSCODE_EXT_PATH ]; then fi echo "copying vscode extensions to $VSCODE_EXT_PATH" -cp -rf /tmpvscode/code-server/extensions/* ~/.local/share/code-server/extensions/ \ No newline at end of file +cp -rf /tmpvscode/code-server/extensions/* ~/.local/share/code-server/extensions/ + +## copy the example notebooks to the user area +cp -r /tmp/helpful-files /home/jovyan + +## add any mounted certificates to the cert stores (the image build moves addcerts.sh into start-notebook.d, so call it by absolute path) +/usr/local/bin/start-notebook.d/addcerts.sh \ No newline at end of file diff --git a/containers/jupyterlab/standard.Dockerfile b/containers/jupyterlab/standard.Dockerfile index de795dc..3889bab 100644 --- a/containers/jupyterlab/standard.Dockerfile +++ b/containers/jupyterlab/standard.Dockerfile @@ -19,15 +19,23 @@ RUN touch /etc/pip.conf && \ mkdir /tmpvscode && \ mkdir -p /tmp/install/requirements && \ mkdir -p /tmp/install/scripts && \ - mkdir -p /usr/local/bin/start-notebook.d/ + mkdir -p /usr/local/bin/start-notebook.d/ && \ + mkdir -p /tmp/helpful-files # THEN CHOWN RUN chown -R $NB_USER: 
/etc/pip.conf && \ chown -R $NB_USER: /usr/local/bin/start-notebook.d/ && \ chown -R $NB_USER: /tmpvscode && \ chown -R $NB_USER: /tmp/install/scripts/ && \ + chown -R $NB_USER: /certs && \ + chown -R $NB_USER: /tmp/helpful-files && \ echo source /home/jovyan/.bashrc >> /etc/profile +# NB_USER also requires chown permissions on certificate-related folders +# so we can trust any mounted certs on startup +RUN chown -R $NB_USER: /usr/local/share/ca-certificates/ && \ + chown -R $NB_USER: /etc/ssl/certs/ + # Install necessary packages for addons RUN apt-get update && apt-get install -yq \ gdebi-core \ @@ -58,6 +66,7 @@ COPY containers/files/cloudbeaver/run-server.sh /opt/cloudbeaver/run-server.sh RUN chmod +x /opt/cloudbeaver/run-server.sh && \ chown -R $NB_USER: /opt/cloudbeaver && \ + chown -R $NB_USER: /opt/java/openjdk/lib/security/cacerts && \ mkdir -p /opt/cloudbeaver/workspace/user-projects/cbadmin/.dbeaver # !!! MAKE THIS SOMETHING YOU MOUNT IN THE CHART @@ -138,9 +147,13 @@ RUN pip install jupyter_cloudbeaver_proxy-0.1-py3-none-any.whl --no-cache-dir && WORKDIR /tmp/install/scripts/ RUN ./vsextensions.sh +# TUTORIAL NOTEBOOKS +COPY containers/files/examples/*.ipynb /tmp/helpful-files/ + # SESSION STARTUP RUN mv /tmp/install/scripts/session_startup.sh /usr/local/bin/start-notebook.d/session_startup.sh && \ - mv /tmp/install/scripts/condarc_builder.sh /usr/local/bin/start-notebook.d/condarc_builder.sh + mv /tmp/install/scripts/condarc_builder.sh /usr/local/bin/start-notebook.d/condarc_builder.sh && \ + mv /tmp/install/scripts/addcerts.sh /usr/local/bin/start-notebook.d/addcerts.sh # REBUILD JUPYTER AND CLEANUP RUN jupyter lab build --dev-build=False --minimize=True && \