Skip to content

Commit

Permalink
Fixed Python package install issue with pip; switched to conda install
Browse files Browse the repository at this point in the history
  • Loading branch information
sandwi committed Nov 27, 2021
1 parent bf65b96 commit 39d809a
Showing 1 changed file with 74 additions and 5 deletions.
79 changes: 74 additions & 5 deletions ml-models/pacs008/xgboost/pacs008_xgboost_local.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,75 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install nltk\n",
"!pip install xgboost"
"# pip installs failed on this image, so the packages are installed through conda.\n",
"# %conda (unlike !conda) always targets the environment of the running kernel.\n",
"%conda install -y nltk\n",
"%conda install -y xgboost"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Labeled Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import boto3\n",
"import sagemaker\n",
"from sagemaker import get_execution_role\n",
"\n",
"# Execution role reported by SageMaker for this notebook\n",
"role = get_execution_role()\n",
"\n",
"# Region of a default-configured boto3 session, plus SageMaker session/client handles\n",
"region = boto3.session.Session().region_name\n",
"sm_session = sagemaker.Session()\n",
"sm_client = boto3.Session().client('sagemaker')\n",
"\n",
"print(\"Notebook is running with assumed role {}\".format(role))\n",
"print(\"Working with AWS services in the {} region\".format(region))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Working directory for the notebook\n",
"WORKDIR = os.getcwd()\n",
"# NOTE: despite its name, BASENAME holds the parent directory of WORKDIR\n",
"BASENAME = os.path.dirname(WORKDIR)\n",
"print(f\"WORKDIR: {WORKDIR}\")\n",
"print(f\"BASENAME: {BASENAME}\")\n",
"\n",
"# Local directory for downloaded data; exist_ok makes this cell safe to re-run\n",
"iso20022_data_path = 'iso20022-data'\n",
"os.makedirs(iso20022_data_path, exist_ok=True)\n",
"\n",
"# Store all prototype assets in this bucket\n",
"s3_bucket_name = 'iso20022-prototype-t3'\n",
"s3_bucket_uri = 's3://' + s3_bucket_name\n",
"\n",
"# Prefix for all files in this prototype\n",
"prefix = 'iso20022'\n",
"\n",
"pacs008_prefix = prefix + '/pacs008'\n",
"raw_data_prefix = pacs008_prefix + '/raw-data'\n",
"labeled_data_prefix = pacs008_prefix + '/labeled-data'\n",
"\n",
"labeled_data_location = s3_bucket_uri + '/' + labeled_data_prefix\n",
"print(f\"Raw labeled data location = {labeled_data_location}\")\n",
"\n",
"# Download the labeled dataset from S3 into the local data directory.\n",
"# The destination is built from iso20022_data_path so the two cannot drift apart.\n",
"labeled_data_file = 'labeled_data.csv'\n",
"labeled_data_local_path = os.path.join(iso20022_data_path, labeled_data_file)\n",
"s3_client = boto3.client('s3')\n",
"s3_client.download_file(s3_bucket_name, labeled_data_prefix + '/' + labeled_data_file, labeled_data_local_path)"
]
},
{
Expand All @@ -62,6 +129,7 @@
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import nltk\n",
"nltk.download('stopwords')\n",
"from nltk.corpus import stopwords\n",
"import string\n",
"import xgboost as xgb\n",
Expand Down Expand Up @@ -1358,10 +1426,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "conda_python3",
"display_name": "Python 3 (Data Science)",
"language": "python",
"name": "conda_python3"
"name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -1373,7 +1442,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
"version": "3.7.10"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 39d809a

Please sign in to comment.