From dae8401b1d998d84c6b226a78ec2c93803c9e41b Mon Sep 17 00:00:00 2001 From: discdiver Date: Thu, 30 Apr 2020 23:14:16 -0400 Subject: [PATCH 1/6] update python notebook example to use more common pandas methods and built in plotting option --- api-guide/ids-api-guide-python-2.ipynb | 749 +++++++++++++++++++------ 1 file changed, 592 insertions(+), 157 deletions(-) diff --git a/api-guide/ids-api-guide-python-2.ipynb b/api-guide/ids-api-guide-python-2.ipynb index e1e5c82..5b9d214 100644 --- a/api-guide/ids-api-guide-python-2.ipynb +++ b/api-guide/ids-api-guide-python-2.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# Accessing International Debt Statistics (IDS) through World Bank Data API\n", - "## Part 2 - Get and explore data\n", + "## Part 2 - Retrieve, explore, and visualize data\n", "### *Python 3*" ] }, @@ -13,7 +13,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Date: 04/15/2020
\n", + "Date: 04/15/2020, Updated 04/30/2020\n", + "\n", "[Data source](https://datacatalog.worldbank.org/dataset/international-debt-statistics)
\n", "[Developer Information](https://datahelpdesk.worldbank.org/knowledgebase/topics/125589-developer-information)
\n", "[GitHub](https://github.com/worldbank/debt-data/tree/master/api-guide)" @@ -43,25 +44,21 @@ "- [numpy](https://www.numpy.org/): Data analysis\n", "- [datetime](https://docs.python.org/3.5/library/datetime.html): Parsing dates\n", "- [wbdata](https://wbdata.readthedocs.io/en/latest/#): World Bank Data API wrapper\n", - "- [plotly](https://plot.ly/python/): Data visualization\n", "\n", "Then, open up your preferred mode of writing Python. This could be in a [Jupyter Notebook](https://jupyter.org/) using [Jupyter Lab](https://blog.jupyter.org/jupyterlab-is-ready-for-users-5a6f039b8906), using a code editor (like Atom or Visual Studio) + command line, or just from the command line. Now follow the rest of the steps below to retreive and analyze the World Bank data." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ - "# Importing packages\n", + "# Import packages\n", "import pandas as pd\n", "import numpy as np\n", "import datetime\n", - "import wbdata\n", - "import plotly.express as px\n", - "import plotly.io as pio\n", - "pio.renderers.default = \"notebook\" # use \"pio.renderers\" to see the default renderer" + "import wbdata" ] }, { @@ -86,11 +83,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "# Selecting the indicator\n", + "# Select the indicator\n", "indicatorSelection = {\"DT.DOD.DLXF.CD\":\"ExternalDebtStock\"}" ] }, @@ -122,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -135,12 +132,12 @@ "metadata": {}, "source": [ "#### Time\n", - "Here you will select the time frame for your data series. The format for the date is year, month, day. We are selecting data from 2009 to 2018." + "Select the time frame for your data series. The format for the date is year, month, day. We are selecting data from 2009 through 2018." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -158,16 +155,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " id name\n", - "---- -----------------------------\n", - " 6 International Debt Statistics\n" + "6\tInternational Debt Statistics\n" ] } ], @@ -186,18 +181,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this step, we will retrieve the data using the World Bank Data API call. The package \"wbdata,\" created by Oliver Sherouse, can request information from the World Bank database as a dictionary containing full metadata or as a pandas DataFrame. In this example, we will request the data, with the parameters outlined above, as a pandas DataFrame." + "In this step, we will retrieve the data using the World Bank Data API call. The package [wbdata](https://wbdata.readthedocs.io/en/stable/) created by Oliver Sherouse, can request information from the World Bank database as a dictionary containing full metadata or as a pandas DataFrame. In this example, we will request the data, with the parameters outlined above, as a pandas DataFrame." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "# Making the API call and assigning the resulting DataFrame to \"EXD\"\n", "EXD = wbdata.get_dataframe(indicatorSelection,\n", - " source=IDS,\n", + " # source=IDS, # Jeff says had to uncomment\n", " country = locationSelection, \n", " data_date = timeSelection, \n", " convert_date = False)" @@ -207,31 +202,98 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If you want a quick preview of your freshly retrieved DataFrame, you can print the first 5 lines" + "For a quick preview of your freshly retrieved DataFrame, let's print the first 5 lines with the `.head()` method." 
] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 116, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " ExternalDebtStock\n", - "country date \n", - "East Asia & Pacific (excluding high income) 2018 1.391850e+12\n", - " 2017 1.285327e+12\n", - " 2016 1.172696e+12\n", - " 2015 1.036149e+12\n", - " 2014 1.040363e+12\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ExternalDebtStock
countrydate
East Asia & Pacific (excluding high income)20181.404732e+12
20171.287079e+12
20161.170963e+12
20151.037230e+12
20141.039942e+12
\n", + "
" + ], + "text/plain": [ + " ExternalDebtStock\n", + "country date \n", + "East Asia & Pacific (excluding high income) 2018 1.404732e+12\n", + " 2017 1.287079e+12\n", + " 2016 1.170963e+12\n", + " 2015 1.037230e+12\n", + " 2014 1.039942e+12" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# Print the first 5 lines of the DataFrame\n", - "print(EXD.head())" + "EXD.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "# Make a copy of our DataFrame so we don't need to call the API again if we want to fix something.\n", + "EXD_df = EXD.copy()" ] }, { @@ -256,35 +318,214 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 118, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countrydateExternalDebtStock
0East Asia & Pacific (excluding high income)20181.404732e+12
1East Asia & Pacific (excluding high income)20171.287079e+12
2East Asia & Pacific (excluding high income)20161.170963e+12
3East Asia & Pacific (excluding high income)20151.037230e+12
4East Asia & Pacific (excluding high income)20141.039942e+12
\n", + "
" + ], + "text/plain": [ + " country date ExternalDebtStock\n", + "0 East Asia & Pacific (excluding high income) 2018 1.404732e+12\n", + "1 East Asia & Pacific (excluding high income) 2017 1.287079e+12\n", + "2 East Asia & Pacific (excluding high income) 2016 1.170963e+12\n", + "3 East Asia & Pacific (excluding high income) 2015 1.037230e+12\n", + "4 East Asia & Pacific (excluding high income) 2014 1.039942e+12" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EXD_df = EXD_df.reset_index()\n", + "EXD_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 60 entries, 0 to 59\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 country 60 non-null object \n", + " 1 date 60 non-null object \n", + " 2 ExternalDebtStock 60 non-null float64\n", + "dtypes: float64(1), object(2)\n", + "memory usage: 1.5+ KB\n" + ] + } + ], "source": [ - "# Reshape the data\n", - "EXDreshaped = pd.DataFrame(EXD.to_records())" + "EXD_df.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The data for the long-term external debt stock is currently in units. To improve a table's or chart's readability, convert the units to billions and round the number to 0 decimal places. To do this, create a function called \"formatNum\" that you can then run on your DataFrame." + "The data for the long-term external debt stock is currently in units. To improve a table's or chart's readability, let's convert the units to billions and round the number to 0 decimal places. 
" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ - "# Creating a function that will change units to billions and round to 0 decimal places\n", - "def formatNum(x):\n", - " y = x/1000000000\n", - " z = round(y)\n", - " return(z)\n", - "\n", - "# Running the function on the desired data column\n", - "EXDreshaped.ExternalDebtStock = formatNum(EXDreshaped.ExternalDebtStock)" + "# Change units to billions and round to 0 decimal places\n", + "EXD_df['ExternalDebtStock'] = round(EXD_df['ExternalDebtStock']/1_000_000_000, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countrydateExternalDebtStock
0East Asia & Pacific (excluding high income)20181405.0
1East Asia & Pacific (excluding high income)20171287.0
2East Asia & Pacific (excluding high income)20161171.0
3East Asia & Pacific (excluding high income)20151037.0
4East Asia & Pacific (excluding high income)20141040.0
\n", + "
" + ], + "text/plain": [ + " country date ExternalDebtStock\n", + "0 East Asia & Pacific (excluding high income) 2018 1405.0\n", + "1 East Asia & Pacific (excluding high income) 2017 1287.0\n", + "2 East Asia & Pacific (excluding high income) 2016 1171.0\n", + "3 East Asia & Pacific (excluding high income) 2015 1037.0\n", + "4 East Asia & Pacific (excluding high income) 2014 1040.0" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EXD_df.head()" ] }, { @@ -296,25 +537,116 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 122, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RegionYearExternalDebtStock
0East Asia & Pacific (excluding high income)20181405.0
1East Asia & Pacific (excluding high income)20171287.0
2East Asia & Pacific (excluding high income)20161171.0
3East Asia & Pacific (excluding high income)20151037.0
4East Asia & Pacific (excluding high income)20141040.0
\n", + "
" + ], + "text/plain": [ + " Region Year ExternalDebtStock\n", + "0 East Asia & Pacific (excluding high income) 2018 1405.0\n", + "1 East Asia & Pacific (excluding high income) 2017 1287.0\n", + "2 East Asia & Pacific (excluding high income) 2016 1171.0\n", + "3 East Asia & Pacific (excluding high income) 2015 1037.0\n", + "4 East Asia & Pacific (excluding high income) 2014 1040.0" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Renaming column headers\n", - "EXDclean = EXDreshaped.rename(index=str, columns={\n", - " \"date\":\"Year\",\n", - " \"country\":\"Region\",\n", - "})" + "# Rename column headers\n", + "EXD_df.columns=['Region', 'Year', 'ExternalDebtStock']\n", + "EXD_df.head()" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 123, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Sub-Saharan Africa 10\n", + "South Asia 10\n", + "Middle East & North Africa 10\n", + "East Asia & Pacific 10\n", + "Europe & Central Asia 10\n", + "Latin America & Caribbean 10\n", + "Name: Region, dtype: int64" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Remove the \"(excluding high income)\" from each of the region names\n", - "EXDclean[\"Region\"] = EXDclean[\"Region\"].str.replace(\"excluding high income\",\"\").str.replace(\")\",\"\").str.replace(\"(\",\"\")" + "# Remove \"(excluding high income)\" from each of the region names\n", + "EXD_df[\"Region\"] = EXD_df[\"Region\"].str.replace(\"\\(excluding high income\\)\",\"\")\n", + "EXD_df['Region'].value_counts()" ] }, { @@ -326,135 +658,238 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 124, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " Region Year ExternalDebtStock\n", - "0 East Asia & Pacific 2018 1392.0\n", - "1 East Asia & Pacific 2017 1285.0\n", - "2 East 
Asia & Pacific 2016 1173.0\n", - "3 East Asia & Pacific 2015 1036.0\n", - "4 East Asia & Pacific 2014 1040.0\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RegionYearExternalDebtStock
0East Asia & Pacific20181405.0
1East Asia & Pacific20171287.0
2East Asia & Pacific20161171.0
3East Asia & Pacific20151037.0
4East Asia & Pacific20141040.0
\n", + "
" + ], + "text/plain": [ + " Region Year ExternalDebtStock\n", + "0 East Asia & Pacific 2018 1405.0\n", + "1 East Asia & Pacific 2017 1287.0\n", + "2 East Asia & Pacific 2016 1171.0\n", + "3 East Asia & Pacific 2015 1037.0\n", + "4 East Asia & Pacific 2014 1040.0" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(EXDclean.head())" + "EXD_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Data Visualization\n", - "\n", - "Now use the package \"plotly\" to create a basic line graph of the regional trends in long-term external debt stock." + "Make the `Year` column the index and make it a datetime dtype." ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 125, "metadata": {}, "outputs": [ { "data": { "text/html": [ - " \n", - " " + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RegionYearExternalDebtStock
Year
2009-01-01Latin America & Caribbean2009747.0
2009-01-01South Asia2009293.0
2009-01-01Middle East & North Africa2009142.0
2009-01-01Europe & Central Asia2009950.0
2009-01-01East Asia & Pacific2009494.0
\n", + "
" + ], + "text/plain": [ + " Region Year ExternalDebtStock\n", + "Year \n", + "2009-01-01 Latin America & Caribbean 2009 747.0\n", + "2009-01-01 South Asia 2009 293.0\n", + "2009-01-01 Middle East & North Africa 2009 142.0\n", + "2009-01-01 Europe & Central Asia 2009 950.0\n", + "2009-01-01 East Asia & Pacific 2009 494.0" ] }, + "execution_count": 125, "metadata": {}, - "output_type": "display_data" - }, + "output_type": "execute_result" + } + ], + "source": [ + "EXD_df.index = pd.to_datetime(EXD_df['Year'])\n", + "EXD_df = EXD_df.sort_index()\n", + "EXD_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Data Visualization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's make a basic line graph with pandas (using Matplotlib as the backend plotting engine). " + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ { "data": { - "text/html": [ - "
\n", - " \n", - " \n", - "
\n", - " \n", - "
" + "image/png": "\n", + "text/plain": [ + "
" ] }, - "metadata": {}, + "metadata": { + "needs_background": "light" + }, "output_type": "display_data" } ], "source": [ - "# Defining the data source\n", - "source = EXDclean\n", + "EXD_df.groupby('Region')['ExternalDebtStock'].plot(\n", + " kind='line', \n", + " legend='Region',\n", + " figsize=(10, 8),\n", + " title=\"Regional Long-term External Debt Stock (excluding High-Income countries)(USD billion)\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Summary \n", "\n", - "# Creating the chart\n", - "chart = px.line(EXDclean, \n", - " x=\"Year\",\n", - " y=\"ExternalDebtStock\",\n", - " color=\"Region\",\n", - " title=\"Regional Long-term External Debt Stock (excluding High-Income countries)(USD billion)\")\n", - "chart.update_layout(\n", - " plot_bgcolor=\"white\")\n", + "You've seen how to retrieve data from the World Bank API and use it to make a visualization! \n", "\n", - "# Displaying the chart\n", - "chart" + "We can't wait to see what interesting insights you can uncover with data from World Bank! 
🌍" ] } ], @@ -474,9 +909,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.7" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From f464b06484b2a920538dcf1fb2db84725b28f5d6 Mon Sep 17 00:00:00 2001 From: discdiver Date: Thu, 30 Apr 2020 23:24:20 -0400 Subject: [PATCH 2/6] remove source argument from api call so that it doesn't error --- api-guide/ids-api-guide-python-2.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/api-guide/ids-api-guide-python-2.ipynb b/api-guide/ids-api-guide-python-2.ipynb index 5b9d214..814d7af 100644 --- a/api-guide/ids-api-guide-python-2.ipynb +++ b/api-guide/ids-api-guide-python-2.ipynb @@ -192,7 +192,6 @@ "source": [ "# Making the API call and assigning the resulting DataFrame to \"EXD\"\n", "EXD = wbdata.get_dataframe(indicatorSelection,\n", - " # source=IDS, # Jeff says had to uncomment\n", " country = locationSelection, \n", " data_date = timeSelection, \n", " convert_date = False)" From 204ba2c71d1533f339733ffce7ccf43ce8196d6c Mon Sep 17 00:00:00 2001 From: discdiver Date: Fri, 1 May 2020 07:30:01 -0400 Subject: [PATCH 3/6] add package versions and edit setup --- api-guide/ids-api-guide-python-2.ipynb | 43 ++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/api-guide/ids-api-guide-python-2.ipynb b/api-guide/ids-api-guide-python-2.ipynb index 814d7af..3db050a 100644 --- a/api-guide/ids-api-guide-python-2.ipynb +++ b/api-guide/ids-api-guide-python-2.ipynb @@ -13,7 +13,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Date: 04/15/2020, Updated 04/30/2020\n", + "Date: 04/15/2020, Updated 05/01/2020\n", "\n", "[Data source](https://datacatalog.worldbank.org/dataset/international-debt-statistics)
\n", "[Developer Information](https://datahelpdesk.worldbank.org/knowledgebase/topics/125589-developer-information)
\n", @@ -28,10 +28,10 @@ "\n", "The following code in this guide will show step-by-step how to:\n", "\n", - "1. Setup up your environment with the needed packages\n", - "2. Input your data specifications (as selected in [Part 1](https://worldbank.github.io/debt-data/api-guide/ids-api-guide-python-1.html))\n", - "3. Use the World Bank Data API call to return the specified data\n", - "4. Explore the data through basic descriptive analysis and create a pretty chart." + "1. Set up your environment with the needed packages.\n", + "2. Input your data specifications, as selected in [Part 1](https://worldbank.github.io/debt-data/api-guide/ids-api-guide-python-1.html).\n", + "3. Use the World Bank Data API to return the specified data.\n", + "4. Explore the data through basic descriptive analysis and create a chart." ] }, { @@ -40,27 +40,52 @@ "source": [ "## 1. Setup\n", "To start, make sure you have the following packages installed on your machine. If you aren't familiar with how to install a Python package, visit each of the linked packages below for instructions.\n", + "\n", "- [pandas](https://pandas.pydata.org/): Data analysis\n", "- [numpy](https://www.numpy.org/): Data analysis\n", "- [datetime](https://docs.python.org/3.5/library/datetime.html): Parsing dates\n", "- [wbdata](https://wbdata.readthedocs.io/en/latest/#): World Bank Data API wrapper\n", "\n", - "Then, open up your preferred mode of writing Python. This could be in a [Jupyter Notebook](https://jupyter.org/) using [Jupyter Lab](https://blog.jupyter.org/jupyterlab-is-ready-for-users-5a6f039b8906), using a code editor (like Atom or Visual Studio) + command line, or just from the command line. Now follow the rest of the steps below to retreive and analyze the World Bank data." + "Then, open your preferred mode of writing Python. 
This could be in a [Jupyter notebook](https://jupyter.org/) with or without [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/) or in a code editor or IDE such as [Atom](https://atom.io/), [Visual Studio](https://visualstudio.microsoft.com/), or [PyCharm](https://www.jetbrains.com/pycharm/). \n", + "\n", + "Next, follow the rest of the steps below to retrieve and visualize the World Bank data." ] }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", - "import pandas as pd\n", - "import numpy as np\n", "import datetime\n", + "import pandas as pd \n", + "import numpy as np\n", "import wbdata" ] }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pandas version: 1.0.3\n", + "NumPy version: 1.18.1\n", + "wbdata version: 0.2.7\n" + ] + } + ], + "source": [ + "# Package versions used in most recent code update (for compatibility)\n", + "print(f\"pandas version: {pd.__version__}\")\n", + "print(f\"NumPy version: {np.__version__}\")\n", + "print(f\"wbdata version: {wbdata.__version__}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, From 105a75f9821774d889bd5c31442e47b2fb8df505 Mon Sep 17 00:00:00 2001 From: discdiver Date: Fri, 1 May 2020 07:51:02 -0400 Subject: [PATCH 4/6] improve readability with edits and switching order --- api-guide/ids-api-guide-python-2.ipynb | 462 +++++++++++++++++-------- 1 file changed, 324 insertions(+), 138 deletions(-) diff --git a/api-guide/ids-api-guide-python-2.ipynb b/api-guide/ids-api-guide-python-2.ipynb index 3db050a..d393495 100644 --- a/api-guide/ids-api-guide-python-2.ipynb +++ b/api-guide/ids-api-guide-python-2.ipynb @@ -48,12 +48,12 @@ "\n", "Then, open your preferred mode of writing Python. 
This could be in a [Jupyter notebook](https://jupyter.org/) with or without [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/) or in a code editor or IDE such as [Atom](https://atom.io/), [Visual Studio](https://visualstudio.microsoft.com/), or [PyCharm](https://www.jetbrains.com/pycharm/). \n", "\n", - "Next, follow the rest of the steps below to retrieve and visualize the World Bank data." + "Next, follow the steps below to retrieve and visualize the World Bank data." ] }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -108,11 +108,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "# Select the indicator\n", "indicatorSelection = {\"DT.DOD.DLXF.CD\":\"ExternalDebtStock\"}" ] }, @@ -144,11 +143,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "# Select the countries or regions\n", "locationSelection = [\"ECA\",\"SSA\",\"SAS\",\"LAC\",\"MNA\",\"EAP\"]" ] }, @@ -162,11 +160,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "# Selecting the time frame\n", "timeSelection = (datetime.datetime(2009, 1, 1), datetime.datetime(2018, 12, 31))" ] }, @@ -180,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -206,16 +203,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this step, we will retrieve the data using the World Bank Data API call. The package [wbdata](https://wbdata.readthedocs.io/en/stable/) created by Oliver Sherouse, can request information from the World Bank database as a dictionary containing full metadata or as a pandas DataFrame. 
In this example, we will request the data, with the parameters outlined above, as a pandas DataFrame." + "In this step, we will retrieve the data using the World Bank Data API call. \n", + "\n", + "The package [wbdata](https://wbdata.readthedocs.io/en/stable/) created by Oliver Sherouse, can request information from the World Bank database as a dictionary containing full metadata or as a pandas DataFrame. \n", + "\n", + "In this example, we will request the data, with the parameters outlined above, and save it as a pandas DataFrame named `EXD`." ] }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# Making the API call and assigning the resulting DataFrame to \"EXD\"\n", "EXD = wbdata.get_dataframe(indicatorSelection,\n", " country = locationSelection, \n", " data_date = timeSelection, \n", @@ -231,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -300,23 +300,28 @@ " 2014 1.039942e+12" ] }, - "execution_count": 116, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Print the first 5 lines of the DataFrame\n", "EXD.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's make a copy of our DataFrame so we don't need to call the API again if we want to fix something." + ] + }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "# Make a copy of our DataFrame so we don't need to call the API again if we want to fix something.\n", "EXD_df = EXD.copy()" ] }, @@ -337,12 +342,19 @@ "metadata": {}, "source": [ "#### Data Cleaning\n", - "As you saw in the preview of the data in section 3, the DataFrame's format needs to be cleaned up. We want to reshape the data. This will get it ready to present in a table or in a visualization." 
+ "As you saw in the preview of the data in section 3, the DataFrame's format needs to be cleaned up. We want to reshape the data by moving the hierarchical index into the columns and making the `date` column the index. These changes will make the data ready to present in a table or in a visualization." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's move the index into the columns first." ] }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -415,19 +427,26 @@ "4 East Asia & Pacific (excluding high income) 2014 1.039942e+12" ] }, - "execution_count": 118, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "EXD_df = EXD_df.reset_index()\n", - "EXD_df.head()" + "EXD_cleaned = EXD_df.reset_index()\n", + "EXD_cleaned.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Basic statistics" ] }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -448,29 +467,12 @@ } ], "source": [ - "EXD_df.info()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The data for the long-term external debt stock is currently in units. To improve a table's or chart's readability, let's convert the units to billions and round the number to 0 decimal places. 
" + "EXD_cleaned.info()" ] }, { "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [], - "source": [ - "# Change units to billions and round to 0 decimal places\n", - "EXD_df['ExternalDebtStock'] = round(EXD_df['ExternalDebtStock']/1_000_000_000, 0)" - ] - }, - { - "cell_type": "code", - "execution_count": 121, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -501,67 +503,109 @@ " \n", " \n", " \n", - " 0\n", - " East Asia & Pacific (excluding high income)\n", - " 2018\n", - " 1405.0\n", + " count\n", + " 60\n", + " 60\n", + " 6.000000e+01\n", " \n", " \n", - " 1\n", - " East Asia & Pacific (excluding high income)\n", - " 2017\n", - " 1287.0\n", + " unique\n", + " 6\n", + " 10\n", + " NaN\n", " \n", " \n", - " 2\n", - " East Asia & Pacific (excluding high income)\n", - " 2016\n", - " 1171.0\n", + " top\n", + " Sub-Saharan Africa (excluding high income)\n", + " 2015\n", + " NaN\n", " \n", " \n", - " 3\n", - " East Asia & Pacific (excluding high income)\n", - " 2015\n", - " 1037.0\n", + " freq\n", + " 10\n", + " 6\n", + " NaN\n", " \n", " \n", - " 4\n", - " East Asia & Pacific (excluding high income)\n", - " 2014\n", - " 1040.0\n", + " mean\n", + " NaN\n", + " NaN\n", + " 7.196861e+11\n", + " \n", + " \n", + " std\n", + " NaN\n", + " NaN\n", + " 4.553943e+11\n", + " \n", + " \n", + " min\n", + " NaN\n", + " NaN\n", + " 1.417278e+11\n", + " \n", + " \n", + " 25%\n", + " NaN\n", + " NaN\n", + " 3.166273e+11\n", + " \n", + " \n", + " 50%\n", + " NaN\n", + " NaN\n", + " 5.567326e+11\n", + " \n", + " \n", + " 75%\n", + " NaN\n", + " NaN\n", + " 1.177590e+12\n", + " \n", + " \n", + " max\n", + " NaN\n", + " NaN\n", + " 1.534062e+12\n", " \n", " \n", "\n", "" ], "text/plain": [ - " country date ExternalDebtStock\n", - "0 East Asia & Pacific (excluding high income) 2018 1405.0\n", - "1 East Asia & Pacific (excluding high income) 2017 1287.0\n", - "2 East Asia & Pacific (excluding high income) 2016 1171.0\n", - "3 East Asia & Pacific 
(excluding high income) 2015 1037.0\n", - "4 East Asia & Pacific (excluding high income) 2014 1040.0" + " country date ExternalDebtStock\n", + "count 60 60 6.000000e+01\n", + "unique 6 10 NaN\n", + "top Sub-Saharan Africa (excluding high income) 2015 NaN\n", + "freq 10 6 NaN\n", + "mean NaN NaN 7.196861e+11\n", + "std NaN NaN 4.553943e+11\n", + "min NaN NaN 1.417278e+11\n", + "25% NaN NaN 3.166273e+11\n", + "50% NaN NaN 5.567326e+11\n", + "75% NaN NaN 1.177590e+12\n", + "max NaN NaN 1.534062e+12" ] }, - "execution_count": 121, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "EXD_df.head()" + "EXD_cleaned.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "These next two sections of code will clean up the naming of headers and regions. First, it will rename the column headers. Second, it will remove the redundant \"(excluding high income)\" from the region names. We can instead include that information in the title of the chart." + "Let's improve the column names." 
] }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -595,31 +639,31 @@ " 0\n", " East Asia & Pacific (excluding high income)\n", " 2018\n", - " 1405.0\n", + " 1.404732e+12\n", " \n", " \n", " 1\n", " East Asia & Pacific (excluding high income)\n", " 2017\n", - " 1287.0\n", + " 1.287079e+12\n", " \n", " \n", " 2\n", " East Asia & Pacific (excluding high income)\n", " 2016\n", - " 1171.0\n", + " 1.170963e+12\n", " \n", " \n", " 3\n", " East Asia & Pacific (excluding high income)\n", " 2015\n", - " 1037.0\n", + " 1.037230e+12\n", " \n", " \n", " 4\n", " East Asia & Pacific (excluding high income)\n", " 2014\n", - " 1040.0\n", + " 1.039942e+12\n", " \n", " \n", "\n", @@ -627,62 +671,150 @@ ], "text/plain": [ " Region Year ExternalDebtStock\n", - "0 East Asia & Pacific (excluding high income) 2018 1405.0\n", - "1 East Asia & Pacific (excluding high income) 2017 1287.0\n", - "2 East Asia & Pacific (excluding high income) 2016 1171.0\n", - "3 East Asia & Pacific (excluding high income) 2015 1037.0\n", - "4 East Asia & Pacific (excluding high income) 2014 1040.0" + "0 East Asia & Pacific (excluding high income) 2018 1.404732e+12\n", + "1 East Asia & Pacific (excluding high income) 2017 1.287079e+12\n", + "2 East Asia & Pacific (excluding high income) 2016 1.170963e+12\n", + "3 East Asia & Pacific (excluding high income) 2015 1.037230e+12\n", + "4 East Asia & Pacific (excluding high income) 2014 1.039942e+12" ] }, - "execution_count": 122, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Rename column headers\n", - "EXD_df.columns=['Region', 'Year', 'ExternalDebtStock']\n", - "EXD_df.head()" + "EXD_cleaned.columns=['Region', 'Year', 'ExternalDebtStock']\n", + "EXD_cleaned.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make the `Year` column the index, make it a datetime dtype, and sort it. 
This will improve the appearance of the DataFrame and make it possible to use pandas plotting methods to visualize the data in a line chart later." ] }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RegionYearExternalDebtStock
Year
2009-01-01Latin America & Caribbean (excluding high income)20097.469656e+11
2009-01-01South Asia20092.933071e+11
2009-01-01Middle East & North Africa (excluding high inc...20091.417278e+11
2009-01-01Europe & Central Asia (excluding high income)20099.504303e+11
2009-01-01East Asia & Pacific (excluding high income)20094.944027e+11
\n", + "
" + ], "text/plain": [ - "Sub-Saharan Africa 10\n", - "South Asia 10\n", - "Middle East & North Africa 10\n", - "East Asia & Pacific 10\n", - "Europe & Central Asia 10\n", - "Latin America & Caribbean 10\n", - "Name: Region, dtype: int64" + " Region Year \\\n", + "Year \n", + "2009-01-01 Latin America & Caribbean (excluding high income) 2009 \n", + "2009-01-01 South Asia 2009 \n", + "2009-01-01 Middle East & North Africa (excluding high inc... 2009 \n", + "2009-01-01 Europe & Central Asia (excluding high income) 2009 \n", + "2009-01-01 East Asia & Pacific (excluding high income) 2009 \n", + "\n", + " ExternalDebtStock \n", + "Year \n", + "2009-01-01 7.469656e+11 \n", + "2009-01-01 2.933071e+11 \n", + "2009-01-01 1.417278e+11 \n", + "2009-01-01 9.504303e+11 \n", + "2009-01-01 4.944027e+11 " ] }, - "execution_count": 123, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Remove \"(excluding high income)\" from each of the region names\n", - "EXD_df[\"Region\"] = EXD_df[\"Region\"].str.replace(\"\\(excluding high income\\)\",\"\")\n", - "EXD_df['Region'].value_counts()" + "EXD_cleaned.index = pd.to_datetime(EXD_cleaned['Year'])\n", + "EXD_cleaned = EXD_cleaned.sort_index()\n", + "EXD_cleaned.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now our data should be ready to present in a table or visualize in a chart. Let's take a look at the first five lines again so we can compare the cleaned up data to the raw output in section 3." + "The data for the long-term external debt stock is currently in units. To improve readability, let's convert the units to billions and round the number to 0 decimal places. 
" ] }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "EXD_cleaned['ExternalDebtStock'] = round(EXD_cleaned['ExternalDebtStock']/1_000_000_000, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -710,70 +842,119 @@ " Year\n", " ExternalDebtStock\n", " \n", + " \n", + " Year\n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " 0\n", - " East Asia & Pacific\n", - " 2018\n", - " 1405.0\n", + " 2009-01-01\n", + " Latin America & Caribbean (excluding high income)\n", + " 2009\n", + " 747.0\n", " \n", " \n", - " 1\n", - " East Asia & Pacific\n", - " 2017\n", - " 1287.0\n", + " 2009-01-01\n", + " South Asia\n", + " 2009\n", + " 293.0\n", " \n", " \n", - " 2\n", - " East Asia & Pacific\n", - " 2016\n", - " 1171.0\n", + " 2009-01-01\n", + " Middle East & North Africa (excluding high inc...\n", + " 2009\n", + " 142.0\n", " \n", " \n", - " 3\n", - " East Asia & Pacific\n", - " 2015\n", - " 1037.0\n", + " 2009-01-01\n", + " Europe & Central Asia (excluding high income)\n", + " 2009\n", + " 950.0\n", " \n", " \n", - " 4\n", - " East Asia & Pacific\n", - " 2014\n", - " 1040.0\n", + " 2009-01-01\n", + " East Asia & Pacific (excluding high income)\n", + " 2009\n", + " 494.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Region Year ExternalDebtStock\n", - "0 East Asia & Pacific 2018 1405.0\n", - "1 East Asia & Pacific 2017 1287.0\n", - "2 East Asia & Pacific 2016 1171.0\n", - "3 East Asia & Pacific 2015 1037.0\n", - "4 East Asia & Pacific 2014 1040.0" + " Region Year \\\n", + "Year \n", + "2009-01-01 Latin America & Caribbean (excluding high income) 2009 \n", + "2009-01-01 South Asia 2009 \n", + "2009-01-01 Middle East & North Africa (excluding high inc... 
2009 \n", + "2009-01-01 Europe & Central Asia (excluding high income) 2009 \n", + "2009-01-01 East Asia & Pacific (excluding high income) 2009 \n", + "\n", + " ExternalDebtStock \n", + "Year \n", + "2009-01-01 747.0 \n", + "2009-01-01 293.0 \n", + "2009-01-01 142.0 \n", + "2009-01-01 950.0 \n", + "2009-01-01 494.0 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EXD_cleaned.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's remove the redundant \"(excluding high income)\" from the region names. We can include that information in the title of the chart instead." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Middle East & North Africa 10\n", + "East Asia & Pacific 10\n", + "Europe & Central Asia 10\n", + "Latin America & Caribbean 10\n", + "South Asia 10\n", + "Sub-Saharan Africa 10\n", + "Name: Region, dtype: int64" ] }, - "execution_count": 124, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "EXD_df.head()" + "EXD_cleaned[\"Region\"] = EXD_cleaned[\"Region\"].str.replace(\"\\(excluding high income\\)\",\"\")\n", + "EXD_cleaned['Region'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Make the `Year` column the index and make it a datetime dtype." + "Now our data should be ready to present in a table or visualize in a chart. Let's take a look at the first five rows again so we can compare the cleaned up data to the raw output in section 3." 
] }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -853,15 +1034,13 @@ "2009-01-01 East Asia & Pacific 2009 494.0" ] }, - "execution_count": 125, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "EXD_df.index = pd.to_datetime(EXD_df['Year'])\n", - "EXD_df = EXD_df.sort_index()\n", - "EXD_df.head()" + "EXD_cleaned.head()" ] }, { @@ -880,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -897,7 +1076,7 @@ } ], "source": [ - "EXD_df.groupby('Region')['ExternalDebtStock'].plot(\n", + "EXD_cleaned.groupby('Region')['ExternalDebtStock'].plot(\n", " kind='line', \n", " legend='Region',\n", " figsize=(10, 8),\n", @@ -905,6 +1084,13 @@ ");" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Feel free to explore the data by modifying the chart or making your own!" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -913,7 +1099,7 @@ "\n", "You've seen how to retrieve data from the World Bank API and use it to make a visualization! \n", "\n", - "We can't wait to see what interesting insights you can uncover with data from World Bank! 🌍" + "We can't wait to see what interesting insights you uncover with data from World Bank! 
🌍" ] } ], From 9c4552b74b9922eb4325a3c6ecbcab448fd37d49 Mon Sep 17 00:00:00 2001 From: discdiver Date: Fri, 1 May 2020 07:54:20 -0400 Subject: [PATCH 5/6] create .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6bed033 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.ipynb_checkpoints/ \ No newline at end of file From f6235a9506c5ae1bebda1bffd2e0d2388e34dad1 Mon Sep 17 00:00:00 2001 From: discdiver Date: Fri, 1 May 2020 07:59:55 -0400 Subject: [PATCH 6/6] create html version of second python notebook --- api-guide/ids-api-guide-python-2.html | 1020 ++++++++++++++++++++----- 1 file changed, 815 insertions(+), 205 deletions(-) diff --git a/api-guide/ids-api-guide-python-2.html b/api-guide/ids-api-guide-python-2.html index 9854e5f..6ea55ad 100644 --- a/api-guide/ids-api-guide-python-2.html +++ b/api-guide/ids-api-guide-python-2.html @@ -13017,45 +13017,6 @@ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExternalDebtStock
countrydate
East Asia & Pacific (excluding high income)20181.404732e+12
20171.287079e+12
20161.170963e+12
20151.037230e+12
20141.039942e+12
+ + + + + + + + + +
+
+
+

Let's make a copy of our DataFrame so we don't need to call the API again if we want to fix something.

+ +
+
+
+
In [9]:
+
+
+
EXD_df = EXD.copy()
+
+
@@ -13415,96 +13470,484 @@

4. Explore the data!
-

Data Cleaning

As you saw in the preview of the data in section 3, the DataFrame's format needs to be cleaned up. We want to reshape the data. This will get it ready to present in a table or in a visualization.

+

Data Cleaning

As you saw in the preview of the data in section 3, the DataFrame's format needs to be cleaned up. We want to reshape the data by moving the hierarchical index into the columns and making the date column the index. These changes will make the data ready to present in a table or in a visualization.

+ +
+
+

+
+
+
+

Let's move the index into the columns first.

-
In [8]:
+
In [10]:
-
# Reshape the data
-EXDreshaped = pd.DataFrame(EXD.to_records())
+
EXD_cleaned = EXD_df.reset_index()
+EXD_cleaned.head()
 
+
+
+ + +
+ +
Out[10]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
countrydateExternalDebtStock
0East Asia & Pacific (excluding high income)20181.404732e+12
1East Asia & Pacific (excluding high income)20171.287079e+12
2East Asia & Pacific (excluding high income)20161.170963e+12
3East Asia & Pacific (excluding high income)20151.037230e+12
4East Asia & Pacific (excluding high income)20141.039942e+12
+
+
+ +
+ +
+
+
-

The data for the long-term external debt stock is currently in units. To improve a table's or chart's readability, convert the units to billions and round the number to 0 decimal places. To do this, create a function called "formatNum" that you can then run on your DataFrame.

+

Basic statistics

+
+
+
+
+
+
In [11]:
+
+
+
EXD_cleaned.info()
+
+
+
+ +
+
+ + +
+ +
+ + +
+
<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 60 entries, 0 to 59
+Data columns (total 3 columns):
+ #   Column             Non-Null Count  Dtype  
+---  ------             --------------  -----  
+ 0   country            60 non-null     object 
+ 1   date               60 non-null     object 
+ 2   ExternalDebtStock  60 non-null     float64
+dtypes: float64(1), object(2)
+memory usage: 1.5+ KB
+
+
+
+
+
+
-
In [9]:
+
In [12]:
-
# Creating a function that will change units to billions and round to 0 decimal places
-def formatNum(x):
-    y = x/1000000000
-    z = round(y)
-    return(z)
-
-# Running the function on the desired data column
-EXDreshaped.ExternalDebtStock = formatNum(EXDreshaped.ExternalDebtStock)
+
EXD_cleaned.describe(include='all')
 
+
+
+ + +
+ +
Out[12]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
countrydateExternalDebtStock
count60606.000000e+01
unique610NaN
topSub-Saharan Africa (excluding high income)2015NaN
freq106NaN
meanNaNNaN7.196861e+11
stdNaNNaN4.553943e+11
minNaNNaN1.417278e+11
25%NaNNaN3.166273e+11
50%NaNNaN5.567326e+11
75%NaNNaN1.177590e+12
maxNaNNaN1.534062e+12
+
+
+ +
+ +
+
+
-

These next two sections of code will clean up the naming of headers and regions. First, it will rename the column headers. Second, it will remove the redundant "(excluding high income)" from the region names. We can instead include that information in the title of the chart.

+

Let's improve the column names.

-
In [10]:
+
In [13]:
-
# Renaming column headers
-EXDclean = EXDreshaped.rename(index=str, columns={
-    "date":"Year",
-    "country":"Region",
-})
+
EXD_cleaned.columns=['Region', 'Year', 'ExternalDebtStock']
+EXD_cleaned.head()
 
+
+
+ + +
+ +
Out[13]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RegionYearExternalDebtStock
0East Asia & Pacific (excluding high income)20181.404732e+12
1East Asia & Pacific (excluding high income)20171.287079e+12
2East Asia & Pacific (excluding high income)20161.170963e+12
3East Asia & Pacific (excluding high income)20151.037230e+12
4East Asia & Pacific (excluding high income)20141.039942e+12
+
+
+ +
+ +
+
+ +
+
+
+
+

Make the Year column the index, make it a datetime dtype, and sort it. This will improve the appearance of the DataFrame and make it possible to use pandas plotting methods to visualize the data in a line chart later.

+ +
+
-
In [15]:
+
In [14]:
-
# Remove the "(excluding high income)" from each of the region names
-EXDclean["Region"] = EXDclean["Region"].str.replace("excluding high income","").str.replace(")","").str.replace("(","")
+
EXD_cleaned.index = pd.to_datetime(EXD_cleaned['Year'])
+EXD_cleaned = EXD_cleaned.sort_index()
+EXD_cleaned.head()
 
+
+
+ + +
+ +
Out[14]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RegionYearExternalDebtStock
Year
2009-01-01Latin America & Caribbean (excluding high income)20097.469656e+11
2009-01-01South Asia20092.933071e+11
2009-01-01Middle East & North Africa (excluding high inc...20091.417278e+11
2009-01-01Europe & Central Asia (excluding high income)20099.504303e+11
2009-01-01East Asia & Pacific (excluding high income)20094.944027e+11
+
+
+ +
+ +
+
+
-

Now our data should be ready to present in a table or visualize in a chart. Let's take a look at the first five lines again so we can compare the cleaned up data to the raw output in section 3.

+

The data for the long-term external debt stock is currently expressed in individual US dollars. To improve readability, let's convert the values to billions of US dollars and round them to 0 decimal places.

@@ -13514,7 +13957,20 @@

Data CleaningIn [20]:

-
print(EXDclean.head())
+
EXD_cleaned['ExternalDebtStock'] = round(EXD_cleaned['ExternalDebtStock']/1_000_000_000, 0)
+
+ +
+
+
+ +
+
-
In [13]:
+
In [17]:
-
# Defining the data source
-source = EXDclean
-
-# Creating the chart
-chart = px.line(EXDclean, 
-                x="Year",
-                y="ExternalDebtStock",
-                color="Region",
-                title="Regional Long-term External Debt Stock (excluding High-Income countries)(USD billion)")
-chart.update_layout(
-                plot_bgcolor="white")
-
-# Displaying the chart
-chart
+
EXD_cleaned["Region"] = EXD_cleaned["Region"].str.replace("\(excluding high income\)","")
+EXD_cleaned['Region'].value_counts()
 
@@ -13584,92 +14086,200 @@

Data Visualization -
+
Out[17]:
-
- - + +
+
Middle East & North Africa     10
+East Asia & Pacific            10
+Europe & Central Asia          10
+Latin America & Caribbean      10
+South Asia                     10
+Sub-Saharan Africa             10
+Name: Region, dtype: int64
+
+ +
+ +

+
+
+
+
+
+

Now our data should be ready to present in a table or visualize in a chart. Let's take a look at the first five rows again so we can compare the cleaned up data to the raw output in section 3.

+ +
+
+
+
+
+
In [18]:
+
+
+
EXD_cleaned.head()
+
+
+
+ +
+
+
-
+
Out[18]:
-
+
- - -
- -
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RegionYearExternalDebtStock
Year
2009-01-01Latin America & Caribbean2009747.0
2009-01-01South Asia2009293.0
2009-01-01Middle East & North Africa2009142.0
2009-01-01Europe & Central Asia2009950.0
2009-01-01East Asia & Pacific2009494.0
+
+
+
+
+
+
+
+
+
+

Let's make a basic line graph with pandas (using Matplotlib as the backend plotting engine).

+
+
+
+
+
+
In [19]:
+
+
+
EXD_cleaned.groupby('Region')['ExternalDebtStock'].plot(
+    kind='line', 
+    legend='Region',
+    figsize=(10, 8),
+    title="Regional Long-term External Debt Stock (excluding High-Income countries)(USD billion)"
+);
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

Feel free to explore the data by modifying the chart or making your own!

+ +
+
+
+
+
+
+

Summary

You've seen how to retrieve data from the World Bank API and use it to make a visualization!

+

We can't wait to see what interesting insights you uncover with data from the World Bank! 🌍

+ +
+