diff --git a/experiments/duckdb_census_of_population.ipynb b/experiments/duckdb_census_of_population.ipynb
index 75c7202..e9881f6 100644
--- a/experiments/duckdb_census_of_population.ipynb
+++ b/experiments/duckdb_census_of_population.ipynb
@@ -2,18 +2,28 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 36,
    "id": "56ac906e",
    "metadata": {},
    "outputs": [],
    "source": [
+    "import buckaroo\n",
     "import duckdb\n",
-    "import pandas as pd"
+    "from IPython.core.interactiveshell import InteractiveShell\n",
+    "from lonboard import viz\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Enable multiple outputs per cell\n",
+    "InteractiveShell.ast_node_interactivity = \"all\"\n",
+    "# Show all columns\n",
+    "pd.set_option('display.max_columns', None)\n",
+    "\n",
+    "output_data_folder = '/data/experiments'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 2,
    "id": "708e293d",
    "metadata": {},
    "outputs": [],
@@ -28,36 +38,23 @@
    "id": "5d97e882",
    "metadata": {},
    "source": [
-    "# DA"
+    "# 1.0 Total private dwellings and private dwellings per square kilometer for Ottawa\n",
+    "These values are from the 2021 Census of Population"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "c1b2ccf6",
+   "execution_count": 28,
+   "id": "580c82ad-f64d-439f-9055-2307fdf7cccd",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b2fd24f9af2c45369a4aa93b249227c0",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+       ""
       ]
      },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 19,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -66,111 +63,44 @@
     "con.execute(\"\"\"\n",
     "DROP TABLE IF EXISTS geo_data;\n",
     "CREATE TABLE geo_data AS\n",
-    "SELECT da.da_dguid, da_cop.* EXCLUDE dguid, da.geom FROM 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS da_cop,\n",
-    "'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS da\n",
-    "WHERE da_cop.dguid = da.da_dguid\n",
+    "-- Dwelling density = total private dwellings (count_total_4) per square kilometre;\n",
+    "-- ST_Area_Spheroid returns square metres, hence the division by 1,000,000.\n",
+    "-- NOTE(review): ST_Area_Spheroid is documented as assuming [latitude, longitude] axis order - confirm geo.geom matches.\n",
+    "SELECT geo.da_dguid, cop.count_total_4, CAST(round((cop.count_total_4 / (ST_Area_Spheroid(geo.geom) / 1000000.0)), 0) AS INTEGER) AS count_total_4_per_square_km, geo.geom \n",
+    "FROM 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n",
+    "'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
+    "WHERE geo.csd_name in ('Ottawa') AND cop.da_dguid = geo.da_dguid;\n",
     "\"\"\")"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 26,
-   "id": "33a3819e",
+   "cell_type": "markdown",
+   "id": "c95fdadf-3dba-4328-8e1d-2a22baa3d293",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[(57936,)]"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "con.execute(\"SELECT count(*) FROM geo_data;\")\n",
-    "con.fetchall()"
+    "## 1.1 Export result as a GeoJSON"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
-   "id": "6d38118b",
+   "execution_count": 37,
+   "id": "0528eeb4-fb89-4640-94c6-c0eda4cbe764",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ee1b19d9ad3c4a9483ba3cef3600ba8e",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+       ""
       ]
      },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 20,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "con.execute(\"\"\"\n",
-    "COPY geo_data TO 'da_2021_cop.gdb'\n",
-    "WITH (\n",
-    " FORMAT GDAL,\n",
-    " DRIVER 'OpenFileGDB',\n",
-    " GEOMETRY_TYPE 'POLYGON',\n",
-    " SRS 'EPSG:4326'\n",
-    ");\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "76ada8f9",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e4d6cb52cb864a0cac1941bb25315b79",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "con.execute(\"\"\"\n",
-    "COPY geo_data TO 'da_2021_cop.geojson'\n",
+    "con.execute(f\"\"\"\n",
+    "COPY geo_data TO '{output_data_folder}/da_2021_private_dwellings.geojson'\n",
     "WITH (\n",
     " FORMAT GDAL,\n",
     " DRIVER 'GeoJSON',\n",
@@ -180,40 +110,71 @@
     "\"\"\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "34ac7bd0-5068-4681-9405-ebe68f8dcf28",
+   "metadata": {},
+   "source": [
+    "## 1.2 Export result as file geodatabase"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "id": "c32ab0f8",
+   "execution_count": 38,
+   "id": "6d38118b",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c226f348d3f14c0abbeb2bfa907a7db6",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+       ""
       ]
      },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 10,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "con.execute(\"\"\"\n",
-    "COPY geo_data TO 'da_2021_cop_geom.parquet' (FORMAT PARQUET);\n",
+    "con.execute(f\"\"\"\n",
+    "COPY geo_data TO '{output_data_folder}/da_2021_private_dwellings.gdb'\n",
+    "WITH (\n",
+    " FORMAT GDAL,\n",
+    " DRIVER 'OpenFileGDB',\n",
+    " GEOMETRY_TYPE 'POLYGON',\n",
+    " SRS 'EPSG:4326'\n",
+    ");\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d57086e-31d5-4462-803d-38e60b74a043",
+   "metadata": {},
+   "source": [
+    "## 1.3 Export result as GeoParquet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "c32ab0f8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "con.execute(f\"\"\"\n",
+    "COPY geo_data TO '{output_data_folder}/da_2021_private_dwellings.parquet' (FORMAT PARQUET);\n",
     "\"\"\")"
    ]
   }
@@ -234,7 +195,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.9"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,