From b7f8506708e68e52fe27f22deaa9186d57636899 Mon Sep 17 00:00:00 2001 From: Diego Ripley Date: Tue, 27 May 2025 15:22:10 +0000 Subject: [PATCH] Add DuckDB and lonboard example. Still need lots of work to get it right --- ...duckdb_census_of_population_lonboard.ipynb | 265 ++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 experiments/duckdb_census_of_population_lonboard.ipynb diff --git a/experiments/duckdb_census_of_population_lonboard.ipynb b/experiments/duckdb_census_of_population_lonboard.ipynb new file mode 100644 index 0000000..8b824f7 --- /dev/null +++ b/experiments/duckdb_census_of_population_lonboard.ipynb @@ -0,0 +1,265 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 330, + "id": "56ac906e", + "metadata": {}, + "outputs": [], + "source": [ + "import duckdb\n", + "import geopandas as gpd\n", + "import jenkspy\n", + "from lonboard import BitmapTileLayer, Map, PolygonLayer\n", + "from lonboard.colormap import apply_categorical_cmap\n", + "import numpy as np\n", + "import pyarrow as pa" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "708e293d", + "metadata": {}, + "outputs": [], + "source": [ + "con = duckdb.connect()\n", + "con.install_extension(\"spatial\")\n", + "con.load_extension(\"spatial\")" + ] + }, + { + "cell_type": "markdown", + "id": "5d97e882", + "metadata": {}, + "source": [ + "# 1.0 Total private dwellings and private dwellings per square kilometer for Ottawa\n", + "These values are from the 2021 Census of Population" + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "id": "580c82ad-f64d-439f-9055-2307fdf7cccd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 345, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "con.execute(\"\"\"\n", + "DROP TABLE IF EXISTS geo_data;\n", + "CREATE TABLE geo_data AS\n", + "SELECT\n", + " geo.da_dguid,\n", + " cop.count_total_1,\n", + " cop.count_total_4,\n", + " cop.count_total_6,\n", + " cop.count_total_7,\n", + " ROUND(\n", + " (cop.count_total_4 / cop.count_total_7), 2\n", + " ) AS count_total_4_per_square_km,\n", + " geo.geom\n", + "FROM\n", + " 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n", + " 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n", + "WHERE geo.csd_name IN ('Ottawa') AND cop.da_dguid = geo.da_dguid;\n", + "\n", + "\"\"\")\n", + "\n", + "con.execute(\"\"\"\n", + "COPY geo_data TO './da_2021_private_dwellings.parquet' (FORMAT PARQUET);\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 346, + "id": "e4794c4d-6013-40b5-8e59-046fc2495d34", + "metadata": {}, + "outputs": [], + "source": [ + "private_dwellings_per_square_km = con.execute(\"SELECT DISTINCT count_total_4_per_square_km FROM geo_data\").fetchall()\n", + "\n", + "values = np.array([v[0] for v in private_dwellings_per_square_km])\n", + "\n", + "# Compute Jenks breaks\n", + "num_classes = 5\n", + "breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)" + ] + }, + { + "cell_type": "code", + "execution_count": 347, + "id": "8672f3f8-82bf-439e-8558-cb3566f2062f", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a bin range mapping: (lower, upper) for each bin\n", + "bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n", + "\n", + "# Create a function to get the range string for a value\n", + "def jenks_range(value) -> str:\n", + " for i, (low, high) in enumerate(bin_ranges):\n", + " if low <= value <= high:\n", + " return f\"{int(low)}–{int(high)}\"\n", + " return \"unknown\"\n", + "\n", + "\n", + "dwellings_df = gpd.read_parquet('./da_2021_private_dwellings.parquet')\n", + "dwellings_df['category'] = dwellings_df[\"count_total_4_per_square_km\"].apply(lambda v: jenks_range(v))\n", + "dwellings_df['category'] = dwellings_df['category'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 353, + "id": "f265300a-9cf7-4ab7-8bdb-d66feae3a2f8", + "metadata": {}, + "outputs": [], + "source": [ + "# Categories to colors\n", + "cmap = {}\n", + "colors = [\n", + " [255, 255, 255],\n", + " [255, 191.25, 191.25],\n", + " [255, 127.50, 127.50],\n", + " [255, 63.75, 63.75],\n", + " [255, 0, 0]\n", + "]\n", + "for index, value in enumerate(dwellings_df['category'].unique()):\n", + " cmap[value] = colors[index]" + ] + }, + { + "cell_type": "code", + "execution_count": 355, + "id": "a6a2ae6c-61b7-4c0e-bbe7-a580a511ee5a", + "metadata": {}, + "outputs": [], + "source": [ + "# OpenStreetMap\n", + "\n", + "# We set `max_requests < 0` because `tile.openstreetmap.org` supports HTTP/2.\n", + "basemap = BitmapTileLayer(\n", + " data=\"https://tile.openstreetmap.org/{z}/{x}/{y}.png\",\n", + " tile_size=256,\n", + " max_requests=-1,\n", + " min_zoom=0,\n", + " max_zoom=19,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 356, + "id": "56e96627-0e82-436a-bd8e-e51546c7526b", + "metadata": {}, + "outputs": [], + "source": [ + "# Google Satellite\n", + "basemap = BitmapTileLayer(\n", + " data=\"http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={z}\",\n", + " tile_size=256,\n", + " max_requests=-1,\n", + " min_zoom=0,\n", + " max_zoom=19,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 357, + "id": "fef53303-147d-44af-b8f4-b824f64b486f", + "metadata": {}, + "outputs": [], + "source": [ + "get_color = apply_categorical_cmap(pa.array(dwellings_df['category']), cmap)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 358, + "id": "6935a061-41fc-4223-b155-4caf4c6df103", + "metadata": {}, + "outputs": [], + "source": [ + "cop_layer = PolygonLayer.from_geopandas(gdf=gpf,\n", + " stroked=True,\n", + " get_fill_color=get_color,\n", + " get_line_color=[255, 255, 255],\n", + " get_line_width=5,\n", + " line_width_units=\"meters\",\n", + " opacity=0.4,\n", + " auto_highlight = True,\n", + " highlight_color=[0,0,0,0]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 359, + "id": "1ab3dded-cff9-40cb-ac32-306581018083", + "metadata": {}, + "outputs": [], + "source": [ + "m = Map([basemap, cop_layer])" + ] + }, + { + "cell_type": "code", + "execution_count": 360, + "id": "3ca8da4b-d287-47f9-8488-cff7b02586b8", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fb9e669faec9417dbf4954620c6f9af6", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…" + ] + }, + "execution_count": 360, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}