Update DuckDB lonboard example

This commit is contained in:
Diego Ripley
2025-05-30 22:30:35 +00:00
parent 34d2c50046
commit 4eae3622d2
@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 1,
"id": "56ac906e", "id": "56ac906e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -25,28 +25,32 @@
"id": "5d97e882", "id": "5d97e882",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# 1.0 Total private dwellings and private dwellings per square kilometer for Ottawa\n", "# 1. Total private dwellings and private dwellings per square kilometer at Dissemination Area geographic level\n",
"These values are from the 2021 Census of Population" "These values are from the 2021 Census of Population"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 2,
"id": "580c82ad-f64d-439f-9055-2307fdf7cccd", "id": "580c82ad-f64d-439f-9055-2307fdf7cccd",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x7fd635715cb0>" "<duckdb.duckdb.DuckDBPyConnection at 0x7f05770247b0>"
] ]
}, },
"execution_count": 26, "execution_count": 2,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"\"\"\"\n",
"Vancouver CMA is geo.cma_dguid = '2021S0503933'\n",
"\"\"\"\n",
"\n",
"con.execute(\"\"\"\n", "con.execute(\"\"\"\n",
"DROP TABLE IF EXISTS geo_data;\n", "DROP TABLE IF EXISTS geo_data;\n",
"CREATE TABLE geo_data AS\n", "CREATE TABLE geo_data AS\n",
@@ -56,45 +60,43 @@
" cop.count_total_4,\n", " cop.count_total_4,\n",
" cop.count_total_6,\n", " cop.count_total_6,\n",
" cop.count_total_7,\n", " cop.count_total_7,\n",
" ROUND(\n", " CASE\n",
" (cop.count_total_4 / cop.count_total_7), 2\n", " WHEN cop.count_total_7 = 0.0 THEN 0\n",
" ) AS count_total_4_per_square_km,\n", " WHEN cop.count_total_4 = 0 THEN 0\n",
" WHEN cop.count_total_4 IS NULL THEN 0\n",
" ELSE \n",
" ROUND(\n",
" (cop.count_total_4 / cop.count_total_7), 2\n",
" ) \n",
" END AS count_total_4_per_square_km,\n",
" geo.geom\n", " geo.geom\n",
"FROM\n", "FROM\n",
" 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n", " 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n",
" 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n", " 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
"WHERE geo.csd_name IN ('Vancouver') AND cop.da_dguid = geo.da_dguid;\n", "WHERE geo.csd_name = 'Vancouver' AND cop.da_dguid = geo.da_dguid;\n",
"\n", "\n",
"\"\"\")\n", "\"\"\")\n",
"\n", "\n",
"con.execute(\"\"\"\n", "con.execute(\"\"\"\n",
"COPY geo_data TO './da_2021_private_dwellings.parquet' (FORMAT PARQUET);\n", "COPY geo_data TO './da_2021_characteristic.parquet' (FORMAT PARQUET);\n",
"\"\"\")" "\"\"\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 3,
"id": "e4794c4d-6013-40b5-8e59-046fc2495d34", "id": "e4794c4d-6013-40b5-8e59-046fc2495d34",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"private_dwellings_per_square_km = con.execute(\"SELECT DISTINCT count_total_4_per_square_km FROM geo_data\").fetchall()\n", "characteristic_values = con.execute(\"SELECT DISTINCT count_total_4_per_square_km FROM geo_data\").fetchall()\n",
"\n", "\n",
"values = np.array([v[0] for v in private_dwellings_per_square_km])\n", "values = np.array([v[0] for v in characteristic_values])\n",
"\n", "\n",
"# Compute Jenks breaks\n", "# Compute Jenks breaks\n",
"num_classes = 5\n", "num_classes = 5\n",
"breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)" "breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)\n",
] "\n",
},
{
"cell_type": "code",
"execution_count": 31,
"id": "8672f3f8-82bf-439e-8558-cb3566f2062f",
"metadata": {},
"outputs": [],
"source": [
"# Create a bin range mapping: (lower, upper) for each bin\n", "# Create a bin range mapping: (lower, upper) for each bin\n",
"bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n", "bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n",
"\n", "\n",
@@ -102,22 +104,14 @@
"def jenks_range(value) -> str:\n", "def jenks_range(value) -> str:\n",
" for i, (low, high) in enumerate(bin_ranges):\n", " for i, (low, high) in enumerate(bin_ranges):\n",
" if low <= value <= high:\n", " if low <= value <= high:\n",
" return f\"{int(low)}{int(high)}\"\n", " return f\"{int(low)}-{int(high)}\"\n",
" return \"unknown\"\n", " return \"unknown\"\n",
"\n", "\n",
"\n", "\n",
"dwellings_df = gpd.read_parquet('./da_2021_private_dwellings.parquet')\n", "characteristic_df = gpd.read_parquet('./da_2021_characteristic.parquet')\n",
"dwellings_df['category'] = dwellings_df[\"count_total_4_per_square_km\"].apply(lambda v: jenks_range(v))\n", "characteristic_df['category'] = characteristic_df[\"count_total_4_per_square_km\"].apply(lambda v: jenks_range(v))\n",
"dwellings_df['category'] = dwellings_df['category'].astype('category')" "characteristic_df['category'] = characteristic_df['category'].astype('category')\n",
] "\n",
},
{
"cell_type": "code",
"execution_count": 32,
"id": "f265300a-9cf7-4ab7-8bdb-d66feae3a2f8",
"metadata": {},
"outputs": [],
"source": [
"# Categories to colors\n", "# Categories to colors\n",
"cmap = {}\n", "cmap = {}\n",
"colors = [\n", "colors = [\n",
@@ -127,13 +121,13 @@
" [255, 63.75, 63.75],\n", " [255, 63.75, 63.75],\n",
" [255, 0, 0]\n", " [255, 0, 0]\n",
"]\n", "]\n",
"for index, value in enumerate(dwellings_df['category'].unique()):\n", "for index, value in enumerate(sorted(characteristic_df['category'].unique(), key=lambda x: int(x.split('-')[0]))):\n",
" cmap[value] = colors[index]" " cmap[value] = colors[index]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 17,
"id": "a6a2ae6c-61b7-4c0e-bbe7-a580a511ee5a", "id": "a6a2ae6c-61b7-4c0e-bbe7-a580a511ee5a",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -152,7 +146,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 22,
"id": "56e96627-0e82-436a-bd8e-e51546c7526b", "id": "56e96627-0e82-436a-bd8e-e51546c7526b",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -169,53 +163,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 6,
"id": "fef53303-147d-44af-b8f4-b824f64b486f",
"metadata": {},
"outputs": [],
"source": [
"get_color = apply_categorical_cmap(pa.array(dwellings_df['category']), cmap)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "6935a061-41fc-4223-b155-4caf4c6df103", "id": "6935a061-41fc-4223-b155-4caf4c6df103",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [
"cop_layer = PolygonLayer.from_geopandas(gdf=dwellings_df,\n",
" stroked=True,\n",
" get_fill_color=get_color,\n",
" get_line_color=[255, 255, 255],\n",
" get_line_width=5,\n",
" line_width_units=\"meters\",\n",
" opacity=0.4,\n",
" auto_highlight = True,\n",
" highlight_color=[0,0,0,0]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "1ab3dded-cff9-40cb-ac32-306581018083",
"metadata": {},
"outputs": [],
"source": [
"m = Map([basemap, cop_layer])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "afb530c1-f652-4aaa-a3fc-1214a71ffdce",
"metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "5d5e30381eff4f839a04ae81eff1b1e2", "model_id": "0bf8da87ca7045eb9394ed83a01c2857",
"version_major": 2, "version_major": 2,
"version_minor": 1 "version_minor": 1
}, },
@@ -223,14 +178,184 @@
"Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…" "Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…"
] ]
}, },
"execution_count": 38, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"get_color = apply_categorical_cmap(pa.array(characteristic_df['category']), cmap)\n",
"\n",
"cop_layer = PolygonLayer.from_geopandas(gdf=characteristic_df,\n",
" stroked=True,\n",
" get_fill_color=get_color,\n",
" get_line_color=[255, 255, 255],\n",
" get_line_width=5,\n",
" line_width_min_pixels=0.2,\n",
" line_width_units=\"meters\",\n",
" opacity=0.4,\n",
" auto_highlight = True\n",
" )\n",
"\n",
"m = Map([basemap, cop_layer])\n",
"\n",
"m" "m"
] ]
},
{
"cell_type": "markdown",
"id": "2e87d9fa-50d0-4278-99b3-7399b88aa010",
"metadata": {},
"source": [
"# 2. Percentage of people with income $100,000 and over\n",
"These values are from the 2021 Census of Population"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "3fcf04bc-2c8b-4e76-9c6d-7d1eb3892dbc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<duckdb.duckdb.DuckDBPyConnection at 0x7f05770247b0>"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"con.execute(\"\"\"\n",
"DROP TABLE IF EXISTS geo_data_2;\n",
"CREATE TABLE geo_data_2 AS\n",
"SELECT\n",
" geo.da_dguid,\n",
" cop.count_total_1,\n",
" cop.count_total_155,\n",
" cop.count_total_168,\n",
" CASE\n",
" WHEN cop.count_total_168 = 0.0 THEN 0\n",
" WHEN cop.count_total_155 = 0 THEN 0\n",
" WHEN cop.count_total_168 IS NULL THEN 0\n",
" WHEN cop.count_total_155 IS NULL THEN 0\n",
" ELSE \n",
" ((cop.count_total_168/cop.count_total_155) * 100) \n",
" END AS percentage_over_100k,\n",
" geo.geom\n",
"FROM\n",
" 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n",
" 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
"WHERE geo.cma_dguid = '2021S0503933' AND cop.da_dguid = geo.da_dguid;\n",
"\"\"\")\n",
"\n",
"con.execute(\"\"\"\n",
"COPY geo_data_2 TO './da_2021_characteristic_2.parquet' (FORMAT PARQUET);\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "b730c891-d3b6-4fb4-a9ea-dd898e9e8490",
"metadata": {},
"outputs": [],
"source": [
"characteristic_values = con.execute(\"SELECT DISTINCT percentage_over_100k FROM geo_data_2\").fetchall()\n",
"\n",
"values = np.array([v[0] for v in characteristic_values])\n",
"\n",
"# Compute Jenks breaks\n",
"num_classes = 5\n",
"breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)\n",
"\n",
"# Create a bin range mapping: (lower, upper) for each bin\n",
"bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n",
"\n",
"# Create a function to get the range string for a value\n",
"def jenks_range(value) -> str:\n",
" for i, (low, high) in enumerate(bin_ranges):\n",
" if low <= value <= high:\n",
" return f\"{int(low)}-{int(high)}\"\n",
" return \"unknown\"\n",
"\n",
"\n",
"characteristic_df = gpd.read_parquet('./da_2021_characteristic_2.parquet')\n",
"characteristic_df['category'] = characteristic_df[\"percentage_over_100k\"].apply(lambda v: jenks_range(v))\n",
"characteristic_df['category'] = characteristic_df['category'].astype('category')\n",
"\n",
"# Categories to colors\n",
"cmap = {}\n",
"colors = [\n",
" [255, 255, 255],\n",
" [255, 191.25, 191.25],\n",
" [255, 127.50, 127.50],\n",
" [255, 63.75, 63.75],\n",
" [255, 0, 0]\n",
"]\n",
"for index, value in enumerate(sorted(characteristic_df['category'].unique(), key=lambda x: int(x.split('-')[0]))):\n",
" cmap[value] = colors[index]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "8f766c5a-5d6d-490e-b082-6b6efe399409",
"metadata": {},
"outputs": [],
"source": [
"get_color = apply_categorical_cmap(pa.array(characteristic_df['category']), cmap)\n",
"\n",
"cop_layer = PolygonLayer.from_geopandas(gdf=characteristic_df,\n",
" stroked=True,\n",
" get_fill_color=get_color,\n",
" get_line_color=[255, 255, 255],\n",
" get_line_width=5,\n",
" line_width_min_pixels=0.2,\n",
" line_width_units=\"meters\",\n",
" opacity=0.4,\n",
" auto_highlight = True\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "85c8c731-538e-440a-b784-125968222b7c",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "91c9a47b91814bd28c7b5c0a10557973",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m = Map([basemap, cop_layer])\n",
"\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e09e4973-9018-4065-b9f9-d4259019bcf5",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {