mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
Update DuckDB lonboard example
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 1,
|
||||||
"id": "56ac906e",
|
"id": "56ac906e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -25,28 +25,32 @@
|
|||||||
"id": "5d97e882",
|
"id": "5d97e882",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 1.0 Total private dwellings and private dwellings per square kilometer for Ottawa\n",
|
"# 1. Total private dwellings and private dwellings per square kilometer at Dissemination Area geographic level\n",
|
||||||
"These values are from the 2021 Census of Population"
|
"These values are from the 2021 Census of Population"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 2,
|
||||||
"id": "580c82ad-f64d-439f-9055-2307fdf7cccd",
|
"id": "580c82ad-f64d-439f-9055-2307fdf7cccd",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<duckdb.duckdb.DuckDBPyConnection at 0x7fd635715cb0>"
|
"<duckdb.duckdb.DuckDBPyConnection at 0x7f05770247b0>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 26,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"\"\"\"\n",
|
||||||
|
"Vancouver CMA is geo.cma_dguid = '2021S0503933'\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
"con.execute(\"\"\"\n",
|
"con.execute(\"\"\"\n",
|
||||||
"DROP TABLE IF EXISTS geo_data;\n",
|
"DROP TABLE IF EXISTS geo_data;\n",
|
||||||
"CREATE TABLE geo_data AS\n",
|
"CREATE TABLE geo_data AS\n",
|
||||||
@@ -56,45 +60,43 @@
|
|||||||
" cop.count_total_4,\n",
|
" cop.count_total_4,\n",
|
||||||
" cop.count_total_6,\n",
|
" cop.count_total_6,\n",
|
||||||
" cop.count_total_7,\n",
|
" cop.count_total_7,\n",
|
||||||
|
" CASE\n",
|
||||||
|
" WHEN cop.count_total_7 = 0.0 THEN 0\n",
|
||||||
|
" WHEN cop.count_total_4 = 0 THEN 0\n",
|
||||||
|
" WHEN cop.count_total_4 IS NULL THEN 0\n",
|
||||||
|
" ELSE \n",
|
||||||
" ROUND(\n",
|
" ROUND(\n",
|
||||||
" (cop.count_total_4 / cop.count_total_7), 2\n",
|
" (cop.count_total_4 / cop.count_total_7), 2\n",
|
||||||
" ) AS count_total_4_per_square_km,\n",
|
" ) \n",
|
||||||
|
" END AS count_total_4_per_square_km,\n",
|
||||||
" geo.geom\n",
|
" geo.geom\n",
|
||||||
"FROM\n",
|
"FROM\n",
|
||||||
" 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n",
|
" 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n",
|
||||||
" 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
|
" 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
|
||||||
"WHERE geo.csd_name IN ('Vancouver') AND cop.da_dguid = geo.da_dguid;\n",
|
"WHERE geo.csd_name = 'Vancouver' AND cop.da_dguid = geo.da_dguid;\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\"\"\")\n",
|
"\"\"\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"con.execute(\"\"\"\n",
|
"con.execute(\"\"\"\n",
|
||||||
"COPY geo_data TO './da_2021_private_dwellings.parquet' (FORMAT PARQUET);\n",
|
"COPY geo_data TO './da_2021_characteristic.parquet' (FORMAT PARQUET);\n",
|
||||||
"\"\"\")"
|
"\"\"\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 3,
|
||||||
"id": "e4794c4d-6013-40b5-8e59-046fc2495d34",
|
"id": "e4794c4d-6013-40b5-8e59-046fc2495d34",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"private_dwellings_per_square_km = con.execute(\"SELECT DISTINCT count_total_4_per_square_km FROM geo_data\").fetchall()\n",
|
"characteristic_values = con.execute(\"SELECT DISTINCT count_total_4_per_square_km FROM geo_data\").fetchall()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"values = np.array([v[0] for v in private_dwellings_per_square_km])\n",
|
"values = np.array([v[0] for v in characteristic_values])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Compute Jenks breaks\n",
|
"# Compute Jenks breaks\n",
|
||||||
"num_classes = 5\n",
|
"num_classes = 5\n",
|
||||||
"breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)"
|
"breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)\n",
|
||||||
]
|
"\n",
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 31,
|
|
||||||
"id": "8672f3f8-82bf-439e-8558-cb3566f2062f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Create a bin range mapping: (lower, upper) for each bin\n",
|
"# Create a bin range mapping: (lower, upper) for each bin\n",
|
||||||
"bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n",
|
"bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -102,22 +104,14 @@
|
|||||||
"def jenks_range(value) -> str:\n",
|
"def jenks_range(value) -> str:\n",
|
||||||
" for i, (low, high) in enumerate(bin_ranges):\n",
|
" for i, (low, high) in enumerate(bin_ranges):\n",
|
||||||
" if low <= value <= high:\n",
|
" if low <= value <= high:\n",
|
||||||
" return f\"{int(low)}–{int(high)}\"\n",
|
" return f\"{int(low)}-{int(high)}\"\n",
|
||||||
" return \"unknown\"\n",
|
" return \"unknown\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"dwellings_df = gpd.read_parquet('./da_2021_private_dwellings.parquet')\n",
|
"characteristic_df = gpd.read_parquet('./da_2021_characteristic.parquet')\n",
|
||||||
"dwellings_df['category'] = dwellings_df[\"count_total_4_per_square_km\"].apply(lambda v: jenks_range(v))\n",
|
"characteristic_df['category'] = characteristic_df[\"count_total_4_per_square_km\"].apply(lambda v: jenks_range(v))\n",
|
||||||
"dwellings_df['category'] = dwellings_df['category'].astype('category')"
|
"characteristic_df['category'] = characteristic_df['category'].astype('category')\n",
|
||||||
]
|
"\n",
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 32,
|
|
||||||
"id": "f265300a-9cf7-4ab7-8bdb-d66feae3a2f8",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Categories to colors\n",
|
"# Categories to colors\n",
|
||||||
"cmap = {}\n",
|
"cmap = {}\n",
|
||||||
"colors = [\n",
|
"colors = [\n",
|
||||||
@@ -127,13 +121,13 @@
|
|||||||
" [255, 63.75, 63.75],\n",
|
" [255, 63.75, 63.75],\n",
|
||||||
" [255, 0, 0]\n",
|
" [255, 0, 0]\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"for index, value in enumerate(dwellings_df['category'].unique()):\n",
|
"for index, value in enumerate(sorted(characteristic_df['category'].unique(), key=lambda x: int(x.split('-')[0]))):\n",
|
||||||
" cmap[value] = colors[index]"
|
" cmap[value] = colors[index]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 17,
|
||||||
"id": "a6a2ae6c-61b7-4c0e-bbe7-a580a511ee5a",
|
"id": "a6a2ae6c-61b7-4c0e-bbe7-a580a511ee5a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -152,7 +146,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 22,
|
||||||
"id": "56e96627-0e82-436a-bd8e-e51546c7526b",
|
"id": "56e96627-0e82-436a-bd8e-e51546c7526b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -169,53 +163,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 35,
|
"execution_count": 6,
|
||||||
"id": "fef53303-147d-44af-b8f4-b824f64b486f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"get_color = apply_categorical_cmap(pa.array(dwellings_df['category']), cmap)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 36,
|
|
||||||
"id": "6935a061-41fc-4223-b155-4caf4c6df103",
|
"id": "6935a061-41fc-4223-b155-4caf4c6df103",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"cop_layer = PolygonLayer.from_geopandas(gdf=dwellings_df,\n",
|
|
||||||
" stroked=True,\n",
|
|
||||||
" get_fill_color=get_color,\n",
|
|
||||||
" get_line_color=[255, 255, 255],\n",
|
|
||||||
" get_line_width=5,\n",
|
|
||||||
" line_width_units=\"meters\",\n",
|
|
||||||
" opacity=0.4,\n",
|
|
||||||
" auto_highlight = True,\n",
|
|
||||||
" highlight_color=[0,0,0,0]\n",
|
|
||||||
" )"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 37,
|
|
||||||
"id": "1ab3dded-cff9-40cb-ac32-306581018083",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"m = Map([basemap, cop_layer])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 38,
|
|
||||||
"id": "afb530c1-f652-4aaa-a3fc-1214a71ffdce",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "5d5e30381eff4f839a04ae81eff1b1e2",
|
"model_id": "0bf8da87ca7045eb9394ed83a01c2857",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 1
|
"version_minor": 1
|
||||||
},
|
},
|
||||||
@@ -223,14 +178,184 @@
|
|||||||
"Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…"
|
"Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 38,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"get_color = apply_categorical_cmap(pa.array(characteristic_df['category']), cmap)\n",
|
||||||
|
"\n",
|
||||||
|
"cop_layer = PolygonLayer.from_geopandas(gdf=characteristic_df,\n",
|
||||||
|
" stroked=True,\n",
|
||||||
|
" get_fill_color=get_color,\n",
|
||||||
|
" get_line_color=[255, 255, 255],\n",
|
||||||
|
" get_line_width=5,\n",
|
||||||
|
" line_width_min_pixels=0.2,\n",
|
||||||
|
" line_width_units=\"meters\",\n",
|
||||||
|
" opacity=0.4,\n",
|
||||||
|
" auto_highlight = True\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"m = Map([basemap, cop_layer])\n",
|
||||||
|
"\n",
|
||||||
"m"
|
"m"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2e87d9fa-50d0-4278-99b3-7399b88aa010",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 2. Percentage of people with income $100,000 and over\n",
|
||||||
|
"These values are from the 2021 Census of Population"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"id": "3fcf04bc-2c8b-4e76-9c6d-7d1eb3892dbc",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<duckdb.duckdb.DuckDBPyConnection at 0x7f05770247b0>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con.execute(\"\"\"\n",
|
||||||
|
"DROP TABLE IF EXISTS geo_data_2;\n",
|
||||||
|
"CREATE TABLE geo_data_2 AS\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" geo.da_dguid,\n",
|
||||||
|
" cop.count_total_1,\n",
|
||||||
|
" cop.count_total_155,\n",
|
||||||
|
" cop.count_total_168,\n",
|
||||||
|
" CASE\n",
|
||||||
|
" WHEN cop.count_total_168 = 0.0 THEN 0\n",
|
||||||
|
" WHEN cop.count_total_155 = 0 THEN 0\n",
|
||||||
|
" WHEN cop.count_total_168 IS NULL THEN 0\n",
|
||||||
|
" WHEN cop.count_total_155 IS NULL THEN 0\n",
|
||||||
|
" ELSE \n",
|
||||||
|
" ((cop.count_total_168/cop.count_total_155) * 100) \n",
|
||||||
|
" END AS percentage_over_100k,\n",
|
||||||
|
" geo.geom\n",
|
||||||
|
"FROM\n",
|
||||||
|
" 'https://data.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop,\n",
|
||||||
|
" 'https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
|
||||||
|
"WHERE geo.cma_dguid = '2021S0503933' AND cop.da_dguid = geo.da_dguid;\n",
|
||||||
|
"\"\"\")\n",
|
||||||
|
"\n",
|
||||||
|
"con.execute(\"\"\"\n",
|
||||||
|
"COPY geo_data_2 TO './da_2021_characteristic_2.parquet' (FORMAT PARQUET);\n",
|
||||||
|
"\"\"\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 25,
|
||||||
|
"id": "b730c891-d3b6-4fb4-a9ea-dd898e9e8490",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"characteristic_values = con.execute(\"SELECT DISTINCT percentage_over_100k FROM geo_data_2\").fetchall()\n",
|
||||||
|
"\n",
|
||||||
|
"values = np.array([v[0] for v in characteristic_values])\n",
|
||||||
|
"\n",
|
||||||
|
"# Compute Jenks breaks\n",
|
||||||
|
"num_classes = 5\n",
|
||||||
|
"breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)\n",
|
||||||
|
"\n",
|
||||||
|
"# Create a bin range mapping: (lower, upper) for each bin\n",
|
||||||
|
"bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n",
|
||||||
|
"\n",
|
||||||
|
"# Create a function to get the range string for a value\n",
|
||||||
|
"def jenks_range(value) -> str:\n",
|
||||||
|
" for i, (low, high) in enumerate(bin_ranges):\n",
|
||||||
|
" if low <= value <= high:\n",
|
||||||
|
" return f\"{int(low)}-{int(high)}\"\n",
|
||||||
|
" return \"unknown\"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"characteristic_df = gpd.read_parquet('./da_2021_characteristic_2.parquet')\n",
|
||||||
|
"characteristic_df['category'] = characteristic_df[\"percentage_over_100k\"].apply(lambda v: jenks_range(v))\n",
|
||||||
|
"characteristic_df['category'] = characteristic_df['category'].astype('category')\n",
|
||||||
|
"\n",
|
||||||
|
"# Categories to colors\n",
|
||||||
|
"cmap = {}\n",
|
||||||
|
"colors = [\n",
|
||||||
|
" [255, 255, 255],\n",
|
||||||
|
" [255, 191.25, 191.25],\n",
|
||||||
|
" [255, 127.50, 127.50],\n",
|
||||||
|
" [255, 63.75, 63.75],\n",
|
||||||
|
" [255, 0, 0]\n",
|
||||||
|
"]\n",
|
||||||
|
"for index, value in enumerate(sorted(characteristic_df['category'].unique(), key=lambda x: int(x.split('-')[0]))):\n",
|
||||||
|
" cmap[value] = colors[index]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "8f766c5a-5d6d-490e-b082-6b6efe399409",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"get_color = apply_categorical_cmap(pa.array(characteristic_df['category']), cmap)\n",
|
||||||
|
"\n",
|
||||||
|
"cop_layer = PolygonLayer.from_geopandas(gdf=characteristic_df,\n",
|
||||||
|
" stroked=True,\n",
|
||||||
|
" get_fill_color=get_color,\n",
|
||||||
|
" get_line_color=[255, 255, 255],\n",
|
||||||
|
" get_line_width=5,\n",
|
||||||
|
" line_width_min_pixels=0.2,\n",
|
||||||
|
" line_width_units=\"meters\",\n",
|
||||||
|
" opacity=0.4,\n",
|
||||||
|
" auto_highlight = True\n",
|
||||||
|
" )"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"id": "85c8c731-538e-440a-b784-125968222b7c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
|
"model_id": "91c9a47b91814bd28c7b5c0a10557973",
|
||||||
|
"version_major": 2,
|
||||||
|
"version_minor": 1
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
"Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"m = Map([basemap, cop_layer])\n",
|
||||||
|
"\n",
|
||||||
|
"m"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "e09e4973-9018-4065-b9f9-d4259019bcf5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
Reference in New Issue
Block a user