{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-overview",
   "metadata": {},
   "source": [
    "# 2021 Census dissemination areas: `percentage_over_100k` choropleth\n",
    "\n",
    "This notebook joins a 2021 Census of Population table to the 2021 dissemination-area (DA) digital boundary file for a fixed list of census subdivisions, computes `count_total_168 / count_total_155 * 100` per DA (stored as `percentage_over_100k`), classifies the result with Jenks natural breaks and renders it with lonboard on top of a satellite basemap.\n",
    "\n",
    "Both inputs are remote Parquet files read directly by DuckDB, so the notebook needs network access. It is written to be run top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f027141",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "import geopandas as gpd\n",
    "import jenkspy\n",
    "import numpy as np\n",
    "import pyarrow as pa\n",
    "from lonboard import BitmapTileLayer, Map, PolygonLayer\n",
    "from lonboard.colormap import apply_categorical_cmap"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-config",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfg-7b1e42aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of Jenks natural-breaks classes; must not exceed len(CLASS_COLORS).\n",
    "NUM_CLASSES = 5\n",
    "\n",
    "# Parquet file written by DuckDB and read back by GeoPandas.\n",
    "OUTPUT_PARQUET = \"./da_2021_characteristic.parquet\"\n",
    "\n",
    "# Fill colours (RGB, 0-255 integers), white for the lowest class through red for the highest.\n",
    "CLASS_COLORS = [\n",
    "    [255, 255, 255],\n",
    "    [255, 191, 191],\n",
    "    [255, 127, 127],\n",
    "    [255, 63, 63],\n",
    "    [255, 0, 0],\n",
    "]\n",
    "\n",
    "# Label and fill colour for values that fall outside every class (e.g. NaN).\n",
    "UNKNOWN_LABEL = \"unknown\"\n",
    "UNKNOWN_COLOR = [128, 128, 128]\n",
    "\n",
    "assert NUM_CLASSES <= len(CLASS_COLORS), \"not enough colours for the requested classes\"\n",
    "\n",
    "# In-memory DuckDB database with the spatial extension available.\n",
    "con = duckdb.connect()\n",
    "con.install_extension(\"spatial\")\n",
    "con.load_extension(\"spatial\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-basemap",
   "metadata": {},
   "source": [
    "## Basemap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e528d32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Google Satellite raster tiles. Served over HTTPS so the tiles are not blocked\n",
    "# as mixed content when Jupyter itself is served over HTTPS.\n",
    "basemap = BitmapTileLayer(\n",
    "    data=\"https://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={z}\",\n",
    "    tile_size=256,\n",
    "    max_requests=-1,\n",
    "    min_zoom=0,\n",
    "    max_zoom=19,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-load",
   "metadata": {},
   "source": [
    "## Load and join the census data\n",
    "\n",
    "The census table and the DA boundaries are joined on `da_dguid` and restricted to the listed census subdivisions. A DA whose numerator or denominator is missing or zero gets a percentage of 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a05a1a2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "con.execute(\"\"\"\n",
    "CREATE OR REPLACE TABLE geo_data AS\n",
    "SELECT\n",
    "    geo.da_dguid,\n",
    "    cop.count_total_1,\n",
    "    cop.count_total_155,\n",
    "    cop.count_total_168,\n",
    "    CASE\n",
    "        -- Missing or zero counts are reported as 0 instead of NULL / division by zero.\n",
    "        WHEN cop.count_total_168 = 0.0\n",
    "          OR cop.count_total_155 = 0.0\n",
    "          OR cop.count_total_168 IS NULL\n",
    "          OR cop.count_total_155 IS NULL THEN 0\n",
    "        ELSE (cop.count_total_168 / cop.count_total_155) * 100\n",
    "    END AS percentage_over_100k,\n",
    "    geo.geom\n",
    "FROM\n",
    "    'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop\n",
    "JOIN\n",
    "    'https://data-01.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
    "    ON cop.da_dguid = geo.da_dguid\n",
    "WHERE geo.csd_dguid IN (\n",
    "    '2021A00056001009', -- Whitehorse, YT\n",
    "    '2021A00056106023', -- Yellowknife, NT\n",
    "    '2021A00056204003', -- Iqaluit, NU\n",
    "    '2021A00055915022', -- Vancouver, BC\n",
    "    '2021A00054806016', -- Calgary, AB\n",
    "    '2021A00054706027', -- Regina, SK\n",
    "    '2021A00054611040', -- Winnipeg, MB\n",
    "    '2021A00053506008', -- Ottawa, ON\n",
    "    '2021A00052466023', -- Montréal, QC\n",
    "    '2021A00051301006', -- Saint John, NB\n",
    "    '2021A00051102075', -- Charlottetown, PE\n",
    "    '2021A00051209034', -- Halifax, NS\n",
    "    '2021A00051001519'  -- St. John's, NL\n",
    ");\n",
    "\"\"\")\n",
    "\n",
    "# Persist the joined table; the trailing semicolon hides the connection repr.\n",
    "con.execute(f\"COPY geo_data TO '{OUTPUT_PARQUET}' (FORMAT PARQUET);\");"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-classify",
   "metadata": {},
   "source": [
    "## Classify with Jenks natural breaks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "453d6956-9869-4a89-969f-b56a6e791c78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the breaks are computed from the DISTINCT percentages, so each\n",
    "# unique value counts once no matter how many DAs share it - confirm this is intended.\n",
    "characteristic_values = con.execute(\n",
    "    \"SELECT DISTINCT percentage_over_100k FROM geo_data\"\n",
    ").fetchall()\n",
    "values = np.array([row[0] for row in characteristic_values])\n",
    "\n",
    "# Class boundaries returned by jenkspy for NUM_CLASSES classes.\n",
    "breaks = jenkspy.jenks_breaks(values, n_classes=NUM_CLASSES)\n",
    "\n",
    "# (lower, upper) boundary pair for each class, in the order jenkspy returned them.\n",
    "bin_ranges = list(zip(breaks[:-1], breaks[1:]))\n",
    "assert len(bin_ranges) <= len(CLASS_COLORS), \"more classes than colours\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fn-2c9d5e10",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _range_label(low, high) -> str:\n",
    "    \"\"\"Format one class as 'low-high', truncating both bounds to integers.\"\"\"\n",
    "    return f\"{int(low)}-{int(high)}\"\n",
    "\n",
    "\n",
    "def jenks_range(value) -> str:\n",
    "    \"\"\"Return the label of the first class in ``bin_ranges`` that contains ``value``.\n",
    "\n",
    "    Both bounds of a class are inclusive and classes are tested in list order,\n",
    "    so a value equal to a boundary shared by two classes gets the label of the\n",
    "    earlier one. Returns 'unknown' when no class contains the value; NaN ends\n",
    "    up there because every comparison with NaN is false.\n",
    "    \"\"\"\n",
    "    for low, high in bin_ranges:\n",
    "        if low <= value <= high:\n",
    "            return _range_label(low, high)\n",
    "    return UNKNOWN_LABEL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "apply-5e8a9f31",
   "metadata": {},
   "outputs": [],
   "source": [
    "characteristic_df = gpd.read_parquet(OUTPUT_PARQUET)\n",
    "characteristic_df[\"category\"] = (\n",
    "    characteristic_df[\"percentage_over_100k\"].apply(jenks_range).astype(\"category\")\n",
    ")\n",
    "\n",
    "# Colours follow class order instead of being recovered by parsing the labels,\n",
    "# so the mapping stays valid when the 'unknown' category is present. If two\n",
    "# classes truncate to the same label, the first class's colour is kept.\n",
    "cmap = {}\n",
    "for (low, high), color in zip(bin_ranges, CLASS_COLORS):\n",
    "    cmap.setdefault(_range_label(low, high), color)\n",
    "cmap[UNKNOWN_LABEL] = UNKNOWN_COLOR"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-map",
   "metadata": {},
   "source": [
    "## Map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68deef68-9ed5-4187-86f0-c23c6c164c8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One fill colour per polygon, looked up from its category.\n",
    "get_color = apply_categorical_cmap(pa.array(characteristic_df[\"category\"]), cmap)\n",
    "\n",
    "cop_layer = PolygonLayer.from_geopandas(\n",
    "    gdf=characteristic_df,\n",
    "    stroked=True,\n",
    "    get_fill_color=get_color,\n",
    "    get_line_color=[255, 255, 255],\n",
    "    get_line_width=5,\n",
    "    line_width_min_pixels=0.2,\n",
    "    line_width_units=\"meters\",\n",
    "    opacity=0.4,\n",
    "    auto_highlight=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5dd0922-4c72-4911-8e06-da3f8bdb09bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "m = Map([basemap, cop_layer])\n",
    "\n",
    "m"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}