From 6b42a80529162d35b973bc7690f4950bd2f897ef Mon Sep 17 00:00:00 2001
From: Diego Ripley
Date: Thu, 10 Jul 2025 16:11:11 +0000
Subject: [PATCH] Expand example to country

---
 .../presentation/over_100k_cop_2021.ipynb     | 130 +++++++++++++++++-
 1 file changed, 126 insertions(+), 4 deletions(-)

diff --git a/experiments/presentation/over_100k_cop_2021.ipynb b/experiments/presentation/over_100k_cop_2021.ipynb
index 954a09a..23c94d6 100644
--- a/experiments/presentation/over_100k_cop_2021.ipynb
+++ b/experiments/presentation/over_100k_cop_2021.ipynb
@@ -39,6 +39,14 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "8020a327-59cf-49ef-a6e3-122ea74f4eec",
+   "metadata": {},
+   "source": [
+    "# % of People Making Over $100,000 For Select Cities"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 9,
@@ -180,14 +188,128 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "f5dd0922-4c72-4911-8e06-da3f8bdb09bd",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "m = Map([basemap, cop_layer])\n",
+    "\n",
+    "m"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "186b159b-044a-4841-92e1-6a310f91c756",
+   "metadata": {},
+   "source": [
+    "# % of People Making Over $100,000 For The Whole Country"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "d359f0cf-4c05-4fca-9176-709cd5ff977e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Join the census table to the boundary file on da_dguid and derive percentage_over_100k.\n",
+    "con.execute(\"\"\"\n",
+    "DROP TABLE IF EXISTS geo_data;\n",
+    "CREATE TABLE geo_data AS\n",
+    "SELECT\n",
+    "    geo.da_dguid,\n",
+    "    cop.count_total_1,\n",
+    "    cop.count_total_155,\n",
+    "    cop.count_total_168,\n",
+    "    -- NULLIF makes a zero denominator NULL; COALESCE then maps any NULL result to 0.\n",
+    "    COALESCE((cop.count_total_168 / NULLIF(cop.count_total_155, 0)) * 100, 0) AS percentage_over_100k,\n",
+    "    geo.geom\n",
+    "FROM\n",
+    "    'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet' AS cop\n",
+    "JOIN\n",
+    "    'https://data-01.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/da_2021.parquet' AS geo\n",
+    "    ON cop.da_dguid = geo.da_dguid;\n",
+    "\"\"\")\n",
+    "\n",
+    "con.execute(\"\"\"\n",
+    "COPY geo_data TO './da_2021_characteristic.parquet' (FORMAT PARQUET);\n",
+    "\"\"\")\n",
+    "\n",
+    "characteristic_values = con.execute(\"SELECT DISTINCT percentage_over_100k FROM geo_data\").fetchall()\n",
+    "\n",
+    "values = np.array([v[0] for v in characteristic_values])\n",
+    "\n",
+    "# Compute Jenks breaks\n",
+    "num_classes = 5\n",
+    "breaks = jenkspy.jenks_breaks(values, n_classes=num_classes)\n",
+    "\n",
+    "# Create a bin range mapping: (lower, upper) for each bin, in ascending order\n",
+    "bin_ranges = [(breaks[i], breaks[i+1]) for i in range(len(breaks)-1)]\n",
+    "\n",
+    "# One label per bin, kept in bin order so colors can be assigned by position\n",
+    "bin_labels = [f\"{int(low)}-{int(high)}\" for low, high in bin_ranges]\n",
+    "\n",
+    "\n",
+    "def jenks_range(value) -> str:\n",
+    "    \"\"\"Return the label of the first Jenks bin that contains value.\n",
+    "\n",
+    "    Both bounds are inclusive, so a value equal to an interior break gets\n",
+    "    the lower bin. Returns \"unknown\" when no bin matches (e.g. NaN).\n",
+    "    \"\"\"\n",
+    "    for (low, high), label in zip(bin_ranges, bin_labels):\n",
+    "        if low <= value <= high:\n",
+    "            return label\n",
+    "    return \"unknown\"\n",
+    "\n",
+    "\n",
+    "characteristic_df = gpd.read_parquet('./da_2021_characteristic.parquet')\n",
+    "characteristic_df['category'] = characteristic_df[\"percentage_over_100k\"].apply(jenks_range)\n",
+    "characteristic_df['category'] = characteristic_df['category'].astype('category')\n",
+    "\n",
+    "# Categories to colors, lowest bin first\n",
+    "colors = [\n",
+    "    [255, 255, 255],\n",
+    "    [255, 191.25, 191.25],\n",
+    "    [255, 127.50, 127.50],\n",
+    "    [255, 63.75, 63.75],\n",
+    "    [255, 0, 0]\n",
+    "]\n",
+    "unknown_color = [128, 128, 128]\n",
+    "assert len(bin_labels) <= len(colors), \"need one color per Jenks bin\"\n",
+    "\n",
+    "# Assign colors by bin position; only categories present in the data get an entry.\n",
+    "present_categories = set(characteristic_df['category'].cat.categories)\n",
+    "cmap = {label: color for label, color in zip(bin_labels, colors) if label in present_categories}\n",
+    "if \"unknown\" in present_categories:\n",
+    "    cmap[\"unknown\"] = unknown_color\n",
+    "\n",
+    "\n",
+    "get_color = apply_categorical_cmap(pa.array(characteristic_df['category']), cmap)\n",
+    "\n",
+    "cop_layer = PolygonLayer.from_geopandas(\n",
+    "    gdf=characteristic_df,\n",
+    "    stroked=True,\n",
+    "    get_fill_color=get_color,\n",
+    "    get_line_color=[255, 255, 255],\n",
+    "    get_line_width=5,\n",
+    "    line_width_min_pixels=0.2,\n",
+    "    line_width_units=\"meters\",\n",
+    "    opacity=0.4,\n",
+    "    auto_highlight=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "f4e168c8-ec3b-43db-a599-e4d0b01231a4",
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2ce00adb4a304296996a446110f56863",
+       "model_id": "d556516d41394e13a62696c52bd6b32d",
        "version_major": 2,
        "version_minor": 1
       },
@@ -195,7 +317,7 @@
        "Map(custom_attribution='', layers=(BitmapTileLayer(data='http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={…"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -209,7 +331,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d359f0cf-4c05-4fca-9176-709cd5ff977e",
+   "id": "6ca88dbe-44de-408f-a791-4acb039758b0",
    "metadata": {},
    "outputs": [],
    "source": []