diff --git a/experiments/presentation/cop_2021_structure_example.ipynb b/experiments/presentation/cop_2021_structure_example.ipynb new file mode 100644 index 0000000..d561ea7 --- /dev/null +++ b/experiments/presentation/cop_2021_structure_example.ipynb @@ -0,0 +1,516 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "90f9ff98-9caa-49b5-acb1-c42755c681b5", + "metadata": {}, + "outputs": [], + "source": [ + "import duckdb\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c3684ed1-7b99-4bdf-9bee-a3adea2d66a7", + "metadata": {}, + "outputs": [], + "source": [ + "con = duckdb.connect()\n", + "con.install_extension(\"spatial\")\n", + "con.load_extension(\"spatial\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ae35cdbf-ec32-40f7-a2e6-20407f04d4cb", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "caf012a82a014bbb8374d849ed9c2c9a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "con.execute(\"\"\"\n", + "DROP TABLE IF EXISTS geo_data;\n", + "CREATE TABLE geo_data AS\n", + "SELECT * FROM 'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet';\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "939f9204-ef9d-4eba-946c-a6b096d43fb5", + "metadata": {}, + "outputs": [], + "source": [ + "geo_data = con.sql(\"SELECT * FROM geo_data\").to_df()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7467c036-a4c5-4c3a-9278-a64c384c9ab7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
da_dguidcount_total_1count_total_4count_total_5count_total_6count_total_7count_total_8count_total_9count_total_10count_total_11...count_women_2622count_women_2623count_women_2624count_women_2625count_women_2626count_women_2627count_women_2628count_women_2629count_women_2630count_women_2631
02021S0512100101655002292171309.90.38500451020...015250252000020
12021S0512100101663401651463563.90.10340501525...150300302000020
22021S0512100101674072011843217.40.13405451510...100350352500025
32021S0512100101685232742482736.80.19525551515...1530350352500020
42021S0512100101692851311234305.10.07285401015...1010200201500015
..................................................................
579312021S0512620800244231661401055.90.40425902535...1510550504000040
579322021S0512620800255141621481.8287.395151605055...1510905906500060
579332021S0512620800268682762493.4251.6086528510085...201517010160120550110
579342021S05126208002777619616931.524.6577530011590...20101600160100000100
579352021S051262080028573143123146.93.905752057575...101510551057000065
\n", + "

57936 rows × 7432 columns

\n", + "
" + ], + "text/plain": [ + " da_dguid count_total_1 count_total_4 count_total_5 \\\n", + "0 2021S051210010165 500 229 217 \n", + "1 2021S051210010166 340 165 146 \n", + "2 2021S051210010167 407 201 184 \n", + "3 2021S051210010168 523 274 248 \n", + "4 2021S051210010169 285 131 123 \n", + "... ... ... ... ... \n", + "57931 2021S051262080024 423 166 140 \n", + "57932 2021S051262080025 514 162 148 \n", + "57933 2021S051262080026 868 276 249 \n", + "57934 2021S051262080027 776 196 169 \n", + "57935 2021S051262080028 573 143 123 \n", + "\n", + " count_total_6 count_total_7 count_total_8 count_total_9 \\\n", + "0 1309.9 0.38 500 45 \n", + "1 3563.9 0.10 340 50 \n", + "2 3217.4 0.13 405 45 \n", + "3 2736.8 0.19 525 55 \n", + "4 4305.1 0.07 285 40 \n", + "... ... ... ... ... \n", + "57931 1055.9 0.40 425 90 \n", + "57932 1.8 287.39 515 160 \n", + "57933 3.4 251.60 865 285 \n", + "57934 31.5 24.65 775 300 \n", + "57935 146.9 3.90 575 205 \n", + "\n", + " count_total_10 count_total_11 ... count_women_2622 \\\n", + "0 10 20 ... 0 \n", + "1 15 25 ... 15 \n", + "2 15 10 ... 10 \n", + "3 15 15 ... 15 \n", + "4 10 15 ... 10 \n", + "... ... ... ... ... \n", + "57931 25 35 ... 15 \n", + "57932 50 55 ... 15 \n", + "57933 100 85 ... 20 \n", + "57934 115 90 ... 20 \n", + "57935 75 75 ... 10 \n", + "\n", + " count_women_2623 count_women_2624 count_women_2625 count_women_2626 \\\n", + "0 15 25 0 25 \n", + "1 0 30 0 30 \n", + "2 0 35 0 35 \n", + "3 30 35 0 35 \n", + "4 10 20 0 20 \n", + "... ... ... ... ... \n", + "57931 10 55 0 50 \n", + "57932 10 90 5 90 \n", + "57933 15 170 10 160 \n", + "57934 10 160 0 160 \n", + "57935 15 105 5 105 \n", + "\n", + " count_women_2627 count_women_2628 count_women_2629 count_women_2630 \\\n", + "0 20 0 0 0 \n", + "1 20 0 0 0 \n", + "2 25 0 0 0 \n", + "3 25 0 0 0 \n", + "4 15 0 0 0 \n", + "... ... ... ... ... \n", + "57931 40 0 0 0 \n", + "57932 65 0 0 0 \n", + "57933 120 5 5 0 \n", + "57934 100 0 0 0 \n", + "57935 70 0 0 0 \n", + "\n", + " count_women_2631 \n", + "0 20 \n", + "1 20 \n", + "2 25 \n", + "3 20 \n", + "4 15 \n", + "... ... \n", + "57931 40 \n", + "57932 60 \n", + "57933 110 \n", + "57934 100 \n", + "57935 65 \n", + "\n", + "[57936 rows x 7432 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geo_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4af7039d-2454-403a-b7c4-ba0dc4ed6b03", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}