Files
d4c-datapkg-statistical/experiments/presentation/cop_2021_structure_example.ipynb
T

517 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "90f9ff98-9caa-49b5-acb1-c42755c681b5",
"metadata": {},
"outputs": [],
"source": [
"import duckdb\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c3684ed1-7b99-4bdf-9bee-a3adea2d66a7",
"metadata": {},
"outputs": [],
"source": [
"# Open an in-memory DuckDB database; \":memory:\" is what connect() uses when\n",
"# no database path is given, so nothing is written to disk.\n",
"con = duckdb.connect(\":memory:\")\n",
"\n",
"# Install the spatial extension, then load it into this connection.\n",
"con.install_extension(\"spatial\")\n",
"con.load_extension(\"spatial\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ae35cdbf-ec32-40f7-a2e6-20407f04d4cb",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "caf012a82a014bbb8374d849ed9c2c9a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Copy the remote da_2021.parquet file into a DuckDB table named geo_data.\n",
"# CREATE OR REPLACE rebuilds the table in a single statement, so re-running\n",
"# this cell is safe and the old table is not dropped ahead of the download\n",
"# as a separate step.\n",
"# The trailing semicolon stops the connection object returned by execute()\n",
"# from being echoed as the cell result.\n",
"con.execute(\"\"\"\n",
"CREATE OR REPLACE TABLE geo_data AS\n",
"SELECT * FROM 'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet';\n",
"\"\"\");"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "939f9204-ef9d-4eba-946c-a6b096d43fb5",
"metadata": {},
"outputs": [],
"source": [
"# Read every row and column of the DuckDB table geo_data into a pandas\n",
"# DataFrame bound to the Python name geo_data.\n",
"geo_data = (\n",
"    con.sql(\"SELECT * FROM geo_data\")\n",
"    .df()  # same result as .to_df()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7467c036-a4c5-4c3a-9278-a64c384c9ab7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>da_dguid</th>\n",
" <th>count_total_1</th>\n",
" <th>count_total_4</th>\n",
" <th>count_total_5</th>\n",
" <th>count_total_6</th>\n",
" <th>count_total_7</th>\n",
" <th>count_total_8</th>\n",
" <th>count_total_9</th>\n",
" <th>count_total_10</th>\n",
" <th>count_total_11</th>\n",
" <th>...</th>\n",
" <th>count_women_2622</th>\n",
" <th>count_women_2623</th>\n",
" <th>count_women_2624</th>\n",
" <th>count_women_2625</th>\n",
" <th>count_women_2626</th>\n",
" <th>count_women_2627</th>\n",
" <th>count_women_2628</th>\n",
" <th>count_women_2629</th>\n",
" <th>count_women_2630</th>\n",
" <th>count_women_2631</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2021S051210010165</td>\n",
" <td>500</td>\n",
" <td>229</td>\n",
" <td>217</td>\n",
" <td>1309.9</td>\n",
" <td>0.38</td>\n",
" <td>500</td>\n",
" <td>45</td>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>25</td>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2021S051210010166</td>\n",
" <td>340</td>\n",
" <td>165</td>\n",
" <td>146</td>\n",
" <td>3563.9</td>\n",
" <td>0.10</td>\n",
" <td>340</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>25</td>\n",
" <td>...</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2021S051210010167</td>\n",
" <td>407</td>\n",
" <td>201</td>\n",
" <td>184</td>\n",
" <td>3217.4</td>\n",
" <td>0.13</td>\n",
" <td>405</td>\n",
" <td>45</td>\n",
" <td>15</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021S051210010168</td>\n",
" <td>523</td>\n",
" <td>274</td>\n",
" <td>248</td>\n",
" <td>2736.8</td>\n",
" <td>0.19</td>\n",
" <td>525</td>\n",
" <td>55</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>15</td>\n",
" <td>30</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021S051210010169</td>\n",
" <td>285</td>\n",
" <td>131</td>\n",
" <td>123</td>\n",
" <td>4305.1</td>\n",
" <td>0.07</td>\n",
" <td>285</td>\n",
" <td>40</td>\n",
" <td>10</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>20</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57931</th>\n",
" <td>2021S051262080024</td>\n",
" <td>423</td>\n",
" <td>166</td>\n",
" <td>140</td>\n",
" <td>1055.9</td>\n",
" <td>0.40</td>\n",
" <td>425</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>35</td>\n",
" <td>...</td>\n",
" <td>15</td>\n",
" <td>10</td>\n",
" <td>55</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>40</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57932</th>\n",
" <td>2021S051262080025</td>\n",
" <td>514</td>\n",
" <td>162</td>\n",
" <td>148</td>\n",
" <td>1.8</td>\n",
" <td>287.39</td>\n",
" <td>515</td>\n",
" <td>160</td>\n",
" <td>50</td>\n",
" <td>55</td>\n",
" <td>...</td>\n",
" <td>15</td>\n",
" <td>10</td>\n",
" <td>90</td>\n",
" <td>5</td>\n",
" <td>90</td>\n",
" <td>65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57933</th>\n",
" <td>2021S051262080026</td>\n",
" <td>868</td>\n",
" <td>276</td>\n",
" <td>249</td>\n",
" <td>3.4</td>\n",
" <td>251.60</td>\n",
" <td>865</td>\n",
" <td>285</td>\n",
" <td>100</td>\n",
" <td>85</td>\n",
" <td>...</td>\n",
" <td>20</td>\n",
" <td>15</td>\n",
" <td>170</td>\n",
" <td>10</td>\n",
" <td>160</td>\n",
" <td>120</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57934</th>\n",
" <td>2021S051262080027</td>\n",
" <td>776</td>\n",
" <td>196</td>\n",
" <td>169</td>\n",
" <td>31.5</td>\n",
" <td>24.65</td>\n",
" <td>775</td>\n",
" <td>300</td>\n",
" <td>115</td>\n",
" <td>90</td>\n",
" <td>...</td>\n",
" <td>20</td>\n",
" <td>10</td>\n",
" <td>160</td>\n",
" <td>0</td>\n",
" <td>160</td>\n",
" <td>100</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57935</th>\n",
" <td>2021S051262080028</td>\n",
" <td>573</td>\n",
" <td>143</td>\n",
" <td>123</td>\n",
" <td>146.9</td>\n",
" <td>3.90</td>\n",
" <td>575</td>\n",
" <td>205</td>\n",
" <td>75</td>\n",
" <td>75</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>15</td>\n",
" <td>105</td>\n",
" <td>5</td>\n",
" <td>105</td>\n",
" <td>70</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>65</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>57936 rows × 7432 columns</p>\n",
"</div>"
],
"text/plain": [
" da_dguid count_total_1 count_total_4 count_total_5 \\\n",
"0 2021S051210010165 500 229 217 \n",
"1 2021S051210010166 340 165 146 \n",
"2 2021S051210010167 407 201 184 \n",
"3 2021S051210010168 523 274 248 \n",
"4 2021S051210010169 285 131 123 \n",
"... ... ... ... ... \n",
"57931 2021S051262080024 423 166 140 \n",
"57932 2021S051262080025 514 162 148 \n",
"57933 2021S051262080026 868 276 249 \n",
"57934 2021S051262080027 776 196 169 \n",
"57935 2021S051262080028 573 143 123 \n",
"\n",
" count_total_6 count_total_7 count_total_8 count_total_9 \\\n",
"0 1309.9 0.38 500 45 \n",
"1 3563.9 0.10 340 50 \n",
"2 3217.4 0.13 405 45 \n",
"3 2736.8 0.19 525 55 \n",
"4 4305.1 0.07 285 40 \n",
"... ... ... ... ... \n",
"57931 1055.9 0.40 425 90 \n",
"57932 1.8 287.39 515 160 \n",
"57933 3.4 251.60 865 285 \n",
"57934 31.5 24.65 775 300 \n",
"57935 146.9 3.90 575 205 \n",
"\n",
" count_total_10 count_total_11 ... count_women_2622 \\\n",
"0 10 20 ... 0 \n",
"1 15 25 ... 15 \n",
"2 15 10 ... 10 \n",
"3 15 15 ... 15 \n",
"4 10 15 ... 10 \n",
"... ... ... ... ... \n",
"57931 25 35 ... 15 \n",
"57932 50 55 ... 15 \n",
"57933 100 85 ... 20 \n",
"57934 115 90 ... 20 \n",
"57935 75 75 ... 10 \n",
"\n",
" count_women_2623 count_women_2624 count_women_2625 count_women_2626 \\\n",
"0 15 25 0 25 \n",
"1 0 30 0 30 \n",
"2 0 35 0 35 \n",
"3 30 35 0 35 \n",
"4 10 20 0 20 \n",
"... ... ... ... ... \n",
"57931 10 55 0 50 \n",
"57932 10 90 5 90 \n",
"57933 15 170 10 160 \n",
"57934 10 160 0 160 \n",
"57935 15 105 5 105 \n",
"\n",
" count_women_2627 count_women_2628 count_women_2629 count_women_2630 \\\n",
"0 20 0 0 0 \n",
"1 20 0 0 0 \n",
"2 25 0 0 0 \n",
"3 25 0 0 0 \n",
"4 15 0 0 0 \n",
"... ... ... ... ... \n",
"57931 40 0 0 0 \n",
"57932 65 0 0 0 \n",
"57933 120 5 5 0 \n",
"57934 100 0 0 0 \n",
"57935 70 0 0 0 \n",
"\n",
" count_women_2631 \n",
"0 20 \n",
"1 20 \n",
"2 25 \n",
"3 20 \n",
"4 15 \n",
"... ... \n",
"57931 40 \n",
"57932 60 \n",
"57933 110 \n",
"57934 100 \n",
"57935 65 \n",
"\n",
"[57936 rows x 7432 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4af7039d-2454-403a-b7c4-ba0dc4ed6b03",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}