mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
DuckDB issue with duplicate column names (ex. 'Value' and 'VALUE' are treated the same)
This commit is contained in:
@@ -0,0 +1,484 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "a2239884-1380-45bb-8ad4-648ef2c5b46b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"DuckDB treats column names case-insensitively, so \"Value\" and \"VALUE\" are considered the same name. To resolve the collision, the second column is renamed: \"VALUE\" becomes \"VALUE_1\".\n",
|
||||||
|
"\n",
|
||||||
|
"An example of this is productId 38100105."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 36,
|
||||||
|
"id": "5909b14d-1f07-46bc-84bf-09f269e15e41",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from collections import Counter\n",
|
||||||
|
"import glob\n",
|
||||||
|
"import pprint\n",
|
||||||
|
"\n",
|
||||||
|
"import duckdb\n",
|
||||||
|
"import pyarrow.parquet as pq"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 37,
|
||||||
|
"id": "04e61bd3-ab4c-46aa-9c0b-de949699ca0a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>REF_DATE</th>\n",
|
||||||
|
" <th>REF_START_DATE</th>\n",
|
||||||
|
" <th>REF_END_DATE</th>\n",
|
||||||
|
" <th>GEO</th>\n",
|
||||||
|
" <th>DGUID</th>\n",
|
||||||
|
" <th>Value</th>\n",
|
||||||
|
" <th>UOM</th>\n",
|
||||||
|
" <th>UOM_ID</th>\n",
|
||||||
|
" <th>SCALAR_FACTOR</th>\n",
|
||||||
|
" <th>SCALAR_ID</th>\n",
|
||||||
|
" <th>VECTOR</th>\n",
|
||||||
|
" <th>COORDINATE</th>\n",
|
||||||
|
" <th>VALUE_1</th>\n",
|
||||||
|
" <th>STATUS</th>\n",
|
||||||
|
" <th>SYMBOL</th>\n",
|
||||||
|
" <th>TERMINATED</th>\n",
|
||||||
|
" <th>DECIMALS</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>1961</td>\n",
|
||||||
|
" <td>1961-01-01</td>\n",
|
||||||
|
" <td>1961-12-31</td>\n",
|
||||||
|
" <td>Newfoundland and Labrador</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822242</td>\n",
|
||||||
|
" <td>2.1</td>\n",
|
||||||
|
" <td>470.0</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>1961</td>\n",
|
||||||
|
" <td>1961-01-01</td>\n",
|
||||||
|
" <td>1961-12-31</td>\n",
|
||||||
|
" <td>Newfoundland and Labrador</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822243</td>\n",
|
||||||
|
" <td>2.2</td>\n",
|
||||||
|
" <td>539.1</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>1961</td>\n",
|
||||||
|
" <td>1961-01-01</td>\n",
|
||||||
|
" <td>1961-12-31</td>\n",
|
||||||
|
" <td>Nova Scotia</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822244</td>\n",
|
||||||
|
" <td>3.1</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>1961</td>\n",
|
||||||
|
" <td>1961-01-01</td>\n",
|
||||||
|
" <td>1961-12-31</td>\n",
|
||||||
|
" <td>Nova Scotia</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822245</td>\n",
|
||||||
|
" <td>3.2</td>\n",
|
||||||
|
" <td>76.6</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>1961</td>\n",
|
||||||
|
" <td>1961-01-01</td>\n",
|
||||||
|
" <td>1961-12-31</td>\n",
|
||||||
|
" <td>New Brunswick</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822246</td>\n",
|
||||||
|
" <td>4.1</td>\n",
|
||||||
|
" <td>637.9</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>753</th>\n",
|
||||||
|
" <td>2010</td>\n",
|
||||||
|
" <td>2010-01-01</td>\n",
|
||||||
|
" <td>2010-12-31</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822241</td>\n",
|
||||||
|
" <td>1.2</td>\n",
|
||||||
|
" <td>124971.2</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>754</th>\n",
|
||||||
|
" <td>2011</td>\n",
|
||||||
|
" <td>2011-01-01</td>\n",
|
||||||
|
" <td>2011-12-31</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822240</td>\n",
|
||||||
|
" <td>1.1</td>\n",
|
||||||
|
" <td>120498.5</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>755</th>\n",
|
||||||
|
" <td>2011</td>\n",
|
||||||
|
" <td>2011-01-01</td>\n",
|
||||||
|
" <td>2011-12-31</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822241</td>\n",
|
||||||
|
" <td>1.2</td>\n",
|
||||||
|
" <td>120498.5</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>756</th>\n",
|
||||||
|
" <td>2012</td>\n",
|
||||||
|
" <td>2012-01-01</td>\n",
|
||||||
|
" <td>2012-12-31</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822240</td>\n",
|
||||||
|
" <td>1.1</td>\n",
|
||||||
|
" <td>113132.6</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>757</th>\n",
|
||||||
|
" <td>2012</td>\n",
|
||||||
|
" <td>2012-01-01</td>\n",
|
||||||
|
" <td>2012-12-31</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>Present value calculation, timber stocks, meth...</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>millions</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>v3822241</td>\n",
|
||||||
|
" <td>1.2</td>\n",
|
||||||
|
" <td>113132.6</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>None</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>758 rows × 17 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" REF_DATE REF_START_DATE REF_END_DATE GEO DGUID \\\n",
|
||||||
|
"0 1961 1961-01-01 1961-12-31 Newfoundland and Labrador None \n",
|
||||||
|
"1 1961 1961-01-01 1961-12-31 Newfoundland and Labrador None \n",
|
||||||
|
"2 1961 1961-01-01 1961-12-31 Nova Scotia None \n",
|
||||||
|
"3 1961 1961-01-01 1961-12-31 Nova Scotia None \n",
|
||||||
|
"4 1961 1961-01-01 1961-12-31 New Brunswick None \n",
|
||||||
|
".. ... ... ... ... ... \n",
|
||||||
|
"753 2010 2010-01-01 2010-12-31 Canada None \n",
|
||||||
|
"754 2011 2011-01-01 2011-12-31 Canada None \n",
|
||||||
|
"755 2011 2011-01-01 2011-12-31 Canada None \n",
|
||||||
|
"756 2012 2012-01-01 2012-12-31 Canada None \n",
|
||||||
|
"757 2012 2012-01-01 2012-12-31 Canada None \n",
|
||||||
|
"\n",
|
||||||
|
" Value UOM UOM_ID \\\n",
|
||||||
|
"0 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"1 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"2 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"3 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"4 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
".. ... ... ... \n",
|
||||||
|
"753 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"754 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"755 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"756 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"757 Present value calculation, timber stocks, meth... Dollars 81 \n",
|
||||||
|
"\n",
|
||||||
|
" SCALAR_FACTOR SCALAR_ID VECTOR COORDINATE VALUE_1 STATUS SYMBOL \\\n",
|
||||||
|
"0 millions 6 v3822242 2.1 470.0 None None \n",
|
||||||
|
"1 millions 6 v3822243 2.2 539.1 None None \n",
|
||||||
|
"2 millions 6 v3822244 3.1 0.0 None None \n",
|
||||||
|
"3 millions 6 v3822245 3.2 76.6 None None \n",
|
||||||
|
"4 millions 6 v3822246 4.1 637.9 None None \n",
|
||||||
|
".. ... ... ... ... ... ... ... \n",
|
||||||
|
"753 millions 6 v3822241 1.2 124971.2 None None \n",
|
||||||
|
"754 millions 6 v3822240 1.1 120498.5 None None \n",
|
||||||
|
"755 millions 6 v3822241 1.2 120498.5 None None \n",
|
||||||
|
"756 millions 6 v3822240 1.1 113132.6 None None \n",
|
||||||
|
"757 millions 6 v3822241 1.2 113132.6 None None \n",
|
||||||
|
"\n",
|
||||||
|
" TERMINATED DECIMALS \n",
|
||||||
|
"0 None 1 \n",
|
||||||
|
"1 None 1 \n",
|
||||||
|
"2 None 1 \n",
|
||||||
|
"3 None 1 \n",
|
||||||
|
"4 None 1 \n",
|
||||||
|
".. ... ... \n",
|
||||||
|
"753 None 1 \n",
|
||||||
|
"754 None 1 \n",
|
||||||
|
"755 None 1 \n",
|
||||||
|
"756 None 1 \n",
|
||||||
|
"757 None 1 \n",
|
||||||
|
"\n",
|
||||||
|
"[758 rows x 17 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 37,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con = duckdb.connect()\n",
|
||||||
|
"\n",
|
||||||
|
"issue = con.execute(\"SELECT * FROM '/data/tables/output/en/june_20_2025/38100105.parquet'\").df()\n",
|
||||||
|
"\n",
|
||||||
|
"issue"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 30,
|
||||||
|
"id": "cbf8953d-8523-42e8-b28c-6b464869ce61",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"files = glob.glob(\"/data/tables/output/en/june_20_2025/*.parquet\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 31,
|
||||||
|
"id": "d52074f9-7746-4569-9aea-57c204eda2eb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"{'10100164': ['VALUE'],\n",
|
||||||
|
" '13100902': ['STATUS'],\n",
|
||||||
|
" '13100904': ['STATUS'],\n",
|
||||||
|
" '23100049': ['VALUE'],\n",
|
||||||
|
" '23100050': ['VALUE'],\n",
|
||||||
|
" '23100268': ['STATUS'],\n",
|
||||||
|
" '36100374': ['VALUE'],\n",
|
||||||
|
" '36100396': ['VALUE'],\n",
|
||||||
|
" '36100397': ['VALUE'],\n",
|
||||||
|
" '36100658': ['VALUE'],\n",
|
||||||
|
" '38100104': ['VALUE'],\n",
|
||||||
|
" '38100105': ['VALUE']}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"duplicate_column_names = {}\n",
|
||||||
|
"for file in files:\n",
|
||||||
|
" # Open the Parquet file metadata\n",
|
||||||
|
" dataset = pq.ParquetFile(file)\n",
|
||||||
|
" # Get the column names\n",
|
||||||
|
" column_names = [x.upper() for x in dataset.schema.names]\n",
|
||||||
|
" count_of_column_names = [x for x in column_names if column_names.count(x) > 1]\n",
|
||||||
|
" if count_of_column_names:\n",
|
||||||
|
" product_id = file.split('/')[-1].split('.parquet')[0]\n",
|
||||||
|
" duplicate_column_names[product_id] = list(set(count_of_column_names))\n",
|
||||||
|
"\n",
|
||||||
|
"pprint.pprint(duplicate_column_names)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 35,
|
||||||
|
"id": "85dc8ce4-3ba5-4db8-bc2c-9ee101296e84",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/data/tables/output/en/june_20_2025/11100235.parquet\n",
|
||||||
|
"['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2012 Version 1.1', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
|
||||||
|
"/data/tables/output/en/june_20_2025/11100236.parquet\n",
|
||||||
|
"['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2012 Version 1.1', 'North American Industry Classification System (NAICS)', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
|
||||||
|
"/data/tables/output/en/june_20_2025/20100014.parquet\n",
|
||||||
|
"['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2012 Version 1.2', 'Financial variables', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
|
||||||
|
"/data/tables/output/en/june_20_2025/20100015.parquet\n",
|
||||||
|
"['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2022 Version 1.0', 'Financial variables', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
|
||||||
|
"/data/tables/output/en/june_20_2025/23100313.parquet\n",
|
||||||
|
"['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'Distance-capacity public transit service area', 'Location', 'Gender', 'Demographic and socio-economic', 'Sustainable Development Goals (SDGs) 11.2.1 indicator', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for file in files:\n",
|
||||||
|
" # Open the Parquet file metadata\n",
|
||||||
|
" dataset = pq.ParquetFile(file)\n",
|
||||||
|
" # Get the column names\n",
|
||||||
|
" column_names = dataset.schema.names\n",
|
||||||
|
" has_dot = ['.' in x for x in column_names if '.' in x]\n",
|
||||||
|
" if has_dot:\n",
|
||||||
|
" print(file)\n",
|
||||||
|
" print(column_names)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ebc2fb59-105b-425a-9c92-ad04be934df6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user