diff --git a/experiments/statcan_products/check_duplicate_column_names.ipynb b/experiments/statcan_products/check_duplicate_column_names.ipynb
new file mode 100644
index 0000000..bb49ba9
--- /dev/null
+++ b/experiments/statcan_products/check_duplicate_column_names.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a2239884-1380-45bb-8ad4-648ef2c5b46b",
+ "metadata": {},
+ "source": [
+ "DuckDB treats column names case-insensitively, so \"Value\" and \"VALUE\" count as the same name. When a file contains both, the second one is renamed: \"VALUE\" becomes \"VALUE_1\".\n",
+ "\n",
+ "An example of this is productId 38100105."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "5909b14d-1f07-46bc-84bf-09f269e15e41",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from collections import Counter\n",
+ "import glob\n",
+ "import pprint\n",
+ "\n",
+ "import duckdb\n",
+ "import pyarrow.parquet as pq"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "04e61bd3-ab4c-46aa-9c0b-de949699ca0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " REF_DATE | \n",
+ " REF_START_DATE | \n",
+ " REF_END_DATE | \n",
+ " GEO | \n",
+ " DGUID | \n",
+ " Value | \n",
+ " UOM | \n",
+ " UOM_ID | \n",
+ " SCALAR_FACTOR | \n",
+ " SCALAR_ID | \n",
+ " VECTOR | \n",
+ " COORDINATE | \n",
+ " VALUE_1 | \n",
+ " STATUS | \n",
+ " SYMBOL | \n",
+ " TERMINATED | \n",
+ " DECIMALS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1961 | \n",
+ " 1961-01-01 | \n",
+ " 1961-12-31 | \n",
+ " Newfoundland and Labrador | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822242 | \n",
+ " 2.1 | \n",
+ " 470.0 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1961 | \n",
+ " 1961-01-01 | \n",
+ " 1961-12-31 | \n",
+ " Newfoundland and Labrador | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822243 | \n",
+ " 2.2 | \n",
+ " 539.1 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1961 | \n",
+ " 1961-01-01 | \n",
+ " 1961-12-31 | \n",
+ " Nova Scotia | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822244 | \n",
+ " 3.1 | \n",
+ " 0.0 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1961 | \n",
+ " 1961-01-01 | \n",
+ " 1961-12-31 | \n",
+ " Nova Scotia | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822245 | \n",
+ " 3.2 | \n",
+ " 76.6 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1961 | \n",
+ " 1961-01-01 | \n",
+ " 1961-12-31 | \n",
+ " New Brunswick | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822246 | \n",
+ " 4.1 | \n",
+ " 637.9 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 753 | \n",
+ " 2010 | \n",
+ " 2010-01-01 | \n",
+ " 2010-12-31 | \n",
+ " Canada | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822241 | \n",
+ " 1.2 | \n",
+ " 124971.2 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 754 | \n",
+ " 2011 | \n",
+ " 2011-01-01 | \n",
+ " 2011-12-31 | \n",
+ " Canada | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822240 | \n",
+ " 1.1 | \n",
+ " 120498.5 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 755 | \n",
+ " 2011 | \n",
+ " 2011-01-01 | \n",
+ " 2011-12-31 | \n",
+ " Canada | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822241 | \n",
+ " 1.2 | \n",
+ " 120498.5 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 756 | \n",
+ " 2012 | \n",
+ " 2012-01-01 | \n",
+ " 2012-12-31 | \n",
+ " Canada | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822240 | \n",
+ " 1.1 | \n",
+ " 113132.6 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 757 | \n",
+ " 2012 | \n",
+ " 2012-01-01 | \n",
+ " 2012-12-31 | \n",
+ " Canada | \n",
+ " None | \n",
+ " Present value calculation, timber stocks, meth... | \n",
+ " Dollars | \n",
+ " 81 | \n",
+ " millions | \n",
+ " 6 | \n",
+ " v3822241 | \n",
+ " 1.2 | \n",
+ " 113132.6 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
758 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " REF_DATE REF_START_DATE REF_END_DATE GEO DGUID \\\n",
+ "0 1961 1961-01-01 1961-12-31 Newfoundland and Labrador None \n",
+ "1 1961 1961-01-01 1961-12-31 Newfoundland and Labrador None \n",
+ "2 1961 1961-01-01 1961-12-31 Nova Scotia None \n",
+ "3 1961 1961-01-01 1961-12-31 Nova Scotia None \n",
+ "4 1961 1961-01-01 1961-12-31 New Brunswick None \n",
+ ".. ... ... ... ... ... \n",
+ "753 2010 2010-01-01 2010-12-31 Canada None \n",
+ "754 2011 2011-01-01 2011-12-31 Canada None \n",
+ "755 2011 2011-01-01 2011-12-31 Canada None \n",
+ "756 2012 2012-01-01 2012-12-31 Canada None \n",
+ "757 2012 2012-01-01 2012-12-31 Canada None \n",
+ "\n",
+ " Value UOM UOM_ID \\\n",
+ "0 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "1 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "2 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "3 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "4 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ ".. ... ... ... \n",
+ "753 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "754 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "755 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "756 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "757 Present value calculation, timber stocks, meth... Dollars 81 \n",
+ "\n",
+ " SCALAR_FACTOR SCALAR_ID VECTOR COORDINATE VALUE_1 STATUS SYMBOL \\\n",
+ "0 millions 6 v3822242 2.1 470.0 None None \n",
+ "1 millions 6 v3822243 2.2 539.1 None None \n",
+ "2 millions 6 v3822244 3.1 0.0 None None \n",
+ "3 millions 6 v3822245 3.2 76.6 None None \n",
+ "4 millions 6 v3822246 4.1 637.9 None None \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "753 millions 6 v3822241 1.2 124971.2 None None \n",
+ "754 millions 6 v3822240 1.1 120498.5 None None \n",
+ "755 millions 6 v3822241 1.2 120498.5 None None \n",
+ "756 millions 6 v3822240 1.1 113132.6 None None \n",
+ "757 millions 6 v3822241 1.2 113132.6 None None \n",
+ "\n",
+ " TERMINATED DECIMALS \n",
+ "0 None 1 \n",
+ "1 None 1 \n",
+ "2 None 1 \n",
+ "3 None 1 \n",
+ "4 None 1 \n",
+ ".. ... ... \n",
+ "753 None 1 \n",
+ "754 None 1 \n",
+ "755 None 1 \n",
+ "756 None 1 \n",
+ "757 None 1 \n",
+ "\n",
+ "[758 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load one affected product; the result shows both a Value and a VALUE_1 column.\n",
+ "con = duckdb.connect()\n",
+ "query = \"SELECT * FROM '/data/tables/output/en/june_20_2025/38100105.parquet'\"\n",
+ "issue = con.execute(query).df()\n",
+ "issue"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "cbf8953d-8523-42e8-b28c-6b464869ce61",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# glob order depends on the file system; sort so later cells report files in a stable order.\n",
+ "files = sorted(glob.glob(\"/data/tables/output/en/june_20_2025/*.parquet\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "d52074f9-7746-4569-9aea-57c204eda2eb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'10100164': ['VALUE'],\n",
+ " '13100902': ['STATUS'],\n",
+ " '13100904': ['STATUS'],\n",
+ " '23100049': ['VALUE'],\n",
+ " '23100050': ['VALUE'],\n",
+ " '23100268': ['STATUS'],\n",
+ " '36100374': ['VALUE'],\n",
+ " '36100396': ['VALUE'],\n",
+ " '36100397': ['VALUE'],\n",
+ " '36100658': ['VALUE'],\n",
+ " '38100104': ['VALUE'],\n",
+ " '38100105': ['VALUE']}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Map product id -> column names that collide once case is ignored.\n",
+ "duplicate_column_names = {}\n",
+ "for file in files:\n",
+ "    # Open the Parquet file metadata and tally the upper-cased column names\n",
+ "    dataset = pq.ParquetFile(file)\n",
+ "    name_counts = Counter(name.upper() for name in dataset.schema.names)\n",
+ "    # Sort so the reported list is deterministic (set iteration order is not)\n",
+ "    repeated_names = sorted(name for name, seen in name_counts.items() if seen > 1)\n",
+ "    if repeated_names:\n",
+ "        # File names look like <product_id>.parquet\n",
+ "        product_id = file.split('/')[-1].split('.parquet')[0]\n",
+ "        duplicate_column_names[product_id] = repeated_names\n",
+ "\n",
+ "pprint.pprint(duplicate_column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "85dc8ce4-3ba5-4db8-bc2c-9ee101296e84",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/data/tables/output/en/june_20_2025/11100235.parquet\n",
+ "['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2012 Version 1.1', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
+ "/data/tables/output/en/june_20_2025/11100236.parquet\n",
+ "['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2012 Version 1.1', 'North American Industry Classification System (NAICS)', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
+ "/data/tables/output/en/june_20_2025/20100014.parquet\n",
+ "['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2012 Version 1.2', 'Financial variables', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
+ "/data/tables/output/en/june_20_2025/20100015.parquet\n",
+ "['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'North American Product Classification System (NAPCS) Canada 2022 Version 1.0', 'Financial variables', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n",
+ "/data/tables/output/en/june_20_2025/23100313.parquet\n",
+ "['REF_DATE', 'REF_START_DATE', 'REF_END_DATE', 'GEO', 'DGUID', 'Distance-capacity public transit service area', 'Location', 'Gender', 'Demographic and socio-economic', 'Sustainable Development Goals (SDGs) 11.2.1 indicator', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List every file that has a dot in at least one column name.\n",
+ "for file in files:\n",
+ "    # Open the Parquet file metadata\n",
+ "    dataset = pq.ParquetFile(file)\n",
+ "    # Get the column names\n",
+ "    column_names = dataset.schema.names\n",
+ "    # any() stops at the first match instead of building a throwaway list\n",
+ "    if any('.' in name for name in column_names):\n",
+ "        print(file)\n",
+ "        print(column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ebc2fb59-105b-425a-9c92-ad04be934df6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}