From e929850d4a45592fe460fa1c8459244e8c0a944b Mon Sep 17 00:00:00 2001 From: Diego Ripley Date: Sat, 21 Jun 2025 18:03:29 +0000 Subject: [PATCH] Finish comment on issue with Value and VALUE columns being treated the same by DuckDB --- experiments/statcan_products/process_files_multiprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/statcan_products/process_files_multiprocessing.py b/experiments/statcan_products/process_files_multiprocessing.py index 1581125..b118e3d 100644 --- a/experiments/statcan_products/process_files_multiprocessing.py +++ b/experiments/statcan_products/process_files_multiprocessing.py @@ -230,7 +230,7 @@ def process_cube(product_id, language="en"): - productId 17100009 has DECIMAL = 0 (int64) - productId 35100076 has multiple DECIMAL precisions [0, 1, 2] (int64, float64, float64) - productId 10100164 has two columns named the same "Value" and "VALUE". It is processed fine with the read_csv, and when it is exported as parquet. - DuckDB has an issue with it, but Pandas and Polars are able to handle "Value" and "VALUE" + DuckDB has an issue with it (as it is case insensitive), but Pandas and Polars are able to handle "Value" and "VALUE" """ cur.execute("SELECT product_id FROM downloaded WHERE product_id = ?", (product_id,)) result = cur.fetchone()