diff --git a/data/ca-ab_edmonton-2023A00054811061_orthoimagery_2023_075mm.pmtiles.torrent b/data/ca-ab_edmonton-2023A00054811061_orthoimagery_2023_075mm.pmtiles.torrent deleted file mode 100644 index a02eca2..0000000 Binary files a/data/ca-ab_edmonton-2023A00054811061_orthoimagery_2023_075mm.pmtiles.torrent and /dev/null differ diff --git a/data/ca-ab_red_deer-2024A00054808011_orthoimagery_2024_075mm.pmtiles.torrent b/data/ca-ab_red_deer-2024A00054808011_orthoimagery_2024_075mm.pmtiles.torrent deleted file mode 100644 index b848f41..0000000 Binary files a/data/ca-ab_red_deer-2024A00054808011_orthoimagery_2024_075mm.pmtiles.torrent and /dev/null differ diff --git a/data/ca-ab_red_deer-2025A00054808011_orthoimagery_2025_075mm.pmtiles.torrent b/data/ca-ab_red_deer-2025A00054808011_orthoimagery_2025_075mm.pmtiles.torrent deleted file mode 100644 index 9883447..0000000 Binary files a/data/ca-ab_red_deer-2025A00054808011_orthoimagery_2025_075mm.pmtiles.torrent and /dev/null differ diff --git a/data/ca-bc_burnaby-2020A00055915025_orthoimagery_2020_075mm.pmtiles.torrent b/data/ca-bc_burnaby-2020A00055915025_orthoimagery_2020_075mm.pmtiles.torrent deleted file mode 100644 index 89a0e4f..0000000 Binary files a/data/ca-bc_burnaby-2020A00055915025_orthoimagery_2020_075mm.pmtiles.torrent and /dev/null differ diff --git a/data/ca-bc_vancouver-2022A00055915022_orthoimagery_2022_075mm.pmtiles.torrent b/data/ca-bc_vancouver-2022A00055915022_orthoimagery_2022_075mm.pmtiles.torrent deleted file mode 100644 index 499499b..0000000 Binary files a/data/ca-bc_vancouver-2022A00055915022_orthoimagery_2022_075mm.pmtiles.torrent and /dev/null differ diff --git a/data/ca-mb_winnipeg-2024A00054611040_orthoimagery_2024_075mm.pmtiles.torrent b/data/ca-mb_winnipeg-2024A00054611040_orthoimagery_2024_075mm.pmtiles.torrent deleted file mode 100644 index 75b989e..0000000 Binary files a/data/ca-mb_winnipeg-2024A00054611040_orthoimagery_2024_075mm.pmtiles.torrent and /dev/null differ diff --git 
a/data/ca-yt_whitehorse-2019A000556001009_orthoimagery_2019_200mm.pmtiles.torrent b/data/ca-yt_whitehorse-2019A000556001009_orthoimagery_2019_200mm.pmtiles.torrent deleted file mode 100644 index 8b1aa52..0000000 Binary files a/data/ca-yt_whitehorse-2019A000556001009_orthoimagery_2019_200mm.pmtiles.torrent and /dev/null differ diff --git a/data/ca_nrcan_land_cover_2020_30m.pmtiles.torrent b/data/ca_nrcan_land_cover_2020_30m.pmtiles.torrent deleted file mode 100644 index 0083374..0000000 Binary files a/data/ca_nrcan_land_cover_2020_30m.pmtiles.torrent and /dev/null differ diff --git a/data/ca_statcan_2021A000011124_d4c-datapkg-statistical_dissemination_areas_cartographic_2021_v0.1.0-beta.parquet.torrent b/data/ca_statcan_2021A000011124_d4c-datapkg-statistical_dissemination_areas_cartographic_2021_v0.1.0-beta.parquet.torrent deleted file mode 100644 index 9d7c9f9..0000000 Binary files a/data/ca_statcan_2021A000011124_d4c-datapkg-statistical_dissemination_areas_cartographic_2021_v0.1.0-beta.parquet.torrent and /dev/null differ diff --git a/data/ca_versatiles_satellite_2025-08-10.pmtiles.torrent b/data/ca_versatiles_satellite_2025-08-10.pmtiles.torrent deleted file mode 100644 index a05f310..0000000 Binary files a/data/ca_versatiles_satellite_2025-08-10.pmtiles.torrent and /dev/null differ diff --git a/scripts/02_libtorrent_experiments/03_generate_torrent_large_number_files.py b/scripts/02_libtorrent_experiments/03_generate_torrent_large_number_files.py new file mode 100644 index 0000000..9591c32 --- /dev/null +++ b/scripts/02_libtorrent_experiments/03_generate_torrent_large_number_files.py @@ -0,0 +1,67 @@ +import libtorrent as lt +import sys +import os +import urllib.parse + +### Edit Me ### +DATA_DIR = "~/Documents/Personal/Projects/dataforcanada/d4c-infra-distribution/data/storage-no-cache/input" +DATA_DIR = os.path.expanduser(DATA_DIR) +TRACKER_URL = "udp://tracker.opentrackr.org:1337/announce" +CREATOR = "Data for Canada" +DATASET_ID = 
"ca-on_geospatial-ontario-2024A000235_d4c-datapkg-orthoimagery_2024_16cm_v0.0.1-beta"
TORRENT_COMMENT = "See more information at https://github.com/dataforcanada/d4c-datapkg-orthoimagery/issues/3#issuecomment-3867197437"

# For this dataset, this should create a torrent with ~76,000 pieces
PIECE_SIZE = 64 * 1024 * 1024  # 64 MiB

# Source of truth (AKA long-term storage)
DATASET_ID_SOT = f"{DATASET_ID}"
DATASET_ID_SOT_PATH = f"{DATA_DIR}/{DATASET_ID_SOT}"

# HTTP URL(s) to the dataset
WEB_SEED_URLS = [
    f"https://data.source.coop/dataforcanada/d4c-datapkg-orthoimagery/archive/{DATASET_ID}"
]
### Edit Me ###


def generate_data_package_torrent(filepath, TORRENT_COMMENT, WEB_SEED_URLS):
    """Create a ``.torrent`` for a file or a folder of files and save it in DATA_DIR.

    The torrent is built with the module-level ``PIECE_SIZE``, ``TRACKER_URL``
    and ``CREATOR`` settings and written to ``{DATA_DIR}/{name}.torrent``,
    where ``name`` is the last component of ``filepath``.

    Args:
        filepath: Path to the file or directory to package. Relative paths
            and paths with a trailing separator are accepted; the path is
            made absolute before it is split.
        TORRENT_COMMENT: Free-text comment stored in the torrent.
        WEB_SEED_URLS: HTTP web seed URLs. Currently unused: adding web seeds
            is disabled until the upload mentioned in the TODO below is done.

    Returns:
        The path of the ``.torrent`` file that was written.
    """
    # Normalise before splitting. With a trailing separator, basename() would
    # return "" (producing an output file called ".torrent") and dirname()
    # would return the dataset itself instead of its parent. With a bare
    # relative name, dirname() would return "".
    filepath = os.path.abspath(filepath)
    parent_dir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)

    # Create file storage.
    # Can either add a file, or folder of file(s)
    fs = lt.file_storage()
    lt.add_files(fs, filepath)

    # Create torrent object with explicit piece size
    t = lt.create_torrent(fs, piece_size=PIECE_SIZE)

    t.add_tracker(TRACKER_URL)
    t.set_creator(CREATOR)
    t.set_comment(TORRENT_COMMENT)

    # TODO: Add once Source Cooperative upload is complete
    # Add the HTTP web seed (where the data is)
    #for seed_url in WEB_SEED_URLS:
    #    t.add_url_seed(seed_url)

    # Hash the files. Hashing is rooted at the parent directory because the
    # entries added to the file storage start at the last path component.
    msg = f"Hashing file(s). '{filepath}'"
    print(msg)
    lt.set_piece_hashes(t, parent_dir)

    # Generate the torrent file content
    torrent_dict = t.generate()
    torrent_bytes = lt.bencode(torrent_dict)

    # Save the .torrent file to DATA_DIR
    torrent_filename = f"{DATA_DIR}/{filename}.torrent"
    with open(torrent_filename, "wb") as f:
        f.write(torrent_bytes)
    print(f"Success: Saved '{torrent_filename}'")

    return torrent_filename


if __name__ == '__main__':
    if os.path.exists(DATASET_ID_SOT_PATH):
        generate_data_package_torrent(DATASET_ID_SOT_PATH, TORRENT_COMMENT, WEB_SEED_URLS)
    else:
        # Report the failure on stderr and exit non-zero so shells and
        # pipelines can tell that no torrent was produced.
        print(f"Error: File not found: {DATASET_ID_SOT_PATH}", file=sys.stderr)
        sys.exit(1)