From 61cce53e4c12a1344e30c65996e1eb0564a0b0c7 Mon Sep 17 00:00:00 2001 From: Martha Date: Thu, 4 Dec 2025 11:55:40 +0000 Subject: [PATCH] basic place geometry creation from initial query --- src/geo_assistant/agent/state.py | 5 +- src/geo_assistant/tools/overture.py | 491 ++++++++++++++++------------ 2 files changed, 286 insertions(+), 210 deletions(-) diff --git a/src/geo_assistant/agent/state.py b/src/geo_assistant/agent/state.py index 1022728..d22ad72 100644 --- a/src/geo_assistant/agent/state.py +++ b/src/geo_assistant/agent/state.py @@ -1,10 +1,7 @@ from langchain.agents import AgentState as BaseAgentState from geojson_pydantic import FeatureCollection from typing import Optional -from pydantic import Field class AgentState(BaseAgentState): - feature_collection: Optional[FeatureCollection] = Field( - default=None, description="FeatureCollection to be used for the analysis" - ) + place: Optional[FeatureCollection] diff --git a/src/geo_assistant/tools/overture.py b/src/geo_assistant/tools/overture.py index d9691f4..f63e03c 100644 --- a/src/geo_assistant/tools/overture.py +++ b/src/geo_assistant/tools/overture.py @@ -1,248 +1,327 @@ -from typing import Optional, Annotated +from typing import Annotated import duckdb -from geojson_pydantic import Feature -from shapely import wkt -from shapely.geometry import mapping +import json from langchain_core.tools import tool from langgraph.types import Command from langchain_core.messages import ToolMessage from langchain_core.tools.base import InjectedToolCallId +def create_database_connection(): + """Create and configure a DuckDB connection with necessary extensions. 
+ + Takes no arguments; duckdb.connect() is called without a database path. + The spatial and httpfs extensions are installed and loaded before returning. + + Returns: + Configured DuckDB connection + """ + connection = duckdb.connect() + connection.execute("INSTALL spatial;") + connection.execute("INSTALL httpfs;") + connection.load_extension("spatial") + connection.load_extension("httpfs") + return connection + + @tool -def get_overture_locations( - area_of_interest: Feature, - place_name: Optional[str] = None, - place_type: Optional[str] = None, - overture_release: str = "2024-11-13.0", - similarity_threshold: float = 0.6, - tool_call_id: Annotated[str, InjectedToolCallId] = "", +def get_place( + place_name: str, tool_call_id: Annotated[str, InjectedToolCallId] = "" ) -> Command: - """ - Get locations from Overture Maps. + """Get place location from Overture Maps based on user input place name.""" - Parameters - ---------- - area_of_interest : Feature - Area of interest to search for locations in - place_name : str, optional - Name of the place to search for - place_type : str, optional - Type of the place to search for - overture_release : str - Overture Maps release version - similarity_threshold : float - Minimum similarity score (0-1) for fuzzy name matching - tool_call_id : str - Tool call ID + db_connection = create_database_connection() - Returns - ------- - Command - Command that updates state with location features - """ + location_results = db_connection.execute( + f""" + LOAD spatial; - con = duckdb.connect() + SET s3_region='us-west-2'; - con.execute("INSTALL spatial;") - con.execute("LOAD spatial;") + SELECT + id, + jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score, + names.primary AS name, + confidence, + CAST(socials AS JSON) AS socials, + ST_AsGeoJSON(geometry) AS geometry, + FROM read_parquet( + 's3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*', + filename=true, + hive_partitioning=1 + ) + WHERE jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) > 0.5 
ORDER BY similarity_score DESC + LIMIT 1; + """ + ).fetchall() - con.execute("INSTALL httpfs;") - con.execute("LOAD httpfs;") + db_connection.close() - con.execute( - """ - CREATE OR REPLACE TABLE aoi AS - SELECT ST_GeomFromGeoJSON(?) AS geom - """, - [area_of_interest.geometry.model_dump_json()], - ) + geometry = json.loads(location_results[0][-1]) - base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=places/type=place/*" - - where_conditions = ["ST_Within(ST_GeomFromWKB(geometry), (SELECT geom FROM aoi))"] - - if place_type: - where_conditions.append(f"categories.primary = '{place_type}'") - - if place_name: - where_conditions.append( - f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) >= {similarity_threshold}" - ) - - where_clause = " AND ".join(where_conditions) - - query = f""" - SELECT - id, - ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt, - names.primary as name, - categories.primary as primary_category, - confidence, - websites, - phones, - addresses - FROM read_parquet('{base_url}', filename=true, hive_partitioning=1) - WHERE {where_clause} - """ - - result = con.execute(query).fetchall() - columns = [desc[0] for desc in con.description] - - locations = [dict(zip(columns, row)) for row in result] - - # Convert locations to GeoJSON Features - features = [] - for loc in locations: - # Parse WKT geometry to GeoJSON - geom_wkt = loc.get("geometry_wkt") - if geom_wkt: - shapely_geom = wkt.loads(geom_wkt) - geom_dict = mapping(shapely_geom) - - # Create properties from location data - properties = { - "id": loc.get("id"), - "name": loc.get("name"), - "primary_category": loc.get("primary_category"), - "confidence": loc.get("confidence"), - "websites": loc.get("websites"), - "phones": loc.get("phones"), - "addresses": loc.get("addresses"), + # Create FeatureCollection + feature_collection = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": geometry, + "properties": { + "name": 
location_results[0][2], + "overture_id": location_results[0][0], + }, } - - feature = Feature(geometry=geom_dict, properties=properties) - features.append(feature) - - con.close() - - tool_message = f"Found {len(features)} locations matching the criteria" + ], + } return Command( update={ - "features": features, - "messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)], + "place": feature_collection, + "messages": [ + ToolMessage( + content=f"Found place with Overture name: {location_results[0][2]} based on user query", + tool_call_id=tool_call_id, + ) + ], }, ) -@tool -def geocode_division( - query: str, - level: Optional[str] = None, - overture_release: str = "2024-11-13.0", - similarity_threshold: float = 0.6, - limit: int = 10, - tool_call_id: Annotated[str, InjectedToolCallId] = "", -) -> Command: - """ - Geocode a place name using Overture divisions data. +# @tool +# def get_overture_locations( +# area_of_interest: Feature, +# place_name: Optional[str] = None, +# place_type: Optional[str] = None, +# overture_release: str = "2024-11-13.0", +# similarity_threshold: float = 0.6, +# tool_call_id: Annotated[str, InjectedToolCallId] = "", +# ) -> Command: +# """ +# Get locations from Overture Maps. - Parameters - ---------- - query : str - Place name to search for (e.g., "San Francisco", "California", "United States") - level : str, optional - Division level to filter by. 
Options: - - 'country' - - 'region' (states, provinces) - - 'county' (counties, districts) - - 'locality' (cities, towns) - - 'localadmin' (local administrative areas) - - 'neighborhood' - overture_release : str - Overture Maps release version - similarity_threshold : float - Minimum similarity score (0-1) for fuzzy name matching - limit : int - Maximum number of results to return +# Parameters +# ---------- +# area_of_interest : Feature +# Area of interest to search for locations in +# place_name : str, optional +# Name of the place to search for +# place_type : str, optional +# Type of the place to search for +# overture_release : str +# Overture Maps release version +# similarity_threshold : float +# Minimum similarity score (0-1) for fuzzy name matching +# tool_call_id : str +# Tool call ID - Returns - ------- - Command - Command that updates state with division features - """ +# Returns +# ------- +# Command +# Command that updates state with location features +# """ - con = duckdb.connect() +# con = duckdb.connect() - con.execute("INSTALL spatial;") - con.execute("LOAD spatial;") +# con.execute("INSTALL spatial;") +# con.execute("LOAD spatial;") - con.execute("INSTALL httpfs;") - con.execute("LOAD httpfs;") +# con.execute("INSTALL httpfs;") +# con.execute("LOAD httpfs;") - base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=divisions/type=division/*" +# con.execute( +# """ +# CREATE OR REPLACE TABLE aoi AS +# SELECT ST_GeomFromGeoJSON(?) 
AS geom +# """, +# [area_of_interest.geometry.model_dump_json()], +# ) - where_conditions = [ - f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) >= {similarity_threshold}" - ] +# base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=places/type=place/*" - if level: - where_conditions.append(f"subtype = '{level}'") +# where_conditions = ["ST_Within(ST_GeomFromWKB(geometry), (SELECT geom FROM aoi))"] - where_clause = " AND ".join(where_conditions) +# if place_type: +# where_conditions.append(f"categories.primary = '{place_type}'") - query_sql = f""" - SELECT - id, - ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt, - names.primary as name, - names.common as common_names, - subtype as division_level, - country, - region, - hierarchies, - population, - capital, - wikidata, - sources, - jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) as similarity_score - FROM read_parquet('{base_url}', filename=true, hive_partitioning=1) - WHERE {where_clause} - ORDER BY similarity_score DESC - LIMIT {limit} - """ +# if place_name: +# where_conditions.append( +# f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) >= {similarity_threshold}" +# ) - result = con.execute(query_sql).fetchall() - columns = [desc[0] for desc in con.description] +# where_clause = " AND ".join(where_conditions) - divisions = [dict(zip(columns, row)) for row in result] +# query = f""" +# SELECT +# id, +# ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt, +# names.primary as name, +# categories.primary as primary_category, +# confidence, +# websites, +# phones, +# addresses +# FROM read_parquet('{base_url}', filename=true, hive_partitioning=1) +# WHERE {where_clause} +# """ - # Convert divisions to GeoJSON Features - features = [] - for div in divisions: - # Parse WKT geometry to GeoJSON - geom_wkt = div.get("geometry_wkt") - if geom_wkt: - shapely_geom = wkt.loads(geom_wkt) - geom_dict = mapping(shapely_geom) +# result = 
con.execute(query).fetchall() +# columns = [desc[0] for desc in con.description] - # Create properties from division data - properties = { - "id": div.get("id"), - "name": div.get("name"), - "common_names": div.get("common_names"), - "division_level": div.get("division_level"), - "country": div.get("country"), - "region": div.get("region"), - "hierarchies": div.get("hierarchies"), - "population": div.get("population"), - "capital": div.get("capital"), - "wikidata": div.get("wikidata"), - "sources": div.get("sources"), - "similarity_score": div.get("similarity_score"), - } +# locations = [dict(zip(columns, row)) for row in result] - feature = Feature(geometry=geom_dict, properties=properties) - features.append(feature) +# # Convert locations to GeoJSON Features +# features = [] +# for loc in locations: +# # Parse WKT geometry to GeoJSON +# geom_wkt = loc.get("geometry_wkt") +# if geom_wkt: +# shapely_geom = wkt.loads(geom_wkt) +# geom_dict = mapping(shapely_geom) - con.close() +# # Create properties from location data +# properties = { +# "id": loc.get("id"), +# "name": loc.get("name"), +# "primary_category": loc.get("primary_category"), +# "confidence": loc.get("confidence"), +# "websites": loc.get("websites"), +# "phones": loc.get("phones"), +# "addresses": loc.get("addresses"), +# } - tool_message = f"Found {len(features)} divisions matching '{query}'" +# feature = Feature(geometry=geom_dict, properties=properties) +# features.append(feature) - return Command( - update={ - "features": features, - "messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)], - }, - ) +# con.close() + +# tool_message = f"Found {len(features)} locations matching the criteria" + +# return Command( +# update={ +# "features": features, +# "messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)], +# }, +# ) + + +# @tool +# def geocode_division( +# query: str, +# level: Optional[str] = None, +# overture_release: str = "2024-11-13.0", +# 
similarity_threshold: float = 0.6, +# limit: int = 10, +# tool_call_id: Annotated[str, InjectedToolCallId] = "", +# ) -> Command: +# """ +# Geocode a place name using Overture divisions data. + +# Parameters +# ---------- +# query : str +# Place name to search for (e.g., "San Francisco", "California", "United States") +# level : str, optional +# Division level to filter by. Options: +# - 'country' +# - 'region' (states, provinces) +# - 'county' (counties, districts) +# - 'locality' (cities, towns) +# - 'localadmin' (local administrative areas) +# - 'neighborhood' +# overture_release : str +# Overture Maps release version +# similarity_threshold : float +# Minimum similarity score (0-1) for fuzzy name matching +# limit : int +# Maximum number of results to return + +# Returns +# ------- +# Command +# Command that updates state with division features +# """ + +# con = duckdb.connect() + +# con.execute("INSTALL spatial;") +# con.execute("LOAD spatial;") + +# con.execute("INSTALL httpfs;") +# con.execute("LOAD httpfs;") + +# base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=divisions/type=division/*" + +# where_conditions = [ +# f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) >= {similarity_threshold}" +# ] + +# if level: +# where_conditions.append(f"subtype = '{level}'") + +# where_clause = " AND ".join(where_conditions) + +# query_sql = f""" +# SELECT +# id, +# ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt, +# names.primary as name, +# names.common as common_names, +# subtype as division_level, +# country, +# region, +# hierarchies, +# population, +# capital, +# wikidata, +# sources, +# jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) as similarity_score +# FROM read_parquet('{base_url}', filename=true, hive_partitioning=1) +# WHERE {where_clause} +# ORDER BY similarity_score DESC +# LIMIT {limit} +# """ + +# result = con.execute(query_sql).fetchall() +# columns = [desc[0] for desc in con.description] + 
+# divisions = [dict(zip(columns, row)) for row in result] + +# # Convert divisions to GeoJSON Features +# features = [] +# for div in divisions: +# # Parse WKT geometry to GeoJSON +# geom_wkt = div.get("geometry_wkt") +# if geom_wkt: +# shapely_geom = wkt.loads(geom_wkt) +# geom_dict = mapping(shapely_geom) + +# # Create properties from division data +# properties = { +# "id": div.get("id"), +# "name": div.get("name"), +# "common_names": div.get("common_names"), +# "division_level": div.get("division_level"), +# "country": div.get("country"), +# "region": div.get("region"), +# "hierarchies": div.get("hierarchies"), +# "population": div.get("population"), +# "capital": div.get("capital"), +# "wikidata": div.get("wikidata"), +# "sources": div.get("sources"), +# "similarity_score": div.get("similarity_score"), +# } + +# feature = Feature(geometry=geom_dict, properties=properties) +# features.append(feature) + +# con.close() + +# tool_message = f"Found {len(features)} divisions matching '{query}'" + +# return Command( +# update={ +# "features": features, +# "messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)], +# }, +# )