Merge pull request #4 from developmentseed/mdw/vector-query

feat: Vector Query to get Geocoded Location from Place
This commit is contained in:
Daniel Wiesmann
2025-12-04 12:02:55 +00:00
committed by GitHub
2 changed files with 64 additions and 227 deletions
+1 -4
View File
@@ -1,10 +1,7 @@
from langchain.agents import AgentState as BaseAgentState
from geojson_pydantic import FeatureCollection
from typing import Optional
from pydantic import Field
class AgentState(BaseAgentState):
feature_collection: Optional[FeatureCollection] = Field(
default=None, description="FeatureCollection to be used for the analysis"
)
place: Optional[FeatureCollection]
+63 -223
View File
@@ -1,248 +1,88 @@
from typing import Optional, Annotated
from typing import Annotated
import duckdb
from geojson_pydantic import Feature
from shapely import wkt
from shapely.geometry import mapping
import json
from langchain_core.tools import tool
from langgraph.types import Command
from langchain_core.messages import ToolMessage
from langchain_core.tools.base import InjectedToolCallId
@tool
def get_overture_locations(
area_of_interest: Feature,
place_name: Optional[str] = None,
place_type: Optional[str] = None,
overture_release: str = "2024-11-13.0",
similarity_threshold: float = 0.6,
tool_call_id: Annotated[str, InjectedToolCallId] = "",
) -> Command:
def create_database_connection():
"""Create and configure a DuckDB connection with necessary extensions.
Args:
database_path: Path to the DuckDB database file
Returns:
Configured DuckDB connection
"""
Get locations from Overture Maps.
Parameters
----------
area_of_interest : Feature
Area of interest to search for locations in
place_name : str, optional
Name of the place to search for
place_type : str, optional
Type of the place to search for
overture_release : str
Overture Maps release version
similarity_threshold : float
Minimum similarity score (0-1) for fuzzy name matching
tool_call_id : str
Tool call ID
Returns
-------
Command
Command that updates state with location features
"""
con = duckdb.connect()
con.execute("INSTALL spatial;")
con.execute("LOAD spatial;")
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")
con.execute(
"""
CREATE OR REPLACE TABLE aoi AS
SELECT ST_GeomFromGeoJSON(?) AS geom
""",
[area_of_interest.geometry.model_dump_json()],
)
base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=places/type=place/*"
where_conditions = ["ST_Within(ST_GeomFromWKB(geometry), (SELECT geom FROM aoi))"]
if place_type:
where_conditions.append(f"categories.primary = '{place_type}'")
if place_name:
where_conditions.append(
f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) >= {similarity_threshold}"
)
where_clause = " AND ".join(where_conditions)
query = f"""
SELECT
id,
ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt,
names.primary as name,
categories.primary as primary_category,
confidence,
websites,
phones,
addresses
FROM read_parquet('{base_url}', filename=true, hive_partitioning=1)
WHERE {where_clause}
"""
result = con.execute(query).fetchall()
columns = [desc[0] for desc in con.description]
locations = [dict(zip(columns, row)) for row in result]
# Convert locations to GeoJSON Features
features = []
for loc in locations:
# Parse WKT geometry to GeoJSON
geom_wkt = loc.get("geometry_wkt")
if geom_wkt:
shapely_geom = wkt.loads(geom_wkt)
geom_dict = mapping(shapely_geom)
# Create properties from location data
properties = {
"id": loc.get("id"),
"name": loc.get("name"),
"primary_category": loc.get("primary_category"),
"confidence": loc.get("confidence"),
"websites": loc.get("websites"),
"phones": loc.get("phones"),
"addresses": loc.get("addresses"),
}
feature = Feature(geometry=geom_dict, properties=properties)
features.append(feature)
con.close()
tool_message = f"Found {len(features)} locations matching the criteria"
return Command(
update={
"features": features,
"messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)],
},
)
connection = duckdb.connect()
connection.execute("INSTALL spatial;")
connection.execute("INSTALL httpfs;")
connection.load_extension("spatial")
connection.load_extension("httpfs")
return connection
@tool
def geocode_division(
query: str,
level: Optional[str] = None,
overture_release: str = "2024-11-13.0",
similarity_threshold: float = 0.6,
limit: int = 10,
tool_call_id: Annotated[str, InjectedToolCallId] = "",
def get_place(
place_name: str, tool_call_id: Annotated[str, InjectedToolCallId] = ""
) -> Command:
"""
Geocode a place name using Overture divisions data.
"""Get place location from Overture Maps based on user input place name."""
Parameters
----------
query : str
Place name to search for (e.g., "San Francisco", "California", "United States")
level : str, optional
Division level to filter by. Options:
- 'country'
- 'region' (states, provinces)
- 'county' (counties, districts)
- 'locality' (cities, towns)
- 'localadmin' (local administrative areas)
- 'neighborhood'
overture_release : str
Overture Maps release version
similarity_threshold : float
Minimum similarity score (0-1) for fuzzy name matching
limit : int
Maximum number of results to return
db_connection = create_database_connection()
Returns
-------
Command
Command that updates state with division features
"""
location_results = db_connection.execute(
f"""
LOAD spatial;
con = duckdb.connect()
SET s3_region='us-west-2';
con.execute("INSTALL spatial;")
con.execute("LOAD spatial;")
SELECT
id,
jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score,
names.primary AS name,
confidence,
CAST(socials AS JSON) AS socials,
ST_AsGeoJSON(geometry) AS geometry,
FROM read_parquet(
's3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*',
filename=true,
hive_partitioning=1
)
WHERE jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) > 0.5
ORDER BY similarity_score DESC
LIMIT 1;
"""
).fetchall()
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")
db_connection.close()
base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=divisions/type=division/*"
geometry = json.loads(location_results[0][-1])
where_conditions = [
f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) >= {similarity_threshold}"
]
if level:
where_conditions.append(f"subtype = '{level}'")
where_clause = " AND ".join(where_conditions)
query_sql = f"""
SELECT
id,
ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt,
names.primary as name,
names.common as common_names,
subtype as division_level,
country,
region,
hierarchies,
population,
capital,
wikidata,
sources,
jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) as similarity_score
FROM read_parquet('{base_url}', filename=true, hive_partitioning=1)
WHERE {where_clause}
ORDER BY similarity_score DESC
LIMIT {limit}
"""
result = con.execute(query_sql).fetchall()
columns = [desc[0] for desc in con.description]
divisions = [dict(zip(columns, row)) for row in result]
# Convert divisions to GeoJSON Features
features = []
for div in divisions:
# Parse WKT geometry to GeoJSON
geom_wkt = div.get("geometry_wkt")
if geom_wkt:
shapely_geom = wkt.loads(geom_wkt)
geom_dict = mapping(shapely_geom)
# Create properties from division data
properties = {
"id": div.get("id"),
"name": div.get("name"),
"common_names": div.get("common_names"),
"division_level": div.get("division_level"),
"country": div.get("country"),
"region": div.get("region"),
"hierarchies": div.get("hierarchies"),
"population": div.get("population"),
"capital": div.get("capital"),
"wikidata": div.get("wikidata"),
"sources": div.get("sources"),
"similarity_score": div.get("similarity_score"),
# Create FeatureCollection
feature_collection = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": geometry,
"properties": {
"name": location_results[0][2],
"overture_id": location_results[0][0],
},
}
feature = Feature(geometry=geom_dict, properties=properties)
features.append(feature)
con.close()
tool_message = f"Found {len(features)} divisions matching '{query}'"
],
}
return Command(
update={
"features": features,
"messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)],
"place": feature_collection,
"messages": [
ToolMessage(
content=f"Found place with Overture name: {location_results[0][2]} based on user query",
tool_call_id=tool_call_id,
)
],
},
)