mirror of
https://github.com/dataforcanada/d4c-service-geo-assistant.git
synced 2026-06-15 15:31:02 +02:00
Merge pull request #4 from developmentseed/mdw/vector-query
feat: Vector Query to get Geocoded Location from Place
This commit is contained in:
@@ -1,10 +1,7 @@
|
|||||||
from langchain.agents import AgentState as BaseAgentState
|
from langchain.agents import AgentState as BaseAgentState
|
||||||
from geojson_pydantic import FeatureCollection
|
from geojson_pydantic import FeatureCollection
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pydantic import Field
|
|
||||||
|
|
||||||
|
|
||||||
class AgentState(BaseAgentState):
|
class AgentState(BaseAgentState):
|
||||||
feature_collection: Optional[FeatureCollection] = Field(
|
place: Optional[FeatureCollection]
|
||||||
default=None, description="FeatureCollection to be used for the analysis"
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -1,248 +1,88 @@
|
|||||||
from typing import Optional, Annotated
|
from typing import Annotated
|
||||||
import duckdb
|
import duckdb
|
||||||
from geojson_pydantic import Feature
|
import json
|
||||||
from shapely import wkt
|
|
||||||
from shapely.geometry import mapping
|
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from langgraph.types import Command
|
from langgraph.types import Command
|
||||||
from langchain_core.messages import ToolMessage
|
from langchain_core.messages import ToolMessage
|
||||||
from langchain_core.tools.base import InjectedToolCallId
|
from langchain_core.tools.base import InjectedToolCallId
|
||||||
|
|
||||||
|
|
||||||
@tool
|
def create_database_connection():
|
||||||
def get_overture_locations(
|
"""Create and configure a DuckDB connection with necessary extensions.
|
||||||
area_of_interest: Feature,
|
|
||||||
place_name: Optional[str] = None,
|
Args:
|
||||||
place_type: Optional[str] = None,
|
None. This function takes no arguments; duckdb.connect() is called without a database path.
|
||||||
overture_release: str = "2024-11-13.0",
|
|
||||||
similarity_threshold: float = 0.6,
|
Returns:
|
||||||
tool_call_id: Annotated[str, InjectedToolCallId] = "",
|
Configured DuckDB connection
|
||||||
) -> Command:
|
|
||||||
"""
|
"""
|
||||||
Get locations from Overture Maps.
|
connection = duckdb.connect()
|
||||||
|
connection.execute("INSTALL spatial;")
|
||||||
Parameters
|
connection.execute("INSTALL httpfs;")
|
||||||
----------
|
connection.load_extension("spatial")
|
||||||
area_of_interest : Feature
|
connection.load_extension("httpfs")
|
||||||
Area of interest to search for locations in
|
return connection
|
||||||
place_name : str, optional
|
|
||||||
Name of the place to search for
|
|
||||||
place_type : str, optional
|
|
||||||
Type of the place to search for
|
|
||||||
overture_release : str
|
|
||||||
Overture Maps release version
|
|
||||||
similarity_threshold : float
|
|
||||||
Minimum similarity score (0-1) for fuzzy name matching
|
|
||||||
tool_call_id : str
|
|
||||||
Tool call ID
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Command
|
|
||||||
Command that updates state with location features
|
|
||||||
"""
|
|
||||||
|
|
||||||
con = duckdb.connect()
|
|
||||||
|
|
||||||
con.execute("INSTALL spatial;")
|
|
||||||
con.execute("LOAD spatial;")
|
|
||||||
|
|
||||||
con.execute("INSTALL httpfs;")
|
|
||||||
con.execute("LOAD httpfs;")
|
|
||||||
|
|
||||||
con.execute(
|
|
||||||
"""
|
|
||||||
CREATE OR REPLACE TABLE aoi AS
|
|
||||||
SELECT ST_GeomFromGeoJSON(?) AS geom
|
|
||||||
""",
|
|
||||||
[area_of_interest.geometry.model_dump_json()],
|
|
||||||
)
|
|
||||||
|
|
||||||
base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=places/type=place/*"
|
|
||||||
|
|
||||||
where_conditions = ["ST_Within(ST_GeomFromWKB(geometry), (SELECT geom FROM aoi))"]
|
|
||||||
|
|
||||||
if place_type:
|
|
||||||
where_conditions.append(f"categories.primary = '{place_type}'")
|
|
||||||
|
|
||||||
if place_name:
|
|
||||||
where_conditions.append(
|
|
||||||
f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) >= {similarity_threshold}"
|
|
||||||
)
|
|
||||||
|
|
||||||
where_clause = " AND ".join(where_conditions)
|
|
||||||
|
|
||||||
query = f"""
|
|
||||||
SELECT
|
|
||||||
id,
|
|
||||||
ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt,
|
|
||||||
names.primary as name,
|
|
||||||
categories.primary as primary_category,
|
|
||||||
confidence,
|
|
||||||
websites,
|
|
||||||
phones,
|
|
||||||
addresses
|
|
||||||
FROM read_parquet('{base_url}', filename=true, hive_partitioning=1)
|
|
||||||
WHERE {where_clause}
|
|
||||||
"""
|
|
||||||
|
|
||||||
result = con.execute(query).fetchall()
|
|
||||||
columns = [desc[0] for desc in con.description]
|
|
||||||
|
|
||||||
locations = [dict(zip(columns, row)) for row in result]
|
|
||||||
|
|
||||||
# Convert locations to GeoJSON Features
|
|
||||||
features = []
|
|
||||||
for loc in locations:
|
|
||||||
# Parse WKT geometry to GeoJSON
|
|
||||||
geom_wkt = loc.get("geometry_wkt")
|
|
||||||
if geom_wkt:
|
|
||||||
shapely_geom = wkt.loads(geom_wkt)
|
|
||||||
geom_dict = mapping(shapely_geom)
|
|
||||||
|
|
||||||
# Create properties from location data
|
|
||||||
properties = {
|
|
||||||
"id": loc.get("id"),
|
|
||||||
"name": loc.get("name"),
|
|
||||||
"primary_category": loc.get("primary_category"),
|
|
||||||
"confidence": loc.get("confidence"),
|
|
||||||
"websites": loc.get("websites"),
|
|
||||||
"phones": loc.get("phones"),
|
|
||||||
"addresses": loc.get("addresses"),
|
|
||||||
}
|
|
||||||
|
|
||||||
feature = Feature(geometry=geom_dict, properties=properties)
|
|
||||||
features.append(feature)
|
|
||||||
|
|
||||||
con.close()
|
|
||||||
|
|
||||||
tool_message = f"Found {len(features)} locations matching the criteria"
|
|
||||||
|
|
||||||
return Command(
|
|
||||||
update={
|
|
||||||
"features": features,
|
|
||||||
"messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
def geocode_division(
|
def get_place(
|
||||||
query: str,
|
place_name: str, tool_call_id: Annotated[str, InjectedToolCallId] = ""
|
||||||
level: Optional[str] = None,
|
|
||||||
overture_release: str = "2024-11-13.0",
|
|
||||||
similarity_threshold: float = 0.6,
|
|
||||||
limit: int = 10,
|
|
||||||
tool_call_id: Annotated[str, InjectedToolCallId] = "",
|
|
||||||
) -> Command:
|
) -> Command:
|
||||||
"""
|
"""Get place location from Overture Maps based on user input place name."""
|
||||||
Geocode a place name using Overture divisions data.
|
|
||||||
|
|
||||||
Parameters
|
db_connection = create_database_connection()
|
||||||
----------
|
|
||||||
query : str
|
|
||||||
Place name to search for (e.g., "San Francisco", "California", "United States")
|
|
||||||
level : str, optional
|
|
||||||
Division level to filter by. Options:
|
|
||||||
- 'country'
|
|
||||||
- 'region' (states, provinces)
|
|
||||||
- 'county' (counties, districts)
|
|
||||||
- 'locality' (cities, towns)
|
|
||||||
- 'localadmin' (local administrative areas)
|
|
||||||
- 'neighborhood'
|
|
||||||
overture_release : str
|
|
||||||
Overture Maps release version
|
|
||||||
similarity_threshold : float
|
|
||||||
Minimum similarity score (0-1) for fuzzy name matching
|
|
||||||
limit : int
|
|
||||||
Maximum number of results to return
|
|
||||||
|
|
||||||
Returns
|
location_results = db_connection.execute(
|
||||||
-------
|
f"""
|
||||||
Command
|
LOAD spatial;
|
||||||
Command that updates state with division features
|
|
||||||
"""
|
|
||||||
|
|
||||||
con = duckdb.connect()
|
SET s3_region='us-west-2';
|
||||||
|
|
||||||
con.execute("INSTALL spatial;")
|
SELECT
|
||||||
con.execute("LOAD spatial;")
|
id,
|
||||||
|
jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score,
|
||||||
|
names.primary AS name,
|
||||||
|
confidence,
|
||||||
|
CAST(socials AS JSON) AS socials,
|
||||||
|
ST_AsGeoJSON(geometry) AS geometry,
|
||||||
|
FROM read_parquet(
|
||||||
|
's3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*',
|
||||||
|
filename=true,
|
||||||
|
hive_partitioning=1
|
||||||
|
)
|
||||||
|
WHERE jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) > 0.5
|
||||||
|
ORDER BY similarity_score DESC
|
||||||
|
LIMIT 1;
|
||||||
|
"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
con.execute("INSTALL httpfs;")
|
db_connection.close()
|
||||||
con.execute("LOAD httpfs;")
|
|
||||||
|
|
||||||
base_url = f"s3://overturemaps-us-west-2/release/{overture_release}/theme=divisions/type=division/*"
|
geometry = json.loads(location_results[0][-1])
|
||||||
|
|
||||||
where_conditions = [
|
# Create FeatureCollection
|
||||||
f"jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) >= {similarity_threshold}"
|
feature_collection = {
|
||||||
]
|
"type": "FeatureCollection",
|
||||||
|
"features": [
|
||||||
if level:
|
{
|
||||||
where_conditions.append(f"subtype = '{level}'")
|
"type": "Feature",
|
||||||
|
"geometry": geometry,
|
||||||
where_clause = " AND ".join(where_conditions)
|
"properties": {
|
||||||
|
"name": location_results[0][2],
|
||||||
query_sql = f"""
|
"overture_id": location_results[0][0],
|
||||||
SELECT
|
},
|
||||||
id,
|
|
||||||
ST_AsText(ST_GeomFromWKB(geometry)) as geometry_wkt,
|
|
||||||
names.primary as name,
|
|
||||||
names.common as common_names,
|
|
||||||
subtype as division_level,
|
|
||||||
country,
|
|
||||||
region,
|
|
||||||
hierarchies,
|
|
||||||
population,
|
|
||||||
capital,
|
|
||||||
wikidata,
|
|
||||||
sources,
|
|
||||||
jaro_winkler_similarity(LOWER(names.primary), LOWER('{query}')) as similarity_score
|
|
||||||
FROM read_parquet('{base_url}', filename=true, hive_partitioning=1)
|
|
||||||
WHERE {where_clause}
|
|
||||||
ORDER BY similarity_score DESC
|
|
||||||
LIMIT {limit}
|
|
||||||
"""
|
|
||||||
|
|
||||||
result = con.execute(query_sql).fetchall()
|
|
||||||
columns = [desc[0] for desc in con.description]
|
|
||||||
|
|
||||||
divisions = [dict(zip(columns, row)) for row in result]
|
|
||||||
|
|
||||||
# Convert divisions to GeoJSON Features
|
|
||||||
features = []
|
|
||||||
for div in divisions:
|
|
||||||
# Parse WKT geometry to GeoJSON
|
|
||||||
geom_wkt = div.get("geometry_wkt")
|
|
||||||
if geom_wkt:
|
|
||||||
shapely_geom = wkt.loads(geom_wkt)
|
|
||||||
geom_dict = mapping(shapely_geom)
|
|
||||||
|
|
||||||
# Create properties from division data
|
|
||||||
properties = {
|
|
||||||
"id": div.get("id"),
|
|
||||||
"name": div.get("name"),
|
|
||||||
"common_names": div.get("common_names"),
|
|
||||||
"division_level": div.get("division_level"),
|
|
||||||
"country": div.get("country"),
|
|
||||||
"region": div.get("region"),
|
|
||||||
"hierarchies": div.get("hierarchies"),
|
|
||||||
"population": div.get("population"),
|
|
||||||
"capital": div.get("capital"),
|
|
||||||
"wikidata": div.get("wikidata"),
|
|
||||||
"sources": div.get("sources"),
|
|
||||||
"similarity_score": div.get("similarity_score"),
|
|
||||||
}
|
}
|
||||||
|
],
|
||||||
feature = Feature(geometry=geom_dict, properties=properties)
|
}
|
||||||
features.append(feature)
|
|
||||||
|
|
||||||
con.close()
|
|
||||||
|
|
||||||
tool_message = f"Found {len(features)} divisions matching '{query}'"
|
|
||||||
|
|
||||||
return Command(
|
return Command(
|
||||||
update={
|
update={
|
||||||
"features": features,
|
"place": feature_collection,
|
||||||
"messages": [ToolMessage(content=tool_message, tool_call_id=tool_call_id)],
|
"messages": [
|
||||||
|
ToolMessage(
|
||||||
|
content=f"Found place with Overture name: {location_results[0][2]} based on user query",
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
)
|
||||||
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user