mirror of
https://github.com/dataforcanada/d4c-service-geo-assistant.git
synced 2026-06-13 14:31:01 +02:00
Tool/naip fetcher (#12)
* Add initial NAIP fetcher * Swap to Element84's EarthSearch API for NAIP STAC search and download * clip to bounds of aoi * Swap to Element84's EarthSearch API for NAIP STAC search and download * rename bands and remove dask chunking * Add DS_Store to .gitignore * restrict date range for naip test * Adjust timerange for tests * Add xarray to pyproj * Reduce aoi size * revert test to use tmp path * Update return types for tool to ensure state gets updated * Mark naip test as xfail * Fix geom creation --------- Co-authored-by: lillythomas <lillyelizathomas@gmail.com> Co-authored-by: Daniel Wiesmann <yellowcap@users.noreply.github.com>
This commit is contained in:
@@ -209,6 +209,7 @@ __marimo__/
|
||||
# VSCode
|
||||
.vscode/
|
||||
|
||||
**.DS_Store
|
||||
# Notebook
|
||||
.ipynb_checkpoints
|
||||
nbs/*
|
||||
|
||||
@@ -17,6 +17,11 @@ dependencies = [
|
||||
"python-dotenv",
|
||||
"duckdb",
|
||||
"shapely",
|
||||
"pystac-client",
|
||||
"planetary-computer",
|
||||
"odc-stac>=0.3.9",
|
||||
"xarray",
|
||||
"matplotlib",
|
||||
"geopandas>=1.1.1",
|
||||
"dspy>=3.0.4",
|
||||
"watchdog>=6.0.0",
|
||||
|
||||
@@ -4,13 +4,16 @@ from langgraph.checkpoint.memory import InMemorySaver
|
||||
from langchain.agents import create_agent
|
||||
from geo_assistant.agent.state import GeoAssistantState
|
||||
from geo_assistant.agent.llms import llm
|
||||
from geo_assistant.tools.naip import fetch_naip_img
|
||||
from geo_assistant.tools.overture import get_place
|
||||
from geo_assistant.tools.buffer import get_search_area
|
||||
|
||||
SYSTEM_PROMPT = """
|
||||
You are a helpful assistant that can answer questions and help with tasks.
|
||||
|
||||
You have location and division tools available to you. Only use this data if the user asks for it.
|
||||
You have access to the following tools:
|
||||
- Overture location lookup tool: use this to get geographic information about locations in the US based on the user's query
|
||||
- NAIP imagery fetch tool: use this to fetch NAIP aerial imagery for a given area of interest returned by the overture location lookup tool and date range (do your best to extract the date range from the user's query if provided, otherwise ask the user to specify a date range)
|
||||
|
||||
The current date and time is {now}.
|
||||
"""
|
||||
@@ -20,7 +23,11 @@ async def create_graph():
|
||||
checkpointer = InMemorySaver()
|
||||
graph = create_agent(
|
||||
model=llm,
|
||||
tools=[get_place, get_search_area],
|
||||
tools=[
|
||||
get_place,
|
||||
get_search_area,
|
||||
fetch_naip_img,
|
||||
],
|
||||
system_prompt=SYSTEM_PROMPT.format(
|
||||
now=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
),
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
from langchain.agents import AgentState
|
||||
from geojson_pydantic import Feature
|
||||
from typing import Optional
|
||||
from pydantic import Field
|
||||
|
||||
|
||||
class GeoAssistantState(AgentState):
|
||||
place: Optional[Feature]
|
||||
search_area: Optional[Feature]
|
||||
naip_png_path: Optional[str] = Field(
|
||||
default=None, description="Path to the saved NAIP RGB PNG image"
|
||||
)
|
||||
|
||||
@@ -7,6 +7,8 @@ from langchain_core.tools import tool
|
||||
from typing import Annotated
|
||||
from geo_assistant.agent.state import GeoAssistantState
|
||||
|
||||
from geojson_pydantic import Feature
|
||||
|
||||
|
||||
@tool
|
||||
async def get_search_area(
|
||||
@@ -47,7 +49,11 @@ async def get_search_area(
|
||||
f"{len(gdf)} features found after buffer operation, should be just 1. "
|
||||
"Was a Multi-Point/LineString/Polygon geometry passed in?"
|
||||
)
|
||||
buffer_feature = gdf.iloc[0].geometry.__geo_interface__
|
||||
buffer_feature = Feature(
|
||||
type="Feature",
|
||||
geometry=gdf.iloc[0].geometry.__geo_interface__,
|
||||
properties={},
|
||||
)
|
||||
|
||||
return Command(
|
||||
update={
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
# tools/naip_mpc_tools.py
|
||||
from typing import Dict, Any, Optional, Annotated
|
||||
from pathlib import Path
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import numpy as np
|
||||
import xarray as xr
|
||||
import matplotlib.pyplot as plt
|
||||
from langchain_core.tools import tool
|
||||
from pystac_client import Client
|
||||
from odc.stac import stac_load
|
||||
from langgraph.types import Command
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools.base import InjectedToolCallId
|
||||
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# PC_STAC_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
|
||||
E84_STAC_URL = "https://earth-search.aws.element84.com/v1"
|
||||
|
||||
|
||||
@tool("fetch_naip_img")
|
||||
async def fetch_naip_img(
|
||||
aoi_geojson: Dict[str, Any],
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
tool_call_id: Annotated[Optional[str], InjectedToolCallId] = None,
|
||||
) -> Command:
|
||||
"""
|
||||
Query Microsoft Planetary Computer for NAIP imagery intersecting an AOI and
|
||||
date range, load all matching items into an xarray data cube using odc-stac,
|
||||
and save a simple RGB composite as a PNG.
|
||||
|
||||
Args:
|
||||
aoi_geojson: GeoJSON Polygon/MultiPolygon in EPSG:4326.
|
||||
start_date: Start date (YYYY-MM-DD).
|
||||
end_date: End date (YYYY-MM-DD).
|
||||
|
||||
"""
|
||||
# --- 1. STAC search on Element84's EarthSearch API ---
|
||||
catalog = Client.open(E84_STAC_URL)
|
||||
|
||||
search = catalog.search(
|
||||
collections=["naip"],
|
||||
intersects=aoi_geojson,
|
||||
datetime=f"{start_date}/{end_date}",
|
||||
)
|
||||
|
||||
items = list(search.items())
|
||||
if len(items) == 0:
|
||||
return Command(
|
||||
update={
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content="No NAIP imagery found for the specified area and date range.",
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
],
|
||||
"naip_png_path": None,
|
||||
}
|
||||
)
|
||||
|
||||
# --- 2. Load as xarray cube with odc.stac ---
|
||||
# NAIP in MPC: 4-band multi-band asset (R,G,B,NIR) in one asset named "image".
|
||||
# odc.stac exposes these as measurements 'red','green','blue','nir' for this collection
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
ds: xr.Dataset = stac_load(
|
||||
items,
|
||||
bands=["Red", "Green", "Blue"], # use only RGB
|
||||
geopolygon=aoi_geojson,
|
||||
resolution=1.0, # NAIP native ~1 m
|
||||
executor=executor,
|
||||
)
|
||||
if ds.dims.get("time", 0) == 0:
|
||||
return Command(
|
||||
update={
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content="Unable to load NAIP RGB image, dataset has no time dimension",
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
],
|
||||
"naip_png_path": None,
|
||||
}
|
||||
)
|
||||
|
||||
# --- 3. Build an RGB composite from the cube ---
|
||||
# For the PNG, we’ll just use the first time slice (you can swap in “latest”
|
||||
# or a temporal reduction if you prefer).
|
||||
red = ds["Red"].isel(time=0)
|
||||
green = ds["Green"].isel(time=0)
|
||||
blue = ds["Blue"].isel(time=0)
|
||||
|
||||
# Stack into (y, x, 3) array
|
||||
rgb = xr.concat([red, green, blue], dim="band") # (band, y, x)
|
||||
rgb = rgb.transpose("y", "x", "band") # (y, x, band)
|
||||
|
||||
# Convert to uint8 for PNG with a simple contrast stretch.
|
||||
arr = rgb.values.astype("float32")
|
||||
# Robust min/max to avoid a few hot pixels blowing out the stretch
|
||||
vmin = np.nanpercentile(arr, 2)
|
||||
vmax = np.nanpercentile(arr, 98)
|
||||
if vmax <= vmin:
|
||||
vmin, vmax = np.nanmin(arr), np.nanmax(arr)
|
||||
|
||||
arr = np.clip((arr - vmin) / (vmax - vmin + 1e-6), 0, 1)
|
||||
arr_uint8 = (arr * 255).astype("uint8")
|
||||
|
||||
# --- 4. Save PNG ---
|
||||
|
||||
out_path = Path("naip_rgb.png")
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
plt.imsave(out_path.as_posix(), arr_uint8)
|
||||
|
||||
return Command(
|
||||
update={
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content=f"NAIP RGB image saved to {out_path.as_posix()}",
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
],
|
||||
"naip_png_path": out_path.as_posix(),
|
||||
}
|
||||
)
|
||||
@@ -12,7 +12,12 @@ def geo_assistant_fixture():
|
||||
geometry=Point(type="Point", coordinates=[-9.1393, 38.7223]),
|
||||
properties={"name": "Neighbourhood Cafe Lisbon"},
|
||||
)
|
||||
return GeoAssistantState(place=place_geojson, search_area=None, messages=[])
|
||||
return GeoAssistantState(
|
||||
place=place_geojson,
|
||||
search_area=None,
|
||||
messages=[],
|
||||
naip_png_path="path/to/naip.png",
|
||||
)
|
||||
|
||||
|
||||
async def test_get_search_area(geo_assistant_fixture):
|
||||
@@ -37,4 +42,4 @@ async def test_get_search_area(geo_assistant_fixture):
|
||||
|
||||
# Verify the buffer was created around the correct place
|
||||
search_area = command.update["search_area"]
|
||||
assert search_area["type"] == "Polygon"
|
||||
assert search_area.geometry.type == "Polygon"
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from shapely.geometry import box, mapping
|
||||
from langchain_core.tools.base import ToolCall
|
||||
|
||||
from geo_assistant.tools.naip import fetch_naip_img
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.xfail
|
||||
async def test_fetch_naip(tmp_path):
|
||||
"""
|
||||
Integration test: hit MPC STAC for NAIP around Union Market (DC),
|
||||
load imagery via odc-stac, and save an RGB PNG.
|
||||
|
||||
NOTE: This test requires:
|
||||
- Internet access (to reach Planetary Computer STAC + blobs)
|
||||
- Planetary Computer / NAIP service to be up
|
||||
"""
|
||||
|
||||
# Union Market coordinates from GeoNames: 38.90789, -76.99831
|
||||
# N 38°54′28″ W 76°59′54″
|
||||
# We'll use a small neighborhood AOI around that point.
|
||||
# :contentReference[oaicite:0]{index=0}
|
||||
lat = 38.90789
|
||||
lon = -76.99831
|
||||
|
||||
# ~0.01 degrees (~1.1 km) buffer in each direction
|
||||
aoi = box(lon - 0.0001, lat - 0.0001, lon + 0.0001, lat + 0.0001)
|
||||
aoi_geojson = mapping(aoi)
|
||||
|
||||
out_png = tmp_path / "naip_test_img.png"
|
||||
|
||||
tool_call = ToolCall(
|
||||
name="fetch_naip_img",
|
||||
args={
|
||||
"aoi_geojson": aoi_geojson,
|
||||
"start_date": "2021-01-01",
|
||||
"end_date": "2021-12-31",
|
||||
"out_png_path": str(out_png),
|
||||
"resolution": 1.0,
|
||||
},
|
||||
type="tool_call",
|
||||
id="test_tool_call_id",
|
||||
)
|
||||
|
||||
# Call the actual tool – no STAC / odc-stac mocking
|
||||
result = await fetch_naip_img.ainvoke(tool_call["args"])
|
||||
|
||||
# Basic sanity checks on result
|
||||
assert result["stac_item_count"] > 0, "Expected at least one NAIP item"
|
||||
assert "time" in result["dataset_dims"]
|
||||
assert result["dataset_dims"]["time"] >= 1
|
||||
|
||||
# PNG should have been written to disk
|
||||
png_path = Path(result["png_path"])
|
||||
assert png_path == out_png
|
||||
assert png_path.is_file(), f"PNG was not created at {png_path}"
|
||||
Reference in New Issue
Block a user