In [1]:
# Import libraries
import geopandas as gpd
import pandas as pd
import pygmt
import numpy as np
import scipy
import shapely
import sys
import xarray as xr
from scipy.spatial import cKDTree
from shapely.geometry import Point

# Show versions
print("Python version:", sys.version)
print("Geopandas version:", gpd.__version__)
print("Pandas version:", pd.__version__)
print("PyGMT version:", pygmt.__version__)
print("Numpy version:", np.__version__)
print("Scipy version:", scipy.__version__)
print("Shapely version:", shapely.__version__)
print("Xarray version:", xr.__version__)
Python version: 3.11.13 | packaged by conda-forge | (main, Jun  4 2025, 14:39:58) [MSC v.1943 64 bit (AMD64)]
Geopandas version: 1.1.1
Pandas version: 2.3.3
PyGMT version: v0.17.0
Numpy version: 2.3.3
Scipy version: 1.16.2
Shapely version: 2.1.2
Xarray version: 2025.10.1
In [2]:
# Read Walmart data
df = pd.read_csv("walmart_2018_11_06.csv")
coords = np.vstack([df["longitude"], df["latitude"]]).T

# Create query-able database we will use later to find nearest-neighbors 
tree = cKDTree(coords)

# Define region (continental US)
region = [-125, -66.5, 24, 49.5]

# Establish bin size / resolution for grid (in degrees)
# Larger will make code run faster
spacing = 0.10

# Create arrays of longitude and latitude values with specified spacing with np.arange 
# Add spacing to second parameter to include end value
lon = np.arange(region[0], region[1] + spacing, spacing)
lat = np.arange(region[2], region[3] + spacing, spacing)

# Create an array of every possible coordinate pair using meshgrid, ravel, and column_stack
lon2d, lat2d = np.meshgrid(lon, lat)
grid_points = np.column_stack((lon2d.ravel(), lat2d.ravel()))

# Find nearest Walmart using the 
_, nearest_idx = tree.query(grid_points, k=1)
nearest_coords = coords[nearest_idx]

# Define function to calculate Haversine distance between each grid point and its nearest Walmart
def haversine(lon1, lat1, lon2, lat2):
    R = 6371.0 # Radius of Earth
    dlon = np.radians(lon2 - lon1)
    dlat = np.radians(lat2 - lat1)
    a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2
    return 2 * R * np.arcsin(np.sqrt(a))

# Use function to generate a distance to nearest Walmart for every coordinate in grid_points
distances_km = haversine(
    grid_points[:, 0], grid_points[:, 1],
    nearest_coords[:, 0], nearest_coords[:, 1]
)

# Reshape distances_km to a 2d array
dist_grid = distances_km.reshape(lat.size, lon.size)

# Load USA shapefile
usa = gpd.read_file("cb_2018_us_nation_20m.shp")

# Make sure shapefile has CRS
if usa.crs is None:
    usa.set_crs("EPSG:4269", inplace=True)  # typical for Census TIGER shapefiles
usa = usa.to_crs("EPSG:4326")  # convert to WGS84 lon/lat to match grid points

# Create GeoDataFrame for grid points
grid_gdf = gpd.GeoDataFrame(
    geometry=[Point(xy) for xy in grid_points],
    crs="EPSG:4326"  # set CRS for the grid
)

# patial join: keep only points inside USA polygon
inside_us = gpd.sjoin(grid_gdf, usa, predicate="within", how="inner")

# Create mask
mask = grid_gdf.index.isin(inside_us.index).reshape(lat.size, lon.size)

# Apply mask
dist_grid_masked = np.where(mask, dist_grid, np.nan)

# Convert to xarray 
grid_xr = xr.DataArray(
    dist_grid_masked,
    coords={"lat": lat, "lon": lon},
    dims=("lat", "lon"),
    name="distance_km"
)

# Remote locations in the contiguous US, according to ChatGPT
remote_coords = {
    "Borah Peak": (-113.8, 44.2),
    "Hells Canyon": (-116.8, 45.9),
    "Okefenokee Swamp": (-82.3, 30.6),
    "Frank Church": (-114.3, 45.0),
    "Boundary Waters": (-91.2, 47.9)
}

# Initialize figure
fig = pygmt.Figure()

# Create basemap
fig.basemap(
    region=region, 
    projection="M6i", 
    frame=["af", "+tDistance to Nearest Walmart"]
)

fig.coast(
    shorelines="black", 
    borders=["1/0.5p,black", "2/0.5p,black"], 
    resolution="i"
)

# Add grid to map
fig.grdimage(grid=grid_xr, cmap="rainbow")

# Plot Walmart locations
fig.plot(
    x=df["longitude"], 
    y=df["latitude"], 
    style="c0.07c", fill="red", 
    pen="black", 
    label="Walmart locations"
)

# Plot "remote" locations as stars
fig.plot(
    x=[c[0] for c in remote_coords.values()],
    y=[c[1] for c in remote_coords.values()],
    style="a0.4c",  # triangle
    fill="orange",
    pen="black",
    label="AI's 'Remote Places'"
)

# Add colorbar, add legend
fig.colorbar(frame='af+lDistance to nearest Walmart (km)')
fig.legend(position="JBL+jBL+o0.2c", box="+gwhite+p0.5p,black")

# Show and save figure
#fig.show()
#fig.savefig("walmart_effect.jpg")
In [ ]: