Matchup satellite data to ship, glider, or animal tracks#
Author: Eli Holmes (NOAA Fisheries)
In this tutorial you will extract PACE data around a set of points defined by longitude, latitude, and time coordinates, like that produced by an animal telemetry tag, and ship track, or a glider tract. This tutorial is based off this CoastWatch tutorial.
Datasets used
Level 3 8-day average Chlorophyll-a from PACE version 3
A fake loggerhead turtle telemetry track.
Import the required Python modules
from IPython.display import clear_output
import pandas as pd
import numpy as np
import os
import warnings
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import matplotlib.pyplot as plt
import xarray as xr
warnings.filterwarnings('ignore')
1. Load the track data and plot#
We will load into a pandas data frame.
track_path = os.path.join('/home/jovyan/proj_2024_PACEToolkit',
'data',
'turtle_track.dat')
df_track = pd.read_csv(track_path, parse_dates=[['year', 'month', 'day']] )
df_track.mean_lon = df_track.mean_lon -180
print(df.head(2))
date lat lon matched_lat matched_lon \
0 2024-05-04 00:00:00 32.678728 -3.380567 32.6875 -3.395828
1 2024-06-23 00:00:00 35.057734 -4.139105 35.0625 -4.1458282
matched_chla
0 nan
1 nan
Plot the track on a map
df = df_track
plt.figure(figsize=(14, 10))
crs = ccrs.PlateCarree(central_longitude=180)
# Label axes of a Plate Carree projection with a central longitude of 180:
ax1 = plt.subplot(211, projection=crs)
# Use the lon and lat ranges to set the extent of the map
# the 120, 260 lon range will show the whole Pacific
# the 15, 55 lat range with capture the range of the data
ax1.set_extent([120-180, 260-180, 15, 55], crs)
# Set the tick marks to be slightly inside the map extents
ax1.set_xticks(range(125-180, 255-180, 20), crs=crs)
ax1.set_yticks(range(20, 60, 10), crs=crs)
# Add feature to the map
ax1.add_feature(cfeature.LAND, facecolor='0.6')
ax1.coastlines()
# Format the lat and lon axis labels
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)
# Bring the lon and lat data into a numpy array
x, y = df.mean_lon.to_numpy(), df.mean_lat.to_numpy()
ax1 = plt.scatter(x, y, transform=crs, color='grey', marker="x")
ax1 = plt.plot(x, y, transform=crs, color='k')
# start point in green star
ax1 = plt.plot(x[0], y[0],
marker='*',
color='g',
transform=crs,
markersize=10)
# end point in red X
ax1 = plt.plot(x[-1], y[-1],
marker='X',
color='r',
transform=crs,
markersize=10)
plt.title('Animal Track for Turtle', fontsize=20)
plt.show()

2. Extract PACE data corresponding to points on the track#
We will use the Level-3 mapped (gridded) Chl-a data product. Short Name: PACE_OCI_L3M_CHL_NRT
import earthaccess
auth = earthaccess.login(persist=True)
Set up the date that we want. The bounding box does not matter since these data are global and a single file for the whole globe.
i = 0
tspan = (df_track.year_month_day[i], df_track.year_month_day[i])
Search for the granuales (files) needed for each point.
results = earthaccess.search_data(
short_name="PACE_OCI_L3M_CHL_NRT",
temporal=tspan
)
# There are 9 Chl-a products: Day, 8-day, and month and 3 spatial resolution.
len(results)
9
# We can look at the urls
[result.data_links() for result in results]
[['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.1deg.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.4km.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.1deg.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.4km.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.1deg.NRT.nc'],
['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.4km.NRT.nc']]
Although the finest scale might seem to be best (daily, 4km), there will be many missing values. Let’s use the 8 day average, 4km.
# This helps us see that we need to filter; this gets us the one file we want
results = earthaccess.search_data(
short_name="PACE_OCI_L3M_CHL_NRT",
temporal=tspan,
granule_name="*.8D*.4km*"
)
results[0]
Data: PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.4km.NRT.nc
Size: 42.88 MB
Cloud Hosted: True
3. Get the Chl-a value for the lat, lon#
paths = earthaccess.open(results)
ds = xr.open_dataset(paths[0])
ds
<xarray.Dataset> Size: 149MB Dimensions: (lat: 4320, lon: 8640, rgb: 3, eightbitcolor: 256) Coordinates: * lat (lat) float32 17kB 89.98 89.94 89.9 89.85 ... -89.9 -89.94 -89.98 * lon (lon) float32 35kB -180.0 -179.9 -179.9 ... 179.9 179.9 180.0 Dimensions without coordinates: rgb, eightbitcolor Data variables: chlor_a (lat, lon) float32 149MB ... palette (rgb, eightbitcolor) uint8 768B ... Attributes: (12/64) product_name: PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chl... instrument: OCI title: OCI Level-3 Standard Mapped Image project: Ocean Biology Processing Group (NASA/G... platform: PACE source: satellite observations from OCI-PACE ... ... identifier_product_doi: 10.5067/PACE/OCI/L3M/CHL/2.0 keywords: Earth Science > Oceans > Ocean Chemist... keywords_vocabulary: NASA Global Change Master Directory (G... data_bins: 3300524 data_minimum: 0.006873704 data_maximum: 99.78542
# The value might be an NA
ds_i = ds.sel(lat=df_track.mean_lat[i], lon=df_track.mean_lon[i], method='nearest')
Consolidate the downloaded satellite data into the track data frame#
data = [{'date': df_track.loc[i]['year_month_day'],
'lat': df_track.loc[i]['mean_lat'],
'lon': df_track.loc[i]['mean_lon'],
'matched_lat': ds_i.lat.values,
'matched_lon': ds_i.lon.values,
'matched_chla': ds_i.chlor_a.values}]
df_i = pd.DataFrame(data)
df_i
date | lat | lon | matched_lat | matched_lon | matched_chla | |
---|---|---|---|---|---|---|
0 | 2024-05-04 | 32.678728 | -3.380567 | 32.6875 | -3.395828 | nan |
Run a loop to get all the data#
df_all = [{'date': np.nan, 'mean_lat': np.nan, 'mean_lon': np.nan,
'matched_lat': np.nan, 'matched_lon': np.nan, 'matched_chla': np.nan}]
df_all = pd.DataFrame(df_all)
%%capture
nrow = df.shape[0]
for i in range(nrow):
tspan = (df_track.year_month_day[i], df_track.year_month_day[i])
try:
results = earthaccess.search_data(
short_name="PACE_OCI_L3M_CHL_NRT",
temporal=tspan,
granule_name="*.8D*.4km*")
paths = earthaccess.open(results);
ds = xr.open_dataset(paths[0])
ds_i = ds.sel(lat=df_track.mean_lat[i], lon=df_track.mean_lon[i], method='nearest')
df_i = [{
'date': df_track.loc[i]['year_month_day'],
'mean_lat': df_track.loc[i]['mean_lat'],
'mean_lon': df_track.loc[i]['mean_lon'],
'matched_lat': ds_i.lat.values,
'matched_lon': ds_i.lon.values,
'matched_chla': ds_i.chlor_a.values}]
except:
df_i = [{
'date': df_track.loc[i]['year_month_day'],
'mean_lat': df_track.loc[i]['mean_lat'],
'mean_lon': df_track.loc[i]['mean_lon'],
'matched_lat': np.nan,
'matched_lon': np.nan,
'matched_chla': np.nan}]
df_i = pd.DataFrame(df_i)
print(df_i)
df_all.loc[i] = df_i.iloc[0]
df_all
date | mean_lat | mean_lon | matched_lat | matched_lon | matched_chla | |
---|---|---|---|---|---|---|
0 | 2024-05-04 00:00:00 | 32.678728 | -3.380567 | 32.6875 | -3.395828 | NaN |
1 | 2024-06-23 00:00:00 | 35.057734 | -4.139105 | 35.0625 | -4.1458282 | nan |
2 | 2024-08-12 00:00:00 | 40.405759 | 0.592618 | NaN | NaN | NaN |
3 | 2024-10-01 00:00:00 | 41.684803 | 3.510212 | NaN | NaN | NaN |
4 | 2024-11-20 00:00:00 | 37.366229 | 6.999747 | NaN | NaN | NaN |
5 | 2024-01-09 00:00:00 | 32.137928 | 13.315151 | NaN | NaN | NaN |
6 | 2024-02-28 00:00:00 | 32.111258 | 19.015780 | NaN | NaN | NaN |
7 | 2024-04-19 00:00:00 | 34.912238 | 16.367856 | 34.895832 | 16.354172 | 0.147828 |
8 | 2024-06-08 00:00:00 | 34.696608 | 14.311562 | 34.6875 | 14.312506 | 0.08844449 |
9 | 2024-07-28 00:00:00 | 37.091750 | 12.854459 | 37.104164 | 12.854173 | 0.14210209 |
10 | 2024-09-16 00:00:00 | 41.769329 | 14.078758 | NaN | NaN | NaN |
11 | 2024-11-05 00:00:00 | 38.227959 | 12.044436 | NaN | NaN | NaN |
12 | 2024-12-25 00:00:00 | 34.738578 | 11.760026 | NaN | NaN | NaN |
13 | 2024-02-13 00:00:00 | 31.739644 | 15.063135 | NaN | NaN | NaN |
14 | 2024-04-04 00:00:00 | 34.343242 | 19.306553 | 34.354164 | 19.312506 | 0.07754332 |
15 | 2024-05-24 00:00:00 | 35.127712 | 25.605040 | 35.145832 | 25.604174 | nan |
16 | 2024-07-13 00:00:00 | 38.460826 | 30.280468 | 38.479164 | 30.27084 | nan |
17 | 2024-09-01 00:00:00 | 39.337495 | 35.722453 | NaN | NaN | NaN |
18 | 2024-10-21 00:00:00 | 35.867928 | 43.007301 | NaN | NaN | NaN |
19 | 2024-12-10 00:00:00 | 30.185402 | 45.638647 | NaN | NaN | NaN |
20 | 2024-01-29 00:00:00 | 28.328588 | 52.406424 | NaN | NaN | NaN |
21 | 2024-03-19 00:00:00 | 25.981078 | 59.552975 | 25.979164 | 59.562508 | nan |
22 | 2024-05-08 00:00:00 | 24.836619 | 65.871575 | 24.854164 | 65.85417 | 1.0086322 |
23 | 2024-06-27 00:00:00 | 23.724174 | 68.571045 | 23.729164 | 68.56251 | nan |
24 | 2024-08-16 00:00:00 | 26.781771 | 65.757907 | NaN | NaN | NaN |
Save your work#
df_all.matched_chla = df_all.matched_chla.astype('float')
df_all.to_csv('chl_matchup.csv', index=False, encoding='utf-8')
4. Plot chlorophyll matchup data onto a map#
First plot a histogram of the chlorophyll data#
print('Range:', df_all.matched_chla.min(), df_all.matched_chla.max())
_ = df_all.matched_chla.hist(bins=40)
Range: inf -inf

Map the chlorophyll data#
df = df_all
crs = ccrs.PlateCarree(central_longitude=180)
plt.figure(figsize=(14, 10))
# Label axes of a Plate Carree projection with a central longitude of 180:
# set the projection
ax1 = plt.subplot(211, projection=crs)
# Use the lon and lat ranges to set the extent of the map
ax1.set_extent([120 - 180, 255 - 180, 15, 55], crs)
# set the tick marks to be slightly inside the map extents
ax1.set_xticks(range(120 - 180,255 - 180,20), crs=crs)
ax1.set_yticks(range(20,50,10), crs=crs)
# Add geographical features
ax1.add_feature(cfeature.LAND, facecolor='0.6')
ax1.coastlines()
# format the lat and lon axis labels
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)
# build and plot coordinates onto map
x,y = list(df.mean_lon),list(df.mean_lat)
ax1 = plt.scatter(x, y, transform=crs, color='grey', marker="x")
ax1 = plt.scatter(x, y, transform=crs, marker='o', c=np.log(df.matched_chla), cmap=plt.get_cmap('jet') )
ax1=plt.plot(x[0],y[0],marker='*', transform=crs, markersize=10)
ax1=plt.plot(x[-1],y[-1],marker='X', transform=crs, markersize=10)
# control color bar values spacing
levs2 = np.arange(-2.5, 0, 0.5)
cbar=plt.colorbar(ticks=levs2, shrink=0.75, aspect=10)
cbar.set_label("Chl a (mg/m^3)", size=15, labelpad=20)
# set the labels to be exp(levs2) so the label reflect values of chl-a, not log(chl-a)
cbar.ax.set_yticklabels(np.around(np.exp(levs2), 2), size=10)
plt.title("Chlorophyll Matchup to Animal Track", size=15)
plt.show()
