Matchup satellite data to ship, glider, or animal tracks

Matchup satellite data to ship, glider, or animal tracks#

Author: Eli Holmes (NOAA Fisheries)

In this tutorial you will extract PACE data around a set of points defined by longitude, latitude, and time coordinates, like that produced by an animal telemetry tag, and ship track, or a glider tract. This tutorial is based off this CoastWatch tutorial.

Datasets used

Level 3 8-day average Chlorophyll-a from PACE version 3
A fake loggerhead turtle telemetry track.

Import the required Python modules

from IPython.display import clear_output
import pandas as pd
import numpy as np
import os
import warnings
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import matplotlib.pyplot as plt
import xarray as xr
warnings.filterwarnings('ignore')

1. Load the track data and plot#

We will load into a pandas data frame.

track_path = os.path.join('/home/jovyan/proj_2024_PACEToolkit',
                          'data',
                          'turtle_track.dat')

df_track = pd.read_csv(track_path, parse_dates=[['year', 'month', 'day']] )
df_track.mean_lon = df_track.mean_lon -180
print(df.head(2))

                  date        lat       lon matched_lat matched_lon  \
0  2024-05-04 00:00:00  32.678728 -3.380567     32.6875   -3.395828   
1  2024-06-23 00:00:00  35.057734 -4.139105     35.0625  -4.1458282   

  matched_chla  
0          nan  
1          nan  

Plot the track on a map

df = df_track
plt.figure(figsize=(14, 10))
crs = ccrs.PlateCarree(central_longitude=180)
# Label axes of a Plate Carree projection with a central longitude of 180:
ax1 = plt.subplot(211, projection=crs)

# Use the lon and lat ranges to set the extent of the map
# the 120, 260 lon range will show the whole Pacific
# the 15, 55 lat range with capture the range of the data
ax1.set_extent([120-180, 260-180, 15, 55], crs)

# Set the tick marks to be slightly inside the map extents
ax1.set_xticks(range(125-180, 255-180, 20), crs=crs)
ax1.set_yticks(range(20, 60, 10), crs=crs)

# Add feature to the map
ax1.add_feature(cfeature.LAND, facecolor='0.6')
ax1.coastlines()

# Format the lat and lon axis labels
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)

# Bring the lon and lat data into a numpy array 
x, y = df.mean_lon.to_numpy(), df.mean_lat.to_numpy()
ax1 = plt.scatter(x, y, transform=crs, color='grey', marker="x")
ax1 = plt.plot(x, y, transform=crs, color='k')
# start point in green star
ax1 = plt.plot(x[0], y[0],
               marker='*',
               color='g',
               transform=crs,
               markersize=10)
# end point in red X
ax1 = plt.plot(x[-1], y[-1],
               marker='X',
               color='r',
               transform=crs,
               markersize=10)
plt.title('Animal Track for Turtle', fontsize=20)

plt.show()

../_images/9b23bfb4900a472684f01be09a5cb0f55cc3d0f2af8cea85ce167d5d6fbd17b5.png

2. Extract PACE data corresponding to points on the track#

We will use the Level-3 mapped (gridded) Chl-a data product. Short Name: PACE_OCI_L3M_CHL_NRT

import earthaccess
auth = earthaccess.login(persist=True)

Set up the date that we want. The bounding box does not matter since these data are global and a single file for the whole globe.

i = 0
tspan = (df_track.year_month_day[i], df_track.year_month_day[i])

Search for the granuales (files) needed for each point.

results = earthaccess.search_data(
    short_name="PACE_OCI_L3M_CHL_NRT",
    temporal=tspan
)

# There are 9 Chl-a products: Day, 8-day, and month and 3 spatial resolution.
len(results)

# We can look at the urls
[result.data_links() for result in results]

[['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.4km.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.4km.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.4km.NRT.nc']]

Although the finest scale might seem to be best (daily, 4km), there will be many missing values. Let’s use the 8 day average, 4km.

# This helps us see that we need to filter; this gets us the one file we want
results = earthaccess.search_data(
    short_name="PACE_OCI_L3M_CHL_NRT",
    temporal=tspan, 
    granule_name="*.8D*.4km*"
)

results[0]

Data: PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.4km.NRT.nc

Size: 42.88 MB

Cloud Hosted: True

3. Get the Chl-a value for the lat, lon#

# The value might be an NA
ds_i = ds.sel(lat=df_track.mean_lat[i], lon=df_track.mean_lon[i], method='nearest')

Consolidate the downloaded satellite data into the track data frame#

data = [{'date': df_track.loc[i]['year_month_day'], 
         'lat': df_track.loc[i]['mean_lat'], 
         'lon': df_track.loc[i]['mean_lon'], 
         'matched_lat': ds_i.lat.values, 
         'matched_lon': ds_i.lon.values, 
         'matched_chla': ds_i.chlor_a.values}]
df_i = pd.DataFrame(data)
df_i

	date	lat	lon	matched_lat	matched_lon	matched_chla
0	2024-05-04	32.678728	-3.380567	32.6875	-3.395828	nan

Run a loop to get all the data#

df_all = [{'date': np.nan, 'mean_lat': np.nan, 'mean_lon': np.nan, 
           'matched_lat': np.nan, 'matched_lon': np.nan, 'matched_chla': np.nan}]
df_all = pd.DataFrame(df_all)

%%capture

nrow = df.shape[0]
for i in range(nrow):
    tspan = (df_track.year_month_day[i], df_track.year_month_day[i])
    try:
        results = earthaccess.search_data(
          short_name="PACE_OCI_L3M_CHL_NRT",
          temporal=tspan, 
          granule_name="*.8D*.4km*")
        paths = earthaccess.open(results);
        ds = xr.open_dataset(paths[0])
        ds_i = ds.sel(lat=df_track.mean_lat[i], lon=df_track.mean_lon[i], method='nearest')
        df_i = [{
            'date': df_track.loc[i]['year_month_day'], 
            'mean_lat': df_track.loc[i]['mean_lat'], 
            'mean_lon': df_track.loc[i]['mean_lon'], 
            'matched_lat': ds_i.lat.values, 
            'matched_lon': ds_i.lon.values, 
            'matched_chla': ds_i.chlor_a.values}]
    except:
        df_i = [{
            'date': df_track.loc[i]['year_month_day'], 
            'mean_lat': df_track.loc[i]['mean_lat'], 
            'mean_lon': df_track.loc[i]['mean_lon'], 
            'matched_lat': np.nan, 
            'matched_lon': np.nan, 
            'matched_chla': np.nan}]        
    df_i = pd.DataFrame(df_i)
    print(df_i)
    df_all.loc[i] = df_i.iloc[0]

df_all

	date	mean_lat	mean_lon	matched_lat	matched_lon	matched_chla
0	2024-05-04 00:00:00	32.678728	-3.380567	32.6875	-3.395828	NaN
1	2024-06-23 00:00:00	35.057734	-4.139105	35.0625	-4.1458282	nan
2	2024-08-12 00:00:00	40.405759	0.592618	NaN	NaN	NaN
3	2024-10-01 00:00:00	41.684803	3.510212	NaN	NaN	NaN
4	2024-11-20 00:00:00	37.366229	6.999747	NaN	NaN	NaN
5	2024-01-09 00:00:00	32.137928	13.315151	NaN	NaN	NaN
6	2024-02-28 00:00:00	32.111258	19.015780	NaN	NaN	NaN
7	2024-04-19 00:00:00	34.912238	16.367856	34.895832	16.354172	0.147828
8	2024-06-08 00:00:00	34.696608	14.311562	34.6875	14.312506	0.08844449
9	2024-07-28 00:00:00	37.091750	12.854459	37.104164	12.854173	0.14210209
10	2024-09-16 00:00:00	41.769329	14.078758	NaN	NaN	NaN
11	2024-11-05 00:00:00	38.227959	12.044436	NaN	NaN	NaN
12	2024-12-25 00:00:00	34.738578	11.760026	NaN	NaN	NaN
13	2024-02-13 00:00:00	31.739644	15.063135	NaN	NaN	NaN
14	2024-04-04 00:00:00	34.343242	19.306553	34.354164	19.312506	0.07754332
15	2024-05-24 00:00:00	35.127712	25.605040	35.145832	25.604174	nan
16	2024-07-13 00:00:00	38.460826	30.280468	38.479164	30.27084	nan
17	2024-09-01 00:00:00	39.337495	35.722453	NaN	NaN	NaN
18	2024-10-21 00:00:00	35.867928	43.007301	NaN	NaN	NaN
19	2024-12-10 00:00:00	30.185402	45.638647	NaN	NaN	NaN
20	2024-01-29 00:00:00	28.328588	52.406424	NaN	NaN	NaN
21	2024-03-19 00:00:00	25.981078	59.552975	25.979164	59.562508	nan
22	2024-05-08 00:00:00	24.836619	65.871575	24.854164	65.85417	1.0086322
23	2024-06-27 00:00:00	23.724174	68.571045	23.729164	68.56251	nan
24	2024-08-16 00:00:00	26.781771	65.757907	NaN	NaN	NaN

Save your work#

df_all.matched_chla = df_all.matched_chla.astype('float')
df_all.to_csv('chl_matchup.csv', index=False, encoding='utf-8')

4. Plot chlorophyll matchup data onto a map#

First plot a histogram of the chlorophyll data#

print('Range:', df_all.matched_chla.min(), df_all.matched_chla.max())
_ = df_all.matched_chla.hist(bins=40)

Range: inf -inf

../_images/bc3d597bcef68a25d4c30679c3aa70c6c279b036c501df950a56d208e54fbf0f.png

Map the chlorophyll data#

df = df_all
crs = ccrs.PlateCarree(central_longitude=180)

plt.figure(figsize=(14, 10))

# Label axes of a Plate Carree projection with a central longitude of 180:

# set the projection
ax1 = plt.subplot(211, projection=crs)

# Use the lon and lat ranges to set the extent of the map
ax1.set_extent([120 - 180, 255 - 180, 15, 55], crs)

# set the tick marks to be slightly inside the map extents
ax1.set_xticks(range(120 - 180,255 - 180,20), crs=crs)
ax1.set_yticks(range(20,50,10), crs=crs)

# Add geographical features
ax1.add_feature(cfeature.LAND, facecolor='0.6')
ax1.coastlines()

# format the lat and lon axis labels
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)

# build and plot coordinates onto map
x,y = list(df.mean_lon),list(df.mean_lat)
ax1 = plt.scatter(x, y, transform=crs, color='grey', marker="x")
ax1 = plt.scatter(x, y, transform=crs, marker='o', c=np.log(df.matched_chla), cmap=plt.get_cmap('jet') )
ax1=plt.plot(x[0],y[0],marker='*', transform=crs, markersize=10)
ax1=plt.plot(x[-1],y[-1],marker='X', transform=crs, markersize=10)

# control color bar values spacing
levs2 = np.arange(-2.5, 0, 0.5)
cbar=plt.colorbar(ticks=levs2, shrink=0.75, aspect=10)
cbar.set_label("Chl a (mg/m^3)", size=15, labelpad=20)

# set the labels to be exp(levs2) so the label reflect values of chl-a, not log(chl-a)
cbar.ax.set_yticklabels(np.around(np.exp(levs2), 2), size=10)

plt.title("Chlorophyll Matchup to Animal Track", size=15)
plt.show()

../_images/42928f494023c254236775a3439cc52b553924d680a77dfe84b1e42e08bef596.png