Matchup satellite data to ship, glider, or animal tracks#

Author: Eli Holmes (NOAA Fisheries)

In this tutorial you will extract PACE data around a set of points defined by longitude, latitude, and time coordinates, like that produced by an animal telemetry tag, and ship track, or a glider tract. This tutorial is based off this CoastWatch tutorial.

Datasets used

  • Level 3 8-day average Chlorophyll-a from PACE version 3

  • A fake loggerhead turtle telemetry track.

Import the required Python modules

from IPython.display import clear_output
import pandas as pd
import numpy as np
import os
import warnings
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import matplotlib.pyplot as plt
import xarray as xr
warnings.filterwarnings('ignore')

1. Load the track data and plot#

We will load into a pandas data frame.

track_path = os.path.join('/home/jovyan/proj_2024_PACEToolkit',
                          'data',
                          'turtle_track.dat')

df_track = pd.read_csv(track_path, parse_dates=[['year', 'month', 'day']] )
df_track.mean_lon = df_track.mean_lon -180
print(df.head(2))
                  date        lat       lon matched_lat matched_lon  \
0  2024-05-04 00:00:00  32.678728 -3.380567     32.6875   -3.395828   
1  2024-06-23 00:00:00  35.057734 -4.139105     35.0625  -4.1458282   

  matched_chla  
0          nan  
1          nan  

Plot the track on a map

df = df_track
plt.figure(figsize=(14, 10))
crs = ccrs.PlateCarree(central_longitude=180)
# Label axes of a Plate Carree projection with a central longitude of 180:
ax1 = plt.subplot(211, projection=crs)

# Use the lon and lat ranges to set the extent of the map
# the 120, 260 lon range will show the whole Pacific
# the 15, 55 lat range with capture the range of the data
ax1.set_extent([120-180, 260-180, 15, 55], crs)

# Set the tick marks to be slightly inside the map extents
ax1.set_xticks(range(125-180, 255-180, 20), crs=crs)
ax1.set_yticks(range(20, 60, 10), crs=crs)

# Add feature to the map
ax1.add_feature(cfeature.LAND, facecolor='0.6')
ax1.coastlines()

# Format the lat and lon axis labels
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)

# Bring the lon and lat data into a numpy array 
x, y = df.mean_lon.to_numpy(), df.mean_lat.to_numpy()
ax1 = plt.scatter(x, y, transform=crs, color='grey', marker="x")
ax1 = plt.plot(x, y, transform=crs, color='k')
# start point in green star
ax1 = plt.plot(x[0], y[0],
               marker='*',
               color='g',
               transform=crs,
               markersize=10)
# end point in red X
ax1 = plt.plot(x[-1], y[-1],
               marker='X',
               color='r',
               transform=crs,
               markersize=10)
plt.title('Animal Track for Turtle', fontsize=20)

plt.show()
../_images/9b23bfb4900a472684f01be09a5cb0f55cc3d0f2af8cea85ce167d5d6fbd17b5.png

2. Extract PACE data corresponding to points on the track#

We will use the Level-3 mapped (gridded) Chl-a data product. Short Name: PACE_OCI_L3M_CHL_NRT

import earthaccess
auth = earthaccess.login(persist=True)

Set up the date that we want. The bounding box does not matter since these data are global and a single file for the whole globe.

i = 0
tspan = (df_track.year_month_day[i], df_track.year_month_day[i])

Search for the granuales (files) needed for each point.

results = earthaccess.search_data(
    short_name="PACE_OCI_L3M_CHL_NRT",
    temporal=tspan
)
# There are 9 Chl-a products: Day, 8-day, and month and 3 spatial resolution.
len(results)
9
# We can look at the urls
[result.data_links() for result in results]
[['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.4km.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240501_20240531.L3m.MO.CHL.V2_0.chlor_a.4km.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.0p1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.1deg.NRT.nc'],
 ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chlor_a.4km.NRT.nc']]

Although the finest scale might seem to be best (daily, 4km), there will be many missing values. Let’s use the 8 day average, 4km.

# This helps us see that we need to filter; this gets us the one file we want
results = earthaccess.search_data(
    short_name="PACE_OCI_L3M_CHL_NRT",
    temporal=tspan, 
    granule_name="*.8D*.4km*"
)
results[0]

Data: PACE_OCI.20240430_20240507.L3m.8D.CHL.V2_0.chlor_a.4km.NRT.nc

Size: 42.88 MB

Cloud Hosted: True

3. Get the Chl-a value for the lat, lon#

paths = earthaccess.open(results)
ds = xr.open_dataset(paths[0])
ds
<xarray.Dataset> Size: 149MB
Dimensions:  (lat: 4320, lon: 8640, rgb: 3, eightbitcolor: 256)
Coordinates:
  * lat      (lat) float32 17kB 89.98 89.94 89.9 89.85 ... -89.9 -89.94 -89.98
  * lon      (lon) float32 35kB -180.0 -179.9 -179.9 ... 179.9 179.9 180.0
Dimensions without coordinates: rgb, eightbitcolor
Data variables:
    chlor_a  (lat, lon) float32 149MB ...
    palette  (rgb, eightbitcolor) uint8 768B ...
Attributes: (12/64)
    product_name:                      PACE_OCI.20240504.L3m.DAY.CHL.V2_0.chl...
    instrument:                        OCI
    title:                             OCI Level-3 Standard Mapped Image
    project:                           Ocean Biology Processing Group (NASA/G...
    platform:                          PACE
    source:                            satellite observations from OCI-PACE
    ...                                ...
    identifier_product_doi:            10.5067/PACE/OCI/L3M/CHL/2.0
    keywords:                          Earth Science > Oceans > Ocean Chemist...
    keywords_vocabulary:               NASA Global Change Master Directory (G...
    data_bins:                         3300524
    data_minimum:                      0.006873704
    data_maximum:                      99.78542
# The value might be an NA
ds_i = ds.sel(lat=df_track.mean_lat[i], lon=df_track.mean_lon[i], method='nearest')

Consolidate the downloaded satellite data into the track data frame#

data = [{'date': df_track.loc[i]['year_month_day'], 
         'lat': df_track.loc[i]['mean_lat'], 
         'lon': df_track.loc[i]['mean_lon'], 
         'matched_lat': ds_i.lat.values, 
         'matched_lon': ds_i.lon.values, 
         'matched_chla': ds_i.chlor_a.values}]
df_i = pd.DataFrame(data)
df_i
date lat lon matched_lat matched_lon matched_chla
0 2024-05-04 32.678728 -3.380567 32.6875 -3.395828 nan

Run a loop to get all the data#

df_all = [{'date': np.nan, 'mean_lat': np.nan, 'mean_lon': np.nan, 
           'matched_lat': np.nan, 'matched_lon': np.nan, 'matched_chla': np.nan}]
df_all = pd.DataFrame(df_all)
%%capture

nrow = df.shape[0]
for i in range(nrow):
    tspan = (df_track.year_month_day[i], df_track.year_month_day[i])
    try:
        results = earthaccess.search_data(
          short_name="PACE_OCI_L3M_CHL_NRT",
          temporal=tspan, 
          granule_name="*.8D*.4km*")
        paths = earthaccess.open(results);
        ds = xr.open_dataset(paths[0])
        ds_i = ds.sel(lat=df_track.mean_lat[i], lon=df_track.mean_lon[i], method='nearest')
        df_i = [{
            'date': df_track.loc[i]['year_month_day'], 
            'mean_lat': df_track.loc[i]['mean_lat'], 
            'mean_lon': df_track.loc[i]['mean_lon'], 
            'matched_lat': ds_i.lat.values, 
            'matched_lon': ds_i.lon.values, 
            'matched_chla': ds_i.chlor_a.values}]
    except:
        df_i = [{
            'date': df_track.loc[i]['year_month_day'], 
            'mean_lat': df_track.loc[i]['mean_lat'], 
            'mean_lon': df_track.loc[i]['mean_lon'], 
            'matched_lat': np.nan, 
            'matched_lon': np.nan, 
            'matched_chla': np.nan}]        
    df_i = pd.DataFrame(df_i)
    print(df_i)
    df_all.loc[i] = df_i.iloc[0]
df_all
date mean_lat mean_lon matched_lat matched_lon matched_chla
0 2024-05-04 00:00:00 32.678728 -3.380567 32.6875 -3.395828 NaN
1 2024-06-23 00:00:00 35.057734 -4.139105 35.0625 -4.1458282 nan
2 2024-08-12 00:00:00 40.405759 0.592618 NaN NaN NaN
3 2024-10-01 00:00:00 41.684803 3.510212 NaN NaN NaN
4 2024-11-20 00:00:00 37.366229 6.999747 NaN NaN NaN
5 2024-01-09 00:00:00 32.137928 13.315151 NaN NaN NaN
6 2024-02-28 00:00:00 32.111258 19.015780 NaN NaN NaN
7 2024-04-19 00:00:00 34.912238 16.367856 34.895832 16.354172 0.147828
8 2024-06-08 00:00:00 34.696608 14.311562 34.6875 14.312506 0.08844449
9 2024-07-28 00:00:00 37.091750 12.854459 37.104164 12.854173 0.14210209
10 2024-09-16 00:00:00 41.769329 14.078758 NaN NaN NaN
11 2024-11-05 00:00:00 38.227959 12.044436 NaN NaN NaN
12 2024-12-25 00:00:00 34.738578 11.760026 NaN NaN NaN
13 2024-02-13 00:00:00 31.739644 15.063135 NaN NaN NaN
14 2024-04-04 00:00:00 34.343242 19.306553 34.354164 19.312506 0.07754332
15 2024-05-24 00:00:00 35.127712 25.605040 35.145832 25.604174 nan
16 2024-07-13 00:00:00 38.460826 30.280468 38.479164 30.27084 nan
17 2024-09-01 00:00:00 39.337495 35.722453 NaN NaN NaN
18 2024-10-21 00:00:00 35.867928 43.007301 NaN NaN NaN
19 2024-12-10 00:00:00 30.185402 45.638647 NaN NaN NaN
20 2024-01-29 00:00:00 28.328588 52.406424 NaN NaN NaN
21 2024-03-19 00:00:00 25.981078 59.552975 25.979164 59.562508 nan
22 2024-05-08 00:00:00 24.836619 65.871575 24.854164 65.85417 1.0086322
23 2024-06-27 00:00:00 23.724174 68.571045 23.729164 68.56251 nan
24 2024-08-16 00:00:00 26.781771 65.757907 NaN NaN NaN

Save your work#

df_all.matched_chla = df_all.matched_chla.astype('float')
df_all.to_csv('chl_matchup.csv', index=False, encoding='utf-8')

4. Plot chlorophyll matchup data onto a map#

First plot a histogram of the chlorophyll data#

print('Range:', df_all.matched_chla.min(), df_all.matched_chla.max())
_ = df_all.matched_chla.hist(bins=40)
Range: inf -inf
../_images/bc3d597bcef68a25d4c30679c3aa70c6c279b036c501df950a56d208e54fbf0f.png

Map the chlorophyll data#

df = df_all
crs = ccrs.PlateCarree(central_longitude=180)

plt.figure(figsize=(14, 10))

# Label axes of a Plate Carree projection with a central longitude of 180:

# set the projection
ax1 = plt.subplot(211, projection=crs)

# Use the lon and lat ranges to set the extent of the map
ax1.set_extent([120 - 180, 255 - 180, 15, 55], crs)

# set the tick marks to be slightly inside the map extents
ax1.set_xticks(range(120 - 180,255 - 180,20), crs=crs)
ax1.set_yticks(range(20,50,10), crs=crs)

# Add geographical features
ax1.add_feature(cfeature.LAND, facecolor='0.6')
ax1.coastlines()

# format the lat and lon axis labels
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)

# build and plot coordinates onto map
x,y = list(df.mean_lon),list(df.mean_lat)
ax1 = plt.scatter(x, y, transform=crs, color='grey', marker="x")
ax1 = plt.scatter(x, y, transform=crs, marker='o', c=np.log(df.matched_chla), cmap=plt.get_cmap('jet') )
ax1=plt.plot(x[0],y[0],marker='*', transform=crs, markersize=10)
ax1=plt.plot(x[-1],y[-1],marker='X', transform=crs, markersize=10)

# control color bar values spacing
levs2 = np.arange(-2.5, 0, 0.5)
cbar=plt.colorbar(ticks=levs2, shrink=0.75, aspect=10)
cbar.set_label("Chl a (mg/m^3)", size=15, labelpad=20)

# set the labels to be exp(levs2) so the label reflect values of chl-a, not log(chl-a)
cbar.ax.set_yticklabels(np.around(np.exp(levs2), 2), size=10)

plt.title("Chlorophyll Matchup to Animal Track", size=15)
plt.show()
../_images/42928f494023c254236775a3439cc52b553924d680a77dfe84b1e42e08bef596.png