In [4]:
Copied!
import sdm
import pandas as pd
import matplotlib.pyplot as plt
import cartopy
import numpy as np
import sdm
import pandas as pd
import matplotlib.pyplot as plt
import cartopy
import numpy as np
In [2]:
Copied!
# Load the dataset returned by sdm.load_trawl_data() once and keep it as `df`;
# every later cell reads from this frame.
df = sdm.load_trawl_data()

# Bare last expression so the notebook renders the (truncated) frame.
df
Out[2]:
TOWDATETIME_EST | LAT | LON | MEAN_DEPTH | SWEPT_AREA_km | acadian redfish | alewife | alligatorfish | american lobster | american plaice | ... | Rrs_707 | Rrs_708 | Rrs_709 | Rrs_711 | Rrs_712 | Rrs_713 | Rrs_714 | Rrs_717 | Rrs_719 | Rrs_brightness | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2024-03-07 10:58:00 | 38.659194 | -74.828083 | 24 | 0.021282 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000851 | 0.000825 | 0.000797 | 0.000765 | 0.000738 | 0.000717 | 0.000689 | 0.000552 | 0.000540 | 1.090205 |
1 | 2024-03-07 14:32:00 | 38.498596 | -74.477395 | 40 | 0.024311 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000386 | 0.000378 | 0.000364 | 0.000344 | 0.000333 | 0.000325 | 0.000315 | 0.000229 | 0.000261 | 0.907649 |
2 | 2024-03-07 17:44:00 | 38.411974 | -74.502885 | 39 | 0.023835 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000372 | 0.000364 | 0.000350 | 0.000330 | 0.000318 | 0.000310 | 0.000292 | 0.000210 | 0.000254 | 0.935354 |
3 | 2024-03-07 20:29:00 | 38.505238 | -74.832046 | 25 | 0.021510 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.000674 | 0.000653 | 0.000632 | 0.000606 | 0.000588 | 0.000574 | 0.000553 | 0.000430 | 0.000434 | 1.050127 |
4 | 2024-03-07 22:45:00 | 38.556784 | -74.897495 | 21 | 0.019174 | 0.0 | 0.0 | 0.0 | 27.0 | 0.0 | ... | 0.001309 | 0.001268 | 0.001221 | 0.001175 | 0.001134 | 0.001093 | 0.001046 | 0.000848 | 0.000791 | 1.327782 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
363 | 2024-05-12 12:30:00 | 41.305711 | -70.636614 | 23 | 0.018435 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000283 | 0.000275 | 0.000266 | 0.000253 | 0.000246 | 0.000243 | 0.000237 | 0.000181 | 0.000218 | 0.529729 |
364 | 2024-05-12 18:45:00 | 40.467061 | -71.381195 | 73 | 0.022568 | 0.0 | 2.0 | 0.0 | 2.0 | 0.0 | ... | 0.000159 | 0.000155 | 0.000150 | 0.000142 | 0.000140 | 0.000137 | 0.000138 | 0.000097 | 0.000150 | 0.446814 |
365 | 2024-05-13 01:19:00 | 39.618178 | -72.027289 | 219 | 0.024705 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000108 | 0.000104 | 0.000102 | 0.000094 | 0.000093 | 0.000094 | 0.000095 | 0.000057 | 0.000104 | 0.450264 |
366 | 2024-05-13 03:42:00 | 39.762143 | -72.213598 | 98 | 0.020737 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 0.000146 | 0.000143 | 0.000141 | 0.000134 | 0.000134 | 0.000133 | 0.000130 | 0.000068 | 0.000124 | 0.446797 |
367 | 2024-05-13 07:15:00 | 40.175585 | -71.934953 | 76 | 0.023215 | 0.0 | 4.0 | 0.0 | 1.0 | 0.0 | ... | 0.000162 | 0.000159 | 0.000155 | 0.000148 | 0.000147 | 0.000145 | 0.000142 | 0.000082 | 0.000137 | 0.421705 |
368 rows × 372 columns
Most common species¶
Let's get species with abundance > 50000.
In [6]:
Copied!
# Minimum total count (summed over all rows) a species must exceed to be kept.
ABUNDANCE_THRESHOLD = 50000

# Keep every column whose name does not contain 'Rrs'.
matched = [col for col in df.columns if 'Rrs' not in col]
sub = df[matched].copy()  # subset (remove Rrs columns)

# Number the rows 1..N in their existing order to act as a station id.
sub['station'] = np.arange(1, len(sub) + 1)

# Columns to total are picked by position: skip the first five columns
# (TOWDATETIME_EST, LAT, LON, MEAN_DEPTH, SWEPT_AREA_km) and the last two.
# NOTE(review): the last two are 'station' plus whichever column precedes it;
# confirm that column is not a species, otherwise it is silently left out.
species_cols = sub.columns[5:-2]

# Total count per column, truncated to int.
tot_sum = {name: int(sub[name].sum()) for name in species_cols}

# Largest total first; sorted() is stable, so ties keep column order.
sorted_tot_sum = sorted(tot_sum.items(), key=lambda item: item[1], reverse=True)

# Names whose total count is strictly above the threshold, most abundant first.
abu = [name for name, total in sorted_tot_sum if total > ABUNDANCE_THRESHOLD]
abu
Out[6]:
['butterfish', 'silver hake', 'longfin squid', 'spot', 'northern searobin', 'atlantic croaker']
This is a single survey in 2024.
In [13]:
Copied!
# Column holding the tow timestamps.
tow_time_col = 'TOWDATETIME_EST'

# Convert the column to pandas datetimes in place on `df`.
df[tow_time_col] = pd.to_datetime(df[tow_time_col])

# Earliest and latest timestamp in the column.
df[tow_time_col].agg(['min', 'max'])
Out[13]:
min 2024-03-07 10:58:00 max 2024-05-13 07:15:00 Name: TOWDATETIME_EST, dtype: datetime64[ns]
Each station appears once.
In [10]:
Copied!
# Count how many rows carry each station id, ordered by station id, and turn
# the result into a two-column frame.
# Note: 'station' is filled with np.arange(1, len(sub) + 1) when `sub` is built,
# so every id occurring exactly once is expected by construction.
station_counts = (
    sub['station']
    .value_counts()
    .sort_index()
    .rename("count")
    .reset_index()
)
station_counts.columns = ["station", "count"]

# Distinct occurrence counts; a single value of 1 means no station repeats.
station_counts['count'].unique()
Out[10]:
array([1])
Make a plot¶
Look at the abundances by station.
In [15]:
Copied!
# One line per abundant species: count at each station, in station order.
# An explicit Axes is used so the labels and legend attach to this figure only.
fig, ax = plt.subplots()
for species in abu:
    ax.plot(sub['station'], sub[species], label=species)

# Place the legend outside the plotting area, to the right of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_xlabel('Station #')
ax.set_ylabel('Count')
# `abu` is selected on summed counts, so the title says "total abundance"
# rather than "observations".
ax.set_title('Species with total abundance > 50000')
plt.show()
Map the species presence/absence by station location.
In [22]:
Copied!
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

absent_color = 'gray'
present_color = 'orange'

# One map panel per species in `abu`, three panels per row. The row count is
# derived from len(abu) so the cell neither indexes past the end of `abu` nor
# drops species; with six species this gives the original 2 x 3, 12 x 8 figure.
n_cols = 3
n_rows = max(1, -(-len(abu) // n_cols))  # ceiling division, at least one row
fig, axs = plt.subplots(
    n_rows,
    n_cols,
    figsize=(12, 4 * n_rows),
    subplot_kw={'projection': ccrs.PlateCarree()},
    squeeze=False,  # always a 2-D array, so flatten() works for a single row
)
axs = axs.flatten()

for i, (ax, species) in enumerate(zip(axs, abu)):
    # Split rows by whether this species' count is zero.
    absent = sub[sub[species] == 0]
    present = sub[sub[species] != 0]

    ax.scatter(absent.LON, absent.LAT, c=absent_color, s=8, label='Absent')
    ax.scatter(present.LON, present.LAT, c=present_color, s=8, label='Present')

    ax.add_feature(cfeature.COASTLINE, linewidth=1)
    # High zorder draws the land polygons on top of the scatter points.
    ax.add_feature(cfeature.LAND, zorder=100, facecolor='lightgrey')
    ax.set_title(species)

    # Add legend only to the first subplot to avoid repetition
    if i == 0:
        ax.legend(loc='lower right')

# Hide any trailing panels that have no species assigned.
for unused_ax in axs[len(abu):]:
    unused_ax.set_visible(False)

fig.suptitle('Species Presence by Station', fontsize=14)
plt.tight_layout()
plt.show()
In [ ]:
Copied!