import xarray as xr
import zarr
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from datetime import datetime, timedelta
import seaborn as sns
import math
import pickle
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')
Warning: ecCodes 2.21.0 or higher is recommended. You are running version 2.16.0
stranding_data = pd.read_csv('2015_2021_hatchling_stranding_summary.csv', names = ['Id', 'Date','Species','Location'])
# import historical stranding data
stranding_data = stranding_data.loc[stranding_data['Species'] == 'Caretta caretta']
#Correct a few inconsistent spellings
stranding_data.loc[stranding_data['Location'] == 'Mosselbay', 'Location'] = 'Mossel Bay'
stranding_data.loc[stranding_data['Location'] == 'Witsand', 'Location'] = 'Witsands'
stranding_data.loc[stranding_data['Location'] == 'Tenikwa PLETT', 'Location'] = 'Plettenberg Bay'
stranding_data.loc[stranding_data['Location'] == 'Tenikwa', 'Location'] = 'Plettenberg Bay'
stranding_data = stranding_data.groupby('Location').filter(lambda x: len(x) >= 10) #consider only loactions with greater then 10 strandings
# Manually set locations for most common strandings
latlong_dict = {}
latlong_dict['Struisbaai'] = [-34.80, 20.10]
latlong_dict['Mossel Bay'] = [-34.15, 22.20]
latlong_dict['Muizenberg'] = [-34.11, 18.52]
latlong_dict['Witsands'] = [-34.41, 20.92]
latlong_dict['Arniston'] = [-34.67, 20.27]
latlong_dict['Plettenberg Bay'] = [-34.07, 23.44]
latlong_dict['Hermanus'] = [-34.44, 19.25]
latlong_dict['Sedgefield'] = [-34.10, 22.78]
latlong_dict['Gaansbaai'] = [-34.64, 19.35]
stranding_count = stranding_data.Location.value_counts()
location = []
lats = []
lons = []
count = []
for index, n in stranding_count.items():
location.append(index)
lats.append(latlong_dict[index][0])
lons.append(latlong_dict[index][1])
count.append(n)
fig = plt.figure(figsize=(16,9), dpi = 350)
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines('50m', linewidth=0.8)
extent = (np.min(lons)-0.2,np.max(lons)+0.6, np.min(lats)-0.2,np.max(lats)+0.2)
ax.set_extent(extent)
scatterplot = ax.scatter(x = lons, y = lats, c=count, s = [x*10 for x in count], cmap = 'viridis', vmin = 0, vmax = 400, alpha=0.9,transform=ccrs.PlateCarree())
for loc,lat,lon in zip(location, lats, lons):
ax.text(lon+0.08, lat-0.03, str(loc))
plt.colorbar(scatterplot, shrink = 0.5, orientation = 'horizontal', label = 'Number Of Strandings', pad=0.05)
plt.title('Location of Turtle Hatchling Strandings in the Western Cape')
plt.show()
stranding_freq = stranding_data.groupby(stranding_data.Date).count()['Species'].rename('Strandings')
stranding_freq.index = pd.to_datetime(stranding_freq.index, infer_datetime_format = True)
weekly={}
yearly = {}
for Location in list(set(stranding_data.Location)):
location_data = stranding_data.loc[stranding_data['Location'] == str(Location)]
location_freq = location_data.groupby(location_data.Date).count()['Species'].rename('Standings')
location_freq.index = pd.to_datetime(location_freq.index, infer_datetime_format=True)
weekly[str(Location)] = location_freq.groupby(location_freq.index.week).sum()
yearly[str(Location)] = location_freq.groupby(location_freq.index.year).sum()
weekly = pd.DataFrame(data = weekly).reindex(np.linspace(1,52,52, dtype = int))
yearly = pd.DataFrame(data = yearly)
fig = px.bar(yearly, labels={
"value": "Number of Strandings",
"Date": "Year"
},
title="Turtle Strandings in the Western Cape")
fig.update_layout(
legend=dict(title = 'Stranding Location'))
fig.show()
fig = px.bar(weekly,labels={
"value": "Number of Strandings",
"Date": "Week of the Year"
},
title="Turtle Strandings in the Western Cape")
fig.update_layout(
legend=dict(title = 'Stranding Location'))
fig.show()
location = 'Struisbaai'
ds = xr.open_dataset('ERA5_reanalysis_East_Coast.nc').load()
# ensure lat lon corresponds to a point where sst is available
ocean_lons = ds.sst[0].dropna(dim = 'lat', how = 'any').dropna(dim = 'lon', how = 'any').lon.values
ocean_lats = ds.sst[0].dropna(dim = 'lat', how = 'any').dropna(dim = 'lon', how = 'any').lat.values
def return_nearest_ocean_latlon(latlon):
ocean_lat = min(ocean_lats, key=lambda x:abs(x-latlon[0]))
ocean_lon = min(ocean_lons, key=lambda x:abs(x-latlon[1]))
return [ocean_lat,ocean_lon]
ocean_latlong_dict = {}
for index in latlong_dict:
ocean_latlong_dict[index] = return_nearest_ocean_latlon(latlong_dict[index])
ds_loc = ds
ds_loc = ds_loc.sel(lat = ocean_latlong_dict[str(location)][0], method = 'nearest').sel(lon = ocean_latlong_dict[str(location)][1], method = 'nearest')
location_data = stranding_data.loc[stranding_data['Location'] == str(location)]
location_freq = location_data.groupby(location_data.Date).count()['Species'].rename('Standings')
location_freq.index = pd.to_datetime(location_freq.index, infer_datetime_format=True)
location_freq = location_freq.sort_values(ascending=False)
location_freq = location_freq[location_freq.values>1]
dates_sorted = list(location_freq.sort_values(ascending=False).index)
ws = {}
sst = {}
direc = {}
for date in dates_sorted:
date_7 = date + timedelta(days = -7)
d = ds_loc.sel(time = slice(str(date_7), str(date)))
if len(d.ws.values) >= 169:
ws[date] = d.ws.values
sst[date] = d.sst.values
direc[date] = d.dir.values
else:
pass
ws = pd.DataFrame(ws)
sst = pd.DataFrame(sst)
direc = pd.DataFrame(direc)
n_events = len(direc.columns)-1
show_hide = {}
for x in range(n_events):
trues = []
for i in range(n_events):
if i ==x:
trues.append(True)
trues.append(True)
else:
trues.append(False)
trues.append(False)
show_hide[x] = trues
def get_vector(direction):
if direction < 90 and direction > 0:
x = math.sin(math.radians(direction))
y = math.cos(math.radians(direction))
elif direction > 90 and direction <180:
x = math.sin(math.radians(180 - direction))
y = -math.cos(math.radians(180 - direction))
elif direction >180 and direction <270:
x = -math.cos(math.radians(270 - direction))
y = -math.sin(math.radians(270 - direction))
else:
x = -math.sin(math.radians(360-direction))
y = math.cos(math.radians(360-direction))
return(x,-y)
arrows = {}
for n in range(n_events):
arrows[n] = []
direc_ = direc.iloc[:,n].values[0::10]
for x_,i in zip(range(160,0,-10),range(16)):
ax_,ay_ = get_vector(direc_[i])
arrows[n].append(dict(
x=x_,
y=-5,
xref = 'x',
yref = 'y',
axref = 'pixel',
ayref = 'pixel',
ax=(ax_*25),
ay=(ay_*30),
arrowhead = 2,
arrowsize = 1,
arrowwidth = 1.4,
xanchor = 'centre',
yanchor = 'centre',
row = 1,
col = 1
)
)
button_dicts = []
for i in range(n_events):
button_dicts.append(
dict(label=str(dates_sorted[i].date()) + ', '+str(location_freq[dates_sorted[i]]) + ' Strandings',
method="update",
args=[
{"visible": show_hide[i]},{'annotations':arrows[i]}
])
)
fig = make_subplots(rows=2, cols=1, row_heights=[0.7, 0.3])
#fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.update_xaxes(autorange="reversed")
for i in range(n_events):
if i == 0:
fig.add_trace(
go.Scatter(x = np.linspace(168,0,169), y= ws.iloc[:,i].values, name='Wind Speed (knots)', visible=True),
row = 1, col = 1
)
fig.add_trace(
go.Scatter(x = np.linspace(168,0,169), y= sst.iloc[:,i].values, name='Sea Surface Temperature (celsius)', visible=True),
row = 2, col = 1
)
else:
fig.add_trace(
go.Scatter(x = np.linspace(168,0,169), y= ws.iloc[:,i].values, name='Wind Speed (knots)', visible=False),
row = 1, col = 1
)
fig.add_trace(
go.Scatter(x = np.linspace(168,0,169), y= sst.iloc[:,i].values, name='Sea Surface Temperature (celsius)', visible=False),
row = 2, col = 1
)
fig.update_yaxes(range=[-10, 30], row = 1)
fig.update_xaxes(range=[170, 0], row = 1)
fig.update_xaxes(range=[170, 0], row = 2)
fig.update_xaxes(title="Hours Before Strandings", row = 2)
fig.update_xaxes(tick0=168, dtick=24)
fig.update_layout(
updatemenus=[
dict(
buttons=button_dicts,
direction="down",
showactive=True
)
]
)
# Update remaining layout properties
fig.update_layout(
title_text=str(location)+" Stranding Events",
showlegend=True,
)
fig.show()
fig.write_html("historical_"+str(location)+".html")
This plot is interactive, use the dropdown menu on the left to change stranding event and activate wind direction arrows.
A full screen version of this plot and other locations is available at:
# forecast data is only available in 3 hour steps so matching reanalysis to this simplifies things
ds_loc = xr.merge([ds_loc.where(ds_loc.time.dt.hour == 0).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 3).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 6).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 9).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 12).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 15).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 18).dropna(dim = 'time'),
ds_loc.where(ds_loc.time.dt.hour == 21).dropna(dim = 'time'),])
ds_loc_daily = ds_loc.resample(time = 'D', closed = "right").mean().load()
ds_seasonal = ds_loc_daily.where(ds_loc.time.dt.season == 'MAM').dropna(dim='time', how = 'all')
strandings = list(set(stranding_data.loc[stranding_data['Location'] == str(location)].Date))
strandings = pd.to_datetime(strandings)
march = [x for x in strandings if x.month == 3]
april = [x for x in strandings if x.month == 4]
may = [x for x in strandings if x.month == 5]
dates = march + april + may
dates.sort()
lagged = {}
for lag in [0, -1,-3,-7]:
dates_1 = [date + timedelta(days = lag) for date in dates]
lagged[lag] = ds_loc_daily.sel(time = dates_1)
def make_plot():
fig, axs = plt.subplots(nrows=3,ncols=2,figsize=(16,12))
axs=axs.flatten()
for i,var in enumerate(['sst','t2m','ws','dir','u10','v10']):
sns.kdeplot(ds_seasonal[str(var)].values, ax = axs[i], label = 'Autumn', lw= 5)
for key in lagged:
sns.kdeplot(lagged[key][str(var)].values, ax = axs[i], label = 'Stranding Days '+str(key), lw = 2.5)
#axs[i].set_title(str(var))
axs[0].legend()
axs[0].set_xlabel('Sea Surface Temperature ($^\circ$C)')
axs[1].set_xlabel('Air Temperature ($^\circ$C)')
axs[2].set_xlabel('Wind Speed (knots)')
axs[3].set_xlabel('Wind Direction')
axs[4].set_xlabel('Eastward Wind Component (knots)')
axs[5].set_xlabel('Northward Wind Component (knots)')
fig.suptitle('', fontsize=14, fontweight='bold')
make_plot()
df = ds_loc_daily.to_dataframe()
x = ds_loc_daily[['u10','v10']].rolling(time = 5).mean(dim = 'time')
df['u10_5m'] = x.u10
df['v10_5m'] = x.v10
sst_delta = (ds_loc_daily.sst.values[7:] - ds_loc_daily.sst.values[:-7])
df['sst_delta'] = np.concatenate([np.zeros(7), sst_delta])
df['u10_1'] = np.concatenate([df['u10'].iloc[1:].values, np.zeros(1)])
df = df.fillna(0)
location_data = stranding_data.loc[stranding_data['Location'] == str(location)]
location_freq = location_data.groupby(location_data.Date).count()['Species'].rename('Standings')
location_freq.index = pd.to_datetime(location_freq.index)
df['strandings'] = location_freq.reindex(df.index).fillna(0)
df['month'] = df.index.month
fig, axs = plt.subplots(nrows=1,ncols=3,figsize=(16,4))
axs=axs.flatten()
for i,var in enumerate(['v10_5m', 'u10_5m', 'sst_delta']):
sns.kdeplot(df[var], label = 'all', ax = axs[i],lw = 5)
sns.kdeplot(df[var].where(df.strandings >= 1), label = 'stranding', ax = axs[i],lw = 3)
axs[0].legend()
axs[0].set_xlabel('5-Day Average Northward Wind Component (knots)')
axs[1].set_xlabel('5-Day Average Eastward Wind Component (knots)')
axs[2].set_xlabel('Change in Sea Surface Temperature over 7 days')
plt.show()
from sklearn.ensemble import RandomForestRegressor
train = df.iloc[:2045]
test = df.iloc[2045:]
sel_var = ['month', 'ws','u10_1', 'u10_5m','sst_delta']
train_features = np.array(train[sel_var])
train_labels = np.array(train['strandings'])
test_features = np.array(test[sel_var])
test_labels = np.array(test['strandings'])
# Instantiate model
rf = RandomForestRegressor(n_estimators = 100, random_state = 42)
# Train the model on training data
rf.fit(train_features, train_labels)
#save model
pickle.dump(rf, open('model_train.pkl', 'wb'))
# Use the forest's predict method on the test data
predictions = rf.predict(test_features)
# Calculate the absolute errors
errors = abs(predictions - test_labels)
#Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 5), 'degrees.')
Mean Absolute Error: 0.17265 degrees.
out = pd.DataFrame({'day': test.index, 'predictions' : list(predictions), "test_labels": list(test_labels)}).set_index('day')
pred_ = out.predictions.where(out.predictions>=1).dropna()
maybe_ = out.predictions.where(out.predictions>0.6).where(out.predictions<1).dropna()
test_ = out.test_labels.where(out.test_labels>=1).dropna()
fig = go.Figure()
fig.add_trace(go.Scatter(x=pred_.index, y=pred_.values,
marker = dict(size=12, color = 'red'),
mode='markers',
name='Predicted'))
fig.add_trace(go.Scatter(x=maybe_.index, y=maybe_.values,
marker = dict(size=12, color = 'yellow'),
mode='markers',
name='Maybe'))
fig.add_trace(go.Scatter(x=test_.index, y=test_.values,
marker = dict(size=12, color = 'black'),
mode='markers',
name='Observed'))
fig.update_yaxes(title="Number of Strandings")
fig.show()
rf = pickle.load(open('model_no_sst_train.pkl', 'rb'))
# Use the forest's predict method on the test data
predictions = rf.predict(test_features[:,:-1])
# Calculate the absolute errors
errors = abs(predictions - test_labels)
#Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 5), 'degrees.')
Mean Absolute Error: 0.18096 degrees.
out = pd.DataFrame({'day': test.index, 'predictions' : list(predictions), "test_labels": list(test_labels)}).set_index('day')
pred_ = out.predictions.where(out.predictions>=1).dropna()
maybe_ = out.predictions.where(out.predictions>0.6).where(out.predictions<1).dropna()
test_ = out.test_labels.where(out.test_labels>=1).dropna()
fig = go.Figure()
fig.add_trace(go.Scatter(x=pred_.index, y=pred_.values,
marker = dict(size=12, color = 'red'),
mode='markers',
name='Predicted'))
fig.add_trace(go.Scatter(x=maybe_.index, y=maybe_.values,
marker = dict(size=12, color = 'yellow'),
mode='markers',
name='Maybe'))
fig.add_trace(go.Scatter(x=test_.index, y=test_.values,
marker = dict(size=12, color = 'black'),
mode='markers',
name='Observed'))
fig.update_yaxes(title="Number of Strandings")
fig.show()
a 5-day forecast implementation of the above can be viewed at - https://petemarsh.com/forecast
while the notebook implementing this can be viewed at - https://petemarsh.com/Turtle_Forecast_Notebook
To reproduce this notebook please refer to https://github.com/peterm790/Turtle_Stranding
ends