diff --git a/relative_training_data.ipynb b/relative_training_data.ipynb
new file mode 100644
index 00000000..2666f348
--- /dev/null
+++ b/relative_training_data.ipynb
@@ -0,0 +1,2082 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Fake data feature requests / MWE\n",
+ "\n",
+ "In this notebook we outline two main features we'd like to use in springtime.\n",
+ "To this end, we first make some fake sample data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import geopandas as gpd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " year \n",
+ " DOY_firstbloom \n",
+ " geometry \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 129 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 136 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 153 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 135 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 153 \n",
+ " POINT (1.04712 0.28730) \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 167 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 168 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 179 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 128 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 175 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year DOY_firstbloom geometry\n",
+ "0 2000 129 POINT (-0.47519 0.04296)\n",
+ "1 2001 136 POINT (-1.48409 -0.50742)\n",
+ "2 2002 153 POINT (0.94426 -0.21199)\n",
+ "3 2003 135 POINT (0.29152 -1.54929)\n",
+ "4 2004 153 POINT (1.04712 0.28730)\n",
+ "5 2005 167 POINT (-0.84866 -0.80568)\n",
+ "6 2006 168 POINT (-1.10011 0.54719)\n",
+ "7 2007 179 POINT (-0.52378 0.05342)\n",
+ "8 2008 128 POINT (-0.78217 0.39899)\n",
+ "9 2009 175 POINT (-1.15685 -0.01451)"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Seed NumPy's global generator so the fake data below (and the random\n",
+ "# temperature series drawn by later cells via np.random) is the same on every\n",
+ "# Restart & Run All.\n",
+ "np.random.seed(42)\n",
+ "\n",
+ "# Fake observations: one first-bloom day of year (DOY) per year for 2000-2009,\n",
+ "# each at a random point location.\n",
+ "obs = gpd.GeoDataFrame(\n",
+ "    data={\n",
+ "        'year': np.arange(2000, 2010),\n",
+ "        'DOY_firstbloom': np.random.randint(120, 180, size=10),\n",
+ "        'geometry': gpd.GeoSeries.from_xy(*np.random.randn(2, 10)),\n",
+ "    },\n",
+ ")\n",
+ "obs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1 -0.258835\n",
+ "2 -0.542725\n",
+ "3 0.544299\n",
+ "4 0.934947\n",
+ "5 -0.515771\n",
+ " ... \n",
+ "361 -0.008609\n",
+ "362 0.358386\n",
+ "363 -0.700696\n",
+ "364 -0.749247\n",
+ "365 -0.039406\n",
+ "Name: temperature, Length: 365, dtype: float64"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def get_temperature(year, geometry):\n",
+ "    \"\"\"Return a fake daily temperature record for one year at one location.\n",
+ "\n",
+ "    Both arguments are currently ignored: this stand-in simply returns 365\n",
+ "    standard-normal draws, indexed by day of year (1..365) and named\n",
+ "    'temperature'.\n",
+ "    \"\"\"\n",
+ "    return pd.Series(np.random.randn(365), index=np.arange(1, 366), name='temperature')\n",
+ "\n",
+ "\n",
+ "# Example call; arguments are passed in the declared (year, geometry) order.\n",
+ "get_temperature(2000, obs.geometry[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combining observations and weather data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 3 \n",
+ " 4 \n",
+ " 5 \n",
+ " 6 \n",
+ " 7 \n",
+ " 8 \n",
+ " 9 \n",
+ " 10 \n",
+ " ... \n",
+ " 356 \n",
+ " 357 \n",
+ " 358 \n",
+ " 359 \n",
+ " 360 \n",
+ " 361 \n",
+ " 362 \n",
+ " 363 \n",
+ " 364 \n",
+ " 365 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 0.307213 \n",
+ " -1.730910 \n",
+ " 1.142865 \n",
+ " -1.329763 \n",
+ " 0.176960 \n",
+ " -0.065294 \n",
+ " 0.480736 \n",
+ " -0.871330 \n",
+ " 0.000798 \n",
+ " 0.656874 \n",
+ " ... \n",
+ " -1.513503 \n",
+ " -0.264486 \n",
+ " 1.128968 \n",
+ " 1.585038 \n",
+ " 0.769073 \n",
+ " 0.223731 \n",
+ " -0.271954 \n",
+ " 0.934556 \n",
+ " -0.371321 \n",
+ " 0.527513 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 0.981330 \n",
+ " -0.629055 \n",
+ " 0.147775 \n",
+ " 0.177528 \n",
+ " -0.658201 \n",
+ " -0.508874 \n",
+ " 0.010616 \n",
+ " 0.054272 \n",
+ " 0.078030 \n",
+ " 1.134169 \n",
+ " ... \n",
+ " 1.293825 \n",
+ " -1.521819 \n",
+ " 0.562700 \n",
+ " -0.441175 \n",
+ " -0.623304 \n",
+ " 0.559502 \n",
+ " 0.194984 \n",
+ " -1.984875 \n",
+ " -1.686339 \n",
+ " 0.896396 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " -0.064179 \n",
+ " 1.031772 \n",
+ " 0.426700 \n",
+ " 0.785975 \n",
+ " 0.846932 \n",
+ " -1.573482 \n",
+ " 0.928517 \n",
+ " 0.015023 \n",
+ " -0.640847 \n",
+ " 2.018753 \n",
+ " ... \n",
+ " -0.733125 \n",
+ " -0.889653 \n",
+ " -2.083637 \n",
+ " 2.219374 \n",
+ " 0.512767 \n",
+ " 1.210230 \n",
+ " -0.257335 \n",
+ " -1.888650 \n",
+ " -0.897753 \n",
+ " -0.627711 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " -0.287284 \n",
+ " -1.271925 \n",
+ " 0.878782 \n",
+ " 0.853996 \n",
+ " 0.727292 \n",
+ " -2.365385 \n",
+ " -1.079522 \n",
+ " 1.152702 \n",
+ " 2.159849 \n",
+ " 0.015764 \n",
+ " ... \n",
+ " -1.521924 \n",
+ " -1.431645 \n",
+ " -0.872895 \n",
+ " -0.025135 \n",
+ " -0.834000 \n",
+ " 0.496014 \n",
+ " -0.605926 \n",
+ " -0.553788 \n",
+ " 0.050255 \n",
+ " 0.720794 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " -0.835895 \n",
+ " -0.371376 \n",
+ " 0.480107 \n",
+ " 0.292897 \n",
+ " 1.442436 \n",
+ " 0.949195 \n",
+ " 0.480992 \n",
+ " -1.092184 \n",
+ " -1.895761 \n",
+ " 0.180935 \n",
+ " ... \n",
+ " -0.232243 \n",
+ " -0.401761 \n",
+ " -0.222144 \n",
+ " 0.480351 \n",
+ " 1.092475 \n",
+ " 1.234285 \n",
+ " -0.249237 \n",
+ " -0.484025 \n",
+ " -0.531681 \n",
+ " -1.456187 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 0.208490 \n",
+ " 0.354984 \n",
+ " -0.152511 \n",
+ " 0.098195 \n",
+ " 1.386408 \n",
+ " 1.055924 \n",
+ " -1.034276 \n",
+ " -1.005105 \n",
+ " -0.900521 \n",
+ " 0.395284 \n",
+ " ... \n",
+ " -2.157407 \n",
+ " 0.280113 \n",
+ " -0.883831 \n",
+ " -1.388701 \n",
+ " 0.528675 \n",
+ " 0.267038 \n",
+ " -1.368319 \n",
+ " -1.096792 \n",
+ " 1.099992 \n",
+ " 0.396587 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " -0.624989 \n",
+ " -2.104158 \n",
+ " -1.639389 \n",
+ " -0.117674 \n",
+ " -1.728902 \n",
+ " 0.895439 \n",
+ " -1.073810 \n",
+ " 0.188181 \n",
+ " -0.330365 \n",
+ " 0.207237 \n",
+ " ... \n",
+ " -0.641100 \n",
+ " -1.031611 \n",
+ " -0.560739 \n",
+ " 0.426600 \n",
+ " -0.040521 \n",
+ " 0.255512 \n",
+ " 0.110917 \n",
+ " -0.603269 \n",
+ " -0.485403 \n",
+ " 0.088915 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 0.658223 \n",
+ " 0.719648 \n",
+ " 0.669712 \n",
+ " -0.267893 \n",
+ " 0.282330 \n",
+ " 0.091997 \n",
+ " 0.575704 \n",
+ " 0.075245 \n",
+ " -0.616297 \n",
+ " 0.765266 \n",
+ " ... \n",
+ " 0.766638 \n",
+ " -1.262343 \n",
+ " -0.449191 \n",
+ " -0.380143 \n",
+ " 0.122539 \n",
+ " 0.858159 \n",
+ " 0.265301 \n",
+ " -1.452599 \n",
+ " -1.632551 \n",
+ " -0.077418 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 1.216177 \n",
+ " 0.608043 \n",
+ " 1.819014 \n",
+ " -0.205315 \n",
+ " -0.924444 \n",
+ " 0.214525 \n",
+ " 0.713723 \n",
+ " 1.169773 \n",
+ " 0.143641 \n",
+ " 1.259080 \n",
+ " ... \n",
+ " -1.660429 \n",
+ " 1.097643 \n",
+ " 0.751109 \n",
+ " -0.353375 \n",
+ " 0.765719 \n",
+ " 0.048287 \n",
+ " -0.363845 \n",
+ " 0.698244 \n",
+ " 1.445932 \n",
+ " -0.277977 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " -0.815981 \n",
+ " -0.317910 \n",
+ " 0.518929 \n",
+ " 1.616109 \n",
+ " 0.355642 \n",
+ " 0.211716 \n",
+ " -1.197219 \n",
+ " -0.135824 \n",
+ " 0.597120 \n",
+ " 0.388386 \n",
+ " ... \n",
+ " 1.039741 \n",
+ " -0.685464 \n",
+ " 1.042412 \n",
+ " 1.016426 \n",
+ " 1.344937 \n",
+ " -1.247285 \n",
+ " 1.420088 \n",
+ " -0.006036 \n",
+ " -1.963635 \n",
+ " -0.336042 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
10 rows × 365 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 1 2 3 4 5 6 7 \\\n",
+ "0 0.307213 -1.730910 1.142865 -1.329763 0.176960 -0.065294 0.480736 \n",
+ "1 0.981330 -0.629055 0.147775 0.177528 -0.658201 -0.508874 0.010616 \n",
+ "2 -0.064179 1.031772 0.426700 0.785975 0.846932 -1.573482 0.928517 \n",
+ "3 -0.287284 -1.271925 0.878782 0.853996 0.727292 -2.365385 -1.079522 \n",
+ "4 -0.835895 -0.371376 0.480107 0.292897 1.442436 0.949195 0.480992 \n",
+ "5 0.208490 0.354984 -0.152511 0.098195 1.386408 1.055924 -1.034276 \n",
+ "6 -0.624989 -2.104158 -1.639389 -0.117674 -1.728902 0.895439 -1.073810 \n",
+ "7 0.658223 0.719648 0.669712 -0.267893 0.282330 0.091997 0.575704 \n",
+ "8 1.216177 0.608043 1.819014 -0.205315 -0.924444 0.214525 0.713723 \n",
+ "9 -0.815981 -0.317910 0.518929 1.616109 0.355642 0.211716 -1.197219 \n",
+ "\n",
+ " 8 9 10 ... 356 357 358 359 \\\n",
+ "0 -0.871330 0.000798 0.656874 ... -1.513503 -0.264486 1.128968 1.585038 \n",
+ "1 0.054272 0.078030 1.134169 ... 1.293825 -1.521819 0.562700 -0.441175 \n",
+ "2 0.015023 -0.640847 2.018753 ... -0.733125 -0.889653 -2.083637 2.219374 \n",
+ "3 1.152702 2.159849 0.015764 ... -1.521924 -1.431645 -0.872895 -0.025135 \n",
+ "4 -1.092184 -1.895761 0.180935 ... -0.232243 -0.401761 -0.222144 0.480351 \n",
+ "5 -1.005105 -0.900521 0.395284 ... -2.157407 0.280113 -0.883831 -1.388701 \n",
+ "6 0.188181 -0.330365 0.207237 ... -0.641100 -1.031611 -0.560739 0.426600 \n",
+ "7 0.075245 -0.616297 0.765266 ... 0.766638 -1.262343 -0.449191 -0.380143 \n",
+ "8 1.169773 0.143641 1.259080 ... -1.660429 1.097643 0.751109 -0.353375 \n",
+ "9 -0.135824 0.597120 0.388386 ... 1.039741 -0.685464 1.042412 1.016426 \n",
+ "\n",
+ " 360 361 362 363 364 365 \n",
+ "0 0.769073 0.223731 -0.271954 0.934556 -0.371321 0.527513 \n",
+ "1 -0.623304 0.559502 0.194984 -1.984875 -1.686339 0.896396 \n",
+ "2 0.512767 1.210230 -0.257335 -1.888650 -0.897753 -0.627711 \n",
+ "3 -0.834000 0.496014 -0.605926 -0.553788 0.050255 0.720794 \n",
+ "4 1.092475 1.234285 -0.249237 -0.484025 -0.531681 -1.456187 \n",
+ "5 0.528675 0.267038 -1.368319 -1.096792 1.099992 0.396587 \n",
+ "6 -0.040521 0.255512 0.110917 -0.603269 -0.485403 0.088915 \n",
+ "7 0.122539 0.858159 0.265301 -1.452599 -1.632551 -0.077418 \n",
+ "8 0.765719 0.048287 -0.363845 0.698244 1.445932 -0.277977 \n",
+ "9 1.344937 -1.247285 1.420088 -0.006036 -1.963635 -0.336042 \n",
+ "\n",
+ "[10 rows x 365 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Look up one temperature series per observation. Applying row-wise returns a\n",
+ "# Series per row, which pandas stacks into a frame with one row per\n",
+ "# observation and one column per day of year.\n",
+ "def _temperature_for(observation):\n",
+ "    return get_temperature(observation['year'], observation['geometry'])\n",
+ "\n",
+ "\n",
+ "weather = obs.apply(_temperature_for, axis=1)\n",
+ "weather"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " year \n",
+ " DOY_firstbloom \n",
+ " geometry \n",
+ " 1 \n",
+ " 2 \n",
+ " 3 \n",
+ " 4 \n",
+ " 5 \n",
+ " 6 \n",
+ " 7 \n",
+ " ... \n",
+ " 356 \n",
+ " 357 \n",
+ " 358 \n",
+ " 359 \n",
+ " 360 \n",
+ " 361 \n",
+ " 362 \n",
+ " 363 \n",
+ " 364 \n",
+ " 365 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 129 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " 0.307213 \n",
+ " -1.730910 \n",
+ " 1.142865 \n",
+ " -1.329763 \n",
+ " 0.176960 \n",
+ " -0.065294 \n",
+ " 0.480736 \n",
+ " ... \n",
+ " -1.513503 \n",
+ " -0.264486 \n",
+ " 1.128968 \n",
+ " 1.585038 \n",
+ " 0.769073 \n",
+ " 0.223731 \n",
+ " -0.271954 \n",
+ " 0.934556 \n",
+ " -0.371321 \n",
+ " 0.527513 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 136 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " 0.981330 \n",
+ " -0.629055 \n",
+ " 0.147775 \n",
+ " 0.177528 \n",
+ " -0.658201 \n",
+ " -0.508874 \n",
+ " 0.010616 \n",
+ " ... \n",
+ " 1.293825 \n",
+ " -1.521819 \n",
+ " 0.562700 \n",
+ " -0.441175 \n",
+ " -0.623304 \n",
+ " 0.559502 \n",
+ " 0.194984 \n",
+ " -1.984875 \n",
+ " -1.686339 \n",
+ " 0.896396 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 153 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -0.064179 \n",
+ " 1.031772 \n",
+ " 0.426700 \n",
+ " 0.785975 \n",
+ " 0.846932 \n",
+ " -1.573482 \n",
+ " 0.928517 \n",
+ " ... \n",
+ " -0.733125 \n",
+ " -0.889653 \n",
+ " -2.083637 \n",
+ " 2.219374 \n",
+ " 0.512767 \n",
+ " 1.210230 \n",
+ " -0.257335 \n",
+ " -1.888650 \n",
+ " -0.897753 \n",
+ " -0.627711 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 135 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -0.287284 \n",
+ " -1.271925 \n",
+ " 0.878782 \n",
+ " 0.853996 \n",
+ " 0.727292 \n",
+ " -2.365385 \n",
+ " -1.079522 \n",
+ " ... \n",
+ " -1.521924 \n",
+ " -1.431645 \n",
+ " -0.872895 \n",
+ " -0.025135 \n",
+ " -0.834000 \n",
+ " 0.496014 \n",
+ " -0.605926 \n",
+ " -0.553788 \n",
+ " 0.050255 \n",
+ " 0.720794 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 153 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -0.835895 \n",
+ " -0.371376 \n",
+ " 0.480107 \n",
+ " 0.292897 \n",
+ " 1.442436 \n",
+ " 0.949195 \n",
+ " 0.480992 \n",
+ " ... \n",
+ " -0.232243 \n",
+ " -0.401761 \n",
+ " -0.222144 \n",
+ " 0.480351 \n",
+ " 1.092475 \n",
+ " 1.234285 \n",
+ " -0.249237 \n",
+ " -0.484025 \n",
+ " -0.531681 \n",
+ " -1.456187 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 167 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " 0.208490 \n",
+ " 0.354984 \n",
+ " -0.152511 \n",
+ " 0.098195 \n",
+ " 1.386408 \n",
+ " 1.055924 \n",
+ " -1.034276 \n",
+ " ... \n",
+ " -2.157407 \n",
+ " 0.280113 \n",
+ " -0.883831 \n",
+ " -1.388701 \n",
+ " 0.528675 \n",
+ " 0.267038 \n",
+ " -1.368319 \n",
+ " -1.096792 \n",
+ " 1.099992 \n",
+ " 0.396587 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 168 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -0.624989 \n",
+ " -2.104158 \n",
+ " -1.639389 \n",
+ " -0.117674 \n",
+ " -1.728902 \n",
+ " 0.895439 \n",
+ " -1.073810 \n",
+ " ... \n",
+ " -0.641100 \n",
+ " -1.031611 \n",
+ " -0.560739 \n",
+ " 0.426600 \n",
+ " -0.040521 \n",
+ " 0.255512 \n",
+ " 0.110917 \n",
+ " -0.603269 \n",
+ " -0.485403 \n",
+ " 0.088915 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 179 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " 0.658223 \n",
+ " 0.719648 \n",
+ " 0.669712 \n",
+ " -0.267893 \n",
+ " 0.282330 \n",
+ " 0.091997 \n",
+ " 0.575704 \n",
+ " ... \n",
+ " 0.766638 \n",
+ " -1.262343 \n",
+ " -0.449191 \n",
+ " -0.380143 \n",
+ " 0.122539 \n",
+ " 0.858159 \n",
+ " 0.265301 \n",
+ " -1.452599 \n",
+ " -1.632551 \n",
+ " -0.077418 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 128 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " 1.216177 \n",
+ " 0.608043 \n",
+ " 1.819014 \n",
+ " -0.205315 \n",
+ " -0.924444 \n",
+ " 0.214525 \n",
+ " 0.713723 \n",
+ " ... \n",
+ " -1.660429 \n",
+ " 1.097643 \n",
+ " 0.751109 \n",
+ " -0.353375 \n",
+ " 0.765719 \n",
+ " 0.048287 \n",
+ " -0.363845 \n",
+ " 0.698244 \n",
+ " 1.445932 \n",
+ " -0.277977 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 175 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -0.815981 \n",
+ " -0.317910 \n",
+ " 0.518929 \n",
+ " 1.616109 \n",
+ " 0.355642 \n",
+ " 0.211716 \n",
+ " -1.197219 \n",
+ " ... \n",
+ " 1.039741 \n",
+ " -0.685464 \n",
+ " 1.042412 \n",
+ " 1.016426 \n",
+ " 1.344937 \n",
+ " -1.247285 \n",
+ " 1.420088 \n",
+ " -0.006036 \n",
+ " -1.963635 \n",
+ " -0.336042 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
10 rows × 368 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year DOY_firstbloom geometry 1 2 \\\n",
+ "0 2000 129 POINT (-0.47519 0.04296) 0.307213 -1.730910 \n",
+ "1 2001 136 POINT (-1.48409 -0.50742) 0.981330 -0.629055 \n",
+ "2 2002 153 POINT (0.94426 -0.21199) -0.064179 1.031772 \n",
+ "3 2003 135 POINT (0.29152 -1.54929) -0.287284 -1.271925 \n",
+ "4 2004 153 POINT (1.04712 0.28730) -0.835895 -0.371376 \n",
+ "5 2005 167 POINT (-0.84866 -0.80568) 0.208490 0.354984 \n",
+ "6 2006 168 POINT (-1.10011 0.54719) -0.624989 -2.104158 \n",
+ "7 2007 179 POINT (-0.52378 0.05342) 0.658223 0.719648 \n",
+ "8 2008 128 POINT (-0.78217 0.39899) 1.216177 0.608043 \n",
+ "9 2009 175 POINT (-1.15685 -0.01451) -0.815981 -0.317910 \n",
+ "\n",
+ " 3 4 5 6 7 ... 356 357 \\\n",
+ "0 1.142865 -1.329763 0.176960 -0.065294 0.480736 ... -1.513503 -0.264486 \n",
+ "1 0.147775 0.177528 -0.658201 -0.508874 0.010616 ... 1.293825 -1.521819 \n",
+ "2 0.426700 0.785975 0.846932 -1.573482 0.928517 ... -0.733125 -0.889653 \n",
+ "3 0.878782 0.853996 0.727292 -2.365385 -1.079522 ... -1.521924 -1.431645 \n",
+ "4 0.480107 0.292897 1.442436 0.949195 0.480992 ... -0.232243 -0.401761 \n",
+ "5 -0.152511 0.098195 1.386408 1.055924 -1.034276 ... -2.157407 0.280113 \n",
+ "6 -1.639389 -0.117674 -1.728902 0.895439 -1.073810 ... -0.641100 -1.031611 \n",
+ "7 0.669712 -0.267893 0.282330 0.091997 0.575704 ... 0.766638 -1.262343 \n",
+ "8 1.819014 -0.205315 -0.924444 0.214525 0.713723 ... -1.660429 1.097643 \n",
+ "9 0.518929 1.616109 0.355642 0.211716 -1.197219 ... 1.039741 -0.685464 \n",
+ "\n",
+ " 358 359 360 361 362 363 364 \\\n",
+ "0 1.128968 1.585038 0.769073 0.223731 -0.271954 0.934556 -0.371321 \n",
+ "1 0.562700 -0.441175 -0.623304 0.559502 0.194984 -1.984875 -1.686339 \n",
+ "2 -2.083637 2.219374 0.512767 1.210230 -0.257335 -1.888650 -0.897753 \n",
+ "3 -0.872895 -0.025135 -0.834000 0.496014 -0.605926 -0.553788 0.050255 \n",
+ "4 -0.222144 0.480351 1.092475 1.234285 -0.249237 -0.484025 -0.531681 \n",
+ "5 -0.883831 -1.388701 0.528675 0.267038 -1.368319 -1.096792 1.099992 \n",
+ "6 -0.560739 0.426600 -0.040521 0.255512 0.110917 -0.603269 -0.485403 \n",
+ "7 -0.449191 -0.380143 0.122539 0.858159 0.265301 -1.452599 -1.632551 \n",
+ "8 0.751109 -0.353375 0.765719 0.048287 -0.363845 0.698244 1.445932 \n",
+ "9 1.042412 1.016426 1.344937 -1.247285 1.420088 -0.006036 -1.963635 \n",
+ "\n",
+ " 365 \n",
+ "0 0.527513 \n",
+ "1 0.896396 \n",
+ "2 -0.627711 \n",
+ "3 0.720794 \n",
+ "4 -1.456187 \n",
+ "5 0.396587 \n",
+ "6 0.088915 \n",
+ "7 -0.077418 \n",
+ "8 -0.277977 \n",
+ "9 -0.336042 \n",
+ "\n",
+ "[10 rows x 368 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Place the observation columns and the per-day temperature columns side by\n",
+ "# side; both frames share the same row index.\n",
+ "combined = pd.concat(objs=[obs, weather], axis='columns')\n",
+ "combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " year \n",
+ " DOY_firstbloom \n",
+ " geometry \n",
+ " temperature \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 129 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " 0 0.307213\n",
+ "1 -1.730910\n",
+ "2 1.14286... \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 136 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " 0 0.981330\n",
+ "1 -0.629055\n",
+ "2 0.14777... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 153 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " 0 -0.064179\n",
+ "1 1.031772\n",
+ "2 0.42670... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 135 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " 0 -0.287284\n",
+ "1 -1.271925\n",
+ "2 0.87878... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 153 \n",
+ " POINT (1.04712 0.28730) \n",
+ " 0 -0.835895\n",
+ "1 -0.371376\n",
+ "2 0.48010... \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 167 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " 0 0.208490\n",
+ "1 0.354984\n",
+ "2 -0.15251... \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 168 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " 0 -0.624989\n",
+ "1 -2.104158\n",
+ "2 -1.63938... \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 179 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " 0 0.658223\n",
+ "1 0.719648\n",
+ "2 0.66971... \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 128 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " 0 1.216177\n",
+ "1 0.608043\n",
+ "2 1.81901... \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 175 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " 0 -0.815981\n",
+ "1 -0.317910\n",
+ "2 0.51892... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year DOY_firstbloom geometry \\\n",
+ "0 2000 129 POINT (-0.47519 0.04296) \n",
+ "1 2001 136 POINT (-1.48409 -0.50742) \n",
+ "2 2002 153 POINT (0.94426 -0.21199) \n",
+ "3 2003 135 POINT (0.29152 -1.54929) \n",
+ "4 2004 153 POINT (1.04712 0.28730) \n",
+ "5 2005 167 POINT (-0.84866 -0.80568) \n",
+ "6 2006 168 POINT (-1.10011 0.54719) \n",
+ "7 2007 179 POINT (-0.52378 0.05342) \n",
+ "8 2008 128 POINT (-0.78217 0.39899) \n",
+ "9 2009 175 POINT (-1.15685 -0.01451) \n",
+ "\n",
+ " temperature \n",
+ "0 0 0.307213\n",
+ "1 -1.730910\n",
+ "2 1.14286... \n",
+ "1 0 0.981330\n",
+ "1 -0.629055\n",
+ "2 0.14777... \n",
+ "2 0 -0.064179\n",
+ "1 1.031772\n",
+ "2 0.42670... \n",
+ "3 0 -0.287284\n",
+ "1 -1.271925\n",
+ "2 0.87878... \n",
+ "4 0 -0.835895\n",
+ "1 -0.371376\n",
+ "2 0.48010... \n",
+ "5 0 0.208490\n",
+ "1 0.354984\n",
+ "2 -0.15251... \n",
+ "6 0 -0.624989\n",
+ "1 -2.104158\n",
+ "2 -1.63938... \n",
+ "7 0 0.658223\n",
+ "1 0.719648\n",
+ "2 0.66971... \n",
+ "8 0 1.216177\n",
+ "1 0.608043\n",
+ "2 1.81901... \n",
+ "9 0 -0.815981\n",
+ "1 -0.317910\n",
+ "2 0.51892... "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# However, having so many columns is not good: we want to do feature extraction to reduce the number of columns\n",
+ "# Also, what if we have multiple variables? We need to make compound column names that are difficult to work with.\n",
+ "# How can we do this differently?\n",
+ "\n",
+ "# One option is to do the feature extraction before combining the dataframes.\n",
+ "# Or, we could just put more complex data types in the columns. The advantage of\n",
+ "# the latter is that we can use DOY_firstbloom in our feature extraction (see\n",
+ "# later on).\n",
+ "\n",
+ "# For example, we could insert weather as a pandas series.\n",
+ "# Pass the day-of-year labels explicitly: weather.values is a bare array, so\n",
+ "# without index= each series would be relabelled 0..364 instead of DOY 1..365.\n",
+ "combined = obs.assign(\n",
+ "    temperature=[pd.Series(values, index=weather.columns) for values in weather.values]\n",
+ ")\n",
+ "\n",
+ "# This is much more succinct:\n",
+ "combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " year \n",
+ " DOY_firstbloom \n",
+ " geometry \n",
+ " min \n",
+ " mean \n",
+ " max \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 129 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " -2.679822 \n",
+ " -0.001827 \n",
+ " 2.738706 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 136 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " -2.418653 \n",
+ " 0.018380 \n",
+ " 3.495607 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 153 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -3.731281 \n",
+ " -0.006298 \n",
+ " 2.662745 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 135 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -2.365385 \n",
+ " 0.059820 \n",
+ " 3.454617 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 153 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -3.077907 \n",
+ " -0.008102 \n",
+ " 2.633713 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 167 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " -3.060988 \n",
+ " -0.050673 \n",
+ " 2.959994 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 168 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -2.418248 \n",
+ " -0.013136 \n",
+ " 2.767561 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 179 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " -3.296214 \n",
+ " 0.006644 \n",
+ " 2.638542 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 128 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " -2.470182 \n",
+ " 0.020328 \n",
+ " 2.689228 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 175 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -2.493179 \n",
+ " -0.025411 \n",
+ " 2.570942 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year DOY_firstbloom geometry min mean \\\n",
+ "0 2000 129 POINT (-0.47519 0.04296) -2.679822 -0.001827 \n",
+ "1 2001 136 POINT (-1.48409 -0.50742) -2.418653 0.018380 \n",
+ "2 2002 153 POINT (0.94426 -0.21199) -3.731281 -0.006298 \n",
+ "3 2003 135 POINT (0.29152 -1.54929) -2.365385 0.059820 \n",
+ "4 2004 153 POINT (1.04712 0.28730) -3.077907 -0.008102 \n",
+ "5 2005 167 POINT (-0.84866 -0.80568) -3.060988 -0.050673 \n",
+ "6 2006 168 POINT (-1.10011 0.54719) -2.418248 -0.013136 \n",
+ "7 2007 179 POINT (-0.52378 0.05342) -3.296214 0.006644 \n",
+ "8 2008 128 POINT (-0.78217 0.39899) -2.470182 0.020328 \n",
+ "9 2009 175 POINT (-1.15685 -0.01451) -2.493179 -0.025411 \n",
+ "\n",
+ " max \n",
+ "0 2.738706 \n",
+ "1 3.495607 \n",
+ "2 2.662745 \n",
+ "3 3.454617 \n",
+ "4 2.633713 \n",
+ "5 2.959994 \n",
+ "6 2.767561 \n",
+ "7 2.638542 \n",
+ "8 2.689228 \n",
+ "9 2.570942 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A pd.Series per cell is only an intermediate representation: reduce each\n",
+ "# series to a few scalar features and swap those in for the raw series.\n",
+ "summary_stats = ['min', 'mean', 'max']\n",
+ "features = combined['temperature'].apply(lambda series: series.agg(summary_stats))\n",
+ "pd.concat([combined, features], axis='columns').drop(columns='temperature')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# These feature extractions can now make use of the structure in the pandas\n",
+ "# series. It could also work on dataframes, or even on custom types\n",
+ "# e.g. \"ModisDataFrame\" which could have its own methods."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sliding window feature extraction\n",
+ "\n",
+ "The second feature request relates to the fact that we'd like to be able to predict **during** the growing season, i.e. we don't always have access to the full weather timeseries.\n",
+ "To that end, we propose to evaluate our feature extractions in a \"sliding window\" manner.\n",
+ "\n",
+ "Notice that if we had done the feature extraction in an earlier stage, this procedure would be more difficult."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current DOY: 30\n",
+ "Current DOY: 60\n",
+ "Current DOY: 90\n",
+ "Current DOY: 120\n",
+ "Current DOY: 150\n",
+ "Current DOY: 180\n",
+ "Current DOY: 210\n",
+ "Current DOY: 240\n",
+ "Current DOY: 270\n",
+ "Current DOY: 300\n",
+ "Current DOY: 330\n",
+ "Current DOY: 360\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " year \n",
+ " Days_until_firstbloom \n",
+ " geometry \n",
+ " min \n",
+ " mean \n",
+ " max \n",
+ " growing_degree_day \n",
+ " offset \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 99 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " -1.730910 \n",
+ " -0.162147 \n",
+ " 1.651318 \n",
+ " 21.555756 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 106 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " -2.350626 \n",
+ " 0.058488 \n",
+ " 1.225816 \n",
+ " 22.582670 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 123 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -1.573482 \n",
+ " 0.323914 \n",
+ " 2.342201 \n",
+ " 28.081309 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 105 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -2.365385 \n",
+ " 0.349394 \n",
+ " 2.942407 \n",
+ " 31.086575 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 123 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -2.634010 \n",
+ " 0.100050 \n",
+ " 1.730784 \n",
+ " 28.519667 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 137 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " -1.236073 \n",
+ " 0.520351 \n",
+ " 2.663564 \n",
+ " 29.219836 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 138 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -2.104158 \n",
+ " -0.145457 \n",
+ " 2.123773 \n",
+ " 23.504370 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 149 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " -1.411370 \n",
+ " 0.138744 \n",
+ " 1.540947 \n",
+ " 17.021493 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 98 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " -1.985387 \n",
+ " 0.358649 \n",
+ " 2.126795 \n",
+ " 24.811240 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 145 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -2.289814 \n",
+ " 0.177645 \n",
+ " 1.981664 \n",
+ " 23.086738 \n",
+ " 30 \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 69 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " -2.679822 \n",
+ " -0.268477 \n",
+ " 1.651318 \n",
+ " 47.154256 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 76 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " -1.905146 \n",
+ " -0.271571 \n",
+ " 1.438610 \n",
+ " 47.434034 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 93 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -2.556728 \n",
+ " -0.118921 \n",
+ " 1.763059 \n",
+ " 51.212831 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 75 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -2.344805 \n",
+ " -0.589733 \n",
+ " 0.971137 \n",
+ " 56.817790 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 93 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -1.667008 \n",
+ " 0.095689 \n",
+ " 1.675573 \n",
+ " 51.395087 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 107 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " -2.749049 \n",
+ " -0.138087 \n",
+ " 1.746457 \n",
+ " 51.807783 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 108 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -2.418248 \n",
+ " -0.169521 \n",
+ " 2.239476 \n",
+ " 45.974837 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 119 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " -1.877488 \n",
+ " 0.032869 \n",
+ " 2.339075 \n",
+ " 48.909758 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 68 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " -1.794195 \n",
+ " 0.102334 \n",
+ " 1.665198 \n",
+ " 48.141322 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 115 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -1.576640 \n",
+ " -0.397232 \n",
+ " 0.890273 \n",
+ " 40.782806 \n",
+ " 60 \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 39 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " -2.510686 \n",
+ " -0.024325 \n",
+ " 2.738706 \n",
+ " 72.572063 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 46 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " -2.083939 \n",
+ " 0.110830 \n",
+ " 2.237018 \n",
+ " 74.145259 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 63 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -2.687842 \n",
+ " 0.256801 \n",
+ " 2.318554 \n",
+ " 77.993194 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 45 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -1.923695 \n",
+ " 0.315876 \n",
+ " 2.853286 \n",
+ " 86.059123 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 63 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -1.619006 \n",
+ " -0.322039 \n",
+ " 2.633713 \n",
+ " 76.107665 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 77 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " -1.453476 \n",
+ " 0.087559 \n",
+ " 2.959994 \n",
+ " 75.045232 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 78 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -1.770168 \n",
+ " 0.251152 \n",
+ " 2.327559 \n",
+ " 71.546147 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 89 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " -2.003270 \n",
+ " 0.273057 \n",
+ " 2.339075 \n",
+ " 74.034425 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 38 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " -2.178999 \n",
+ " 0.009964 \n",
+ " 2.333445 \n",
+ " 76.151754 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 85 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -2.448074 \n",
+ " -0.123826 \n",
+ " 1.726888 \n",
+ " 62.903792 \n",
+ " 90 \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " 9 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " -1.759288 \n",
+ " 0.255768 \n",
+ " 2.280133 \n",
+ " 96.333066 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " 16 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " -1.687264 \n",
+ " -0.048790 \n",
+ " 1.425064 \n",
+ " 95.158674 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 33 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -2.558622 \n",
+ " -0.553904 \n",
+ " 1.672368 \n",
+ " 112.108506 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " 15 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -1.367312 \n",
+ " 0.266562 \n",
+ " 2.853286 \n",
+ " 109.082099 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 33 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -3.077907 \n",
+ " -0.131941 \n",
+ " 1.701810 \n",
+ " 100.203567 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 47 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " -1.707630 \n",
+ " 0.083315 \n",
+ " 2.419678 \n",
+ " 98.394958 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 48 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -1.522845 \n",
+ " 0.009362 \n",
+ " 2.054160 \n",
+ " 91.635259 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 59 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " -2.154758 \n",
+ " 0.097271 \n",
+ " 2.405996 \n",
+ " 99.643757 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " 8 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " -2.249100 \n",
+ " -0.105677 \n",
+ " 2.292550 \n",
+ " 101.847952 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 55 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -1.785297 \n",
+ " 0.129849 \n",
+ " 1.762686 \n",
+ " 82.931286 \n",
+ " 120 \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2000 \n",
+ " -21 \n",
+ " POINT (-0.47519 0.04296) \n",
+ " -2.484236 \n",
+ " -0.165396 \n",
+ " 1.972587 \n",
+ " 123.360710 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2001 \n",
+ " -14 \n",
+ " POINT (-1.48409 -0.50742) \n",
+ " -1.515987 \n",
+ " 0.308416 \n",
+ " 2.317104 \n",
+ " 122.714397 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2002 \n",
+ " 3 \n",
+ " POINT (0.94426 -0.21199) \n",
+ " -1.372304 \n",
+ " -0.094755 \n",
+ " 2.080179 \n",
+ " 135.090858 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2003 \n",
+ " -15 \n",
+ " POINT (0.29152 -1.54929) \n",
+ " -2.341702 \n",
+ " 0.068816 \n",
+ " 2.686143 \n",
+ " 136.355286 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " 3 \n",
+ " POINT (1.04712 0.28730) \n",
+ " -1.977684 \n",
+ " 0.037744 \n",
+ " 2.232353 \n",
+ " 125.372210 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2005 \n",
+ " 17 \n",
+ " POINT (-0.84866 -0.80568) \n",
+ " -2.741930 \n",
+ " -0.375609 \n",
+ " 1.989916 \n",
+ " 125.594786 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2006 \n",
+ " 18 \n",
+ " POINT (-1.10011 0.54719) \n",
+ " -1.330190 \n",
+ " -0.000005 \n",
+ " 1.929659 \n",
+ " 111.302583 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2007 \n",
+ " 29 \n",
+ " POINT (-0.52378 0.05342) \n",
+ " -2.265239 \n",
+ " -0.140482 \n",
+ " 1.872427 \n",
+ " 122.516384 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2008 \n",
+ " -22 \n",
+ " POINT (-0.78217 0.39899) \n",
+ " -1.579945 \n",
+ " 0.066553 \n",
+ " 1.361647 \n",
+ " 123.346219 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2009 \n",
+ " 25 \n",
+ " POINT (-1.15685 -0.01451) \n",
+ " -2.278652 \n",
+ " -0.072205 \n",
+ " 1.526836 \n",
+ " 104.627512 \n",
+ " 150 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year Days_until_firstbloom geometry min mean \\\n",
+ "0 2000 99 POINT (-0.47519 0.04296) -1.730910 -0.162147 \n",
+ "1 2001 106 POINT (-1.48409 -0.50742) -2.350626 0.058488 \n",
+ "2 2002 123 POINT (0.94426 -0.21199) -1.573482 0.323914 \n",
+ "3 2003 105 POINT (0.29152 -1.54929) -2.365385 0.349394 \n",
+ "4 2004 123 POINT (1.04712 0.28730) -2.634010 0.100050 \n",
+ "5 2005 137 POINT (-0.84866 -0.80568) -1.236073 0.520351 \n",
+ "6 2006 138 POINT (-1.10011 0.54719) -2.104158 -0.145457 \n",
+ "7 2007 149 POINT (-0.52378 0.05342) -1.411370 0.138744 \n",
+ "8 2008 98 POINT (-0.78217 0.39899) -1.985387 0.358649 \n",
+ "9 2009 145 POINT (-1.15685 -0.01451) -2.289814 0.177645 \n",
+ "0 2000 69 POINT (-0.47519 0.04296) -2.679822 -0.268477 \n",
+ "1 2001 76 POINT (-1.48409 -0.50742) -1.905146 -0.271571 \n",
+ "2 2002 93 POINT (0.94426 -0.21199) -2.556728 -0.118921 \n",
+ "3 2003 75 POINT (0.29152 -1.54929) -2.344805 -0.589733 \n",
+ "4 2004 93 POINT (1.04712 0.28730) -1.667008 0.095689 \n",
+ "5 2005 107 POINT (-0.84866 -0.80568) -2.749049 -0.138087 \n",
+ "6 2006 108 POINT (-1.10011 0.54719) -2.418248 -0.169521 \n",
+ "7 2007 119 POINT (-0.52378 0.05342) -1.877488 0.032869 \n",
+ "8 2008 68 POINT (-0.78217 0.39899) -1.794195 0.102334 \n",
+ "9 2009 115 POINT (-1.15685 -0.01451) -1.576640 -0.397232 \n",
+ "0 2000 39 POINT (-0.47519 0.04296) -2.510686 -0.024325 \n",
+ "1 2001 46 POINT (-1.48409 -0.50742) -2.083939 0.110830 \n",
+ "2 2002 63 POINT (0.94426 -0.21199) -2.687842 0.256801 \n",
+ "3 2003 45 POINT (0.29152 -1.54929) -1.923695 0.315876 \n",
+ "4 2004 63 POINT (1.04712 0.28730) -1.619006 -0.322039 \n",
+ "5 2005 77 POINT (-0.84866 -0.80568) -1.453476 0.087559 \n",
+ "6 2006 78 POINT (-1.10011 0.54719) -1.770168 0.251152 \n",
+ "7 2007 89 POINT (-0.52378 0.05342) -2.003270 0.273057 \n",
+ "8 2008 38 POINT (-0.78217 0.39899) -2.178999 0.009964 \n",
+ "9 2009 85 POINT (-1.15685 -0.01451) -2.448074 -0.123826 \n",
+ "0 2000 9 POINT (-0.47519 0.04296) -1.759288 0.255768 \n",
+ "1 2001 16 POINT (-1.48409 -0.50742) -1.687264 -0.048790 \n",
+ "2 2002 33 POINT (0.94426 -0.21199) -2.558622 -0.553904 \n",
+ "3 2003 15 POINT (0.29152 -1.54929) -1.367312 0.266562 \n",
+ "4 2004 33 POINT (1.04712 0.28730) -3.077907 -0.131941 \n",
+ "5 2005 47 POINT (-0.84866 -0.80568) -1.707630 0.083315 \n",
+ "6 2006 48 POINT (-1.10011 0.54719) -1.522845 0.009362 \n",
+ "7 2007 59 POINT (-0.52378 0.05342) -2.154758 0.097271 \n",
+ "8 2008 8 POINT (-0.78217 0.39899) -2.249100 -0.105677 \n",
+ "9 2009 55 POINT (-1.15685 -0.01451) -1.785297 0.129849 \n",
+ "0 2000 -21 POINT (-0.47519 0.04296) -2.484236 -0.165396 \n",
+ "1 2001 -14 POINT (-1.48409 -0.50742) -1.515987 0.308416 \n",
+ "2 2002 3 POINT (0.94426 -0.21199) -1.372304 -0.094755 \n",
+ "3 2003 -15 POINT (0.29152 -1.54929) -2.341702 0.068816 \n",
+ "4 2004 3 POINT (1.04712 0.28730) -1.977684 0.037744 \n",
+ "5 2005 17 POINT (-0.84866 -0.80568) -2.741930 -0.375609 \n",
+ "6 2006 18 POINT (-1.10011 0.54719) -1.330190 -0.000005 \n",
+ "7 2007 29 POINT (-0.52378 0.05342) -2.265239 -0.140482 \n",
+ "8 2008 -22 POINT (-0.78217 0.39899) -1.579945 0.066553 \n",
+ "9 2009 25 POINT (-1.15685 -0.01451) -2.278652 -0.072205 \n",
+ "\n",
+ " max growing_degree_day offset \n",
+ "0 1.651318 21.555756 30 \n",
+ "1 1.225816 22.582670 30 \n",
+ "2 2.342201 28.081309 30 \n",
+ "3 2.942407 31.086575 30 \n",
+ "4 1.730784 28.519667 30 \n",
+ "5 2.663564 29.219836 30 \n",
+ "6 2.123773 23.504370 30 \n",
+ "7 1.540947 17.021493 30 \n",
+ "8 2.126795 24.811240 30 \n",
+ "9 1.981664 23.086738 30 \n",
+ "0 1.651318 47.154256 60 \n",
+ "1 1.438610 47.434034 60 \n",
+ "2 1.763059 51.212831 60 \n",
+ "3 0.971137 56.817790 60 \n",
+ "4 1.675573 51.395087 60 \n",
+ "5 1.746457 51.807783 60 \n",
+ "6 2.239476 45.974837 60 \n",
+ "7 2.339075 48.909758 60 \n",
+ "8 1.665198 48.141322 60 \n",
+ "9 0.890273 40.782806 60 \n",
+ "0 2.738706 72.572063 90 \n",
+ "1 2.237018 74.145259 90 \n",
+ "2 2.318554 77.993194 90 \n",
+ "3 2.853286 86.059123 90 \n",
+ "4 2.633713 76.107665 90 \n",
+ "5 2.959994 75.045232 90 \n",
+ "6 2.327559 71.546147 90 \n",
+ "7 2.339075 74.034425 90 \n",
+ "8 2.333445 76.151754 90 \n",
+ "9 1.726888 62.903792 90 \n",
+ "0 2.280133 96.333066 120 \n",
+ "1 1.425064 95.158674 120 \n",
+ "2 1.672368 112.108506 120 \n",
+ "3 2.853286 109.082099 120 \n",
+ "4 1.701810 100.203567 120 \n",
+ "5 2.419678 98.394958 120 \n",
+ "6 2.054160 91.635259 120 \n",
+ "7 2.405996 99.643757 120 \n",
+ "8 2.292550 101.847952 120 \n",
+ "9 1.762686 82.931286 120 \n",
+ "0 1.972587 123.360710 150 \n",
+ "1 2.317104 122.714397 150 \n",
+ "2 2.080179 135.090858 150 \n",
+ "3 2.686143 136.355286 150 \n",
+ "4 2.232353 125.372210 150 \n",
+ "5 1.989916 125.594786 150 \n",
+ "6 1.929659 111.302583 150 \n",
+ "7 1.872427 122.516384 150 \n",
+ "8 1.361647 123.346219 150 \n",
+ "9 1.526836 104.627512 150 "
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build one training sample per (row of `combined`, reference day `doy`):\n",
+ "# the target becomes \"days until first bloom\" as seen from `doy`, and the\n",
+ "# features summarise the temperature series up to that day.\n",
+ "window_size = 30\n",
+ "copies = []\n",
+ "for doy in range(window_size, 365, window_size):\n",
+ "    print(f\"Current DOY: {doy}\")\n",
+ "\n",
+ "    # Make doy relative to \"current\" day; negative values mean the first bloom\n",
+ "    # already happened before `doy`.\n",
+ "    data_copy = combined.copy().rename({'DOY_firstbloom': 'Days_until_firstbloom'}, axis=1)\n",
+ "    data_copy.Days_until_firstbloom -= doy\n",
+ "\n",
+ "    # Extract features relevant at \"current\" day in the growing season.\n",
+ "    # `.loc` label slices include BOTH endpoints, so the trailing window of\n",
+ "    # `window_size` days ending at `doy` starts at `doy - window_size + 1`.\n",
+ "    # Starting at `doy - window_size` would span window_size + 1 labels and make\n",
+ "    # consecutive windows share their boundary day.\n",
+ "    # NOTE(review): assumes each temperature series is indexed by integer day of year - confirm.\n",
+ "    window_start = doy - window_size + 1\n",
+ "    features = data_copy.temperature.apply(lambda s: s.loc[window_start:doy].agg(['min', 'mean', 'max']))\n",
+ "\n",
+ "    # Running total of |temperature| up to and including `doy`, stored as 'growing_degree_day'\n",
+ "    gdd_feature = data_copy.temperature.apply(lambda s: s.abs().cumsum().loc[doy]).rename('growing_degree_day')\n",
+ "\n",
+ "    # Combine time-specific features with time-specific target variable;\n",
+ "    # the raw temperature series is dropped once it has been summarised.\n",
+ "    combined_copy = pd.concat([data_copy, features, gdd_feature], axis=1).drop('temperature', axis=1)\n",
+ "    combined_copy['offset'] = doy  # remember which reference day this sample belongs to\n",
+ "    copies.append(combined_copy)\n",
+ "\n",
+ "# Stack the per-offset frames; the original row index repeats once per offset.\n",
+ "final_df = pd.concat(copies)\n",
+ "final_df.head(50)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Scatter of accumulated |temperature| ('growing_degree_day') against the\n",
+ "# remaining days until first bloom, one point per row of final_df.\n",
+ "final_df.plot(kind='scatter', x='growing_degree_day', y='Days_until_firstbloom')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "springtime",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}