From 2a3568799a44236d64e82da0bbefcc82061eb7cc Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Fri, 22 Sep 2023 14:01:55 +0200 Subject: [PATCH] Create example notebook --- relative_training_data.ipynb | 2082 ++++++++++++++++++++++++++++++++++ 1 file changed, 2082 insertions(+) create mode 100644 relative_training_data.ipynb diff --git a/relative_training_data.ipynb b/relative_training_data.ipynb new file mode 100644 index 00000000..2666f348 --- /dev/null +++ b/relative_training_data.ipynb @@ -0,0 +1,2082 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fake data feature requests / MWE\n", + "\n", + "In this notebook we outline two main features we'd like to use in springtime.\n", + "To this end, we first make some fake sample data." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearDOY_firstbloomgeometry
02000129POINT (-0.47519 0.04296)
12001136POINT (-1.48409 -0.50742)
22002153POINT (0.94426 -0.21199)
32003135POINT (0.29152 -1.54929)
42004153POINT (1.04712 0.28730)
52005167POINT (-0.84866 -0.80568)
62006168POINT (-1.10011 0.54719)
72007179POINT (-0.52378 0.05342)
82008128POINT (-0.78217 0.39899)
92009175POINT (-1.15685 -0.01451)
\n", + "
" + ], + "text/plain": [ + " year DOY_firstbloom geometry\n", + "0 2000 129 POINT (-0.47519 0.04296)\n", + "1 2001 136 POINT (-1.48409 -0.50742)\n", + "2 2002 153 POINT (0.94426 -0.21199)\n", + "3 2003 135 POINT (0.29152 -1.54929)\n", + "4 2004 153 POINT (1.04712 0.28730)\n", + "5 2005 167 POINT (-0.84866 -0.80568)\n", + "6 2006 168 POINT (-1.10011 0.54719)\n", + "7 2007 179 POINT (-0.52378 0.05342)\n", + "8 2008 128 POINT (-0.78217 0.39899)\n", + "9 2009 175 POINT (-1.15685 -0.01451)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We have random observations for 10 points:\n", + "obs = gpd.GeoDataFrame(\n", + " data = {\n", + " 'year': np.arange(2000, 2010), \n", + " 'DOY_firstbloom': np.random.randint(120, 180, size=10),\n", + " 'geometry': gpd.GeoSeries.from_xy(*np.random.randn(2, 10))\n", + " },\n", + ")\n", + "obs" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 -0.258835\n", + "2 -0.542725\n", + "3 0.544299\n", + "4 0.934947\n", + "5 -0.515771\n", + " ... \n", + "361 -0.008609\n", + "362 0.358386\n", + "363 -0.700696\n", + "364 -0.749247\n", + "365 -0.039406\n", + "Name: temperature, Length: 365, dtype: float64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# For each of these years/locations, we can retrieve temperature records for each DOY\n", + "get_temperature = lambda year, geometry: pd.Series(np.random.randn(365), index=np.arange(1, 366), name='temperature')\n", + "get_temperature(obs.geometry[0], 2000)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combining observations and weather data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
12345678910...356357358359360361362363364365
00.307213-1.7309101.142865-1.3297630.176960-0.0652940.480736-0.8713300.0007980.656874...-1.513503-0.2644861.1289681.5850380.7690730.223731-0.2719540.934556-0.3713210.527513
10.981330-0.6290550.1477750.177528-0.658201-0.5088740.0106160.0542720.0780301.134169...1.293825-1.5218190.562700-0.441175-0.6233040.5595020.194984-1.984875-1.6863390.896396
2-0.0641791.0317720.4267000.7859750.846932-1.5734820.9285170.015023-0.6408472.018753...-0.733125-0.889653-2.0836372.2193740.5127671.210230-0.257335-1.888650-0.897753-0.627711
3-0.287284-1.2719250.8787820.8539960.727292-2.365385-1.0795221.1527022.1598490.015764...-1.521924-1.431645-0.872895-0.025135-0.8340000.496014-0.605926-0.5537880.0502550.720794
4-0.835895-0.3713760.4801070.2928971.4424360.9491950.480992-1.092184-1.8957610.180935...-0.232243-0.401761-0.2221440.4803511.0924751.234285-0.249237-0.484025-0.531681-1.456187
50.2084900.354984-0.1525110.0981951.3864081.055924-1.034276-1.005105-0.9005210.395284...-2.1574070.280113-0.883831-1.3887010.5286750.267038-1.368319-1.0967921.0999920.396587
6-0.624989-2.104158-1.639389-0.117674-1.7289020.895439-1.0738100.188181-0.3303650.207237...-0.641100-1.031611-0.5607390.426600-0.0405210.2555120.110917-0.603269-0.4854030.088915
70.6582230.7196480.669712-0.2678930.2823300.0919970.5757040.075245-0.6162970.765266...0.766638-1.262343-0.449191-0.3801430.1225390.8581590.265301-1.452599-1.632551-0.077418
81.2161770.6080431.819014-0.205315-0.9244440.2145250.7137231.1697730.1436411.259080...-1.6604291.0976430.751109-0.3533750.7657190.048287-0.3638450.6982441.445932-0.277977
9-0.815981-0.3179100.5189291.6161090.3556420.211716-1.197219-0.1358240.5971200.388386...1.039741-0.6854641.0424121.0164261.344937-1.2472851.420088-0.006036-1.963635-0.336042
\n", + "

10 rows × 365 columns

\n", + "
" + ], + "text/plain": [ + " 1 2 3 4 5 6 7 \\\n", + "0 0.307213 -1.730910 1.142865 -1.329763 0.176960 -0.065294 0.480736 \n", + "1 0.981330 -0.629055 0.147775 0.177528 -0.658201 -0.508874 0.010616 \n", + "2 -0.064179 1.031772 0.426700 0.785975 0.846932 -1.573482 0.928517 \n", + "3 -0.287284 -1.271925 0.878782 0.853996 0.727292 -2.365385 -1.079522 \n", + "4 -0.835895 -0.371376 0.480107 0.292897 1.442436 0.949195 0.480992 \n", + "5 0.208490 0.354984 -0.152511 0.098195 1.386408 1.055924 -1.034276 \n", + "6 -0.624989 -2.104158 -1.639389 -0.117674 -1.728902 0.895439 -1.073810 \n", + "7 0.658223 0.719648 0.669712 -0.267893 0.282330 0.091997 0.575704 \n", + "8 1.216177 0.608043 1.819014 -0.205315 -0.924444 0.214525 0.713723 \n", + "9 -0.815981 -0.317910 0.518929 1.616109 0.355642 0.211716 -1.197219 \n", + "\n", + " 8 9 10 ... 356 357 358 359 \\\n", + "0 -0.871330 0.000798 0.656874 ... -1.513503 -0.264486 1.128968 1.585038 \n", + "1 0.054272 0.078030 1.134169 ... 1.293825 -1.521819 0.562700 -0.441175 \n", + "2 0.015023 -0.640847 2.018753 ... -0.733125 -0.889653 -2.083637 2.219374 \n", + "3 1.152702 2.159849 0.015764 ... -1.521924 -1.431645 -0.872895 -0.025135 \n", + "4 -1.092184 -1.895761 0.180935 ... -0.232243 -0.401761 -0.222144 0.480351 \n", + "5 -1.005105 -0.900521 0.395284 ... -2.157407 0.280113 -0.883831 -1.388701 \n", + "6 0.188181 -0.330365 0.207237 ... -0.641100 -1.031611 -0.560739 0.426600 \n", + "7 0.075245 -0.616297 0.765266 ... 0.766638 -1.262343 -0.449191 -0.380143 \n", + "8 1.169773 0.143641 1.259080 ... -1.660429 1.097643 0.751109 -0.353375 \n", + "9 -0.135824 0.597120 0.388386 ... 
1.039741 -0.685464 1.042412 1.016426 \n", + "\n", + " 360 361 362 363 364 365 \n", + "0 0.769073 0.223731 -0.271954 0.934556 -0.371321 0.527513 \n", + "1 -0.623304 0.559502 0.194984 -1.984875 -1.686339 0.896396 \n", + "2 0.512767 1.210230 -0.257335 -1.888650 -0.897753 -0.627711 \n", + "3 -0.834000 0.496014 -0.605926 -0.553788 0.050255 0.720794 \n", + "4 1.092475 1.234285 -0.249237 -0.484025 -0.531681 -1.456187 \n", + "5 0.528675 0.267038 -1.368319 -1.096792 1.099992 0.396587 \n", + "6 -0.040521 0.255512 0.110917 -0.603269 -0.485403 0.088915 \n", + "7 0.122539 0.858159 0.265301 -1.452599 -1.632551 -0.077418 \n", + "8 0.765719 0.048287 -0.363845 0.698244 1.445932 -0.277977 \n", + "9 1.344937 -1.247285 1.420088 -0.006036 -1.963635 -0.336042 \n", + "\n", + "[10 rows x 365 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We can transpose the data and combine it with the \"observations\"\n", + "weather = obs.apply(lambda row: get_temperature(row.year, row.geometry), axis=1)\n", + "weather" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearDOY_firstbloomgeometry1234567...356357358359360361362363364365
02000129POINT (-0.47519 0.04296)0.307213-1.7309101.142865-1.3297630.176960-0.0652940.480736...-1.513503-0.2644861.1289681.5850380.7690730.223731-0.2719540.934556-0.3713210.527513
12001136POINT (-1.48409 -0.50742)0.981330-0.6290550.1477750.177528-0.658201-0.5088740.010616...1.293825-1.5218190.562700-0.441175-0.6233040.5595020.194984-1.984875-1.6863390.896396
22002153POINT (0.94426 -0.21199)-0.0641791.0317720.4267000.7859750.846932-1.5734820.928517...-0.733125-0.889653-2.0836372.2193740.5127671.210230-0.257335-1.888650-0.897753-0.627711
32003135POINT (0.29152 -1.54929)-0.287284-1.2719250.8787820.8539960.727292-2.365385-1.079522...-1.521924-1.431645-0.872895-0.025135-0.8340000.496014-0.605926-0.5537880.0502550.720794
42004153POINT (1.04712 0.28730)-0.835895-0.3713760.4801070.2928971.4424360.9491950.480992...-0.232243-0.401761-0.2221440.4803511.0924751.234285-0.249237-0.484025-0.531681-1.456187
52005167POINT (-0.84866 -0.80568)0.2084900.354984-0.1525110.0981951.3864081.055924-1.034276...-2.1574070.280113-0.883831-1.3887010.5286750.267038-1.368319-1.0967921.0999920.396587
62006168POINT (-1.10011 0.54719)-0.624989-2.104158-1.639389-0.117674-1.7289020.895439-1.073810...-0.641100-1.031611-0.5607390.426600-0.0405210.2555120.110917-0.603269-0.4854030.088915
72007179POINT (-0.52378 0.05342)0.6582230.7196480.669712-0.2678930.2823300.0919970.575704...0.766638-1.262343-0.449191-0.3801430.1225390.8581590.265301-1.452599-1.632551-0.077418
82008128POINT (-0.78217 0.39899)1.2161770.6080431.819014-0.205315-0.9244440.2145250.713723...-1.6604291.0976430.751109-0.3533750.7657190.048287-0.3638450.6982441.445932-0.277977
92009175POINT (-1.15685 -0.01451)-0.815981-0.3179100.5189291.6161090.3556420.211716-1.197219...1.039741-0.6854641.0424121.0164261.344937-1.2472851.420088-0.006036-1.963635-0.336042
\n", + "

10 rows × 368 columns

\n", + "
" + ], + "text/plain": [ + " year DOY_firstbloom geometry 1 2 \\\n", + "0 2000 129 POINT (-0.47519 0.04296) 0.307213 -1.730910 \n", + "1 2001 136 POINT (-1.48409 -0.50742) 0.981330 -0.629055 \n", + "2 2002 153 POINT (0.94426 -0.21199) -0.064179 1.031772 \n", + "3 2003 135 POINT (0.29152 -1.54929) -0.287284 -1.271925 \n", + "4 2004 153 POINT (1.04712 0.28730) -0.835895 -0.371376 \n", + "5 2005 167 POINT (-0.84866 -0.80568) 0.208490 0.354984 \n", + "6 2006 168 POINT (-1.10011 0.54719) -0.624989 -2.104158 \n", + "7 2007 179 POINT (-0.52378 0.05342) 0.658223 0.719648 \n", + "8 2008 128 POINT (-0.78217 0.39899) 1.216177 0.608043 \n", + "9 2009 175 POINT (-1.15685 -0.01451) -0.815981 -0.317910 \n", + "\n", + " 3 4 5 6 7 ... 356 357 \\\n", + "0 1.142865 -1.329763 0.176960 -0.065294 0.480736 ... -1.513503 -0.264486 \n", + "1 0.147775 0.177528 -0.658201 -0.508874 0.010616 ... 1.293825 -1.521819 \n", + "2 0.426700 0.785975 0.846932 -1.573482 0.928517 ... -0.733125 -0.889653 \n", + "3 0.878782 0.853996 0.727292 -2.365385 -1.079522 ... -1.521924 -1.431645 \n", + "4 0.480107 0.292897 1.442436 0.949195 0.480992 ... -0.232243 -0.401761 \n", + "5 -0.152511 0.098195 1.386408 1.055924 -1.034276 ... -2.157407 0.280113 \n", + "6 -1.639389 -0.117674 -1.728902 0.895439 -1.073810 ... -0.641100 -1.031611 \n", + "7 0.669712 -0.267893 0.282330 0.091997 0.575704 ... 0.766638 -1.262343 \n", + "8 1.819014 -0.205315 -0.924444 0.214525 0.713723 ... -1.660429 1.097643 \n", + "9 0.518929 1.616109 0.355642 0.211716 -1.197219 ... 
1.039741 -0.685464 \n", + "\n", + " 358 359 360 361 362 363 364 \\\n", + "0 1.128968 1.585038 0.769073 0.223731 -0.271954 0.934556 -0.371321 \n", + "1 0.562700 -0.441175 -0.623304 0.559502 0.194984 -1.984875 -1.686339 \n", + "2 -2.083637 2.219374 0.512767 1.210230 -0.257335 -1.888650 -0.897753 \n", + "3 -0.872895 -0.025135 -0.834000 0.496014 -0.605926 -0.553788 0.050255 \n", + "4 -0.222144 0.480351 1.092475 1.234285 -0.249237 -0.484025 -0.531681 \n", + "5 -0.883831 -1.388701 0.528675 0.267038 -1.368319 -1.096792 1.099992 \n", + "6 -0.560739 0.426600 -0.040521 0.255512 0.110917 -0.603269 -0.485403 \n", + "7 -0.449191 -0.380143 0.122539 0.858159 0.265301 -1.452599 -1.632551 \n", + "8 0.751109 -0.353375 0.765719 0.048287 -0.363845 0.698244 1.445932 \n", + "9 1.042412 1.016426 1.344937 -1.247285 1.420088 -0.006036 -1.963635 \n", + "\n", + " 365 \n", + "0 0.527513 \n", + "1 0.896396 \n", + "2 -0.627711 \n", + "3 0.720794 \n", + "4 -1.456187 \n", + "5 0.396587 \n", + "6 0.088915 \n", + "7 -0.077418 \n", + "8 -0.277977 \n", + "9 -0.336042 \n", + "\n", + "[10 rows x 368 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined = pd.concat([obs, weather], axis=1)\n", + "combined" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearDOY_firstbloomgeometrytemperature
02000129POINT (-0.47519 0.04296)0 0.307213\n", + "1 -1.730910\n", + "2 1.14286...
12001136POINT (-1.48409 -0.50742)0 0.981330\n", + "1 -0.629055\n", + "2 0.14777...
22002153POINT (0.94426 -0.21199)0 -0.064179\n", + "1 1.031772\n", + "2 0.42670...
32003135POINT (0.29152 -1.54929)0 -0.287284\n", + "1 -1.271925\n", + "2 0.87878...
42004153POINT (1.04712 0.28730)0 -0.835895\n", + "1 -0.371376\n", + "2 0.48010...
52005167POINT (-0.84866 -0.80568)0 0.208490\n", + "1 0.354984\n", + "2 -0.15251...
62006168POINT (-1.10011 0.54719)0 -0.624989\n", + "1 -2.104158\n", + "2 -1.63938...
72007179POINT (-0.52378 0.05342)0 0.658223\n", + "1 0.719648\n", + "2 0.66971...
82008128POINT (-0.78217 0.39899)0 1.216177\n", + "1 0.608043\n", + "2 1.81901...
92009175POINT (-1.15685 -0.01451)0 -0.815981\n", + "1 -0.317910\n", + "2 0.51892...
\n", + "
" + ], + "text/plain": [ + " year DOY_firstbloom geometry \\\n", + "0 2000 129 POINT (-0.47519 0.04296) \n", + "1 2001 136 POINT (-1.48409 -0.50742) \n", + "2 2002 153 POINT (0.94426 -0.21199) \n", + "3 2003 135 POINT (0.29152 -1.54929) \n", + "4 2004 153 POINT (1.04712 0.28730) \n", + "5 2005 167 POINT (-0.84866 -0.80568) \n", + "6 2006 168 POINT (-1.10011 0.54719) \n", + "7 2007 179 POINT (-0.52378 0.05342) \n", + "8 2008 128 POINT (-0.78217 0.39899) \n", + "9 2009 175 POINT (-1.15685 -0.01451) \n", + "\n", + " temperature \n", + "0 0 0.307213\n", + "1 -1.730910\n", + "2 1.14286... \n", + "1 0 0.981330\n", + "1 -0.629055\n", + "2 0.14777... \n", + "2 0 -0.064179\n", + "1 1.031772\n", + "2 0.42670... \n", + "3 0 -0.287284\n", + "1 -1.271925\n", + "2 0.87878... \n", + "4 0 -0.835895\n", + "1 -0.371376\n", + "2 0.48010... \n", + "5 0 0.208490\n", + "1 0.354984\n", + "2 -0.15251... \n", + "6 0 -0.624989\n", + "1 -2.104158\n", + "2 -1.63938... \n", + "7 0 0.658223\n", + "1 0.719648\n", + "2 0.66971... \n", + "8 0 1.216177\n", + "1 0.608043\n", + "2 1.81901... \n", + "9 0 -0.815981\n", + "1 -0.317910\n", + "2 0.51892... " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# However, having so many columns is not good: we want to do feature extraction to reduce the number of columns\n", + "# Also, what if we have multiple variables? We need to make compound column names that are difficult to work with.\n", + "# How can we do this differently?\n", + "\n", + "# One option is to do the feature extraction before combining the dataframes.\n", + "# Or, we could just put more complex data types in the columns. 
The advantage of\n", + "# the latter is that we can use DOY_firstbloom in our feature extraction (see\n", + "# later on).\n", + "\n", + "# For example, we could insert weather as a pandas series:\n", + "combined = obs.assign(temperature=[pd.Series(v) for v in weather.values])\n", + "\n", + "# This is much more succinct:\n", + "combined" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearDOY_firstbloomgeometryminmeanmax
02000129POINT (-0.47519 0.04296)-2.679822-0.0018272.738706
12001136POINT (-1.48409 -0.50742)-2.4186530.0183803.495607
22002153POINT (0.94426 -0.21199)-3.731281-0.0062982.662745
32003135POINT (0.29152 -1.54929)-2.3653850.0598203.454617
42004153POINT (1.04712 0.28730)-3.077907-0.0081022.633713
52005167POINT (-0.84866 -0.80568)-3.060988-0.0506732.959994
62006168POINT (-1.10011 0.54719)-2.418248-0.0131362.767561
72007179POINT (-0.52378 0.05342)-3.2962140.0066442.638542
82008128POINT (-0.78217 0.39899)-2.4701820.0203282.689228
92009175POINT (-1.15685 -0.01451)-2.493179-0.0254112.570942
\n", + "
" + ], + "text/plain": [ + " year DOY_firstbloom geometry min mean \\\n", + "0 2000 129 POINT (-0.47519 0.04296) -2.679822 -0.001827 \n", + "1 2001 136 POINT (-1.48409 -0.50742) -2.418653 0.018380 \n", + "2 2002 153 POINT (0.94426 -0.21199) -3.731281 -0.006298 \n", + "3 2003 135 POINT (0.29152 -1.54929) -2.365385 0.059820 \n", + "4 2004 153 POINT (1.04712 0.28730) -3.077907 -0.008102 \n", + "5 2005 167 POINT (-0.84866 -0.80568) -3.060988 -0.050673 \n", + "6 2006 168 POINT (-1.10011 0.54719) -2.418248 -0.013136 \n", + "7 2007 179 POINT (-0.52378 0.05342) -3.296214 0.006644 \n", + "8 2008 128 POINT (-0.78217 0.39899) -2.470182 0.020328 \n", + "9 2009 175 POINT (-1.15685 -0.01451) -2.493179 -0.025411 \n", + "\n", + " max \n", + "0 2.738706 \n", + "1 3.495607 \n", + "2 2.662745 \n", + "3 3.454617 \n", + "4 2.633713 \n", + "5 2.959994 \n", + "6 2.767561 \n", + "7 2.638542 \n", + "8 2.689228 \n", + "9 2.570942 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Eventually, we don't want a complex object like a pd.Series in our data\n", + "# so here we can do some feature extractions:\n", + "features = combined.temperature.apply(lambda s: s.agg(['min', 'mean', 'max']))\n", + "pd.concat([combined, features], axis=1).drop('temperature', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# These feature extractions can now make use of the structure in the pandas\n", + "# series. It could also work on dataframes, or even on custom types\n", + "# e.g. \"ModisDataFrame\" which could have its own methods." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sliding window feature extraction\n", + "\n", + "The second feature request relates to the fact that we'd like to be able to predict **during** the growing season, i.e. 
we don't always have access to the full weather time series.\n", + "To that end, we propose to evaluate our feature extractions in a \"sliding window\" manner.\n", + "\n", + "Notice that if we had done the feature extraction at an earlier stage, this procedure would be more difficult." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current DOY: 30\n", + "Current DOY: 60\n", + "Current DOY: 90\n", + "Current DOY: 120\n", + "Current DOY: 150\n", + "Current DOY: 180\n", + "Current DOY: 210\n", + "Current DOY: 240\n", + "Current DOY: 270\n", + "Current DOY: 300\n", + "Current DOY: 330\n", + "Current DOY: 360\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearDays_until_firstbloomgeometryminmeanmaxgrowing_degree_dayoffset
0200099POINT (-0.47519 0.04296)-1.730910-0.1621471.65131821.55575630
12001106POINT (-1.48409 -0.50742)-2.3506260.0584881.22581622.58267030
22002123POINT (0.94426 -0.21199)-1.5734820.3239142.34220128.08130930
32003105POINT (0.29152 -1.54929)-2.3653850.3493942.94240731.08657530
42004123POINT (1.04712 0.28730)-2.6340100.1000501.73078428.51966730
52005137POINT (-0.84866 -0.80568)-1.2360730.5203512.66356429.21983630
62006138POINT (-1.10011 0.54719)-2.104158-0.1454572.12377323.50437030
72007149POINT (-0.52378 0.05342)-1.4113700.1387441.54094717.02149330
8200898POINT (-0.78217 0.39899)-1.9853870.3586492.12679524.81124030
92009145POINT (-1.15685 -0.01451)-2.2898140.1776451.98166423.08673830
0200069POINT (-0.47519 0.04296)-2.679822-0.2684771.65131847.15425660
1200176POINT (-1.48409 -0.50742)-1.905146-0.2715711.43861047.43403460
2200293POINT (0.94426 -0.21199)-2.556728-0.1189211.76305951.21283160
3200375POINT (0.29152 -1.54929)-2.344805-0.5897330.97113756.81779060
4200493POINT (1.04712 0.28730)-1.6670080.0956891.67557351.39508760
52005107POINT (-0.84866 -0.80568)-2.749049-0.1380871.74645751.80778360
62006108POINT (-1.10011 0.54719)-2.418248-0.1695212.23947645.97483760
72007119POINT (-0.52378 0.05342)-1.8774880.0328692.33907548.90975860
8200868POINT (-0.78217 0.39899)-1.7941950.1023341.66519848.14132260
92009115POINT (-1.15685 -0.01451)-1.576640-0.3972320.89027340.78280660
0200039POINT (-0.47519 0.04296)-2.510686-0.0243252.73870672.57206390
1200146POINT (-1.48409 -0.50742)-2.0839390.1108302.23701874.14525990
2200263POINT (0.94426 -0.21199)-2.6878420.2568012.31855477.99319490
3200345POINT (0.29152 -1.54929)-1.9236950.3158762.85328686.05912390
4200463POINT (1.04712 0.28730)-1.619006-0.3220392.63371376.10766590
5200577POINT (-0.84866 -0.80568)-1.4534760.0875592.95999475.04523290
6200678POINT (-1.10011 0.54719)-1.7701680.2511522.32755971.54614790
7200789POINT (-0.52378 0.05342)-2.0032700.2730572.33907574.03442590
8200838POINT (-0.78217 0.39899)-2.1789990.0099642.33344576.15175490
9200985POINT (-1.15685 -0.01451)-2.448074-0.1238261.72688862.90379290
020009POINT (-0.47519 0.04296)-1.7592880.2557682.28013396.333066120
1200116POINT (-1.48409 -0.50742)-1.687264-0.0487901.42506495.158674120
2200233POINT (0.94426 -0.21199)-2.558622-0.5539041.672368112.108506120
3200315POINT (0.29152 -1.54929)-1.3673120.2665622.853286109.082099120
4200433POINT (1.04712 0.28730)-3.077907-0.1319411.701810100.203567120
5200547POINT (-0.84866 -0.80568)-1.7076300.0833152.41967898.394958120
6200648POINT (-1.10011 0.54719)-1.5228450.0093622.05416091.635259120
7200759POINT (-0.52378 0.05342)-2.1547580.0972712.40599699.643757120
820088POINT (-0.78217 0.39899)-2.249100-0.1056772.292550101.847952120
9200955POINT (-1.15685 -0.01451)-1.7852970.1298491.76268682.931286120
02000-21POINT (-0.47519 0.04296)-2.484236-0.1653961.972587123.360710150
12001-14POINT (-1.48409 -0.50742)-1.5159870.3084162.317104122.714397150
220023POINT (0.94426 -0.21199)-1.372304-0.0947552.080179135.090858150
32003-15POINT (0.29152 -1.54929)-2.3417020.0688162.686143136.355286150
420043POINT (1.04712 0.28730)-1.9776840.0377442.232353125.372210150
5200517POINT (-0.84866 -0.80568)-2.741930-0.3756091.989916125.594786150
6200618POINT (-1.10011 0.54719)-1.330190-0.0000051.929659111.302583150
7200729POINT (-0.52378 0.05342)-2.265239-0.1404821.872427122.516384150
82008-22POINT (-0.78217 0.39899)-1.5799450.0665531.361647123.346219150
9200925POINT (-1.15685 -0.01451)-2.278652-0.0722051.526836104.627512150
\n", + "
" + ], + "text/plain": [ + " year Days_until_firstbloom geometry min mean \\\n", + "0 2000 99 POINT (-0.47519 0.04296) -1.730910 -0.162147 \n", + "1 2001 106 POINT (-1.48409 -0.50742) -2.350626 0.058488 \n", + "2 2002 123 POINT (0.94426 -0.21199) -1.573482 0.323914 \n", + "3 2003 105 POINT (0.29152 -1.54929) -2.365385 0.349394 \n", + "4 2004 123 POINT (1.04712 0.28730) -2.634010 0.100050 \n", + "5 2005 137 POINT (-0.84866 -0.80568) -1.236073 0.520351 \n", + "6 2006 138 POINT (-1.10011 0.54719) -2.104158 -0.145457 \n", + "7 2007 149 POINT (-0.52378 0.05342) -1.411370 0.138744 \n", + "8 2008 98 POINT (-0.78217 0.39899) -1.985387 0.358649 \n", + "9 2009 145 POINT (-1.15685 -0.01451) -2.289814 0.177645 \n", + "0 2000 69 POINT (-0.47519 0.04296) -2.679822 -0.268477 \n", + "1 2001 76 POINT (-1.48409 -0.50742) -1.905146 -0.271571 \n", + "2 2002 93 POINT (0.94426 -0.21199) -2.556728 -0.118921 \n", + "3 2003 75 POINT (0.29152 -1.54929) -2.344805 -0.589733 \n", + "4 2004 93 POINT (1.04712 0.28730) -1.667008 0.095689 \n", + "5 2005 107 POINT (-0.84866 -0.80568) -2.749049 -0.138087 \n", + "6 2006 108 POINT (-1.10011 0.54719) -2.418248 -0.169521 \n", + "7 2007 119 POINT (-0.52378 0.05342) -1.877488 0.032869 \n", + "8 2008 68 POINT (-0.78217 0.39899) -1.794195 0.102334 \n", + "9 2009 115 POINT (-1.15685 -0.01451) -1.576640 -0.397232 \n", + "0 2000 39 POINT (-0.47519 0.04296) -2.510686 -0.024325 \n", + "1 2001 46 POINT (-1.48409 -0.50742) -2.083939 0.110830 \n", + "2 2002 63 POINT (0.94426 -0.21199) -2.687842 0.256801 \n", + "3 2003 45 POINT (0.29152 -1.54929) -1.923695 0.315876 \n", + "4 2004 63 POINT (1.04712 0.28730) -1.619006 -0.322039 \n", + "5 2005 77 POINT (-0.84866 -0.80568) -1.453476 0.087559 \n", + "6 2006 78 POINT (-1.10011 0.54719) -1.770168 0.251152 \n", + "7 2007 89 POINT (-0.52378 0.05342) -2.003270 0.273057 \n", + "8 2008 38 POINT (-0.78217 0.39899) -2.178999 0.009964 \n", + "9 2009 85 POINT (-1.15685 -0.01451) -2.448074 -0.123826 \n", + "0 2000 9 POINT (-0.47519 
0.04296) -1.759288 0.255768 \n", + "1 2001 16 POINT (-1.48409 -0.50742) -1.687264 -0.048790 \n", + "2 2002 33 POINT (0.94426 -0.21199) -2.558622 -0.553904 \n", + "3 2003 15 POINT (0.29152 -1.54929) -1.367312 0.266562 \n", + "4 2004 33 POINT (1.04712 0.28730) -3.077907 -0.131941 \n", + "5 2005 47 POINT (-0.84866 -0.80568) -1.707630 0.083315 \n", + "6 2006 48 POINT (-1.10011 0.54719) -1.522845 0.009362 \n", + "7 2007 59 POINT (-0.52378 0.05342) -2.154758 0.097271 \n", + "8 2008 8 POINT (-0.78217 0.39899) -2.249100 -0.105677 \n", + "9 2009 55 POINT (-1.15685 -0.01451) -1.785297 0.129849 \n", + "0 2000 -21 POINT (-0.47519 0.04296) -2.484236 -0.165396 \n", + "1 2001 -14 POINT (-1.48409 -0.50742) -1.515987 0.308416 \n", + "2 2002 3 POINT (0.94426 -0.21199) -1.372304 -0.094755 \n", + "3 2003 -15 POINT (0.29152 -1.54929) -2.341702 0.068816 \n", + "4 2004 3 POINT (1.04712 0.28730) -1.977684 0.037744 \n", + "5 2005 17 POINT (-0.84866 -0.80568) -2.741930 -0.375609 \n", + "6 2006 18 POINT (-1.10011 0.54719) -1.330190 -0.000005 \n", + "7 2007 29 POINT (-0.52378 0.05342) -2.265239 -0.140482 \n", + "8 2008 -22 POINT (-0.78217 0.39899) -1.579945 0.066553 \n", + "9 2009 25 POINT (-1.15685 -0.01451) -2.278652 -0.072205 \n", + "\n", + " max growing_degree_day offset \n", + "0 1.651318 21.555756 30 \n", + "1 1.225816 22.582670 30 \n", + "2 2.342201 28.081309 30 \n", + "3 2.942407 31.086575 30 \n", + "4 1.730784 28.519667 30 \n", + "5 2.663564 29.219836 30 \n", + "6 2.123773 23.504370 30 \n", + "7 1.540947 17.021493 30 \n", + "8 2.126795 24.811240 30 \n", + "9 1.981664 23.086738 30 \n", + "0 1.651318 47.154256 60 \n", + "1 1.438610 47.434034 60 \n", + "2 1.763059 51.212831 60 \n", + "3 0.971137 56.817790 60 \n", + "4 1.675573 51.395087 60 \n", + "5 1.746457 51.807783 60 \n", + "6 2.239476 45.974837 60 \n", + "7 2.339075 48.909758 60 \n", + "8 1.665198 48.141322 60 \n", + "9 0.890273 40.782806 60 \n", + "0 2.738706 72.572063 90 \n", + "1 2.237018 74.145259 90 \n", + "2 2.318554 
77.993194 90 \n", + "3 2.853286 86.059123 90 \n", + "4 2.633713 76.107665 90 \n", + "5 2.959994 75.045232 90 \n", + "6 2.327559 71.546147 90 \n", + "7 2.339075 74.034425 90 \n", + "8 2.333445 76.151754 90 \n", + "9 1.726888 62.903792 90 \n", + "0 2.280133 96.333066 120 \n", + "1 1.425064 95.158674 120 \n", + "2 1.672368 112.108506 120 \n", + "3 2.853286 109.082099 120 \n", + "4 1.701810 100.203567 120 \n", + "5 2.419678 98.394958 120 \n", + "6 2.054160 91.635259 120 \n", + "7 2.405996 99.643757 120 \n", + "8 2.292550 101.847952 120 \n", + "9 1.762686 82.931286 120 \n", + "0 1.972587 123.360710 150 \n", + "1 2.317104 122.714397 150 \n", + "2 2.080179 135.090858 150 \n", + "3 2.686143 136.355286 150 \n", + "4 2.232353 125.372210 150 \n", + "5 1.989916 125.594786 150 \n", + "6 1.929659 111.302583 150 \n", + "7 1.872427 122.516384 150 \n", + "8 1.361647 123.346219 150 \n", + "9 1.526836 104.627512 150 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "window_size = 30\n", + "copies = []\n", + "for doy in range(window_size, 365, window_size):\n", + " print(f\"Current DOY: {doy}\")\n", + "\n", + " # Re-express the target relative to the \"current\" day: days remaining until first bloom (negative once bloom has passed)\n", + " data_copy = combined.copy().rename({'DOY_firstbloom': 'Days_until_firstbloom'}, axis=1)\n", + " data_copy.Days_until_firstbloom -= doy\n", + "\n", + " # Extract features from the window_size days ending at the \"current\" day; .loc label slices include both endpoints, hence the +1 (temperature series presumably indexed by integer DOY)\n", + " features = data_copy.temperature.apply(lambda s: s.loc[(doy - window_size + 1):doy].agg(['min', 'mean', 'max']))\n", + " gdd_feature = data_copy.temperature.apply(lambda s: s.abs().cumsum().loc[doy]).rename('growing_degree_day')\n", + "\n", + " # Combine time-specific features with the time-specific target variable and tag each row with its reference day\n", + " combined_copy = pd.concat([data_copy, features, gdd_feature], axis=1).drop('temperature', axis=1) \n", + " combined_copy['offset'] = doy\n", + " copies.append(combined_copy)\n", + "\n", + "final_df = pd.concat(copies)\n", + 
"final_df.head(50)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "final_df.plot.scatter(x='growing_degree_day', y='Days_until_firstbloom')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "springtime", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}