###########################################################################################
# R script wth simplified random walk model for in-canopy transfer modelling of trace gases
# 2018-10-17 - simple version as support for Master 2 Clues course
# Copyright B. Loubet Benjamin.Loubet@inra.fr
###########################################################################################



#=====================================================================================
# libraries
#=====================================================================================
library(chron)
library(tidyr)

#=====================================================================================
# external dependecies
#=====================================================================================

source('Atmosphere_SurfaceResistances.R')
source('DespikeBen.R')
source('Graph_hourly_data_general_v3.R')
source('RandomWalk_model_2018_function.R')

#=====================================================================================
# load one dataset and set controls on ouptuts
# dataset from the COV3ER ADEME project number 1562C0032. 17 October 2018
# Copyright INRA, B. Loubet Benjamin.Loubet@inra.fr
# dataset of 
# - methanol fluxes measured over an oilseed rape Canopy with a PTR-Qi-TOF-MS
# - methanol profiles measured in an oilseed rape Canopy with a PTR-Qi-TOF-MS
# - ICOS datasets
#=====================================================================================

#either load it manually
load("m33.033.Rdata")

#or get it from a calling R script
#MergedDataBase is then set before sourcing this file

#output controls (to control outputs). Comment if sourcing this script
#outputC = 'micromet'
#outputC = 'VOCs'

#=====================================================================================
# filter dataset for NAs
#=====================================================================================
#filter NA in Campbells
filtercampbelldata=(MergedDataBase==-6999) | (MergedDataBase==-9999)
MergedDataBase[filtercampbelldata]=NA

#filter for non chamber data
filterchamber=(MergedDataBase$ValveINRAChambers !=0)
is.na(filterchamber)=F
MergedDataBase=MergedDataBase[filterchamber,]

#filter NA in dates
filterDates=is.na(MergedDataBase$Date)
MergedDataBase=MergedDataBase[!filterDates,]

#drop unused levels
MergedDataBase=droplevels(MergedDataBase)
Names=names(MergedDataBase)
summary(MergedDataBase)

# To unselect PTR parameters
filterPTR=grepl("Means", Names)


#=====================================================================================
# Eddy Covariance data set
#=====================================================================================

filterECmast = (MergedDataBase$ValvePTR==3)
ECdataset=MergedDataBase[filterECmast,!filterPTR]
MixingRatios  = ECdataset$Profile.MixingRatios_VOCs
Vd = -100*ECdataset$Fluxes_VOC_EC  * 0.0224 / MixingRatios #flux in nmol m-2 s-1 -> ppb m s-1 : * air molar volume = 0.0224 m3 mol-1
# max deposition velocity (cm s-1)
VmaxH2O= Vmax(ustar = ECdataset$Ustar..m.s.1.,
              U_at_z=ECdataset$wind_speed,
              z0=1.3*0.1,
              Species='H2O')*100

ECdataset$z_L = 2 /ECdataset$LObukhov # stability parameter assuming z-d = 2 m
#ECdataset$MixingRatios_VOCs_withour0=ECdataset$Profile.MixingRatios_VOCs+ECdataset$Calib.MixingRatios_VOCs
ECdataset$Vd_VOC_EC=Vd
ECdataset$Vmax=VmaxH2O
summary(ECdataset)

#=====================================================================================
# profile data
#=====================================================================================

filterprofile = ((!is.na(MergedDataBase$z)) & (MergedDataBase$ValveINRAChambers==1))
Profiledataset=MergedDataBase[filterprofile,!filterPTR]
Profiledataset=subset(Profiledataset,select=c(Date,Profile.MixingRatios_VOCs,z))#,ValvePTR,ValveINRAChambers))
Profiledataset=unique(Profiledataset)
Profiledataset=spread(Profiledataset,key='z',value=Profile.MixingRatios_VOCs)
summary(Profiledataset)


#merge datasets
Profiledataset=merge(subset(ECdataset,select=Date),Profiledataset,by='Date')
ECdatasetsamedatesAsprofiles=ECdataset[ECdataset$Date %in% Profiledataset$Date,]
ECdatasetsamedatesAsprofiles = cbind(ECdatasetsamedatesAsprofiles,Profiledataset[,-1])

#=====================================================================================
# Calculate dates, hours, etc. 
#=====================================================================================
Date = ECdatasetsamedatesAsprofiles$Date
hours = as.integer(hours(ECdatasetsamedatesAsprofiles$Date))
DOY = as.integer(julian(ECdatasetsamedatesAsprofiles$Date,origin = as.POSIXct("2017-01-01", tz = "GMT")))
month = months(ECdatasetsamedatesAsprofiles$Date)
monthnb = as.numeric(format(ECdatasetsamedatesAsprofiles$Date,"%m"))
weeknb = as.numeric(format(ECdatasetsamedatesAsprofiles$Date,"%U"))
hourlydates=cut(ECdatasetsamedatesAsprofiles$Date,"hours")
timeinformation = data.frame(Date,hours,DOY,month,monthnb,weeknb,hourlydates)
weeknb.hours = as.factor(paste0(weeknb,'.',hours))




#=====================================================================================
# Inverse Modelling analysis
#=====================================================================================

# Set canopy heights, zo and d
ECdatasetsamedatesAsprofiles$hc=1.55
ECdatasetsamedatesAsprofiles$d=0.7*ECdatasetsamedatesAsprofiles$hc
ECdatasetsamedatesAsprofiles$zo=0.1*ECdatasetsamedatesAsprofiles$hc

#chose a week to perform calculations
#dateschosen = (weeknb %in% c(20))
dateschosen = T #-> uncomment for all weeks

weeknb.hours=(weeknb + hours/24)

db=aggregate(ECdatasetsamedatesAsprofiles[dateschosen,],
             by = list(weeknb.hours=weeknb.hours[dateschosen]),FUN=mean,na.rm=T)
profile=aggregate(Profiledataset[dateschosen,],
                  by = list(weeknb.hours=weeknb.hours[dateschosen]),FUN=mean,na.rm=T)
summary(db)

# Set vertical discretisation z_mesh[i]
zmeas = c(0.05,0.25,0.8,1.55,3.0) #m
zsources = c(0,0.15,0.5, 1, 1.55) #m
zmesh = seq(0,zmeas[5],0.1)       #m

# Calculate the dispersion matrix Dij based on a Randow walk model -> uncomment lines below if needed
# DIJ = RandomWalk_model_2018(ustar = db$Ustar,     #canopy height m s-1
#                             LMO=3/db$z_L,         #obukhov length m (3 = measurement height)
#                             hc= db$hc,            #canopy height m
#                             d = db$d,             #displacement height m
#                             zsources = zsources,  #source heights m
#                             z = zmesh)            #target heights m
# 
# save(x=DIJ,file='DIJ.test.Rdata')


#load an existing Dij (already calculated in a previous run)
load('DIJ.AllPeriod.Rdata')

# Calculate S[j] as solution of (C[j] - (zref)) = Dij * S[j] 
# C(zref) set to highest level in profile
dij = DIJ[1:4,1:4,] # DIJ limited to 4 layers
C = profile[,-c(1:2,7)]-profile[,7] # C[j] - C(zref)
S = C + NA #initiate S

# Inverse system (C[j] - (zref)) = Dij * S[j] 
# for each date
for (i in 1:dim(S)[1]){ # dim(S)[1] is time length
  if(any(is.na(C[i,]))){
    S[i,]=NA
  }else {
    a= lm(as.numeric(C[i,]) ~ dij[,,i] - 1)
    S[i,] = a$coefficients
  }
  
}

#print(S)
cumS = t(apply(S,1,cumsum))

#filter weeks
filterdates = db$weeknb.hours > 18 & db$weeknb.hours < 22

title=MergedDataBase$masses[1]
pdf(paste0(title,'.IncanopySources.pdf'),width=21,height=7)
sourceplot=data.frame(week.hours=as.numeric(db$weeknb.hours[filterdates]),
                      Profile=cumS[filterdates,],
                      EC=db$Fluxes_VOC_EC[filterdates],
                      Chamber = db$chamber.Emission[filterdates])

sourceplot = gather(data=sourceplot,key=height,value=Flux,-week.hours,-EC,-Chamber)
a=ggplot(sourceplot, aes(x=week.hours, y=Flux, fill=height)) + 
  geom_area()+
  ggtitle(title)+
  geom_line(aes(x=week.hours,y=EC),size=2)+
  geom_line(aes(x=week.hours,y=Chamber),size=2,color = "blue")+
  theme(text = element_text(size = 24)) # this will change all text size 
print(a)
dev.off()



