Sankeys

Code
knitr::opts_chunk$set(warning=F, message=F, results=F, dev='svg')

library(tidyverse) #loads multiple packages (see https://tidyverse.tidyverse.org/)

#core tidyverse packages loaded:
# ggplot2, for data visualisation. https://ggplot2.tidyverse.org/
# dplyr, for data manipulation. https://dplyr.tidyverse.org/
# tidyr, for data tidying. https://tidyr.tidyverse.org/
# readr, for data import. https://readr.tidyverse.org/
# purrr, for functional programming. https://purrr.tidyverse.org/
# tibble, for tibbles, a modern re-imagining of data frames. https://tibble.tidyverse.org/
# stringr, for strings. https://stringr.tidyverse.org/
# forcats, for factors. https://forcats.tidyverse.org/
# lubridate, for date/times. https://lubridate.tidyverse.org/

#also loads the following packages (less frequently used):
# Working with specific types of vectors:
#     hms, for times. https://hms.tidyverse.org/
# Importing other types of data:
#     feather, for sharing with Python and other languages. https://github.com/wesm/feather
#     haven, for SPSS, SAS and Stata files. https://haven.tidyverse.org/
#     httr, for web apis. https://httr.r-lib.org/
#     jsonlite for JSON. https://arxiv.org/abs/1403.2805
#     readxl, for .xls and .xlsx files. https://readxl.tidyverse.org/
#     rvest, for web scraping. https://rvest.tidyverse.org/
#     xml2, for XML. https://xml2.r-lib.org/
# Modelling
#     modelr, for modelling within a pipeline. https://modelr.tidyverse.org/
#     broom, for turning models into tidy data. https://broom.tidymodels.org/

# Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#loading relevant packages
library(networkD3) #for sankey
library(htmlwidgets) #for sankey
library(readxl) #to read excel file
library(cowplot) #for plot_grid, multiple plots

 #seed for reproducibility of random generation
set.seed(123)

#setting graphs theme
theme_set(
  theme_minimal() +
    theme(
      plot.title = element_text(face="bold")
      )
  )

#setting viridis theme for colors
scale_colour_continuous <- scale_colour_viridis_c
scale_colour_discrete   <- scale_colour_viridis_d
scale_colour_binned     <- scale_colour_viridis_b
#setting viridis theme for fill
scale_fill_continuous <- scale_fill_viridis_c
scale_fill_discrete   <- scale_fill_viridis_d
scale_fill_binned     <- scale_fill_viridis_b

Source <- "Source: \ncomputation by Thomas Starck"

# Load the function file
source("functions.R")

Load data

We load the data produced in the preparation pages, for each water agency basin and at the national scale: wastewater treatment plants inflows and outflows, industry discharge to sewers networks, French nutrient ingestion/excretion, share of population with individual autonomous system (not connected to sewers),

Code
#human ingestion/excretion
path_source <- "output_data/nutrient_ingestion_excretion/" #source of the data
#load France excretions
file_human_excretions <- read_csv(paste0(path_source, "excretions_human_france_kt_year.csv")) %>%
  mutate(basin = "Metropolitan France")
#load water agency basins excretions and merge with France excretions
file_human_excretions <- bind_rows(
  file_human_excretions,
  read_csv(paste0(path_source, "excretions_human_basins_kt_year.csv"))
)  

#large industries network discharge at water agency scale and national scale
path_source <- "output_data/industry_sewers_network_discharge/" #source of the data
file_industry_discharge <- read_csv(paste0(path_source, "industry_sewers_network_discharge_GEREP_basins.csv")) #load data 
#take the mean of the 2015-2020 period
file_industry_discharge <- file_industry_discharge %>%
  filter(Year>2014 & Year<2021) %>%
  group_by(basin) %>%
  summarise(
    across(
      c(
        NGL_in, NGL_out, Pt_in, Pt_out, 
        DBO5_in, DBO5_out, DCO_in, DCO_out, 
        MES_in, MES_out
      ), ~signif(mean(.x, na.rm=T), 3)
    )
  )

#inflow and outflows of WWTP, by basin and at the national scale
path_source <- "output_data/0_final_data/" #source of the data
file_in_out_flows <- read_csv(paste0(path_source, "basins_flows_2015_2020.csv")) #load data

#discharged without treatment (% as share of pollution entering WWTP)
path_source <- "source_data/10_discharge_without_treatment/" #source of the data
file_direct_discharge <- read_csv(paste0(path_source, "discharge_without_treatment_estimations.csv")) #load data

#connection to sewer networks
path_source <- "source_data/pop_sewage_connection/" #source of the data
file_sewage_connection <- read_excel(paste0(path_source, "pop_basins_sewage_connection.xlsx"), range = "A1:F8")

#individual autonomous system balance
path_source <- "source_data/pop_sewage_connection/" #source of the data
file_IAS_balance <- read_csv(paste0(path_source, "balance_individual_autonomous_system.csv"))

#sludge destination
path_source <- "output_data/sludge_destination/"
file_sludge_destination <- read_csv(paste0(path_source, "sludge_destination.csv"))

#N:P ratio of sludge
path_source <- "output_data/sludge_composition/" #source of the data
file_sludge_composition <- read_csv(paste0(path_source, "N_P_sludge_review.csv"))
N_P_ratio_sludge_review <- 
  file_sludge_composition$N_P_ratio[file_sludge_composition$compost=="not composted"]

#NH3 volatilization during composting
N_compost_volatilization <- 0.3

#proportion excretions to sewers from people with Individual Autonomous System (due to excretions in public spaces connected to sewers)
excre_not_at_house <- 0.2
Code
#compute direct discharge at national scale
temp <- left_join(file_in_out_flows, file_direct_discharge, by="basin") %>%
  select(basin, Pt_in_adj, NGL_in_adj, percent_loss) %>%
  filter(basin != "Metropolitan France")
#compute national percent direct discharge, weighted by basin pollution flows
perc_Pt_loss <- weighted.mean(temp$percent_loss, temp$Pt_in_adj)
perc_NGL_loss <- weighted.mean(temp$percent_loss, temp$NGL_in_adj)
perc_loss <- round(mean(perc_Pt_loss, perc_NGL_loss), 2)

#very similar for N and P, we take the average 
file_direct_discharge <- 
  rbind(
    file_direct_discharge, 
    data.frame(basin = "Metropolitan France", percent_loss = perc_loss)
    )
rm(perc_Pt_loss, perc_NGL_loss, perc_loss, temp)

Sankeys P

Code
f_P_file <- function(file_name, basin_selected){
  #select values of basin 
  human_excretions <- file_human_excretions %>%
  filter(basin == basin_selected)

  industry_discharge <- file_industry_discharge %>%
    filter(basin == basin_selected) %>%
    select(basin, NGL_in, Pt_in, DBO5_in, DCO_in, MES_in)
  
  sewage_connection <- file_sewage_connection %>%
    filter(basin == basin_selected)
  
  in_out_flows <- file_in_out_flows %>%
    filter(basin == basin_selected) 
  
  direct_discharge <- file_direct_discharge %>%
    filter(basin == basin_selected) 
  
  sludge_destination <- file_sludge_destination %>%
    filter(basin == basin_selected)
  
  
  #create empty sankey file
  sankey <- 
    data.frame(
      source = character(), 
      target = character(),
      flow_group = character(),
      value = numeric()
      )
  
  #NETWORK REPARTITION
  #P excretions to sewage network
  sankey[nrow(sankey) + 1,] = 
    list(
      "excretion", "sewage network", "excretion", 
      human_excretions$P_excretion*(
        (1-sewage_connection$share_IAS) + sewage_connection$share_IAS*(excre_not_at_house)
        )
      )
  
  #P industry to sewage network
  sankey[nrow(sankey) + 1,] = 
    list(
      "large industries", "sewage network", "large industries", 
      industry_discharge$Pt_in
      )
  
  #P from network to WWTP (flow in WWTP)
  sankey[nrow(sankey) + 1,] = 
    list(
      "sewage network", "WWTP", "WWTP", 
      in_out_flows$Pt_in_adj
      )
  #P out WWTP in water
  sankey[nrow(sankey) + 1,] = 
    list(
      "WWTP", "water", "water", 
      in_out_flows$Pt_out_adj
      )
  #P discharge without treatment
  sankey[nrow(sankey) + 1,] = 
    list(
      "sewage network", "water", "water", 
      in_out_flows$Pt_in_adj*direct_discharge$percent_loss
      )
  #P residual 
  sankey[nrow(sankey) + 1,] = 
    list(
      "residual", "sewage network", "large industries", 
      sankey$value[sankey$source=="sewage network" & sankey$target=="water"] +
        sankey$value[sankey$source=="sewage network" & sankey$target=="WWTP"] -
        sankey$value[sankey$source=="large industries"] -
        sankey$value[sankey$source=="excretion" & sankey$target=="sewage network"] 
      )
  
  
  #P excretions to IAS
  sankey[nrow(sankey) + 1,] = 
    list(
      "excretion", "IAS", "excretion", 
      human_excretions$P_excretion*sewage_connection$share_IAS*(1-excre_not_at_house)
      )
  #residuals P to IAS
  sankey[nrow(sankey) + 1,] = 
    list(
      "residual", "IAS", "sewage network", 
      sankey$value[sankey$source=="residual" & sankey$target=="sewage network"]*
        sewage_connection$share_IAS/(1-sewage_connection$share_IAS)
      )
  
  #P IAS to water and to WWTP
  sankey[nrow(sankey) + 1,] = 
    list(
      "IAS", "groundwater", "water", 
      IAS_balance$water*(
        sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
        )
      )
  sankey[nrow(sankey) + 1,] = 
    list(
      "IAS", "WWTP", "WWTP", 
      IAS_balance$sludge*(
        sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
        )
      )
  
  
  #P in sludge (add also the flow IAS to WWTP for approximation rounding)
  sankey[nrow(sankey) + 1,] = 
    list(
      "WWTP", "sludge", "sludge", 
      in_out_flows$Pt_in_adj-in_out_flows$Pt_out_adj +
        sankey$value[sankey$source=="IAS" & sankey$target=="WWTP"]
      )
  #P in sludge returning to farming system
  sankey[nrow(sankey) + 1,] = 
    list(
      "sludge", "composted, spread", "sludge", 
      sankey$value[sankey$target=="sludge"]*(sludge_destination$spread+sludge_destination$composted)
      )
  #P not returning to farming system
  sankey[nrow(sankey) + 1,] = 
    list(
      "sludge", "landfill, incineration..", "lost", 
      sankey$value[sankey$target=="sludge"]-sankey$value[sankey$target=="composted, spread"]
      )

  
  #rounding the results
  sankey$value <- round(sankey$value, 2)
  
  #save file
  f_save_csv_files(
    sankey,
    "output_data/sankey_flows/phosphorus/",
    file_name
  )
  
  return(sankey)
}

f_sankey <- function(sankey){
  # nodes names for the sankey 
  nodes <- data.frame(
    name=c(as.character(sankey$source), as.character(sankey$target)) %>% 
      unique())
  
  # With networkD3, connection must be provided using id, not using real name like in the links dataframe. So we need to add it.
  sankey$IDsource <- match(sankey$source, nodes$name)-1
  sankey$IDtarget <- match(sankey$target, nodes$name)-1
  
  # Colors groups (for nodes and flow links)
  nodes$group <- as.factor(c("nodes_group"))
  my_color <-
  'd3.scaleOrdinal()
  .domain(["excretion","large industries" ,"lost", "water", "sludge", "nodes_group"])
  .range(["#7d6608" , "grey" , "#5e5e5e", "#2e86c1", "#6e2c00", "black"])'

  #to ba able to show decimales on the snkey graph
  # see here https://stackoverflow.com/questions/72129768/r-networkd3-issues
  # see also https://stackoverflow.com/questions/74259905/network-d3-sankey-node-value-too-precise-using-htmlwidgets
customJS <- '
  function(el,x) { 
      var link = d3.selectAll(".link");
  
      var format = d3.formatLocale({"decimal": ".", "thousands": ",", "grouping": [3], "currency": ["", "\u00a0€"]}).format(",.1f");
  
      link.select("title").select("body")
          .html(function(d) { return "<pre>" + d.source.name + " \u2192 " + d.target.name +
              "\\n" + format(d.value) + " ktP" + "<pre>"; });
              
      d3.select(el).selectAll(".node text")
          .html(function(d) { return d.name + " " + format(d.value) + " ktP"; });
              
  }
  '
  
  # Make the sankey
  p <- 
    sankeyNetwork(
      Links = sankey, Nodes = nodes, Source = "IDsource", Target = "IDtarget",
      Value = "value", NodeID = "name", colourScale=my_color,
      fontSize=25, units = "ktP", nodePadding = 50,
      sinksRight = T, margin = c(top = 0, right = 0, bottom = 0, left = 0),
      LinkGroup="flow_group", NodeGroup="group"
      )
  
onRender(p, customJS)
}

f_table <- function(sankey){
  # table to see the flow values between nodes
  knitr::kable(
    sankey %>% 
      select(source, target, value) %>%
      mutate(value = round(value, digits = 1)), 
    caption ="Values of the flows (ktP)") %>%
    kableExtra::kable_styling(full_width = F)
}
Code
#select IAS balance of P for all sankeys
IAS_balance <- file_IAS_balance %>%
  filter(nutrient=="P")
Code
sankey_P <- f_P_file("sankey_P_flows_France.csv", "Metropolitan France")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 24.9
large industries sewage network 1.1
sewage network WWTP 30.8
WWTP water 6.6
sewage network water 3.4
residual sewage network 8.1
excretion IAS 3.5
residual IAS 1.5
IAS groundwater 4.2
IAS WWTP 0.7
WWTP sludge 24.9
sludge composted, spread 19.2
sludge landfill, incineration.. 5.7
Code
sankey_P <- f_P_file("sankey_P_flows_Artois_Picardie.csv", "Artois-Picardie")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 1.9
large industries sewage network 0.2
sewage network WWTP 2.2
WWTP water 0.4
sewage network water 0.4
residual sewage network 0.6
excretion IAS 0.2
residual IAS 0.1
IAS groundwater 0.3
IAS WWTP 0.0
WWTP sludge 1.9
sludge composted, spread 1.8
sludge landfill, incineration.. 0.1
Code
sankey_P <- f_P_file("sankey_P_flows_Rhin_Meuse.csv", "Rhin-Meuse")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 1.8
large industries sewage network 0.1
sewage network WWTP 2.0
WWTP water 0.3
sewage network water 0.4
residual sewage network 0.5
excretion IAS 0.1
residual IAS 0.0
IAS groundwater 0.1
IAS WWTP 0.0
WWTP sludge 1.7
sludge composted, spread 1.4
sludge landfill, incineration.. 0.3
Code
sankey_P <- f_P_file("sankey_P_flows_Seine_Normandie.csv", "Seine-Normandie")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 7.5
large industries sewage network 0.1
sewage network WWTP 9.1
WWTP water 1.5
sewage network water 0.9
residual sewage network 2.4
excretion IAS 0.4
residual IAS 0.2
IAS groundwater 0.5
IAS WWTP 0.1
WWTP sludge 7.7
sludge composted, spread 5.1
sludge landfill, incineration.. 2.6
Code
sankey_P <- f_P_file("sankey_P_flows_Loire_Bretagne.csv", "Loire-Bretagne")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 4.6
large industries sewage network 0.5
sewage network WWTP 5.8
WWTP water 0.8
sewage network water 0.9
residual sewage network 1.6
excretion IAS 1.1
residual IAS 0.5
IAS groundwater 1.4
IAS WWTP 0.2
WWTP sludge 5.2
sludge composted, spread 4.7
sludge landfill, incineration.. 0.6
Code
sankey_P <- f_P_file("sankey_P_flows_Adour_Garonne.csv", "Adour-Garonne")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 2.6
large industries sewage network 0.1
sewage network WWTP 3.5
WWTP water 1.1
sewage network water 0.2
residual sewage network 1.1
excretion IAS 0.8
residual IAS 0.5
IAS groundwater 1.1
IAS WWTP 0.2
WWTP sludge 2.6
sludge composted, spread 2.3
sludge landfill, incineration.. 0.3
Code
sankey_P <- f_P_file("sankey_P_flows_Rhone_Mediterranee.csv", "Rhône-Méditerranée")

Sankey

Code
f_sankey(sankey_P)

Table

Code
f_table(sankey_P)
Values of the flows (ktP)
source target value
excretion sewage network 6.1
large industries sewage network 0.1
sewage network WWTP 8.2
WWTP water 2.5
sewage network water 0.6
residual sewage network 2.5
excretion IAS 0.7
residual IAS 0.4
IAS groundwater 0.9
IAS WWTP 0.2
WWTP sludge 5.9
sludge composted, spread 4.2
sludge landfill, incineration.. 1.7

Sankeys N

Code
f_N_file <- function(file_name, basin_selected){
  #select basin values
  human_excretions <- file_human_excretions %>%
  filter(basin == basin_selected)

  industry_discharge <- file_industry_discharge %>%
    filter(basin == basin_selected) %>%
    select(basin, NGL_in, Pt_in, DBO5_in, DCO_in, MES_in)
  
  sewage_connection <- file_sewage_connection %>%
    filter(basin == basin_selected)
  
  in_out_flows <- file_in_out_flows %>%
    filter(basin == basin_selected) 
  
  direct_discharge <- file_direct_discharge %>%
    filter(basin == basin_selected) 
  
  sludge_destination <- file_sludge_destination %>%
    filter(basin == basin_selected) 
  
  #create empty sankey file
  sankey <- 
    data.frame(
      source = character(), 
      target = character(),
      flow_group = character(),
      value = numeric()
      )
  
  #NETWORK REPARTITION
  #N excretions to sewage network
  sankey[nrow(sankey) + 1,] = 
    list(
      "excretion", "sewage network", "excretion", 
      human_excretions$N_excretion*(
        (1-sewage_connection$share_IAS) + sewage_connection$share_IAS*(excre_not_at_house)
        )
      )
  #N industry to sewage network
  sankey[nrow(sankey) + 1,] = 
    list(
      "large industries", "sewage network", "large industries", 
      industry_discharge$NGL_in
      )
  
  #N from network to WWTP (taking into account IAS inputs)
  sankey[nrow(sankey) + 1,] = 
    list(
      "sewage network", "WWTP", "WWTP", 
      in_out_flows$NGL_in_adj
      )
  #N out of WWTP to water
  sankey[nrow(sankey) + 1,] = 
    list(
      "WWTP", "water", "water", 
      in_out_flows$NGL_out_adj
      )
  #N discharge without treatment
  sankey[nrow(sankey) + 1,] = 
    list(
      "sewage network", "water", "water", 
      in_out_flows$NGL_in_adj*direct_discharge$percent_loss
      )
  
  #N residual to sewage
  sankey[nrow(sankey) + 1,] = 
    list(
      "residual", "sewage network", "large industries", 
      sankey$value[sankey$source=="sewage network" & sankey$target=="water"] +
        sankey$value[sankey$source=="sewage network" & sankey$target=="WWTP"] -
        sankey$value[sankey$source=="large industries"] -
        sankey$value[sankey$source=="excretion" & sankey$target=="sewage network"] 
      )
  
  #N excretions to IAS
  sankey[nrow(sankey) + 1,] = 
    list(
      "excretion", "IAS", "excretion", 
      human_excretions$N_excretion*sewage_connection$share_IAS*(1-excre_not_at_house)
      )
  #N residual to IAS
  sankey[nrow(sankey) + 1,] = 
    list(
      "residual", "IAS", "sewage network", 
      sankey$value[sankey$source=="residual" & sankey$target=="sewage network"]*
        sewage_connection$share_IAS/(1-sewage_connection$share_IAS)
      )
  
  #N IAS to water, air and to WWTP
  sankey[nrow(sankey) + 1,] = 
    list(
      "IAS", "groundwater", "water", 
      IAS_balance$water*(
        sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
        )
      )
  sankey[nrow(sankey) + 1,] = 
    list(
      "IAS", "WWTP", "WWTP", 
      IAS_balance$sludge*(
        sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
        )
      )
  sankey[nrow(sankey) + 1,] = 
    list(
      "IAS", "air", "air", 
      IAS_balance$air*(
        sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
        )
      )

  #N in sludge
  sankey[nrow(sankey) + 1,] =
    list(
      "WWTP", "sludge", "sludge",
      P_sludge*N_P_ratio_sludge_review
      )
  #N de-nitrified
  sankey[nrow(sankey) + 1,] =
    list(
      "WWTP", "air", "air",
      #N into WWTP
      sankey$value[sankey$source=="sewage network" & sankey$target=="WWTP"]
      #minus N into into water
      -sankey$value[sankey$source=="WWTP" & sankey$target=="water"]
      #minus N in sludge
      -sankey$value[sankey$source=="WWTP" & sankey$target=="sludge"] +
      #add N IAS to WWTP to keep consistent mass balance (approximation)+
      sankey$value[sankey$source=="IAS" & sankey$target=="WWTP"]
      )
  
  #N in sludge directly spread
  sankey[nrow(sankey) + 1,] = 
    list(
      "sludge", "spread", "sludge", 
      sankey$value[sankey$target=="sludge"]*(sludge_destination$spread)
      )
  
  #N in sludge composted
  sankey[nrow(sankey) + 1,] = 
    list(
      "sludge", "composted", "sludge", 
      sankey$value[sankey$target=="sludge"]*(sludge_destination$composted)
      )
  
  #N not returning to farming system
  sankey[nrow(sankey) + 1,] =
    list(
      "sludge", "landfill, incineration..", "lost",
      sankey$value[sankey$target=="sludge"]-
        sankey$value[sankey$target=="composted"]-
        sankey$value[sankey$target=="spread"]
      )

  #N in sludge composted N losses volatilizations
  sankey[nrow(sankey) + 1,] = 
    list(
      "composted", "air", "air", 
      sankey$value[sankey$target=="composted"]*N_compost_volatilization
      )
  
  #N compost returning to farming system
  sankey[nrow(sankey) + 1,] =
    list(
      "composted", "recycled", "sludge",
      sankey$value[sankey$target=="composted"]*(1-N_compost_volatilization)
      )
  
  #N spread returning to farming system
  sankey[nrow(sankey) + 1,] =
    list(
      "spread", "recycled", "sludge",
      sankey$value[sankey$target=="spread"]
    )
  
  
  
  
  
  
  
  
  #recompute N IAS to water and to WWTP
  sankey$value[sankey$source=="IAS" & sankey$target=="groundwater"] <- 
    IAS_balance$water*(
      sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
    )
  sankey$value[sankey$source=="IAS" & sankey$target=="WWTP"] <- 
    IAS_balance$sludge*(
      sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
    )
  sankey$value[sankey$source=="IAS" & sankey$target=="air"] <- 
    IAS_balance$air*(
      sankey$value[sankey$source=="excretion" & sankey$target=="IAS"] +
        sankey$value[sankey$source=="residual" & sankey$target=="IAS"]
    )
  
  #recompute N from network to WWTP (taking into account IAS inputs)
  sankey$value[sankey$source=="sewage network" & sankey$target=="WWTP"] <- 
    in_out_flows$NGL_in_adj-sankey$value[sankey$source=="IAS" & sankey$target=="WWTP"]
  
  #rounding the results
  sankey$value <- round(sankey$value, 2)
  
  #save file
  f_save_csv_files(
    sankey,
    "output_data/sankey_flows/nitrogen/",
    file_name
  )
  
  return(sankey)
}

f_sankey <- function(sankey){
  # nodes names for the sankey 
  nodes <- data.frame(
    name=c(as.character(sankey$source), as.character(sankey$target)) %>% 
      unique())
  
  # With networkD3, connection must be provided using id, not using real name like in the links dataframe. So we need to add it.
  sankey$IDsource <- match(sankey$source, nodes$name)-1
  sankey$IDtarget <- match(sankey$target, nodes$name)-1
  
  # Colors groups (for nodes and flow links)
  nodes$group <- as.factor(c("nodes_group"))
  my_color <-
  'd3.scaleOrdinal()
  .domain(["excretion", "large industries", "lost", "water", "sludge", "air", "nodes_group"])
  .range(["#7d6608", "grey", "#5e5e5e", "#2e86c1", "#6e2c00", "#3ab02a", "black"])'
  
  #to ba able to show decimales on the snkey graph
  # see here https://stackoverflow.com/questions/72129768/r-networkd3-issues
  # see also https://stackoverflow.com/questions/74259905/network-d3-sankey-node-value-too-precise-using-htmlwidgets
customJS <- '
  function(el,x) { 
      var link = d3.selectAll(".link");
  
      var format = d3.formatLocale({"decimal": ".", "thousands": ",", "grouping": [3], "currency": ["", "\u00a0€"]}).format(",.1f");
  
      link.select("title").select("body")
          .html(function(d) { return "<pre>" + d.source.name + " \u2192 " + d.target.name +
              "\\n" + format(d.value) + " ktN" + "<pre>"; });
              
      d3.select(el).selectAll(".node text")
          .html(function(d) { return d.name + " " + format(d.value) + " ktN"; });
              
  }
  '
  
  # Make the sankey
  p <- 
    sankeyNetwork(
      Links = sankey, Nodes = nodes, Source = "IDsource", Target = "IDtarget",
      Value = "value", NodeID = "name", colourScale=my_color,
      fontSize=25, units = "ktP", nodePadding = 50,
      sinksRight = T, margin = c(top = 0, right = 0, bottom = 0, left = 0),
      LinkGroup="flow_group", NodeGroup="group"
      )
  
onRender(p, customJS)
}

f_table <- function(sankey){
  # table to see the flow values between nodes
  knitr::kable(
    sankey %>% 
      select(source, target, value) %>%
      mutate(value = round(value, digits = 1)), 
    caption ="Values of the flows (ktN)") %>%
    kableExtra::kable_styling(full_width = F)
}

#source of files for P sludge
source_data <- "output_data/sankey_flows/phosphorus/"
Code
#select IAS N balance for all sankeys
IAS_balance <- file_IAS_balance %>%
  filter(nutrient=="N")
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_France.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_France.csv", "Metropolitan France")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 260.9
large industries sewage network 7.5
sewage network WWTP 256.0
WWTP water 72.1
sewage network water 28.4
residual sewage network 18.0
excretion IAS 36.1
residual IAS 3.2
IAS groundwater 28.7
IAS WWTP 2.0
IAS air 9.1
WWTP sludge 49.9
WWTP air 138.0
sludge spread 17.0
sludge composted 21.4
sludge landfill, incineration.. 11.5
composted air 6.4
composted recycled 15.0
spread recycled 17.0
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_Artois_Picardie.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_Artois_Picardie.csv", "Artois-Picardie")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 19.3
large industries sewage network 1.0
sewage network WWTP 17.9
WWTP water 4.0
sewage network water 3.6
residual sewage network 1.3
excretion IAS 2.7
residual IAS 0.2
IAS groundwater 2.1
IAS WWTP 0.1
IAS air 0.7
WWTP sludge 3.8
WWTP air 10.4
sludge spread 2.5
sludge composted 1.2
sludge landfill, incineration.. 0.1
composted air 0.3
composted recycled 0.8
spread recycled 2.5
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_Rhin_Meuse.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_Rhin_Meuse.csv", "Rhin-Meuse")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 19.1
large industries sewage network 0.9
sewage network WWTP 17.0
WWTP water 3.0
sewage network water 3.4
residual sewage network 0.5
excretion IAS 0.9
residual IAS 0.0
IAS groundwater 0.7
IAS WWTP 0.0
IAS air 0.2
WWTP sludge 3.4
WWTP air 10.7
sludge spread 1.4
sludge composted 1.4
sludge landfill, incineration.. 0.6
composted air 0.4
composted recycled 1.0
spread recycled 1.4
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_Seine_Normandie.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_Seine_Normandie.csv", "Seine-Normandie")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 78.3
large industries sewage network 1.0
sewage network WWTP 78.7
WWTP water 20.0
sewage network water 7.9
residual sewage network 7.6
excretion IAS 4.7
residual IAS 0.6
IAS groundwater 3.8
IAS WWTP 0.3
IAS air 1.2
WWTP sludge 15.4
WWTP air 43.9
sludge spread 5.2
sludge composted 4.9
sludge landfill, incineration.. 5.2
composted air 1.5
composted recycled 3.5
spread recycled 5.2
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_Loire_Bretagne.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_Loire_Bretagne.csv", "Loire-Bretagne")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 48.4
large industries sewage network 2.9
sewage network WWTP 46.4
WWTP water 7.1
sewage network water 7.0
residual sewage network 2.8
excretion IAS 11.6
residual IAS 0.9
IAS groundwater 9.1
IAS WWTP 0.6
IAS air 2.9
WWTP sludge 10.5
WWTP air 30.1
sludge spread 5.7
sludge composted 3.7
sludge landfill, incineration.. 1.1
composted air 1.1
composted recycled 2.6
spread recycled 5.7
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_Adour_Garonne.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_Adour_Garonne.csv", "Adour-Garonne")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 27.4
large industries sewage network 0.6
sewage network WWTP 28.5
WWTP water 11.0
sewage network water 2.0
residual sewage network 3.1
excretion IAS 8.6
residual IAS 1.3
IAS groundwater 7.3
IAS WWTP 0.5
IAS air 2.3
WWTP sludge 5.2
WWTP air 13.3
sludge spread 0.8
sludge composted 3.8
sludge landfill, incineration.. 0.6
composted air 1.1
composted recycled 2.7
spread recycled 0.8
Code
P_sludge <- read_csv(paste0(source_data, "sankey_P_flows_Rhone_Mediterranee.csv")) 
P_sludge <- P_sludge$value[P_sludge$source=="WWTP" & P_sludge$target=="sludge" ]

sankey_N <- f_N_file("sankey_N_flows_Rhone_Mediterranee.csv", "Rhône-Méditerranée")

Sankey

Code
f_sankey(sankey_N)

Table

Code
f_table(sankey_N)
Values of the flows (ktN)
source target value
excretion sewage network 64.8
large industries sewage network 1.1
sewage network WWTP 67.6
WWTP water 27.0
sewage network water 4.8
residual sewage network 6.9
excretion IAS 7.2
residual IAS 1.0
IAS groundwater 6.0
IAS WWTP 0.4
IAS air 1.9
WWTP sludge 11.7
WWTP air 29.7
sludge spread 1.6
sludge composted 6.7
sludge landfill, incineration.. 3.4
composted air 2.0
composted recycled 4.7
spread recycled 1.6

Uncertainties French Flows

Code
#function to extract sankey value from source and target nodes
f_extract_flow <- function(source_label, target_label, sankey_file){
  
  value_extracted <- sankey_file %>% 
    filter(
      source == source_label, 
      target == target_label
      ) %>%
    pull(value)
  
  return(value_extracted)
}

#function to add 5-95 percentile range to main sankey file
f_add_min_max <- function(source_label, target_label, distribution, sankey_file){
  
  #add 2.5% percentile
  sankey_file$min[sankey_file$source==source_label & sankey_file$target==target_label] <- 
    round(quantile(distribution, c(.025, .975)), 1)[1]
  
  #add 97.5% percentile
  sankey_file$max[sankey_file$source==source_label & sankey_file$target==target_label] <- 
    round(quantile(distribution, c(.025, .975)), 1)[2]
  
  return(sankey_file)
}


#number of iterations for monte-carlo simualtion: 1 million
num_vals <- as.integer(10^6)

#for normal distributions (all at 10% uncertainty) we consider that 95% of values are in the +/- 10% interval
#so the 2 sigmas interval (95% of values) is +/- 10%, i.e. sigma = 10%/2 of the mean
per_SD_normal <-  0.1/2

#function to generate normal distribution from a sankey value, defined by its source and target nodes
f_generate_normal_dstribution <- function(source_label, target_label, sankey_file, print){
  
  sankey_value <- f_extract_flow(source_label, target_label, sankey_file)
  distr <- rnorm(n = num_vals, mean = sankey_value, sd = sankey_value * per_SD_normal)
  
  # if want to plot the distribution
  if (print==TRUE) {
    hist(distr)
  }
  
  return(distr)
}

P

Code
#reload Sankey P for whole France
sankey_P <- read_csv("output_data/sankey_flows/phosphorus/sankey_P_flows_France.csv")
sankey_P$min <- NA #create min column for low range of percentile ditribution
sankey_P$max <- NA #create max column for high range of percentile ditribution



# P IN WWTP (sewage network to WWTP)
# normal distribution 10% uncertainty
source <- "sewage network" ; target <- "WWTP" #source and target of the flow
P_in_WWTP <- f_generate_normal_dstribution(source, target, sankey_P, print = F) #create flow distribution
sankey_P <- f_add_min_max(source, target, P_in_WWTP, sankey_P) #add min and max percentiles values



# P OUT WWTP (WWTP to water), 
# normal distribution 10% uncertainty
source <- "WWTP" ; target <- "water" #source and target of the flow
P_out_WWTP <- f_generate_normal_dstribution(source, target, sankey_P, print = F) #create flow distribution
sankey_P <- f_add_min_max(source, target, P_out_WWTP, sankey_P) #add min and max percentiles values



# P INDUSTRIES (large industries to sewage network)
# normal distribution 10% uncertainty
source <- "large industries" ; target <- "sewage network" #source and target of the flow
P_industries <- f_generate_normal_dstribution(source, target, sankey_P, print = F) #create flow distribution
sankey_P <- f_add_min_max(source, target, P_industries, sankey_P) #add min and max percentiles values



# P EXCRETIONS TO SEWERS AND IAS

# excretions. Uncertainty is a uniform distribution based on our value of excretion by per capita (0.44 kgP/year), compared to the range of our literature review (0.44 to 0.58)
P_excreted_value <- f_extract_flow("excretion", "sewage network", sankey_P) + f_extract_flow("excretion", "IAS", sankey_P)
P_excreted <- runif(n = num_vals, min = P_excreted_value*0.44/0.44, max = P_excreted_value*0.58/0.44)

# fraction of the population using individual autonomous systems, uniform distribution 15 to 20% (best estimate 18%)
ias_pop <- runif(n = num_vals, min = 0.15, max = 0.2)

# fraction of the excretion from people using IAS that is excreted in public spaces connected to sewers, uniform distribution 5 to 35%
frac_from_ias_to_sewers <- runif(n = num_vals, min = 0.05, max = 0.35)

# P EXCRETIONS TO IAS
P_excr_IAS <- P_excreted*ias_pop*(1-frac_from_ias_to_sewers)
sankey_P <- f_add_min_max("excretion", "IAS", P_excr_IAS, sankey_P) #add min and max percentiles values

# P EXCRETIONS TO SEWERS 
P_excr_sewers <-  P_excreted - P_excr_IAS
sankey_P <- f_add_min_max("excretion", "sewage network", P_excr_sewers, sankey_P) #add min and max percentiles values



# RESIDUALS TO SEWERS

# fraction lost in the sewers before reaching WWTPs, uniform distribution 5 to 20 %
frac_sewer_discharge <- runif(n = num_vals, min = 0.05, max = 0.2) #create flow distribution

# incoming P in the sewers
P_in_sewers <-  P_in_WWTP / (1 - frac_sewer_discharge)

# first compute residuals coming to sewers
P_residual_sewers = P_in_sewers - P_excr_sewers - P_industries #compute residuals

#but sometimes residuals to sewers are <0, not possible. Handle this problem
missing_input <- P_residual_sewers < 0 # checks if negative, i.e. P in sewers < P exr + P industries: this means there cannot be a residual
P_in_sewers[missing_input] <- P_excr_sewers[missing_input] + P_industries[missing_input]  # if so, update P in sewers, equals to excr + industries
P_residual_sewers[missing_input] = 0  # and set the residual to zero
sankey_P <- f_add_min_max("residual", "sewage network", P_residual_sewers, sankey_P) #add min and max percentiles values



# RESIDUALS TO IAS
P_residual_IAS <-  P_residual_sewers * ias_pop / (1 - ias_pop)  # residual assigned to IAS
sankey_P <- f_add_min_max("residual", "IAS", P_residual_IAS, sankey_P) #add min and max percentiles values



# LOSSES IN SEWERS
P_lost_sewers <-  P_in_sewers*frac_sewer_discharge
sankey_P <- f_add_min_max("sewage network", "water", P_lost_sewers, sankey_P) #add min and max percentiles values



# IAS TO GROUNDWATER

# share of flow coming to IAS ending in gorundwater (uniform 80-90%)
frac_loss_IAS = runif(n=num_vals, min=0.8, max=0.9)

# IAS to groundwater
P_loss_IAS <-  frac_loss_IAS * (P_residual_IAS + P_excr_IAS)
sankey_P <- f_add_min_max("IAS", "groundwater", P_loss_IAS, sankey_P) #add min and max percentiles values



# IAS TO WWTP 
P_IAS_to_WWTP <-  P_residual_IAS + P_excr_IAS - P_loss_IAS
sankey_P <- f_add_min_max("IAS", "WWTP", P_IAS_to_WWTP, sankey_P) #add min and max percentiles values



# P SLUDGE (from WWTP to sludge)
P_sludge <-  P_in_WWTP - P_out_WWTP + P_IAS_to_WWTP
sankey_P <- f_add_min_max("WWTP", "sludge", P_sludge, sankey_P) #add min and max percentiles values



# P COMPOSTED OR SPREAD

# share of sludge that is composted or spread, normal distribution 10% uncertainty
frac_sluge_recycled_value <- f_extract_flow("sludge", "composted, spread", sankey_P) /f_extract_flow("WWTP", "sludge", sankey_P)
frac_sluge_recycled <- rnorm(n=num_vals, mean=frac_sluge_recycled_value, sd=frac_sluge_recycled_value*per_SD_normal)

# sludge composted or spread
P_recycled <-  frac_sluge_recycled*P_sludge
sankey_P <- f_add_min_max("sludge", "composted, spread", P_recycled, sankey_P) #add min and max percentiles values



# P LANDFILLED OR BURNT
P_landfilled_burnt <-  (P_sludge - P_recycled)
sankey_P <- f_add_min_max("sludge", "landfill, incineration..", P_landfilled_burnt, sankey_P) #add min and max percentiles values
Code
# now save the Sankey with the min and max values
write_csv(sankey_P, "output_data/sankey_flows/phosphorus/sankey_P_flows_France.csv")


# Global Assessment

# total P
P_tot <-  (P_excreted + P_residual_sewers + P_residual_IAS + P_industries)
round(quantile(P_tot, c(.025, .975)), 1)

# removal efficiency
removal_efficiency <- (1-(P_out_WWTP)/P_in_WWTP)*100
round(quantile(removal_efficiency, c(.025, .975)))

# IAS and sewers losses
P_IAS_and_sewers <- (P_IAS_to_WWTP + P_lost_sewers)
round(quantile(P_IAS_and_sewers, c(.025, .975)), 1)

# total residuals
P_residuals <- (P_residual_IAS + P_residual_sewers)
round(quantile(P_residuals, c(.025, .975)), 1)

#OUT

# % diffuse losses + surface water
ratio_losses <-  (P_loss_IAS + P_out_WWTP + P_lost_sewers) / P_tot *100
round(quantile(ratio_losses, c(.025, .975)))

# % recycled
ratio_spread <-  P_recycled / P_tot *100
round(quantile(ratio_spread, c(.025, .975)))

# % landfilled or burnt
ratio_landfilled_burnt <- P_landfilled_burnt/P_tot*100
round(quantile(ratio_landfilled_burnt, c(.025, .975)))


# IN

# share residuals
ratio_residual <-  (P_residual_IAS + P_residual_sewers) / P_tot *100
round(quantile(ratio_residual, c(.025, .975)))

# share excretion
ratio_excretions <-  P_excreted / P_tot *100
round(quantile(ratio_excretions, c(.025, .975)))

# share industries
ratio_industries <-  P_industries / P_tot *100
round(quantile(ratio_industries, c(.025, .975)))

N

Code
#reload Sankey N for whole France
sankey_N <- read_csv("output_data/sankey_flows/nitrogen/sankey_N_flows_France.csv")
sankey_N$min <- NA #create min column for low range of percentile ditribution
sankey_N$max <- NA #create max column for high range of percentile ditribution



# N IN WWTP (sewage network to WWTP)
# normal distribution 10% uncertainty
source <- "sewage network" ; target <- "WWTP" #source and target of the flow
N_in_WWTP <- f_generate_normal_dstribution(source, target, sankey_N, print = F) #create flow distribution
sankey_N <- f_add_min_max(source, target, N_in_WWTP, sankey_N) #add min and max percentiles values



# N OUT WWTP (WWTP to water), 
# normal distribution 10% uncertainty
source <- "WWTP" ; target <- "water" #source and target of the flow
N_out_WWTP <- f_generate_normal_dstribution(source, target, sankey_N, print = F) #create flow distribution
sankey_N <- f_add_min_max(source, target, N_out_WWTP, sankey_N) #add min and max percentiles values



# N INDUSTRIES (large industries to sewage network)
# normal distribution 10% uncertainty
source <- "large industries" ; target <- "sewage network" #source and target of the flow
N_industries <- f_generate_normal_dstribution(source, target, sankey_N, print = F) #create flow distribution
sankey_N <- f_add_min_max(source, target, N_industries, sankey_N) #add min and max percentiles values



# N EXCRETIONS TO SEWERS AND IAS

# excretions, normal distribution 10% uncertainty
N_excreted_value <- f_extract_flow("excretion", "sewage network", sankey_N) + f_extract_flow("excretion", "IAS", sankey_N)
N_excreted <- rnorm(n = num_vals, mean = N_excreted_value, sd = N_excreted_value * per_SD_normal)

# fraction of the population using individual autonomous systems, uniform distribution 15 to 20% (best estimate 18%)
ias_pop <- runif(n = num_vals, min = 0.15, max = 0.2)

# fraction of the excretion from people using IAS that is excreted in public spaces connected to sewers, uniform distribution 5 to 35%
frac_from_ias_to_sewers <- runif(n = num_vals, min = 0.05, max = 0.35)

# N EXCRETIONS TO IAS
N_excr_IAS <- N_excreted*ias_pop*(1-frac_from_ias_to_sewers)
sankey_N <- f_add_min_max("excretion", "IAS", N_excr_IAS, sankey_N) #add min and max percentiles values

# N EXCRETIONS TO SEWERS 
N_excr_sewers <-  N_excreted - N_excr_IAS
sankey_N <- f_add_min_max("excretion", "sewage network", N_excr_sewers, sankey_N) #add min and max percentiles values



# RESIDUALS TO SEWERS

# fraction lost in the sewers before reaching WWTPs, uniform distribution 5 to 20 %
frac_sewer_discharge <- runif(n = num_vals, min = 0.05, max = 0.2) #create flow distribution

# incoming N in the sewers
N_in_sewers <-  N_in_WWTP / (1 - frac_sewer_discharge)

# first compute residuals coming to sewers
N_residual_sewers = N_in_sewers - N_excr_sewers - N_industries #compute residuals

#but sometimes residuals to sewers are <0, not possible. Handle this problem
missing_input <- N_residual_sewers < 0 # checks if negative, i.e. P in sewers < P exr + P industries: this means there cannot be a residual
N_in_sewers[missing_input] <- N_excr_sewers[missing_input] + N_industries[missing_input]  # if so, update P in sewers, equals to excr + industries
N_residual_sewers[missing_input] = 0  # and set the residual to zero
sankey_N <- f_add_min_max("residual", "sewage network", N_residual_sewers, sankey_N) #add min and max percentiles values



# RESIDUALS TO IAS
N_residual_IAS <-  N_residual_sewers * ias_pop / (1 - ias_pop)  # residual assigned to IAS
sankey_N <- f_add_min_max("residual", "IAS", N_residual_IAS, sankey_N) #add min and max percentiles values



# LOSSES IN SEWERS
N_lost_sewers <-  N_in_sewers*frac_sewer_discharge
sankey_N <- f_add_min_max("sewage network", "water", N_lost_sewers, sankey_N) #add min and max percentiles values



# IAS TO GROUNDWATER, WWTP AND AIR

# share of flow coming to IAS ending in groundwater 
frac_IAS_groundwater = runif(n=num_vals, min=0.7, max=0.95) # groundwater (uniform 80-90%)
frac_IAS_air = runif(n=num_vals, min=0.01, max=0.25) # groundwater (uniform 0-25%)
frac_IAS_WWTP = runif(n=num_vals, min=0.045, max = 0.055) # sludge, i.e. to WWTP (10% uncertainty on median value of 5%)

# IAS to groundwater
N_IAS_groundwater <-  frac_IAS_groundwater * (N_residual_IAS + N_excr_IAS)
sankey_N <- f_add_min_max("IAS", "groundwater", N_IAS_groundwater, sankey_N) #add min and max percentiles values

# IAS TO WWTP 
N_IAS_to_WWTP <-  frac_IAS_WWTP * (N_residual_IAS + N_excr_IAS)
sankey_N <- f_add_min_max("IAS", "WWTP", N_IAS_to_WWTP, sankey_N) #add min and max percentiles values

# IAS TO AIR 
N_IAS_to_air <-  frac_IAS_air * (N_residual_IAS + N_excr_IAS)
sankey_N <- f_add_min_max("IAS", "air", N_IAS_to_air, sankey_N) #add min and max percentiles values



# N SLUDGE (from WWTP to sludge)
N_P_ratio <- runif(n=num_vals, min = 1, max = 3) # N:P sludge ration between 1 and 3
N_sludge <-  N_P_ratio*P_sludge
sankey_N <- f_add_min_max("WWTP", "sludge", N_sludge, sankey_N) #add min and max percentiles values



# N WWTP TO AIR
N_WWTP_to_air <-  N_in_WWTP - N_out_WWTP - N_sludge + N_IAS_to_WWTP #add N_IAS_to_WWTP to have consistent mass balance: false but very small
sankey_N <- f_add_min_max("WWTP", "air", N_WWTP_to_air, sankey_N) #add min and max percentiles values



# N COMPOSTED, SPREAD, LANDFILLED

# share of sludge that is composted, directly spread, or landfilled/burnt. normal distribution 10% uncertainty
frac_sluge_composted_value <- f_extract_flow("sludge", "composted", sankey_N) /f_extract_flow("WWTP", "sludge", sankey_N)
frac_sluge_spread_value <- f_extract_flow("sludge", "spread", sankey_N) /f_extract_flow("WWTP", "sludge", sankey_N)
frac_sluge_landfilled_burnt_value <- f_extract_flow("sludge", "landfill, incineration..", sankey_N) /f_extract_flow("WWTP", "sludge", sankey_N)

# 10% uncertainties on these fractions
frac_sluge_composted <- rnorm(n = num_vals, mean = frac_sluge_composted_value, sd = frac_sluge_composted_value * per_SD_normal)
frac_sluge_spread <- rnorm(n = num_vals, mean = frac_sluge_spread_value, sd = frac_sluge_spread_value * per_SD_normal)
frac_sluge_landfilled_burnt <- rnorm(n = num_vals, mean = frac_sluge_landfilled_burnt_value, sd = frac_sluge_landfilled_burnt_value * per_SD_normal)

# N composted, spread, or landfilled/burnt
N_composted <-  frac_sluge_composted*N_sludge
N_spread <-  frac_sluge_spread*N_sludge
N_landfilled_burnt <-  frac_sluge_landfilled_burnt*N_sludge

# add these 3 flows to sankey
sankey_N <- f_add_min_max("sludge", "composted", N_composted, sankey_N) #add min and max percentiles values
sankey_N <- f_add_min_max("sludge", "spread", N_spread, sankey_N) #add min and max percentiles values
sankey_N <- f_add_min_max("spread", "recycled", N_spread, sankey_N) #add min and max percentiles values
sankey_N <- f_add_min_max("sludge", "landfill, incineration..", N_landfilled_burnt, sankey_N) #add min and max percentiles values



# N VOLATILIZED DURING COMPOST
frac_volatilized <- runif(n = num_vals, min = 0.2, max = 0.4)
N_compost_air <- N_composted*frac_volatilized
sankey_N <- f_add_min_max("composted", "air", N_compost_air, sankey_N) #add min and max percentiles values
N_compost_recycled <- N_composted*(1-frac_volatilized)
sankey_N <- f_add_min_max("composted", "recycled", N_landfilled_burnt, sankey_N) #add min and max percentiles values
Code
# now save the Sankey with the min and max values
write_csv(sankey_N, "output_data/sankey_flows/nitrogen/sankey_N_flows_France.csv")


# Global Assessment

# total N
N_tot <-  (N_excreted + N_residual_sewers + N_residual_IAS + N_industries)
round(quantile(N_tot, c(.025, .975)), 1)

# removal efficiency
removal_efficiency <- (1-(N_out_WWTP)/N_in_WWTP)*100
round(quantile(removal_efficiency, c(.025, .975)))


#OUT

# % diffuse losses + surface water
ratio_losses <-  (N_IAS_groundwater + N_out_WWTP + N_lost_sewers) / N_tot *100
round(quantile(ratio_losses, c(.025, .975)))

# % recycled
ratio_recycled <-  (N_compost_recycled + N_spread) / N_tot *100
round(quantile(ratio_recycled, c(.025, .975)))

# % air
ratio_landfilled_burnt <- (N_WWTP_to_air + N_IAS_to_air + N_compost_air)/N_tot*100
round(quantile(ratio_landfilled_burnt, c(.025, .975)))


# IN

# share residuals
ratio_residual <-  (N_residual_IAS + N_residual_sewers) / N_tot *100
round(quantile(ratio_residual, c(.025, .975)))

# share excretion
ratio_excretions <-  N_excreted / N_tot *100
round(quantile(ratio_excretions, c(.025, .975)))

# share industries
ratio_industries <-  N_industries / N_tot *100
round(quantile(ratio_industries, c(.025, .975)))

Results

Code
#remove files not used anymore
rm(
  IAS_balance, sewage_connection, sludge_destination, industry_discharge, 
  in_out_flows, human_excretions
)

#group all sankey files, for N and P
path_source <- "output_data/sankey_flows/phosphorus/"
sankey_P <- bind_rows(
  read_csv(paste0(path_source, "sankey_P_flows_Adour_Garonne.csv")) %>% mutate(basin = "Adour-Garonne"),
  read_csv(paste0(path_source, "sankey_P_flows_Artois_Picardie.csv")) %>% mutate(basin = "Artois-Picardie"),
  read_csv(paste0(path_source, "sankey_P_flows_Loire_Bretagne.csv")) %>% mutate(basin = "Loire-Bretagne"),
  read_csv(paste0(path_source, "sankey_P_flows_Rhin_Meuse.csv")) %>% mutate(basin = "Rhin-Meuse"),
  read_csv(paste0(path_source, "sankey_P_flows_Rhone_Mediterranee.csv")) %>% mutate(basin = "Rhône-Méditerranée"),
  read_csv(paste0(path_source, "sankey_P_flows_Seine_Normandie.csv")) %>% mutate(basin = "Seine-Normandie"),
  read_csv(paste0(path_source, "sankey_P_flows_France.csv")) %>% mutate(basin = "Metropolitan France")
)
path_source <- "output_data/sankey_flows/nitrogen/"
sankey_N <- bind_rows(
  read_csv(paste0(path_source, "sankey_N_flows_Adour_Garonne.csv")) %>% mutate(basin = "Adour-Garonne"),
  read_csv(paste0(path_source, "sankey_N_flows_Artois_Picardie.csv")) %>% mutate(basin = "Artois-Picardie"),
  read_csv(paste0(path_source, "sankey_N_flows_Loire_Bretagne.csv")) %>% mutate(basin = "Loire-Bretagne"),
  read_csv(paste0(path_source, "sankey_N_flows_Rhin_Meuse.csv")) %>% mutate(basin = "Rhin-Meuse"),
  read_csv(paste0(path_source, "sankey_N_flows_Rhone_Mediterranee.csv")) %>% mutate(basin = "Rhône-Méditerranée"),
  read_csv(paste0(path_source, "sankey_N_flows_Seine_Normandie.csv")) %>% mutate(basin = "Seine-Normandie"),
  read_csv(paste0(path_source, "sankey_N_flows_France.csv")) %>% mutate(basin = "Metropolitan France")
)

#group nutrient flows in 1 file
sankey_N_flows <- sankey_N %>%
  mutate(flow = paste(source, "to", target)) %>%
  select(basin, flow, value) %>%
  spread(flow, value)
sankey_P_flows <- sankey_P %>%
  mutate(flow = paste(source, "to", target)) %>%
  select(basin, flow, value) %>%
  spread(flow, value)
N_P_flows_basins <- bind_rows(
  sankey_P_flows %>% mutate(nutrient="Phosphorus"),
  sankey_N_flows %>% mutate(nutrient="Nitrogen")
)
rm(sankey_N_flows, sankey_P_flows)

#group nutrient nodes in 1 file
sankey_P_nodes <- bind_rows(
  #compute all target nodes
  sankey_P %>%
    group_by(basin, node = target) %>%
    summarise(value = sum(value, na.rm=T)), 
  #compute primary source nodes
  sankey_P %>%
    group_by(basin, node = source) %>%
    summarise(value = sum(value, na.rm=T)) %>%
    filter(node %in% c("excretion", "large industries", "residual"))
) %>%
  spread(node, value) %>%
  mutate(
    recycled = `composted, spread`,
    perc_recycled = round(recycled/(recycled + water + groundwater + `landfill, incineration..`)*100,0)
  )
sankey_N_nodes <- bind_rows(
  #compute all target nodes
  sankey_N %>%
    group_by(basin, node = target) %>%
    summarise(value = sum(value, na.rm=T)), 
  #compute primary source nodes
  sankey_N %>%
    group_by(basin, node = source) %>%
    summarise(value = sum(value, na.rm=T)) %>%
    filter(node %in% c("excretion", "large industries", "residual"))
) %>%
  spread(node, value) %>%
  mutate(
    perc_recycled = round(recycled/(recycled + air + water + groundwater + `landfill, incineration..`)*100,0)
  )
N_P_nodes_basins <- bind_rows(
  sankey_P_nodes %>% mutate(nutrient = "Phosphorus"), 
  sankey_N_nodes %>% mutate(nutrient = "Nitrogen")
)
rm(sankey_N_nodes, sankey_P_nodes)

#adding population to both files
N_P_flows_basins <- left_join(
  file_sewage_connection %>% select(basin, pop, pop_sewage, pop_IAS),
  N_P_flows_basins
)

#adding population to both files
N_P_nodes_basins <- left_join(
  file_sewage_connection %>% select(basin, pop, pop_sewage, pop_IAS),
  N_P_nodes_basins
)
Code
temp <- N_P_nodes_basins%>%
  mutate(ToHighlight = ifelse(basin == "Metropolitan France", "yes", "no"))

ggplot(temp) +
  geom_col(
    aes(basin, excretion/pop, fill=ToHighlight), 
    alpha=.8, colour="black"
    ) +
  geom_label(
    aes(basin, excretion/pop, label=signif(excretion/pop, 2))
    ) +
  facet_wrap(vars(nutrient)) +
  ylim(0, 6) +
  theme(legend.position = "none") +
  coord_flip() +
  labs(
    y="kg/year", x="",
    title = "N and P excretions per capita",
    subtitle = "kg per year"
  )

Code
temp <- N_P_nodes_basins%>%
  mutate(ToHighlight = ifelse(basin == "Metropolitan France", "yes", "no"))

ggplot(temp) +
  geom_col(
    aes(basin, excretion/pop*1000/365, fill=ToHighlight),
    alpha=.8, colour="black") +
  geom_label(
    aes(basin, excretion/pop*1000/365, label=signif(excretion/pop*1000/365, 3))
    ) +
  ylim(0, 16) +
  facet_wrap(vars(nutrient)) +
  theme(legend.position = "none") +
  coord_flip() +
  labs(
    y="g/day", x="",
    title = "N and P excretions per capita",
    subtitle = "g per day"
  )

Code
#source
temp <- N_P_nodes_basins %>% 
  filter(nutrient == "Phosphorus") %>%
  select(basin, excretion, residual, `large industries`, pop) %>%
  gather(node, value, excretion, residual, `large industries`) %>%
  mutate(value = round(value/pop, 3), in_out = "in")
#sink
temp2 <- N_P_nodes_basins %>% 
  filter(nutrient == "Phosphorus") %>%
  select(basin, water, groundwater, `landfill, incineration..`, recycled, pop) %>%
  gather(node, value, water, groundwater, `landfill, incineration..`, recycled) %>%
  mutate(value = round(value/pop, 3), in_out = "out")
#values for sankey per capita
sankey_P_cap <- bind_rows(
  #in
  temp %>%
    select(
      source = node, flow_group = node, value, basin, pop
    ) %>%
    mutate(
      target = "Sanitation System"
    ),
  #out
  temp2 %>%
    select(
      target = node, flow_group = node, value, basin, pop
    ) %>%
    mutate(
      source = "Sanitation System"
    )
  ) %>%
  mutate(value = 1000*value) #convert in g per cap per year

#combine 2 files for graphs
temp <- bind_rows(temp, temp2) %>% select(-pop) 

nodes <- c(
  "large industries", "residual", "excretion", #nodes in #nodes in
  "landfill, incineration..", "water", "groundwater", "recycled" #nodes out
  )
nodes_colour <- c(
  "#5e5e5e", "#440154FF", "#7d6608", #nodes in
  "black", "#2e86c1", "#236aaa","#6e2c00", "" #nodes out
  )
temp$node <- 
  factor(
    temp$node,
    levels = nodes
  )
color_theme <- scale_fill_manual(label = nodes, values = nodes_colour)


plot_grid(
  ggplot(temp %>% filter(basin=="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="stack", alpha=.8
      ) +
    color_theme +
    labs(
      fill= "", title = "Source and destination of P in sanitation system",
      subtitle = "Metropolitan France",
      y="kgP/year", x=""
    ),
  ggplot(temp %>% filter(basin!="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="stack", alpha=.8
      ) +
    facet_wrap(vars(basin)) +
    color_theme +
    labs(
      title="",
      fill="", subtitle = "by basin",
      y="", x=""
    ) +
    theme(legend.position = "none"),
  rel_widths = c(0.5, 0.5)
)

Code
#source
temp <- N_P_nodes_basins %>% 
  filter(nutrient == "Nitrogen") %>%
  select(basin, excretion, residual, `large industries`, pop) %>%
  gather(node, value, excretion, residual, `large industries`) %>%
  mutate(value = round(value/pop, 3), in_out = "in")
#sink
temp2 <- N_P_nodes_basins %>% 
  filter(nutrient == "Nitrogen") %>%
  select(basin, water, groundwater, air, `landfill, incineration..`, recycled, pop) %>%
  gather(node, value, water, groundwater, air, `landfill, incineration..`, recycled) %>%
  mutate(value = round(value/pop, 3), in_out = "out")
#values for sankey per capita
sankey_N_cap <- bind_rows(
  #in
  temp %>%
    select(
      source = node, flow_group = node, value, basin, pop
    ) %>%
    mutate(
      target = "Sanitation System"
    ),
  #out
  temp2 %>%
    select(
      target = node, flow_group = node, value, basin, pop
    ) %>%
    mutate(
      source = "Sanitation System"
    )
  )
#combine 2 files
temp <- bind_rows(temp, temp2) %>% select(-pop)

nodes <- c(
  "large industries", "residual", "excretion", #nodes in #nodes in
  "landfill, incineration..", "water", "groundwater", "air", "recycled" #nodes out
  )
nodes_colour <- c(
  "#5e5e5e", "#440154FF", "#7d6608", #nodes in
  "black", "#2e86c1", "#236aaa", "#3ab02a", "#6e2c00" #nodes out
  )
temp$node <- 
  factor(
    temp$node,
    levels = nodes
  )
color_theme <- scale_fill_manual(label = nodes, values = nodes_colour)


plot_grid(
  ggplot(temp %>% filter(basin=="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="stack", alpha=.8
      ) +
    color_theme +
    labs(
      fill= "", title = "Source and destination of N in sanitation system",
      subtitle = "Metropolitan France",
      y="kgN/year", x=""
    ),
  ggplot(temp %>% filter(basin!="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="stack", alpha=.8
      ) +
    facet_wrap(vars(basin)) +
    color_theme +
    labs(
      title="",
      fill="", subtitle = "by basin",
      y="", x=""
    ) +
    theme(legend.position = "none"),
  rel_widths = c(0.5, 0.5)
)

Code
#source
temp <- N_P_nodes_basins %>% 
  filter(nutrient == "Phosphorus") %>%
  select(basin, excretion, residual, `large industries`, pop) %>%
  gather(node, value, excretion, residual, `large industries`) %>%
  mutate(value = round(value/pop, 3), in_out = "in")
#sink
temp2 <- N_P_nodes_basins %>% 
  filter(nutrient == "Phosphorus") %>%
  select(basin, water, groundwater, `landfill, incineration..`, recycled, pop) %>%
  gather(node, value, water, groundwater, `landfill, incineration..`, recycled) %>%
  mutate(value = round(value/pop, 3), in_out = "out")
#combine 2 files
temp <- bind_rows(temp, temp2) %>% select(-pop) 

nodes <- c(
  "large industries", "residual", "excretion", #nodes in #nodes in
  "landfill, incineration..", "water", "groundwater", "recycled" #nodes out
  )
nodes_colour <- c(
  "#5e5e5e", "#440154FF", "#7d6608", #nodes in
  "black", "#2e86c1", "#236aaa", "#6e2c00", "" #nodes out
  )
temp$node <- 
  factor(
    temp$node,
    levels = nodes
  )
color_theme <- scale_fill_manual(label = nodes, values = nodes_colour)


plot_grid(
  ggplot(temp %>% filter(basin=="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="fill", alpha=.8
      ) +
    color_theme +
    labs(
      fill= "", title = "Source and destination of P in sanitation system",
      subtitle = "Metropolitan France",
      y="kgP/year", x=""
    ),
  ggplot(temp %>% filter(basin!="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="fill", alpha=.8
      ) +
    facet_wrap(vars(basin)) +
    color_theme +
    labs(
      title="",
      fill="", subtitle = "by basin",
      y="", x=""
    ) +
    theme(legend.position = "none"),
  rel_widths = c(0.5, 0.5)
)

Code
#source
temp <- N_P_nodes_basins %>% 
  filter(nutrient == "Nitrogen") %>%
  select(basin, excretion, residual, `large industries`, pop) %>%
  gather(node, value, excretion, residual, `large industries`) %>%
  mutate(value = round(value/pop, 3), in_out = "in")
#sink
temp2 <- N_P_nodes_basins %>% 
  filter(nutrient == "Nitrogen") %>%
  select(basin, water, groundwater, air, `landfill, incineration..`, recycled, pop) %>%
  gather(node, value, water, groundwater, air, `landfill, incineration..`, recycled) %>%
  mutate(value = round(value/pop, 3), in_out = "out")
#combine 2 files
temp <- bind_rows(temp, temp2) %>% select(-pop)


nodes <- c(
  "large industries", "residual", "excretion", #nodes in #nodes in
  "landfill, incineration..", "water", "groundwater", "air", "recycled" #nodes out
  )
nodes_colour <- c(
  "#5e5e5e", "#440154FF", "#7d6608", #nodes in
  "black", "#2e86c1", "#236aaa", "#3ab02a", "#6e2c00" #nodes out
  )
temp$node <- 
  factor(
    temp$node,
    levels = nodes
  )
color_theme <- scale_fill_manual(label = nodes, values = nodes_colour)



plot_grid(
  ggplot(temp %>% filter(basin=="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="fill", alpha=.8
      ) +
    color_theme +
    labs(
      fill= "", title = "Source and destination of N in sanitation system",
      subtitle = "Metropolitan France",
      y="kgN/year", x=""
    ),
  ggplot(temp %>% filter(basin!="Metropolitan France")) +
    geom_col(
      aes(in_out, value, fill=node), 
      position="fill", alpha=.8
      ) +
    facet_wrap(vars(basin)) +
    color_theme +
    labs(
      title="",
      fill="", subtitle = "by basin",
      y="", x=""
    ) +
    theme(legend.position = "none"),
  rel_widths = c(0.5, 0.5)
)

Code
temp <- N_P_nodes_basins %>%
  select(basin, excretion, recycled, water, groundwater, nutrient) %>%
  gather(step, value, excretion, recycled, water, groundwater) %>%
  spread(nutrient, value) %>%
  mutate(
    N_P_ratio = Nitrogen/Phosphorus,
    ToHighlight = ifelse(basin == "Metropolitan France", "yes", "no" ))
temp$step <- 
  factor(temp$step, levels=c("excretion", "water", "groundwater", "recycled"))


plot_grid(
  ggplot(temp %>% filter(basin=="Metropolitan France")) +
    geom_col(aes(step, N_P_ratio, fill=step), alpha=.8) +
    geom_label(
      aes(step, N_P_ratio, label=signif(N_P_ratio, 3))
      ) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45)
      ) +
    labs(
      y="", x="", 
      title = "N:P ratio at the different steps of sanitation",
      subtitle = "Metropolitan France"
    ),
  ggplot(temp %>% filter(basin!="Metropolitan France"),) +
    geom_col(aes(step, N_P_ratio, fill=step), alpha=.8) +
    geom_label(
      aes(step, N_P_ratio, label=signif(N_P_ratio, 3))
      ) +
    facet_wrap(vars(basin), ncol=2) +
    theme(
      legend.position = "none",
      axis.text.y=element_blank()
      ) +
    coord_flip() +
    labs(
      y="", x="", title = "",
      subtitle = "by basin"
    ),
  rel_widths = c(0.3, 0.7)
)

Code
temp <- N_P_nodes_basins %>% 
  filter(nutrient=="Nitrogen") %>%
  mutate(ToHighlight = ifelse(basin=="Metropolitan France", "yes", "no"))
#Nitrogen recycling
ggplot(temp) +
  geom_col(
    aes(basin, perc_recycled, fill = ToHighlight), 
    alpha=.8, color="black") +
  geom_text(
    aes(label=paste0(perc_recycled, "%"), x=basin, y=perc_recycled),
    family = "Times New Roman", fontface = "italic", hjust=-0.2
        ) +
  ylim(0,100) +
  coord_flip() +
  theme(
    legend.position = "none"
  ) +
  labs(
    x="", y="%",
    subtitle = "by basin",
    title = "% of excreted Nitrogen recycled",
    caption = "Source:\ncomputation by Thomas Starck from multiple sources"
  ) 

Code
temp <- N_P_nodes_basins %>% 
  filter(nutrient=="Phosphorus") %>%
  mutate(ToHighlight = ifelse(basin=="Metropolitan France", "yes", "no"))

#Phosphorus recycling
ggplot(temp) +
  geom_col(
    aes(basin, perc_recycled, fill = ToHighlight), 
    alpha=.8, color="black") +
  geom_text(
    aes(label=paste0(perc_recycled, "%"), x=basin, y=perc_recycled),
    family = "Times New Roman", fontface = "italic", hjust=-0.2
        ) +
  ylim(0,100) +
  coord_flip() +
  theme(
    legend.position = "none"
  ) +
  labs(
    x="", y="%",
    subtitle = "by basin",
    title = "% of excreted Phosphorus recycled",
    caption = "Source:\ncomputation by Thomas Starck from multiple sources"
  ) 

Code
f_sankey <- function(sankey){
  # nodes names for the sankey 
  nodes <- data.frame(
    name=c(as.character(sankey$source), as.character(sankey$target)) %>% 
      unique())
  
  # With networkD3, connection must be provided using id, not using real name like in the links dataframe. So we need to add it.
  sankey$IDsource <- match(sankey$source, nodes$name)-1
  sankey$IDtarget <- match(sankey$target, nodes$name)-1
  
  # Colors groups (for nodes and flow links)
  nodes$group <- as.factor(c("nodes_group"))
  
    my_color <-
  'd3.scaleOrdinal()
  .domain(["excretion", "large industries", "residual", "landfill, incineration..", "water", "groundwater", "recycled", "air", "nodes_group"])
  .range(["#7d6608", "grey", "grey", "#5e5e5e", "#2e86c1", "#2e86c1", "#6e2c00", "#3ab02a", "black"])'

  #to ba able to show decimales on the snkey graph
  # see here https://stackoverflow.com/questions/72129768/r-networkd3-issues
  # see also https://stackoverflow.com/questions/74259905/network-d3-sankey-node-value-too-precise-using-htmlwidgets
customJS <- '
  function(el,x) { 
      var link = d3.selectAll(".link");
  
      var format = d3.formatLocale({"decimal": ".", "thousands": ",", "grouping": [3], "currency": ["", "\u00a0€"]}).format(",.1f");
  
      link.select("title").select("body")
          .html(function(d) { return "<pre>" + d.source.name + " \u2192 " + d.target.name +
              "\\n" + format(d.value) + " kgN" + "<pre>"; });
              
      d3.select(el).selectAll(".node text")
          .html(function(d) { return d.name + " " + format(d.value) + " kgN"; });
              
  }
  '
  
  # Make the sankey
  p <- 
    sankeyNetwork(
      Links = sankey, Nodes = nodes, Source = "IDsource", Target = "IDtarget",
      Value = "value", NodeID = "name", colourScale=my_color,
      fontSize=25, units = "kgN", nodePadding = 50,
      sinksRight = T, margin = c(top = 0, right = 0, bottom = 0, left = 0),
      LinkGroup="flow_group", NodeGroup="group"
      )
  
onRender(p, customJS)
}
Code
temp <- sankey_N_cap %>% filter(basin=="Metropolitan France")
f_save_csv_files(
  temp,
  "output_data/sankey_flows/nitrogen/simplified/",
  "sankey_N_France_per_cap.csv"
)

f_sankey(temp)
Code
f_sankey <- function(sankey){
  # nodes names for the sankey 
  nodes <- data.frame(
    name=c(as.character(sankey$source), as.character(sankey$target)) %>% 
      unique())
  
  # With networkD3, connection must be provided using id, not using real name like in the links dataframe. So we need to add it.
  sankey$IDsource <- match(sankey$source, nodes$name)-1
  sankey$IDtarget <- match(sankey$target, nodes$name)-1
  
  # Colors groups (for nodes and flow links)
  nodes$group <- as.factor(c("nodes_group"))
  
    my_color <-
  'd3.scaleOrdinal()
  .domain(["excretion", "large industries", "residual", "landfill, incineration..", "water", "groundwater", "recycled", "nodes_group"])
  .range(["#7d6608", "grey", "grey", "#5e5e5e", "#2e86c1", "#2e86c1", "#6e2c00", "black"])'

  #to ba able to show decimales on the snkey graph
  # see here https://stackoverflow.com/questions/72129768/r-networkd3-issues
  # see also https://stackoverflow.com/questions/74259905/network-d3-sankey-node-value-too-precise-using-htmlwidgets
customJS <- '
  function(el,x) { 
      var link = d3.selectAll(".link");
  
      var format = d3.formatLocale({"decimal": ".", "thousands": ",", "grouping": [3], "currency": ["", "\u00a0€"]}).format(",.1f");
  
      link.select("title").select("body")
          .html(function(d) { return "<pre>" + d.source.name + " \u2192 " + d.target.name +
              "\\n" + format(d.value) + " gP" + "<pre>"; });
              
      d3.select(el).selectAll(".node text")
          .html(function(d) { return d.name + " " + format(d.value) + " gP"; });
              
  }
  '
  
  # Make the sankey
  p <- 
    sankeyNetwork(
      Links = sankey, Nodes = nodes, Source = "IDsource", Target = "IDtarget",
      Value = "value", NodeID = "name", colourScale=my_color,
      fontSize=25, units = "gP", nodePadding = 50,
      sinksRight = T, margin = c(top = 0, right = 0, bottom = 0, left = 0),
      LinkGroup="flow_group", NodeGroup="group"
      )
  
onRender(p, customJS)
}
Code
temp <- sankey_P_cap %>% filter(basin=="Metropolitan France")
f_save_csv_files(
  temp,
  "output_data/sankey_flows/phosphorus/simplified/",
  "sankey_P_France_per_cap.csv"
)

f_sankey(temp)
Code
f_sankey <- function(sankey){
  # nodes names for the sankey 
  nodes <- data.frame(
    name=c(as.character(sankey$source), as.character(sankey$target)) %>% 
      unique())
  
  # With networkD3, connection must be provided using id, not using real name like in the links dataframe. So we need to add it.
  sankey$IDsource <- match(sankey$source, nodes$name)-1
  sankey$IDtarget <- match(sankey$target, nodes$name)-1
  
  # Colors groups (for nodes and flow links)
  nodes$group <- as.factor(c("nodes_group"))
  
    my_color <-
  'd3.scaleOrdinal()
  .domain(["excretion", "large industries", "residual", "landfill, incineration..", "water", "groundwater", "recycled", "air", "nodes_group"])
  .range(["#7d6608", "grey", "grey", "#5e5e5e", "#2e86c1", "#2e86c1", "#6e2c00", "#3ab02a", "black"])'

  #to ba able to show decimales on the snkey graph
  # see here https://stackoverflow.com/questions/72129768/r-networkd3-issues
  # see also https://stackoverflow.com/questions/74259905/network-d3-sankey-node-value-too-precise-using-htmlwidgets
customJS <- '
  function(el,x) { 
      var link = d3.selectAll(".link");
  
      var format = d3.formatLocale({"decimal": ".", "thousands": ",", "grouping": [3], "currency": ["", "\u00a0€"]}).format(",.1f");
  
      link.select("title").select("body")
          .html(function(d) { return "<pre>" + d.source.name + " \u2192 " + d.target.name +
              "\\n" + format(d.value) + "%" + "<pre>"; });
              
      d3.select(el).selectAll(".node text")
          .html(function(d) { return d.name + " " + format(d.value) + "%"; });
              
  }
  '
  
  # Make the sankey
  p <- 
    sankeyNetwork(
      Links = sankey, Nodes = nodes, Source = "IDsource", Target = "IDtarget",
      Value = "value", NodeID = "name", colourScale=my_color,
      fontSize=25, units = "%", nodePadding = 50,
      sinksRight = T, margin = c(top = 0, right = 0, bottom = 0, left = 0),
      LinkGroup="flow_group", NodeGroup="group"
      )
  
onRender(p, customJS)
}
Code
total <- sum(sankey_N_cap %>% filter(basin=="Metropolitan France") %>% pull(value))/2
temp <- sankey_N_cap %>% 
    filter(basin=="Metropolitan France") %>% 
    mutate(value=round(value/total*100))
f_save_csv_files(
  temp, 
  "output_data/sankey_flows/nitrogen/simplified/",
  "sankey_N_France_percent.csv"
)


f_sankey(temp)
Code
f_sankey <- function(sankey){
  # nodes names for the sankey 
  nodes <- data.frame(
    name=c(as.character(sankey$source), as.character(sankey$target)) %>% 
      unique())
  
  # With networkD3, connection must be provided using id, not using real name like in the links dataframe. So we need to add it.
  sankey$IDsource <- match(sankey$source, nodes$name)-1
  sankey$IDtarget <- match(sankey$target, nodes$name)-1
  
  # Colors groups (for nodes and flow links)
  nodes$group <- as.factor(c("nodes_group"))
  
    my_color <-
  'd3.scaleOrdinal()
  .domain(["excretion", "large industries", "residual", "landfill, incineration..", "water", "groundwater", "recycled", "nodes_group"])
  .range(["#7d6608", "grey", "grey", "#5e5e5e", "#2e86c1", "#2e86c1", "#6e2c00", "black"])'

  #to ba able to show decimales on the snkey graph
  # see here https://stackoverflow.com/questions/72129768/r-networkd3-issues
  # see also https://stackoverflow.com/questions/74259905/network-d3-sankey-node-value-too-precise-using-htmlwidgets
customJS <- '
  function(el,x) { 
      var link = d3.selectAll(".link");
  
      var format = d3.formatLocale({"decimal": ".", "thousands": ",", "grouping": [3], "currency": ["", "\u00a0€"]}).format(",.1f");
  
      link.select("title").select("body")
          .html(function(d) { return "<pre>" + d.source.name + " \u2192 " + d.target.name +
              "\\n" + format(d.value) + "%" + "<pre>"; });
              
      d3.select(el).selectAll(".node text")
          .html(function(d) { return d.name + " " + format(d.value) + "%"; });
              
  }
  '
  
  # Make the sankey
  p <- 
    sankeyNetwork(
      Links = sankey, Nodes = nodes, Source = "IDsource", Target = "IDtarget",
      Value = "value", NodeID = "name", colourScale=my_color,
      fontSize=25, units = "%", nodePadding = 50,
      sinksRight = T, margin = c(top = 0, right = 0, bottom = 0, left = 0),
      LinkGroup="flow_group", NodeGroup="group"
      )
  
onRender(p, customJS)
}
Code
total <- sum(sankey_P_cap %>% filter(basin=="Metropolitan France") %>% pull(value))/2
temp <- sankey_P_cap %>% 
    filter(basin=="Metropolitan France") %>%
    mutate(value=round(value/total*100))
f_save_csv_files(
  temp,
  "output_data/sankey_flows/phosphorus/simplified/",
  "sankey_P_France_percent.csv"
)

f_sankey(temp)

Comparison of sludge composition

We use a French review of sludge composition from 2014, link to the study (page 413)

Code
path_source <- "source_data/0_sludge_composition/"
review_sludge_composition <- read_csv(paste0(path_source, "sludge_composition_ESCO_MAFOR.csv")) 
review_sludge_composition$compost <- factor(
  review_sludge_composition$compost,
  c("not composted", "composted")
)
Source <- "Source: French Collective Scientifice Expertise on\nOrganic Fertilizers, 2014"

To assess our sludge composition, we use the sludge production data for each basin, and take the mean over 2015-2020. We then combine this with the N and P flows computed in the Sankey diagram to deduce the sludge compositions at the basin scales.

Code
sludge_production <- read_csv("output_data/basins/basin_sanitation_portal.csv")
sludge_production <- sludge_production %>%
  select(basin, Year, sludge_production) %>%
  filter(Year>2014)

ggplot(sludge_production) +
  geom_area(aes(Year, sludge_production, fill=basin)) +
  facet_wrap(vars(basin), scales="free_y") +
  theme(legend.position = "none")

Code
sludge_production <- sludge_production %>%
  group_by(basin) %>%
  summarise(
    sludge_production = round(mean(sludge_production, na.rm=T), 3)
    )

# P in sludge
temp <- sankey_P %>% 
  filter(target=="sludge") %>%
  select(basin, sludge_P = value) %>%
  left_join(sludge_production) %>%
  mutate(
    P = round(sludge_P/sludge_production)
    )

# N in sludge
temp2 <- sankey_N %>% 
  filter(target=="sludge") %>%
  select(basin, sludge_N = value) %>%
  left_join(sludge_production) %>%
  mutate(
    N = round(sludge_N/sludge_production)
    )

#combine both, computes N:P ratio
temp <- left_join(temp , temp2) %>%
  mutate(
    N_P_ratio = N/P
  ) 



temp <- bind_rows(
  review_sludge_composition %>% 
    filter(compost=="not composted") %>%
    select(Study, P, N, N_P_ratio) %>%
    mutate(source = "French review"),
  temp %>%
    select(Study = basin, P, N, N_P_ratio) %>%
    mutate(source = "This study")
)
Code
#we use rounded values for the graph
rounded_values <- review_sludge_composition %>%
  mutate(
    P = round(P, 0), 
    N= round(N, 0),
    N_P_ratio = round(N_P_ratio, 1)
  )
Code
g_composition <- function(dataset, select_variable){
  #order studies by increasing value
  dataset$Study <- reorder(dataset$Study, dataset %>% pull(!!as.symbol(select_variable)))
  #remove empty values
  dataset <- dataset %>% filter(is.na(!!as.symbol(select_variable))==F)
  
  ggplot(dataset) +
    geom_col(aes(Study, !!as.symbol(select_variable), fill=source)) +
    geom_text(
      aes(Study, !!as.symbol(select_variable), label=!!as.symbol(select_variable)),
      hjust=0, family = "Times New Roman", fontface="italic"
      ) +
    coord_flip() +
    labs(
      x="", y="", 
      fill = "", 
      caption = Source
    ) 
}
Code
g_composition(temp, "P") +
  labs(
    title = "P content in urban sludge",
    subtitle = "g of P per kg of Dry Matter"
  ) +
  ylim(0, 40)

Code
ggplot(temp) +
  geom_jitter(aes("", P, fill=source), shape=21, width = .1) +
  ylim(0, 40) +
  facet_wrap(vars(source)) +
  theme(legend.position = "none") +
  labs(
    x="", y="gP / kg DM",
    title = "P content in urban sludge"
  )

Code
g_composition(temp, "N") +
  labs(
    title = "Review of N content in urban sludge",
    subtitle = "g of N per kg of Dry Matter"
  ) +
  ylim(0, 60)

Code
ggplot(temp) +
  geom_jitter(aes("", N, fill=source), shape=21, width=.1) +
  ylim(0, 50) +
  facet_wrap(vars(source)) +
  theme(legend.position = "none") +
  labs(
    x="", y="gN / kg DM",
    title = "N content in urban sludge"
  )

Code
rm(list = ls())