This page describes the data used to estimate nitrogend and phosphorus excretions by French people.
Code
knitr::opts_chunk$set(warning=F, message=F, results=F, dev='svg')library(tidyverse) #loads multiple packages (see https://tidyverse.tidyverse.org/)#core tidyverse packages loaded:# ggplot2, for data visualisation. https://ggplot2.tidyverse.org/# dplyr, for data manipulation. https://dplyr.tidyverse.org/# tidyr, for data tidying. https://tidyr.tidyverse.org/# readr, for data import. https://readr.tidyverse.org/# purrr, for functional programming. https://purrr.tidyverse.org/# tibble, for tibbles, a modern re-imagining of data frames. https://tibble.tidyverse.org/# stringr, for strings. https://stringr.tidyverse.org/# forcats, for factors. https://forcats.tidyverse.org/# lubridate, for date/times. https://lubridate.tidyverse.org/#also loads the following packages (less frequently used):# Working with specific types of vectors:# hms, for times. https://hms.tidyverse.org/# Importing other types of data:# feather, for sharing with Python and other languages. https://github.com/wesm/feather# haven, for SPSS, SAS and Stata files. https://haven.tidyverse.org/# httr, for web apis. https://httr.r-lib.org/# jsonlite for JSON. https://arxiv.org/abs/1403.2805# readxl, for .xls and .xlsx files. https://readxl.tidyverse.org/# rvest, for web scraping. https://rvest.tidyverse.org/# xml2, for XML. https://xml2.r-lib.org/# Modelling# modelr, for modelling within a pipeline. https://modelr.tidyverse.org/# broom, for turning models into tidy data. https://broom.tidymodels.org/# Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors#loading additional relevant packageslibrary(cowplot) #for plot_grid(), multiple plotslibrary(DT) #for interactive tableslibrary(ggtext)library(viridis)#path for data path_output <-"output_data/nutrient_ingestion_excretion/"path_source <-"source_data/0_nutrient_excretion/0_INCA3/"#setting graphs themetheme_set(theme_minimal() +theme(plot.title =element_text(face="bold")) )#setting viridis theme for colorsscale_colour_continuous <- scale_colour_viridis_cscale_colour_discrete <- scale_colour_viridis_dscale_colour_binned <- scale_colour_viridis_b#setting viridis theme for fillscale_fill_continuous <- scale_fill_viridis_cscale_fill_discrete <- scale_fill_viridis_dscale_fill_binned <- scale_fill_viridis_bSource <-"Source: INCA3\nComputation Thomas Starck"# Load the function filesource("functions.R")
There is a R package reporting in details the data of this INCA3 survey (see here and here)
Code
#normally the INCA3 package should be installed in renv/library/R-4.3/aarch64-apple-darwin20/inca3#if this is not the case, you can uncomment the following line to install package from the internet#remotes::install_github("thinkr-open/inca3", build_vignettes = FALSE)library(inca3) #package inca with data on food ingestion
Here are the data files available in the package (in user guide document) :
used for our analysis
DESCRIPTION_INDIV : used for age and sex of each individual. Also includes data on weight, height, BMI (not used)…
APPORTS_NUT_ALIM : used, reports synthetized daily nutrients intakes for each individuals (about 4000).
not used
HABITUDES_INDIV, HABITUDES_MEN, ACTPHYS_SEDENT, FPQ : not used, describes individuals and households habits.
CONSO_CA_PROD, CONSO_CA_INDIV : not used, tables about food supplements consumption.
OCCASIONS : not used (day of consumption, location…).
CONSO_COMPO_ALIM : very complete (250k obs, each individual, each aliment), not used because we do not need this level of precision.
NOMENCLATURE : possibly to convert in different food groups, could be a key table
GloboDiet_gpe_name ?
CONSO_GPE_INCA3 : no used, food group names
We load a file we produced ourselves, to translate nutrients as reported by INCA3 data (nutriment1, nutriment2…) into plain language (see in the user guide p78).
Code
#my files for foodstuff group names and nutrient keysnutriments_contrib_INCA3 <-read_csv(paste(path_source, "nutriments_contrib_INCA3.csv", sep=""))
Then we load the file reporting, for each individual, the nutrient intakes per day (40 000 observations). We only keep a few of them (K, P, protein, energy, carbohydrates, lipids)
Code
#quick glimpse at apports_nut_alim: nutriments for each individualglimpse(apports_nut_alim)#report individual ID, nutriments 1-54 (quantity/j), contribution 3-45 (%AESA, or for 100 kcal nutriment), 4114 individuals#load data of interest from apports_nut_alimfile_apports_nut_alim <- apports_nut_alim %>%# we only focus on individuals ID and nutrients: remove POPULATION columnselect(-POPULATION) %>%#gather nutriments in 1 columngather(key = nutriment_contrib_nb, value = value_nutriment, nutriment1:contrib54 ) %>%#we join the nutriment column with our simplified nutrient namesleft_join( nutriments_contrib_INCA3, by="nutriment_contrib_nb" ) %>%#only select data on energy, carbohydrates, lipids, proteins, phosphorus, and potassiumfilter( nutriment_contrib_name %in%c("Energie-Apport énergétique total (AET) (kcal/j)", "Energie-Apport énergétique sans alcool (AESA) (kcal/j)","Protéines (g/j)", "Glucides (g/j)", "Lipides (g/j)", "Phosphore (mg/j)", "Potassium (mg/j)", "Contribution des Protéines à l'AESA (%)", "Contribution des Glucides à l'AESA (%)", "Contribution des Lipides à l'AESA (%)") )#simplified file easier to handle: spread nutrients into columnsfile_apports_nut_alim_simple <- file_apports_nut_alim %>%select(ID = NOIND, nutriment_contrib_name_simple, value_nutriment ) %>%spread( nutriment_contrib_name_simple, value_nutriment ) %>%#convert P and K intakes into g per day mutate(phosphorus = phosphorus/1000,potassium = potassium/1000 )
We load the file describing each individual in the survey. We modify it, to create age categories (more or less precise) to later merge it with the data we have about French population in each city, by age class.
Code
#load file describing individualsfile_individuals <- description_indiv %>%#select and rename variables of interestselect(ID = NOIND, sex = sex_PS, age_class = tage_PS, height = taille, weight = poids, BMI = imc ) %>%#create fine and large age categoriesmutate(age_class_large =case_when( age_class %in%c(1,2,3,4)~"child (<10 yr)", age_class %in%c(5,6)~"adolescent (11-18 yr)", age_class %in%c(7,8,9)~"adult (>18 yr)"),age_class =case_when( age_class ==1~"<1", age_class ==2~"1-3", age_class ==3~"4-6", age_class ==4~"7-10", age_class ==5~"11-14", age_class ==6~"15-17", age_class ==7~"18-44", age_class ==8~"45-64", age_class ==9~"65-79") ) %>%#male or female made more explicitmutate(sex =case_when( sex ==1~"M", sex ==2~"F") )#transform ages intervals into factorsfile_individuals$age_class <-factor(file_individuals$age_class, levels =c("<1", "1-3", "4-6", "7-10", "11-14", "15-17", "18-44", "45-64", "65-79"))#transform large age categories into factorsfile_individuals$age_class_large <-factor( file_individuals$age_class_large, levels =c("child (<10 yr)", "adolescent (11-18 yr)", "adult (>18 yr)" ) )#create and age median variable from ages intervals, used for our plots of intakes vs agefile_individuals <- file_individuals %>%mutate(age_median =case_when( age_class =="<1"~1, age_class =="1-3"~2, age_class =="4-6"~5, age_class =="7-10"~8.5, age_class =="11-14"~12.5, age_class =="15-17"~16, age_class =="18-44"~31, age_class =="45-64"~54.5, age_class =="65-79"~72) )
We gather the nutrient intakes file and the individual description file, based on individuals IDs.
Code
#joining tablesID_intakes_day <-inner_join(file_individuals, file_apports_nut_alim_simple, by ="ID") %>%rename(energy = AET)
To convert proteins intake to nitrogen, we use Jone’s factor of 6.25.
Code
#jone's factorN_factor <-6.25#transform protein intakes to N intakesID_intakes_day <- ID_intakes_day %>%mutate(nitrogen = proteins/N_factor)
Nutrient intakes by age and sex
Here are presented micro and macronutrients intakes by age and sex.
Code
# temporary file summarizing nutrients intakes by (median) age and sextemp <-# gather together the 2 following filesbind_rows(# summarize data for both sexes together, precise age categories ID_intakes_day %>%select( sex, age_class, age_median, energy, proteins, lipids, carbohydrates, nitrogen, phosphorus, potassium ) %>%mutate(sex="both" ) %>%group_by( sex, age_class, age_median ) %>%summarise_all( mean, na.rm=T ),# summarize data with discriminated sex (male/ffemale) and precise age categories ID_intakes_day %>%select( sex, age_class, age_median, energy, proteins, lipids, carbohydrates, nitrogen, phosphorus, potassium ) %>%group_by( sex, age_class, age_median ) %>%summarise_all( mean, na.rm=T ) ) %>%select(#reordering columns of interest sex, age_class, age_median, energy, carbohydrates, lipids, proteins, nitrogen, phosphorus, potassium ) %>%#3 significant digitsmutate(across(c( energy, carbohydrates, lipids, proteins, nitrogen, phosphorus, potassium ), ~signif(.x, 3) ) )
Code
#function to plot evolution of nutrient intakes with agef_graph <-function(dataset, nutrient_select, nutrient_label){ g <-ggplot(dataset) +geom_line(aes(age_median, !!as.symbol(nutrient_select), color=sex) ) +scale_y_continuous(limits =c(0, NA),#second axis, intakes by yearsec.axis =sec_axis(trans=~.*(365/1000), name="kg/year" ) ) +theme(legend.position =c(.8, .2) ) +labs(title =paste(nutrient_label, "intakes by age and sex"), x="Age", y="g/day",caption = Source,color="sex" )return(g)}
We do not yet give a mean value for whole France, because an unweighed mean would not consider the age distribution of the population and would give excessive weight to young population (which are less numerous). This issues is addressed in the next section Nutrient excretion in France, where you can see in the France tab the distribution of excretions by age categories. Our weighted national value can be seen in Comparison with litterature review
We prepare the population data by cities and 5-year age categories, and transform it into age categories compatible with the INCA3 survey. The source of this data is INSEE, here.
Code
#path of population by city and by age filepath_source <-"source_data/population/"#load filepopulation_age_commune <- readxl::read_excel(paste0(path_source, "pop-sexe-age-quinquennal6818.xls"), sheet ="COM_2018",skip=13 )#give explicit sex and age wording categoriespopulation_age_commune <- population_age_commune %>%select(INSEE_REG = RR,INSEE_DEP = DR,INSEE_COM = CR,NOM= LIBELLE,#5-years age categories and sexage_0_4_sex_H = ageq_rec01s1rpop2018,age_0_4_sex_F = ageq_rec01s2rpop2018,age_5_9_sex_H = ageq_rec02s1rpop2018,age_5_9_sex_F = ageq_rec02s2rpop2018,age_10_14_sex_H = ageq_rec03s1rpop2018,age_10_14_sex_F = ageq_rec03s2rpop2018,age_15_19_sex_H = ageq_rec04s1rpop2018,age_15_19_sex_F = ageq_rec04s2rpop2018,age_20_24_sex_H = ageq_rec05s1rpop2018,age_20_24_sex_F = ageq_rec05s2rpop2018,age_25_29_sex_H = ageq_rec06s1rpop2018,age_25_29_sex_F = ageq_rec06s2rpop2018,age_30_34_sex_H = ageq_rec07s1rpop2018,age_30_34_sex_F = ageq_rec07s2rpop2018,age_35_39_sex_H = ageq_rec08s1rpop2018,age_35_39_sex_F = ageq_rec08s2rpop2018,age_40_44_sex_H = ageq_rec09s1rpop2018,age_40_44_sex_F = ageq_rec09s2rpop2018,age_45_49_sex_H = ageq_rec10s1rpop2018,age_45_49_sex_F = ageq_rec10s2rpop2018,age_50_54_sex_H = ageq_rec11s1rpop2018,age_50_54_sex_F = ageq_rec11s2rpop2018,age_55_59_sex_H = ageq_rec12s1rpop2018,age_55_59_sex_F = ageq_rec12s2rpop2018,age_60_64_sex_H = ageq_rec13s1rpop2018,age_60_64_sex_F = ageq_rec13s2rpop2018,age_65_69_sex_H = ageq_rec14s1rpop2018,age_65_69_sex_F = ageq_rec14s2rpop2018,age_70_74_sex_H = ageq_rec15s1rpop2018,age_70_74_sex_F = ageq_rec15s2rpop2018,age_75_79_sex_H = ageq_rec16s1rpop2018,age_75_79_sex_F = ageq_rec16s2rpop2018,age_80_84_sex_H = ageq_rec17s1rpop2018,age_80_84_sex_F = ageq_rec17s2rpop2018,age_85_89_sex_H = ageq_rec18s1rpop2018,age_85_89_sex_F = ageq_rec18s2rpop2018,age_90_94_sex_H = ageq_rec19s1rpop2018,age_90_94_sex_F = ageq_rec19s2rpop2018,age_95_sex_H = ageq_rec20s1rpop2018,age_95_sex_F = ageq_rec20s2rpop2018)#grouping in similar age and sex categories as INCA3 denominationpopulation_age_commune <- population_age_commune %>%mutate(INSEE_COM =paste0(INSEE_DEP, INSEE_COM),age_0_9_H = age_0_4_sex_H + age_5_9_sex_H,age_0_9_F = age_0_4_sex_F + age_5_9_sex_F,age_10_18_H = age_10_14_sex_H + age_15_19_sex_H,age_10_18_F = age_10_14_sex_F + age_15_19_sex_F,age_19_44_H = age_20_24_sex_H + age_25_29_sex_H + age_30_34_sex_H + age_35_39_sex_H + age_40_44_sex_H,age_19_44_F = age_20_24_sex_F + age_25_29_sex_F + age_30_34_sex_F + age_35_39_sex_F + age_40_44_sex_F,age_45_64_H = age_45_49_sex_H + age_50_54_sex_H + age_55_59_sex_H + age_60_64_sex_H,age_45_64_F = age_45_49_sex_F + age_50_54_sex_F + age_55_59_sex_F + age_60_64_sex_F,age_65_H = age_65_69_sex_H + age_70_74_sex_H + age_75_79_sex_H + age_80_84_sex_H + age_85_89_sex_H + age_90_94_sex_H + age_95_sex_H,age_65_F = age_65_69_sex_F + age_70_74_sex_F + age_75_79_sex_F + age_80_84_sex_F + age_85_89_sex_F + age_90_94_sex_F + age_95_sex_F, ) %>%select( INSEE_REG, INSEE_DEP, INSEE_COM, NOM, age_0_9_H, age_0_9_F, age_10_18_H, age_10_18_F, age_19_44_H, age_19_44_F, age_45_64_H, age_45_64_F, age_65_H, age_65_F )#gathering age and sexpopulation_age_commune <- population_age_commune %>%gather("age_sex", "capita" , age_0_9_H:age_65_F)population_age_commune <- population_age_commune %>%mutate(sex =case_when(grepl("H", age_sex, fixed = T) ~"M", T ~"F" ),age_class =substring(age_sex, 1, nchar(age_sex)-2) ) %>%select(-age_sex)population_age_commune <- population_age_commune %>%mutate(age_class =case_when( age_class =="age_0_9"~"< 10", age_class =="age_10_18"~"10 - 18", age_class =="age_19_44"~"18 - 45", age_class =="age_45_64"~"45 - 65", age_class =="age_65"~"> 65" ) )population_age_commune$age_class <-factor( population_age_commune$age_class, levels =c("> 65","45 - 65","18 - 45","10 - 18","< 10" ) )#have to merge Paris arrondissements in 1 unique commune#first we merge the arrondissements in one temporary file. 75056 is the code for Paris in the map filetemp <- population_age_commune %>%filter( INSEE_COM %in%c("75101", "75102", "75103", "75104", "75105", "75106", "75107", "75108", "75109", "75110","75111", "75112", "75113", "75114", "75115", "75116", "75117", "75118", "75119", "75120")) %>%mutate(INSEE_COM ="75056", NOM ="Paris") %>%group_by(INSEE_REG, INSEE_DEP, INSEE_COM, NOM, age_class, sex) %>%summarise(capita =sum(capita, na.rm = T))#then we remove arrondissements from the main filepopulation_age_commune <- population_age_commune %>%filter(!INSEE_COM %in%c("75101", "75102", "75103", "75104", "75105", "75106", "75107", "75108", "75109", "75110","75111", "75112", "75113", "75114", "75115", "75116", "75117", "75118", "75119", "75120"))#finally we merge the two filespopulation_age_commune <-bind_rows(population_age_commune, temp)#we remove overseas territoriespopulation_age_commune <- population_age_commune %>%filter(!INSEE_DEP %in%c("971", "972", "973", "974"))#creating file summarized at departmental, regional and national scale#departmentpopulation_age_department <- population_age_commune %>%select(INSEE_DEP, age_class, sex, capita) %>%group_by(INSEE_DEP, age_class, sex) %>%summarise(capita =sum(capita, na.rm = T))#regionpopulation_age_region <- population_age_commune %>%select(INSEE_REG, age_class, sex, capita) %>%group_by(INSEE_REG, age_class, sex) %>%summarise(capita =sum(capita, na.rm = T))#francepopulation_age_france <- population_age_commune %>%select(age_class, sex, capita) %>%group_by(age_class, sex) %>%summarise(capita =sum(capita, na.rm = T))
We also want the nutrient excetions by water agencies basins, which is an administrative category not present in our population dataset. We join the cities with our key file linking each cities to water agencies basins.
Code
#for basin we use our key file city / basinpath_source <-"source_data/maps/towns_basins/"#load key file attributing each city to a basinkey_cities_basins <-bind_rows(read_csv(paste0(path_source, "ADOUR-GARONNE.csv")) %>%mutate(basin ="Adour-Garonne"),read_csv(paste0(path_source, "ARTOIS-PICARDIE.csv")) %>%mutate(basin ="Artois-Picardie"),read_csv(paste0(path_source, "CORSE.csv")) %>%mutate(basin ="Rhône-Méditerranée"),read_csv(paste0(path_source, "LOIRE-BRETAGNE.csv")) %>%mutate(basin ="Loire-Bretagne"),read_csv(paste0(path_source, "RHIN-MEUSE.csv")) %>%mutate(basin ="Rhin-Meuse"),read_csv(paste0(path_source, "RHONE-MEDITERRANEE.csv")) %>%mutate(basin ="Rhône-Méditerranée"),read_csv(paste0(path_source, "SEINE-NORMANDIE.csv")) %>%mutate(basin ="Seine-Normandie"))key_cities_basins <- key_cities_basins %>%select(basin, INSEE_COM = admin4)#join population commune data with basin keypopulation_age_commune <-left_join(population_age_commune, key_cities_basins, by="INSEE_COM")#unmatched citiestest <- population_age_commune %>%filter(is.na(basin))sum(test$capita/10^6, na.rm = T) #unmatched population represent 0.6 million individualssum(population_age_commune$capita/10^6, na.rm = T) #over 64.8 million in Metropolesum(test$capita/10^6, na.rm = T)/sum(population_age_commune$capita/10^6, na.rm = T)*100#so 0.1% lostrm(test)#we remove unmatched citiespopulation_age_commune <- population_age_commune %>%filter(is.na(basin)==F)#summarize the sex/age categories by water agency basinpopulation_age_basins <- population_age_commune %>%select(basin, age_class, sex, capita) %>%group_by(basin, age_class, sex) %>%summarise(capita =sum(capita, na.rm = T))
We load the excretion data by age, and combine it with population age/sex data to have the quantities excreted in France, by basin, by region, by department and by commune.
Code
path_source <-"output_data/nutrient_ingestion_excretion/"#creating the excretion file for 5 age categorieschildren_excretion <-read_csv(paste(path_source, "nutrients_age_large_sex_kg_per_year.csv", sep="")) %>%select( sex, age_class = age_class_large, N_per_cap = nitrogen, P_per_cap = phosphorus, K_per_cap = potassium) %>%filter( sex !="both", age_class !="adult (>18 yr)") %>%mutate(#giving similar age name as population fileage_class =case_when( age_class =="child (<10 yr)"~"< 10", age_class =="adolescent (11-18 yr)"~"10 - 18"))adult_excretion <-read_csv(paste(path_source, "nutrients_age_detailed_sex_kg_per_year.csv", sep="")) %>%select(sex, age_class, N_per_cap = nitrogen, P_per_cap = phosphorus, K_per_cap = potassium) %>%filter( sex !="both", age_class %in%c("18-44", "45-64", "65-79") ) %>%mutate(#giving similar age name as population fileage_class =case_when( age_class =="18-44"~"18 - 45", age_class =="45-64"~"45 - 65", age_class =="65-79"~"> 65") )all_excretion <-bind_rows(children_excretion, adult_excretion) all_excretion$age_class <-factor( all_excretion$age_class, levels =c("> 65","45 - 65","18 - 45","10 - 18","< 10" ) )#adding excretions to the populations filesexcretion_commune <-left_join(population_age_commune, all_excretion, by =c("sex", "age_class")) %>%mutate(N_excretion =signif(capita*N_per_cap, 3),P_excretion =signif(capita*P_per_cap, 3),K_excretion =signif(capita*K_per_cap, 3) )excretion_department <-left_join(population_age_department, all_excretion, by =c("sex", "age_class")) %>%mutate(N_excretion =signif(capita*N_per_cap, 3),P_excretion =signif(capita*P_per_cap, 3),K_excretion =signif(capita*K_per_cap, 3) )#non existing region numbers : does not change anything, probably overseas territoriesexcretion_region <-left_join(population_age_region, all_excretion, by =c("sex", "age_class")) %>%mutate(N_excretion =signif(capita*N_per_cap, 3),P_excretion =signif(capita*P_per_cap, 3),K_excretion =signif(capita*K_per_cap, 3) )excretion_basins <-left_join(population_age_basins, all_excretion, by =c("sex", "age_class")) %>%mutate(N_excretion =signif(capita*N_per_cap, 3),P_excretion =signif(capita*P_per_cap, 3),K_excretion =signif(capita*K_per_cap, 3) )excretion_france <-left_join(population_age_france, all_excretion, by =c("sex", "age_class")) %>%mutate(N_excretion =signif(capita*N_per_cap/10^6, 3),P_excretion =signif(capita*P_per_cap/10^6, 3),K_excretion =signif(capita*K_per_cap/10^6, 3) )#remove temporary excretion files and key cities / basinsrm(adult_excretion, children_excretion, all_excretion, key_cities_basins, temp)#remove incomplete population filesrm(population_age_basins, population_age_commune, population_age_department, population_age_region, population_age_france)
We compare the basin population we estimated by attributing eahc city to a basin with values reported by the different water agencies. There is a rather good matching. We apply a correction coefficient to our computed excretions based on our estimation of population values to match the population figures reported by the water agencies.
Code
#reported population by basintemp2 <-read_csv("source_data/pop_sewage_connection/pop_basins_sewage_connection.csv") %>%filter(basin!="Metropolitan France")#comparison between the 2 populations valuesggplot(temp) +geom_col(aes(basin, capita, fill = basin), alpha=.8) +geom_point(data=temp2, aes(basin, pop) ) +theme(axis.text.x =element_text(angle =20, vjust =0.5, hjust=1),legend.position ="none" ) +labs(x="", y="",title ="Basins population (million)",subtitle ="bar: our estimation; point: reported by water agencies",caption = Source )
temp2 <- temp %>%gather( variable_study, value, N_excretion, P_excretion, K_excretion, capita ) %>%mutate(variable_study =case_when( variable_study =="N_excretion"~"N excretion\nktN per year", variable_study =="P_excretion"~"P excretion\nktP per year", variable_study =="K_excretion"~"K excretion\nktK per year", variable_study =="capita"~"population\nmillion" ) )temp2$basin <-factor( temp2$basin,levels =c("Seine-Normandie","Rhône-Méditerranée","Loire-Bretagne","Adour-Garonne","Artois-Picardie","Rhin-Meuse" ) )ggplot(temp2) +geom_col(aes(basin, value, fill = basin), alpha=.8) +facet_wrap(vars(variable_study), scales ="free_y") +theme(axis.text.x =element_text(angle =45, vjust =0.5, hjust=1),legend.position ="none" ) +labs(x="", y="",title ="N, P and K excretions by basin (ktons)",subtitle ="for population, the unit is million inhabitants",caption = Source )
We combine our excretion figures by basin with the share of population connected to sewers of to individual autonomous systems.
Code
#population connected to sewerstemp2 <-read_csv("source_data/pop_sewage_connection/pop_basins_sewage_connection.csv") %>%filter(basin!="Metropolitan France")temp <- temp %>%gather( variable_study, value, N_excretion, P_excretion, K_excretion, capita ) %>%mutate(variable_study =case_when( variable_study =="N_excretion"~"N excretion\nktN per year", variable_study =="P_excretion"~"P excretion\nktP per year", variable_study =="K_excretion"~"K excretion\nktK per year", variable_study =="capita"~"population\nmillion" ) )temp <-left_join( temp, temp2 %>%select(basin, percent_sewage), by="basin")temp <- temp %>%mutate(to_sewers = value*percent_sewage,to_IAS = value*(1-percent_sewage) ) %>%select(-value) %>%gather( destination, value, to_sewers, to_IAS ) %>%mutate(destination =case_when( destination=="to_sewers"~"to sewers", destination=="to_IAS"~"to individual\nautonomous\nsystem" ) )temp$basin <-factor( temp$basin,levels =c("Seine-Normandie","Rhône-Méditerranée","Loire-Bretagne","Adour-Garonne","Artois-Picardie","Rhin-Meuse" ) )ggplot(temp) +geom_col(aes(basin, value, fill = destination), alpha=.8, position =position_dodge()) +facet_wrap(vars(variable_study), scales ="free_y") +theme(axis.text.x =element_text(angle =45, hjust=1), ) +labs(x="", y="",title ="N, P and K excretions by basin (ktons)",subtitle ="for population, the unit is million inhabitants",caption = Source,fill="excretion desination" )
Comparison with litterature review
Code
#Caption of our graphsSource <-"Source: INCA3 and litterature review by Tanguy Fardet\ncomputation by Thomas Starck"#path of the literature reviewpath_source <-"source_data/0_nutrient_excretion/"#load literature review filereview_excretion <-read_csv(paste0(path_source, "n_p_fractions.csv"))#change column names into more explicitreview_excretion <- review_excretion %>%select(#total P and N excretions urine + feces (absolute values) and % in urine and fecesP_excr =`total P excretion (kg/y)`, perc_P_urine =`% urine P`, perc_P_feces =`% feces P`, N_excr =`total N excretion (kg/y)`, perc_N_urine =`% urine N`, perc_N_feces =`% feces N`, N_P_ratio =`N:P ratio`, year, country, authors ) %>%mutate(#study: name of authors, year, countrystudy =paste0(authors, ", ", year, ", ", country),#P and N in urine and feces in absolute values, 2 significant digitsP_urine =round(P_excr*perc_P_urine/100, 2),P_feces =round(P_excr*perc_P_feces/100, 2),N_urine =round(N_excr*perc_N_urine/100, 2),N_feces =round(N_excr*perc_N_feces/100, 2) ) %>%select(-c(year, country, authors)) #function for graph literature review N and P excretionsg_excr <-function(dataset, label_nutrient, y_lim, our_value, label_unit){#mean total excretion mean_excr <-round(mean(dataset %>%pull(excr), na.rm=T), 2)#order studies by increasing value dataset$study <-reorder(dataset$study, dataset %>%pull(value_urine_feces))#remove empty values dataset <- dataset %>%filter(is.na(value_urine_feces)==F)#colors of urine and feces colors_fill <-scale_fill_manual(limits =c("urine", "feces"),values=c("#e3dc00", "#591000") )plot_grid(#plot of the review valuesggplot(dataset) +#urine and feces valuesgeom_col(aes(study, value_urine_feces, fill=urine_feces) ) +#urine + feces value (black contour)geom_col(aes(study, excr),color="black", fill="transparent" ) +#label for urine+feces valuegeom_text(aes(study, excr, label=excr),hjust=-0.1, family ="Times New Roman", fontface="italic" ) + colors_fill +#colorsylim(c(0, y_lim)) +coord_flip() +theme(legend.position ="none",axis.text.x =element_blank(),plot.subtitle = ggtext::element_markdown() #for colored title ) +labs(x="", y="", title =paste("Review of", label_nutrient, "excretion per capita"),subtitle ="in<span style='color:#e3dc00;'>urine,</span><span style='color:#591000;'>feces,</span>and urine+feces", ),#plot of our values (INCA3)ggplot(data_frame(study ="This study (based on INCA3)", value = our_value)) +geom_col(aes(study, value), fill="black" ) +geom_label(aes(study, value, label=value),hjust=-0.2, family ="Times New Roman", fontface="bold" ) +coord_flip() +ylim(c(0, y_lim)) +labs(x="", y=label_unit, fill ="", caption = Source ),ncol=1, rel_heights =c(0.8, 0.2), align="v" )}