# GLOBAL VARIABLE
ITERS=5
OTU_OTU_PVALUE = 0.05
OTU_OTU_RVALUE = 0.5
OTU_PHENOTYPE_PVALUE = 1
###### Load the data

otu_data_fungi <- read.mothur.shared("data/Fungi1415_trim.contigs.trim.unique.precluster.pick.pick.subsample.nn.unique_list.shared")
rownames(otu_data_fungi) <- paste0("F", rownames(otu_data_fungi))

# upload meta data file
bigmetadata <- read.csv("data/block_diversity_selected_tunnel_ww.csv", header = T, row.names = 1, stringsAsFactors = FALSE)
rownames(bigmetadata) <- paste0("F", rownames(bigmetadata))

### select otu_data based on metadata
otu_data <- otu_data_fungi[rownames(bigmetadata), ]
dim(otu_data)
## [1]   160 16151
#rownames(otu_data)


otu_data = na.omit(otu_data)

dim(otu_data)
## [1]   160 16151
# now select metadata based on od
meta_data <- bigmetadata[rownames(otu_data), , drop = FALSE]

all.equal(row.names(meta_data), row.names(otu_data))
## [1] TRUE
# upload taxanomy file
tax_fungi <- read.mothur.taxonomy("data/Fungi1415_trim.contigs.trim.unique.precluster.pick.pick.subsample.nn.unique_list.0.03.cons.taxonomy")



# checking if taxonomy file and count files have same otus
all.equal(colnames(otu_data), row.names(tax_fungi))
## [1] TRUE
#
########## read in phenome data/ Yield data
# read in data
pheno_data <- read.csv("data/tomato_yield.csv", header = TRUE, stringsAsFactors = FALSE, row.names = 1)
rownames(pheno_data) <- paste0("F", rownames(pheno_data))

hist(pheno_data$Marketable)

pheno_data_mean <- aggregate(pheno_data$Marketable, by=list(Rootstock=pheno_data$Rootstock), FUN=mean)


pheno_data_sel <- pheno_data %>%
  select(Rootstock, Compartment, Marketable, Study_site, Year, Sample_name)

meta_withpheo = inner_join(meta_data, pheno_data_sel)
## Joining, by = c("Rootstock", "Compartment", "Study_site", "Year", "Sample_name")
rownames(meta_withpheo)<- rownames(meta_data)

# create a phyloseq object~ can combine count, metadata, taxonomy, and phylogentic tree.
tax.mat <- tax_table(as.matrix(tax_fungi))
otu.mat = otu_table(t(otu_data), taxa_are_rows = TRUE)
sample.mat <- sample_data(meta_withpheo)
physeq = phyloseq(otu.mat, tax.mat, sample.mat)


## Assign color to the taxa on the whole phyloseq object so that the same color is assigned for a taxon across treatments
physeq = taxacolor(phyobj = physeq, coloredby = "Phylum")

physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 16151 taxa and 160 samples ]
## sample_data() Sample Data:       [ 160 samples by 10 sample variables ]
## tax_table()   Taxonomy Table:    [ 16151 taxa by 9 taxonomic ranks ]

At this point we have a phyloseq object for all the treatments. Now, we will process the phyloseq object the split it by treatment combinations.

#### All the combinations of treatment factor

region <- unique(as.character(data.frame(sample_data(physeq))$Compartment))
region
## [1] "Rhizosphere" "Endosphere"
treatment <- unique(as.character(data.frame(sample_data(physeq))$Rootstock))
treatment
## [1] "Selfgraft"  "RST-04-106" "Nongraft"   "Maxifort"

Getting phyloseq object for endosphere

# Endosphere 
ng_endo <- subset_samples(physeq, c(Compartment =="Endosphere" & Rootstock=="Nongraft")) %>%
  prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
ng_endo_sparcc = otu_table(ng_endo)
write.table(data.frame(OTU_id = rownames(ng_endo_sparcc), ng_endo_sparcc), file = "data/sparcc_data/ng_endo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)
  


sg_endo <- subset_samples(physeq, c(Compartment =="Endosphere" & Rootstock=="Selfgraft")) %>%
    prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
sg_endo_sparcc = otu_table(sg_endo)
write.table(data.frame(OTU_id = rownames(sg_endo_sparcc), sg_endo_sparcc), file = "data/sparcc_data/sg_endo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)
  


rst_endo <- subset_samples(physeq, c(Compartment =="Endosphere" & Rootstock=="RST-04-106")) %>%
    prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
rst_endo_sparcc = otu_table(rst_endo)
write.table(data.frame(OTU_id = rownames(rst_endo_sparcc), rst_endo_sparcc), file = "data/sparcc_data/rst_endo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)
  


maxi_endo <- subset_samples(physeq, c(Compartment =="Endosphere" & Rootstock=="Maxifort")) %>%
    prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
maxi_endo_sparcc = otu_table(maxi_endo)
write.table(data.frame(OTU_id = rownames(maxi_endo_sparcc), maxi_endo_sparcc), file = "data/sparcc_data/maxi_endo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)

Getting phyloseq object for rhizosphere

# Rhizosphere

ng_rhizo <- subset_samples(physeq, c(Compartment =="Rhizosphere" & Rootstock=="Nongraft")) %>%
    prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
ng_rhizo_sparcc = otu_table(ng_rhizo)
write.table(data.frame(OTU_id = rownames(ng_rhizo_sparcc), ng_rhizo_sparcc), file = "data/sparcc_data/ng_rhizo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)
  

  
sg_rhizo <- subset_samples(physeq, c(Compartment =="Rhizosphere" & Rootstock=="Selfgraft")) %>%
    prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
sg_rhizo_sparcc = otu_table(sg_rhizo)
write.table(data.frame(OTU_id = rownames(sg_rhizo_sparcc), sg_rhizo_sparcc), file = "data/sparcc_data/sg_rhizo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)
  

  
rst_rhizo <- subset_samples(physeq, c(Compartment =="Rhizosphere" & Rootstock=="RST-04-106"))%>%
  prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
rst_rhizo_sparcc = otu_table(rst_rhizo)
write.table(data.frame(OTU_id = rownames(rst_rhizo_sparcc), rst_rhizo_sparcc), file = "data/sparcc_data/rst_rhizo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)
  


maxi_rhizo <- subset_samples(physeq, c(Compartment =="Rhizosphere" & Rootstock=="Maxifort")) %>%
  prune_taxa(taxa_sums(.) > 2, .)
# Inputfile for running SparCC
maxi_rhizo_sparcc = otu_table(maxi_rhizo)
write.table(data.frame(OTU_id = rownames(maxi_rhizo_sparcc), maxi_rhizo_sparcc), file = "data/sparcc_data/maxi_rhizo_sparcc.txt", sep = "\t", row.names = FALSE, quote = FALSE)

Read in output from sparcc, then run SparCC.

Nongraft and Endosphere

ng_endo_sparcc.cor <- read.delim("data/sparcc_output/Endosphere_Nongraft_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
ng_endo_sparcc.pval <- read.delim("data/sparcc_output/Endosphere_Nongraft_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_ng_endo <- PhONA(physeqobj = ng_endo, cordata = ng_endo_sparcc.cor,
      pdata = ng_endo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "Nongraft",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "Nongraft")
## Total number of iterations used: 5

Selfgraft and Endosphere

sg_endo_sparcc.cor <- read.delim("data/sparcc_output/Endosphere_Selfgraft_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
sg_endo_sparcc.pval <- read.delim("data/sparcc_output/Endosphere_Selfgraft_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_sg_endo <-PhONA(physeqobj = sg_endo, cordata = sg_endo_sparcc.cor,
      pdata = sg_endo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "Selfgraft",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "Selfgraft")
## Total number of iterations used: 5

RST-04-106 and Endosphere

rst_endo_sparcc.cor <- read.delim("data/sparcc_output/Endosphere_RST-04-106_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
rst_endo_sparcc.pval <- read.delim("data/sparcc_output/Endosphere_RST-04-106_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_rst_endo <-PhONA(physeqobj = rst_endo, cordata = rst_endo_sparcc.cor,
      pdata = rst_endo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "RST-04-106",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "RST-04-106")
## Total number of iterations used: 5

Maxifort and Endosphere

maxi_endo_sparcc.cor <- read.delim("data/sparcc_output/Endosphere_Maxifort_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
maxi_endo_sparcc.pval <- read.delim("data/sparcc_output/Endosphere_Maxifort_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_maxi_endo <-PhONA(physeqobj = maxi_endo, cordata = maxi_endo_sparcc.cor,
      pdata = maxi_endo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "Maxifort",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "Maxifort")
## Total number of iterations used: 5

Nongraft and Rhizosphere

ng_rhizo_sparcc.cor <- read.delim("data/sparcc_output/Rhizosphere_Nongraft_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
ng_rhizo_sparcc.pval <- read.delim("data/sparcc_output/Rhizosphere_Nongraft_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_ng_rhizo <- PhONA(physeqobj = ng_rhizo, cordata = ng_rhizo_sparcc.cor,
      pdata = ng_rhizo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "Nongraft",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "Nongraft")
## Total number of iterations used: 5

Selfgraft and Rhizosphere

sg_rhizo_sparcc.cor <- read.delim("data/sparcc_output/Rhizosphere_Selfgraft_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
sg_rhizo_sparcc.pval <- read.delim("data/sparcc_output/Rhizosphere_Selfgraft_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_sg_rhizo <- PhONA(physeqobj = sg_rhizo, cordata = sg_rhizo_sparcc.cor,
      pdata = sg_rhizo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "Selfgraft",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "Selfgraft")
## Total number of iterations used: 5

RST-04-106 and Rhizosphere

rst_rhizo_sparcc.cor <- read.delim("data/sparcc_output/Rhizosphere_RST-04-106_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
rst_rhizo_sparcc.pval <- read.delim("data/sparcc_output/Rhizosphere_RST-04-106_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_rst_rhizo <- PhONA(physeqobj = rst_rhizo, cordata = rst_rhizo_sparcc.cor,
      pdata = rst_rhizo_sparcc.pval, model = "lasso",
      iters = ITERS, defineTreatment = "RST-04-106",nodesize = 5,
      PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "RST-04-106")
## Total number of iterations used: 5

Maxifort and Rhizosphere

maxi_rhizo_sparcc.cor <- read.delim("data/sparcc_output/Rhizosphere_Maxifort_cor_sparcc.out", sep = "\t", header = T, row.names = 1)
maxi_rhizo_sparcc.pval <- read.delim("data/sparcc_output/Rhizosphere_Maxifort_pvals.two_sided.txt", sep = "\t", header = T, row.names = 1)

phona_maxi_rhizo <- PhONA(physeqobj = maxi_rhizo, cordata = maxi_rhizo_sparcc.cor,
      pdata = maxi_rhizo_sparcc.pval, model = "lasso",
      defineTreatment = "Maxifort",nodesize = 5,
      iters = ITERS, PhenoNodesize = 12, definePhenotype = "Marketable", PhenoNodelabel = "Maxifort")
## Total number of iterations used: 5

Role analyses using SA algorithm as implemented in rnetcarto package.

Endosphere

# Endosphere 

role_df_Endosphere <- rbind(phona_ng_endo$roles,
                            phona_sg_endo$roles,
                            phona_rst_endo$roles,
                            phona_maxi_endo$roles)
rolePlot(role_df_Endosphere)

If you are not happy with the plot, you can pass any ggplot paramters to rolePlot function.

rolePlot(role_df_Endosphere) + 
  scale_color_manual(breaks= c("Nongraft", "Selfgraft", "RST-04-106", "Maxifort"),values = c("mediumaquamarine","sienna4","#ffae19","blueviolet"))

Rhizosphere

# Endosphere 

role_df_Rhizosphere <- rbind(phona_ng_rhizo$roles,
                             phona_sg_rhizo$roles,
                             phona_rst_rhizo$roles,
                             phona_maxi_rhizo$roles)
rolePlot(role_df_Rhizosphere) + 
  scale_color_manual(breaks= c("Nongraft", "Selfgraft", "RST-04-106", "Maxifort"),values = c("mediumaquamarine","sienna4","#ffae19","blueviolet"))

Summary of all the graphs:

Endosphere

summary_graph_endo_df <- rbind(phona_ng_endo$graph_summary,
                          phona_sg_endo$graph_summary,
                          phona_rst_endo$graph_summary,
                          phona_maxi_endo$graph_summary)
kable(summary_graph_endo_df)
node edge nodeDegree avgpath trans mod connectance wtc nModules.SA top3hub top3hubv n_postiveL n_negativeL Treatment
16 17 1.062 1.978 0.513 0.547 0.066 5 3 12;15;13 1;0.885;0.822 14 1 Nongraft
15 12 0.800 1.909 0.600 0.694 0.053 5 5 5;11;12 1;1;1 10 0 Selfgraft
15 12 0.800 1.294 0.815 0.653 0.053 6 4 12;8;10 1;0.64;0.64 11 0 RST-04-106
28 37 1.321 1.615 0.749 0.700 0.047 8 6 7;15;8 1;1;0.93 26 10 Maxifort

Rhizosphere

summary_graph_rhizo_df <- rbind(phona_ng_rhizo$graph_summary,
                          phona_sg_rhizo$graph_summary,
                          phona_rst_rhizo$graph_summary,
                          phona_maxi_rhizo$graph_summary)
kable(summary_graph_rhizo_df)
node edge nodeDegree avgpath trans mod connectance wtc nModules.SA top3hub top3hubv n_postiveL n_negativeL Treatment
43 69 1.605 3.206 0.367 0.633 0.037 7 7 12;18;22 1;0.719;0.711 48 15 Nongraft
29 39 1.345 5.865 0.374 0.627 0.046 5 5 10;6;5 1;0.819;0.815 30 6 Selfgraft
35 45 1.286 2.140 0.642 0.679 0.037 10 7 20;26;15 1;1;0.866 35 9 RST-04-106
61 180 2.951 3.102 0.591 0.349 0.048 9 7 39;23;24 1;0.932;0.897 107 69 Maxifort