This function reads the taxonomy file produced by mothur and reformats it into a data frame with one column per taxonomic rank.
#' Read a mothur taxonomy file into a data frame
#'
#' Reads a tab-delimited taxonomy file whose first line is a header and whose
#' columns are OTU name, OTU size and a semicolon-separated lineage string.
#' The lineage is split into up to seven ranks and the first parenthesised
#' number in each rank (e.g. `(100)`) is removed.
#'
#' @param tax.file Path to the taxonomy file; passed to `read.delim()`.
#' @return A data frame with the OTU names as row names and the columns
#'   `Count`, `Kingdom`, `Phylum`, `Class`, `Order`, `Family`, `Genus` and
#'   `Species`. Ranks missing from a lineage are `NA`. `Count` is returned as
#'   read from the file; because the header line is parsed as data it is text
#'   (a factor on R < 4.0), not numeric. A file that contains only the header
#'   line yields a zero-row data frame.
read.mothur.taxonomy <- function(tax.file) {
  ranks <- c(
    "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"
  )

  # The header line is read as an ordinary row so that the columns keep the
  # default names V2/V3, then discarded. Negative indexing (rather than
  # 2:nrow) stays correct when there are no data rows at all.
  raw <- read.delim(tax.file, header = FALSE, row.names = 1)
  tbl <- raw[-1, , drop = FALSE]

  lineage <- strsplit(as.character(tbl$V3), ";", fixed = TRUE)

  # One character vector per rank; indexing past the end of a short lineage
  # gives NA, which sub() passes through unchanged.
  rank_cols <- lapply(seq_along(ranks), function(i) {
    taxon <- vapply(lineage, function(x) x[i], character(1))
    sub("\\([0-9.]+\\)", "", taxon)
  })
  names(rank_cols) <- ranks

  tax_df <- data.frame(Count = tbl$V2, rank_cols)
  rownames(tax_df) <- rownames(tbl)
  tax_df
}
Example
library(devtools)
install_github("ravinpoudel/myFunctions")
library(myFunctions)
library(igraph)
library(magrittr)
library(tidyverse)
library(data.table)
# Load the taxonomy file directly, without read.mothur.taxonomy(), to show
# the raw three-column layout (the header line is read as the first row).
tax.reg <- read.delim(Sys.glob("*.taxonomy"), sep = "\t", header = FALSE)
head(tax.reg)
## V1 V2
## 1 OTU Size
## 2 Otu00001 130638
## 3 Otu00002 119471
## 4 Otu00003 108083
## 5 Otu00004 82435
## 6 Otu00005 50921
## V3
## 1 Taxonomy
## 2 k__Fungi(100);p__Ascomycota(100);c__Pezizomycetes(100);o__Pezizales(100);f__Pyronemataceae(100);f__Pyronemataceae_unclassified(100);
## 3 k__Fungi(100);p__Ascomycota(100);c__Pezizomycetes(100);o__Pezizales(100);f__Pyronemataceae(100);f__Pyronemataceae_unclassified(100);
## 4 k__Fungi(100);p__Basidiomycota(100);c__Agaricomycetes(100);o__Cantharellales(100);f__Ceratobasidiaceae(100);g__Thanatephorus(100);s__Thanatephorus_cucumeris(100);
## 5 k__Fungi(100);p__Ascomycota(100);c__Dothideomycetes(100);o__Pleosporales(100);f__Pleosporaceae(100);g__Alternaria(100);s__Alternaria_porri(100);
## 6 k__Fungi(100);p__Ascomycota(100);c__Pezizomycetes(100);o__Pezizales(100);f__Pyronemataceae(100);g__unclassified_Pyronemataceae(100);s__Pyronemataceae_sp(100);
# Load the same taxonomy file through read.mothur.taxonomy() and preview the
# first rows of the reformatted table.
tax.fn <- Sys.glob("*.taxonomy") %>% read.mothur.taxonomy()
tax.fn %>% head()
## Count Kingdom Phylum Class
## Otu00001 130638 k__Fungi p__Ascomycota c__Pezizomycetes
## Otu00002 119471 k__Fungi p__Ascomycota c__Pezizomycetes
## Otu00003 108083 k__Fungi p__Basidiomycota c__Agaricomycetes
## Otu00004 82435 k__Fungi p__Ascomycota c__Dothideomycetes
## Otu00005 50921 k__Fungi p__Ascomycota c__Pezizomycetes
## Otu00006 49323 k__Fungi p__Ascomycota c__Dothideomycetes
## Order Family
## Otu00001 o__Pezizales f__Pyronemataceae
## Otu00002 o__Pezizales f__Pyronemataceae
## Otu00003 o__Cantharellales f__Ceratobasidiaceae
## Otu00004 o__Pleosporales f__Pleosporaceae
## Otu00005 o__Pezizales f__Pyronemataceae
## Otu00006 o__Pleosporales f__Pleosporales_family_Incertae_sedis
## Genus Species
## Otu00001 f__Pyronemataceae_unclassified <NA>
## Otu00002 f__Pyronemataceae_unclassified <NA>
## Otu00003 g__Thanatephorus s__Thanatephorus_cucumeris
## Otu00004 g__Alternaria s__Alternaria_porri
## Otu00005 g__unclassified_Pyronemataceae s__Pyronemataceae_sp
## Otu00006 g__Phoma s__Phoma_sp_UASWS0872