|
|
|
@ -153,12 +153,19 @@ option_list <- list(
|
|
|
|
|
metavar = "character" |
|
|
|
|
), |
|
|
|
|
make_option( |
|
|
|
|
c("-p", "--prefix"), |
|
|
|
|
c("-x", "--prefix"), |
|
|
|
|
type = "character", |
|
|
|
|
default = NULL, |
|
|
|
|
help = "prefix to use for sequence header", |
|
|
|
|
metavar = "character" |
|
|
|
|
), |
|
|
|
|
make_option( |
|
|
|
|
c("-o", "--orientation"), |
|
|
|
|
type = "character", |
|
|
|
|
default = "fr", |
|
|
|
|
help = "specify if libraries were generated in mixed orientation", |
|
|
|
|
metavar = "character" |
|
|
|
|
), |
|
|
|
|
make_option( |
|
|
|
|
c("-c", "--cpus"), |
|
|
|
|
type = "integer", |
|
|
|
@ -310,22 +317,28 @@ sample.names <- read.table(opt$samples, h = F, sep = "\t", stringsAsFactors = F)
|
|
|
|
|
path <- paste0(opt$dir, "/Validated_data/", opt$analysis) |
|
|
|
|
fnFs.fr <- paste0(path, "/", sample.names, "_clip_fr_R1.fastq.gz") |
|
|
|
|
fnRs.fr <- paste0(path, "/", sample.names, "_clip_fr_R2.fastq.gz") |
|
|
|
|
fnFs.rf <- paste0(path, "/", sample.names, "_clip_rf_R1.fastq.gz") |
|
|
|
|
fnRs.rf <- paste0(path, "/", sample.names, "_clip_rf_R2.fastq.gz") |
|
|
|
|
names(fnFs.fr) <- sample.names |
|
|
|
|
names(fnRs.fr) <- sample.names |
|
|
|
|
names(fnFs.rf) <- sample.names |
|
|
|
|
names(fnRs.rf) <- sample.names |
|
|
|
|
|
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
fnFs.rf <- paste0(path, "/", sample.names, "_clip_rf_R1.fastq.gz") |
|
|
|
|
fnRs.rf <- paste0(path, "/", sample.names, "_clip_rf_R2.fastq.gz") |
|
|
|
|
names(fnFs.rf) <- sample.names |
|
|
|
|
names(fnRs.rf) <- sample.names |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# Place filtered files in Filtered/ subdirectory |
|
|
|
|
filtFs.fr <- file.path(opt$dir, "Intermediate_results", opt$analysis, "Filtered", paste0(sample.names, "_filt_fr_R1.fastq")) |
|
|
|
|
filtRs.fr <- file.path(opt$dir, "Intermediate_results", opt$analysis, "Filtered", paste0(sample.names, "_filt_fr_R2.fastq")) |
|
|
|
|
filtFs.rf <- file.path(opt$dir, "Intermediate_results", opt$analysis, "Filtered", paste0(sample.names, "_filt_rf_R1.fastq")) |
|
|
|
|
filtRs.rf <- file.path(opt$dir, "Intermediate_results", opt$analysis, "Filtered", paste0(sample.names, "_filt_rf_R2.fastq")) |
|
|
|
|
names(filtFs.fr) <- sample.names |
|
|
|
|
names(filtRs.fr) <- sample.names |
|
|
|
|
names(filtFs.rf) <- sample.names |
|
|
|
|
names(filtRs.rf) <- sample.names |
|
|
|
|
|
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
filtFs.rf <- file.path(opt$dir, "Intermediate_results", opt$analysis, "Filtered", paste0(sample.names, "_filt_rf_R1.fastq")) |
|
|
|
|
filtRs.rf <- file.path(opt$dir, "Intermediate_results", opt$analysis, "Filtered", paste0(sample.names, "_filt_rf_R2.fastq")) |
|
|
|
|
names(filtFs.rf) <- sample.names |
|
|
|
|
names(filtRs.rf) <- sample.names |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Filter and trim reads with optimized parameters #### |
|
|
|
@ -345,28 +358,39 @@ filt.out.fr <- filterAndTrim(
|
|
|
|
|
compress = F, |
|
|
|
|
multithread = opt$cpus |
|
|
|
|
) |
|
|
|
|
filt.out.rf <- filterAndTrim( |
|
|
|
|
fwd = fnFs.rf, |
|
|
|
|
filt = filtFs.rf, |
|
|
|
|
rev = fnRs.rf, |
|
|
|
|
filt.rev = filtRs.rf, |
|
|
|
|
truncLen = c(opt$truncLen_R1, opt$truncLen_R2), |
|
|
|
|
maxN = 0, |
|
|
|
|
minQ = 2, |
|
|
|
|
maxEE = c(opt$error_R1, opt$error_R2), |
|
|
|
|
truncQ = 0, |
|
|
|
|
rm.phix = TRUE, |
|
|
|
|
compress = F, |
|
|
|
|
multithread = opt$cpus |
|
|
|
|
) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
filt.out.rf <- filterAndTrim( |
|
|
|
|
fwd = fnFs.rf, |
|
|
|
|
filt = filtFs.rf, |
|
|
|
|
rev = fnRs.rf, |
|
|
|
|
filt.rev = filtRs.rf, |
|
|
|
|
truncLen = c(opt$truncLen_R1, opt$truncLen_R2), |
|
|
|
|
maxN = 0, |
|
|
|
|
minQ = 2, |
|
|
|
|
maxEE = c(opt$error_R1, opt$error_R2), |
|
|
|
|
truncQ = 0, |
|
|
|
|
rm.phix = TRUE, |
|
|
|
|
compress = F, |
|
|
|
|
multithread = opt$cpus |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# Repeat quality check after trimming |
|
|
|
|
quality_check( |
|
|
|
|
c(filtFs.fr, filtFs.rf), |
|
|
|
|
c(filtRs.fr, filtRs.rf), |
|
|
|
|
file_base = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/QualityProfileFiltered_", opt$name), |
|
|
|
|
cpus = opt$cpus |
|
|
|
|
) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
quality_check( |
|
|
|
|
c(filtFs.fr, filtFs.rf), |
|
|
|
|
c(filtRs.fr, filtRs.rf), |
|
|
|
|
file_base = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/QualityProfileFiltered_", opt$name), |
|
|
|
|
cpus = opt$cpus |
|
|
|
|
) |
|
|
|
|
} else { |
|
|
|
|
quality_check( |
|
|
|
|
filtFs.fr, |
|
|
|
|
filtRs.fr, |
|
|
|
|
file_base = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/QualityProfileFiltered_", opt$name), |
|
|
|
|
cpus = opt$cpus |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Denoising #### |
|
|
|
@ -375,13 +399,17 @@ quality_check(
|
|
|
|
|
if(opt$loess == "mod") { |
|
|
|
|
errF.fr <- learnErrors(filtFs.fr, errorEstimationFunction = loessErrfun2, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errR.fr <- learnErrors(filtRs.fr, errorEstimationFunction = loessErrfun2, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errF.rf <- learnErrors(filtFs.rf, errorEstimationFunction = loessErrfun2, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errR.rf <- learnErrors(filtRs.rf, errorEstimationFunction = loessErrfun2, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
errF.rf <- learnErrors(filtFs.rf, errorEstimationFunction = loessErrfun2, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errR.rf <- learnErrors(filtRs.rf, errorEstimationFunction = loessErrfun2, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
errF.fr <- learnErrors(filtFs.fr, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errR.fr <- learnErrors(filtRs.fr, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errF.rf <- learnErrors(filtFs.rf, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errR.rf <- learnErrors(filtRs.rf, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
errF.rf <- learnErrors(filtFs.rf, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
errR.rf <- learnErrors(filtRs.rf, multithread = opt$cpus, randomize = TRUE, verbose = 1, MAX_CONSIST = 10) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
save.image(paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/workspace_denoising_", opt$name, ".Rdata")) |
|
|
|
|
|
|
|
|
@ -389,19 +417,25 @@ save.image(paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/workspace_d
|
|
|
|
|
pdf(paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/ErrorProfiles_", opt$name, ".pdf")) |
|
|
|
|
barplot(log10(dada2:::checkConvergence(errF.fr) + 1), main = "Convergence_fwd") |
|
|
|
|
barplot(log10(dada2:::checkConvergence(errR.fr) + 1), main = "Convergence_rev") |
|
|
|
|
barplot(log10(dada2:::checkConvergence(errF.rf) + 1), main = "Convergence_fwd") |
|
|
|
|
barplot(log10(dada2:::checkConvergence(errR.rf) + 1), main = "Convergence_rev") |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
barplot(log10(dada2:::checkConvergence(errF.rf) + 1), main = "Convergence_fwd") |
|
|
|
|
barplot(log10(dada2:::checkConvergence(errR.rf) + 1), main = "Convergence_rev") |
|
|
|
|
} |
|
|
|
|
plotErrors(errF.fr, nominalQ = TRUE) |
|
|
|
|
plotErrors(errR.fr, nominalQ = TRUE) |
|
|
|
|
plotErrors(errF.rf, nominalQ = TRUE) |
|
|
|
|
plotErrors(errR.rf, nominalQ = TRUE) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
plotErrors(errF.rf, nominalQ = TRUE) |
|
|
|
|
plotErrors(errR.rf, nominalQ = TRUE) |
|
|
|
|
} |
|
|
|
|
dev.off() |
|
|
|
|
|
|
|
|
|
# Dereplicate and denoise samples |
|
|
|
|
dadaFs.fr <- dada(filtFs.fr, err = errF.fr, multithread = opt$cpus, pool = TRUE) |
|
|
|
|
dadaRs.fr <- dada(filtRs.fr, err = errR.fr, multithread = opt$cpus, pool = TRUE) |
|
|
|
|
dadaFs.rf <- dada(filtFs.rf, err = errF.rf, multithread = opt$cpus, pool = TRUE) |
|
|
|
|
dadaRs.rf <- dada(filtRs.rf, err = errR.rf, multithread = opt$cpus, pool = TRUE) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
dadaFs.rf <- dada(filtFs.rf, err = errF.rf, multithread = opt$cpus, pool = TRUE) |
|
|
|
|
dadaRs.rf <- dada(filtRs.rf, err = errR.rf, multithread = opt$cpus, pool = TRUE) |
|
|
|
|
} |
|
|
|
|
save.image(paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/workspace_denoising_", opt$name, ".Rdata")) |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -417,15 +451,17 @@ mergers.fr0 <- mergePairs(
|
|
|
|
|
verbose = TRUE, |
|
|
|
|
returnRejects = TRUE |
|
|
|
|
) |
|
|
|
|
mergers.rf0 <- mergePairs( |
|
|
|
|
dadaFs.rf, |
|
|
|
|
filtFs.rf, |
|
|
|
|
dadaRs.rf, |
|
|
|
|
filtRs.rf, |
|
|
|
|
minOverlap = 10, |
|
|
|
|
verbose = TRUE, |
|
|
|
|
returnRejects = TRUE |
|
|
|
|
) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
mergers.rf0 <- mergePairs( |
|
|
|
|
dadaFs.rf, |
|
|
|
|
filtFs.rf, |
|
|
|
|
dadaRs.rf, |
|
|
|
|
filtRs.rf, |
|
|
|
|
minOverlap = 10, |
|
|
|
|
verbose = TRUE, |
|
|
|
|
returnRejects = TRUE |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# rescue unmerged |
|
|
|
|
if(!is.null(opt$rescue)) { |
|
|
|
@ -435,10 +471,12 @@ if(!is.null(opt$rescue)) {
|
|
|
|
|
writeFasta(unmerged.fr[[1]], file = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Fs.fr.fasta")) |
|
|
|
|
writeFasta(unmerged.fr[[2]], file = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Rs.fr.fasta")) |
|
|
|
|
|
|
|
|
|
unmerged.rf <- extract_unmerged(dadaFs.rf, dadaRs.rf, mergers.rf0) |
|
|
|
|
writeFasta(unmerged.rf[[1]], file = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Fs.rf.fasta")) |
|
|
|
|
writeFasta(unmerged.rf[[2]], file = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Rs.rf.fasta")) |
|
|
|
|
|
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
unmerged.rf <- extract_unmerged(dadaFs.rf, dadaRs.rf, mergers.rf0) |
|
|
|
|
writeFasta(unmerged.rf[[1]], file = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Fs.rf.fasta")) |
|
|
|
|
writeFasta(unmerged.rf[[2]], file = paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Rs.rf.fasta")) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# mapping |
|
|
|
|
system( |
|
|
|
|
paste0( |
|
|
|
@ -454,21 +492,23 @@ if(!is.null(opt$rescue)) {
|
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_unmerged.fr.bam" |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
system( |
|
|
|
|
paste0( |
|
|
|
|
"bbmap.sh threads=", |
|
|
|
|
opt$cpus, |
|
|
|
|
" ref=", |
|
|
|
|
opt$rescue, |
|
|
|
|
" in=", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Fs.rf.fasta", |
|
|
|
|
" in2=", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Rs.rf.fasta", |
|
|
|
|
" out=", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_unmerged.rf.bam" |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
system( |
|
|
|
|
paste0( |
|
|
|
|
"bbmap.sh threads=", |
|
|
|
|
opt$cpus, |
|
|
|
|
" ref=", |
|
|
|
|
opt$rescue, |
|
|
|
|
" in=", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Fs.rf.fasta", |
|
|
|
|
" in2=", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_Rs.rf.fasta", |
|
|
|
|
" out=", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_unmerged.rf.bam" |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# extract insert size |
|
|
|
|
system( |
|
|
|
|
paste0( |
|
|
|
@ -478,15 +518,17 @@ if(!is.null(opt$rescue)) {
|
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_is.fr.txt" |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
system( |
|
|
|
|
paste0( |
|
|
|
|
"samtools view -F2304 -f66 -m50 ", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_unmerged.rf.bam", |
|
|
|
|
" | cut -f1,9 > ", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_is.rf.txt" |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
system( |
|
|
|
|
paste0( |
|
|
|
|
"samtools view -F2304 -f66 -m50 ", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_unmerged.rf.bam", |
|
|
|
|
" | cut -f1,9 > ", |
|
|
|
|
opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_is.rf.txt" |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# read insert sizes |
|
|
|
|
is.fr <- read.table( |
|
|
|
|
paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_is.fr.txt"), |
|
|
|
@ -494,19 +536,23 @@ if(!is.null(opt$rescue)) {
|
|
|
|
|
sep = "\t", |
|
|
|
|
col.names = c("seqID", "insert") |
|
|
|
|
) |
|
|
|
|
is.rf <- read.table( |
|
|
|
|
paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_is.rf.txt"), |
|
|
|
|
h = F, |
|
|
|
|
sep = "\t", |
|
|
|
|
col.names = c("seqID", "insert") |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
is.rf <- read.table( |
|
|
|
|
paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/unmerged_", opt$name, "_is.rf.txt"), |
|
|
|
|
h = F, |
|
|
|
|
sep = "\t", |
|
|
|
|
col.names = c("seqID", "insert") |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# filter to insert sizes that exceed maximum length of merged sequences |
|
|
|
|
is_long.fr <- is.fr[is.fr$insert > (opt$truncLen_R1 + opt$truncLen_R2 - 10), ] %>% |
|
|
|
|
separate(seqID, into = c("sample_index", "row_index"), sep = "_", remove = F, convert = T) |
|
|
|
|
is_long.rf <- is.rf[-is.rf$insert > (opt$truncLen_R1 + opt$truncLen_R2 - 10), ] %>% |
|
|
|
|
separate(seqID, into = c("sample_index", "row_index"), sep = "_", remove = F, convert = T) |
|
|
|
|
|
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
is_long.rf <- is.rf[-is.rf$insert > (opt$truncLen_R1 + opt$truncLen_R2 - 10), ] %>% |
|
|
|
|
separate(seqID, into = c("sample_index", "row_index"), sep = "_", remove = F, convert = T) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# retrieve and concatenate sequence |
|
|
|
|
mergers.fr <- mergers.fr0 |
|
|
|
|
for(i in 1:length(mergers.fr)) { |
|
|
|
@ -529,52 +575,60 @@ if(!is.null(opt$rescue)) {
|
|
|
|
|
mergers.fr[[i]] <- mergers.fr[[i]][mergers.fr[[i]]$accept, ] |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
mergers.rf <- mergers.rf0 |
|
|
|
|
for(i in 1:length(mergers.rf)) { |
|
|
|
|
if(i %in% unique(is_long.rf$sample_index)) { |
|
|
|
|
tmp_index <- is_long.rf$row_index[is_long.rf$sample_index == i] |
|
|
|
|
if(length(tmp_index) > 0) { |
|
|
|
|
mergers.rf[[i]]$sequence[tmp_index] <- paste0( |
|
|
|
|
unmerged.rf[[1]][paste0(is_long.rf$seqID[is_long.rf$sample_index == i], "/1")], |
|
|
|
|
"NNNNNNNNNN", |
|
|
|
|
rc(unmerged.rf[[2]][paste0(is_long.rf$seqID[is_long.rf$sample_index == i], "/2")]) |
|
|
|
|
) |
|
|
|
|
mergers.rf[[i]]$nmatch[tmp_index] <- 0 |
|
|
|
|
mergers.rf[[i]]$nmismatch[tmp_index] <- 0 |
|
|
|
|
mergers.rf[[i]]$nindel[tmp_index] <- 0 |
|
|
|
|
mergers.rf[[i]]$prefer[tmp_index] <- NA |
|
|
|
|
mergers.rf[[i]]$accept[tmp_index] <- TRUE |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
mergers.rf <- mergers.rf0 |
|
|
|
|
for(i in 1:length(mergers.rf)) { |
|
|
|
|
if(i %in% unique(is_long.rf$sample_index)) { |
|
|
|
|
tmp_index <- is_long.rf$row_index[is_long.rf$sample_index == i] |
|
|
|
|
if(length(tmp_index) > 0) { |
|
|
|
|
mergers.rf[[i]]$sequence[tmp_index] <- paste0( |
|
|
|
|
unmerged.rf[[1]][paste0(is_long.rf$seqID[is_long.rf$sample_index == i], "/1")], |
|
|
|
|
"NNNNNNNNNN", |
|
|
|
|
rc(unmerged.rf[[2]][paste0(is_long.rf$seqID[is_long.rf$sample_index == i], "/2")]) |
|
|
|
|
) |
|
|
|
|
mergers.rf[[i]]$nmatch[tmp_index] <- 0 |
|
|
|
|
mergers.rf[[i]]$nmismatch[tmp_index] <- 0 |
|
|
|
|
mergers.rf[[i]]$nindel[tmp_index] <- 0 |
|
|
|
|
mergers.rf[[i]]$prefer[tmp_index] <- NA |
|
|
|
|
mergers.rf[[i]]$accept[tmp_index] <- TRUE |
|
|
|
|
mergers.rf[[i]] <- mergers.rf[[i]][mergers.rf[[i]]$accept, ] |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
mergers.rf[[i]] <- mergers.rf[[i]][mergers.rf[[i]]$accept, ] |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
mergers.rf[[i]] <- mergers.rf[[i]][mergers.rf[[i]]$accept, ] |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
mergers.fr <- lapply(mergers.fr0, function(x) x[x$accept, ]) |
|
|
|
|
mergers.rf <- lapply(mergers.rf0, function(x) x[x$accept, ]) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
mergers.rf <- lapply(mergers.rf0, function(x) x[x$accept, ]) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# Create sequence table |
|
|
|
|
seqtab.fr <- makeSequenceTable(mergers.fr) |
|
|
|
|
seqtab.rf <- makeSequenceTable(mergers.rf) |
|
|
|
|
msg(paste0("There are ", ncol(seqtab.fr), " ASVs in fr orientation, and ", ncol(seqtab.rf), " ASVs in rf orientation after merging.\n")) |
|
|
|
|
|
|
|
|
|
# This is the step at which separate denoising runs should be combined |
|
|
|
|
# (e.g. if data comes from different sequencer runs or lanes, |
|
|
|
|
# or if fwd-rev and rev-fwd orientation were processed separately) |
|
|
|
|
|
|
|
|
|
# Generate reverse complement of rf |
|
|
|
|
seqtab.rf.rc <- seqtab.rf |
|
|
|
|
colnames(seqtab.rf.rc) <- rc(colnames(seqtab.rf)) |
|
|
|
|
|
|
|
|
|
# Merge sequence tables |
|
|
|
|
seqtab <- mergeSequenceTables( |
|
|
|
|
seqtab.fr, |
|
|
|
|
seqtab.rf.rc, |
|
|
|
|
repeats = "sum" |
|
|
|
|
) |
|
|
|
|
msg(paste0("The combined fr and rf tables contain ", ncol(seqtab), " ASVs.\n")) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
seqtab.rf <- makeSequenceTable(mergers.rf) |
|
|
|
|
msg(paste0("There are ", ncol(seqtab.fr), " ASVs in fr orientation, and ", ncol(seqtab.rf), " ASVs in rf orientation after merging.\n")) |
|
|
|
|
|
|
|
|
|
# This is the step at which separate denoising runs should be combined |
|
|
|
|
# (e.g. if data comes from different sequencer runs or lanes, |
|
|
|
|
# or if fwd-rev and rev-fwd orientation were processed separately) |
|
|
|
|
|
|
|
|
|
# Generate reverse complement of rf |
|
|
|
|
seqtab.rf.rc <- seqtab.rf |
|
|
|
|
colnames(seqtab.rf.rc) <- rc(colnames(seqtab.rf)) |
|
|
|
|
|
|
|
|
|
# Merge sequence tables |
|
|
|
|
seqtab <- mergeSequenceTables( |
|
|
|
|
seqtab.fr, |
|
|
|
|
seqtab.rf.rc, |
|
|
|
|
repeats = "sum" |
|
|
|
|
) |
|
|
|
|
} else { |
|
|
|
|
seqtab <- seqtab.fr |
|
|
|
|
} |
|
|
|
|
msg(paste0("The merged sequence table contains ", ncol(seqtab), " ASVs.\n")) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Chimera removal and further cleaning steps #### |
|
|
|
@ -617,36 +671,59 @@ nSeq <- read.table(paste0(opt$dir, "/Intermediate_results/", opt$analysis, "/nse
|
|
|
|
|
nSeq <- nSeq[sample.names, ] |
|
|
|
|
|
|
|
|
|
getN <- function(x) sum(getUniques(x)) |
|
|
|
|
track <- cbind( |
|
|
|
|
nSeq$Demux, |
|
|
|
|
filt.out.fr[, 1], |
|
|
|
|
filt.out.rf[, 1], |
|
|
|
|
filt.out.fr[, 2], |
|
|
|
|
filt.out.rf[, 2], |
|
|
|
|
sapply(dadaFs.fr, getN), |
|
|
|
|
sapply(dadaRs.fr, getN), |
|
|
|
|
sapply(dadaFs.rf, getN), |
|
|
|
|
sapply(dadaRs.rf, getN), |
|
|
|
|
sapply(mergers.fr, getN), |
|
|
|
|
sapply(mergers.rf, getN), |
|
|
|
|
rowSums(seqtab.nochim), |
|
|
|
|
rowSums(seqtab.nochim2) |
|
|
|
|
) |
|
|
|
|
colnames(track) <- c( |
|
|
|
|
"Demux", |
|
|
|
|
"Clipped_fr", |
|
|
|
|
"Clipped_rf", |
|
|
|
|
"Filtered_fr", |
|
|
|
|
"Filtered_rf", |
|
|
|
|
"Denoised_fwd_fr", |
|
|
|
|
"Denoised_rev_fr", |
|
|
|
|
"Denoised_fwd_rf", |
|
|
|
|
"Denoised_rev_rf", |
|
|
|
|
"Merged_fr", |
|
|
|
|
"Merged_rf", |
|
|
|
|
"Nochim", |
|
|
|
|
"Tabled" |
|
|
|
|
) |
|
|
|
|
if(opt$orientation == "mixed") { |
|
|
|
|
track <- cbind( |
|
|
|
|
nSeq$Demux, |
|
|
|
|
filt.out.fr[, 1], |
|
|
|
|
filt.out.rf[, 1], |
|
|
|
|
filt.out.fr[, 2], |
|
|
|
|
filt.out.rf[, 2], |
|
|
|
|
sapply(dadaFs.fr, getN), |
|
|
|
|
sapply(dadaRs.fr, getN), |
|
|
|
|
sapply(dadaFs.rf, getN), |
|
|
|
|
sapply(dadaRs.rf, getN), |
|
|
|
|
sapply(mergers.fr, getN), |
|
|
|
|
sapply(mergers.rf, getN), |
|
|
|
|
rowSums(seqtab.nochim), |
|
|
|
|
rowSums(seqtab.nochim2) |
|
|
|
|
) |
|
|
|
|
colnames(track) <- c( |
|
|
|
|
"Demux", |
|
|
|
|
"Clipped_fr", |
|
|
|
|
"Clipped_rf", |
|
|
|
|
"Filtered_fr", |
|
|
|
|
"Filtered_rf", |
|
|
|
|
"Denoised_fwd_fr", |
|
|
|
|
"Denoised_rev_fr", |
|
|
|
|
"Denoised_fwd_rf", |
|
|
|
|
"Denoised_rev_rf", |
|
|
|
|
"Merged_fr", |
|
|
|
|
"Merged_rf", |
|
|
|
|
"Nochim", |
|
|
|
|
"Tabled" |
|
|
|
|
) |
|
|
|
|
} else { |
|
|
|
|
track <- cbind( |
|
|
|
|
nSeq$Demux, |
|
|
|
|
filt.out.fr[, 1], |
|
|
|
|
filt.out.fr[, 2], |
|
|
|
|
sapply(dadaFs.fr, getN), |
|
|
|
|
sapply(dadaRs.fr, getN), |
|
|
|
|
sapply(mergers.fr, getN), |
|
|
|
|
rowSums(seqtab.nochim), |
|
|
|
|
rowSums(seqtab.nochim2) |
|
|
|
|
) |
|
|
|
|
colnames(track) <- c( |
|
|
|
|
"Demux", |
|
|
|
|
"Clipped_fr", |
|
|
|
|
"Filtered_fr", |
|
|
|
|
"Denoised_fwd_fr", |
|
|
|
|
"Denoised_rev_fr", |
|
|
|
|
"Merged_fr", |
|
|
|
|
"Nochim", |
|
|
|
|
"Tabled" |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
rownames(track) <- c(sample.names) |
|
|
|
|
track <- data.frame(track) |
|
|
|
|
|
|
|
|
|