Skip to content

Commit 0ae338a

Browse files
authored
Merge pull request #33 from Juke34/salmon
Salmon
2 parents 5883eba + 3f7c09f commit 0ae338a

11 files changed

+256
-68
lines changed

README.md

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,22 @@ You can choose to run one or several aligner in parallel.
4848
| bbmap ||| ⚠️ | ⚠️ |
4949
| bowtie ||| ⚠️ | ⚠️ |
5050
| bowtie2 ||| ⚠️ | ⚠️ |
51-
| bwaaln || ✅ R1 and R2 independently aligned then merged with bwa sampe | | |
52-
| bwamem ||| ⚠️ | ⚠️ |
53-
| bwamem2 ||| ⚠️ | ⚠️ |
51+
| bwaaln || ✅ R1 and R2 independently aligned then merged with bwa sampe | ⚠️ | ⚠️ |
52+
| bwamem ||| | |
53+
| bwamem2 ||| | |
5454
| bwasw ||| ⚠️ | ⚠️ |
5555
| graphmap2 | ⚠️ | ⚠️ R1 and R2 independently aligned then merged with cat |||
5656
| hisat2 ||| ⚠️ | ⚠️ |
5757
| kallisto ||| ⚠️ | ⚠️ |
5858
| minimap2 | ⚠️ | ⚠️ |||
59-
| ngmlr | ⚠️ | 🚫 |||
59+
| ngmlr | ⚠️ | ⚠️ R1 and R2 independently aligned then merged with cat |||
6060
| novoalign |||| ⚠️ |
6161
| nucmer || ✅ R1 and R2 are concatenated then aligned | ⚠️ | ⚠️ |
62+
| salmon ||| ⚠️ | ⚠️ |
6263
| star ||| ✅ use STARlong | ✅ use STARlong |
6364
| star 2pass mode ||| ⚠️ | ⚠️ |
6465
| subread ||| ⚠️ | ⚠️ |
65-
| sublong | ⚠️ | 🚫 |||
66+
| sublong | ⚠️ | ⚠️ R1 and R2 independently aligned then merged with cat |||
6667

6768
*Legend*
6869
✅ Recommended
@@ -94,6 +95,7 @@ It is then translated to the correct option in the following aligners:
9495
| ngmlr | 🚫 | 🚫 | 🚫 |
9596
| novoalign | 🚫 | 🚫 | 🚫 |
9697
| nucmer | 🚫 | 🚫 | 🚫 |
98+
| salmon | U SR SF IU MU OU ISF ISR MSF MSR OSR OSF | identical | strand information and read orientation |
9799
| star | 🚫 | 🚫 | 🚫 |
98100
| star 2pass mode | 🚫 | 🚫 | 🚫 |
99101
| subread | -S fr / -S rf / -S ff | ISF ISR IU / OSF OSR OU / MSF MSR MU | read orientation |
@@ -128,6 +130,7 @@ If you provide an annotation file the pipeline will pass automatically the file
128130
| ngmlr | 🚫 |
129131
| novoalign | 🚫 |
130132
| nucmer | 🚫 |
133+
| salmon | 🚫 |
131134
| star | GTF / GFF (--sjdbGTFfile + --sjdbGTFtagExonParentTranscript Parent in case of GFF ) |
132135
| star 2pass mode | GTF / GFF (--sjdbGTFfile + --sjdbGTFtagExonParentTranscript Parent in case of GFF ) |
133136
| subread | GTF or compatible GFF format (-a) |
@@ -343,18 +346,22 @@ On success you should get a message looking like this:
343346
--bowtie2_options additional options for bowtie2
344347
--bwaaln_options additional options for bwaaln
345348
--bwamem_options additional options for bwamem
346-
--bwamem2_options additional options for bwamem2
349+
--bwamem2_options additional options for bwamem2
347350
--bwasw_options additional options for bwasw
348351
--graphmap2_options additional options for graphmap2
349352
--hisat2_options additional options for hisat2
350353
--kallisto_options additional options for kallisto
354+
--kallisto_index_options additional options for kallisto index
351355
--minimap2_options additional options for minimap2 (default: -a (to get sam output))
352356
--minimap2_index_options additional options for minimap2 index
353357
--ngmlr_options additional options for ngmlr
354358
--novoalign_options additional options for novoalign
355359
--novoalign_license license for novoalign. You can request a one-month free trial license at http://www.novocraft.com/products/novoalign/
356360
--nucmer_options additional options for nucmer
361+
--salmon_options additional options for salmon
362+
--salmon_index_options additional options for salmon index
357363
--star_options additional options for star
364+
--star_index_options additional options for star index
358365
--star_2pass set to true to run STAR in 2pass mode (default: false)
359366
--read_length [Optional][used by STAR] length of the reads, if none provided it is automatically deduced
360367
--subread_options additional options for subread

aline.nf

Lines changed: 109 additions & 41 deletions
Large diffs are not rendered by default.

modules/graphmap2.nf

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,16 @@ process graphmap2 {
4444
path "*graphmap2.log", emit: graphmap2_summary
4545

4646
script:
47+
// catch filename
4748
fileName = reads[0].baseName.replace('.fastq','')
48-
read_file=reads[0]
49+
4950
// Check if the owler option is set
5051
if ( params.graphmap2_options.contains("owler") ){
5152

5253
if (params.read_type == "short_paired"){
5354
// For paired-end we concat output
5455
"""
55-
graphmap2 ${params.graphmap2_options} -t ${task.cpus} -r ${read_file} -d ${read_file} -o ${fileName}_graphmap2.mhap 2> ${fileName}_graphmap2.log
56+
graphmap2 ${params.graphmap2_options} -t ${task.cpus} -r ${reads[0]} -d ${reads[0]} -o ${fileName}_graphmap2.mhap 2> ${fileName}_graphmap2.log
5657
graphmap2 ${params.graphmap2_options} -t ${task.cpus} -r ${reads[1]} -d ${reads[1]} -o ${reads[1].baseName}_graphmap2.mhap 2> ${reads[1].baseName}_graphmap2.log
5758
cat ${fileName}_graphmap2.mhap > ${fileName}_graphmap2_concatR1R2.mhap
5859
rm ${fileName}_graphmap2.mhap
@@ -61,7 +62,7 @@ process graphmap2 {
6162
"""
6263
} else {
6364
"""
64-
graphmap2 ${params.graphmap2_options} -t ${task.cpus} -r ${read_file} -d ${read_file} -o ${fileName}_graphmap2.mhap 2> ${fileName}_graphmap2.log
65+
graphmap2 ${params.graphmap2_options} -t ${task.cpus} -r ${reads[0]} -d ${reads[0]} -o ${fileName}_graphmap2.mhap 2> ${fileName}_graphmap2.log
6566
"""
6667
}
6768
}
@@ -76,7 +77,7 @@ process graphmap2 {
7677
if (params.read_type == "short_paired"){
7778

7879
"""
79-
graphmap2 ${graphmap2_options} -i ${graphmap2_index_files} -t ${task.cpus} -r ${genome} -d ${read_file} -o ${fileName}_graphmap2.sam 2> ${fileName}_graphmap2.log
80+
graphmap2 ${graphmap2_options} -i ${graphmap2_index_files} -t ${task.cpus} -r ${genome} -d ${reads[0]} -o ${fileName}_graphmap2.sam 2> ${fileName}_graphmap2.log
8081
graphmap2 ${graphmap2_options} -i ${graphmap2_index_files} -t ${task.cpus} -r ${genome} -d ${reads[1]} -o ${reads[1].baseName}_graphmap2.sam 2> ${reads[1].baseName}_graphmap2.log
8182
8283
# Merge sam
@@ -88,7 +89,7 @@ process graphmap2 {
8889
} else {
8990
"""
9091
91-
graphmap2 ${graphmap2_options} -i ${graphmap2_index_files} -t ${task.cpus} -r ${genome} -d ${read_file} -o ${fileName}_graphmap2.sam 2> ${fileName}_graphmap2.log
92+
graphmap2 ${graphmap2_options} -i ${graphmap2_index_files} -t ${task.cpus} -r ${genome} -d ${reads[0]} -o ${fileName}_graphmap2.sam 2> ${fileName}_graphmap2.log
9293
"""
9394
}
9495
}

modules/kallisto.nf

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ process kallisto {
5555
}
5656
}
5757

58+
// For paired-end reads, Kallisto automatically estimates the fragment length distribution from the data and does not require you to specify it manually
5859
if (params.read_type == "short_paired"){
5960
"""
6061
kallisto quant ${read_orientation} ${params.kallisto_options} \
@@ -70,7 +71,7 @@ process kallisto {
7071
} else {
7172

7273
// Use read length (-l) and sd (-s) from params?
73-
def l_s_params = params.kallisto_options
74+
def l_s_params = ""
7475
def read_length_copy = read_length // to avoid error "Variable read_length already defined in the process scope "
7576
if ( !params.kallisto_options.contains("-l ") ){
7677
l_s_params += " -l ${read_length}"
@@ -82,7 +83,8 @@ process kallisto {
8283
}
8384

8485
"""
85-
kallisto quant ${read_orientation} ${l_s_params} \
86+
kallisto quant ${read_orientation} ${params.kallisto_options} \
87+
${l_s_params} \
8688
-t ${task.cpus} \
8789
--pseudobam \
8890
-i ${kallisto_index} \

modules/ngmlr.nf

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,25 @@ process ngmlr {
1919
path "*.log", emit: ngmlr_summary
2020

2121
script:
22-
22+
// catch filename
2323
fileName = reads[0].baseName.replace('.fastq','')
2424

25-
"""
26-
ngmlr ${params.ngmlr_options} -t ${task.cpus} -r ${genome} -q ${reads} -o ${fileName}_ngmlr.sam 2> ${fileName}_ngmlr.log
27-
"""
25+
// For paired-end we concat output
26+
if (params.read_type == "short_paired"){
27+
"""
28+
ngmlr ${params.ngmlr_options} -t ${task.cpus} -r ${genome} -q ${reads[0]} -o ${fileName}_ngmlr.sam 2> ${fileName}_ngmlr.log
29+
# R2 gets its own log file: reusing the R1 log name with '2>' would truncate the R1 log
ngmlr ${params.ngmlr_options} -t ${task.cpus} -r ${genome} -q ${reads[1]} -o ${reads[1].baseName}_ngmlr.sam 2> ${reads[1].baseName}_ngmlr.log
30+
31+
# Merge sam
32+
cat ${fileName}_ngmlr.sam > ${fileName}_ngmlr_concatR1R2.sam
33+
rm ${fileName}_ngmlr.sam
34+
awk '!/^@HD/ && !/^@SQ/ && !/^@RG/ && !/^@PG/ && !/^@CO/ && NF' ${reads[1].baseName}_ngmlr.sam >> ${fileName}_ngmlr_concatR1R2.sam
35+
rm ${reads[1].baseName}_ngmlr.sam
36+
"""
37+
} else {
38+
"""
39+
ngmlr ${params.ngmlr_options} -t ${task.cpus} -r ${genome} -q ${reads[0]} -o ${fileName}_ngmlr.sam 2> ${fileName}_ngmlr.log
40+
"""
41+
}
2842

2943
}

modules/salmon.nf

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ process salmon_index {
1414

1515
script:
1616
"""
17-
salmon index -t ${genome_fasta} -i salmon_index --threads ${task.cpus}
17+
salmon index ${params.salmon_index_options} -t ${genome_fasta} -i salmon_index --threads ${task.cpus}
1818
"""
1919
}
2020

@@ -65,4 +65,72 @@ process set_tuple_withUserLib{
6565

6666
"""
6767
"""
68+
}
69+
70+
// Use salmon as aligner - output sorted sam
71+
process salmon {
72+
label 'salmon'
73+
publishDir "${params.outdir}/${outpath}", pattern: "*/*.json", mode: 'copy'
74+
75+
input:
76+
tuple val(sample), path(fastq), val(library), val(read_length)
77+
path salmon_index
78+
val outpath
79+
80+
output:
81+
tuple val(sample), path ("*.sam"), emit: tuple_sample_sam
82+
path "*.log", emit: salmon_summary
83+
84+
script:
85+
86+
// set input according to read_type parameter
87+
def input = "-r ${fastq[0]}"
88+
if (params.read_type == "short_paired"){
89+
input = "-1 ${fastq[0]} -2 ${fastq[1]}" // if short reads check paired or not
90+
}
91+
92+
// deal with library type
93+
def read_orientation=""
94+
if (! params.salmon_options.contains("-l ") && ! params.salmon_options.contains("--libType ") &&
95+
! params.skip_libray_usage){
96+
read_orientation = "-l ${library}"
97+
}
98+
99+
// catch filename
100+
def filename = "${fastq[0].baseName.replace('.fastq','')}"
101+
102+
// Salmon automatically estimates the fragment length distribution for paired-end reads (like Kallisto)
103+
if (params.read_type == "short_paired"){
104+
"""
105+
salmon quant -i ${salmon_index} ${params.salmon_options} \
106+
${read_orientation} \
107+
${input} \
108+
--thread ${task.cpus} \
109+
--writeMappings \
110+
--output ${filename} > ${filename}.sam 2> ${filename}.log
111+
"""
112+
} else {
113+
114+
// Use read length (--fldMean) and sd (--fldSD) from params?
115+
def l_s_params = ""
116+
def read_length_copy = read_length // to avoid error "Variable read_length already defined in the process scope "
117+
if ( !params.salmon_options.contains("--fldMean ") ){
118+
l_s_params += " --fldMean ${read_length}"
119+
}
120+
if ( !params.salmon_options.contains("--fldSD ") ){
121+
// 10% of read length will be used as Estimated standard deviation of fragment length
122+
def tenPercent = (read_length_copy.toInteger() * 10 / 100) as int
123+
l_s_params += " --fldSD ${tenPercent}"
124+
}
125+
126+
"""
127+
salmon quant -i ${salmon_index} ${params.salmon_options} \
128+
${l_s_params} \
129+
${read_orientation} \
130+
${input} \
131+
--thread ${task.cpus} \
132+
--writeMappings \
133+
--output ${filename} > ${filename}.sam 2> ${filename}.log
134+
"""
135+
}
68136
}

modules/samtools.nf

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ process samtools_sam2bam_nucmer {
2424

2525
}
2626

27-
2827
process samtools_sam2bam {
2928
label 'samtools'
3029
tag "$sample"
@@ -41,6 +40,23 @@ process samtools_sam2bam {
4140
samtools view -@ ${task.cpus} ${sam} -b -o ${sam.baseName}.bam
4241
"""
4342

43+
}
44+
process samtools_merge_bam {
45+
label 'samtools'
46+
tag "$sample"
47+
48+
input:
49+
tuple val(sample), path(bam)
50+
51+
output:
52+
tuple val(sample), path ("*.bam"), emit: tuple_sample_bam
53+
54+
script:
55+
56+
"""
57+
samtools merge -@ ${task.cpus} ${bam[0].baseName}_concatR1R2.bam *.bam
58+
"""
59+
4460
}
4561
/*
4662
http://www.htslib.org/doc/samtools-sort.html

modules/subread.nf

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ process sublong {
108108
publishDir "${params.outdir}/${outpath}", pattern: "*.log", mode: 'copy'
109109

110110
input:
111-
tuple val(sample), path(fastq), val(library), val(read_length)
111+
tuple val(sample), path(reads), val(library), val(read_length)
112112
path genome
113113
path index
114114
val outpath
@@ -120,12 +120,22 @@ process sublong {
120120
script:
121121

122122
// remove fastq.gz
123-
def fileName = fastq[0].baseName.replace('.fastq','') + "_sublong"
123+
def fileName = reads[0].baseName.replace('.fastq','') + "_sublong"
124124

125125
// prepare index name
126126
def index_prefix = genome.baseName + "_index"
127127

128-
"""
129-
sublong -T ${task.cpus} -i ${index_prefix} -r ${fastq} -o ${fileName}.bam ${params.sublong_options} > ${fileName}_sublong.log
130-
"""
128+
129+
130+
// For paired-end we concat output
131+
if (params.read_type == "short_paired"){
132+
"""
133+
sublong -T ${task.cpus} -i ${index_prefix} -r ${reads[0]} -o ${fileName}.bam ${params.sublong_options} > ${fileName}_sublong.log
134+
# R2 gets its own log file: reusing the R1 log name with '>' would overwrite the R1 log
sublong -T ${task.cpus} -i ${index_prefix} -r ${reads[1]} -o ${reads[1].baseName}.bam ${params.sublong_options} > ${reads[1].baseName}_sublong.log
135+
"""
136+
} else {
137+
"""
138+
sublong -T ${task.cpus} -i ${index_prefix} -r ${reads[0]} -o ${fileName}.bam ${params.sublong_options} > ${fileName}_sublong.log
139+
"""
140+
}
131141
}

profiles/test_illumina_paired.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
params {
88
reads = "$baseDir/test/illumina/"
99
genome = "$baseDir/test/yeast.fa"
10-
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread'
10+
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,minimap2,ngmlr,nucmer,salmon,star,subread,sublong'
1111
star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857
1212
multiqc_config = "$baseDir/config/multiqc_conf.yml"
1313
}

profiles/test_illumina_single.config

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ params {
88
reads = "$baseDir/test/illumina/"
99
genome = "$baseDir/test/yeast.fa"
1010
read_type = "short_single" // already inside the params scope: a 'params.' prefix here would define params.params.read_type
11-
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
11+
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,salmon,star,subread,sublong'
1212
trimming_fastp = true
1313
fastqc = true
1414
samtools_stats = true
15+
salmon_options ="--minAssignedFrags 1"
1516
star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857
1617
multiqc_config = "$baseDir/config/multiqc_conf.yml"
1718
}

profiles/test_pacbio.config

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ params {
88
reads = "$baseDir/test/pacbio/"
99
genome = "$baseDir/test/yeast.fa"
1010
read_type = "pacbio"
11-
aligner = 'bbmap,bowtie,bowtie2,,bwamem,bwamem2,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
11+
aligner = 'bbmap,bowtie,bowtie2,bwamem,bwamem2,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,salmon,star,subread,sublong'
12+
salmon_options ="--minAssignedFrags 1"
1213
star_options = '--outFilterMismatchNmax 100 --seedSearchLmax 30 --seedSearchStartLmax 30 --seedPerReadNmax 100000 --seedPerWindowNmax 100 --alignTranscriptsPerReadNmax 100000 --alignTranscriptsPerWindowNmax 10000'
1314
star_index_options = '--genomeSAindexNbases 9'
1415
multiqc_config = "$baseDir/config/multiqc_conf.yml"

0 commit comments

Comments
 (0)