Skip to content

Commit 5883eba

Browse files
authored
Merge pull request #32 from Juke34/bwamem2
add bwamem2
2 parents 2a6365c + 48e2ec3 commit 5883eba

8 files changed

+136
-24
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ You can choose to run one or several aligner in parallel.
5050
| bowtie2 ||| ⚠️ | ⚠️ |
5151
| bwaaln || ✅ R1 and R2 independently aligned then merged with bwa sampe |||
5252
| bwamem ||| ⚠️ | ⚠️ |
53+
| bwamem2 ||| ⚠️ | ⚠️ |
5354
| bwasw ||| ⚠️ | ⚠️ |
5455
| graphmap2 | ⚠️ | ⚠️ R1 and R2 independently aligned then merged with cat |||
5556
| hisat2 ||| ⚠️ | ⚠️ |
@@ -83,6 +84,7 @@ It is then translated to the correct option in the following aligners:
8384
| bowtie2 | --fr / --rf / --ff | ISF ISR IU / OSF OSR OU / MSF MSR MU| read orientation |
8485
| bwaaln | 🚫 | 🚫 | 🚫 |
8586
| bwamem | 🚫 | 🚫 | 🚫 |
87+
| bwamem2 | 🚫 | 🚫 | 🚫 |
8688
| bwasw | 🚫 | 🚫 | 🚫 |
8789
| graphmap2 | 🚫 | 🚫 | 🚫 |
8890
| hisat2 | --rna-strandness [ F / R / FR / RF ] | SF / SR / ISF OSF MSF / ISR OSR MSR | strand information |
@@ -117,6 +119,7 @@ If you provide an annotation file the pipeline will pass automatically the file
117119
| bowtie2 | 🚫 |
118120
| bwaaln | 🚫 |
119121
| bwamem | 🚫 |
122+
| bwamem2 | 🚫 |
120123
| bwasw | 🚫 |
121124
| graphmap2 | GTF (--gtf) |
122125
| hisat2 | 🚫 |
@@ -316,7 +319,7 @@ On success you should get a message looking like this:
316319
--reads path to the reads file or folder
317320
--reads_extension extension of the reads files (default: .fastq.gz)
318321
--genome path to the genome file
319-
--aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong]
322+
--aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong]
320323
--outdir path to the output directory (default: alignment_results)
321324
--annotation [Optional][used by graphmap2, STAR, subread] Absolute path to the annotation file (gtf or gff3)
322325
@@ -340,6 +343,7 @@ On success you should get a message looking like this:
340343
--bowtie2_options additional options for bowtie2
341344
--bwaaln_options additional options for bwaaln
342345
--bwamem_options additional options for bwamem
346+
--bwamem2_options additional options for bwamem2
343347
--bwasw_options additional options for bwasw
344348
--graphmap2_options additional options for graphmap2
345349
--hisat2_options additional options for hisat2

aline.nf

Lines changed: 61 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,14 @@ params.annotation = ""
3838
params.trimming_fastp = false
3939

4040
// Aligner params
41-
align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'star', 'subread', 'sublong' ]
41+
align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwamem2', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'star', 'subread', 'sublong' ]
4242
params.aligner = ''
4343
params.bbmap_options = ''
4444
params.bowtie_options = ''
4545
params.bowtie2_options = ''
4646
params.bwaaln_options = ''
4747
params.bwamem_options = ''
48+
params.bwamem2_options = ''
4849
params.bwasw_options = ''
4950
params.graphmap2_options = '' // owler option is possible
5051
params.hisat2_options = ''
@@ -158,13 +159,13 @@ if ("bbmap" in aligner_list && !params.relax){
158159
}
159160

160161
// --- bwa aln tool ---
161-
//if ("bwaaln" in aligner_list && !params.relax){
162-
// if (params.read_type == "pacbio" || params.read_type == "ont"){
163-
// log.warn("""Error: bwaaln is not suitable for long reads.
164-
//However, if you know what you are doing you can activate the AliNe --relax parameter to use it anyway.\n""")
165-
// stop_pipeline = true
166-
// }
167-
//}
162+
if ("bwaaln" in aligner_list && !params.relax){
163+
if (params.read_type == "pacbio" || params.read_type == "ont"){
164+
log.warn("""Error: bwaaln is not suitable for long reads.
165+
However, if you know what you are doing you can activate the AliNe --relax parameter to use it anyway.\n""")
166+
stop_pipeline = true
167+
}
168+
}
168169

169170
// --- bwa mem tool ---
170171
if ("bwamem" in aligner_list && !params.relax){
@@ -180,20 +181,29 @@ if ("bwamem" in aligner_list && !params.relax){
180181
}
181182
}
182183

183-
// --- bwa sw tool ---
184-
if ("bwasw" in aligner_list && !params.relax){
184+
// --- bwa mem2 tool ---
185+
if ("bwamem2" in aligner_list && !params.relax){
185186
if (params.read_type == "pacbio"){
186-
if ( !params.bwasw_options.contains(" pacbio") ){
187-
params.replace("bwamem_options", "${params.bwasw_options} -x pacbio")
187+
if ( !params.bwamem2_options.contains(" pacbio") ){
188+
params.replace("bwamem2_options", "${params.bwamem2_options} -x pacbio")
188189
}
189190
}
190191
if (params.read_type == "ont"){
191-
if ( !params.bwasw_options.contains(" ont2d") ){
192-
params.replace("bwamem_options", "${params.bwasw_options} -x ont2d")
192+
if ( !params.bwamem2_options.contains(" ont2d") ){
193+
params.replace("bwamem2_options", "${params.bwamem2_options} -x ont2d")
193194
}
194195
}
195196
}
196197

198+
// --- bwa sw tool ---
199+
if ("bwasw" in aligner_list && !params.relax){
200+
if (params.read_type == "pacbio" || params.read_type == "ont"){
201+
log.warn("""Error: bwasw is not suitable for long reads.
202+
However, if you know what you are doing you can activate the AliNe --relax parameter to use it anyway.\n""")
203+
stop_pipeline = true
204+
}
205+
}
206+
197207
// --- graphmap2 tool ---
198208
if ("graphmap2" in aligner_list ){
199209
if (annotation_file && !params.graphmap2_options.contains("--gtf ") ){
@@ -353,11 +363,12 @@ include {bbmap_index; bbmap} from "$baseDir/modules/bbmap.nf"
353363
include {bowtie_index; bowtie} from "$baseDir/modules/bowtie.nf"
354364
include {bowtie2_index; bowtie2} from "$baseDir/modules/bowtie2.nf"
355365
include {bwa_index; bwaaln; bwamem; bwasw} from "$baseDir/modules/bwa.nf"
366+
include {bwamem2_index; bwamem2} from "$baseDir/modules/bwamem2.nf"
356367
include {seqkit_convert} from "$baseDir/modules/seqkit.nf"
357368
include {graphmap2_index; graphmap2} from "$baseDir/modules/graphmap2.nf"
358369
include {fastp} from "$baseDir/modules/fastp.nf"
359370
include {fastqc as fastqc_raw; fastqc as fastqc_fastp; fastqc as fastqc_ali_bbmap; fastqc as fastqc_ali_bowtie ; fastqc as fastqc_ali_bowtie2 ;
360-
fastqc as fastqc_ali_bwaaln; fastqc as fastqc_ali_bwamem; fastqc as fastqc_ali_bwasw; fastqc as fastqc_ali_graphmap2 ;
371+
fastqc as fastqc_ali_bwaaln; fastqc as fastqc_ali_bwamem; fastqc as fastqc_ali_bwamem2; fastqc as fastqc_ali_bwasw; fastqc as fastqc_ali_graphmap2 ;
361372
fastqc as fastqc_ali_hisat2; fastqc as fastqc_ali_kallisto; fastqc as fastqc_ali_minimap2; fastqc as fastqc_ali_ngmlr;
362373
fastqc as fastqc_ali_novoalign ; fastqc as fastqc_ali_nucmer; fastqc as fastqc_ali_star; fastqc as fastqc_ali_subread ;
363374
fastqc as fastqc_ali_sublong } from "$baseDir/modules/fastqc.nf"
@@ -370,16 +381,16 @@ include {nucmer} from "$baseDir/modules/mummer4.nf"
370381
include {novoalign_index; novoalign} from "$baseDir/modules/novoalign.nf"
371382
include {salmon_index; salmon_guess_lib; set_tuple_withUserLib} from "$baseDir/modules/salmon.nf"
372383
include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; samtools_sam2bam as samtools_sam2bam_bowtie2; samtools_sam2bam as samtools_sam2bam_bwaaln;
373-
samtools_sam2bam as samtools_sam2bam_bwamem; samtools_sam2bam as samtools_sam2bam_bwasw; samtools_sam2bam as samtools_sam2bam_graphmap2;
384+
samtools_sam2bam as samtools_sam2bam_bwamem; samtools_sam2bam as samtools_sam2bam_bwamem2; samtools_sam2bam as samtools_sam2bam_bwasw; samtools_sam2bam as samtools_sam2bam_graphmap2;
374385
samtools_sam2bam as samtools_sam2bam_hisat2; samtools_sam2bam as samtools_sam2bam_minimap2;
375386
samtools_sam2bam as samtools_sam2bam_ngmlr; samtools_sam2bam as samtools_sam2bam_novoalign } from "$baseDir/modules/samtools.nf"
376387
include {samtools_sort as samtools_sort_bbmap; samtools_sort as samtools_sort_bowtie; samtools_sort as samtools_sort_bowtie2; samtools_sort as samtools_sort_bwaaln;
377-
samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_graphmap2;
388+
samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwamem2; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_graphmap2;
378389
samtools_sort as samtools_sort_hisat2; samtools_sort as samtools_sort_minimap2; samtools_sort as samtools_sort_ngmlr;
379390
samtools_sort as samtools_sort_novoalign; samtools_sort as samtools_sort_nucmer;
380391
samtools_sort as samtools_sort_sublong } from "$baseDir/modules/samtools.nf"
381392
include {samtools_stats as samtools_stats_ali_bbmap; samtools_stats as samtools_stats_ali_bowtie ; samtools_stats as samtools_stats_ali_bowtie2 ;
382-
samtools_stats as samtools_stats_ali_bwaaln; samtools_stats as samtools_stats_ali_bwamem; samtools_stats as samtools_stats_ali_bwasw; samtools_stats as samtools_stats_ali_graphmap2 ;
393+
samtools_stats as samtools_stats_ali_bwaaln; samtools_stats as samtools_stats_ali_bwamem; samtools_stats as samtools_stats_ali_bwamem2; samtools_stats as samtools_stats_ali_bwasw; samtools_stats as samtools_stats_ali_graphmap2 ;
383394
samtools_stats as samtools_stats_ali_hisat2; samtools_stats as samtools_stats_ali_kallisto; samtools_stats as samtools_stats_ali_minimap2; samtools_stats as samtools_stats_ali_ngmlr;
384395
samtools_stats as samtools_stats_ali_novoalign ; samtools_stats as samtools_stats_ali_nucmer; samtools_stats as samtools_stats_ali_star; samtools_stats as samtools_stats_ali_subread ;
385396
samtools_stats as samtools_stats_ali_sublong } from "$baseDir/modules/samtools.nf"
@@ -670,7 +681,7 @@ workflow align {
670681
}
671682
}
672683

673-
// ------------------- BWA -----------------
684+
// ------------------- BWA ALN/MEM/SW -----------------
674685
if ("bwaaln" in aligner_list || "bwamem" in aligner_list || "bwasw" in aligner_list){
675686
// index
676687
bwa_index(genome.collect(), "alignment/bwa/indicies")
@@ -739,6 +750,31 @@ workflow align {
739750
}
740751
}
741752

753+
// ------------------- BWA MEM2 -----------------
754+
if ("bwamem2" in aligner_list){
755+
// index
756+
bwamem2_index(genome.collect(), "alignment/bwamem2/indicies")
757+
// align
758+
bwamem2(reads, genome.collect(), bwamem2_index.out.collect(), "alignment/bwamem2/")
759+
logs.concat(bwamem2.out.bwamem2_summary).set{logs} // save log
760+
// convert sam to bam
761+
samtools_sam2bam_bwamem2(bwamem2.out.tuple_sample_sam)
762+
// sort
763+
samtools_sort_bwamem2(samtools_sam2bam_bwamem2.out.tuple_sample_bam, "alignment/bwamem2/")
764+
samtools_sort_bwamem2.out.tuple_sample_sortedbam.set{bwamem2_ali} // set name
765+
// save aligned reads
766+
sorted_bam.concat(bwamem2_ali).set{sorted_bam}
767+
// stat on aligned reads
768+
if(params.fastqc){
769+
fastqc_ali_bwamem2(bwamem2_ali, "fastqc/bwamem2", "bwamem2")
770+
logs.concat(fastqc_ali_bwamem2.out).set{logs} // save log
771+
}
772+
if(params.samtools_stats){
773+
samtools_stats_ali_bwamem2(bwamem2_ali, genome.collect(), "samtools_stats/bwamem2", "bwamem2")
774+
logs.concat(samtools_stats_ali_bwamem2.out).set{logs} // save log
775+
}
776+
}
777+
742778
// ------------------- GRAPHMAP2 -----------------
743779
if ("graphmap2" in aligner_list ){
744780
// index
@@ -1058,6 +1094,7 @@ def helpMSG() {
10581094
--bowtie2_options additional options for bowtie2
10591095
--bwaaln_options additional options for bwaaln
10601096
--bwamem_options additional options for bwamem
1097+
--bwamem2_options additional options for bwamem2
10611098
--bwasw_options additional options for bwasw
10621099
--graphmap2_options additional options for graphmap2
10631100
--hisat2_options additional options for hisat2
@@ -1106,6 +1143,11 @@ def printAlignerOptions(aligner_list, annotation_file, star_index_options) {
11061143
bwamem parameters
11071144
bwamem_options : ${params.bwamem_options}
11081145
"""}
1146+
if ("bwamem2" in aligner_list){
1147+
sentence += """
1148+
bwamem2 parameters
1149+
bwamem2_options : ${params.bwamem2_options}
1150+
"""}
11091151
if ("bwasw" in aligner_list){
11101152
sentence += """
11111153
bwasw parameters

config/softwares.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ process {
1414
withLabel: 'bwa' {
1515
container = 'quay.io/biocontainers/bwa:0.7.17--he4a0461_11'
1616
}
17+
withLabel: 'bwamem2' {
18+
container = 'quay.io/biocontainers/bwa-mem2:2.2.1--he70b90d_8'
19+
}
1720
withLabel: 'fastp' {
1821
container = 'quay.io/biocontainers/fastp:0.23.4--h125f33a_5'
1922
}

modules/bwamem2.nf

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/* Module related to bwamem2
2+
https://github.com/bwa-mem2/bwa-mem2?tab=readme-ov-file
3+
4+
info:
5+
We are happy to announce that the index size on disk is down by 8 times and in memory by 4 times due to moving to only one type of FM-index (2bit.64 instead of 2bit.64 and 8bit.32) and 8x compression of suffix array.
6+
For example, for human genome, index size on disk is down to ~10GB from ~80GB and memory footprint is down to ~10GB from ~40GB.
7+
There is a substantial reduction in index IO time due to the reduction and hardly any performance impact on read mapping.
8+
Due to this change in index structure (in commit #4b59796, 10th October 2020), you will need to rebuild the index.
9+
Added MC flag in the output sam file in commit a591e22. Output should match original bwa-mem version 0.7.17.
10+
*/
11+
12+
/*
13+
* To index with BWA MEM2
14+
*/
15+
process bwamem2_index {
16+
label 'bwamem2'
17+
tag "$genome_fasta"
18+
publishDir "${params.outdir}/${outpath}", mode: 'copy'
19+
20+
input:
21+
path(genome_fasta)
22+
val outpath
23+
24+
output:
25+
path("*")
26+
27+
script:
28+
"""
29+
bwa-mem2 index $genome_fasta -p ${genome_fasta.baseName}
30+
"""
31+
}
32+
33+
/*
34+
* To align with BWA MEM2
35+
*/
36+
process bwamem2 {
37+
label 'bwamem2'
38+
tag "$sample"
39+
publishDir "${params.outdir}/${outpath}", pattern: "*bwamem2.log", mode: 'copy'
40+
41+
input:
42+
tuple val(sample), path(reads), val(readtype), val(read_length)
43+
path genome
44+
path bwa_index_files
45+
val outpath
46+
47+
output:
48+
tuple val(sample), path ("*bwamem2.sam"), emit: tuple_sample_sam
49+
path "*bwamem2.log", emit: bwamem2_summary
50+
51+
script:
52+
fileName = reads[0].baseName.replace('.fastq','')
53+
54+
if (params.read_type == "short_paired"){
55+
"""
56+
bwa-mem2 mem ${params.bwamem2_options} -t ${task.cpus} ${genome.baseName} ${reads[0]} ${reads[1]} > ${fileName}_bwamem2.sam 2> ${fileName}_bwamem2.log
57+
"""
58+
} else {
59+
"""
60+
bwa-mem2 mem ${params.bwamem2_options} -t ${task.cpus} ${genome.baseName} ${reads} > ${fileName}_bwamem2.sam 2> ${fileName}_bwamem2.log
61+
"""
62+
}
63+
}

profiles/test_illumina_paired.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
params {
88
reads = "$baseDir/test/illumina/"
99
genome = "$baseDir/test/yeast.fa"
10-
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread'
10+
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread'
1111
star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857
1212
multiqc_config = "$baseDir/config/multiqc_conf.yml"
1313
}

profiles/test_illumina_single.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ params {
88
reads = "$baseDir/test/illumina/"
99
genome = "$baseDir/test/yeast.fa"
1010
// Plain key: this line already sits inside the params { } scope, so no `params.` prefix is needed.
read_type = "short_single"
11-
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
11+
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
1212
trimming_fastp = true
1313
fastqc = true
1414
samtools_stats = true

profiles/test_ont.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ params {
88
reads = "$baseDir/test/nanopore"
99
genome = "$baseDir/test/yeast.fa"
1010
read_type = "ont"
11-
aligner = 'bbmap,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
11+
aligner = 'bowtie2,bwamem,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
1212
library_type = 'U'
1313
star_options = '--outFilterMismatchNmax 100 --seedSearchLmax 30 --seedSearchStartLmax 30 --seedPerReadNmax 100000 --seedPerWindowNmax 100 --alignTranscriptsPerReadNmax 100000 --alignTranscriptsPerWindowNmax 10000'
1414
multiqc_config = "$baseDir/config/multiqc_conf.yml"

profiles/test_pacbio.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ params {
88
reads = "$baseDir/test/pacbio/"
99
genome = "$baseDir/test/yeast.fa"
1010
read_type = "pacbio"
11-
aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
11+
// bwaaln and bwasw are left out: aline.nf stops the pipeline when they are requested for pacbio reads (unless --relax is set).
aligner = 'bbmap,bowtie,bowtie2,bwamem,bwamem2,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
1212
star_options = '--outFilterMismatchNmax 100 --seedSearchLmax 30 --seedSearchStartLmax 30 --seedPerReadNmax 100000 --seedPerWindowNmax 100 --alignTranscriptsPerReadNmax 100000 --alignTranscriptsPerWindowNmax 10000'
1313
star_index_options = '--genomeSAindexNbases 9'
1414
multiqc_config = "$baseDir/config/multiqc_conf.yml"

0 commit comments

Comments
 (0)