@@ -23,10 +23,47 @@ our @EXPORT_OK = qw(
23
23
24
24
local $0 =basename $0 ;
25
25
26
+ =pod
27
+
28
+ =head1 NAME
+
+ Mashtree
29
+
30
+ =head1 SYNOPSIS
31
+
32
+ Helps run a mashtree analysis to make rapid trees for genomes.
33
+ Please see github.com/lskatz/Mashtree for more information.
34
+
35
+ =head1 VARIABLES
36
+
37
+ =over
38
+
39
+ =item $VERSION
40
+
41
+ =item $MASHTREE_VERSION (same value as $VERSION)
42
+
43
+ =item @fastqExt = qw(.fastq.gz .fastq .fq .fq.gz)
44
+
45
+ =item @fastaExt = qw(.fasta .fna .faa .mfa .fas .fsa .fa)
46
+
47
+ =item @bamExt = qw(.sorted.bam .bam)
48
+
49
+ =item @vcfExt = qw(.vcf.gz .vcf)
50
+
51
+ =item @mshExt = qw(.msh)
52
+
53
+ =item @richseqExt = qw(.gb .gbank .genbank .gbk .gbs .gbf .embl .ebl .emb .dat .swiss .sp)
54
+
55
+ =item $fhStick :shared
56
+
57
+ Used to mark whether a file is being read, so that Mashtree limits disk I/O
58
+
59
+ =back
60
+
61
+ =cut
62
+
26
63
# #####
27
64
# CONSTANTS
28
65
29
- our $VERSION = " 1.3.1 " ;
66
+ our $VERSION = " 1.4.0 " ;
30
67
our $MASHTREE_VERSION =$VERSION ;
31
68
our @fastqExt =qw( .fastq.gz .fastq .fq .fq.gz) ;
32
69
our @fastaExt =qw( .fasta .fna .faa .mfa .fas .fsa .fa) ;
@@ -41,6 +78,16 @@ our @richseqExt=qw(.gb .gbank .genbank .gbk .gbs .gbf .embl .ebl .emb .dat .swis
41
78
# Helpful things
42
79
my $fhStick :shared; # A thread can only open a fastq file if it has the talking stick.
43
80
81
+ =head1 METHODS
82
+
83
+ =over
84
+
85
+ =item $SIG{'__DIE__'}
86
+
87
+ Overrides how C<die> works, so that the error message is prefixed with the program name and the calling subroutine.
88
+
89
+ =cut
90
+
44
91
# ################################################
45
92
# ## COMMON SUBS/TOOLS (not object subroutines) ##
46
93
# ################################################
@@ -53,6 +100,15 @@ $SIG{'__DIE__'} = sub {
53
100
$e =~ s / (at [^\s ]+? line \d +\. $)/ \n Stopped $1 / ;
54
101
die (" $0 : $callerSub : $e " );
55
102
};
103
+
104
+ =pod
105
+
106
+ =item logmsg
107
+
108
+ Prints a message to STDERR with the thread number and the program name, with a trailing newline.
109
+
110
+ =cut
111
+
56
112
# Centralized logmsg
57
113
# sub logmsg {print STDERR "$0: ".(caller(1))[3].": @_\n";}
58
114
sub logmsg {
@@ -69,7 +125,14 @@ sub logmsg {
69
125
print STDERR $msg ;
70
126
}
71
127
72
- # Opens a fastq file in a thread-safe way.
128
+ =pod
129
+
130
+ =item openFastq
131
+
132
+ Opens a fastq file in a thread-safe way.
133
+
134
+ =cut
135
+
73
136
sub openFastq{
74
137
my ($fastq ,$settings )=@_ ;
75
138
@@ -87,7 +150,14 @@ sub openFastq{
87
150
return $fh ;
88
151
}
89
152
90
- # Removes fastq extension, removes directory name,
153
+ =pod
154
+
155
+ =item _truncateFilename
156
+
157
+ Removes the fastq extension and the directory name.
158
+
159
+ =cut
160
+
91
161
sub _truncateFilename{
92
162
my ($file ,$settings )=@_ ;
93
163
# strip off msh and any other known extensions
@@ -101,9 +171,17 @@ sub _truncateFilename{
101
171
return $name ;
102
172
}
103
173
174
+ =pod
175
+
176
+ =item distancesToPhylip
177
+
178
+ 1. Read the mash distances
179
+ 2. Create a phylip file
180
+
181
+ Arguments: hash of distances, output directory, settings hash
182
+
183
+ =cut
104
184
105
- # 1. Read the mash distances
106
- # 2. Create a phylip file
107
185
sub distancesToPhylip{
108
186
my ($distances ,$outdir ,$settings )=@_ ;
109
187
@@ -172,6 +250,20 @@ sub distancesToPhylip{
172
250
return $phylip ;
173
251
}
174
252
253
+ =pod
254
+
255
+ =item sortNames
256
+
257
+ Sorts names.
258
+
259
+ Arguments:
260
+
261
+ 1. $name - array of names
262
+ 2. $settings - options
263
+ * $$settings{'sort-order'} is one of "abc", "random", or "input-order"
264
+
265
+ =cut
266
+
175
267
sub sortNames{
176
268
my ($name ,$settings )=@_ ;
177
269
my @sorted ;
@@ -187,8 +279,15 @@ sub sortNames{
187
279
return @sorted ;
188
280
}
189
281
190
- # Create tree file with Quicktree but bioperl
191
- # as a backup.
282
+ =pod
283
+
284
+ =item createTreeFromPhylip($phylip, $outdir, $settings)
285
+
286
+ Creates a tree file with Quicktree, using BioPerl
287
+ as a backup.
288
+
289
+ =cut
290
+
192
291
sub createTreeFromPhylip{
193
292
my ($phylip ,$outdir ,$settings )=@_ ;
194
293
@@ -225,8 +324,15 @@ sub createTreeFromPhylip{
225
324
226
325
}
227
326
228
- # Lee's implementation of a tree distance. The objective
229
- # is to return zero if two trees are the same.
327
+ =pod
328
+
329
+ =item treeDist($treeObj1, $treeObj2)
330
+
331
+ Lee's implementation of a tree distance. The objective
332
+ is to return zero if two trees are the same.
333
+
334
+ =cut
335
+
230
336
sub treeDist{
231
337
my ($treeObj1 ,$treeObj2 )=@_ ;
232
338
@@ -309,8 +415,15 @@ sub treeDist{
309
415
return $euclideanDistance ;
310
416
}
311
417
312
- # Find the distance between two mash sketch files
313
- # Alternatively: two hash lists.
418
+ =pod
419
+
420
+ =item mashDist($file1, $file2, $k, $settings)
421
+
422
+ Find the distance between two mash sketch files.
423
+ Alternatively: two hash lists.
424
+
425
+ =cut
426
+
314
427
sub mashDist{
315
428
my ($file1 , $file2 , $k , $settings )=@_ ;
316
429
@@ -354,6 +467,14 @@ sub mashDist{
354
467
return $mash_distance ;
355
468
}
356
469
470
+ =pod
471
+
472
+ =item mashHashes($sketch)
473
+
474
+ Returns a reference to an array of hashes, the kmer length, and the estimated genome length.
475
+
476
+ =cut
477
+
357
478
sub mashHashes{
358
479
my ($sketch )=@_ ;
359
480
my @hash ;
@@ -393,9 +514,16 @@ sub mashHashes{
393
514
return (\@hash , $kmer , $length );
394
515
}
395
516
396
- # Compare unequal sized hashes. Treat the first
397
- # set of hashes as the reference (denominator)
398
- # set.
517
+ =pod
518
+
519
+ =item raw_mash_distance_unequal_sizes($hashes1, $hashes2)
520
+
521
+ Compare unequal sized hashes. Treat the first
522
+ set of hashes as the reference (denominator)
523
+ set.
524
+
525
+ =cut
526
+
399
527
sub raw_mash_distance_unequal_sizes{
400
528
my ($hashes1 , $hashes2 ) = @_ ;
401
529
@@ -416,7 +544,15 @@ sub raw_mash_distance_unequal_sizes{
416
544
return ($common ,$total );
417
545
}
418
546
419
- # https://github.com/onecodex/finch-rs/blob/master/src/distance.rs#L34
547
+ =pod
548
+
549
+ =item raw_mash_distance($hashes1, $hashes2)
550
+
551
+ Returns the number of kmers in common and the total number compared. Inspired by
552
+ https://github.com/onecodex/finch-rs/blob/master/src/distance.rs#L34
553
+
554
+ =cut
555
+
420
556
sub raw_mash_distance{
421
557
my ($hashes1 , $hashes2 ) = @_ ;
422
558
@@ -472,7 +608,7 @@ sub raw_mash_distance{
472
608
# The only difference is that it isn't an object method
473
609
# and that it is called without an OO implementation.
474
610
475
- =head2 transfer_bootstrap_expectation
611
+ =item transfer_bootstrap_expectation
476
612
477
613
Title : transfer_bootstrap_expectation
478
614
Usage : my $tree_with_bs = transfer_bootstrap_expectation(\@bs_trees,$guide_tree);
@@ -482,6 +618,9 @@ sub raw_mash_distance{
482
618
Returns : L<Bio::Tree::TreeI>
483
619
Args : Arrayref of L<Bio::Tree::TreeI> s
484
620
Guide tree, L<Bio::Tree::TreeI> s
621
+
622
+ =back
623
+
485
624
=cut
486
625
487
626
sub transfer_bootstrap_expectation{
0 commit comments