Skip to content

[D] Update fasta #477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 13, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 22 additions & 32 deletions bench/algorithm/fasta/1.d
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
// translation from zig version. From https://github.com/cyrusmsk/lang_benchmark

import std;
import std.outbuffer: OutBuffer;

immutable maxLine = 60;
immutable im = 139_968;
Expand All @@ -11,29 +8,28 @@ immutable ic = 29_573;
uint seed = 42;

// One entry of a symbol table: `l` is the symbol that generateAndWrap copies
// into the output line, and `p` is the weight that generateAndWrap accumulates
// into its cumulative-probability array.
// NOTE(review): this text is a rendered diff without +/- markers; `char l;` and
// `ubyte l;` look like the removed and added sides of the same field, not two fields.
static struct AminoAcid {
char l;
ubyte l;
double p;
}

// Advances the module-level `seed` with the step seed = (seed * ia + ic) % im
// and returns `max * seed / im`, i.e. the new seed scaled by max / im.
// Every call mutates `seed`, so the sequence of results depends on call order.
// `ia` is declared outside the lines visible in this view.
// NOTE(review): the two `return` lines appear to be the removed and added sides
// of the diff; they differ only in spacing around `/`.
double nextRandom(double max) {
seed = (seed * ia + ic) % im;
return max * seed/im;
return max * seed / im;
}

// Emits `count` symbols taken cyclically from `seq`, one output line per loop
// iteration, each line holding min(maxLine, remaining) symbols plus a newline.
// NOTE(review): this text is a rendered diff without +/- markers. Lines that use
// the OutBuffer parameter `b`, `char`, and `immutable rem` / `immutable size_t
// lineLength` appear to be the removed side; the `ubyte` / `writeln` lines appear
// to be the added side. The `Expand All @@ ...` line marks a collapsed hunk, so
// the end of the loop body is not visible here.
void repeatAndWrap (OutBuffer b, immutable char[] seq, size_t count) {
void repeatAndWrap (immutable ubyte[] seq, size_t count) {
uint len = cast(uint) seq.length;
// paddedSeq holds seq repeated cyclically with maxLine extra elements, so a
// slice of up to maxLine elements starting at any offset <= len stays in bounds.
char[] paddedSeq = new char[](len + maxLine);
ubyte[] paddedSeq = new ubyte[](len + maxLine);
foreach (i, ref e; paddedSeq)
e = seq[i % len];

// off: start of the next slice inside paddedSeq; idx is compared against count
// in the loop condition (its update is presumably in the collapsed hunk — verify).
size_t off, idx;
size_t rem, lineLength;
while (idx < count) {
immutable rem = count - idx;
immutable size_t lineLength = min(maxLine, rem);
rem = count - idx;
lineLength = min(maxLine, rem);

// speed up the writeln with lockWriter
// NOTE(review): no lockWriter call is visible in this block; the comment above
// may be stale — confirm against the full file.
b.write(paddedSeq[off .. off + lineLength]);
b.write("\n");
writeln(cast(string)paddedSeq[off .. off + lineLength]);

off += lineLength;
// The statement guarded by this condition is hidden by the collapsed hunk;
// presumably it wraps `off` back into the first `len` elements — TODO confirm.
if (off > len)
Expand All @@ -42,20 +38,20 @@ void repeatAndWrap (OutBuffer b, immutable char[] seq, size_t count) {
}
}

// Emits `count` pseudo-random symbols drawn from `nucleotides`, in lines of at
// most maxLine symbols, each followed by a newline.
// NOTE(review): this text is a rendered diff without +/- markers. Lines that use
// the OutBuffer parameter `b`, the heap-allocated `char[] line`, and the
// `immutable` loop locals appear to be the removed side; the fixed-size
// `ubyte[maxLine+1] line` and `write(cast(string)...)` lines appear to be the
// added side. The `Expand All @@ ...` line marks a collapsed hunk.
void generateAndWrap (OutBuffer b, immutable AminoAcid[] nucleotides, size_t count) {
void generateAndWrap (immutable AminoAcid[] nucleotides, size_t count) {
// cumProbTotal[i] is the running sum of p up to entry i, multiplied by im so it
// is on the same scale as the values returned by nextRandom(im).
double cumProb = 0.0;
double[] cumProbTotal = new double[](nucleotides.length);
foreach(i, e; nucleotides) {
cumProb += e.p;
cumProbTotal[i] = cumProb * im;
}

// Line buffer with one extra slot so a full line of maxLine symbols can carry
// its trailing newline.
char[] line = new char[](maxLine+1);
line[maxLine] = '\n';
size_t idx;
ubyte[maxLine+1] line; // was new before
line[maxLine] = cast(ubyte)'\n';
size_t idx, rem, lineLength;
while (idx < count) {
immutable rem = count - idx;
immutable size_t lineLength = min(maxLine, rem);
rem = count - idx;
lineLength = min(maxLine, rem);
foreach (ref col; line[0 .. lineLength]) {
immutable r = nextRandom(im);
size_t c;
// The code that chooses index `c` from `r` is hidden by the collapsed hunk;
// presumably it compares `r` against cumProbTotal — TODO confirm.
Expand All @@ -65,21 +61,18 @@ void generateAndWrap (OutBuffer b, immutable AminoAcid[] nucleotides, size_t cou
col = nucleotides[c].l;
}
// Terminate the (possibly short) line right after its last symbol and write
// the symbols together with that newline.
line[lineLength] = '\n';
b.write(line[0 .. lineLength + 1]);
write(cast(string)line[0 .. lineLength + 1]);

idx += lineLength;
}
}

// Program entry point. Reads n from the first command-line argument (100 when
// no argument is given) and writes three sections, each a header line followed
// by sequence data: 2*n symbols via repeatAndWrap, then 3*n and 5*n symbols via
// generateAndWrap.
// NOTE(review): this text is a rendered diff without +/- markers. The lines that
// create, pass, write and clear the OutBuffer `b`, and the `write(">...\n")`
// headers, appear to be the removed side; the `writeln(">...")` headers and the
// calls without `b` appear to be the added side. The `Expand All @@ ...` line
// marks a collapsed hunk, so some table entries are not visible here.
void main(string[] args) {
immutable uint n = args.length > 1 ? args[1].to!uint : 100;
OutBuffer b = new OutBuffer();

// 72*3 + 71 = 287, the same length used in the ubyte[287] cast below.
static immutable char[72*3 + 71] homoSapiensAlu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
write(">ONE Homo sapiens alu\n");
repeatAndWrap(b, homoSapiensAlu, 2 * n);
write(b);
b.clear();
static immutable(ubyte[72*3 + 71]) homoSapiensAlu = cast(immutable(ubyte[287]))"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
writeln(">ONE Homo sapiens alu");
repeatAndWrap(homoSapiensAlu, 2 * n);

// Symbol/weight table for the second section; only part of it is visible
// because of the collapsed hunk.
static immutable AminoAcid[15] iubNucleotideInfo = [
{ l:'a', p: 0.27 },
Expand All @@ -98,18 +91,15 @@ void main(string[] args) {
{ l:'W', p: 0.02 },
{ l:'Y', p: 0.02 },
];
write(">TWO IUB ambiguity codes\n");
generateAndWrap(b, iubNucleotideInfo, 3 * n);
write(b);
b.clear();
writeln(">TWO IUB ambiguity codes");
generateAndWrap(iubNucleotideInfo, 3 * n);

// Symbol/weight table for the third section.
static immutable AminoAcid[4] homoSapienNucleotideInfo = [
{ l:'a', p: 0.3029549426680 },
{ l:'c', p: 0.1979883004921 },
{ l:'g', p: 0.1975473066391 },
{ l:'t', p: 0.3015094502008 },
];
write(">THREE Homo sapiens frequency\n");
generateAndWrap(b, homoSapienNucleotideInfo, 5 * n);
write(b);
writeln(">THREE Homo sapiens frequency");
generateAndWrap(homoSapienNucleotideInfo, 5 * n);
}
Loading