Skip to content

Commit 5238f5d

Browse files
committed
add sqlite db build rules
1 parent 59ff867 commit 5238f5d

File tree

3 files changed

+21
-4
lines changed

3 files changed

+21
-4
lines changed

Snakefile

+10-4
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ rule all:
44
input:
55
"nr_cluster_taxid_formatted_final_line_count.txt",
66
"nr_cluster_taxid_formatted_final.tsv.gz",
7-
"nr_cluster_uniq_reps_line_count.txt"
7+
"nr_cluster_uniq_reps_line_count.txt",
8+
"nr_cluster_taxid_formatted_final.sqlite"
89

910
#############################################################################
1011
## cluster NR with mmseqs2
@@ -150,13 +151,18 @@ rule compress_output:
150151
input: "nr_cluster_taxid_formatted_final.tsv"
151152
output: "nr_cluster_taxid_formatted_final.tsv.gz"
152153
shell: '''
153-
gzip {input}
154+
gzip -c {input} > {output}
154155
'''
155156

156157
rule get_line_count:
157-
input: "nr_cluster_taxid_formatted_final.tsv.gz"
158+
input: "nr_cluster_taxid_formatted_final.tsv"
158159
output: "nr_cluster_taxid_formatted_final_line_count.txt"
159160
shell:'''
160-
gunzip -c {input} | wc -l > {output}
161+
# Feed the file on stdin so wc writes only the count; with a file operand
# it would write "<count> <filename>" into the output.
wc -l < {input} > {output}
161162
'''
162163

164+
## build a sqlite database from the final formatted tsv (runs scripts/make_sqlite_db.R)
rule make_sqlite_db:
165+
input: tsv="nr_cluster_taxid_formatted_final.tsv"
166+
output: sqlite="nr_cluster_taxid_formatted_final.sqlite"
167+
conda: 'envs/r-sql.yml'
168+
script: "scripts/make_sqlite_db.R"

envs/r-sql.yml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
- defaults
5+
dependencies:
6+
- r-rsqlite=2.3.0

scripts/make_sqlite_db.R

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Build a SQLite database from the tab-separated table handed over by
# Snakemake: the input named "tsv" is imported as the table
# "nr_cluster_taxid_table" into the output file named "sqlite".
library(RSQLite)

db <- dbConnect(SQLite(), dbname = snakemake@output[["sqlite"]])

# `value` is a file path, so the TSV is imported directly from disk. The
# `finally` clause closes the connection even when the import fails; the
# error itself still propagates and fails the Snakemake job.
invisible(tryCatch(
  dbWriteTable(
    conn = db,
    name = "nr_cluster_taxid_table",
    value = snakemake@input[["tsv"]],
    sep = "\t"
  ),
  finally = dbDisconnect(db)
))

0 commit comments

Comments
 (0)