Makes a sequence dictionary from FASTA data, and writes it to a file. | (ns cljam.io.dict.writer
(:require [digest]
[cljam.io.sam.common :refer [sam-version]]
[cljam.util :refer [string->bytes graph?]])
(:import java.io.BufferedWriter)) |
DICTWriter | |
;; Wrapper type pairing a BufferedWriter with a `url` value.
;; `writer` is the underlying java.io.BufferedWriter; `url` is stored as given
;; and is not used by the type itself.
;; Implements java.io.Closeable so instances can be used with `with-open`;
;; closing the DICTWriter closes the wrapped writer.
(deftype DICTWriter [^BufferedWriter writer url]
  java.io.Closeable
  (close [this]
    (.close writer)))
Making dict | |
(def ^:private ^:const upper-case-offset
  "Distance to add to an ASCII lower-case letter code to obtain the matching
  upper-case letter code, i.e. `(- (int \\A) (int \\a))`, which is -32."
  (- (int \A) (int \a)))
(defn- upper-case
  "Upper-cases a single character code.
  When `b` lies between the codes of `\\a` and `\\z` (inclusive), returns it
  shifted by `upper-case-offset` and coerced to a byte; any other value is
  returned unchanged."
  [^long b]
  (if (<= (byte (int \a)) b (byte (int \z)))
    (byte (+ b upper-case-offset))
    b))
(defn- make-hash
  "Normalizes the sequence string, calculates its MD5 hash, and returns it.
  Normalization converts `sequence'` to a byte array with `string->bytes` and
  upper-cases every element in place via `upper-case`; the resulting array is
  then passed to `digest/md5`."
  [sequence']
  (let [^bytes bases' (string->bytes sequence')]
    ;; Overwrite each byte with its upper-cased counterpart before hashing.
    (dotimes [i (alength bases')]
      (aset bases' i ^byte (upper-case (aget bases' i))))
    (digest/md5 bases')))
(defn- init-dict-status
  "Returns the initial accumulator used while building one dictionary entry:
  an empty `:sequence` string and a `:len` of 0."
  []
  ;; The :sequence value must be present (an empty string); a map literal with
  ;; an odd number of forms is rejected by the reader.
  {:sequence "" :len 0})
(defn- update-dict-status
  "Folds one more chunk `sequence'` into `dict-status`.
  Returns a new map whose `:sequence` is the previous `:sequence` with
  `sequence'` appended, and whose `:len` is the previous `:len` plus the number
  of elements of `sequence'` for which `graph?` is truthy."
  [dict-status sequence']
  (let [{so-far :sequence, len-so-far :len} dict-status
        added (->> sequence'
                   (filter graph?)
                   count)]
    {:sequence (str so-far sequence')
     :len (+ (long len-so-far) added)}))
(defn make-dict
  "Calculates sequence dictionary from the headers and sequences, returning it
  as a map.

  Arguments:
  - `_headers`  accepted for interface compatibility; not used here.
  - `sequences` sequential collection of maps with `:name` and `:sequence`.
                Consecutive elements sharing the same `:name` are treated as
                chunks of one entry and accumulated together.
  - `ur`        value stored verbatim under `:ur` in every entry.

  Returns a map from name to `{:blen <accumulated :len> :ur ur :m5 <make-hash
  of the accumulated :sequence>}`. Returns an empty map when `sequences` is
  empty or nil, instead of a spurious entry keyed by nil."
  [_headers sequences ur]
  (letfn [(add-entry [dicts name' status]
            ;; Finalizes the entry for `name'` from its accumulated status.
            (assoc dicts name' {:blen (:len status)
                                :ur ur
                                :m5 (make-hash (:sequence status))}))]
    (if (seq sequences)
      (loop [[seq* & rest'] sequences
             name' (:name seq*)
             dict-status (init-dict-status)
             dicts {}]
        (if seq*
          (let [name'' (:name seq*)
                ;; A change of name means the previous entry is complete.
                new? (not= name'' name')
                dict-status' (update-dict-status
                              (if new? (init-dict-status) dict-status)
                              (:sequence seq*))
                dicts' (if new?
                         (add-entry dicts name' dict-status)
                         dicts)]
            (recur rest' name'' dict-status' dicts'))
          ;; Input exhausted: flush the entry that was still being built.
          (add-entry dicts name' dict-status)))
      {})))
Writing | |
(defn- write-header!
  "Writes the `@HD` header line (with `VN` taken from `sam-version` and
  `SO:unsorted`) to `wtr`, followed by a line separator."
  [^BufferedWriter wtr]
  (let [line (str "@HD\tVN:" sam-version "\tSO:unsorted")]
    (.write wtr line)
    (.newLine wtr)))
(defn- write-sequence!
  "Writes one `@SQ` line to `wtr`, followed by a line separator.
  Fields are tab-separated and emitted in the order `SN` (`name'`), `LN`
  (`blen`), `M5` (`m5`), `UR` (`ur`)."
  [^BufferedWriter wtr name' blen ur m5]
  (let [line (str "@SQ\tSN:" name'
                  "\tLN:" blen
                  "\tM5:" m5
                  "\tUR:" ur)]
    (.write wtr line)
    (.newLine wtr)))
(defn- write-dict*!
  "Builds the dictionary with `make-dict` and writes it to `wtr`: first the
  header line, then one sequence line per element of `headers`, in `headers`
  order, looked up in the dictionary by the header's `:name`."
  [wtr headers sequences ur]
  (let [name->dict (make-dict headers sequences ur)]
    (write-header! wtr)
    (doseq [header headers
            :let [seq-name (:name header)
                  {blen :blen, dict-ur :ur, m5 :m5} (get name->dict seq-name)]]
      (write-sequence! wtr seq-name blen dict-ur m5))))
(defn write-dict!
  "Writes dict for fasta to wtr.
  `wtr` is a `DICTWriter`; its underlying writer receives the output. `headers`,
  `sequences` and `ur` are passed through to `write-dict*!`."
  [^DICTWriter wtr headers sequences ur]
  (let [out (.writer wtr)]
    (write-dict*! out headers sequences ur)))