Encoder of BAM alignment blocks. | (ns cljam.io.bam.encoder
(:require [clojure.string :as cstr]
[cljam.util :as util]
[cljam.io.sam.util :as sam-util]
[cljam.io.sam.util.refs :as refs]
[cljam.io.sam.util.quality :as qual]
[cljam.io.sam.util.cigar :as cigar]
[cljam.io.sam.util.sequence :as seq]
[cljam.io.util.lsb.io-stream :as lsb]
[cljam.io.bam.common :as common])) |
(def ^:private ^:const fixed-tag-size
  "Bytes written for every optional field before its value: the two-byte
  tag name followed by the one-byte value type."
  3)

(def ^:private ^:const fixed-binary-array-tag-size
  "Extra bytes written for a B-typed (array) optional field before its
  elements: the one-byte element subtype followed by the four-byte element
  count."
  5)
(defn- get-next-ref-id
  "Returns the reference id to store as the mate's reference for the
  alignment `sa`, looked up in `refs`.

  An :rnext of \"*\" yields -1. An :rnext of \"=\" is resolved through the
  alignment's own :rname; any other value is resolved as a reference name.
  When the lookup finds nothing, -1 is returned."
  [sa refs]
  (let [rnext (:rnext sa)]
    (if (= "*" rnext)
      -1
      (let [ref-name (if (= "=" rnext) (:rname sa) rnext)]
        (or (refs/ref-id refs ref-name) -1)))))
(defn- get-options-size
  "Returns the total number of bytes needed to encode the optional fields
  (:options) of `sam-alignment`.

  Each option is a single-entry map of tag -> {:type t, :value v}. Every
  option costs `fixed-tag-size` bytes plus the size of its value:

    A, c, C     1 byte
    s, S        2 bytes
    i, I, f     4 bytes
    Z           length of the string plus a terminating NUL
    B           `fixed-binary-array-tag-size` plus element count times the
                element size (unknown element subtypes count as 0 bytes)

  The scalar types c, C, s, S and I are sized here because
  `encode-tag-value` is able to write them; without them an option of such
  a type could be written but not sized. Any other scalar type still throws
  IllegalArgumentException (no matching `case` clause). Returns 0 when
  there are no options."
  ^long [sam-alignment]
  (let [array-elem-size (fn [subtype]
                          (case subtype
                            (\c \C) 1
                            (\s \S) 2
                            (\i \I \f) 4
                            0))
        value-size (fn [{val-type :type v :value}]
                     (case (first val-type)
                       (\A \c \C) 1
                       (\s \S) 2
                       (\i \I \f) 4
                       ;; string bytes plus the trailing NUL
                       \Z (inc (count v))
                       ;; textual array form: "<subtype>,<e1>,<e2>,..."
                       \B (let [[array-type & array] (cstr/split v #",")]
                            (+ fixed-binary-array-tag-size
                               (* (count array)
                                  (array-elem-size (first array-type)))))))
        option-size (fn [op]
                      (let [[_ value] (first (seq op))]
                        (+ fixed-tag-size (value-size value))))]
    (transduce (map option-size) + 0 (:options sam-alignment))))
(defn- encode-qual
  "Returns the byte array to write as the quality field of `sam-alignment`.

  When :qual is \"*\", the result has one byte per character of :seq, each
  set to `(util/ubyte 0xff)`. Otherwise :qual is converted with
  `qual/fastq->phred`."
  [sam-alignment]
  (let [q (:qual sam-alignment)]
    (if-not (= q "*")
      (qual/fastq->phred q)
      (let [n (.length ^String (:seq sam-alignment))]
        (byte-array n (util/ubyte 0xff))))))
(defn- encode-tag-value
  "Writes the value of one optional field to `writer`.

  `val-type` is the type character of the field and selects how `value` is
  written:

    A           a single character
    c, C        one byte
    s, S        a 16-bit integer
    i, I        a 32-bit integer
    f           a float
    Z           the string followed by a NUL character
    B           `value` is the text \"<subtype>,<e1>,<e2>,...\"; the subtype
                byte and the element count are written first, then every
                element parsed according to the subtype

  The B branch returns `writer`; the other branches return whatever the
  underlying write call returns. A `val-type` or array subtype without a
  clause throws IllegalArgumentException."
  [writer val-type value]
  (case val-type
    \A (lsb/write-char writer (char value))
    \c (lsb/write-ubyte writer (byte value))
    \C (lsb/write-ubyte writer (int value))
    \s (lsb/write-short writer (short value))
    \S (lsb/write-ushort writer (int value))
    \i (lsb/write-int writer (int value))
    \I (lsb/write-uint writer (long value))
    \f (lsb/write-float writer (float value))
    \Z (do
         (lsb/write-string writer value)
         ;; NUL terminator
         (lsb/write-char writer (char 0)))
    ;; \H nil
    \B (let [[subtype-str & elems] (cstr/split value #",")
             subtype (first subtype-str)]
         ;; array header: element subtype, then element count
         (lsb/write-bytes writer (byte-array 1 (byte (int subtype))))
         (lsb/write-int writer (count elems))
         (case subtype
           \c (run! #(lsb/write-ubyte writer (Byte/parseByte %)) elems)
           \C (run! #(lsb/write-ubyte writer (Integer/parseInt %)) elems)
           \s (run! #(lsb/write-short writer (Short/parseShort %)) elems)
           \S (run! #(lsb/write-ushort writer (Integer/parseInt %)) elems)
           \i (run! #(lsb/write-int writer (Integer/parseInt %)) elems)
           \I (run! #(lsb/write-uint writer (Long/parseLong %)) elems)
           \f (run! #(lsb/write-float writer (Float/parseFloat %)) elems))
         writer)))
(defn get-block-size
  "Returns the number of bytes required to encode the given alignment.

  The result mirrors what `encode-alignment` writes for `aln`:

  - a :seq of \"*\" is encoded as an empty sequence, so it contributes no
    sequence or quality bytes;
  - a CIGAR with more than 65535 operations is encoded as a two-operation
    placeholder, and the real CIGAR is written as an extra `CG` optional
    field holding a B array of 32-bit values, one per operation.

  The quality field is assumed to be as long as the sequence."
  [aln]
  (let [bases (:seq aln)
        ;; encode-alignment replaces "*" with "" before writing
        read-length (if (= bases "*") 0 (.length ^String bases))
        cigar-length (cigar/count-op (:cigar aln))
        ;; same threshold encode-alignment uses to switch to the placeholder
        long-cigar? (> cigar-length 65535)
        cigar-size (if long-cigar?
                     (* 2 4) ;; two placeholder operations
                     (* cigar-length 4))
        ;; CG tag: tag header + array header + one 4-byte value per operation
        cg-option-size (if long-cigar?
                         (+ fixed-tag-size
                            fixed-binary-array-tag-size
                            (* cigar-length 4))
                         0)]
    (+ common/fixed-block-size
       (.length ^String (:qname aln))
       1 ;; null
       cigar-size
       (quot (inc read-length) 2) ;; two bases are packed per byte
       read-length                ;; one quality byte per base
       (get-options-size aln)
       cg-option-size)))
(defn- add-cigar-to-options
  "Returns `options` with a `CG` optional field prepended.

  The field has type \"B\" and its value is the text \"I,\" followed by the
  comma-separated elements of `(cigar/encode-cigar cigar)`."
  [options cigar]
  (let [encoded-ops (->> (cigar/encode-cigar cigar)
                         (cstr/join ","))
        cg-option {:CG {:type "B"
                        :value (str "I," encoded-ops)}}]
    (cons cg-option options)))
(defn encode-alignment
  "Converts the alignment `aln` to its binary form and writes it to `wrtr`.

  `refs` is used to resolve :rname and :rnext into reference ids; an
  unresolved :rname is written as -1. A :seq of \"*\" is treated as an empty
  sequence. When the CIGAR has more than 65535 operations, a placeholder
  CIGAR with an operation count of 2 is written instead and the real CIGAR
  is added to the optional fields as a `CG` tag. :pos and :pnext are written
  decremented by one. Returns nil."
  [wrtr aln refs]
  (let [aln (update aln :seq (fn [s] (if (= s "*") "" s)))
        cigar-str (:cigar aln)
        op-count (cigar/count-op cigar-str)
        long-cigar? (> op-count 65535)
        encoded-cigar (if long-cigar?
                        (cigar/->placeholder cigar-str)
                        (cigar/encode-cigar cigar-str))
        n-cigar-op (if long-cigar? 2 op-count)
        opts* (if long-cigar?
                (add-cigar-to-options (:options aln) cigar-str)
                (:options aln))
        ^String qname (:qname aln)
        ^String bases (:seq aln)]
    ;; refID
    (lsb/write-int wrtr (or (refs/ref-id refs (:rname aln)) -1))
    ;; pos
    (lsb/write-int wrtr (dec (long (:pos aln))))
    ;; bin_mq_nl: read-name length (including NUL), mapq, then bin
    (lsb/write-ubyte wrtr (short (inc (.length qname))))
    (lsb/write-ubyte wrtr (short (:mapq aln)))
    (lsb/write-ushort wrtr (sam-util/compute-bin aln))
    ;; flag_nc: CIGAR operation count, then flag
    (lsb/write-ushort wrtr n-cigar-op)
    (lsb/write-ushort wrtr (:flag aln))
    ;; l_seq
    (lsb/write-int wrtr (.length bases))
    ;; next_refID
    (lsb/write-int wrtr (get-next-ref-id aln refs))
    ;; next_pos
    (lsb/write-int wrtr (dec (long (:pnext aln))))
    ;; tlen
    (lsb/write-int wrtr (:tlen aln))
    ;; read_name, NUL-terminated
    (lsb/write-string wrtr qname)
    (lsb/write-bytes wrtr (byte-array 1 (byte 0)))
    ;; cigar
    (doseq [op encoded-cigar]
      (lsb/write-int wrtr op))
    ;; seq
    (lsb/write-bytes wrtr (seq/str->compressed-bases bases))
    ;; qual
    (lsb/write-bytes wrtr (encode-qual aln))
    ;; options
    (doseq [opt opts*]
      (let [[tag value] (first (seq opt))
            tag-name (name tag)
            ;; the two tag characters are packed into one short with the
            ;; first character in the low byte
            high (byte (int (second tag-name)))
            low (byte (int (first tag-name)))
            ^String val-type (:type value)]
        (lsb/write-short wrtr (short (bit-or (bit-shift-left high 8) low)))
        (lsb/write-bytes wrtr (.getBytes val-type))
        (encode-tag-value wrtr (first val-type) (:value value))))))