Encoder of BAM alignment blocks. | (ns cljam.io.bam.encoder (:require [clojure.string :as cstr] [cljam.util :as util] [cljam.io.sam.util :as sam-util] [cljam.io.sam.util.refs :as refs] [cljam.io.sam.util.quality :as qual] [cljam.io.sam.util.cigar :as cigar] [cljam.io.sam.util.sequence :as seq] [cljam.io.util.lsb.io-stream :as lsb] [cljam.io.bam.common :as common])) |
(def ^:private ^:const fixed-tag-size 3) (def ^:private ^:const fixed-binary-array-tag-size 5) | |
(defn- get-next-ref-id [sa refs] (condp = (:rnext sa) "*" -1 "=" (if-let [id (refs/ref-id refs (:rname sa))] id -1) (if-let [id (refs/ref-id refs (:rnext sa))] id -1))) | |
(defn- get-options-size ^long [sam-alignment] (->> (map (fn [op] (let [[_ value] (first (seq op))] (+ fixed-tag-size (case (first (:type value)) \A 1 \i 4 \f 4 \Z (inc (count (:value value))) \B (let [[array-type & array] (cstr/split (:value value) #",")] (+ fixed-binary-array-tag-size (* (count array) (case (first array-type) \c 1 \C 1 \s 2 \S 2 \i 4 \I 4 \f 4 0)))))))) (:options sam-alignment)) (reduce +))) | |
(defn- encode-qual [sam-alignment] (if (= (:qual sam-alignment) "*") (byte-array (.length ^String (:seq sam-alignment)) (util/ubyte 0xff)) (qual/fastq->phred (:qual sam-alignment)))) | |
(defn- encode-tag-value [writer val-type value] (case val-type \A (lsb/write-char writer (char value)) \c (lsb/write-ubyte writer (byte value)) \C (lsb/write-ubyte writer (int value)) \s (lsb/write-short writer (short value)) \S (lsb/write-ushort writer (int value)) \i (lsb/write-int writer (int value)) \I (lsb/write-uint writer (long value)) \f (lsb/write-float writer (float value)) \Z (do (lsb/write-string writer value) (lsb/write-char writer (char 0))) ;; \H nil \B (let [[array-type & array] (cstr/split value #",")] (lsb/write-bytes writer (byte-array 1 (byte (int (first array-type))))) (lsb/write-int writer (count array)) (case (first array-type) \c (doseq [v array] (lsb/write-ubyte writer (Byte/parseByte v))) \C (doseq [v array] (lsb/write-ubyte writer (Integer/parseInt v))) \s (doseq [v array] (lsb/write-short writer (Short/parseShort v))) \S (doseq [v array] (lsb/write-ushort writer (Integer/parseInt v))) \i (doseq [v array] (lsb/write-int writer (Integer/parseInt v))) \I (doseq [v array] (lsb/write-uint writer (Long/parseLong v))) \f (doseq [v array] (lsb/write-float writer (Float/parseFloat v)))) writer))) | |
Returns the number of bytes required to encode the given alignment. | (defn get-block-size [aln] (let [read-length (.length ^String (:seq aln)) cigar-length (cigar/count-op (:cigar aln))] (+ common/fixed-block-size (.length ^String (:qname aln)) 1 ;; null (* cigar-length 4) (quot (inc read-length) 2) read-length (get-options-size aln)))) |
(defn- add-cigar-to-options [options cigar] (cons {:CG {:type "B",:value (str "I," (cstr/join "," (cigar/encode-cigar cigar)))}} options)) | |
Converts the alignment | (defn encode-alignment [wrtr aln refs] (let [aln (update aln :seq #(if (= % "*") "" %)) cigar-ops-count (cigar/count-op (:cigar aln)) [encoded-cigar cigar-ops-count opts*] (if (> cigar-ops-count 65535) [(cigar/->placeholder (:cigar aln)) 2 (add-cigar-to-options (:options aln) (:cigar aln))] [(cigar/encode-cigar (:cigar aln)) cigar-ops-count (:options aln)])] ;; refID (lsb/write-int wrtr (or (refs/ref-id refs (:rname aln)) -1)) ;; pos (lsb/write-int wrtr (dec (long (:pos aln)))) ;; bin_mq_nl (lsb/write-ubyte wrtr (short (inc (.length ^String (:qname aln))))) (lsb/write-ubyte wrtr (short (:mapq aln))) (lsb/write-ushort wrtr (sam-util/compute-bin aln)) ;; flag_nc (lsb/write-ushort wrtr cigar-ops-count) (lsb/write-ushort wrtr (:flag aln)) ;; l_seq (lsb/write-int wrtr (.length ^String (:seq aln))) ;; next_refID (lsb/write-int wrtr (get-next-ref-id aln refs)) ;; next_pos (lsb/write-int wrtr (dec (long (:pnext aln)))) ;; tlen (lsb/write-int wrtr (:tlen aln)) ;; read_name (lsb/write-string wrtr (:qname aln)) (lsb/write-bytes wrtr (byte-array 1 (byte 0))) ;; cigar (doseq [cigar encoded-cigar] (lsb/write-int wrtr cigar)) ;; seq (lsb/write-bytes wrtr (seq/str->compressed-bases (:seq aln))) ;; qual (lsb/write-bytes wrtr (encode-qual aln)) ;; options (doseq [opt opts*] (let [[tag value] (first (seq opt))] (lsb/write-short wrtr (short (bit-or (bit-shift-left (byte (int (second (name tag)))) 8) (byte (int (first (name tag))))))) (lsb/write-bytes wrtr (.getBytes ^String (:type value))) (encode-tag-value wrtr (first (:type value)) (:value value)))))) |