Utilities related to SAM/BAM format. | (ns cljam.io.sam.util
(:require [clojure.string :as cstr]
cljam.io.protocols
[cljam.io.sam.util.cigar :as cigar]
[cljam.io.util.bin :as util-bin]
[cljam.io.sam.util.option :as opt])
(:import [cljam.io.protocols SAMAlignment])) |
;; Shift and depth parameters that `compute-bin` passes to `util-bin/reg->bin`.
;; NOTE(review): presumably the standard BAM binning-scheme values (smallest
;; bin of 2^14 bases, 5 levels below the root) -- confirm against `reg->bin`.
(def ^{:private true :const true} linear-index-shift 14)
(def ^{:private true :const true} linear-index-depth 5)
;;; parse
(defn parse-alignment
  "Parses one tab-separated alignment line into a `SAMAlignment` record.

  The first eleven columns are taken, in order, as qname, flag, rname, pos,
  mapq, cigar, rnext, pnext, tlen, seq and qual; every remaining column is
  mapped through `opt/parse-optional-field` (lazily). The flag, pos, mapq,
  pnext and tlen columns are parsed with `Integer/parseInt`, and the sequence
  is upper-cased.

  The record's end position is `pos + ref-length - 1`, where ref-length is
  the value returned by `cigar/count-ref` for the CIGAR column; when that
  value is zero the end is 0."
  [line]
  (let [[qname flag-str rname pos-str mapq-str cigar rnext pnext-str tlen-str bases qual & opt-cols]
        (cstr/split line #"\t")
        pos (Integer/parseInt pos-str)
        ref-len (int (cigar/count-ref cigar))
        ;; A CIGAR with zero reference length yields an end of 0, not `pos`.
        end (if (zero? ref-len)
              0
              (int (dec (+ pos ref-len))))
        flag (Integer/parseInt flag-str)
        mapq (Integer/parseInt mapq-str)
        pnext (Integer/parseInt pnext-str)
        tlen (Integer/parseInt tlen-str)
        upper-bases (cstr/upper-case bases)
        options (map opt/parse-optional-field opt-cols)]
    (SAMAlignment. qname flag rname pos end mapq cigar rnext pnext tlen upper-bases qual options)))
;;; stringify
(defn stringify-alignment
  "Renders the alignment `a` as a single tab-separated SAM line.

  The values under :qname, :flag, :rname, :pos, :mapq, :cigar, :rnext, :pnext,
  :tlen, :seq and :qual are emitted in that order, followed by the result of
  `opt/stringify-optional-fields` applied to :options. The joined string is
  trimmed, so no trailing tab remains when the optional-field text is empty."
  [a]
  (let [mandatory (mapv #(% a) [:qname :flag :rname :pos :mapq :cigar
                                :rnext :pnext :tlen :seq :qual])
        optional (opt/stringify-optional-fields (:options a))
        row (cstr/join \tab (conj mandatory optional))]
    (cstr/trim row)))
;;; indexing bin
(defn get-end
  "Returns, as a long, the end position on the reference of an alignment.

  Takes a map with :pos and :cigar. The reference length is obtained from
  `cigar/count-ref`; the result is `pos + ref-length - 1`, or `pos` itself
  when the reference length is zero."
  ^long
  [{:keys [^long pos cigar]}]
  (let [span (long (cigar/count-ref cigar))]
    (if-not (zero? span)
      (dec (+ pos span))
      ;; Zero reference length: the end collapses onto the start.
      pos)))
(defn compute-bin
  "Returns the indexing bin for the alignment `aln`.

  The alignment's :pos and the value of `get-end` are passed as the region
  start and end to `util-bin/reg->bin`, along with this namespace's
  `linear-index-shift` and `linear-index-depth` constants."
  [aln]
  (util-bin/reg->bin (:pos aln)
                     (get-end aln)
                     linear-index-shift
                     linear-index-depth))