Utilities related to SAM/BAM format.

(ns cljam.io.sam.util
  (:require [clojure.string :as cstr]
            cljam.io.protocols
            [cljam.io.sam.util.cigar :as cigar]
            [cljam.io.util.bin :as util-bin]
            [cljam.io.sam.util.option :as opt])
  (:import [cljam.io.protocols SAMAlignment]))
;; Parameters handed to util-bin/reg->bin when computing an indexing bin.
;; NOTE(review): presumably the bit shift of the smallest bin (2^14 bases)
;; and the number of bin levels from the SAM/BAM binning scheme -- confirm.
(def ^{:private true, :const true} linear-index-shift 14)
(def ^{:private true, :const true} linear-index-depth 5)

parse

Parses a tab-delimited alignment line, returning a SAMAlignment record of the alignment.

(defn parse-alignment
  "Builds a SAMAlignment from one tab-separated alignment `line`.

  The first eleven columns are read as qname, flag, rname, pos, mapq, cigar,
  rnext, pnext, tlen, seq and qual; any remaining columns are treated as
  optional fields and parsed (lazily) with `opt/parse-optional-field`.
  flag, pos, mapq, pnext and tlen are parsed with `Integer/parseInt`, and the
  sequence is upper-cased. The end position is derived from pos and the
  reference length reported by `cigar/count-ref`."
  [line]
  (let [[qname flag-str rname pos-str mapq-str cigar rnext pnext-str tlen-str bases qual & opt-fields]
        (cstr/split line #"\t")
        pos (Integer/parseInt pos-str)
        ref-span (int (cigar/count-ref cigar))
        ;; NOTE(review): a zero reference length gives end = 0 here, whereas
        ;; get-end returns pos in that case -- confirm the difference is intended.
        end (if-not (zero? ref-span)
              (int (dec (+ pos ref-span)))
              0)
        flag (Integer/parseInt flag-str)
        mapq (Integer/parseInt mapq-str)
        pnext (Integer/parseInt pnext-str)
        tlen (Integer/parseInt tlen-str)
        upper-bases (cstr/upper-case bases)
        options (map opt/parse-optional-field opt-fields)]
    (SAMAlignment. qname flag rname pos end mapq
                   cigar rnext pnext tlen upper-bases
                   qual options)))

stringify

Converts alignment data to its string representation in the SAM format.

(defn stringify-alignment
  "Renders alignment `a` as a single tab-separated line.

  The columns are, in order, :qname :flag :rname :pos :mapq :cigar :rnext
  :pnext :tlen :seq :qual, followed by the result of
  `opt/stringify-optional-fields` applied to :options. The joined string is
  trimmed of leading and trailing whitespace before being returned."
  [a]
  (let [mandatory (mapv #(% a)
                        [:qname :flag :rname :pos :mapq :cigar
                         :rnext :pnext :tlen :seq :qual])
        optional (opt/stringify-optional-fields (:options a))
        columns (conj mandatory optional)]
    ;; trim drops the dangling tab left when the optional column is blank
    (cstr/trim (cstr/join \tab columns))))

indexing bin

Returns the end position in reference for the given alignment.

(defn get-end
  "Returns, as a long, the last reference position covered by the alignment.

  Takes a map with :pos and :cigar. The reference length comes from
  `cigar/count-ref`; when it is zero the start position itself is returned,
  otherwise the result is pos + length - 1."
  ^long
  [{:keys [^long pos cigar]}]
  (let [span (long (cigar/count-ref cigar))]
    (if-not (zero? span)
      (dec (+ pos span))
      pos)))

Returns indexing bin based on alignment start and end.

(defn compute-bin
  "Returns the indexing bin for alignment `aln`.

  The bin is computed by `util-bin/reg->bin` from the alignment's :pos and
  the end position given by `get-end`, using this namespace's
  linear-index-shift and linear-index-depth settings."
  [aln]
  (util-bin/reg->bin (:pos aln)
                     (get-end aln)
                     linear-index-shift
                     linear-index-depth))