Utility functions for SAM optional fields. | (ns cljam.io.sam.util.option
(:require [clojure.string :as cstr]
[proton.core :as p])) |
;;; parse
(defn- parse-tag-single
  "Converts `val'`, the textual value of a single-valued optional field, into
  a Clojure value according to the one-character type code `val-type`.

  - type `Z`: the string is returned unchanged.
  - type `A`: the first character of the string is returned.
  - types `I`, `i`, `s`, `S`, `c`, `C`: converted with `p/as-long`.
  - type `f`: converted with `p/as-double`.
  - type `H`: converted with `p/hex->bytes`.

  Throws `Exception` for any other type code."
  [val-type val']
  (case val-type
    \Z val'
    \A (first val')
    ;; every integer-like type code shares the same conversion
    (\I \i \s \S \c \C) (p/as-long val')
    \f (p/as-double val')
    \H (p/hex->bytes val')
    (throw (Exception. (format "Unrecognized tag type: %s, for value: %s"
                               val-type
                               val')))))
(defn parse-optional-field
  "Parses an optional field string.

  `op` is split on `:` into at most three parts: tag, type and value (so the
  value itself may contain colons). Returns a single-entry map from the tag
  keyword to a map holding the type string under `:type` and the parsed value
  under `:value`. When the type starts with `B` the value is kept as the raw
  string; every other type is converted by `parse-tag-single`."
  [op]
  (let [[tag-name type-str raw-value] (cstr/split op #":" 3)
        type-char (first type-str)
        parsed (if (= \B type-char)
                 raw-value
                 (parse-tag-single type-char raw-value))]
    {(keyword tag-name) {:type type-str
                         :value parsed}}))
;;; stringify
(defn stringify-optional-fields
  "Converts a sequence of optional fields to a string.

  Each element of `options` is a map whose first entry maps a tag keyword to a
  map with `:type` and `:value`. Every field is rendered as `TAG:TYPE:VALUE`
  and the fields are joined with tab characters. An empty `options` yields an
  empty string.

  A byte-array value is rendered as upper-case hexadecimal digits, two per
  byte, instead of the object identity that `str` would produce."
  [options]
  (letfn [(printable [value]
            ;; NOTE(review): byte arrays are presumably what `H` fields hold
            ;; after parsing via `p/hex->bytes` -- confirm. The check is on the
            ;; runtime class, so all other values pass through untouched.
            (if (instance? (Class/forName "[B") value)
              (apply str (map #(format "%02X" (bit-and % 0xFF)) value))
              value))]
    (->> options
         (map (fn [op]
                (let [[tag {:keys [value] type' :type}] (first (seq op))]
                  (cstr/join \: [(name tag) type' (printable value)]))))
         (cstr/join \tab))))
;;; accessors
(defn value-for-tag
  "Returns the value of the optional field named `tag` in the alignment `aln`.

  `tag` is applied as a function to each element of `(:options aln)`; the
  `:value` of the first truthy result is returned. Returns nil when no element
  contains `tag`."
  [tag aln]
  (->> (:options aln)
       (some tag)
       :value))
;; Accessor for the `MC` optional field; yields nil when the field is absent.
(def
  ^{:doc "CIGAR string of the mate alignment."
    :arglists '([aln])}
  mate-cigar
  (partial value-for-tag :MC))
;; Accessor for the `AS` optional field; yields nil when the field is absent.
(def
  ^{:doc "Score of the alignment."
    :arglists '([aln])}
  score
  (partial value-for-tag :AS))
;; Accessor for the `BC` optional field; yields nil when the field is absent.
(def
  ^{:doc "Barcode sequence."
    :arglists '([aln])}
  barcode
  (partial value-for-tag :BC))
;; Accessor for the `NM` optional field; yields nil when the field is absent.
(def
  ^{:doc "Edit distance from reference of the alignment."
    :arglists '([aln])}
  edit-distance
  (partial value-for-tag :NM))
(defn parse-mismatching-positions-str
  "Parse mismatching positions in the SAM optional fields.

  Returns a sequence consisting of vectors which are one of
  `[:match matching-length]`, `[:mismatch a-reference-base-char]` and
  `[:deletion reference-bases-string]`. The sequence always starts with the
  leading `:match` entry, and every mismatch or deletion is followed by a
  `:match` entry. Returns nil when `s` is nil."
  [s]
  (when s
    (let [[_ leading-len remainder] (re-matches #"(\d+)(.*)" s)
          leading [:match (p/as-long leading-len)]
          ;; one regex hit = one mismatch-or-deletion plus the match run after it
          hit->entries (fn [[_ _ base deleted run-len]]
                         [(if base
                            [:mismatch (first base)]
                            [:deletion deleted])
                          [:match (p/as-long run-len)]])]
      (cons leading
            (mapcat hit->entries
                    (re-seq #"(([A-Z])|\^([A-Z]+))(\d+)" remainder))))))
;; Reads the `MD` optional field and parses it with
;; `parse-mismatching-positions-str`; yields nil when the field is absent.
(def
  ^{:doc "Mismatching positions and bases of the alignment."
    :arglists '([aln])}
  mismatching-positions
  (comp parse-mismatching-positions-str (partial value-for-tag :MD)))
;; Accessor for the `pa` optional field; yields nil when the field is absent.
(def
  ^{:doc "Ratio of the primary alignment score and the alternative one."
    :arglists '([aln])}
  primary-to-alt-score
  (partial value-for-tag :pa))
(defn parse-supplementary-alignments-str
  "Parse serialized supplementary alignments.

  `s` is a run of `rname,pos,strand,CIGAR,mapQ,NM;` entries. Returns a
  sequence of maps with keys `:rname`, `:pos`, `:strand` (`:forward` or
  `:reverse`), `:cigar`, `:mapq` and `:edit-distance`; the numeric fields are
  converted with `p/as-long`. Returns nil when `s` is nil and an empty
  sequence when no entry matches."
  [s]
  (when s
    ;; The CIGAR group accepts all nine SAM operators (MIDNSHP=X). Accepting
    ;; only MIDSH made an entry such as `50M1000N50M` fail to match, after
    ;; which the lazy rname group could swallow it into the next entry's rname.
    (->> (re-seq #"(\S+?),(\d+),([+-]),((?:\d+[MIDNSHP=X])+),(\d+),(\d+);" s)
         (map (fn [[_ & rests]]
                (-> [:rname :pos :strand :cigar :mapq :edit-distance]
                    (zipmap rests)
                    (update :pos p/as-long)
                    (update :mapq p/as-long)
                    (update :strand (fn [^String strand]
                                      ;; the regex guarantees `+` or `-` here
                                      (case (.charAt strand 0)
                                        \+ :forward
                                        \- :reverse)))
                    (update :edit-distance p/as-long)))))))
;; Reads the `SA` optional field and parses it with
;; `parse-supplementary-alignments-str`; yields nil when the field is absent.
(def
  ^{:doc "List of supplementary alignments."
    :arglists '([aln])}
  supplementary-alignments
  (comp parse-supplementary-alignments-str (partial value-for-tag :SA)))
(defn parse-alternative-hits-str
  "Parse serialized alternative hits.

  `s` is a run of `rname,pos,CIGAR,NM;` entries, where `pos` may carry a
  leading `+` or `-` sign. Returns a sequence of maps with keys `:rname`,
  `:pos`, `:cigar` and `:edit-distance`; `:pos` and `:edit-distance` are
  converted with `p/as-long`. Returns nil when `s` is nil and an empty
  sequence when no entry matches."
  [s]
  (when s
    ;; The CIGAR group accepts all nine SAM operators (MIDNSHP=X) so that an
    ;; entry using N, P, = or X is parsed rather than skipped or merged into
    ;; the rname of the following entry.
    (->> (re-seq #"(\S+?),([+-]?\d+),((?:\d+[MIDNSHP=X])+),(\d+);" s)
         (map (fn [[_ & rests]]
                (-> (zipmap [:rname :pos :cigar :edit-distance] rests)
                    (update :pos p/as-long)
                    (update :edit-distance p/as-long)))))))
;; Reads the `XA` optional field and parses it with
;; `parse-alternative-hits-str`; yields nil when the field is absent.
(def
  ^{:doc "List of alternative alignments."
    :arglists '([aln])}
  alternative-hits
  (comp parse-alternative-hits-str (partial value-for-tag :XA)))
;; Accessor for the `XS` optional field; yields nil when the field is absent.
(def
  ^{:doc "Suboptimal alignment score."
    :arglists '([aln])}
  suboptimal-score
  (partial value-for-tag :XS))
;; Accessor for the `RG` optional field; yields nil when the field is absent.
(def
  ^{:doc "Name of read group of the alignment."
    :arglists '([aln])}
  read-group
  (partial value-for-tag :RG))
;; Accessor for the `XR` optional field; yields nil when the field is absent.
(def
  ^{:doc "Comment of reference sequence."
    :arglists '([aln])}
  ref-comment
  (partial value-for-tag :XR))