Functions to calculate pileup from the BAM. | (ns cljam.algo.pileup
(:require [cljam.io.sam :as sam]
[cljam.io.sam.util.cigar :as cigar]
[cljam.io.sam.util.flag :as flag]
[cljam.io.sam.util.quality :as qual]
[cljam.io.sam.util.refs :as refs]
[cljam.io.pileup :as plpio])
(:import [cljam.io.protocols SAMAlignment]
[cljam.io.pileup PileupBase LocusPile])) |
(defn- seq-step
[^long start ^long end ^long step-size alns]
(letfn [(starts-before? [^long pos ^SAMAlignment aln]
(<= (.pos aln) pos))
(ends-after? [^long pos ^SAMAlignment aln]
(<= pos (.end aln)))
(step [^long s carried-over rest-alns]
(lazy-seq
(when (<= s end)
(let [e (Math/min (+ s step-size) end)
[in out] (split-with #(starts-before? e %) rest-alns)
pile (into carried-over in)
carry (filterv #(ends-after? (inc e) %) pile)
out-pos (some-> out ^SAMAlignment (first) .pos)
next-s (if-not (seq carry) (or out-pos (inc end)) (inc e))]
(cons [s pile] (step next-s carry out))))))]
(let [[x :as xs] (drop-while #(< (.end ^SAMAlignment %) start) alns)]
(when x
(step (max start (.pos ^SAMAlignment x)) [] xs))))) | |
Returns a lazy sequence that each element contains [position (alignments piled-up at the locus)]. | (defn pileup-seq [^long start ^long end alns] (seq-step start end 0 alns)) |
(defn- quals-at-ref
[idx ^String qual]
(let [empty-qual? (and (= (.length qual) 1)
(= (.charAt qual 0) \*))]
(if empty-qual?
(short-array (count idx) (short 93)) ;; \~
(->> idx
(map (fn [[_ x]]
(if (number? x)
(qual/fastq-char->phred-byte (.charAt qual x))
93)))
short-array)))) | |
(deftype PosBase [^char base indel]) | |
(defn- seqs-at-ref
[idx ^String s]
(->> idx
(map (fn [[op x xs]]
(let [c (if (number? x) (.charAt s x) x)]
(case op
:m (->PosBase c nil)
:d (->PosBase c xs)
:i (->PosBase c (subs s (first xs) (last xs)))))))
object-array)) | |
Align bases and base quality scores with the reference coordinate. | (defn index-cigar
[^SAMAlignment aln]
(let [idx (cigar/to-index (.cigar aln))]
(assoc aln
:seqs-at-ref (seqs-at-ref idx (:seq aln))
:quals-at-ref (quals-at-ref idx (.qual aln))))) |
Basic predicate function for filtering alignments for mpileup. | (defn basic-mpileup-pred
[^long min-mapq]
(fn [^SAMAlignment aln]
(let [flag (.flag aln)]
(and flag
(zero? (bit-and (flag/encoded #{:unmapped
:filtered-out
:duplicated
:supplementary
:secondary}) flag))
(or (not (flag/multiple? flag)) (flag/properly-aligned? flag))
(not-empty (.cigar aln))
(<= min-mapq (.mapq aln)))))) |
Find a piled-up base and an indel from an alignment. | (defn resolve-base
[^long ref-pos ^SAMAlignment aln]
(let [relative-pos (- ref-pos (.pos aln))
qual (aget ^shorts (:quals-at-ref aln) relative-pos)
^PosBase pb (aget ^objects (:seqs-at-ref aln) relative-pos)
base (.base pb)
indel (.indel pb)
deletion? (number? indel)]
(plpio/->PileupBase
(zero? relative-pos)
(.mapq aln)
base
qual
(flag/reversed? (.flag aln))
(= ref-pos (.end aln))
(when-not deletion? indel)
(when deletion? indel)
(.qname aln)
(dissoc aln :seqs-at-ref :quals-at-ref)))) |
(defn- resolve-bases [[ref-pos alns]] [ref-pos (mapv (partial resolve-base ref-pos) alns)]) | |
Converts a pile into | (defn ->locus-pile
[chr [pos pile]]
(when (seq pile)
(LocusPile. chr pos (object-array pile)))) |
Returns a predicate for filtering piled-up reads by base quality at its position. | (defn filter-by-base-quality
[^long min-base-quality]
(fn [p]
(->> #(<= min-base-quality (.qual ^PileupBase %))
(partial filter)
(update p 1)))) |
(defn- unzip-2
[transform-fn]
(fn [rf]
(let [va (volatile! (transient []))
vb (volatile! (transient []))]
(fn
([] (rf))
([acc]
(-> acc
(rf (persistent! @va))
(rf (persistent! @vb))
rf))
([acc x]
(let [[a b] (transform-fn x)]
(vswap! va conj! a)
(vswap! vb conj! b)
acc)))))) | |
Merges corrected quals with the uncorrected part. | (defn- merge-corrected-quals
[^SAMAlignment aln ^long correct-start corrected-quals]
(let [^shorts quals (:quals-at-ref aln)
start (.pos aln)
len (count corrected-quals)
quals' (java.util.Arrays/copyOf quals (alength quals))
start' (if (< start correct-start) (- correct-start start) 0)]
(dotimes [i len]
(aset quals' (+ start' i) (short (corrected-quals i))))
quals')) |
(defn- correct-pair-qual
[^SAMAlignment a1 ^SAMAlignment a2]
(let [pos1 (.pos a1)
pos2 (.pos a2)
^shorts quals1 (:quals-at-ref a1)
^shorts quals2 (:quals-at-ref a2)
^objects seqs1 (:seqs-at-ref a1)
^objects seqs2 (:seqs-at-ref a2)]
(fn [^long pos]
(let [relative-pos1 (- pos pos1)
relative-pos2 (- pos pos2)
q1 (aget quals1 relative-pos1)
q2 (aget quals2 relative-pos2)
b1 (-> seqs1 ^PosBase (aget relative-pos1) .-base)
b2 (-> seqs2 ^PosBase (aget relative-pos2) .-base)]
(if (= b1 b2)
[(min 200 (+ q1 q2)) 0]
(if (<= q2 q1)
[(int (* 0.8 q1)) 0]
[0 (int (* 0.8 q2))])))))) | |
Correct quals of a pair. Returns a map with corrected quals. | (defn- correct-pair-quals
[^SAMAlignment a1 ^SAMAlignment a2]
(let [correct-start (max (.pos a1) (.pos a2))
correct-end (min (.end a1) (.end a2))]
(when (and (pos? (.pnext a1))
(<= correct-start (.end a1)))
(let [[quals1 quals2] (into []
(unzip-2 (correct-pair-qual a1 a2))
(range correct-start (inc correct-end)))
new-quals1 (merge-corrected-quals a1
correct-start
quals1)
new-quals2 (merge-corrected-quals a2
correct-start
quals2)]
(if (flag/r1? (.flag a1))
[new-quals1 new-quals2]
[new-quals2 new-quals1]))))) |
Makes a map which has corrected quals of all overlapping pairs. | (defn- make-corrected-quals-map
[alns]
(->> alns
(group-by (fn [x] (.qname ^SAMAlignment x)))
(into {} (keep
(fn [[qname xs]]
(when (<= 2 (count xs))
[qname (apply correct-pair-quals xs)])))))) |
Returns an alignment with corrected quals by looking up quals in the corrected quals map. | (defn- correct-quals-at-ref
[corrected-map ^SAMAlignment aln]
(if-not (= (.rnext aln) "=")
aln
(if-let [quals-pair (corrected-map (.qname aln))]
(let [new-quals (if (flag/r1? (.flag aln))
(first quals-pair)
(second quals-pair))]
(assoc aln :quals-at-ref new-quals))
aln))) |
Piles up alignments in given region and returns a lazy sequence of
The following options are available:
- | (defn pileup
([sam-reader region]
(pileup sam-reader region {}))
([sam-reader
{:keys [chr ^long start ^long end] :or {start 1 end Integer/MAX_VALUE}}
{:keys [^long min-base-quality ^long min-map-quality ignore-overlaps? ^long chunk-size]
:or {min-base-quality 13 min-map-quality 0 ignore-overlaps? false
chunk-size 5000}}]
(when-let [^long len (:len (refs/ref-by-name (sam/read-refs sam-reader) chr))]
(let [s (max 1 start)
e (min len end)
region {:chr chr :start s :end e}
filter-fn (if (pos? min-base-quality)
(partial map (filter-by-base-quality min-base-quality))
identity)]
(->> (sam/read-alignments sam-reader region)
(sequence
(comp
(filter (basic-mpileup-pred min-map-quality))
(map index-cigar)))
(seq-step start end chunk-size)
(mapcat (fn [[^long pos alns]]
(->> (if ignore-overlaps?
alns
(keep (partial correct-quals-at-ref
(make-corrected-quals-map alns)) alns))
(pileup-seq pos (min end (+ pos chunk-size))))))
(map resolve-bases)
filter-fn
(keep (partial ->locus-pile chr))))))) |
Align multiple piled-up seqs. | (defn align-pileup-seqs
[& xs]
(if (<= (count xs) 1)
(map (fn [{:keys [pos] :as m}] [pos [m]]) (first xs))
(letfn [(step [xs]
(lazy-seq
(when (some seq xs)
(let [min-pos (apply min (keep (comp :pos first) xs))]
(cons [min-pos (mapv
(fn [[{:keys [pos] :as m}]]
(when (= pos min-pos) m)) xs)]
(step (mapv
(fn [[{:keys [pos]} :as ys]]
(if (= pos min-pos) (next ys) ys)) xs)))))))]
(step xs)))) |
Pile up alignments from multiple sources. The following | (defn mpileup [region options & sam-readers] (apply align-pileup-seqs (map #(pileup % region options) sam-readers))) |
Creates a mpileup file from the BAM file. The following | (defn create-mpileup
([in-sam out-mplp]
(create-mpileup in-sam nil out-mplp))
([in-sam in-ref out-mplp]
(create-mpileup in-sam in-ref out-mplp nil))
([in-sam in-ref out-mplp region]
(create-mpileup in-sam in-ref out-mplp region nil))
([in-sam in-ref out-mplp region options]
(with-open [s (sam/reader in-sam)
w (plpio/writer out-mplp in-ref)]
(let [regs (if region
[region]
(map
(fn [{:keys [len] name' :name}]
{:chr name' :start 1 :end len})
(sam/read-refs s)))]
(doseq [reg regs]
(plpio/write-piles w (pileup s reg options))))))) |