Utilities for conversions between chromosomal positions and whole-genome positions. | (ns cljam.util.whole-genome) |
(defn chr-to-whole-genome-index
  "Builds a map from chromosome name to `{:offset o, :len l}` out of `refs`,
  a sequence of maps with `:name` and `:len` keys.

  `:offset` is the sum of the `:len` values of all refs that precede the
  chromosome in `refs`, so the first chromosome always gets offset 0."
  [refs]
  (first
   (reduce (fn [[index offset] r]
             (let [chr-len (:len r)]
               ;; Carry the running offset next to the map being built.
               [(assoc index (:name r) {:offset offset, :len chr-len})
                (+ offset chr-len)]))
           [{} 0]
           refs)))
(defn ->whole-genome-coord
  "Transforms a position in a chromosome into a whole-genome position.

  `chr->offset` maps a chromosome name to `{:offset o, :len l}`. Returns
  `offset + pos` when `chr` is present in the index and `pos` lies within
  1..len; returns nil otherwise."
  [chr->offset chr ^long pos]
  (when-let [entry (chr->offset chr)]
    (let [{^long chr-offset :offset, ^long chr-len :len} entry]
      ;; Positions outside 1..len do not belong to this chromosome.
      (when (and (pos? pos) (<= pos chr-len))
        (+ chr-offset pos)))))
(defn whole-genome-to-chr-index
  "Builds a sorted map from whole-genome offset to ref out of `refs`,
  a sequence of maps with a `:len` key.

  Each ref is stored unchanged under the sum of the `:len` values of the
  refs that precede it, so the first ref is keyed by 0."
  [refs]
  (first
   (reduce (fn [[index offset] r]
             ;; Key the ref by the current offset, then advance by its length.
             [(assoc index offset r) (+ offset (:len r))])
           [(sorted-map) 0]
           refs)))
(defn ->chr-and-pos
  "Transforms a whole-genome position to a vector of a chromosome name and a
  position in the chromosome.

  `offset->ref` is a sorted map from offset to a ref map with `:name` and
  `:len`. Returns `[name pos]`, or nil when `wg-pos` does not fall inside any
  chromosome of the index."
  [offset->ref ^long wg-pos]
  ;; The owning chromosome is the one with the greatest offset below wg-pos.
  (when-let [entry (first (rsubseq offset->ref <= (dec wg-pos)))]
    (let [chr-ref (val entry)
          pos-in-chr (- wg-pos (long (key entry)))]
      (when (<= 1 pos-in-chr (:len chr-ref))
        [(:name chr-ref) pos-in-chr]))))
(defn ->regions
  "Transforms a region in whole-genome coordinate into a sequence of
  chromosomal regions.

  `offset->ref` is a sorted map from offset to a ref map with `:name` and
  `:len`; `start` and `end` are inclusive whole-genome positions.

  Returns a vector of `{:chr name, :start s, :end e}` maps, one for each
  chromosome touched by the region. Parts of the region that lie outside
  1..total-length are clamped away. Returns nil when `start` is greater than
  `end`, when the region does not overlap the genome at all, or when the
  index is empty or has a total length of zero."
  [offset->ref ^long start ^long end]
  ;; An empty index has no last entry; return nil instead of failing on it.
  (when-let [[last-offset last-ref] (first (rsubseq offset->ref >= 0))]
    (let [wg-len (+ (long last-offset) (long (:len last-ref)))]
      (when (and (pos? wg-len)
                 (<= start end)
                 ;; Given start <= end, the region overlaps 1..wg-len exactly
                 ;; when it ends at or after 1 and starts at or before wg-len.
                 (pos? end)
                 (<= start wg-len))
        (let [start' (min (max 1 start) wg-len)
              end' (min (max 1 end) wg-len)
              ;; Offsets of the chromosomes holding the clamped endpoints.
              [s-offset] (first (rsubseq offset->ref <= (dec start')))
              [e-offset] (first (rsubseq offset->ref <= (dec end')))
              ;; Start with every touched chromosome covered in full ...
              regs (mapv
                    (fn [[_ r]] {:chr (:name r), :start 1, :end (:len r)})
                    (subseq offset->ref >= s-offset <= e-offset))]
          ;; ... then trim the first and the last one to the requested range.
          (-> regs
              (assoc-in [0 :start] (- start' (long s-offset)))
              (update-in [(dec (count regs)) :end]
                         min
                         (- end' (long e-offset)))))))))