(ns cljam.io.util.bin (:require [cljam.io.util.chunk :as util-chunk])) | |
(defn max-pos
  "Returns a maximum position of a binning index. The value is identical to the
  width of bin 0.

  Computed as 2^(`min-shift` + 3 * `depth`)."
  ^long [^long min-shift ^long depth]
  (let [total-shift (+ min-shift (* depth 3))]
    (bit-shift-left 1 total-shift)))
(defn first-bin-of-level
  "Returns a left-most bin number of the given `level`.

  Computed as the integer quotient of 2^(3 * `level`) by 7, which yields
  0, 1, 9, 73, 585, ... for levels 0, 1, 2, 3, 4, ..."
  ^long [^long level]
  (-> (bit-shift-left 1 (* level 3))
      (quot 7)))
(defn bin-width-of-level
  "Returns a width shared by bins of the same given `level`.

  Computed as 2^(`min-shift` + 3 * (`depth` - `level`)), so the width shrinks
  by a factor of eight for every level below the root."
  ^long [^long level ^long min-shift ^long depth]
  (let [levels-below (- depth level)
        width-shift (+ min-shift (* levels-below 3))]
    (bit-shift-left 1 width-shift)))
(defn bin-level
  "Returns a level that the given `bin` belongs to.

  A candidate level is estimated from the bit length of `bin`; the estimate can
  be one too high, so it is lowered when `bin` precedes the first bin of the
  candidate level."
  ^long [^long bin]
  (let [bit-length (- 64 (Long/numberOfLeadingZeros bin))
        candidate (inc (quot bit-length 3))]
    (if (< bin (first-bin-of-level candidate))
      (dec candidate)
      candidate)))
(defn parent-bin
  "Returns the bin number of the parent bin.

  Throws an `ex-info` carrying `{:bin bin}` when `bin` is not positive, since
  such a bin has no parent."
  ^long [^long bin]
  (if (pos? bin)
    ;; Eight consecutive children share one parent.
    (unsigned-bit-shift-right (dec bin) 3)
    (throw (ex-info "A child bin number must be positive." {:bin bin}))))
(defn bin-beg
  "Returns a beginning position of the given `bin`.

  The result is one more than the bin's zero-based offset, i.e. the bin's index
  within its level multiplied by the bin width of that level, plus one."
  ^long [^long bin ^long min-shift ^long depth]
  (let [level (bin-level bin)
        index-in-level (- bin (first-bin-of-level level))
        width (bin-width-of-level level min-shift depth)]
    (inc (* index-in-level width))))
(defn max-bin
  "Returns a maximum bin number of a binning index with the given `depth`.

  This is the bin immediately preceding the first bin of level `depth` + 1."
  ^long [^long depth]
  (-> depth
      inc
      first-bin-of-level
      dec))
(defn leading-bins-at-level
  "Returns the distance between the bin corresponding to `pos` and the first
  bin of the given `level`.

  Computed as an unsigned right shift of `pos` by
  `min-shift` + 3 * (`depth` - `level`). Callers in this namespace pass a
  zero-based position."
  ^long [^long pos ^long level ^long min-shift ^long depth]
  (let [levels-below (- depth level)
        width-shift (+ min-shift (* levels-below 3))]
    (unsigned-bit-shift-right pos width-shift)))
(defn pos->lidx-offset
  "Returns an offset of a linear index that the given `pos` belongs to.

  `pos` is converted to a zero-based position (non-positive values are treated
  as zero) and then shifted right by `linear-index-shift`."
  ^long [^long pos ^long linear-index-shift]
  (let [zero-based (if (pos? pos)
                     (dec pos)
                     0)]
    (bit-shift-right zero-based linear-index-shift)))
(defn reg->bins
  "Returns all overlapping bins for the specified region [`beg`, `end`] as a
  vector.

  `beg` and `end` are clamped into [1, `(max-pos min-shift depth)`] before use.
  The result always starts with bin 0, followed by the overlapping bins of
  level 1 through level `depth`, in ascending order within each level."
  [^long beg ^long end ^long min-shift ^long depth]
  (let [limit (max-pos min-shift depth)
        ;; Clamp into the indexable range, then switch to zero-based positions.
        beg0 (dec (Math/min limit (Math/max 1 beg)))
        end0 (dec (Math/min limit (Math/max 1 end)))]
    (into [0]
          (mapcat
           (fn [^long level]
             ;; Reuse the shared helpers rather than re-deriving the first bin
             ;; number and the shift for every level.
             (let [offset (first-bin-of-level level)]
               (range (+ offset (leading-bins-at-level beg0 level min-shift depth))
                      (+ offset 1 (leading-bins-at-level end0 level min-shift depth))))))
          (range 1 (inc depth)))))
(defn reg->bin
  "Calculates the smallest bin containing the given region [`beg`, `end`].

  `beg` and `end` are clamped into [1, `(max-pos min-shift depth)`] before use.
  Levels are searched from the deepest (`depth`) upward; the first level at
  which both ends fall into the same bin determines the result. Bin 0 is
  returned when no level matches."
  ^long [^long beg ^long end ^long min-shift ^long depth]
  (let [limit (max-pos min-shift depth)
        ;; Clamp into the indexable range, then switch to zero-based positions.
        first-pos (dec (Math/min limit (Math/max 1 beg)))
        last-pos (dec (Math/min limit (Math/max 1 end)))]
    (loop [level depth]
      (if (neg? level)
        0
        (let [first-offset (leading-bins-at-level first-pos level min-shift depth)
              last-offset (leading-bins-at-level last-pos level min-shift depth)]
          (if (= first-offset last-offset)
            (+ (first-bin-of-level level) first-offset)
            (recur (dec level))))))))
(defprotocol IBinningIndex
  "Operations that `get-spans` requires from index data."
  (get-chunks [this ref-idx bins]
    "Returns the chunks for reference `ref-idx` and the given `bins`.")
  (get-min-offset [this ref-idx beg]
    "Returns the minimum offset for reference `ref-idx` and position `beg`;
    `get-spans` passes it to chunk optimization.")
  (get-min-shift [this]
    "Returns the `min-shift` parameter of the binning index.")
  (get-depth [this]
    "Returns the `depth` parameter of the binning index.")
  (get-chr-names [this]
    "Presumably returns the chromosome names held by the index; not used in
    this namespace, confirm against implementers."))
(defn get-spans
  "Calculates span information for random access from index data such as tabix.

  Looks up the bins overlapping [`beg`, `end`] using the index's min-shift and
  depth, fetches the chunks of those bins for `ref-idx`, optimizes them against
  the minimum offset for `beg`, and returns a lazy sequence holding the `vals`
  of each optimized chunk."
  [index-data ^long ref-idx ^long beg ^long end]
  (let [min-shift (get-min-shift index-data)
        depth (get-depth index-data)
        candidate-bins (reg->bins beg end min-shift depth)
        chunks (get-chunks index-data ref-idx candidate-bins)
        lower-bound (get-min-offset index-data ref-idx beg)
        optimized (util-chunk/optimize-chunks chunks lower-bound)]
    (map vals optimized)))