(ns cljam.io.util.bin (:require [cljam.io.util.chunk :as util-chunk])) | |
Returns a maximum position of a binning index. The value is identical to the width of bin 0. | (defn max-pos ^long [^long min-shift ^long depth] (bit-shift-left 1 (+ min-shift (* 3 depth)))) |
Returns a left-most bin number of the given | (defn first-bin-of-level ^long [^long level] (quot (bit-shift-left 1 (* 3 level)) 7)) |
Returns a width shared by bins of the same given | (defn bin-width-of-level ^long [^long level ^long min-shift ^long depth] (bit-shift-left 1 (+ min-shift (* 3 (- depth level))))) |
Returns a level that the given | (defn bin-level ^long [^long bin] (let [x (inc (quot (- 64 (Long/numberOfLeadingZeros bin)) 3))] (cond-> x (< bin (first-bin-of-level x)) dec))) |
Returns the bin number of the parent bin. | (defn parent-bin ^long [^long bin] (when-not (pos? bin) (throw (ex-info "A child bin number must be positive." {:bin bin}))) (unsigned-bit-shift-right (dec bin) 3)) |
Returns a beginning position of the given | (defn bin-beg ^long [^long bin ^long min-shift ^long depth] (let [level (bin-level bin)] (inc (* (- bin (first-bin-of-level level)) (bin-width-of-level level min-shift depth))))) |
Returns a maximum bin number of a binning index with the given | (defn max-bin ^long [^long depth] (dec (first-bin-of-level (inc depth)))) |
Returns the distance between the bin corresponding to | (defn leading-bins-at-level ^long [^long pos ^long level ^long min-shift ^long depth] (unsigned-bit-shift-right pos (+ min-shift (* (- depth level) 3)))) |
Returns an offset of a linear index that the given | (defn pos->lidx-offset ^long [^long pos ^long linear-index-shift] (bit-shift-right (if (<= pos 0) 0 (dec pos)) linear-index-shift)) |
Returns all overlapping bins for the specified region [ | (defn reg->bins [^long beg ^long end ^long min-shift ^long depth] (let [max-pos' (max-pos min-shift depth) beg (dec (Math/min max-pos' (Math/max 1 beg))) end (dec (Math/min max-pos' (Math/max 1 end)))] (into [0] (mapcat (fn [^long d] (let [t (long (transduce (map (fn [^long x] (bit-shift-left 1 (* x 3)))) + 0 (range (inc d)))) s (+ min-shift (* 3 (- depth d 1)))] (range (+ t (bit-shift-right beg s)) (+ t 1 (bit-shift-right end s)))))) (range depth)))) |
Calculates the smallest bin containing the given region [ | (defn reg->bin ^long [^long beg ^long end ^long min-shift ^long depth] (let [max-pos' (max-pos min-shift depth) beg (dec (Math/min max-pos' (Math/max 1 beg))) end (dec (Math/min max-pos' (Math/max 1 end)))] (loop [level depth] (if-not (neg? level) (let [beg-bins (leading-bins-at-level beg level min-shift depth)] (if (= beg-bins (leading-bins-at-level end level min-shift depth)) (+ (first-bin-of-level level) beg-bins) (recur (dec level)))) 0)))) |
(defprotocol IBinningIndex (get-chunks [this ref-idx bins]) (get-min-offset [this ref-idx beg]) (get-min-shift [this]) (get-depth [this]) (get-chr-names [this])) | |
Calculates span information for random access from index data such as tabix. | (defn get-spans [index-data ^long ref-idx ^long beg ^long end] (let [bins (reg->bins beg end (get-min-shift index-data) (get-depth index-data)) chunks (get-chunks index-data ref-idx bins) min-offset (get-min-offset index-data ref-idx beg)] (->> (util-chunk/optimize-chunks chunks min-offset) (map vals)))) |