(ns cljam.io.util.bin (:require [cljam.io.util.chunk :as util-chunk]))  | |
(defn max-pos
  "Returns a maximum position of a binning index, computed as 1 shifted left by
  (`min-shift` + 3 * `depth`) bits. The value is identical to the width of
  bin 0."
  ^long [^long min-shift ^long depth]
  (let [total-shift (+ min-shift (* 3 depth))]
    (bit-shift-left 1 total-shift)))
(defn first-bin-of-level
  "Returns a left-most bin number of the given `level`.

  Computed as the integer quotient of 8^`level` by 7, which gives 0, 1, 9, 73,
  ... for levels 0, 1, 2, 3, ..."
  ^long [^long level]
  (-> 1
      (bit-shift-left (* 3 level))
      (quot 7)))
(defn bin-width-of-level
  "Returns a width shared by bins of the same given `level`.

  The width is 1 shifted left by (`min-shift` + 3 * (`depth` - `level`)) bits,
  so each step towards `depth` divides the width by 8."
  ^long [^long level ^long min-shift ^long depth]
  (let [levels-below (- depth level)
        shift (+ min-shift (* 3 levels-below))]
    (bit-shift-left 1 shift)))
(defn bin-level
  "Returns a level that the given `bin` belongs to.

  A candidate level is estimated from the bit length of `bin` and then lowered
  by one when `bin` is smaller than the first bin of that candidate level."
  ^long [^long bin]
  (let [bit-length (- 64 (Long/numberOfLeadingZeros bin))
        candidate (inc (quot bit-length 3))]
    (if (< bin (first-bin-of-level candidate))
      (dec candidate)
      candidate)))
(defn parent-bin
  "Returns the bin number of the parent bin.

  Throws an ex-info carrying `{:bin bin}` when `bin` is not positive."
  ^long [^long bin]
  (when (<= bin 0)
    (throw (ex-info "A child bin number must be positive." {:bin bin})))
  ;; Eight consecutive children share one parent: (bin - 1) / 8.
  (-> bin
      dec
      (unsigned-bit-shift-right 3)))
(defn bin-beg
  "Returns a beginning position of the given `bin`.

  The result is one more than the bin's index within its level multiplied by
  the bin width of that level, so the first bin of every level begins at 1."
  ^long [^long bin ^long min-shift ^long depth]
  (let [level (bin-level bin)
        index-in-level (- bin (first-bin-of-level level))
        width (bin-width-of-level level min-shift depth)]
    (inc (* index-in-level width))))
(defn max-bin
  "Returns a maximum bin number of a binning index with the given `depth`.

  This is the bin just before the first bin of level (`depth` + 1)."
  ^long [^long depth]
  (-> depth
      inc
      first-bin-of-level
      dec))
(defn leading-bins-at-level
  "Returns the distance between the bin corresponding to `pos` and the
  left-most bin of the given `level`.

  Computed as `pos` shifted right (unsigned) by
  (`min-shift` + 3 * (`depth` - `level`)) bits. reg->bin calls this with
  positions it has already decremented by one."
  ^long [^long pos ^long level ^long min-shift ^long depth]
  (let [levels-below (- depth level)
        shift (+ min-shift (* levels-below 3))]
    (unsigned-bit-shift-right pos shift)))
(defn pos->lidx-offset
  "Returns an offset of a linear index that the given `pos` belongs to.

  `pos` is decremented by one before shifting right by `linear-index-shift`
  bits; any `pos` that is zero or negative maps to offset 0."
  ^long [^long pos ^long linear-index-shift]
  (let [zero-based (if (pos? pos) (dec pos) 0)]
    (bit-shift-right zero-based linear-index-shift)))
(defn reg->bins
  "Returns all overlapping bins for the specified region [`beg`, `end`] as a
  vector.

  Both positions are clamped into [1, max-pos] and decremented by one before
  any shifting. Bin 0 is always the first element; the overlapping bins of
  levels 1 to `depth` follow, level by level."
  [^long beg ^long end ^long min-shift ^long depth]
  (let [limit (max-pos min-shift depth)
        first-pos (-> beg (Math/max 1) (Math/min limit) dec)
        last-pos (-> end (Math/max 1) (Math/min limit) dec)]
    (letfn [(bins-at [^long level]
              (let [;; Sum of 8^x for x in [0, level): the number of bins
                    ;; located on all shallower levels.
                    offset (long (reduce + 0
                                         (map (fn [^long x]
                                                (bit-shift-left 1 (* x 3)))
                                              (range level))))
                    shift (+ min-shift (* 3 (- depth level)))]
                (range (+ offset (bit-shift-right first-pos shift))
                       (+ offset 1 (bit-shift-right last-pos shift)))))]
      (into [0] (mapcat bins-at) (range 1 (inc depth))))))
(defn reg->bin
  "Calculates the smallest bin containing the given region [`beg`, `end`].

  Both positions are clamped into [1, max-pos] and decremented by one. Levels
  are then tried from `depth` down to 0, and the first level at which both
  ends fall into the same bin determines the result."
  ^long [^long beg ^long end ^long min-shift ^long depth]
  (let [limit (max-pos min-shift depth)
        first-pos (-> beg (Math/max 1) (Math/min limit) dec)
        last-pos (-> end (Math/max 1) (Math/min limit) dec)]
    (loop [level depth]
      (if (neg? level)
        0
        (let [lead-beg (leading-bins-at-level first-pos level min-shift depth)
              lead-end (leading-bins-at-level last-pos level min-shift depth)]
          (if (not= lead-beg lead-end)
            ;; The region straddles two bins here; try the next coarser level.
            (recur (dec level))
            (+ (first-bin-of-level level) lead-beg)))))))
(defprotocol IBinningIndex
  "Operations that get-spans relies on to turn a genomic region into spans
  using a binning index."
  (get-chunks [this ref-idx bins]
    "Returns the chunks registered under `bins` for the reference `ref-idx`.
    get-spans passes the result to util-chunk/optimize-chunks.")
  (get-min-offset [this ref-idx beg]
    "Returns the minimum offset for position `beg` on the reference `ref-idx`.
    get-spans passes the result to util-chunk/optimize-chunks.")
  (get-min-shift [this]
    "Returns the min-shift of the index; get-spans passes it to reg->bins.")
  (get-depth [this]
    "Returns the depth of the index; get-spans passes it to reg->bins.")
  (get-chr-names [this]
    "Presumably returns the chromosome names known to the index; it is not
    called from this namespace, so confirm against the implementations."))
(defn get-spans
  "Calculates span information for random access from index data such as
  tabix.

  `index-data` must satisfy IBinningIndex. The bins overlapping [`beg`, `end`]
  are looked up for the reference `ref-idx`, their chunks are optimized with
  util-chunk/optimize-chunks, and a lazy sequence holding the `vals` of each
  optimized chunk is returned."
  [index-data ^long ref-idx ^long beg ^long end]
  (let [min-shift (get-min-shift index-data)
        depth (get-depth index-data)
        bins (reg->bins beg end min-shift depth)
        chunks (get-chunks index-data ref-idx bins)
        min-offset (get-min-offset index-data ref-idx beg)
        optimized (util-chunk/optimize-chunks chunks min-offset)]
    (map vals optimized)))