(ns cljam.io.util.chunk (:refer-clojure :exclude [compare]) (:require [cljam.io.util.bgzf :as bgzf])) | |
A chunk expresses a range in a BAM index.
It consists of a start position and an end position.
A chunk is a record that can be read like a map with `:beg` and `:end` keys, e.g. `{:beg 10, :end 20}`. | |
;; Record with two primitive long fields: `beg` (start position) and `end` (end position).
(defrecord Chunk [^long beg ^long end]) | |
(defn compare
  "Returns a negative if chunk1 is earlier than chunk2, a positive if it is
  later, 0 if it is equal.

  Chunks are ordered by `beg` first; when the `beg` values are equal the
  `end` values decide. The result is always exactly -1, 0 or 1. Both fields
  are compared as signed longs."
  ^long
  [^Chunk chunk1 ^Chunk chunk2]
  (let [beg1 (.beg chunk1)
        beg2 (.beg chunk2)
        end1 (.end chunk1)
        end2 (.end chunk2)]
    ;; Compare the fields directly instead of taking the sign of a
    ;; difference: subtracting two longs that are far apart overflows.
    (cond
      (< beg1 beg2) -1
      (> beg1 beg2) 1
      (< end1 end2) -1
      (> end1 end2) 1
      :else 0)))
(defn overlap?
  "Returns true if the two chunks overlap.

  The chunks are first ordered with `compare`. Equal chunks always overlap.
  Otherwise the `end` of the earlier chunk is checked against the `beg` of
  the later chunk: they overlap when the block address of that `end` is
  greater than the block address of that `beg`, or when the block addresses
  are equal and the block offset of the `end` is greater."
  [^Chunk chunk1 ^Chunk chunk2]
  (let [order (compare chunk1 chunk2)]
    (if (zero? order)
      true
      (let [chunk1-first? (neg? order)
            ^Chunk earlier (if chunk1-first? chunk1 chunk2)
            ^Chunk later (if chunk1-first? chunk2 chunk1)
            earlier-end (.end earlier)
            later-beg (.beg later)
            end-address (bgzf/get-block-address earlier-end)
            beg-address (bgzf/get-block-address later-beg)]
        (cond
          (> end-address beg-address)
          true

          ;; Same block address: the offsets within the block decide.
          (= end-address beg-address)
          (> (bgzf/get-block-offset earlier-end)
             (bgzf/get-block-offset later-beg))

          :else
          false)))))
(defn adjacent?
  "Returns true if the two chunks are adjacent.

  Two chunks are adjacent when the `end` of one and the `beg` of the other
  have both the same block address and the same block offset. Both
  directions are checked: chunk1 ending where chunk2 begins, and chunk1
  beginning where chunk2 ends."
  [^Chunk chunk1 ^Chunk chunk2]
  (let [beg1 (.beg chunk1)
        end1 (.end chunk1)
        beg2 (.beg chunk2)
        end2 (.end chunk2)]
    (if (and (= (bgzf/get-block-address end1) (bgzf/get-block-address beg2))
             (= (bgzf/get-block-offset end1) (bgzf/get-block-offset beg2)))
      ;; chunk1 ends exactly where chunk2 begins.
      true
      ;; Otherwise check whether chunk1 begins exactly where chunk2 ends.
      (and (= (bgzf/get-block-address beg1) (bgzf/get-block-address end2))
           (= (bgzf/get-block-offset beg1) (bgzf/get-block-offset end2))))))
(defn optimize-chunks
  "Sorts `chunks` with `compare` and returns a vector of merged chunks.

  Walking the sorted chunks in order:
  - a chunk whose `end` is less than or equal to `min-offset` is dropped;
  - a chunk that neither overlaps nor is adjacent to the last kept chunk is
    appended as a new entry;
  - a chunk that overlaps or is adjacent to the last kept chunk is merged
    into it: the last kept chunk's `end` is extended when the new chunk
    ends later, otherwise the new chunk is dropped."
  [chunks ^long min-offset]
  (loop [pending (sort compare chunks)
         ^Chunk prev nil
         acc (transient [])]
    (let [^Chunk cur (first pending)]
      (if-not cur
        (persistent! acc)
        (let [more (next pending)
              cur-end (.end cur)]
          (cond
            ;; Ends at or before min-offset: skip it.
            (<= cur-end min-offset)
            (recur more prev acc)

            ;; Nothing kept yet, or disjoint from the last kept chunk:
            ;; start a new entry.
            (or (nil? prev)
                (not (or (overlap? prev cur)
                         (adjacent? prev cur))))
            (recur more cur (conj! acc cur))

            ;; Touches the last kept chunk and reaches further: replace the
            ;; last entry with an extended copy.
            (> cur-end (.end prev))
            (let [extended (assoc prev :end cur-end)]
              (recur more extended (conj! (pop! acc) extended)))

            ;; Already covered by the last kept chunk.
            :else
            (recur more prev acc)))))))