Data structures for interval queries. | (ns cljam.util.intervals) |
(defprotocol IIntervals (find-overlap-intervals* [this start end])) | |
(deftype SortedMapIntervals [m] IIntervals (find-overlap-intervals* [_ start end] (mapcat (fn [[_ x]] (drop-while #(< (long (:end %)) (long start)) x)) (subseq m <= (long end))))) | |
(defn- make-sorted-map-intervals [intervals] (->> (group-by :start intervals) (map (fn [[k vs]] [k (sort-by :end vs)])) (into (sorted-map)) (->SortedMapIntervals))) | |
(defn- find-nclist-overlap-intervals [nclist ^long start ^long end] (->> (subseq nclist >= start) (map second) (take-while #(<= (long (:start (first %))) end)) (mapcat #(cons (first %) (find-nclist-overlap-intervals (second %) start end))))) | |
(deftype NclistIntervals [nclist] IIntervals (find-overlap-intervals* [_ start end] (find-nclist-overlap-intervals nclist start end))) | |
(declare make-nclist*) | |
(defn- make-nested-intervals [intervals] (when-first [{:keys [start end] :as head} intervals] (let [[nested-intervals rests] (split-with #(<= start (:start %) (:end %) end) (next intervals))] (cons [end [head (make-nclist* nested-intervals)]] (lazy-seq (make-nested-intervals rests)))))) | |
(defn- make-nclist* [intervals] (let [sorted-intervals (sort-by (juxt :start (comp - :end)) intervals)] (into (sorted-map) (make-nested-intervals sorted-intervals)))) | |
(defn- make-nclist [intervals] (->NclistIntervals (make-nclist* intervals))) | |
Make indexes for intervals to find overlaps. | (defn index-intervals ([intervals] (index-intervals intervals {})) ([intervals {:keys [structure] :or {structure :sorted-map}}] (->> intervals (group-by :chr) (into {} (map (fn [[k vs]] [k (case structure :sorted-map (make-sorted-map-intervals vs) :nclist (make-nclist vs))])))))) |
Find intervals that are on the given | (defn find-overlap-intervals [indexed-intervals chr start end] (some-> (get indexed-intervals chr) (find-overlap-intervals* start end))) |