Data structures for interval queries. | (ns cljam.util.intervals) |
(defprotocol IIntervals (find-overlap-intervals* [this start end])) | |
(deftype SortedMapIntervals [m]
IIntervals
(find-overlap-intervals* [_ start end]
(mapcat (fn [[_ x]] (drop-while #(< (long (:end %)) (long start)) x))
(subseq m <= (long end))))) | |
(defn- make-sorted-map-intervals
[intervals]
(->> (group-by :start intervals)
(map (fn [[k vs]] [k (sort-by :end vs)]))
(into (sorted-map))
(->SortedMapIntervals))) | |
(defn- find-nclist-overlap-intervals [nclist ^long start ^long end]
(->> (subseq nclist >= start)
(map second)
(take-while #(<= (long (:start (first %))) end))
(mapcat #(cons (first %)
(find-nclist-overlap-intervals (second %)
start end))))) | |
(deftype NclistIntervals [nclist]
IIntervals
(find-overlap-intervals* [_ start end]
(find-nclist-overlap-intervals nclist start end))) | |
(declare make-nclist*) | |
(defn- make-nested-intervals [intervals]
(when-first [{:keys [start end] :as head} intervals]
(let [[nested-intervals rests]
(split-with #(<= start (:start %) (:end %) end)
(next intervals))]
(cons [end [head (make-nclist* nested-intervals)]]
(lazy-seq (make-nested-intervals rests)))))) | |
(defn- make-nclist* [intervals]
(let [sorted-intervals (sort-by (juxt :start (comp - :end)) intervals)]
(into (sorted-map) (make-nested-intervals sorted-intervals)))) | |
(defn- make-nclist [intervals] (->NclistIntervals (make-nclist* intervals))) | |
Make indexes for intervals to find overlaps. | (defn index-intervals
([intervals]
(index-intervals intervals {}))
([intervals {:keys [structure] :or {structure :sorted-map}}]
(->> intervals
(group-by :chr)
(into {} (map (fn [[k vs]]
[k
(case structure
:sorted-map (make-sorted-map-intervals vs)
:nclist (make-nclist vs))])))))) |
Find intervals that are on the given | (defn find-overlap-intervals [indexed-intervals chr start end] (some-> (get indexed-intervals chr) (find-overlap-intervals* start end))) |