Functions to read and write the WIG (Wiggle) format. See https://genome.ucsc.edu/goldenpath/help/wiggle.html for the detail WIG specifications. | (ns cljam.io.wig
(:require [clojure.java.io :as cio]
[clojure.string :as cstr]
[proton.core :refer [as-long as-double]]
[cljam.io.protocols :as protocols]
[cljam.util :as util]
[cljam.util.chromosome :as chr])
(:import [java.net URL]
[java.io BufferedReader BufferedWriter Closeable])) |
(declare read-fields write-fields) | |
(defrecord WIGReader [^BufferedReader reader ^URL url]
Closeable
(close [this]
(.close ^Closeable (.reader this)))
protocols/IReader
(reader-url [this] (.url this))
(read [this] (read-fields this))
(read [this _] (read-fields this))
(indexed? [_] false)) | |
(defrecord WIGWriter [^BufferedWriter writer ^URL url]
Closeable
(close [this]
(.close ^Closeable (.writer this)))
protocols/IWriter
(writer-url [this] (.url this))) | |
Returns an open cljam.io.wig.WIGReader of f. Should be used inside with-open to ensure the reader is properly closed. | (defn reader ^WIGReader [f] (WIGReader. (cio/reader (util/compressor-input-stream f)) (util/as-url f))) |
Returns an open cljam.io.wig.WIGWriter of f. Should be used inside with-open to ensure the writer is properly closed. | (defn writer ^WIGWriter [f] (WIGWriter. (cio/writer (util/compressor-output-stream f)) (util/as-url f))) |
(def ^:private ^:const wig-fields [:track :chr :start :end :value]) | |
Returns true if given string is a blank, header, or comment line. | (defn- header-or-comment?
[^String s]
(or (empty? s)
(.startsWith s "browser")
(.startsWith s "#"))) |
Normalizes WIG lines. | (defn- normalize [m] (update m :chr chr/normalize-chromosome-key)) |
Converts vector ["key1=value1" "key2=value2" ...] to map {:key1 "value1" :key2 "value2" ...}. | (defn- fields->map
[fields]
(into {}
(map (fn [field]
(let [[k v] (cstr/split field #"=")]
[(keyword k) v])))
fields)) |
Converts string to Wiggle track data values which can be integer or real, positive or negative values. | (defn- str->wiggle-track-data
[^String s]
(if-let [l (as-long s)]
l
(as-double s))) |
Parses WIG lines and return a lazy sequence of flat map. | (defn- deserialize-wigs
[lines]
(letfn [(deserialize [lines pre-start track]
(lazy-seq
(when-first [line lines]
(let [fields (-> line cstr/trim (cstr/split #"\s+"))]
(case (first fields)
; track definition line
"track"
(let [type' (->> fields rest fields->map :type)]
(if (= type' "wiggle_0")
(deserialize (rest lines) pre-start {:line line})
(throw (ex-info "The track type with version must be `wiggle_0`" {}))))
; declaration line of variableStep
"variableStep"
(let [{:keys [chrom span]
:or {span "1"}} (->> fields rest fields->map)
span (as-long span)
track (assoc track :format :variable-step
:chr chrom
:span span
:step nil)]
(deserialize (rest lines) nil track))
; declaration line of fixedStep
"fixedStep"
(let [{:keys [chrom start span step]
:or {span "1", step "1"}} (->> fields rest fields->map)
step (long (as-long step))
pre-start (- (long (as-long start)) step)
span (as-long span)
track (assoc track :format :fixed-step
:chr chrom
:span span
:step step)]
(deserialize (rest lines) pre-start track))
; data line
(let [m (case (:format track)
:variable-step
(let [{:keys [chr span]} track
[start value] fields
start (long (as-long start))
end (dec (+ start (long span)))
value (str->wiggle-track-data value)]
{:track track
:chr chr
:start start
:end end
:value value})
:fixed-step
(let [{:keys [chr span step]} track
start (+ (long pre-start) (long step))
end (dec (+ start (long span)))
value (-> fields first str->wiggle-track-data)]
{:track track
:chr chr
:start start
:end end
:value value})
(throw
(IllegalArgumentException.
"Invalid wiggle format")))]
(cons m (deserialize (rest lines)
(:start m)
track))))))))]
(deserialize lines nil {:line nil
:format nil
:chr nil
:step nil
:span nil}))) |
Serializes a sequence of WIG fields into a lazy sequence of string. | (defn- serialize-wigs
[wigs]
{:pre [;; These fields are required.
(map (fn [wig] (every? some? ((apply juxt wig-fields) wig))) wigs)
;; There are two options for formatting wiggle data.
(map (fn [wig]
(let [format' (-> wig :track :format)]
(or (= format' :fixed-step)
(= format' :variable-step))))
wigs)]}
(letfn [(serialize [wigs]
(->> wigs
(sequence
(comp
(partition-by (juxt (comp :format :track)
#(- (long (:end %)) (long (:start %)))))
(map
(fn [[{{:keys [line span step] format' :format} :track
chr :chr start :start} :as xs]]
(case format'
:variable-step
(let [declaration-line (->> (cond-> ["variableStep"
(str "chrom=" chr)]
(not= span 1)
(conj (str "span=" span)))
(cstr/join \space))
data-lines (->> xs
(map (fn [{:keys [start value]}]
(cstr/join \space
[start value])))
(cstr/join \newline))]
(cstr/join \newline
(cond->> [declaration-line data-lines]
line (cons line))))
:fixed-step
(let [declaration-line (->> (cond-> ["fixedStep"
(str "chrom=" chr)
(str "start=" start)]
(not= step 1)
(conj (str "step=" step))
(not= span 1)
(conj (str "span=" span)))
(cstr/join \space))
data-lines (->> xs
(map (fn [{:keys [value]}] value))
(cstr/join \newline))]
(cstr/join \newline
(cond->> [declaration-line data-lines]
line (cons line)))))))))))]
(serialize wigs))) |
Reads sequence of WIG fields from reader | (defn read-fields
[^WIGReader rdr]
(->> rdr
.reader
line-seq
(remove header-or-comment?)
deserialize-wigs
(map normalize))) |
Writes sequence of WIG fields to writer. | (defn write-fields
[^WIGWriter wtr xs]
(let [w ^BufferedWriter (.writer wtr)]
(->> xs
serialize-wigs
(interpose \newline)
(run! #(.write w (str %)))))) |