(ns cljam.io.fasta.writer
(:require [clojure.java.io :as cio]
[clojure.string :as cstr]
[cljam.util :as util]
[cljam.io.protocols :as protocols])
(:import [java.io Closeable BufferedWriter])) | |
;; Forward declaration: the FASTAWriter type below delegates to the
;; namespace-level `write-sequences` function defined later in this namespace.
(declare write-sequences)

;; Writer for FASTA output.
;; Fields:
;;   url          - value returned by `writer-url`
;;   cols         - number of sequence characters written per line
;;   writer       - writer that receives the FASTA text
;;   index-writer - writer that receives index lines, or nil when absent
;;   curr-offset  - volatile holding the running offset used for index entries
(deftype FASTAWriter [url ^int cols writer index-writer curr-offset]
  Closeable
  (close [this]
    ;; Close the index writer even when closing the main writer throws, so a
    ;; failure on one stream does not leak the other.
    (try
      (.close ^Closeable (.writer this))
      (finally
        (when-let [iw (.index-writer this)]
          (.close ^Closeable iw)))))
  protocols/IWriter
  (writer-url [this]
    (.url this))
  protocols/ISequenceWriter
  (write-sequences [this seqs]
    ;; The unqualified symbol resolves to the namespace function declared
    ;; above, not to this protocol method.
    (write-sequences this seqs)))
(defn writer
  "Returns an open FASTAWriter for the file `f`. The caller is responsible for
  closing it (e.g. with `with-open`).

  Options:
    :cols          - sequence characters per line (default 80)
    :create-index? - when truthy, also opens an index writer at the absolute
                     path of `f` with \".fai\" appended (default true)

  The FASTA output stream is obtained from `util/compressor-output-stream`.
  If opening the index writer fails, the already-opened FASTA writer is closed
  before the exception is rethrown."
  [f {:keys [cols create-index?]
      :or {cols 80 create-index? true}}]
  (let [abs-f (.getAbsolutePath (cio/file f))
        wtr (cio/writer (util/compressor-output-stream abs-f))
        index-writer (try
                       (when create-index?
                         (cio/writer (str abs-f ".fai")))
                       (catch Throwable e
                         ;; Do not leak the main writer when the index file
                         ;; cannot be opened; keep the original failure as
                         ;; the primary exception.
                         (try
                           (.close ^Closeable wtr)
                           (catch Throwable close-err
                             (.addSuppressed e close-err)))
                         (throw e)))]
    (FASTAWriter. (util/as-url abs-f) cols wtr index-writer (volatile! 0))))
(defn- write-name
  "Writes the header line for a sequence: `>`, the name `n`, then a line
  separator, to the FASTA writer of `w`.

  Returns the number of bytes the header line occupies in the output; the
  caller adds it to the running offset when building index entries."
  ^long [^FASTAWriter w ^String n]
  (let [wtr ^BufferedWriter (.writer w)]
    (.write wtr (int \>))
    (.write wtr n)
    (.newLine wtr)
    ;; Count encoded bytes, not UTF-16 code units: the writer is created with
    ;; `clojure.java.io/writer`, whose default encoding is UTF-8, so a name
    ;; containing non-ASCII characters occupies more bytes than `.length`
    ;; reports and would otherwise yield wrong index offsets.
    (+ 1
       (alength (.getBytes n "UTF-8"))
       (.length (System/lineSeparator)))))
(defn- write-seq-str
  "Writes the string `s` to the FASTA writer of `w`, wrapped so that every
  line except possibly the last holds exactly `cols` characters. Each line is
  terminated with the platform line separator.

  Returns a vector `[length written]`: the length of `s`, and that length plus
  the total size of the line separators emitted."
  [^FASTAWriter w ^String s]
  (let [out ^BufferedWriter (.writer w)
        total ^int (.length s)
        width (.cols w)
        ;; ceil(total / width): number of output lines
        line-count (quot (dec (+ total width)) width)
        last-line (dec line-count)]
    (loop [line 0]
      (when (< line line-count)
        (let [start (* line width)
              ;; the final line takes whatever remains of the string
              size (if (= line last-line)
                     (- total start)
                     width)]
          (.write out s (int start) (int size))
          (.newLine out)
          (recur (inc line)))))
    [total (+ total (* line-count (.length (System/lineSeparator))))]))
(defn- write-seq
  "Writes the elements of the collection `col` to the FASTA writer of `w`,
  `cols` elements per line. The elements of each line are concatenated with
  `clojure.string/join` and followed by the platform line separator.

  Returns a vector `[seq-len written]`: the number of elements written, and
  that number plus the total size of the line separators emitted."
  [^FASTAWriter w col]
  (let [out ^BufferedWriter (.writer w)
        eol-size (.length (System/lineSeparator))]
    (reduce (fn [[seq-len written] chunk]
              (.write out (cstr/join chunk))
              (.newLine out)
              [(+ seq-len (count chunk))
               (+ written eol-size (count chunk))])
            [0 0]
            (partition-all (.cols w) col))))
(defn- write-sequence
  "Writes one sequence record to `w`: a header line followed by the wrapped
  sequence data.

  The name is taken from `:name`, falling back to `:rname`; the data from
  `:seq`, falling back to `:sequence`. String data goes through
  `write-seq-str`, anything else through `write-seq`.

  When `w` has an index writer, one tab-separated line is appended to it with
  the fields: name, sequence length, offset of the sequence data, characters
  per line, and characters per line plus the line separator length. The
  running offset held by `w` is then advanced past this record."
  [^FASTAWriter w {:keys [rname]
                   name' :name
                   sequence' :sequence
                   seq' :seq}]
  (let [chr-name (or name' rname)
        payload (or seq' sequence')
        header-size (write-name w chr-name)
        [seq-len body-size] (if (string? payload)
                              (write-seq-str w payload)
                              (write-seq w payload))]
    (when-let [iwtr ^BufferedWriter (.index-writer w)]
      (let [offset-ref (.curr-offset w)
            ;; a sequence shorter than one full line has a shorter line length
            line-bases (Math/min (.cols w) (int seq-len))
            line-width (+ line-bases (.length (System/lineSeparator)))
            seq-start (+ (long @offset-ref) header-size)
            fields [chr-name seq-len seq-start line-bases line-width]]
        (.write iwtr (cstr/join \tab fields))
        (.newLine iwtr)
        (vreset! offset-ref (+ seq-start (long body-size)))))))
(defn write-sequences
  "Writes every sequence record in `xs` to the writer `w`, in order, by calling
  `write-sequence` on each. Returns nil."
  [w xs]
  (run! (fn [record] (write-sequence w record)) xs))