The core of FASTA index features. | (ns cljam.io.fasta-index.core
(:require [clojure.java.io :as cio]
[clojure.tools.logging :as logging]
[cljam.io.fasta-index.writer :as writer]
[cljam.io.fasta-index.reader :as reader]
[cljam.util :as util])
(:import cljam.io.fasta_index.reader.FAIReader
cljam.io.fasta_index.writer.FAIWriter)) |
Writing | |
(defn writer
  "Returns an open FAIWriter for `f`.

  The writer wraps a character writer opened on `f` via `cio/writer`
  and carries the URL of `f` obtained from `util/as-url`. The caller is
  responsible for closing the returned writer (e.g. with `with-open`)."
  [f]
  (let [out (cio/writer f)
        url (util/as-url f)]
    (new FAIWriter out url)))
(defn create-index
  "Creates a FASTA index file from the sequences.

  Reads the FASTA data from `in-fa` (opened through
  `util/compressor-input-stream`) and writes the index to `out-fai`.

  If writing fails, the partially written output file is removed on a
  best-effort basis, the failure is logged together with its cause, and
  the original exception is rethrown to the caller."
  [in-fa out-fai]
  (with-open [r (cio/reader (util/compressor-input-stream in-fa))
              w ^FAIWriter (writer out-fai)]
    (try
      (writer/write-index! r w)
      (catch Exception e
        ;; Delete silently: the writer is still open at this point, and a
        ;; failing delete must not replace the real cause `e` with an
        ;; unrelated "couldn't delete" exception.
        (cio/delete-file (.url w) true)
        ;; Pass the throwable so the log entry carries the stack trace.
        (logging/error e "Failed to create FASTA index")
        (throw e)))))
Reading | |
(defn reader
  "Returns an open FAIReader for `f`.

  The index file is read and parsed eagerly with `reader/parse-fai`, and
  the underlying character reader is closed before this function returns.
  The resulting FAIReader holds the parsed indices together with the URL
  of `f` obtained from `util/as-url`."
  [f]
  (let [indices (with-open [rdr (cio/reader f)]
                  (reader/parse-fai rdr))]
    (FAIReader. indices (util/as-url f))))
(defn get-header
  "Returns index data and a name [:name, :len, :offset, :line-blen, :line-len]
  of the sequence named `name'`.

  When `name'` is not present in the index, the result contains only the
  `:name` key."
  [^FAIReader fai name']
  (let [entry (get (.indices fai) name')]
    ;; Merging nil is a no-op, so an unknown name yields just {:name name'}.
    (merge {:name name'} entry)))
(defn get-headers
  "Get offsets of all sequences in the FASTA file.

  Returns a vector of maps, sorted by ascending `:offset`, where each
  element contains the following keys:
  - `:name`   the key of the sequence in the index
  - `:desc`   always an empty string
  - `:offset` the `:offset` value of the index entry"
  [^FAIReader fai]
  (->> (for [[seq-name entry] (.indices fai)]
         {:name seq-name
          :desc ""
          :offset (:offset entry)})
       (sort-by :offset)
       vec))
(defn get-indices
  "Get fasta indices with the name of the sequence.

  Returns a vector of maps, sorted by ascending `:offset`, where each
  element contains the following keys:
  - `:name` the key of the sequence in the index
  - every key/value pair of the corresponding index entry"
  [^FAIReader fai]
  (->> (for [[seq-name entry] (.indices fai)]
         ;; Entry fields are merged last, so they take precedence.
         (merge {:name seq-name} entry))
       (sort-by :offset)
       vec))
(defn get-span
  "Calculate byte spans for FASTA file.

  Looks up the index entry for `name'` and maps the positions `start` and
  `end` to byte offsets in the FASTA file. Both positions are first
  clamped to the range [0, :len] of the entry.

  Returns a vector `[start-byte end-byte]`, or nil when `name'` is not in
  the index or the clamped `start` is not less than the clamped `end`."
  [^FAIReader fai name' ^long start ^long end]
  (when-let [{^long seq-offset :offset
              ^long seq-len :len
              ^long line-len :line-len
              ^long line-blen :line-blen}
             (get (.indices fai) name')]
    (let [from (min seq-len (max 0 start))
          to (min seq-len (max 0 end))
          ;; Position -> byte offset: each complete group of `line-blen`
          ;; positions advances `line-len` bytes, and the remainder is
          ;; added as-is on top of the sequence's starting offset.
          ->byte (fn [^long pos]
                   (let [full-lines (quot pos line-blen)
                         within-line (rem pos line-blen)]
                     (+ seq-offset
                        (+ (* full-lines line-len)
                           within-line))))]
      (when (< from to)
        [(->byte from) (->byte to)]))))