A type of VCF writer and internal functions to write VCF contents. See https://samtools.github.io/hts-specs/ for the detailed VCF specifications. | (ns cljam.io.vcf.writer
(:require [clojure.string :as cstr]
[camel-snake-kebab.core :refer [->camelCaseString ->PascalCaseString]]
[cljam.io.protocols :as protocols]
[cljam.io.vcf.util :as vcf-util])
(:import [java.io Closeable BufferedWriter])) |
;; Forward declaration: the VCFWriter deftype below calls write-variants,
;; which is only defined further down in this file.
(declare write-variants) | |
VCFWriter | |
;; VCF writer type. Holds the destination url, the underlying writer object,
;; and the meta-info and header that describe the file being written.
(deftype VCFWriter [url writer meta-info header]
  Closeable
  (close [_]
    ;; Closing the VCFWriter closes the wrapped writer.
    (.close ^Closeable writer))
  protocols/IWriter
  (writer-url [_] url)
  protocols/IVariantWriter
  (write-variants [wtr variants]
    ;; Delegates to the namespace-level write-variants function.
    (write-variants wtr variants)))
Vars and utilities | |
(def ^:private default-fileformat
  "File format string written when the meta-info has no :fileformat entry."
  "VCFv4.3")
(def ^:private meta-info-prefix
  "Prefix put in front of every meta-information line."
  "##")

(def ^:private header-prefix
  "Prefix put in front of the header line."
  "#")
(defn- nil->dot
  "Returns the string \".\" when s is nil, otherwise returns s unchanged."
  [s]
  (if (some? s)
    s
    "."))
(defn- write-line
  "Writes the string s to bwtr followed by a line separator, then returns
  bwtr."
  [^BufferedWriter bwtr ^String s]
  (.write bwtr s)
  (.newLine bwtr)
  bwtr)
Writing meta-information | |
(defn stringify-key
  "Converts a meta-info key to the string used in a meta-information line.
  The keys :info, :filter, :format, :alt, :sample and :pedigree become their
  upper-cased names; every other key is converted with ->camelCaseString."
  [k]
  (case k
    (:info :filter :format :alt :sample :pedigree) (cstr/upper-case (name k))
    (->camelCaseString k)))
(defn- escape-special-chars
  "Escapes s for use inside a double-quoted meta-info value: each backslash
  becomes two backslashes and each double quote becomes backslash + quote."
  [s]
  (-> s
      ;; Backslashes must be doubled first; otherwise the backslash added in
      ;; front of each double quote below would itself get doubled.
      (cstr/replace "\\" "\\\\")
      (cstr/replace "\"" "\\\"")))
(defn- add-extra-fields
  "Appends to the vector fields one Key=\"value\" string for every entry of m
  whose key is not listed in required-keys. Keys are rendered with
  ->PascalCaseString; values are escaped and wrapped in double quotes."
  [fields m required-keys]
  (reduce-kv (fn [fields k v]
               (conj fields
                     (str (->PascalCaseString k) "=\""
                          (escape-special-chars v) "\"")))
             fields
             ;; Only the keys not already handled by the caller remain.
             (apply dissoc m required-keys)))
(defn- pack-meta-info
  "Extends fields with the extra (non-required) entries of m and joins the
  result into a single comma-separated string."
  [fields m required-keys]
  (->> (add-extra-fields fields m required-keys)
       (cstr/join ",")))
(defn- stringify-meta-info-contig
  "Builds the comma-separated field list of a contig meta-info entry from the
  map m. ID always comes first; length, assembly, md5, URL, species, taxonomy
  and IDX follow in that order when their values are truthy. The species
  value is escaped and double-quoted. Remaining keys of m are appended as
  quoted extra fields."
  [m]
  (-> [(str "ID=" (:id m))]
      (cond->
        (:length m) (conj (str "length=" (:length m)))
        (:assembly m) (conj (str "assembly=" (:assembly m)))
        (:md-5 m) (conj (str "md5=" (:md-5 m)))
        (:url m) (conj (str "URL=" (:url m)))
        (:species m) (conj (str "species=\"" (escape-special-chars (:species m)) "\""))
        (:taxonomy m) (conj (str "taxonomy=" (:taxonomy m)))
        (:idx m) (conj (str "IDX=" (:idx m))))
      (pack-meta-info m [:id :length :assembly :md-5 :url :species :taxonomy :idx])))
(defn- stringify-meta-info-info
  "Builds the comma-separated field list of an INFO meta-info entry from the
  map m. ID, Number, Type and Description are always emitted; a nil :number
  or :type is written as a dot. Source, Version and IDX are added when their
  values are truthy. Description, Source and Version are escaped and
  double-quoted. Remaining keys of m are appended as quoted extra fields."
  [m]
  (-> [(str "ID=" (:id m))
       (str "Number=" (nil->dot (:number m)))
       (str "Type=" (nil->dot (:type m)))
       (str "Description=\"" (escape-special-chars (:description m)) "\"")]
      (cond->
        (:source m) (conj (str "Source=\"" (escape-special-chars (:source m)) "\""))
        (:version m) (conj (str "Version=\"" (escape-special-chars (:version m)) "\""))
        (:idx m) (conj (str "IDX=" (:idx m))))
      (pack-meta-info m [:id :number :type :description :source :version :idx])))
(defn- stringify-meta-info-filter
  "Builds the comma-separated field list of a FILTER meta-info entry from the
  map m. ID and the escaped, double-quoted Description are always emitted;
  IDX is added when its value is truthy. Remaining keys of m are appended as
  quoted extra fields."
  [m]
  (-> [(str "ID=" (:id m))
       (str "Description=\"" (escape-special-chars (:description m)) "\"")]
      (cond->
        (:idx m) (conj (str "IDX=" (:idx m))))
      (pack-meta-info m [:id :description :idx])))
(defn- stringify-meta-info-format
  "Builds the comma-separated field list of a FORMAT meta-info entry from the
  map m. ID, Number, Type and the escaped, double-quoted Description are
  always emitted; a nil :number or :type is written as a dot. IDX is added
  when its value is truthy. Remaining keys of m are appended as quoted extra
  fields."
  [m]
  (-> [(str "ID=" (:id m))
       (str "Number=" (nil->dot (:number m)))
       (str "Type=" (nil->dot (:type m)))
       (str "Description=\"" (escape-special-chars (:description m)) "\"")]
      (cond->
        (:idx m) (conj (str "IDX=" (:idx m))))
      (pack-meta-info m [:id :number :type :description :idx])))
(defn- stringify-meta-info-alt
  "Builds the comma-separated field list of an ALT meta-info entry from the
  map m: ID, then the escaped, double-quoted Description, then any remaining
  keys of m as quoted extra fields."
  [m]
  (-> [(str "ID=" (:id m))
       (str "Description=\"" (escape-special-chars (:description m)) "\"")]
      (pack-meta-info m [:id :description])))
(defn- stringify-meta-info-sample
  "Builds the comma-separated field list of a SAMPLE meta-info entry from the
  map m. ID comes first, Genomes and Mixture follow when their values are
  truthy, and the escaped, double-quoted Description is always emitted after
  them. Remaining keys of m are appended as quoted extra fields."
  [m]
  (-> [(str "ID=" (:id m))]
      (cond->
        (:genomes m) (conj (str "Genomes=" (:genomes m)))
        (:mixture m) (conj (str "Mixture=" (:mixture m))))
      (conj (str "Description=\"" (escape-special-chars (:description m)) "\""))
      (pack-meta-info m [:id :genomes :mixture :description])))
(defn- stringify-meta-info-pedigree
  "Builds the comma-separated field list of a PEDIGREE meta-info entry from
  the map m. ID comes first. A key whose name has the form name-<digits> is
  written as Name_<digits>=value; any other key is rendered with
  ->PascalCaseString. Values are written unquoted."
  [m]
  (letfn [(field [k v]
            (let [[_ idx :as hit] (re-matches #"name-(\d+)" (name k))]
              (if hit
                (str "Name_" idx "=" v)
                (str (->PascalCaseString k) "=" v))))]
    (cstr/join \,
               (reduce-kv (fn [acc k v] (conj acc (field k v)))
                          [(str "ID=" (:id m))]
                          (dissoc m :id)))))
(defn stringify-structured-line
  "Converts the structured meta-info map m to its string form, choosing the
  formatter from the key k. Supported keys are :contig, :info, :filter,
  :format, :alt, :sample and :pedigree; any other key makes the case form
  throw because it has no default clause."
  [k m]
  (case k
    :contig (stringify-meta-info-contig m)
    :info (stringify-meta-info-info m)
    :filter (stringify-meta-info-filter m)
    :format (stringify-meta-info-format m)
    :alt (stringify-meta-info-alt m)
    :sample (stringify-meta-info-sample m)
    :pedigree (stringify-meta-info-pedigree m)))
(defn- write-meta-info1
  "Writes the meta-info entry for key k with value v to wtr. Nothing is
  written when v is nil. A sequential v produces one structured line per
  element, with the element wrapped in angle brackets; any other v produces
  a single key=value line."
  [^VCFWriter wtr k v]
  (letfn [(emit [body]
            (write-line (.writer wtr)
                        (str meta-info-prefix (stringify-key k) "=" body)))]
    (when (some? v)
      (if (sequential? v)
        (doseq [entry v]
          (emit (str "<" (stringify-structured-line k entry) ">")))
        (emit v)))))
(defn write-meta-info
  "Writes the meta-information lines described by the map meta-info to wtr.
  The fileformat line is written first, using the default file format when
  meta-info has no :fileformat entry. The remaining known keys follow in a
  fixed order; keys with nil values are skipped."
  [^VCFWriter wtr meta-info]
  (write-meta-info1 wtr :fileformat (:fileformat meta-info default-fileformat))
  (doseq [k [:file-date :source :reference :contig :phasing
             :info :filter :format :alt :sample :pedigree]]
    (write-meta-info1 wtr k (get meta-info k))))
Writing header | |
(defn stringify-header
  "Converts the sequence of header column names to a header line string: the
  header prefix followed by the names joined with tabs."
  ^String [header]
  (->> header
       (cstr/join "\t")
       (str header-prefix)))
(defn write-header
  "Writes the header line built from header to the writer held by wtr."
  [^VCFWriter wtr header]
  (let [line (stringify-header header)]
    (write-line (.writer wtr) line)))
Writing data lines | |
(defn- stringify-data-line-alt
  "Joins the elements of v with commas. Returns nil when v is nil or false."
  [v]
  (if-not v
    nil
    (cstr/join "," v)))
(def ^:private ^:const precise-integer-limit
  "Exclusive upper bound (0x800000 = 8388608) below which a whole-number QUAL
  value is printed as an integer by stringify-data-line-qual."
  0x800000)
(defn- stringify-data-line-qual
  "Converts the QUAL value x to a string. Returns nil when x is nil or false.
  A value with no fractional part that is below precise-integer-limit is
  printed as an integer; any other value is printed with str as-is."
  [x]
  (cond
    (not x) nil

    (and (zero? (float (mod x 1)))
         (< (float x) precise-integer-limit))
    (str (int x))

    :else (str x)))
(defn- stringify-data-line
  "Converts the variant map m to one tab-separated data line. The eight fixed
  columns come first, followed by one column for each header entry after the
  eighth, looked up in m by keyword. :alt and :qual are stringified first,
  and every nil column is written as a dot."
  ^String
  [m header]
  (let [fixed-cols [:chr :pos :id :ref :alt :qual :filter :info]
        sample-cols (map keyword (drop 8 header))
        row (-> m
                (update :alt stringify-data-line-alt)
                (update :qual stringify-data-line-qual))]
    (cstr/join \tab
               (for [col (concat fixed-cols sample-cols)]
                 (nil->dot (get row col))))))
(defn write-variants
  "Writes variants to the VCF file, one data line per variant.

  A variant whose :filter, :info or any sample column (header entries after
  the eighth) is already a string is written as-is. Otherwise it is first
  passed through the stringifier built by vcf-util/variant-vals-stringifier
  from the writer's meta-info and header."
  [^VCFWriter wtr variants]
  (let [header (.header wtr)
        out (.writer wtr)
        stringify-vals (vcf-util/variant-vals-stringifier (.meta-info wtr) header)
        header-kws (drop 8 (map keyword header))
        ;; The extractor depends only on the header, so it is built once
        ;; here rather than once per variant.
        raw-fields (apply juxt :filter :info header-kws)]
    (doseq [v variants]
      (write-line out
                  (stringify-data-line
                   (if (some string? (raw-fields v))
                     v
                     (stringify-vals v))
                   header)))))