# Aggregate target listing the final auspice JSON files.
# The export rules below take their output paths from these named inputs
# (via rules.all.input.<name>), so the names must stay in sync with them.
rule all:
    input:
        auspice_tree='auspice/v1_tbdrm_tree.json',
        auspice_meta='auspice/v1_tbdrm_meta.json',
        auspice_v2_cmd_line_config='auspice/v2_tbdrm_cmdLineArgs.json',
        auspice_v2_config='auspice/v2_tbdrm_config.json'

# Shared input file paths used by the rules below.
# Rule-specific parameters are defined in each rule's own `params` section.

# Pseudo-rule with no inputs, outputs or command: it only holds the paths of
# the workflow's input files as named params, which are aliased to `config`
# right after this rule.
rule config:
    params:
        seq = "data/drm.vcf.gz",  # sequences given to `augur filter --sequences`
        meta = "data/meta.tsv",  # metadata used by filter, refine, traits and the export rules
        exclude = "data/dropped_strains.txt",  # passed to `augur filter --exclude`
        mask = "data/Locus_to_exclude_Mtb.bed",  # passed to `augur mask --mask`
        ref = "data/ref.fasta",  # passed as `--vcf-reference` to tree, refine, ancestral, translate, sequence-traits
        drms = "data/DRMs-AAnuc.tsv",  # passed to `augur sequence-traits --features`
        sites = "data/drm_sites.txt",  # passed to `augur tree --exclude-sites`
        generef = "data/Mtb_H37Rv_NCBI_Annot.gff",  # passed to `augur translate --reference-sequence`
        genes = "data/genes.txt",  # passed to `augur translate --genes`
        colors = "data/color.tsv",  # passed as `--colors` to the export rules
        config = "data/config.json",  # `--auspice-config` for `augur export v1`
        geo_info = "data/lat_longs.tsv",  # passed as `--lat-longs` to the export rules
        v2_config = "data/v2_config.json"  # `--auspice-config` for rule export_v2_config

# Alias the `config` rule's params so rules can write config.x rather than
# rules.config.params.x.
# NOTE(review): this rebinds the name `config`, which Snakemake also uses for
# its built-in configuration dict (--config / --configfile); confirm nothing
# relies on that dict after this line.
config = rules.config.params
# end of config definition

# Run `augur filter` on the input sequences and metadata, passing the
# exclusion list via --exclude, and write the filtered VCF.
rule filter:
    input:
        seq = config.seq,
        meta = config.meta,
        exclude = config.exclude
    output:
        "results/filtered.vcf.gz"
    shell:
        """
        augur filter --sequences {input.seq} --metadata {input.meta} \
            --output {output} \
            --exclude {input.exclude}
        """

# Run `augur mask` on the filtered VCF using the mask file from the config
# params, producing the masked VCF used by the downstream rules.
rule mask:
    input:
        seq=rules.filter.output,
        mask=config.mask
    output:
        'results/masked.vcf.gz'
    shell:
        'augur mask'
        ' --sequences {input.seq}'
        ' --output {output}'
        ' --mask {input.mask}'

# Build the raw tree from the masked VCF with `augur tree`.
# The sites file is handed to --exclude-sites and the tree-building
# method is taken from params.
rule tree:
    input:
        aln=rules.mask.output,
        ref=config.ref,
        sites=config.sites
    output:
        "results/tree_raw.nwk"
    params:
        method="fasttree"
    shell:
        "augur tree"
        " --exclude-sites {input.sites}"
        " --alignment {input.aln}"
        " --vcf-reference {input.ref}"
        " --output {output}"
        " --method {params.method}"

# Run `augur refine --timetree` on the raw tree, writing the refined tree and
# a node-data JSON with branch lengths.
# All tunable values (rooting method, clock rate and its standard deviation,
# coalescent value) live in `params` so they can be adjusted in one place.
rule refine:
    input:
        tree = rules.tree.output,
        aln = rules.mask.output,
        metadata = config.meta,
        ref = config.ref
    output:
        tree = "results/tree.nwk",
        node_data = "results/branch_lengths.json",
    params:
        root = 'min_dev',
        clock_rate = 1e-7,
        clock_std = 3e-8,
        # Previously hard-coded in the shell command; value is unchanged.
        coalescent = 0.0001
    shell:
        """
        augur refine --tree {input.tree} --alignment {input.aln} --metadata {input.metadata} \
            --output-tree {output.tree} --output-node-data {output.node_data} --vcf-reference {input.ref} \
            --timetree --root {params.root} --coalescent {params.coalescent} \
            --clock-rate {params.clock_rate} --clock-std-dev {params.clock_std}
        """

# Run `augur ancestral` on the refined tree and masked VCF.
# Writes a node-data JSON and, via --output-vcf, a VCF that the translate and
# seqtraits rules consume.
rule ancestral:
    input:
        tree = rules.refine.output.tree,
        alignment = rules.mask.output,
        ref = config.ref
    output:
        nt_data = "results/nt_muts.json",
        vcf_out = "results/nt_muts.vcf"
    params:
        inference = "joint"
    shell:
        """
        augur ancestral --tree {input.tree} --alignment {input.alignment} \
            --output-node-data {output.nt_data} --inference {params.inference} \
            --output-vcf {output.vcf_out} --vcf-reference {input.ref}
        """

# Run `augur translate` on the VCF produced by rule ancestral, restricted to
# the genes listed in the genes file.
# Writes a node-data JSON plus a translations VCF and its reference FASTA,
# both of which are inputs to rule seqtraits.
rule translate:
    input:
        tree = rules.refine.output.tree,
        ref = config.ref,
        gene_ref = config.generef,
        vcf = rules.ancestral.output.vcf_out,
        genes = config.genes
    output:
        aa_data = "results/aa_muts.json",
        vcf_out = "results/translations.vcf",
        vcf_ref = "results/translations_reference.fasta"
    shell:
        """
        augur translate --tree {input.tree} --genes {input.genes} --vcf-reference {input.ref} \
            --ancestral-sequences {input.vcf} --output-node-data {output.aa_data} --reference-sequence {input.gene_ref} \
            --alignment-output {output.vcf_out} --vcf-reference-output {output.vcf_ref}
        """

# Run `augur traits` on the refined tree for the metadata columns listed in
# params.traits, writing the result as node data.
rule traits:
    input:
        tree=rules.refine.output.tree,
        meta=config.meta
    output:
        "results/traits.json"
    params:
        traits="country region"
    shell:
        "augur traits"
        " --tree {input.tree}"
        " --metadata {input.meta}"
        " --columns {params.traits}"
        " --output-node-data {output}"

# Run `augur sequence-traits` using the features file from the config params
# (config.drms) against the nucleotide VCF from rule ancestral and the
# translations from rule translate, writing the result as node data.
rule seqtraits:
    input:
        align = rules.ancestral.output.vcf_out,
        ref = config.ref,
        trans_align = rules.translate.output.vcf_out,
        trans_ref = rules.translate.output.vcf_ref,
        drms = config.drms
    output:
        drm_data = "results/drms.json",
    params:
        trait_count = "traits",  # value passed to --count
        label = "Drug_Resistance"  # value passed to --label
    shell:
        """
        augur sequence-traits \
            --ancestral-sequences {input.align} \
            --vcf-reference {input.ref} \
            --translations {input.trans_align} \
            --vcf-translate-reference {input.trans_ref} \
            --features {input.drms} --output-node-data {output.drm_data} \
            --count {params.trait_count} --label {params.label}
        """

# Run `augur export v1`, combining the refined tree, metadata and all
# node-data JSONs into the v1 tree and meta files listed in rule all.
rule export:
    message: "Exporting data files for auspice v1"
    input:
        tree = rules.refine.output.tree,
        metadata = config.meta,
        branch_lengths = rules.refine.output.node_data,
        traits = rules.traits.output,
        nt_muts = rules.ancestral.output.nt_data,
        aa_muts = rules.translate.output.aa_data,
        drms = rules.seqtraits.output.drm_data,
        color_defs = config.colors,
        config = config.config,
        geo_info = config.geo_info
    output:
        # Paths are taken from rule all so the target list has a single source.
        tree = rules.all.input.auspice_tree,
        meta = rules.all.input.auspice_meta
    shell:
        """
        augur export v1 \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --node-data {input.branch_lengths} {input.drms} {input.traits} {input.aa_muts} {input.nt_muts} \
            --auspice-config {input.config} \
            --colors {input.color_defs} \
            --lat-longs {input.geo_info} \
            --output-tree {output.tree} \
            --output-meta {output.meta}
        """


# Run `augur export v2` without an auspice config file: title, maintainers
# and geo resolutions are supplied as command-line arguments from params.
rule export_v2_no_config:
    message: "Exporting data files for auspice v2 using command line args instead of a config file"
    input:
        tree = rules.refine.output.tree,
        metadata = config.meta,
        branch_lengths = rules.refine.output.node_data,
        traits = rules.traits.output,
        nt_muts = rules.ancestral.output.nt_data,
        aa_muts = rules.translate.output.aa_data,
        drms = rules.seqtraits.output.drm_data,
        color_defs = config.colors,
        geo_info = config.geo_info
    output:
        # Path is taken from rule all so the target list has a single source.
        main = rules.all.input.auspice_v2_cmd_line_config
    params:
        # The inner single quotes are part of the value: they keep the
        # multi-word title/maintainer strings as single shell arguments.
        title = "'TB with DRMs'",
        maints = "'Emma Hodcroft <https://neherlab.org/emma-hodcroft.html>' 'John Brown <http://www.google.com>'",
        geo = 'country',
        # extra_traits = 'host',
        # panels = 'tree map entropy frequencies'
    shell:
        """
        augur export v2 \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --node-data {input.branch_lengths} {input.drms} {input.traits} {input.aa_muts} {input.nt_muts} \
            --colors {input.color_defs} \
            --lat-longs {input.geo_info} \
            --output {output.main} \
            --title {params.title} \
            --maintainers {params.maints} \
            --geo-resolutions {params.geo}
        """

# Run `augur export v2` with display settings taken from the v2 auspice
# config file (config.v2_config) rather than command-line arguments.
rule export_v2_config:
    message: "Exporting data files for auspice v2 using a config file"
    input:
        tree = rules.refine.output.tree,
        metadata = config.meta,
        branch_lengths = rules.refine.output.node_data,
        traits = rules.traits.output,
        nt_muts = rules.ancestral.output.nt_data,
        aa_muts = rules.translate.output.aa_data,
        drms = rules.seqtraits.output.drm_data,
        color_defs = config.colors,
        geo_info = config.geo_info,
        config = config.v2_config
    output:
        # Path is taken from rule all so the target list has a single source.
        main = rules.all.input.auspice_v2_config
    shell:
        """
        augur export v2 \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --node-data {input.branch_lengths} {input.drms} {input.traits} {input.aa_muts} {input.nt_muts} \
            --colors {input.color_defs} \
            --lat-longs {input.geo_info} \
            --output {output.main} \
            --auspice-config {input.config}
        """

# Delete the generated output directories listed in params.
# Both the message and the rm command expand {params} to the full list.
rule clean:
    message: "Removing directories: {params}"
    params:
        "results",
        "auspice"
    shell:
        "rm -rfv {params}"
