from snakemake.io import load_configfile


# setup workflow
def load_config(path):
    """Load the configuration file located at ``path``.

    Thin wrapper: forwards ``path`` to ``load_configfile`` and hands back
    whatever that call returns, unchanged.
    """
    return load_configfile(path)


# import modules
# Load the shared workflow Snakefile as module `distance`, configured from
# config_distance/config.yaml. `replace_prefix` maps the "results/" and
# "resources/" path prefixes to distance-specific subdirectories.
module distance:
    snakefile:
        "../../../workflow/Snakefile"
    config:
        load_config("config_distance/config.yaml")
    replace_prefix:
        {"results/": "results/distance/", "resources/": "resources/distance/"}


# Import every rule of the module, prefixing each rule name with `distance_`.
use rule * from distance as distance_*


# Load the shared workflow Snakefile as module `mutationrate`, configured from
# config_mutationrate/config.yaml. `replace_prefix` maps the "results/" and
# "resources/" path prefixes to mutationrate-specific subdirectories.
module mutationrate:
    snakefile:
        "../../../workflow/Snakefile"
    config:
        load_config("config_mutationrate/config.yaml")
    replace_prefix:
        {"results/": "results/mutationrate/", "resources/": "resources/mutationrate/"}


# Import every rule of the module, prefixing each rule name with
# `mutationrate_`.
use rule * from mutationrate as mutationrate_*


# Load the shared workflow Snakefile as module `longreads`, configured from
# config_longreads/config.yaml. `replace_prefix` maps the "results/" and
# "resources/" path prefixes to longreads-specific subdirectories.
module longreads:
    snakefile:
        "../../../workflow/Snakefile"
    config:
        load_config("config_longreads/config.yaml")
    replace_prefix:
        {"results/": "results/longreads/", "resources/": "resources/longreads/"}


# Import every rule of the module, prefixing each rule name with `longreads_`.
use rule * from longreads as longreads_*


# Load the shared workflow Snakefile as module `amplicon`, configured from
# config_amplicon/config.yaml. `replace_prefix` maps the "results/" and
# "resources/" path prefixes to amplicon-specific subdirectories.
module amplicon:
    snakefile:
        "../../../workflow/Snakefile"
    config:
        load_config("config_amplicon/config.yaml")
    replace_prefix:
        {"results/": "results/amplicon/", "resources/": "resources/amplicon/"}


# Import every rule of the module, prefixing each rule name with `amplicon_`.
use rule * from amplicon as amplicon_*


# Load the shared workflow Snakefile as module `realdata`, configured from
# config_realdata/config.yaml. `replace_prefix` maps the "results/" and
# "resources/" path prefixes to realdata-specific subdirectories.
module realdata:
    snakefile:
        "../../../workflow/Snakefile"
    config:
        load_config("config_realdata/config.yaml")
    replace_prefix:
        {"results/": "results/realdata/", "resources/": "resources/realdata/"}


# Import every rule of the module, prefixing each rule name with `realdata_`.
use rule * from realdata as realdata_*


# define required output files
# Default target rule: requests the performance-measure output directories of
# all five modules. Both the global and the local directory are listed for
# mutationrate, distance, longreads and realdata; for amplicon only the local
# directory is listed.
rule all:
    input:
        "results/mutationrate/performance_measures/global/",
        "results/mutationrate/performance_measures/local/",
        "results/distance/performance_measures/global/",
        "results/distance/performance_measures/local/",
        "results/longreads/performance_measures/global/",
        "results/longreads/performance_measures/local/",
        "results/amplicon/performance_measures/local/",
        "results/realdata/performance_measures/global/",
        "results/realdata/performance_measures/local/",
    default_target: True


# amplicon rules
# Re-declare `run_method_local` from the `amplicon` module under the name
# `amplicon_run_method_local_global`, overriding its output, benchmark and
# resources. All outputs are placed under
# results/amplicon/method_runs/<paramspace>/<method>/replicates/<replicate>/.
# In `fname_status` and `benchmark` the `method` wildcard is constrained to the
# alternation of `amplicon.method_list`; the leading 'markertoavoidemptyregex'
# entry presumably keeps that regex non-empty when the list is empty.
use rule run_method_local from amplicon as amplicon_run_method_local_global with:
    output:
        fname_result_haplos=f"results/amplicon/method_runs/{amplicon.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/haplotypes_.fasta",
        fname_result=f"results/amplicon/method_runs/{amplicon.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/snvs_.vcf",
        dname_work=directory(
            f"results/amplicon/method_runs/{amplicon.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/work/"
        ),
        fname_status=touch(
            f"results/amplicon/method_runs/{amplicon.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + amplicon.method_list)}}}/replicates/{{replicate}}/status_.txt"
        ),
    benchmark:
        f"results/amplicon/method_runs/{amplicon.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + amplicon.method_list)}}}/replicates/{{replicate}}/benchmark_.tsv"
    resources:
        mem_mb=8024,
        runtime=440,


# Override the inputs of `performance_measures_local` for the `amplicon`
# module. `vcf_list`, `groundtruth_list` and `benchmark_list` each hold one
# entry per (parameter instance, method, replicate) combination, generated in
# the same nested order; `haplostats_list` holds one entry per
# (parameter instance, replicate).
use rule performance_measures_local from amplicon as amplicon_performance_measures_local with:
    input:
        vcf_list=[
            f"results/amplicon/method_runs/{params}/{method}/replicates/{replicate}/snvs_.vcf"
            for params in amplicon.paramspace.instance_patterns
            for method in amplicon.method_list
            for replicate in range(amplicon.config["replicate_count"])
        ],
        # `method` is not part of the path; iterating over `method_list` anyway
        # keeps this list the same length and order as `vcf_list`
        groundtruth_list=[
            f"results/amplicon/simulated_reads/{params}/replicates/{replicate}/ground_truth.csv"
            for params in amplicon.paramspace.instance_patterns
            for method in amplicon.method_list
            for replicate in range(amplicon.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/amplicon/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in amplicon.paramspace.instance_patterns
            for method in amplicon.method_list
            for replicate in range(amplicon.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/amplicon/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in amplicon.paramspace.instance_patterns
            for replicate in range(amplicon.config["replicate_count"])
        ],


# distance rules
# Re-declare `run_method_local` from the `distance` module under the name
# `distance_run_method_local_global`, overriding its output, benchmark and
# resources. All outputs are placed under
# results/distance/method_runs/<paramspace>/<method>/replicates/<replicate>/.
# In `fname_status` and `benchmark` the `method` wildcard is constrained to the
# alternation of `distance.method_list`; the leading 'markertoavoidemptyregex'
# entry presumably keeps that regex non-empty when the list is empty.
use rule run_method_local from distance as distance_run_method_local_global with:
    output:
        fname_result_haplos=f"results/distance/method_runs/{distance.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/haplotypes_.fasta",
        fname_result=f"results/distance/method_runs/{distance.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/snvs_.vcf",
        dname_work=directory(
            f"results/distance/method_runs/{distance.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/work/"
        ),
        fname_status=touch(
            f"results/distance/method_runs/{distance.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + distance.method_list)}}}/replicates/{{replicate}}/status_.txt"
        ),
    benchmark:
        f"results/distance/method_runs/{distance.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + distance.method_list)}}}/replicates/{{replicate}}/benchmark_.tsv"
    resources:
        mem_mb=8024,
        runtime=440,


# Override the inputs of `performance_measures_local` for the `distance`
# module. `vcf_list`, `groundtruth_list` and `benchmark_list` each hold one
# entry per (parameter instance, method, replicate) combination, generated in
# the same nested order; `haplostats_list` holds one entry per
# (parameter instance, replicate).
use rule performance_measures_local from distance as distance_performance_measures_local with:
    input:
        vcf_list=[
            f"results/distance/method_runs/{params}/{method}/replicates/{replicate}/snvs_.vcf"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list
            for replicate in range(distance.config["replicate_count"])
        ],
        # `method` is not part of the path; iterating over `method_list` anyway
        # keeps this list the same length and order as `vcf_list`
        groundtruth_list=[
            f"results/distance/simulated_reads/{params}/replicates/{replicate}/ground_truth.csv"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list
            for replicate in range(distance.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/distance/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list
            for replicate in range(distance.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/distance/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in distance.paramspace.instance_patterns
            for replicate in range(distance.config["replicate_count"])
        ],


# Override the inputs of `performance_measures_global` for the `distance`
# module. Only methods in `distance.method_list_global` are enumerated.
# `predicted_haplos_list`, `true_haplos_list`, `runstatus_list` and
# `benchmark_list` each hold one entry per (parameter instance, method,
# replicate) combination, generated in the same nested order (the true
# haplotype path does not contain `method`, so it repeats once per method);
# `haplostats_list` holds one entry per (parameter instance, replicate).
use rule performance_measures_global from distance as distance_performance_measures_global with:
    input:
        predicted_haplos_list=[
            f"results/distance/method_runs/{params}/{method}/replicates/{replicate}/haplotypes_.fasta"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list_global
            for replicate in range(distance.config["replicate_count"])
        ],
        true_haplos_list=[
            f"results/distance/simulated_reads/{params}/replicates/{replicate}/haplotypes.fasta"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list_global
            for replicate in range(distance.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/distance/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in distance.paramspace.instance_patterns
            for replicate in range(distance.config["replicate_count"])
        ],
        runstatus_list=[
            f"results/distance/method_runs/{params}/{method}/replicates/{replicate}/status_.txt"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list_global
            for replicate in range(distance.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/distance/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in distance.paramspace.instance_patterns
            for method in distance.method_list_global
            for replicate in range(distance.config["replicate_count"])
        ],


# mutation rate rules
# Re-declare `run_method_local` from the `mutationrate` module under the name
# `mutationrate_run_method_local_global`, overriding its output, benchmark and
# resources. All outputs are placed under
# results/mutationrate/method_runs/<paramspace>/<method>/replicates/<replicate>/.
# In `fname_status` and `benchmark` the `method` wildcard is constrained to the
# alternation of `mutationrate.method_list`; the leading
# 'markertoavoidemptyregex' entry presumably keeps that regex non-empty when
# the list is empty.
use rule run_method_local from mutationrate as mutationrate_run_method_local_global with:
    output:
        fname_result_haplos=f"results/mutationrate/method_runs/{mutationrate.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/haplotypes_.fasta",
        fname_result=f"results/mutationrate/method_runs/{mutationrate.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/snvs_.vcf",
        dname_work=directory(
            f"results/mutationrate/method_runs/{mutationrate.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/work/"
        ),
        fname_status=touch(
            f"results/mutationrate/method_runs/{mutationrate.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + mutationrate.method_list)}}}/replicates/{{replicate}}/status_.txt"
        ),
    benchmark:
        f"results/mutationrate/method_runs/{mutationrate.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + mutationrate.method_list)}}}/replicates/{{replicate}}/benchmark_.tsv"
    resources:
        mem_mb=8024,
        runtime=440,


# Override the inputs of `performance_measures_local` for the `mutationrate`
# module. Methods are taken from `method_list_local + method_list_global`.
# `vcf_list`, `groundtruth_list` and `benchmark_list` each hold one entry per
# (parameter instance, method, replicate) combination, generated in the same
# nested order; `haplostats_list` holds one entry per
# (parameter instance, replicate).
use rule performance_measures_local from mutationrate as mutationrate_performance_measures_local with:
    input:
        vcf_list=[
            f"results/mutationrate/method_runs/{params}/{method}/replicates/{replicate}/snvs_.vcf"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_local
            + mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        # `method` is not part of the path; iterating over the same method
        # lists anyway keeps this list the same length and order as `vcf_list`
        groundtruth_list=[
            f"results/mutationrate/simulated_reads/{params}/replicates/{replicate}/ground_truth.csv"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_local
            + mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/mutationrate/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_local
            + mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/mutationrate/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in mutationrate.paramspace.instance_patterns
            for replicate in range(mutationrate.config["replicate_count"])
        ],


# Override the inputs of `performance_measures_global` for the `mutationrate`
# module. Only methods in `mutationrate.method_list_global` are enumerated.
# `predicted_haplos_list`, `true_haplos_list`, `runstatus_list` and
# `benchmark_list` each hold one entry per (parameter instance, method,
# replicate) combination, generated in the same nested order (the true
# haplotype path does not contain `method`, so it repeats once per method);
# `haplostats_list` holds one entry per (parameter instance, replicate).
use rule performance_measures_global from mutationrate as mutationrate_performance_measures_global with:
    input:
        predicted_haplos_list=[
            f"results/mutationrate/method_runs/{params}/{method}/replicates/{replicate}/haplotypes_.fasta"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        true_haplos_list=[
            f"results/mutationrate/simulated_reads/{params}/replicates/{replicate}/haplotypes.fasta"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/mutationrate/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in mutationrate.paramspace.instance_patterns
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        runstatus_list=[
            f"results/mutationrate/method_runs/{params}/{method}/replicates/{replicate}/status_.txt"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/mutationrate/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in mutationrate.paramspace.instance_patterns
            for method in mutationrate.method_list_global
            for replicate in range(mutationrate.config["replicate_count"])
        ],


# longreads rules
# Re-declare `run_method_local` from the `longreads` module under the name
# `longreads_run_method_local_global`, overriding its output, benchmark and
# resources. All outputs are placed under
# results/longreads/method_runs/<paramspace>/<method>/replicates/<replicate>/.
# In `fname_status` and `benchmark` the `method` wildcard is constrained to the
# alternation of `longreads.method_list`; the leading 'markertoavoidemptyregex'
# entry presumably keeps that regex non-empty when the list is empty.
use rule run_method_local from longreads as longreads_run_method_local_global with:
    output:
        fname_result_haplos=f"results/longreads/method_runs/{longreads.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/haplotypes_.fasta",
        fname_result=f"results/longreads/method_runs/{longreads.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/snvs_.vcf",
        dname_work=directory(
            f"results/longreads/method_runs/{longreads.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/work/"
        ),
        fname_status=touch(
            f"results/longreads/method_runs/{longreads.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + longreads.method_list)}}}/replicates/{{replicate}}/status_.txt"
        ),
    benchmark:
        f"results/longreads/method_runs/{longreads.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + longreads.method_list)}}}/replicates/{{replicate}}/benchmark_.tsv"
    resources:
        mem_mb=8024,
        runtime=440,


# Override the inputs of `performance_measures_local` for the `longreads`
# module. `vcf_list`, `groundtruth_list` and `benchmark_list` each hold one
# entry per (parameter instance, method, replicate) combination, generated in
# the same nested order; `haplostats_list` holds one entry per
# (parameter instance, replicate).
use rule performance_measures_local from longreads as longreads_performance_measures_local with:
    input:
        vcf_list=[
            f"results/longreads/method_runs/{params}/{method}/replicates/{replicate}/snvs_.vcf"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list
            for replicate in range(longreads.config["replicate_count"])
        ],
        # `method` is not part of the path; iterating over `method_list` anyway
        # keeps this list the same length and order as `vcf_list`
        groundtruth_list=[
            f"results/longreads/simulated_reads/{params}/replicates/{replicate}/ground_truth.csv"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list
            for replicate in range(longreads.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/longreads/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list
            for replicate in range(longreads.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/longreads/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in longreads.paramspace.instance_patterns
            for replicate in range(longreads.config["replicate_count"])
        ],


# Override the inputs of `performance_measures_global` for the `longreads`
# module. Only methods in `longreads.method_list_global` are enumerated.
# `predicted_haplos_list`, `true_haplos_list`, `runstatus_list` and
# `benchmark_list` each hold one entry per (parameter instance, method,
# replicate) combination, generated in the same nested order (the true
# haplotype path does not contain `method`, so it repeats once per method);
# `haplostats_list` holds one entry per (parameter instance, replicate).
use rule performance_measures_global from longreads as longreads_performance_measures_global with:
    input:
        # must be built from the `longreads` module (path, paramspace, method
        # list and replicate count) so that it lines up entry-by-entry with
        # `true_haplos_list` below
        predicted_haplos_list=[
            f"results/longreads/method_runs/{params}/{method}/replicates/{replicate}/haplotypes_.fasta"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list_global
            for replicate in range(longreads.config["replicate_count"])
        ],
        true_haplos_list=[
            f"results/longreads/simulated_reads/{params}/replicates/{replicate}/haplotypes.fasta"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list_global
            for replicate in range(longreads.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/longreads/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in longreads.paramspace.instance_patterns
            for replicate in range(longreads.config["replicate_count"])
        ],
        runstatus_list=[
            f"results/longreads/method_runs/{params}/{method}/replicates/{replicate}/status_.txt"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list_global
            for replicate in range(longreads.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/longreads/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in longreads.paramspace.instance_patterns
            for method in longreads.method_list_global
            for replicate in range(longreads.config["replicate_count"])
        ],


# real data rules
# Re-declare `run_method_local` from the `realdata` module under the name
# `realdata_run_method_local_global`, overriding its output, benchmark and
# resources. All outputs are placed under
# results/realdata/method_runs/<paramspace>/<method>/replicates/<replicate>/.
# In `fname_status` and `benchmark` the `method` wildcard is constrained to the
# alternation of `realdata.method_list`; the leading 'markertoavoidemptyregex'
# entry presumably keeps that regex non-empty when the list is empty.
use rule run_method_local from realdata as realdata_run_method_local_global with:
    output:
        fname_result_haplos=f"results/realdata/method_runs/{realdata.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/haplotypes_.fasta",
        fname_result=f"results/realdata/method_runs/{realdata.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/snvs_.vcf",
        dname_work=directory(
            f"results/realdata/method_runs/{realdata.paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/work/"
        ),
        fname_status=touch(
            f"results/realdata/method_runs/{realdata.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + realdata.method_list)}}}/replicates/{{replicate}}/status_.txt"
        ),
    benchmark:
        f"results/realdata/method_runs/{realdata.paramspace.wildcard_pattern}/{{method,{'|'.join(['markertoavoidemptyregex'] + realdata.method_list)}}}/replicates/{{replicate}}/benchmark_.tsv"
    resources:
        mem_mb=10024,
        runtime=440,


# Override the inputs of `performance_measures_local` for the `realdata`
# module. Methods are taken from `method_list_local + method_list_global`.
# `vcf_list`, `groundtruth_list` and `benchmark_list` each hold one entry per
# (parameter instance, method, replicate) combination, generated in the same
# nested order; `haplostats_list` holds one entry per
# (parameter instance, replicate).
use rule performance_measures_local from realdata as realdata_performance_measures_local with:
    input:
        vcf_list=[
            f"results/realdata/method_runs/{params}/{method}/replicates/{replicate}/snvs_.vcf"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_local + realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
        # `method` is not part of the path; iterating over the same method
        # lists anyway keeps this list the same length and order as `vcf_list`
        groundtruth_list=[
            f"results/realdata/simulated_reads/{params}/replicates/{replicate}/ground_truth.csv"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_local + realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/realdata/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_local + realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/realdata/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in realdata.paramspace.instance_patterns
            for replicate in range(realdata.config["replicate_count"])
        ],


# Override the inputs of `performance_measures_global` for the `realdata`
# module. Only methods in `realdata.method_list_global` are enumerated.
# `predicted_haplos_list`, `true_haplos_list`, `runstatus_list` and
# `benchmark_list` each hold one entry per (parameter instance, method,
# replicate) combination, generated in the same nested order (the true
# haplotype path does not contain `method`, so it repeats once per method);
# `haplostats_list` holds one entry per (parameter instance, replicate).
use rule performance_measures_global from realdata as realdata_performance_measures_global with:
    input:
        predicted_haplos_list=[
            f"results/realdata/method_runs/{params}/{method}/replicates/{replicate}/haplotypes_.fasta"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
        true_haplos_list=[
            f"results/realdata/simulated_reads/{params}/replicates/{replicate}/haplotypes.fasta"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
        haplostats_list=[
            f"results/realdata/haplo_stats/{params}/replicates/{replicate}/haplotypes_stats.csv"
            for params in realdata.paramspace.instance_patterns
            for replicate in range(realdata.config["replicate_count"])
        ],
        runstatus_list=[
            f"results/realdata/method_runs/{params}/{method}/replicates/{replicate}/status_.txt"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
        benchmark_list=[
            f"results/realdata/method_runs/{params}/{method}/replicates/{replicate}/benchmark_.tsv"
            for params in realdata.paramspace.instance_patterns
            for method in realdata.method_list_global
            for replicate in range(realdata.config["replicate_count"])
        ],
