[{"id":"2","url":"https://workflowhub.eu/workflows/2","name":"Genomics - Read pre-processing","description":"Preprocessing of raw SARS-CoV-2 reads. More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/2?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"3","url":"https://workflowhub.eu/workflows/3","name":"Virus genome assembly with Unicycler and Spades.","description":"Virus genome assembly with Unicycler and Spades,\r\nThe 2 assemblers works in parallel. The graph  visualization is made with Bandage.\r\nworkflow git repository : https://github.com/fjrmoreews/cwl-workflow-SARS-CoV-2/blob/master/Assembly/workflow/assembly-wf-virus.cwl\r\nBased on  https://github.com/galaxyproject/SARS-CoV-2/blob/master/genomics/2-Assembly/as_wf.png\r\n","organization":"CWL workflow SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/3?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"4","url":"https://workflowhub.eu/workflows/4","name":"Genomics - Read pre-processing without downloading from SRA","description":"Preprocessing of raw SARS-CoV-2 reads. This workflow contains an alternate starting point to avoid the data to be downloaded from the NCBI SRA. 
More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/4?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"5","url":"https://workflowhub.eu/workflows/5","name":"Genomics - Assembly of the genome sequence","description":"This workflow uses Illumina and Oxford Nanopore reads that were pre-processed to remove human-derived sequences. Two assembly tools are used: spades and unicycler. In addition to assemblies (actual sequences) the two tools produce assembly graphs that can be used for visualization of assembly with bandage. More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/5?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"6","url":"https://workflowhub.eu/workflows/6","name":"Genomics - MRCA analysis","description":"Dating the most recent common ancestor (MRCA) of SARS-CoV-2. The workflow is used to extract full length sequences of SARS-CoV-2, tidy up their names in FASTA files, produce a multiple sequences alignment and compute a maximum likelihood tree. More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/6?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"7","url":"https://workflowhub.eu/workflows/7","name":"Genomics - PE Variation","description":"Analysis of variation within individual COVID-19 samples using Illumina Paired End data. 
More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/7?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"8","url":"https://workflowhub.eu/workflows/8","name":"Genomics - SE Variation","description":"Analysis of variation within individual COVID-19 samples using Illumina Single End data. More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/8?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"9","url":"https://workflowhub.eu/workflows/9","name":"Genomics - Analysis of S-protein polymorphism","description":"Analysis of S-protein polymorphism. This workflow includes: obtaining coding sequences of S proteins from a diverse group of coronaviruses and generating amino acid alignments to assess conservation of the polymorphic location. More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/9?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"10","url":"https://workflowhub.eu/workflows/10","name":"Genomics - Recombination and selection analysis","description":"This workflow employs a recombination detection algorithm (GARD) developed by Kosakovsky Pond et al. and implemented in the hyphy package. 
More info can be found at https://covid19.galaxyproject.org/genomics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/10?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"11","url":"https://workflowhub.eu/workflows/11","name":"(old) SARS-COV2 version of the V-Pipe workflow","description":" A version of V-pipe (analysis of next generation sequencing (NGS) data from viral pathogens) specifically adapted to analyze high-throughput sequencing data of SARS-CoV-2. ","organization":"V-Pipe","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/11?version=1","name":"Version 1","author":["Ivan Topolsky"],"descriptor_type":["SMK"]}]},{"id":"12","url":"https://workflowhub.eu/workflows/12","name":"Cheminformatics - Enumerate ligands for docking","description":"This workflow is used form the preparation of protein and ligands for docking. More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/12?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"13","url":"https://workflowhub.eu/workflows/13","name":"Cheminformatics - Active site generation","description":"This workflow generates a file describing the active site of the protein for each of the fragment screening crystal structures using rDock s rbcavity. It also creates a single hybrid molecule that contains all the ligands - the \"frankenstein\" ligand. 
More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/13?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"14","url":"https://workflowhub.eu/workflows/14","name":"Cheminformatics - Docking","description":"Docking performed by rDock using as 3 different kind of inputs. More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/14?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"15","url":"https://workflowhub.eu/workflows/15","name":"Cheminformatics - SuCOS scoring","description":"This workflow generates binding scores that correlate well with binding affinities using an additional tool SuCOS Max, developed at Oxford University. More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/15?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"16","url":"https://workflowhub.eu/workflows/16","name":"Cheminformatics - TransFS scoring","description":"This workflow generates binding scores that correlate well with binding affinities using an additional tool TransFS, developed at Oxford University. 
More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/16?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"17","url":"https://workflowhub.eu/workflows/17","name":"Cheminformatics - Filter results","description":"This workflow combines SDF files from all fragments into a single dataset and filters to include only the lowest (best) scoring pose for each compound. This file of optimal poses for all ligands is used to compare to a database of Enamine and Chemspace compounds to select the best scoring 500 matches. More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/17?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"18","url":"https://workflowhub.eu/workflows/18","name":"Cheminformatics - XChem combined","description":"This workflow is used for the virtual screening of the SARS-CoV-2 main protease (de.NBI-cloud, STFC). It includes Charge enumeration, Generation of 3D conformations, Preparation of active site for docking using rDock, Docking, Scoring and Selection of compounds available. 
More info can be found at https://covid19.galaxyproject.org/cheminformatics/","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/18?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"19","url":"https://workflowhub.eu/workflows/19","name":"nf-core/viralrecon","description":"\u003c!DOCTYPE html\u003e\u003chtml\u003e\u003chead\u003e\u003cmeta charset=\"utf-8\"\u003e\u003cstyle\u003e\u003c/style\u003e\u003c/head\u003e\u003cbody id=\"preview\"\u003e\r\n\u003ch1 class=\"code-line\" data-line-start=0 data-line-end=1\u003e\u003ca id=\"nfcoreviralreconhttpsrawgithubusercontentcomnfcoreviralreconmasterdocsimagesnfcoreviralrecon_logopng_0\"\u003e\u003c/a\u003e\u003cimg src=\"https://raw.githubusercontent.com/nf-core/viralrecon/master/docs/images/nf-core-viralrecon_logo.png\" alt=\"nf-core/viralrecon\"\u003e\u003c/h1\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"2\" data-line-end=\"3\"\u003e\u003ca href=\"https://github.com/nf-core/viralrecon/actions\"\u003e\u003cimg src=\"https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg\" alt=\"GitHub Actions CI Status\"\u003e\u003c/a\u003e \u003ca href=\"https://github.com/nf-core/viralrecon/actions\"\u003e\u003cimg src=\"https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg\" alt=\"GitHub Actions Linting Status\"\u003e\u003c/a\u003e \u003ca href=\"https://www.nextflow.io/\"\u003e\u003cimg src=\"https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg\" alt=\"Nextflow\"\u003e\u003c/a\u003e \u003ca href=\"https://bioconda.github.io/\"\u003e\u003cimg src=\"https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg\" alt=\"install with bioconda\"\u003e\u003c/a\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"4\" data-line-end=\"5\"\u003e\u003ca 
href=\"https://hub.docker.com/r/nfcore/viralrecon\"\u003e\u003cimg src=\"https://img.shields.io/docker/automated/nfcore/viralrecon.svg\" alt=\"Docker\"\u003e\u003c/a\u003e \u003ca href=\"https://doi.org/10.5281/zenodo.3872730\"\u003e\u003cimg src=\"https://zenodo.org/badge/DOI/10.5281/zenodo.3872730.svg\" alt=\"DOI\"\u003e\u003c/a\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"8\" data-line-end=\"9\"\u003e\u003cstrong\u003enfcore/viralrecon\u003c/strong\u003e is a bioinformatics analysis pipeline used to perform assembly and intrahost/low-frequency variant calling for viral samples. The pipeline currently supports metagenomics and amplicon sequencing data derived from the Illumina sequencing platform.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"10\" data-line-end=\"11\"\u003eThis pipeline is a re-implementation of the \u003ca href=\"https://github.com/BU-ISCIII/SARS_Cov2_consensus-nf\"\u003eSARS_Cov2_consensus-nf\u003c/a\u003e and \u003ca href=\"https://github.com/BU-ISCIII/SARS_Cov2_assembly-nf\"\u003eSARS_Cov2_assembly-nf\u003c/a\u003e pipelines initially developed by \u003ca href=\"https://github.com/svarona\"\u003eSarai Varona\u003c/a\u003e and \u003ca href=\"https://github.com/saramonzon\"\u003eSara Monzon\u003c/a\u003e from \u003ca href=\"https://github.com/BU-ISCIII\"\u003eBU-ISCIII\u003c/a\u003e. Porting both of these pipelines to nf-core was an international collaboration between numerous contributors and developers, led by \u003ca href=\"https://github.com/drpatelh\"\u003eHarshil Patel\u003c/a\u003e from the \u003ca href=\"https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/\"\u003eThe Bioinformatics \u0026amp; Biostatistics Group\u003c/a\u003e at \u003ca href=\"https://www.crick.ac.uk/\"\u003eThe Francis Crick Institute\u003c/a\u003e, London. 
We appreciated the need to have a portable, reproducible and scalable pipeline for the analysis of COVID-19 sequencing samples and so the Avengers Assembled! Please come and join us and add yourself to the contributor list :)\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"12\" data-line-end=\"13\"\u003eWe have integrated a number of options in the pipeline to allow you to run specific aspects of the workflow if you so wish. For example, you can skip all of the assembly steps with the \u003ccode\u003e--skip_assembly\u003c/code\u003e parameter. See \u003ca href=\"docs/usage.md\"\u003eusage docs\u003c/a\u003e for all of the available options when running the pipeline.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"14\" data-line-end=\"15\"\u003ePlease click \u003ca href=\"https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html\"\u003ehere\u003c/a\u003e to see an example MultiQC report generated using the parameters defined in \u003ca href=\"https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config\"\u003ethis configuration file\u003c/a\u003e to run the pipeline on \u003ca href=\"https://zenodo.org/record/3735111\"\u003esamples\u003c/a\u003e which were prepared from the \u003ca href=\"https://artic.network/ncov-2019\"\u003encov-2019 ARTIC Network V1 amplicon set\u003c/a\u003e and sequenced on the Illumina MiSeq platform in 301bp paired-end format.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"16\" data-line-end=\"17\"\u003eThe pipeline is built using \u003ca href=\"https://www.nextflow.io\"\u003eNextflow\u003c/a\u003e, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. 
Furthermore, automated continuous integration tests to run the pipeline on a full-sized dataset are passing on AWS cloud.\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=18 data-line-end=19\u003e\u003ca id=\"Pipeline_summary_18\"\u003e\u003c/a\u003ePipeline summary\u003c/h2\u003e\r\n\u003col\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"20\" data-line-end=\"21\"\u003eDownload samples via SRA, ENA or GEO ids (\u003ca href=\"https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html\"\u003e\u003ccode\u003eENA FTP\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://github.com/rvalieris/parallel-fastq-dump\"\u003e\u003ccode\u003eparallel-fastq-dump\u003c/code\u003e\u003c/a\u003e; \u003cem\u003eif required\u003c/em\u003e)\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"21\" data-line-end=\"22\"\u003eMerge re-sequenced FastQ files (\u003ca href=\"http://www.linfo.org/cat.html\"\u003e\u003ccode\u003ecat\u003c/code\u003e\u003c/a\u003e; \u003cem\u003eif required\u003c/em\u003e)\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"22\" data-line-end=\"23\"\u003eRead QC (\u003ca href=\"https://www.bioinformatics.babraham.ac.uk/projects/fastqc/\"\u003e\u003ccode\u003eFastQC\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"23\" data-line-end=\"24\"\u003eAdapter trimming (\u003ca href=\"https://github.com/OpenGene/fastp\"\u003e\u003ccode\u003efastp\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"24\" data-line-end=\"33\"\u003eVariant calling\u003cbr\u003e\r\ni. Read alignment (\u003ca href=\"http://bowtie-bio.sourceforge.net/bowtie2/index.shtml\"\u003e\u003ccode\u003eBowtie 2\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\nii. 
Sort and index alignments (\u003ca href=\"https://sourceforge.net/projects/samtools/files/samtools/\"\u003e\u003ccode\u003eSAMtools\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\niii. Primer sequence removal (\u003ca href=\"https://github.com/andersen-lab/ivar\"\u003e\u003ccode\u003eiVar\u003c/code\u003e\u003c/a\u003e; \u003cem\u003eamplicon data only\u003c/em\u003e)\u003cbr\u003e\r\niv. Duplicate read marking (\u003ca href=\"https://broadinstitute.github.io/picard/\"\u003e\u003ccode\u003epicard\u003c/code\u003e\u003c/a\u003e; \u003cem\u003eremoval optional\u003c/em\u003e)\u003cbr\u003e\r\nv. Alignment-level QC (\u003ca href=\"https://broadinstitute.github.io/picard/\"\u003e\u003ccode\u003epicard\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://sourceforge.net/projects/samtools/files/samtools/\"\u003e\u003ccode\u003eSAMtools\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\nvi. Choice of multiple variant calling and consensus sequence generation routes (\u003ca href=\"https://dkoboldt.github.io/varscan/\"\u003e\u003ccode\u003eVarScan 2\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://samtools.github.io/bcftools/bcftools.html\"\u003e\u003ccode\u003eBCFTools\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://github.com/arq5x/bedtools2/\"\u003e\u003ccode\u003eBEDTools\u003c/code\u003e\u003c/a\u003e \u003cem\u003e||\u003c/em\u003e \u003ca href=\"https://github.com/andersen-lab/ivar\"\u003e\u003ccode\u003eiVar variants and consensus\u003c/code\u003e\u003c/a\u003e \u003cem\u003e||\u003c/em\u003e \u003ca href=\"https://samtools.github.io/bcftools/bcftools.html\"\u003e\u003ccode\u003eBCFTools\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://github.com/arq5x/bedtools2/\"\u003e\u003ccode\u003eBEDTools\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Variant annotation (\u003ca href=\"http://snpeff.sourceforge.net/SnpEff.html\"\u003e\u003ccode\u003eSnpEff\u003c/code\u003e\u003c/a\u003e, \u003ca 
href=\"http://snpeff.sourceforge.net/SnpSift.html\"\u003e\u003ccode\u003eSnpSift\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Consensus assessment report (\u003ca href=\"http://quast.sourceforge.net/quast\"\u003e\u003ccode\u003eQUAST\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"33\" data-line-end=\"43\"\u003e\u003cem\u003eDe novo\u003c/em\u003e assembly\u003cbr\u003e\r\ni. Primer trimming (\u003ca href=\"https://cutadapt.readthedocs.io/en/stable/guide.html\"\u003e\u003ccode\u003eCutadapt\u003c/code\u003e\u003c/a\u003e; \u003cem\u003eamplicon data only\u003c/em\u003e)\u003cbr\u003e\r\nii. Removal of host reads (\u003ca href=\"http://ccb.jhu.edu/software/kraken2/\"\u003e\u003ccode\u003eKraken 2\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\niii. Choice of multiple assembly tools (\u003ca href=\"http://cab.spbu.ru/software/spades/\"\u003e\u003ccode\u003eSPAdes\u003c/code\u003e\u003c/a\u003e \u003cem\u003e||\u003c/em\u003e \u003ca href=\"http://cab.spbu.ru/software/meta-spades/\"\u003e\u003ccode\u003emetaSPAdes\u003c/code\u003e\u003c/a\u003e \u003cem\u003e||\u003c/em\u003e \u003ca href=\"https://github.com/rrwick/Unicycler\"\u003e\u003ccode\u003eUnicycler\u003c/code\u003e\u003c/a\u003e \u003cem\u003e||\u003c/em\u003e \u003ca href=\"https://github.com/GATB/minia\"\u003e\u003ccode\u003eminia\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Blast to reference genome (\u003ca href=\"https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch\"\u003e\u003ccode\u003eblastn\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Contiguate assembly (\u003ca href=\"https://www.sanger.ac.uk/science/tools/pagit\"\u003e\u003ccode\u003eABACAS\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Assembly report (\u003ca href=\"https://github.com/BU-ISCIII/plasmidID\"\u003e\u003ccode\u003ePlasmidID\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Assembly assessment report (\u003ca 
href=\"http://quast.sourceforge.net/quast\"\u003e\u003ccode\u003eQUAST\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Call variants relative to reference (\u003ca href=\"https://github.com/lh3/minimap2\"\u003e\u003ccode\u003eMinimap2\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://github.com/ekg/seqwish\"\u003e\u003ccode\u003eseqwish\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://github.com/vgteam/vg\"\u003e\u003ccode\u003evg\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"https://github.com/rrwick/Bandage\"\u003e\u003ccode\u003eBandage\u003c/code\u003e\u003c/a\u003e)\u003cbr\u003e\r\n- Variant annotation (\u003ca href=\"http://snpeff.sourceforge.net/SnpEff.html\"\u003e\u003ccode\u003eSnpEff\u003c/code\u003e\u003c/a\u003e, \u003ca href=\"http://snpeff.sourceforge.net/SnpSift.html\"\u003e\u003ccode\u003eSnpSift\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"43\" data-line-end=\"45\"\u003ePresent QC and visualisation for raw read, alignment, assembly and variant calling results (\u003ca href=\"http://multiqc.info/\"\u003e\u003ccode\u003eMultiQC\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\r\n\u003c/ol\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=45 data-line-end=46\u003e\u003ca id=\"Quick_Start_45\"\u003e\u003c/a\u003eQuick Start\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"47\" data-line-end=\"48\"\u003ei. Install \u003ca href=\"https://nf-co.re/usage/installation\"\u003e\u003ccode\u003enextflow\u003c/code\u003e\u003c/a\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"49\" data-line-end=\"50\"\u003eii. 
Install either \u003ca href=\"https://docs.docker.com/engine/installation/\"\u003e\u003ccode\u003eDocker\u003c/code\u003e\u003c/a\u003e or \u003ca href=\"https://www.sylabs.io/guides/3.0/user-guide/\"\u003e\u003ccode\u003eSingularity\u003c/code\u003e\u003c/a\u003e for full pipeline reproducibility (please only use \u003ca href=\"https://conda.io/miniconda.html\"\u003e\u003ccode\u003eConda\u003c/code\u003e\u003c/a\u003e as a last resort; see \u003ca href=\"https://nf-co.re/usage/configuration#basic-configuration-profiles\"\u003edocs\u003c/a\u003e)\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"51\" data-line-end=\"52\"\u003eiii. Download the pipeline and test it on a minimal dataset with a single command\u003c/p\u003e\r\n\u003cpre\u003e\u003ccode class=\"has-line-data\" data-line-start=\"54\" data-line-end=\"56\" class=\"language-bash\"\u003enextflow run nf-core/viralrecon -profile \u003cspan class=\"hljs-built_in\"\u003etest\u003c/span\u003e,\u0026lt;docker/singularity/conda/institute\u0026gt;\r\n\u003c/code\u003e\u003c/pre\u003e\r\n\u003cblockquote\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"57\" data-line-end=\"58\"\u003ePlease check \u003ca href=\"https://github.com/nf-core/configs#documentation\"\u003enf-core/configs\u003c/a\u003e to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use \u003ccode\u003e-profile \u0026lt;institute\u0026gt;\u003c/code\u003e in your command. This will enable either \u003ccode\u003edocker\u003c/code\u003e or \u003ccode\u003esingularity\u003c/code\u003e and set the appropriate execution settings for your local compute environment.\u003c/p\u003e\r\n\u003c/blockquote\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"59\" data-line-end=\"60\"\u003eiv. 
Start running your own analysis!\u003c/p\u003e\r\n\u003cpre\u003e\u003ccode class=\"has-line-data\" data-line-start=\"62\" data-line-end=\"64\" class=\"language-bash\"\u003enextflow run nf-core/viralrecon -profile \u0026lt;docker/singularity/conda/institute\u0026gt; --input samplesheet.csv --genome \u003cspan class=\"hljs-string\"\u003e'NC_045512.2'\u003c/span\u003e -profile docker\r\n\u003c/code\u003e\u003c/pre\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"65\" data-line-end=\"66\"\u003eSee \u003ca href=\"docs/usage.md\"\u003eusage docs\u003c/a\u003e for all of the available options when running the pipeline.\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=67 data-line-end=68\u003e\u003ca id=\"Documentation_67\"\u003e\u003c/a\u003eDocumentation\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"69\" data-line-end=\"70\"\u003eThe nf-core/viralrecon pipeline comes with documentation about the pipeline, found in the \u003ccode\u003edocs/\u003c/code\u003e directory:\u003c/p\u003e\r\n\u003col\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"71\" data-line-end=\"72\"\u003e\u003ca href=\"https://nf-co.re/usage/installation\"\u003eInstallation\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"72\" data-line-end=\"76\"\u003ePipeline configuration\r\n\u003cul\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"73\" data-line-end=\"74\"\u003e\u003ca href=\"https://nf-co.re/usage/local_installation\"\u003eLocal installation\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"74\" data-line-end=\"75\"\u003e\u003ca href=\"https://nf-co.re/usage/adding_own_config\"\u003eAdding your own system config\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"75\" data-line-end=\"76\"\u003e\u003ca href=\"docs/usage.md#reference-genomes\"\u003eReference 
genomes\u003c/a\u003e\u003c/li\u003e\r\n\u003c/ul\u003e\r\n\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"76\" data-line-end=\"77\"\u003e\u003ca href=\"docs/usage.md\"\u003eRunning the pipeline\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"77\" data-line-end=\"78\"\u003e\u003ca href=\"docs/output.md\"\u003eOutput and how to interpret the results\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"78\" data-line-end=\"80\"\u003e\u003ca href=\"https://nf-co.re/usage/troubleshooting\"\u003eTroubleshooting\u003c/a\u003e\u003c/li\u003e\r\n\u003c/ol\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=80 data-line-end=81\u003e\u003ca id=\"Credits_80\"\u003e\u003c/a\u003eCredits\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"82\" data-line-end=\"83\"\u003eThese scripts were originally written by \u003ca href=\"https://github.com/svarona\"\u003eSarai Varona\u003c/a\u003e, \u003ca href=\"https://github.com/MiguelJulia\"\u003eMiguel Juliá\u003c/a\u003e and \u003ca href=\"https://github.com/saramonzon\"\u003eSara Monzon\u003c/a\u003e from \u003ca href=\"https://github.com/BU-ISCIII\"\u003eBU-ISCIII\u003c/a\u003e and co-ordinated by Isabel Cuesta for the \u003ca href=\"https://eng.isciii.es/eng.isciii.es/Paginas/Inicio.html\"\u003eInstitute of Health Carlos III\u003c/a\u003e, Spain. 
Through collaboration with the nf-core community the pipeline has now been updated substantially to include additional processing steps, to standardise inputs/outputs and to improve pipeline reporting; implemented primarily by \u003ca href=\"https://github.com/drpatelh\"\u003eHarshil Patel\u003c/a\u003e from \u003ca href=\"https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/\"\u003eThe Bioinformatics \u0026amp; Biostatistics Group\u003c/a\u003e at \u003ca href=\"https://www.crick.ac.uk/\"\u003eThe Francis Crick Institute\u003c/a\u003e, London.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"84\" data-line-end=\"85\"\u003eMany thanks to others who have helped out and contributed along the way too, including (but not limited to):\u003c/p\u003e\r\n\u003ctable class=\"table table-striped table-bordered\"\u003e\r\n\u003cthead\u003e\r\n\u003ctr\u003e\r\n\u003cth\u003eName\u003c/th\u003e\r\n\u003cth\u003eAffiliation\u003c/th\u003e\r\n\u003c/tr\u003e\r\n\u003c/thead\u003e\r\n\u003ctbody\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/apeltzer\"\u003eAlexander Peltzer\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.boehringer-ingelheim.de/\"\u003eBoehringer Ingelheim, Germany\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/ameynert\"\u003eAlison Meynert\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.ed.ac.uk/\"\u003eUniversity of Edinburgh, Scotland\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/edgano\"\u003eEdgar Garriga Nogales\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.crg.eu/\"\u003eCentre for Genomic Regulation, Spain\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/ekg\"\u003eErik 
Garrison\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.ucsc.edu/\"\u003eUCSC, USA\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/ggabernet\"\u003eGisela Gabernet\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://portal.qbic.uni-tuebingen.de/portal/\"\u003eQBiC, University of Tübingen, Germany\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/jcurado-flomics\"\u003eJoao Curado\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.flomics.com/\"\u003eFlomics Biotech, Spain\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/JoseEspinosa\"\u003eJose Espinosa-Carrasco\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.crg.eu/\"\u003eCentre for Genomic Regulation, Spain\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/ktrns\"\u003eKatrin Sameith\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://genomecenter.tu-dresden.de\"\u003eDRESDEN-concept Genome Center, Germany\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/lcabus-flomics\"\u003eLluc Cabus\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.flomics.com/\"\u003eFlomics Biotech, Spain\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/mpozuelo-flomics\"\u003eMarta Pozuelo\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.flomics.com/\"\u003eFlomics Biotech, Spain\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/MaxUlysse\"\u003eMaxime Garcia\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca 
href=\"https://www.scilifelab.se/\"\u003eSciLifeLab, Sweden\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/heuermh\"\u003eMichael Heuer\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://https://rise.cs.berkeley.edu\"\u003eUC Berkeley, USA\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/ewels\"\u003ePhil Ewels\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.scilifelab.se/\"\u003eSciLifeLab, Sweden\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/subwaystation\"\u003eSimon Heumos\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://portal.qbic.uni-tuebingen.de/portal/\"\u003eQBiC, University of Tübingen, Germany\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/stevekm\"\u003eStephen Kelly\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://www.mskcc.org/\"\u003eMemorial Sloan Kettering Cancer Center, USA\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003e\u003ca href=\"https://github.com/thanhleviet\"\u003eThanh Le Viet\u003c/a\u003e\u003c/td\u003e\r\n\u003ctd\u003e\u003ca href=\"https://quadram.ac.uk/\"\u003eQuadram Institute, UK\u003c/a\u003e\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003c/tbody\u003e\r\n\u003c/table\u003e\r\n\u003cblockquote\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"105\" data-line-end=\"106\"\u003eListed in alphabetical order\u003c/p\u003e\r\n\u003c/blockquote\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=107 data-line-end=108\u003e\u003ca id=\"Contributions_and_Support_107\"\u003e\u003c/a\u003eContributions and Support\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"109\" data-line-end=\"110\"\u003eIf you would like to 
contribute to this pipeline, please see the \u003ca href=\"https://github.com/nf-core/viralrecon/blob/master/.github/CONTRIBUTING.md\"\u003econtributing guidelines\u003c/a\u003e.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"111\" data-line-end=\"112\"\u003eFor further information or help, don’t hesitate to get in touch on \u003ca href=\"https://nfcore.slack.com/channels/viralrecon\"\u003eSlack\u003c/a\u003e (you can join with \u003ca href=\"https://nf-co.re/join/slack\"\u003ethis invite\u003c/a\u003e).\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=113 data-line-end=114\u003e\u003ca id=\"Citation_113\"\u003e\u003c/a\u003eCitation\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"115\" data-line-end=\"116\"\u003eIf you use nf-core/viralrecon for your analysis, please cite it using the following doi: \u003ca href=\"https://doi.org/10.5281/zenodo.3872730\"\u003e10.5281/zenodo.3872730\u003c/a\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"117\" data-line-end=\"118\"\u003eAn extensive list of references for the tools used by the pipeline can be found in the \u003ca href=\"https://github.com/nf-core/viralrecon/blob/master/CITATIONS.md\"\u003e\u003ccode\u003eCITATIONS.md\u003c/code\u003e\u003c/a\u003e file.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"119\" data-line-end=\"120\"\u003eYou can cite the \u003ccode\u003enf-core\u003c/code\u003e publication as follows:\u003c/p\u003e\r\n\u003cblockquote\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"121\" data-line-end=\"122\"\u003e\u003cstrong\u003eThe nf-core framework for community-curated bioinformatics pipelines.\u003c/strong\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"123\" data-line-end=\"124\"\u003ePhilip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026amp; Sven 
Nahnsen.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"125\" data-line-end=\"127\"\u003e\u003cem\u003eNat Biotechnol.\u003c/em\u003e 2020 Feb 13. doi: \u003ca href=\"https://dx.doi.org/10.1038/s41587-020-0439-x\"\u003e10.1038/s41587-020-0439-x\u003c/a\u003e.\u003cbr\u003e\r\nReadCube: \u003ca href=\"https://rdcu.be/b1GjZ\"\u003eFull Access Link\u003c/a\u003e\u003c/p\u003e\r\n\u003c/blockquote\u003e\r\n\u003c/body\u003e\u003c/html\u003e","organization":"nf-core viralrecon","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/19?version=1","name":"Version 1","author":[],"descriptor_type":["NFL"]}]},{"id":"20","url":"https://workflowhub.eu/workflows/20","name":"nf-core/vipr","description":"\u003c!DOCTYPE html\u003e\u003chtml\u003e\u003chead\u003e\u003cmeta charset=\"utf-8\"\u003e\u003cstyle\u003e\u003c/style\u003e\u003c/head\u003e\u003cbody id=\"preview\"\u003e\r\n\u003ch1 class=\"code-line\" data-line-start=0 data-line-end=1\u003e\u003ca id=\"nfcoreviprhttpsrawgithubusercontentcomnfcoreviprmasterdocsimagesvipr_logosvg_0\"\u003e\u003c/a\u003e\u003cimg src=\"https://raw.githubusercontent.com/nf-core/vipr/master/docs/images/vipr_logo.png\" alt=\"nf-core/vipr\" height=\"200\" width=\"500\"\u003e\u003c/h1\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"2\" data-line-end=\"3\"\u003e\u003ca href=\"https://travis-ci.org/nf-core/vipr\"\u003e\u003cimg src=\"https://travis-ci.org/nf-core/vipr.svg?branch=master\" alt=\"Build Status\"\u003e\u003c/a\u003e \u003ca href=\"https://www.nextflow.io/\"\u003e\u003cimg src=\"https://img.shields.io/badge/nextflow-%E2%89%A50.31.1-brightgreen.svg\" alt=\"Nextflow\"\u003e\u003c/a\u003e \u003ca href=\"https://gitter.im/nf-core/Lobby\"\u003e\u003cimg src=\"https://img.shields.io/badge/gitter-%20join%20chat%20%E2%86%92-4fb99a.svg\" alt=\"Gitter\"\u003e\u003c/a\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" 
data-line-start=\"4\" data-line-end=\"5\"\u003e\u003ca href=\"http://bioconda.github.io/\"\u003e\u003cimg src=\"https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg\" alt=\"install with bioconda\"\u003e\u003c/a\u003e \u003ca href=\"https://hub.docker.com/r/nfcore/vipr/\"\u003e\u003cimg src=\"https://img.shields.io/docker/automated/nfcore/vipr.svg\" alt=\"Docker Container available\"\u003e\u003c/a\u003e \u003ca href=\"https://singularity-hub.org/collections/1405\"\u003e\u003cimg src=\"https://www.singularity-hub.org/static/img/hosted-singularity--hub-%23e32929.svg\" alt=\"https://www.singularity-hub.org/static/img/hosted-singularity--hub-%23e32929.svg\"\u003e\u003c/a\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"16\" data-line-end=\"17\"\u003e\u003cstrong\u003enf-core/vipr\u003c/strong\u003e is a bioinformatics best-practice analysis pipeline for assembly and intrahost / low-frequency variant calling for viral samples.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"18\" data-line-end=\"19\"\u003eThe pipeline is built using \u003ca href=\"https://www.nextflow.io\"\u003eNextflow\u003c/a\u003e, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. 
It comes with docker / singularity containers making installation trivial and results highly reproducible.\u003c/p\u003e\r\n\u003ch3 class=\"code-line\" data-line-start=20 data-line-end=21\u003e\u003ca id=\"Pipeline_Steps_20\"\u003e\u003c/a\u003ePipeline Steps\u003c/h3\u003e\r\n\u003ctable class=\"table table-striped table-bordered\"\u003e\r\n\u003cthead\u003e\r\n\u003ctr\u003e\r\n\u003cth\u003eStep\u003c/th\u003e\r\n\u003cth\u003eMain program/s\u003c/th\u003e\r\n\u003c/tr\u003e\r\n\u003c/thead\u003e\r\n\u003ctbody\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eTrimming, combining of read-pairs per sample and QC\u003c/td\u003e\r\n\u003ctd\u003eSkewer, FastQC\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eDecontamination\u003c/td\u003e\r\n\u003ctd\u003edecont\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eMetagenomics classification / Sample purity\u003c/td\u003e\r\n\u003ctd\u003eKraken\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eAssembly to contigs\u003c/td\u003e\r\n\u003ctd\u003eBBtools’ Tadpole\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eAssembly polishing\u003c/td\u003e\r\n\u003ctd\u003eViPR Tools\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eMapping to assembly\u003c/td\u003e\r\n\u003ctd\u003eBWA, LoFreq\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eLow frequency variant calling\u003c/td\u003e\r\n\u003ctd\u003eLoFreq\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003ctr\u003e\r\n\u003ctd\u003eCoverage and variant AF plots (two processes)\u003c/td\u003e\r\n\u003ctd\u003eBedtools, ViPR Tools\u003c/td\u003e\r\n\u003c/tr\u003e\r\n\u003c/tbody\u003e\r\n\u003c/table\u003e\r\n\u003ch3 class=\"code-line\" data-line-start=33 data-line-end=34\u003e\u003ca id=\"Documentation_33\"\u003e\u003c/a\u003eDocumentation\u003c/h3\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"35\" data-line-end=\"36\"\u003eDocumentation about the pipeline can be 
found in the \u003ccode\u003edocs/\u003c/code\u003e directory:\u003c/p\u003e\r\n\u003col\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"37\" data-line-end=\"38\"\u003e\u003ca href=\"docs/installation.md\"\u003eInstallation and configuration\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"38\" data-line-end=\"39\"\u003e\u003ca href=\"docs/usage.md\"\u003eRunning the pipeline\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"39\" data-line-end=\"41\"\u003e\u003ca href=\"docs/output.md\"\u003eOutput and how to interpret the results\u003c/a\u003e\u003c/li\u003e\r\n\u003c/ol\u003e\r\n\u003ch3 class=\"code-line\" data-line-start=41 data-line-end=42\u003e\u003ca id=\"Credits_41\"\u003e\u003c/a\u003eCredits\u003c/h3\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"43\" data-line-end=\"46\"\u003eThis pipeline was originally developed by Andreas Wilm (\u003ca href=\"https://github.com/andreas-wilm\"\u003eandreas-wilm\u003c/a\u003e) at \u003ca href=\"https://www.a-star.edu.sg/gis/\"\u003eGenome Institute of Singapore\u003c/a\u003e.\u003cbr\u003e\r\nIt started out as an ecosystem around LoFreq and went through a couple of iterations.\u003cbr\u003e\r\nThe current version had three predecessors \u003ca href=\"https://github.com/CSB5/vipr\"\u003eViPR 1\u003c/a\u003e, \u003ca href=\"https://github.com/CSB5/vipr2\"\u003eViPR 2\u003c/a\u003e and \u003ca href=\"https://github.com/gis-rpd/pipelines/tree/master/germs/vipr\"\u003eViPR 3\u003c/a\u003e.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"47\" data-line-end=\"48\"\u003eAn incomplete list of publications using (previous versions of) ViPR:\u003c/p\u003e\r\n\u003cul\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"49\" data-line-end=\"50\"\u003e\u003ca href=\"https://www.ncbi.nlm.nih.gov/pubmed/26327586\"\u003eSessions et. 
al., PLoS Negl Trop Dis., 2015\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"50\" data-line-end=\"52\"\u003e\u003ca href=\"https://www.ncbi.nlm.nih.gov/pubmed/26325059\"\u003eSim et al., PLoS Negl Trop Dis., 2015\u003c/a\u003e\u003c/li\u003e\r\n\u003c/ul\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"52\" data-line-end=\"53\"\u003ePlenty of people provided essential feedback, including:\u003c/p\u003e\r\n\u003cul\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"54\" data-line-end=\"55\"\u003e\u003ca href=\"https://www.duke-nus.edu.sg/content/sessions-october\"\u003eOctober SESSIONS\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"55\" data-line-end=\"56\"\u003e\u003ca href=\"https://www.a-star.edu.sg/gis/Our-People/Platform-Leaders\"\u003ePaola Florez DE SESSIONS\u003c/a\u003e\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"56\" data-line-end=\"57\"\u003eZHU Yuan\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"57\" data-line-end=\"58\"\u003eShuzhen SIM\u003c/li\u003e\r\n\u003cli class=\"has-line-data\" data-line-start=\"58\" data-line-end=\"59\"\u003eCHU Wenhan Collins\u003c/li\u003e\r\n\u003c/ul\u003e\r\n\u003c/body\u003e\u003c/html\u003e","organization":"nf-core viralrecon","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/20?version=1","name":"Version 1","author":[],"descriptor_type":["NFL"]}]},{"id":"22","url":"https://workflowhub.eu/workflows/22","name":"Pathway Analysis","description":"Given a set of pathways generated by RetroPath2.0, this workflow informs the user as to the theoretically best performing ones based on four criteria: FBA, thermodynamic feasibility, length of the pathway, and reaction rule score.","organization":"IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/22?version=1","name":"Version 1","author":["Melchior du Lac"],"descriptor_type":["GALAXY"]}]},{"id":"23","url":"https://workflowhub.eu/workflows/23","name":"Genetic Design","description":"This workflow converts the top-ranking predicted pathways from the \"RetroSynthesis\" and \"Pathway Analysis\" workflows to plasmids intended to be expressed in the specified organism","organization":"IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/23?version=1","name":"Version 1","author":["Melchior du Lac"],"descriptor_type":["GALAXY"]}]},{"id":"24","url":"https://workflowhub.eu/workflows/24","name":"RetroSynthesis","description":"Generate possible metabolic routes for the production of a target molecule in an organism of choice","organization":"IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/24?version=1","name":"Version 1","author":["Melchior du Lac"],"descriptor_type":["GALAXY"]}]},{"id":"25","url":"https://workflowhub.eu/workflows/25","name":"Pathway Ranker","description":"The workflow runs the RetroSynthesis algorithm to generate a collection of heterologous pathways in a host organism of choice, converts them to SBML files, performs analysis on the pathways to then rank the theoretical best performing ones.","organization":"IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/25?version=1","name":"Version 1","author":["Melchior du Lac"],"descriptor_type":["GALAXY"]}]},{"id":"26","url":"https://workflowhub.eu/workflows/26","name":"VIRify","description":"\u003cp class=\"has-line-data\" data-line-start=\"0\" data-line-end=\"1\"\u003e\u003cimg 
src=\"https://img.shields.io/badge/CWL-1.2.0--dev2-green\" alt=\"\"\u003e \u003cimg src=\"https://img.shields.io/badge/nextflow-20.01.0-brightgreen\" alt=\"\"\u003e \u003cimg src=\"https://img.shields.io/badge/uses-docker-blue.svg\" alt=\"\"\u003e \u003cimg src=\"https://img.shields.io/badge/uses-conda-yellow.svg\" alt=\"\"\u003e \u003cimg src=\"https://api.travis-ci.org/EBI-Metagenomics/emg-viral-pipeline.svg\" alt=\"\"\u003e\u003c/p\u003e\r\n\u003ch1 class=\"code-line\" data-line-start=2 data-line-end=3 \u003e\u003ca id=\"VIRify_2\"\u003e\u003c/a\u003eVIRify\u003c/h1\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"4\" data-line-end=\"5\"\u003e\u003cimg width=\"500px\" src=\"https://raw.githubusercontent.com/EBI-Metagenomics/emg-viral-pipeline/master/nextflow/figures/sankey.png\" alt=\"Sankey plot\"\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"6\" data-line-end=\"7\"\u003eVIRify is a recently developed pipeline for the detection, annotation, and taxonomic classification of viral contigs in metagenomic and metatranscriptomic assemblies. The pipeline is part of the repertoire of analysis services offered by \u003ca href=\"https://www.ebi.ac.uk/metagenomics/\"\u003eMGnify\u003c/a\u003e. VIRify’s taxonomic classification relies on the detection of taxon-specific profile hidden Markov models (HMMs), built upon a set of 22,014 orthologous protein domains and referred to as ViPhOGs.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"8\" data-line-end=\"9\"\u003eVIRify was implemented in CWL.\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=10 data-line-end=11 \u003e\u003ca id=\"What_do_I_need_10\"\u003e\u003c/a\u003eWhat do I need?\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"12\" data-line-end=\"13\"\u003eThe current implementation uses CWL version 1.2 dev+2. 
It was tested using Toil version 4.10 as the workflow engine and conda to manage the software dependencies.\u003c/p\u003e\r\n\u003ch3 class=\"code-line\" data-line-start=14 data-line-end=15 \u003e\u003ca id=\"Docker__Singularity_support_14\"\u003e\u003c/a\u003eDocker - Singularity support\u003c/h3\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"16\" data-line-end=\"17\"\u003eSoon…\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=18 data-line-end=19 \u003e\u003ca id=\"Setup_environment_18\"\u003e\u003c/a\u003eSetup environment\u003c/h2\u003e\r\n\u003cpre\u003e\u003ccode class=\"has-line-data\" data-line-start=\"21\" data-line-end=\"24\" class=\"language-bash\"\u003econda env create \u003cspan class=\"hljs-operator\"\u003e-f\u003c/span\u003e cwl/requirements/conda_env.yml\r\nconda activate viral_pipeline\r\n\u003c/code\u003e\u003c/pre\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=25 data-line-end=26 \u003e\u003ca id=\"Basic_execution_25\"\u003e\u003c/a\u003eBasic execution\u003c/h2\u003e\r\n\u003cpre\u003e\u003ccode class=\"has-line-data\" data-line-start=\"28\" data-line-end=\"31\" class=\"language-bash\"\u003e\u003cspan class=\"hljs-built_in\"\u003ecd\u003c/span\u003e cwl/\r\nvirify.sh -h\r\n\u003c/code\u003e\u003c/pre\u003e\r\n\u003ch1 class=\"code-line\" data-line-start=32 data-line-end=33 \u003e\u003ca id=\"A_note_about_metatranscriptomes_32\"\u003e\u003c/a\u003eA note about metatranscriptomes\u003c/h1\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"34\" data-line-end=\"36\"\u003eAlthough VIRify has been benchmarked and validated with metagenomic data in mind, it is also possible to use this tool to detect RNA viruses in metatranscriptome assemblies (e.g. SARS-CoV-2). However, some additional considerations for this purpose are outlined below:\u003cbr\u003e\r\n\u003cstrong\u003e1. 
Quality control:\u003c/strong\u003e As for metagenomic data, a thorough quality control of the FASTQ sequence reads to remove low-quality bases, adapters and host contamination (if appropriate) is required prior to assembly. This is especially important for metatranscriptomes as small errors can further decrease the quality and contiguity of the assembly obtained. We have used \u003ca href=\"https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/\"\u003eTrimGalore\u003c/a\u003e for this purpose.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"37\" data-line-end=\"38\"\u003e\u003cstrong\u003e2. Assembly:\u003c/strong\u003e There are many assemblers available that are appropriate for either metagenomic or single-species transcriptomic data. However, to our knowledge, there is no assembler currently available specifically for metatranscriptomic data. From our preliminary investigations, we have found that transcriptome-specific assemblers (e.g. \u003ca href=\"http://cab.spbu.ru/software/spades/\"\u003ernaSPAdes\u003c/a\u003e) generate more contiguous and complete metatranscriptome assemblies compared to metagenomic alternatives (e.g. \u003ca href=\"https://github.com/voutcn/megahit/releases\"\u003eMEGAHIT\u003c/a\u003e and \u003ca href=\"http://cab.spbu.ru/software/spades/\"\u003emetaSPAdes\u003c/a\u003e).\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"39\" data-line-end=\"40\"\u003e\u003cstrong\u003e3. Post-processing:\u003c/strong\u003e Metatranscriptomes generate highly fragmented assemblies. Therefore, filtering contigs based on a set minimum length has a substantial impact in the number of contigs processed in VIRify. It has also been observed that the number of false-positive detections of \u003ca href=\"https://github.com/jessieren/VirFinder/releases\"\u003eVirFinder\u003c/a\u003e (one of the tools included in VIRify) is lower among larger contigs. 
The choice of a length threshold will depend on the complexity of the sample and the sequencing technology used, but in our experience any contigs \u0026lt;2 kb should be analysed with caution.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"41\" data-line-end=\"42\"\u003e\u003cstrong\u003e4. Classification:\u003c/strong\u003e The classification module of VIRify depends on the presence of a minimum number and proportion of phylogenetically-informative genes within each contig in order to confidently assign a taxonomic lineage. Therefore, short contigs typically obtained from metatranscriptome assemblies remain generally unclassified. For targeted classification of RNA viruses (for instance, to search for Coronavirus-related sequences), alternative DNA- or protein-based classification methods can be used. Two of the possible options are: (i) using \u003ca href=\"https://github.com/marbl/MashMap/releases\"\u003eMashMap\u003c/a\u003e to screen the VIRify contigs against a database of RNA viruses (e.g. 
Coronaviridae) or (ii) using \u003ca href=\"http://hmmer.org/download.html\"\u003ehmmsearch\u003c/a\u003e to screen the proteins obtained in the VIRify contigs against marker genes of the taxon of interest.\u003c/p\u003e\r\n\u003ch2\u003eContact us\u003c/h2\u003e\r\n\u003ca href=\"https://www.ebi.ac.uk/support/metagenomics\"\u003eMGnify helpdesk\u003c/a\u003e","organization":"MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/26?version=1","name":"v0.4.0","author":["Martin Beracochea"],"descriptor_type":["CWL"]}]},{"id":"27","url":"https://workflowhub.eu/workflows/27","name":"VIRify","description":"\u003cp class=\"has-line-data\" data-line-start=\"0\" data-line-end=\"1\"\u003e\u003cimg src=\"https://img.shields.io/badge/CWL-1.2.0--dev2-green\" alt=\"\"\u003e \u003cimg src=\"https://img.shields.io/badge/nextflow-20.01.0-brightgreen\" alt=\"\"\u003e \u003cimg src=\"https://img.shields.io/badge/uses-docker-blue.svg\" alt=\"\"\u003e \u003cimg src=\"https://img.shields.io/badge/uses-conda-yellow.svg\" alt=\"\"\u003e \u003cimg src=\"https://api.travis-ci.org/EBI-Metagenomics/emg-viral-pipeline.svg\" alt=\"\"\u003e\u003c/p\u003e\r\n\u003ch1 class=\"code-line\" data-line-start=2 data-line-end=3 \u003e\u003ca id=\"VIRify_2\"\u003e\u003c/a\u003eVIRify\u003c/h1\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"4\" data-line-end=\"5\"\u003e\u003cimg width=\"500px\" src=\"https://raw.githubusercontent.com/EBI-Metagenomics/emg-viral-pipeline/master/nextflow/figures/sankey.png\" alt=\"Sankey plot\"\u003e\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"6\" data-line-end=\"7\"\u003eVIRify is a recently developed pipeline for the detection, annotation, and taxonomic classification of viral contigs in metagenomic and metatranscriptomic assemblies. 
The pipeline is part of the repertoire of analysis services offered by \u003ca href=\"https://www.ebi.ac.uk/metagenomics/\"\u003eMGnify\u003c/a\u003e. VIRify’s taxonomic classification relies on the detection of taxon-specific profile hidden Markov models (HMMs), built upon a set of 22,014 orthologous protein domains and referred to as ViPhOGs.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"8\" data-line-end=\"9\"\u003eVIRify was implemented in CWL.\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=10 data-line-end=11 \u003e\u003ca id=\"What_do_I_need_10\"\u003e\u003c/a\u003eWhat do I need?\u003c/h2\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"12\" data-line-end=\"13\"\u003eThe current implementation uses CWL version 1.2 dev+2. It was tested using Toil version 4.10 as the workflow engine and conda to manage the software dependencies.\u003c/p\u003e\r\n\u003ch3 class=\"code-line\" data-line-start=14 data-line-end=15 \u003e\u003ca id=\"Docker__Singularity_support_14\"\u003e\u003c/a\u003eDocker - Singularity support\u003c/h3\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"16\" data-line-end=\"17\"\u003eSoon…\u003c/p\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=18 data-line-end=19 \u003e\u003ca id=\"Setup_environment_18\"\u003e\u003c/a\u003eSetup environment\u003c/h2\u003e\r\n\u003cpre\u003e\u003ccode class=\"has-line-data\" data-line-start=\"21\" data-line-end=\"24\" class=\"language-bash\"\u003econda env create \u003cspan class=\"hljs-operator\"\u003e-f\u003c/span\u003e cwl/requirements/conda_env.yml\r\nconda activate viral_pipeline\r\n\u003c/code\u003e\u003c/pre\u003e\r\n\u003ch2 class=\"code-line\" data-line-start=25 data-line-end=26 \u003e\u003ca id=\"Basic_execution_25\"\u003e\u003c/a\u003eBasic execution\u003c/h2\u003e\r\n\u003cpre\u003e\u003ccode class=\"has-line-data\" data-line-start=\"28\" data-line-end=\"31\" class=\"language-bash\"\u003e\u003cspan 
class=\"hljs-built_in\"\u003ecd\u003c/span\u003e cwl/\r\nvirify.sh -h\r\n\u003c/code\u003e\u003c/pre\u003e\r\n\u003ch1 class=\"code-line\" data-line-start=32 data-line-end=33 \u003e\u003ca id=\"A_note_about_metatranscriptomes_32\"\u003e\u003c/a\u003eA note about metatranscriptomes\u003c/h1\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"34\" data-line-end=\"36\"\u003eAlthough VIRify has been benchmarked and validated with metagenomic data in mind, it is also possible to use this tool to detect RNA viruses in metatranscriptome assemblies (e.g. SARS-CoV-2). However, some additional considerations for this purpose are outlined below:\u003cbr\u003e\r\n\u003cstrong\u003e1. Quality control:\u003c/strong\u003e As for metagenomic data, a thorough quality control of the FASTQ sequence reads to remove low-quality bases, adapters and host contamination (if appropriate) is required prior to assembly. This is especially important for metatranscriptomes as small errors can further decrease the quality and contiguity of the assembly obtained. We have used \u003ca href=\"https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/\"\u003eTrimGalore\u003c/a\u003e for this purpose.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"37\" data-line-end=\"38\"\u003e\u003cstrong\u003e2. Assembly:\u003c/strong\u003e There are many assemblers available that are appropriate for either metagenomic or single-species transcriptomic data. However, to our knowledge, there is no assembler currently available specifically for metatranscriptomic data. From our preliminary investigations, we have found that transcriptome-specific assemblers (e.g. \u003ca href=\"http://cab.spbu.ru/software/spades/\"\u003ernaSPAdes\u003c/a\u003e) generate more contiguous and complete metatranscriptome assemblies compared to metagenomic alternatives (e.g. 
\u003ca href=\"https://github.com/voutcn/megahit/releases\"\u003eMEGAHIT\u003c/a\u003e and \u003ca href=\"http://cab.spbu.ru/software/spades/\"\u003emetaSPAdes\u003c/a\u003e).\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"39\" data-line-end=\"40\"\u003e\u003cstrong\u003e3. Post-processing:\u003c/strong\u003e Metatranscriptomes generate highly fragmented assemblies. Therefore, filtering contigs based on a set minimum length has a substantial impact in the number of contigs processed in VIRify. It has also been observed that the number of false-positive detections of \u003ca href=\"https://github.com/jessieren/VirFinder/releases\"\u003eVirFinder\u003c/a\u003e (one of the tools included in VIRify) is lower among larger contigs. The choice of a length threshold will depend on the complexity of the sample and the sequencing technology used, but in our experience any contigs \u0026lt;2 kb should be analysed with caution.\u003c/p\u003e\r\n\u003cp class=\"has-line-data\" data-line-start=\"41\" data-line-end=\"42\"\u003e\u003cstrong\u003e4. Classification:\u003c/strong\u003e The classification module of VIRify depends on the presence of a minimum number and proportion of phylogenetically-informative genes within each contig in order to confidently assign a taxonomic lineage. Therefore, short contigs typically obtained from metatranscriptome assemblies remain generally unclassified. For targeted classification of RNA viruses (for instance, to search for Coronavirus-related sequences), alternative DNA- or protein-based classification methods can be used. Two of the possible options are: (i) using \u003ca href=\"https://github.com/marbl/MashMap/releases\"\u003eMashMap\u003c/a\u003e to screen the VIRify contigs against a database of RNA viruses (e.g. 
Coronaviridae) or (ii) using \u003ca href=\"http://hmmer.org/download.html\"\u003ehmmsearch\u003c/a\u003e to screen the proteins obtained in the VIRify contigs against marker genes of the taxon of interest.\u003c/p\u003e\r\n\u003ch2\u003eContact us\u003c/h2\u003e\r\n\u003ca href=\"https://www.ebi.ac.uk/support/metagenomics\"\u003eMGnify helpdesk\u003c/a\u003e","organization":"HoloFood at MGnify, MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/27?version=1","name":"Version 1","author":["Martin Beracochea"],"descriptor_type":["NFL"]}]},{"id":"28","url":"https://workflowhub.eu/workflows/28","name":"var-PE","description":"Analysis of variation within individual COVID-19 samples \r\nusing bowtie2, bwa, fastp, multiqc , picard ,samtools, snpEff \r\nWorkflow, tools and data are available on https://github.com/fjrmoreews/cwl-workflow-SARS-CoV-2/tree/master/Variation\r\nThis worklow was ported into CWL from a  Galaxy Workflow \r\n  ( https://github.com/galaxyproject/SARS-CoV-2/tree/master/genomics/4-Variation migrated to CWL).\r\n\r\n","organization":"CWL workflow SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/28?version=1","name":"Version 1","author":["Camille Juigné"],"descriptor_type":["CWL"]}]},{"id":"29","url":"https://workflowhub.eu/workflows/29","name":"Protein MD Setup tutorial using BioExcel Building Blocks (biobb) in CWL","description":"Common Workflow Language example that illustrate the process of setting up a simulation system containing a protein, step by step, using the [BioExcel Building Blocks](/projects/11) library (biobb). The particular example used is the Lysozyme protein (PDB code 1AKI). 
This workflow returns a resulting protein structure and simulated 3D trajectories.","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/29?version=1","name":"Version 1","author":["Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/29?version=2","name":"Version 2","author":["Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/29?version=3","name":"Version 3","author":["Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"30","url":"https://workflowhub.eu/workflows/30","name":"Genomic variants - SNPs and INDELs detection using GATK4.","description":"\r\nAuthor: AMBARISH KUMAR er.ambarish@gmail.com \u0026 ambari73_sit@jnu.ac.in\r\n\r\nThis is a proposed standard operating procedure for genomic variant detection using GATK4.\r\n\r\nIt is hoped to be effective and useful for getting SARS-CoV-2 genome variants.\r\n\r\n\r\n\r\nIt uses Illumina RNASEQ reads and genome sequence.\r\n","organization":"CWL workflow SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/30?version=1","name":"Version 1","author":["Ambarish Kumar"],"descriptor_type":["CWL"]}]},{"id":"31","url":"https://workflowhub.eu/workflows/31","name":"Genomic variants - SNPs and INDELs detection using VARSCAN2.","description":"\r\nAuthor: AMBARISH KUMAR er.ambarish@gmail.com; ambari73_sit@jnu.ac.in\r\n\r\nThis is a proposed standard operating procedure for genomic variant detection using VARSCAN.\r\n\r\nIt is hoped to be effective and useful for getting SARS-CoV-2 genome variants.\r\n\r\n\r\n\r\nIt uses Illumina RNASEQ reads and genome sequence.\r\n","organization":"CWL workflow SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/31?version=1","name":"Version 1","author":["Ambarish Kumar"],"descriptor_type":["CWL"]}]},{"id":"33","url":"https://workflowhub.eu/workflows/33","name":"Genomic variants - SNPs and INDELs detection using GATK4 spark based tools.","description":"\r\nAuthor: AMBARISH KUMAR er.ambarish@gmail.com \u0026 ambari73_sit@jnu.ac.in\r\n\r\nThis is a proposed standard operating procedure for genomic variant detection using GATK4.\r\n\r\nIt is hoped to be effective and useful for getting SARS-CoV-2 genome variants.\r\n\r\n\r\n\r\nIt uses Illumina RNASEQ reads and genome sequence.\r\n","organization":"CWL workflow SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/33?version=1","name":"Version 1","author":["Ambarish Kumar"],"descriptor_type":["CWL"]}]},{"id":"34","url":"https://workflowhub.eu/workflows/34","name":"Genomic variants - SNPs and INDELs detection using SAMTools.","description":"\r\nAuthor: AMBARISH KUMAR er.ambarish@gmail.com; ambari73_sit@jnu.ac.in\r\n\r\nThis is a proposed standard operating procedure for genomic variant detection using SAMTools.\r\n\r\nIt is hoped to be effective and useful for getting SARS-CoV-2 genome variants.\r\n\r\n\r\n\r\nIt uses Illumina RNASEQ reads and genome sequence.\r\n","organization":"CWL workflow SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/34?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"35","url":"https://workflowhub.eu/workflows/35","name":"COVID-19: GATK4","description":"Detects SNPs and INDELs.","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/35?version=1","name":"Version 
1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"36","url":"https://workflowhub.eu/workflows/36","name":"COVID-19: VARSCAN","description":"Detects SNPs and INDELs using VARSCAN2.","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/36?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"37","url":"https://workflowhub.eu/workflows/37","name":"Assembly using Tophat2 and annotation (alternate)","description":"Alignment, assembly and annotation of RNQSEQ reads using TOPHAT  (without filtering out host reads).","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/37?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"38","url":"https://workflowhub.eu/workflows/38","name":"Unicycler assembly and annotation","description":"Alignment, assembly RNASEQ reads and annotation of generated transcripts.","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/38?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"39","url":"https://workflowhub.eu/workflows/39","name":"StringTie assembly and annotation","description":"Alignment, assembly and annotation of RNASEQ reads as well as annotation of generated transcripts.","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/39?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"40","url":"https://workflowhub.eu/workflows/40","name":"Assembly using Tophat2 and annotation","description":"Alignment, 
assembly and annotation of generated transcripts from RNASEQ reads.","organization":"GalaxyProject SARS-CoV-2","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/40?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"41","url":"https://workflowhub.eu/workflows/41","name":"Nucleoli segmentation using CellProfiler (EOSC-Life D6)","description":"This workflow has been created as part of Demonstrator 6 of the project EOSC-Life (within WP3) and is focused on reusing publicly available RNAi screens to gain insights into the nucleolus biology. The workflow downloads images from the Image Data Resource (IDR), performs object segmentation (of nuclei and nucleoli) and feature extraction of the images and objects identified.\r\n\r\nTutorial: https://training.galaxyproject.org/training-material/topics/imaging/tutorials/tutorial-CP/tutorial.html","organization":"EOSC-Life WP3, Euro-BioImaging, IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/41?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/41?version=2","name":"Version 2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"42","url":"https://workflowhub.eu/workflows/42","name":"Climate - Climate 101","description":"The tutorial for this workflow can be found on [Galaxy Training Network](https://training.galaxyproject.org/training-material/topics/climate/tutorials/climate-101/tutorial.html)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/42?version=1","name":"Version 1","author":["Anne Fouilloux"],"descriptor_type":["GALAXY"]}]},{"id":"43","url":"https://workflowhub.eu/workflows/43","name":"NMR 
pipe","description":"CWL workflow for NMR spectra Peak Picking\r\nThe workflow takes as input a series of 2D 1H 15N HSQC NMR spectra and uses nmrpipe tools to convert the spectra in nmrpipe format and performs an automatic peak picking.\r\nThis test uses a protein MDM2 with different ligands and peptide and generates a peak list with 1H and 15N chemical shift values for each spectrum. The difference among these peak lists can be used to characterize the ligand binding site on the protein.","organization":"NMR Workflow","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/43?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"44","url":"https://workflowhub.eu/workflows/44","name":"nf-core/rnaseq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-rnaseq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/rnaseq\" src=\"docs/images/nf-core-rnaseq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/rnaseq/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1400710-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1400710)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnaseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnaseq-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/rnaseq)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/rnaseq** is a bioinformatics pipeline that can be used to analyse 
RNA sequencing data obtained from organisms with a reference genome and annotation. It takes a samplesheet and FASTQ files as input, performs quality control (QC), trimming and (pseudo-)alignment, and produces a gene expression matrix and extensive QC report.\n\n![nf-core/rnaseq metro map](docs/images/nf-core-rnaseq_metro_map_grey_animated.svg)\n\n\u003e In case the image above is not loading, please have a look at the [static version](docs/images/nf-core-rnaseq_metro_map_grey.png).\n\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\n2. Auto-infer strandedness by subsampling and pseudoalignment ([`fq`](https://github.com/stjude-rust-labs/fq), [`Salmon`](https://combine-lab.github.io/salmon/))\n3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n4. UMI extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n5. Adapter and quality trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))\n6. Removal of genome contaminants ([`BBSplit`](http://seqanswers.com/forums/showthread.php?t=41288))\n7. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna))\n8. Choice of multiple alignment and quantification routes (_For `STAR` the sentieon implementation can be chosen_):\n   1. [`STAR`](https://github.com/alexdobin/STAR) -\u003e [`Salmon`](https://combine-lab.github.io/salmon/)\n   2. [`STAR`](https://github.com/alexdobin/STAR) -\u003e [`RSEM`](https://github.com/deweylab/RSEM)\n   3. [`HiSAT2`](https://ccb.jhu.edu/software/hisat2/index.shtml) -\u003e **NO QUANTIFICATION**\n9. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n10. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n11. Duplicate read marking ([`picard MarkDuplicates`](https://broadinstitute.github.io/picard/))\n12. 
Transcript assembly and quantification ([`StringTie`](https://ccb.jhu.edu/software/stringtie/))\n13. Create bigWig coverage files ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/))\n14. Extensive quality control:\n    1. [`RSeQC`](http://rseqc.sourceforge.net/)\n    2. [`Qualimap`](http://qualimap.bioinfo.cipf.es/)\n    3. [`dupRadar`](https://bioconductor.org/packages/release/bioc/html/dupRadar.html)\n    4. [`Preseq`](http://smithlabresearch.org/software/preseq/)\n    5. [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)\n    6. [`Kraken2`](https://ccb.jhu.edu/software/kraken2/) -\u003e [`Bracken`](https://ccb.jhu.edu/software/bracken/) on unaligned sequences; _optional_\n15. Pseudoalignment and quantification ([`Salmon`](https://combine-lab.github.io/salmon/) or ['Kallisto'](https://pachterlab.github.io/kallisto/); _optional_)\n16. Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/))\n\n\u003e **Note**\n\u003e The SRA download functionality has been removed from the pipeline (`\u003e=3.2`) and ported to an independent workflow called [nf-core/fetchngs](https://nf-co.re/fetchngs). You can provide `--nf_core_pipeline rnaseq` when running nf-core/fetchngs to download and auto-create a samplesheet containing publicly available samples that can be accepted directly as input by this pipeline.\n\n\u003e **Warning**\n\u003e Quantification isn't performed if using `--aligner hisat2` due to the lack of an appropriate option to calculate accurate expression estimates from HISAT2 derived genomic alignments. 
However, you can use this route if you have a preference for the alignment, QC and other types of downstream analysis compatible with the output of HISAT2.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n**samplesheet.csv**:\n\n```csv\nsample,fastq_1,fastq_2,strandedness\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto\nCONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto\nCONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end). Rows with the same sample identifier are considered technical replicates and merged automatically. The strandedness refers to the library preparation and will be automatically inferred if set to `auto`.\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/rnaseq \\\n    --input \u003cSAMPLESHEET\u003e \\\n    --outdir \u003cOUTDIR\u003e \\\n    --gtf \u003cGTF\u003e \\\n    --fasta \u003cGENOME FASTA\u003e \\\n    -profile \u003cdocker/singularity/.../institute\u003e\n```\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnaseq/usage) and the [parameter documentation](https://nf-co.re/rnaseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnaseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/rnaseq/output).\n\nThis pipeline quantifies RNA-sequenced reads relative to genes/transcripts in the genome and normalizes the resulting data. It does not compare the samples statistically in order to assign significance in the form of FDR or P-values. 
For downstream analyses, the output files from this pipeline can be analysed directly in statistical environments like [R](https://www.r-project.org/), [Julia](https://julialang.org/) or via the [nf-core/differentialabundance](https://github.com/nf-core/differentialabundance/) pipeline.\n\n## Online videos\n\nA short talk about the history, current status and functionality on offer in this pipeline was given by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) on [8th February 2022](https://nf-co.re/events/2022/bytesize-32-nf-core-rnaseq) as part of the nf-core/bytesize series.\n\nYou can find numerous talks on the [nf-core events page](https://nf-co.re/events) from various topics including writing pipelines/modules in Nextflow DSL2, using nf-core tooling, running nf-core pipelines as well as more generic content like contributing to Github. Please check them out!\n\n## Credits\n\nThese scripts were originally written for use at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se), part of [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden, by Phil Ewels ([@ewels](https://github.com/ewels)) and Rickard Hammarén ([@Hammarn](https://github.com/Hammarn)).\n\nThe pipeline was re-written in Nextflow DSL2 and is primarily maintained by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).\n\nThe pipeline workflow diagram was initially designed by Sarah Guinchard ([@G-Sarah](https://github.com/G-Sarah)) and James Fellows Yates ([@jfy133](https://github.com/jfy133)), further modifications where made by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) and Maxime Garcia ([@maxulysse](https://github.com/maxulysse)).\n\nMany thanks to other who have helped out along the way too, including (but not limited to):\n\n- [Alex Peltzer](https://github.com/apeltzer)\n- [Colin Davenport](https://github.com/colindaven)\n- [Denis Moreno](https://github.com/Galithil)\n- [Edmund 
Miller](https://github.com/edmundmiller)\n- [Gregor Sturm](https://github.com/grst)\n- [Jacki Buros Novik](https://github.com/jburos)\n- [Lorena Pantano](https://github.com/lpantano)\n- [Matthias Zepper](https://github.com/MatthiasZepper)\n- [Maxime Garcia](https://github.com/maxulysse)\n- [Olga Botvinnik](https://github.com/olgabot)\n- [@orzechoj](https://github.com/orzechoj)\n- [Paolo Di Tommaso](https://github.com/pditommaso)\n- [Rob Syme](https://github.com/robsyme)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#rnaseq` channel](https://nfcore.slack.com/channels/rnaseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/rnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.1400710](https://doi.org/10.5281/zenodo.1400710)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/44?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/44?version=2","name":"1.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/44?version=3","name":"1.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/44?version=4","name":"1.3","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/44?version=5","name":"1.4","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/44?version=6","name":"1.4.1","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/44?version=7","name":"1.4.2","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/44?version=8","name":"2.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/44?version=9","name":"3.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/44?version=10","name":"3.1","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/44?version=11","name":"3.2","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/44?version=12","name":"3.3","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/44?version=13","name":"3.4","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/44?version=14","name":"3.5","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/44?version=15","name":"3.6","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/w
orkflows/44?version=16","name":"3.7","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/44?version=17","name":"3.8","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/44?version=18","name":"3.8.1","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/44?version=19","name":"3.9","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/44?version=20","name":"3.10","author":[],"descriptor_type":["NFL"]},{"id":"21","url":"https://workflowhub.eu/workflows/44?version=21","name":"3.10.1","author":[],"descriptor_type":["NFL"]},{"id":"22","url":"https://workflowhub.eu/workflows/44?version=22","name":"3.11.0","author":[],"descriptor_type":["NFL"]},{"id":"23","url":"https://workflowhub.eu/workflows/44?version=23","name":"3.11.1","author":[],"descriptor_type":["NFL"]},{"id":"24","url":"https://workflowhub.eu/workflows/44?version=24","name":"3.11.2","author":[],"descriptor_type":["NFL"]},{"id":"25","url":"https://workflowhub.eu/workflows/44?version=25","name":"3.12.0","author":[],"descriptor_type":["NFL"]},{"id":"26","url":"https://workflowhub.eu/workflows/44?version=26","name":"3.13.1","author":[],"descriptor_type":["NFL"]},{"id":"27","url":"https://workflowhub.eu/workflows/44?version=27","name":"3.13.0","author":[],"descriptor_type":["NFL"]},{"id":"28","url":"https://workflowhub.eu/workflows/44?version=28","name":"3.13.2","author":[],"descriptor_type":["NFL"]},{"id":"29","url":"https://workflowhub.eu/workflows/44?version=29","name":"3.14.0","author":[],"descriptor_type":["NFL"]},{"id":"30","url":"https://workflowhub.eu/workflows/44?version=30","name":"3.15.0","author":[],"descriptor_type":["NFL"]},{"id":"31","url":"https://workflowhub.eu/workflows/44?version=31","name":"3.15.1","author":[],"descriptor_type":["NFL"]},{"id":"32","url":"https://workflowhub.eu/workflows/44?version=32","name":"3.16.0","author":[],"descriptor_type":["NFL"
]},{"id":"33","url":"https://workflowhub.eu/workflows/44?version=33","name":"3.16.1","author":[],"descriptor_type":["NFL"]},{"id":"34","url":"https://workflowhub.eu/workflows/44?version=34","name":"3.17.0","author":[],"descriptor_type":["NFL"]},{"id":"35","url":"https://workflowhub.eu/workflows/44?version=35","name":"3.18.0","author":[],"descriptor_type":["NFL"]},{"id":"36","url":"https://workflowhub.eu/workflows/44?version=36","name":"3.19.0","author":[],"descriptor_type":["NFL"]},{"id":"37","url":"https://workflowhub.eu/workflows/44?version=37","name":"3.20.0","author":[],"descriptor_type":["NFL"]}]},{"id":"45","url":"https://workflowhub.eu/workflows/45","name":"NGTax","description":"Amplicon analysis workflow using NG-Tax\r\n\r\n**Steps:**\r\n\r\n* Quality control on the reads\r\n* Execute NGTax for ASV detection and classification\r\n\r\nFor more information about NG-Tax 2.0 have a look at https://doi.org/10.3389/fgene.2019.01366","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/45?version=1","name":"Version 1","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/45?version=2","name":"Version 2","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/45?version=3","name":"Version 3","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"4","url":"https://workflowhub.eu/workflows/45?version=4","name":"Version 4","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"5","url":"https://workflowhub.eu/workflows/45?version=5","name":"Version 5","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"6","url":"https://workflowhub.eu/workflows/45?version=6","name":"Version 6","author":["Jasper Koehorst","Bart 
Nijsse"],"descriptor_type":["CWL"]},{"id":"7","url":"https://workflowhub.eu/workflows/45?version=7","name":"Version 7","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]}]},{"id":"46","url":"https://workflowhub.eu/workflows/46","name":"Copernicus Essential Climate Variable - select and plot","description":"Abstract CWL Automatically generated from the Galaxy workflow file: Workflow with Copernicus Essential Climate Variable - select and plot","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/46?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"47","url":"https://workflowhub.eu/workflows/47","name":"RATECE-PLANICA ski station (Slovenia) under CMIP-6 SSP585 condition","description":"Description: SSP-based RCP scenario with high radiative forcing by the end of century. Following approximately RCP8.5 global forcing pathway with SSP5 socioeconomic conditions. Concentration-driven.\r\nRationale: the scenario represents the high end of plausible future pathways. 
SSP5 is the only SSP with emissions high enough to produce the 8.5 W/m2 level of forcing in 2100.\r\n\r\nThis workflow is answering to the following scientific question:\r\n- Is it worth investing in artificial snowmaking equipment at RATECE-PLANICA?","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/47?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"49","url":"https://workflowhub.eu/workflows/49","name":"Population and community metrics calculation from Biodiversity data","description":"Galaxy-E (ecology.usegalaxy.eu) workflow to calculate species presence / absence, community metrics and compute generalized linear models to identify effects and significativity of these effects on biodiversity.","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/49?version=1","name":"Version 1","author":["Yvan Le Bras","Coline Royaux"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/49?version=2","name":"Version 2","author":["Yvan Le Bras","Coline Royaux"],"descriptor_type":["GALAXY"]}]},{"id":"50","url":"https://workflowhub.eu/workflows/50","name":"ONT --Tutorial-Nanopolish-variants","description":"Basic workflows inspired by the Nanopolish tutorials","organization":"NanoGalaxy","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/50?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"51","url":"https://workflowhub.eu/workflows/51","name":"ONT -- Assembly-Flye-AhrensLab","description":"Genome assembly: Flye-based WF for highly repetitive genomes [Schmid et al. 
NAR 2018]","organization":"NanoGalaxy","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/51?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"52","url":"https://workflowhub.eu/workflows/52","name":"ONT - Workflow-Wick-et.al.","description":"Genome assembly: Unicycler-based WF for Klebsiella pneumoniae [Wick et al. Microbial genomics 2017]","organization":"NanoGalaxy","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/52?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"53","url":"https://workflowhub.eu/workflows/53","name":"ONT -- Metagenomics-Kraken2-Krona","description":"Metagenomics: taxa classification","organization":"NanoGalaxy","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/53?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"54","url":"https://workflowhub.eu/workflows/54","name":"Jupyter Notebook GMX Notebook Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand parameterization tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. 
With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/54?version=1","name":"Version 1","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/54?version=2","name":"Version 2","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/54?version=3","name":"Version 3","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/54?version=4","name":"Version 4","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/54?version=5","name":"Version 5","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/54?version=6","name":"Version 6","author":["Genís Bayarri","Adam Hospital","Douglas 
Lowe"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/54?version=7","name":"Version 7","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]},{"id":"8","url":"https://workflowhub.eu/workflows/54?version=8","name":"Version 8","author":["Genís Bayarri","Adam Hospital","Douglas Lowe"],"descriptor_type":[]}]},{"id":"55","url":"https://workflowhub.eu/workflows/55","name":"Jupyter Notebook Mutation Free Energy Calculations","description":"# Mutation Free Energy Calculations using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\n**Based on the official [pmx tutorial](http://pmx.mpibpc.mpg.de/sardinia2018_tutorial1/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate how to compute a **fast-growth mutation free energy** calculation, step by step, using the BioExcel **Building Blocks library (biobb)**. The particular example used is the **Staphylococcal nuclease** protein (PDB code 1STN), a small, minimal protein, appropriate for a short tutorial.\r\n\r\nThe **non-equilibrium free energy calculation** protocol performs a **fast alchemical transition** in the direction **WT-\u003eMut** and back **Mut-\u003eWT**. The two equilibrium trajectories needed for the tutorial, one for **Wild Type (WT)** and another for the **Mutated (Mut)** protein (Isoleucine 10 to Alanine -I10A-), have already been generated and are included in this example. We will name **WT as stateA** and **Mut as stateB**.\r\n\r\n![](https://raw.githubusercontent.com/bioexcel/biobb_wf_pmx_tutorial/master/biobb_wf_pmx_tutorial/notebooks/schema.png)\r\n\r\nThe tutorial calculates the **free energy difference** in the folded state of a protein. 
Starting from **two 1ns-length independent equilibrium simulations** (WT and mutant), snapshots are selected to start **fast (50ps) transitions** driving the system in the **forward** (WT to mutant) and **reverse** (mutant to WT) directions, and the **work values** required to perform these transitions are collected. With these values, **Crooks Gaussian Intersection** (CGI), **Bennett Acceptance Ratio** (BAR) and **Jarzynski estimator** methods are used to calculate the **free energy difference** between the two states.\r\n\r\n*Please note that for the sake of disk space this tutorial is using 1ns-length equilibrium trajectories, whereas in the [original example](http://pmx.mpibpc.mpg.de/sardinia2018_tutorial1/eq.mdp) the equilibrium trajectories used were obtained from 10ns-length simulations.*\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/55?version=1","name":"Version 1","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/55?version=2","name":"Version 
2","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/55?version=3","name":"Version 3","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/55?version=4","name":"Version 4","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/55?version=5","name":"Version 5","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/55?version=6","name":"Version 6","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/55?version=7","name":"Version 7","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"8","url":"https://workflowhub.eu/workflows/55?version=8","name":"Version 8","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]}]},{"id":"56","url":"https://workflowhub.eu/workflows/56","name":"Jupyter Notebook Protein Ligand Complex MD Setup tutorial","description":"# Protein Ligand Complex MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/complex/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation system** containing a **protein in complex with a ligand**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **T4 lysozyme** L99A/M102Q protein (PDB code 3HTB), in complex with the **2-propylphenol** small molecule (3-letter Code JZ4). 
\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/56?version=1","name":"Version 1","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/56?version=2","name":"Version 2","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/56?version=3","name":"Version 3","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/56?version=4","name":"Version 4","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/56?version=5","name":"Version 5","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/56?version=6","name":"Version 6","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/56?version=7","name":"Version 7","author":["Genís Bayarri","Adam 
Hospital"],"descriptor_type":[]},{"id":"8","url":"https://workflowhub.eu/workflows/56?version=8","name":"Version 8","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"9","url":"https://workflowhub.eu/workflows/56?version=9","name":"Version 9","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]}]},{"id":"57","url":"https://workflowhub.eu/workflows/57","name":"nf-core/metaboigniter","description":"Pre-processing of mass spectrometry-based metabolomics data","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/57?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/57?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/57?version=3","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/57?version=4","name":"2.0.1","author":[],"descriptor_type":["NFL"]}]},{"id":"58","url":"https://workflowhub.eu/workflows/58","name":"RNA-Seq","description":"# RNA-Seq pipeline\r\nHere we provide the tools to perform paired end or single read RNA-Seq analysis including raw data quality control, differential expression (DE) analysis and functional annotation. As input files you may use either zipped fastq-files (.fastq.gz) or mapped read data (.bam files). 
In case of paired end reads, corresponding fastq files should be named using *.R1.fastq.gz* and *.R2.fastq.gz* suffixes.\r\n\r\n\r\n## Pipeline Workflow\r\nAll analysis steps are illustrated in the pipeline [flowchart](https://www.draw.io/?lightbox=1\u0026highlight=0000ff\u0026edit=_blank\u0026layers=1\u0026nav=1\u0026title=NGSpipe2go_RNAseq_pipeline.html#R7R1Zk5s489e4Knmwi%2Ft4nDOz2WSyO5OtbPYlJZCw%2BYLBATzXr%2F90AOYQNtgY8Ex2UjuDACG1Wn13ayJfLJ8%2BhGC1%2BBxA5E0kAT5N5MuJJImKJE3IPwE%2BsxZd11nDPHRh8tCm4d59QUmjkLSuXYiiwoNxEHixuyo22oHvIzsutIEwDB6LjzmBV%2FzqCsxRpeHeBl619ZsL40XSKmrm5sYNcueL5NOGlMzPAvbPeRis%2FeR7fuAjdmcJ0m6SOUYLAIPHXJN8NZEvwiCI2V%2FLpwvkEbCmEGPvXdfczYYcIj9u8sLiRdX%2F%2FXT3%2FOR9Xj7NbwX7r3%2FtaTa4%2BDmFBYIYNMllEMaLYB74wLvatJ7T%2BSLSrYCv%2FrdertLn52CFWzZvfQoC3HApksdQHD8nKw%2FWcYCbFvHSS%2B6iJzf%2Bl3Q4U5Or77k7l0%2FJt%2BjFc3rhx%2BFz7iVy%2BT1%2Fb%2FMavUrfi%2BIw%2BJmtM16H8yooE%2BhGwTq0E7g82Ld%2F339bf1xfvvz39Y%2Bv19%2Bjh%2F%2BmcoIgMQjnKN7yoJIAmkA394lkqT6gYInwGPEDIfJA7D4UERMk%2BD3PntssNP4jWWv%2But8IH53FrRF7D4s76%2Fbm%2FvrHy8003XgPwFsnn5pImodncA7dB%2FJFz5379Ib2a01Q9Dyk6J9d4r%2FmyW%2F6mhWWW%2FDAaF9pawnPcqvvAQt559lWugi8IKQPydf0P%2B6iOYEfJ%2FgkamTcIFpQpBQLKEquHNfz8p1K5CfrNL1D9658Pg8BdDEilJrtYOnaCQbNPRBFKTal21qgn43x4gUEcFNT2IZZDyiM0dNWTEjuGmpCaxLaOjU1dv2Yo1RGQoAXOSKlpIt8CPZwkVl80%2BhzpZGfY6BPB9giGUIBWzIWm8MWRTCr2KJ1gS0v3%2BLLr8K%2F%2F328vrsKlyL8%2BuH5j6l8MIt5SwylIT%2BRlaH4CXeNVfG1LzLy4RmRNPGlTfYv2c2k8dr10uFU8aBAVHpACnkopBDlx9sb9zE4%2B3O%2B%2BvLjW%2FDxXBGnEodJ9I8kXJAcdx0S%2FtgD3LcNk8OcCdMrLEDKjsmNaURBeIYfEJXVE4dXX0iTs%2FMlcH0CP3eFPBfzNtx4vp2Ns68Wm5mYsO9AuplO0ku0An5FHCk3ALLublz6TuiDCP2apbCYYQEkeHjOfypdqbQFqMCWFc1GpiVA3QKCaiqm4uiOo5qaIAlT2wRQRbbqiFDSZR0auqBrjgUdVRUECymKAnUgI6vwkUWInMJnFnFMVOczgkjS9dyNF2trhmUQfOEuLSfACIz%2FvP1wT4YuzQN8YXmBRdANRDHCq3idTirCf9%2FdnuF54j92T5iBrfa5FCcAB08qQOdiE2%2B5apCsLEKOBvWmlCK5MbDw27m9JAkQOa7vUh1CEt5Z5M771ptr1PP2AgAjSgujFbIpNZQEOlFiNQng2iMod%2FrzQ1GEGYwLCLkGPswmNyXTdh0sP1AmBDDNJ%2Fttjwl3gINPyF7HBN45HKwZSb3%2B1YXaYooFtcWoai2yztFaROlYOq6yW3jButu
K%2FInp6gpreWTyTBZJRUAjbchshwoXXFu5eGMYihWQpcDJQyxtayyKJN%2F7K3AptiUfm0pqYcH0YgeB40RYRCovQjboA2wP5pCaRvr394LWsUvTKOgZG7WjB3VSa6o5DKU4bBt1rXWpSvGs4IkQPNefM5JnBSFE4RQ34wsmIyX7ghJEAW8CNbuTN%2BKT2xNJRgr5yZ5YAQizvqVt%2FKMiw%2FDEFb4waWNyG2LpK5rFTwVLWVmGhKYGRV2Fmg5EYAiKaIgSFiNFxZIMgGQw1R1bMETTAVARJB0CSVJkU9ZsQVRtLGealiqogoNFxkFkyNp5MiCVb2%2BTGGtkwcoi8PlnKn5V0akeV8qsFS0LQLQxduNB2RRZch0KlacylMo9JdXjVWwF8LnSWBEIY1hucdMGvGD405dXVBwB3nPk4lXB%2B1OYMxGMeq6EFPoE6ckSCkwmwM9pYEmcO74VrdgssFQ%2FY1D488tD9O0rnsq7P79Mv319P8vB3eWsRXWU1RausFt9jDRWodAGLh%2Fwzl%2FhSfhYEosYNcM7zs%2BoZhkPxzqhhv2%2FkYGRRt6WIe1s29fItUTecm3gnSUejJhICOepP8NDDnkrwE85HjUGOtT%2Bl5cVHhdujO4xESA9PoZgtY2DtxCRi4Z9WZQ48p5YFfiMo3mB1N%2BSWHNJLF2u0Yhi%2F0Qo%2FGL9j4RTYCpPXGhvR9QqUvZaQUu1DV3XHMlxREUxVUeVbRGopmTqjgMdxZqKGJUtyTItLFxZQFZMXVRUJBkOhIajiwKybSQpUB1G0KqZJQMQn7n9FrJaCVkRlom8zGSyN6Nzy%2F1RYaS1zNGaQ2Pe1eHgQwSgjbc2oSekZzYHISd3vlv77q81xYa1NcXUlawNIReU2cDg%2FbYpH0tMmTP5rysozHPiJJGqHTLbApr0ObkQrTwszcSdLnPWJaGXR5pUO%2Fltu5wi5RhoIticoMCXBSPuEvjEPYQBfJmTB1oIgZLxWwhsLgSmRuSTEQJXXM69xMPHGjJlzsJqI%2Fex9indQeSelruHkT6eJrvqjLJnn%2Fk9dkl3f22cYzx3SUUM4VKdVbltUXlqr6ky8kBuGnxR9ZwKuW3l08x3NHsAYdTArWxZqmBAKIuyqEhYaIVAFIGlSw6CyNEgmiITmhDaClINBe9QoBu6ZGL6AQSkmo4sQQgVU5Hso0uqmYdvI6funC0DWd1jjWXXRONogybHQYoSR12FwYMLCYLnfYY8ZN%2FKYukIKHRv8YsFIBVmzvkue43MleubFN4Fq3jWXjrLgXMnl5a2c%2Bnx8eMs6WEXPzb11mR5f3bMi%2F3p1W2ZfnC0fkuluIrFDo7nt5R%2B%2By0369W%2F%2FMPHBimNhE6wQUv3T9oFk%2BiStzZIUe3IKMYviErJ6spmXOnoLAzBc%2B6xFXkg2jJgVeMOuHZcpefTcW2Qm42gW1TnxU6MUZ60A99x5%2BuQ5mzkg454tq1RiJCgkVSGBflZ6K1mPiFltZLZdCOdQfRAMvquGUBIO3lsxi5rBTL%2BQ7XiWI0dkRt%2BGfKnXydB9wGSVYjA0sKiay08OE%2FsYVftSQbtEav4xukg8OpBWbpbC8aq2NkCywYT%2BCsNNOqTUKTHIPxJAghp3z6YU%2Bl7m9zNazukIUSM5UUtvvuuuwFiQIC1RxiYm4wggWXkrcOCF4BELrjEuOoH5HEfISIo4cXb%2FRGCv0noY5LTlXbrBTbRvXIL2Ghe7ydpiCfvLkfNyihF9iJBeGKCwQNImBFWusji%2F0SI4OQGMFg%2BjV1%2FHu2MsihpX6NWplRZ52gBHDVA1EoSWmfu7DSgcUDVSWkLxVenOnGT1MXqQgyapD6MMtVVLlnjZDLtKDpYW41HF4wC1kmatlXjKT8vG%2BohGs8uA3mmKVyDKP77gkoBTWzTv9aYCMfPkyTijhJ%2F6isMwSOFKU0iIPP
AktPPZHN0aX3Fj%2BH%2FO%2FjGL3vGmjN5a6exjiFRrbGORyZLPCfx33%2BiV5eyVsNpaJZ20rEw4eT1HcSE1DJuyZzUeo1DUTWhHssPMOnxU2sPJn49kbrjkSylIclSjmM2akuylFIOvpxYmepIVuV5xZwcQLKaodWOPArkWcHjb5Saqp3XbekEpRRVOj6K7CgO9BtFEqpjjhNFxD5QZHvdhzeCIsd2U6hiaWlTZ1LH3oUKI9J6YESpQjL%2B0iGd1nlpTF06V8MOK%2BazPd7rJDZ8ppNnF6xTaZdS3sdy68OQFE3rh6QoRrckhWsw0oekKCNztXeHmnLncs6eQYmZWeX%2B69ldY4vLHUrcKqsVDTs%2Fom0lc3JFGKjtjStKDqzHNK4ct4SDlNZrGMiYwodt1ciPF2t6T1ZL2EQGlKgHiTXIQTkJNShapwgUy6HkSxdCSlxKC7JpuUvAIE9qygnSipI04J3t%2FITe4mGp5%2FgfBt0FIRfqJfGIqefi5hr%2FI4%2BH8UXg466BS1cWYWx9RFE8qVZA7AAHZIN8vIAFSgUJuH4dWWmNBA1XvL6YwG5fLy2cknh8hT2CT76s49Wa5sMEHiQoc1ahYfjmL3ucoSeZuzLIpuEynyXL6dmMu0n05%2BEpF3mETUt4XoOl6xFcu0HeAyK9TvpwX%2BpKsSirrKrN8HwPYtcQz%2FWR4blQQfUQRWsvHmmcVT5zLdo2nGN8HLqOg8Is9Bs9rTCsIlYErAq1Xob04QsZiB%2B69mKJ%2FM33x7vVj7CtVaEqw3C39dGS7FN9e8Tb2gvmI93TeGTTlGXZgedhYZHGCDH2hYC9mGyCcl49MityjzwqejGMp2vBVeX%2FxPnl8i5a3vijO22gnaq%2Bs%2F4vvko%2BScTpxICCQpdm0dxnCetdK%2FBpqPlOBd4czJS4ddyjIW1VyTxERIEaKXXbN7nw8opNa3a3hPleKzmF0BINZDuSbkuOYQFoObZkAmSqOhJMQZyKliWZJoIahrJsAMG0LUMVBc2EDrB1UVQUAxi6WvzIMXIKabQw%2Fp0t13W0cP3nH6wK7Q%2FWjDffDxZleV0HAQa90t22IdxtY9d5CzSj48fDmC2Qt0JhNLvbtlJANGwk6tAwdFs2bVGWbVWWoQmRIUBoy1PbkgQJiQCJoi7aUHdMVdaBaMqGZTqKSRZMEnTdGuFK7QBFumT8x8Yafv8XHiim9BmFIXQoJIDMlwKmksssh0FZkO%2FnACsMhFBTDBVYGQ%2FM1mMiz9jrKKZdbaq8bXI5cwHHeHmjBSZ%2BWc2zNIR6z6Di16fUa5wqtPygZFk6nEfyg78OL6%2BQl5gSo2L%2FIlPy4Y28dFS%2F%2FJHiwDixpsX0wTRwa5cTrcOwU9cnhoOnFRUz%2FbixNwQSarBkhIZYhylNSLO%2FWWAqDUf1XCsECSIf3V1Smkxrz4nSV1jqccNQZXFYzwmfEh2evzwcJSq5%2F9uSpQMokToMJWoaIdQhJao0RGuLhLFLWLaLn1eIr0W19N5SKy2rGsbK02LaQYSZ5xXqh0SVJ9WeRqmvg0aZI6RRqRNx%2FKFkYzhqTGlaHF4Z7KwxbnSPNuQin1p0T%2BNFlo8TeNY6XiylF1n61o7cidLz6mG5E425XcIHfjC34aFczEEgXof9srD2nEs7Sc5VjkuS1RFyrh3VgcYRVju6XNbUXrOTuKXVecYTulhpsMDygljHyEnlhwjKNt7btDar5c6%2FufM%2Bkkjx0O1k6O2Jiv4qiIpqGrOqd3VwsiKfAlnpVNwxBhN3DqUIH778uMrCbRpv902ETtHin6u6PUe09E4fokVhDu1pgfE6aMHA5rvzG1G8mL%2B4yvJGhnfXcPnpCU5PpvrgcKa646Rx7jTVZTFm%2FZnq4Hp1ByAI9%2FIW4Jdp%2FfckOpEKG33QFvzdkAy6PVkxT5KslC1uysD5FPzSPIc
nb55caZ4CheqSAqlNA7lGWppH3WGrGao0DxGAzgP4jPUrqTHJC1NTTaqTCeSvRJwiH0T%2BnMRb9EH6yCfJYRh4MFJr8qfmz6I%2FXfKnjtFsIw9anOfkHA6Nk6CNnpSzZkzuZCowjcLh0DideJwOB0XbzsSGcjhcXv24vLpHv5pzsMstSUUbS0EvojvCv9tzLuV1JEIr8rCca6tFfDTB99W8ojgE9s%2BRxt4z4zqZcjLIxOD24EaksGOUqMdvMWWOawk%2FWp4RP1bt1YcINMhGasHUj37MRqnas14%2BHaPxMRumVOior2M29OKxGeM4ZsMY9TEbd4gKHzbRlT8gP1jy3ZoNSWT%2FeVW0tgw5CB6iJ5o0Oi9Mopzlk0VwCLSWv0vpP%2F3GPHb6Ti%2BpJIxsStAnuSL5OvP0rPtdh6vtdeDXqEvO62K15LyYRjUWsjuEDvgUfwcPXnI%2BoyFvt%2BQ8P1N5e13L3u3aRVNPJkb0LVMMlOFscoIYucsmjivD2ayvPURPSucQfit4IiefUzgS%2Bm4FIdaQprg54wH156ZbWBeZUyxMVSoZKeQneyI7EJ3clrbxjpzhIUmHpWi5XRt7a%2Be%2FM5AU6yPhX%2B8wiuNVY7UmWF5nMak4zdX8iQWF8P3mGksO1FaT1XxhSViAohrvyACWt5WL%2B4hitIryJ8M0Odv7WKePt%2Bz%2FjQxs0upo81M4qdxMi4emibMm5xyF1OLUuWjFNwEMWgN0P89M5%2By6Sx4simqVCQ96dnkzHpyNexBL5x9%2Bkyoj4BGCGIzT0klPiJlsSv%2B9e3FXKwTfbxtZLzqtD5YshQJzwweiu1LHBjPDroDLuC9KRKGmNQ16HH8ElisPibM7cUZhPJu%2FjHuc0h7jPGnTdslgkF7urAZxNHtB6kgasdOGlFcuFBEZESmzwHJSKvD4plwzvIrM%2FRaAO%2FnC7HqfoRSpH3l3vEznhxQdmsxAqzHdJUWemgYsJAAhHJ3Zqikvx3v7JwweWSlUViyJjProWZJJPSkyg9aRC2Ye%2FqcbuaArVYLRZ%2BTCjfDRWdwasfewuLNub%2B6vf7zcTOsDFyqoRegsl4%2BUDUwKj118QnMiPm5Ql3VXYRdWLbtI6sbnFolb451wiVJTpdR8bWl5HlYUSWYnnEQuchK5ghdKGq%2FJcwMcwki4SCBXkIAeRj%2FZlHNt4EbZveW6AJ1Rio8wqluK50Epe7Y7A90hXmOtLEPm7K0pNDeNFK48BQNrFESBsyLyK1fxbrM4g8uKewqIbYxxR0CuzNiRxy6Fh13qkbCrT4%2FmVvR%2Buw5NLlhO0BOVGMOvmVNly3Z8a16nrcB4s26V0Q5sQH9PByym7NvhyS88DtPFgT5cSjbkOScuM%2BdfByVb2Cij2N6AhauMnKJYPW6KL%2F9oR8LO%2BuM6Mkv5HzEGp50PbN8Y0el6riOaaJggW1aX%2BdUql2UhVpOaLeLRtMuqmf2S5fxSsxNiQZOQJMETGYMXl0HDMN7QkvEsy%2BnBvgXfiHn4knEtyydT36K7HL1qgGHtyuZNzd9N7%2BNX948r6%2F7%2BKXgOnj9%2B%2F%2FLPtKmlOT2OaOjM83Isr1zIJN%2F5vKL1k3meHDpEbRyfyDFJjat4Batnuh7RT%2FIJesKSgNEvF2uWNKYSyBFt4ks3ImoN8FGwJtfJrKjFLbONS1zreD2ynZxxvJyQrgvDFvzipipzzC0Dxy23oo34IgsnLtPLtod17%2BmJ42Js05zm1H8yksR1aWTVWTrEhhL3lPtHhqHz1o3iQbspi%2BOytOrx72ViVpal9y5C1UxcS%2FW0V4iZJTo1MJniQ38oKrVt1I1cBuvQez4nKdco3s30N%2FjCrmKWlS1fTs2WkG8uJSi6XDoBO9uoBd2oKibIegc%2BGf5yH17A5IhFuqS21UFbVQhstJn62SQ1qkm5H
KB5nJRmXSyqQJrZjwr0ee3F7t8XzYOA1sslCAkKSQJ%2BLYvX26Hf4CHMQm8184n7rFbHmW70HIgekFdQc1gx4yUZ7y97W8hPPbqcnFJTjvjhHXnWp1KzNauv1nlZDT0I6UjbGcULbsQSvcxBnSL7eebfTA11E0m%2Bpv9xaVbePq6VaViRT%2BWtf7jTK4381JkK5yGALl7%2BUjNGM5IOQingnCSHpMR0ASCllQKPIXaAT6YszLRiLQRFUbKmPAtUjZnICRkSDXV2LEYo1aeC1CFSbQwLfWvtlVu8zJb%2Fd9ksTA9i9AKW1sfCGWmJQY9Roi2Y6VWi7DctyZESJO6apUNEZFWjuOhh2NbDPxf3hM6mRRoEjBiPzANRrMCz79B%2BrYEfk2PldhfF2dbbp%2FQkOhom5BEhgfG%2FCEXRshg0tHVU9MQomjJF14BUxcJvFwa3o4sPrAQkc%2BrmSkQ2evme5lOkggOCPqIDwbt%2BTQ7iYyeUW6Tjz5f3OZRhqZ4LBGK81vsvyraqYO8ur95POGXEk3MB6cgoMoMo3n8EV7tLlmf5r3t%2B4gxCWoEDeMVPuL7trSHbJ5TRp%2BkKdC%2B2%2FR5urBKAHYzkcBdQIi4M6wAySwGhilgNC5BEns9uj7hGfImlsDgvmuJpLT4HkEidV%2F8H). Specify the desired analysis details for your data in the *essential.vars.groovy* file (see below) and run the pipeline *rnaseq.pipeline.groovy* as described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go/-/blob/master/README.md). A markdown file *DEreport.Rmd* will be generated in the output reports folder after running the pipeline. 
Subsequently, the *DEreport.Rmd* file can be converted to a final html report using the *knitr* R-package.\r\n\r\n\r\n### The pipeline includes\r\n- quality control of rawdata with FastQC and MultiQC\r\n- Read mapping to the reference genome using STAR\r\n- generation of bigWig tracks for visualisation of alignment with deeptools\r\n- Characterization of insert size for paired-end libraries\r\n- Read quantification with featureCounts (Subread) \r\n- Library complexity assessment with dupRadar\r\n- RNA class representation\r\n- Check for strand specificity\r\n- Visualization of gene body coverage\r\n- Illustration of sample relatedness with MDS plots and heatmaps\r\n- Differential Expression Analysis for depicted group comparisons with DESeq2\r\n- Enrichment analysis for DE results with clusterProfiler and ReactomePA\r\n- Additional DE analysis including multimapped reads\r\n\r\n\r\n### Pipeline parameter settings\r\n- targets.txt: tab-separated txt-file giving information about the analysed samples. The following columns are required \r\n  - sample: sample identifier for use in plots and tables\r\n  - file: read counts file name (a unique sub-string of the file name is sufficient, this sub-string is grepped against the count file names produced by the pipeline) \r\n  - group: variable for sample grouping (e.g. by condition)\r\n  - replicate: replicate number of samples belonging to the same group\r\n- contrasts.txt: indicate intended group comparisons for differential expression analysis, e.g. *KOvsWT=(KO-WT)* if targets.txt contains the groups *KO* and *WT*. Give 1 contrast per line.  
\r\n- essential.vars.groovy: essential parameter describing the experiment including: \r\n  - ESSENTIAL_PROJECT: your project folder name\r\n  - ESSENTIAL_STAR_REF: path to STAR indexed reference genome\r\n  - ESSENTIAL_GENESGTF: genome annotation file in gtf-format\r\n  - ESSENTIAL_PAIRED: either paired end (\"yes\") or single read (\"no\") design\r\n  - ESSENTIAL_STRANDED: strandness of library (no|yes|reverse)\r\n  - ESSENTIAL_ORG: UCSC organism name\r\n  - ESSENTIAL_READLENGTH: read length of library\r\n  - ESSENTIAL_THREADS: number of threads for parallel tasks\r\n- additional (more specialized) parameter can be given in the var.groovy-files of the individual pipeline modules \r\n\r\n\r\n## Programs required\r\n- Bedtools\r\n- DEseq2\r\n- deeptools\r\n- dupRadar (provided by another project from imbforge)\r\n- FastQC\r\n- MultiQC\r\n- Picard\r\n- R packages DESeq2, clusterProfiler, ReactomePA\r\n- RSeQC\r\n- Samtools\r\n- STAR\r\n- Subread\r\n- UCSC utilities\r\n","organization":"IMBforge","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/58?version=1","name":"Version 1","author":["Sergi Sayols"],"descriptor_type":[]}]},{"id":"59","url":"https://workflowhub.eu/workflows/59","name":"ChIP-seq","description":"# ChIP-Seq pipeline\r\nHere we provide the tools to perform paired end or single read ChIP-Seq analysis including raw data quality control, read mapping, peak calling, differential binding analysis and functional annotation. As input files you may use either zipped fastq-files (.fastq.gz) or mapped read data (.bam files). 
In case of paired end reads, corresponding fastq files should be named using *.R1.fastq.gz* and *.R2.fastq.gz* suffixes.\r\n\r\n\r\n## Pipeline Workflow\r\nAll analysis steps are illustrated in the pipeline [flowchart](https://www.draw.io/?lightbox=1\u0026highlight=0000ff\u0026edit=_blank\u0026layers=1\u0026nav=1\u0026title=NGSpipe2go_ChIPseq_pipeline.html#R7R1Zc6M489e4KvNgF4fPx0kyzmSPTCbJfLOzLykMsq0NBgLYOX79p5NTYLC5kkyyNRuEAKnV6m712VPPNs8Xruas%2F7YNYPYUyXjuqec9RZGHitLD%2F0nGC22ZDGe0YeVCg3UKG27hK2CNEmvdQgN4sY6%2BbZs%2BdOKNum1ZQPdjbZrr2k%2FxbkvbjH%2FV0VYg1XCra2a69Sc0%2FDVrlcez8MZXAFdr9umpMqE3Fpr%2BsHLtrcW%2BZ9kWoHc2Gn8Nm6O31gz7KdKkfumpZ65t%2B%2FSvzfMZMDFYOcToc%2FOMu8GQXWD5RR44%2F%2B%2FFO%2F%2Fxv8eH1%2FXZ5Hopg0tf6geD8184LICBQMMubddf2yvb0swvYespmS%2FAr5XQ1X%2FbjcP7rzQHtYRP%2FWXbqOFcxt2A77%2Bwlde2vo2a1v7GZHfBM%2FT%2FwS8cjNjVr8id82f2LXLxwi8s332JPIQvf0XvhY%2BRK%2F6c52uuz0Yyxtf21tXBNXDhBvjAvXU0HVqr4FEj0hU9uQJ%2BRlfPd%2B2HAH%2FQ%2Bp6ml4itGv0kbdrpV99vf27%2F2J6%2F%2Fnt3eTf%2F5e3%2B7asM8egHczoO2QLiVYt8gqHABbDRQN0X1MEFpubDXRzhNbZvVkG%2FEIHQHwyHxPj0Vfpjub6a%2BuZufbO4%2Bno7v3%2F92ucbeqeZW%2FapnjI20QxODbjDXzThyiI3xo9bjPqnLtlWwSX6a8X%2BTx5buMkWNDDyLt6awN8IVpnaApinwRY9s03bJZ3UOfkRLtrStjh2yHjNDc1bE2SXY6iPr5bQNKMvlfBv8FJ%2Bh9AE9XTlagZEiJBo1u0N1BkGrUzN8zg2cXIhkc%2F6aPFsDLj%2BTMrDrB1wffCciwns7nSsDpQRfYqRbbR806DtKUIIJ2xZ1xEaOJSGg9HxaCTEavlD49GXMf6tA4%2FgBjGyz55DeahcDSIpUzmGRfJISmHQUJqlMWjM0eoY%2FHn96Z%2FfSf%2F8%2B8f85ou7kY27i5fLvno0V%2Bs8D6uQ1xRkNeqwalZDHv3sutpLpINjQ8v3Im%2B%2Bxg0hug2nihDd5hn91Uluf%2FQHHUGIbcFUCiHg%2FGasX%2Fxzvr4Bfz3qy18vd9%2FOXvvy6L1jYIyctYKOajfRkZ2FstAx1X%2BYj76jmZTX%2F2j0zVuDLO6b4KzJBsz5YrjPeTK%2B0fcI9n5GHeSh8yxg2GdK73OUSdPXpXh31Z9NzuZsfXndvwWP6Ilr6AATIjaLhnaaOwahlJExAQEoqWxz5AyOg0OfbCnoawv0NNrBfOrocA%2BW0IJEDEXog%2F%2FZM8dqFsRzNKtIm4alCxcsY19a%2Bz5WIXzG%2B0WZr6CPRLmBazoDC9EWZQ43i6WN6Az68%2BriFs9VWdnooo%2FX1rQXeJXADis75hwQiBTMMWp4GDPm%2Bho66K8BvztAEqK9e0nPJrsjh6AmQBHRPLObG8azTqOwaWuGhxU0GOBYHWMbWxMvnqRZBkZxz0OcCWom5VIaOc9XtIsr2IDPQN%2F6ZLiRDYhQFb8WWiuyNU888An9jzQ6GnQB
mReZ3YlDbhnAw6ek4rPKPgJVck5IcDI1fUyQBQdNmZ8dKj9mDgVsLgECdHxy8J%2FoeOWggxaePBXCuLAz5Q2BfnAoBFcuny0MQzkFMkUAMd5WTvJJiR79hHpgEn%2BBvVx6mIomFqEC8aNd6Zn%2F%2FSsmSe%2BTnmOycyhKN3B%2Bk5WiEnPVAnPG5vvhAffb4j%2BsZECEGOtOBDKkgDwu7GdMHYlGFdPHhe0awO2jZixrYBBxxCfUU0JYPgruRDXxn4lsooIh%2Fg16OJphBO9W8thPK6IEXT5v4D8LtFjxm3kig4AfcVkuDfFscCZZFdhEByXpaLG9QPsd3pJSvQKoR3op2aD3F7bxkmpMcS7f4C2X17n8LeyYfpS3wOi7ECJt8PwhAZmFBIPI22ChL%2BDG9Ihz5xD9TkXz8LSNk5qD8N11zerq%2BsddhZOynK2ftz6Nz63iRWPza33dMBV1Dp0Fe5gNXrKXkRlVMXDcKCIRhIASMheVZfOlDCXC%2FphYEpE4sFAGdc38zCwNPhYjTrndwQRL%2FBkb9VqaRL%2BOlfi9mEBRhcycUK3LM4EAKJKZuVBQgm2jywjnLiGuKdPf4lpxcY0fMt6MuOYIRYcNGj60qHQgOaGERtv7ZLfge%2BPIPYT0fp%2FtoM9EPrDoqXufLu46PASnDuuF1XBOsm2d6nXQVCkpwDenYqHyNKIq64zmKlCADHaa62XqrbK6pUVQJnXnLECG2iq1LvWsQoJROa69gwbYpwoScavo5E4D3dIVejAGpBhABN%2Blj%2FWxNRYuoR7%2FunRiO%2F7gU9FhiIC5lwEq%2BQywDKt7WkMfYHcY%2FOyTS9yAqmd%2Fw6FI%2FyFgf7PyjgmHcz%2BRsaRRLRL%2FYGfVSMP4KtagRhI6t8lyaiHenVzSlCtbzNx7uPCjFrbujjsi%2FQSyxVzz%2FO9nhKoXETwet4h%2B%2BniEuo3WzDb5acjVnsiwiX0CDxbx5oce07cneP56uxggmpHD7xmv36DBYbYxD%2BwcuBvW76Mbj%2FqANgfsey9roOuUyRpE9CzBLph26C9yda6OM5gE8ZJiL5YqPz6NuOmAn5%2FG0xTlk8dymvSNy3smFWEgQmclfqbLIFPAXNhPrVOo%2BijCsCBF4F5lrft7xK1YQ3b6rcofQ4gie9y0f6MIs%2BfNOoki6rQJFMkXdj4IilS88mlvLDlxIpETRlEmVtGnwjU%2FGoVG1aLQXOS02KoaT4qh0eQgcVndKy5X6KZYmChVjppFfR6Eiyz%2F9o0%2BcpWFYG3quFJskdXjF7nSsK44j5h0X0lfeHerHRE5xnF%2BobCDT6YXcrI%2FO2%2FXyl%2F4mb6bWHkYUu5jOfW5yRelTB1B0KTz32yYj6CJ%2Fspo1gCC5p%2FEfyNoPQLS5F1g6LhiRYAQQ4f5ioC3gaFyHEOVfYy9AyhauXjXEooqx6BoYVX5wn7yIVAK68p5S8R38QZoRg8bdx0HmyX2WJaz4hBo04lHXJGwVdWhgSzYLf5T1vN7dPIH2%2BFD1XxohWeQKq%2BfH0awpE79fMXuTDMljsBjtgNj%2BniBKbImfXyu6UkQ%2FbbfM4GEdDD%2FBOkA15dvW5%2B6AeIcJqhDuIUi%2B0d61PM2Vnu%2BLhSN8WYLpgFp1A2NXQnHXcR54XhnvGgMPo%2FJn2sbaGL8%2BwrMHcBv7TXhyjAZTmOor0rjNOrz7XEk6hfE80nH8FxKoboLvK2JOWIXkf0EIRmaGzA%2BEcaiPYTo3qxv0zlcLoEb%2BjDRoZjQ8xsfyjX9tmZZPGNJx3Z3DTt5OBZkTBHtZB75Xf1OnnZ%2BJ5v2qqPbGI2sz7mUbpsmkhZIJCXlWEDT1z3uq%2FcBkHmsFkTmKtjSn7uLe%2Bfs7od%2FtQHj6832yr%2B%2Fajd3SxNOW4efV%2Fnpf%2B95ddaaOS
J33J2hT2mJ2gUOQpuOkqi4C3nN7uI48SI%2BpAYQmXtraL3cswwG97QdHR3vcU92mqWNg5uNkR596n5m%2BGLSsbrLMx6QPujWYA1MB7je4EY8dXHHvBDOdhzi8RohmhTsBbxjXOx7GEtBEJVvCweltTMhxLPh8oWwMh%2FzcH3r%2BXQ%2B3M0%2BTAfhI%2FTy1ohA4CaI3TQtADBHKTyxd39gncgFJQNZqiAt4Fyo%2FT7eobt97XdJaaE25ffw7bpiX1qoJ9snBVXMeIo4axF%2BEEMHP8sJ3FjbYESwFh7dOvEQ49TXaXIV0UeZmFOl8jjTsZtOBc%2BjtPJ42JRzd2TLJFJcAvLTS6a4RHfUsTpTjV4kcWVNvuAqv47Ss5EoTWU9umdxyMrwvZ9%2BEvSs%2B6mYh6M3SyedtWYhkYcoBIsSyu9nWOJCI4C6R6kfpY70X0wcoWVQk5y%2BRoKUjoAPPT%2FonaCVXhat3EMoj4l8Cc1rEQCUp5Kj31QS%2B64LLHQNUkmhHMj1nd2UAw90b5y0JgiOCxK46h11jiVw4NkHLOFTEdr2hXan0Xf41cBakaMhWlg0wN2KiITaagOsiJDYDaJGpT464fK0bNw8LYtRpvKErQJalvQ2GEuTNC1r0NtAHLfzFqL%2FqnPJrijeh5OivTSL56Zv2XNroiY9sY7K4lzcE0vbnGEtE6l%2FU5BK6og6%2BiQBle1uEBnwiDlsAVdPEBNIH8l8D02ELqOx62zs5QnepHmCV4P0Neb5UFuiWD8t9eHp9PHu35%2FX9zfWn456ubv%2FXVyhFKGapgmVEKqV06mjNK0812s3JewDBewmA4hE657rhdW2a3EiIGi4z7U40X%2FEMKZuhoaP8vcR96LCTA1Res2FHkleT20%2BO%2BjhHB2vzE%2BpGYVEfPjl%2Bdq0W4L8coR%2FRYL8mPz0qAkrKuCTn3oE%2FGHL7sRila7SJr%2F8rdLNyRPxFlW6C70wzbs%2Bu0FvO7V93wQW0LFj5pkNlkuoQ8DSEzRA8Rblsw%2FR9fnoqtfhSGBwb91A9e5T8785ajYq6hXYPWoGHQQIqoQtStQc08ZvpSnO%2BcPNkLJwtKUp2kjqNU7RaiBJI255aYkkiYs9vvtae5XKPoVjXqcNUYuCGolO2%2FzegkaicEqTjmok5HIpTZrSSGw03Sse6syCs3TNNIkTRSOMiwyxvBT%2BPmKM1anAz6vtGGNulupMRETays3tPF0MiFjA1c%2BYMYpFazGVnhcLPexuqHEF2J700h6NBTbuuuK3FpOtcWFf%2FeOATf%2F7zcyf6Rfj%2Frs%2FIFbJlKeFmXJbwljusA8iX7VXC7kBJBxaxzbsC2DZsRJeOby5fbpGw55Ov%2F28u%2FxC%2BDPaFc%2FE%2FL6KzSMZKMXvSjjdN46WQnx%2FSyHQdBAOmQC0cTmLaBQ42h36A%2FZCoKWh8mOI2q4Iwjz%2FTVsnoycxQXgO%2B4qhHFSio2MVN%2BLC00SYpUJw4Jfl2mjNSEBrEoCrt75GQO0%2Bbn0NYaR2q7EKcb1LwNxL6V2CRGQNcPqZwMtWqOjgWv%2BOsPpZdjakztdxDXh9JASZoFL%2BoeajlVClIInnbEL%2FO0FIbgBaVpxFAidDuXlo7YMFffdTeL2EFklFE%2BTvoaVPNIJuosoo1CNDQ0%2B9eNAjgAaON4gsSpEqlAWLcPbKVsQs%2Bf4PMrBeqVKclVbWrEn4mfFSMLkhz8IS9XWFPKdlndZO028gbbcsFw7W6xSTDcbdij6QVx7OT5CiPRkaPlt2UR9Iqlz1wmyDJ6%2FQcUhitpbPlOisy2IlbWuHz47kLEyVlY4GKXNNhTB1J4cGLd0sD27kAYHxYPXa7XEqB4zzTSuAlTjLkkVHT1GFzFlNpIznUO%2BwZQMnFY6ldukQJQtTTxRWkb1p9E3aLxBYBCKXwH4x
qk3iOt6A0cHg4ialsJlcUAjjeeG645BGcnXdsDxgRW36DCCYyVP1MWHvaL8%2FGPaT1QsTauFRV2Hsz8sozrON4RmUNvnPovB%2Fuyb%2FybBdk%2F9X6Y%2Fl%2Bmrqm7v1zeLq6%2B38%2FvUrD%2FAS8MUUamHaK%2BQtSZXSUMRC%2FgIrEuUeoC59XYqFLDJZCGa38UWKe0FbaPF7cZdp1sSxgTFo9TTJjTbQMAjFFGFFnIpWwV24byyXjtQUXgxF0pFSHi%2BKIUG6VvgC7%2B5eJMNsT1GfP%2FdoemmTnA9IPjrbwYcHzfyUWq3DdmUV0E0WKRTsuqEAusO6oHuMVXqcFD0jSlgOzbCRwFV0LImlEoskTgwXp3UR80C5sox%2BrgrkmsRdj7nabS9yVSAXCpGrSTNkLnZ%2FXCukECxv0D7F1ONzamjJ2Y0fzRKVC4wPa2jp7MBatABVwGFmk8PEF3WYTRuP4jBt1mOB1AYwtxMatE46yX0AvVgSOQuLP%2BOakDO7xEigXb%2F0ETT1qMt4qHgny7n1MEZJDNeCFNvv9vQ5ies25aIUprbjZ1o3f06jXYleClBHRwNBjOR%2FEjlqEL%2BM97xkcSlWFmiSFOGSVWBPEbrTv%2FuEwAdqnoXAKqp4bsr6v0%2Fv3AFn%2BeuojsJawtXW5U7PXbSPxQuDoFOLlfxqzcVCKJBwO%2B42oJehK3jSnCfsxGEoKv4hmlKz3vtNA9JxgbZBpwUvE4qCHnkgbMcFIQ48kddBqqFmCHP1Y9xARKrdZII6cTcPzN3x%2Fkg1GGBJSdqT7T6YNislbGkrIn%2FumcP%2BdSvR4ALKtLwS3z2pboAIENrWJKUv2AgYLD1z68a0QDjtOy18Y0dlc8nb%2FxFSWgM8Ax3JEW5snXEEjRlbwELzwp7E1CNYdDeM0wnKzQYEIniQVmqKhfCcELPsAwAYJ0PAeEgSgtbK%2BzTYM7zE0TJXasu3o3bA1TXhNjQapuN8hH5D8kgpLQUVMZkKoagIzpx1qdzz5MkPrHKfC%2Bsw5eesfpO5OJo8phTODVp5CpaDMnEkk1dPWMadrEwcqWTX41ECC%2BvJxMGqtxKz61%2B43mzx5KDOC1kPjxROJqVqaU2AQB3KGrlWVOnVlrZjAz1satEsYG89IieTWZGTRODRowh9erKR7c259IzkOIOaSIJQjAZdesR0MM2PWqWD0qAUJUQXQXq6JHWU26OOohgOIfh5cvSOZKjiBfXePzYobaNDrttpV9ChYwnLKkSHhOiktowNeTJW88iQO%2BxCR5mta76c4sRGeIL7eG2IMfSK50U%2F789Kwr44cx5ORoNR%2FOgxkwaiWMk0f1YntWUIfIenkrr89vNKULzVBIGjSaymTmcSBBpwucRFEwsfR%2FADOIERzjTTixZcDOPj95w%2FqigzG2qM%2BQTywgmyEarZo0d19QmqTu%2BdDFIYqTMRyRSeacqTTMGZZh%2FCdtZdsmZzSYD7xPYRwXfixoZpoXcfbAH%2FWeD3ndErz3jy0Tw7b%2BEGmppLyNYDCLiMR0GFjQ8LomdhCUTwaGi8A8aQMGcq8x9aYt3MSWCjoAFXGPoZ5FKyLfMFK%2FilNLWNLEqF%2FpiBAeEaDZ7iJ8uG0gsSrLCkKonp5YxPZJWoa8xndDhk4DhjW4%2FlcGlktL1SjqX5tWJzeVBnU5EocWahKoqAWYgtNNJkUD4G4je%2FqIRfEEc5dH8fx8js95tn5NClvXT%2BjJB65stEAEwoFmU7uB13QALsgE79z2877%2BcdGv%2FJn9%2F6P%2B%2F2WoDroLUXCNmdXpAVE%2B9Zlm0yxiE7RlXza6O%2FFaqqDAtmy5g0aFRot15Yh8tZCKFVNKtiU6XLC6qq5KPXuHOqqpo0VUVXPTcZTcc0Vcq4XCkLhVVuqltThejwwn7yIVC2FnzcgsIaKxdsEDlH
b9%2BgAw7s4wQ7lDW7QDNqUFflJcBITiJPa5WNXM1qrfJKbBaoRFeDdkoYDde2vV0%2Bnmx%2BINY4KUgkeWGgrvDG4%2BWfD80biy57R3mjOixXePpI3lgMJysoMde9nGD7zPaJQpgtEKbKw7QOw9BkmpvRHukt2b8hO%2BPN5nzrFHd6DGQ2Y%2BuYJOTTqE9gE9gXUVf0r7tBn89NVJaNQs3KaOhlUOf0unPymiq3W%2FpczMqlNuW1dhh3jGoeTiNnRbl452r6nmqb72eFqZAoxpzYmcJEqHup0THe1pQKoY895pYpz16jt%2BdZnajpK1KCytzjoC3KMTqectQocCndIiYVy0dpgSaRRXE8m8Rfwaqi06dyJKMJjzLjTEtNZCthNdOTLyodsCJNhQPOHFeiv9qQvu1vrC0rQSy97WajuRhXFen7WS%2FIE12%2Feo2SSaLdO4BQUkf9N0cpkzLWeKp0T8YaZVeOoAb6dPJKl4y0XF6lmO05QZkjUCfYfhrY8CPC75z8CKlcNMXSOEn14v7NCVn7yxj%2FpmRtlltm5WoGBKF3H2uOCvIrU%2FO8pFAvVYM%2Bs6RmQ1SIWFRNRpUqSOIkRpZ9yRJLZDolT23NZIsZhr5rT%2Fh0SYqGCHMJOabtR9OMmCnbduRlgAXxc8uChx3dPT%2BebyrvDZhAhpWCf5zdYgLKy4NKCM%2BeaHaqoKQwCQBkFYUJyWVj0LHlmpSyzNkveSOBFvoSCT2n701PpfQrNV0HnkchY8KFq7kU1LgqxjOF%2BwL%2Fc312g%2F%2B0fd8EFtCJJ5cNlkuow3hu2ryPYZHDcqG%2B3pBn%2BDoyyEaHcnlNpoc20Qq79RV6u%2B7ankfG5VK5hZYPZd9ILYqz1iw%2F8KMrigzEfc0IH5K4Iw3NHvA%2FYOGPGlBDNGQTzo2WGcFHFIQCpuYUXZ1YIdQYtJZA87cuUcMgFHDhYsu63c17UR8X09ZhMDzug5dAxD2jwL13EDylxlDA37wUWqLGNF3Yw0OOzyDGJIUj8ofFtDpRLlEFM%2BCnrCDZpCDhgixgBsqs%2FFkd69Vs24%2FKq2i%2B679tA4uiX%2F4P). Specify the desired analysis details for your data in the *essential.vars.groovy* file (see below) and run the pipeline *chipseq.pipeline.groovy* as described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go/-/blob/master/README.md). A markdown file *ChIPreport.Rmd* will be generated in the output reports folder after running the pipeline. 
Subsequently, the *ChIPreport.Rmd* file can be converted to a final html report using the *knitr* R-package.\r\n\r\n\r\n### The pipeline includes\r\n- raw data quality control with FastQC, BamQC and MultiQC\r\n- mapping reads or read pairs to the reference genome using bowtie2 (default) or bowtie1\r\n- filter out multimapping reads from bowtie2 output with samtools (optional)\r\n- identify and remove duplicate reads with Picard MarkDuplicates (optional) \r\n- generation of bigWig tracks for visualisation of alignment with deeptools bamCoverage. For single end design, reads are extended to the average fragment size\r\n- characterization of insert size using Picard CollectInsertSizeMetrics (for paired end libraries only)\r\n- characterize library complexity by PCR Bottleneck Coefficient using the GenomicAlignments R-package (for single read libraries only) \r\n- characterize phantom peaks by cross correlation analysis using the spp R-package (for single read libraries only)\r\n- peak calling of IP samples vs. corresponding input controls using MACS2\r\n- peak annotation using the ChIPseeker R-package (optional)\r\n- differential binding analysis using the diffbind R-package (optional). For this, input peak files must be given in *NGSpipe2go/tools/diffbind/targets_diffbind.txt* and contrasts of interest in *NGSpipe2go/tools/diffbind/contrasts_diffbind.txt* (see below)\r\n\r\n\r\n### Pipeline-specific parameter settings\r\n- targets.txt: tab-separated txt-file giving information about the analysed samples. The following columns are required: \r\n  - IP: bam file name of IP sample\r\n  - IPname: IP sample name to be used in plots and tables \r\n  - INPUT: bam file name of corresponding input control sample\r\n  - INPUTname: input sample name to be used in plots and tables \r\n  - group: variable for sample grouping (e.g. 
by condition)\r\n\r\n- essential.vars.groovy: essential parameter describing the experiment including: \r\n  - ESSENTIAL_PROJECT: your project folder name\r\n  - ESSENTIAL_BOWTIE_REF: full path to bowtie2 indexed reference genome (bowtie1 indexed reference genome if bowtie1 is selected as mapper)\r\n  - ESSENTIAL_BOWTIE_GENOME: full path to the reference genome FASTA file\r\n  - ESSENTIAL_BSGENOME: Bioconductor genome sequence annotation package\r\n  - ESSENTIAL_TXDB: Bioconductor transcript-related annotation package\r\n  - ESSENTIAL_ANNODB: Bioconductor genome annotation package\r\n  - ESSENTIAL_BLACKLIST: files with problematic 'blacklist regions' to be excluded from analysis (optional)\r\n  - ESSENTIAL_PAIRED: either paired end (\"yes\") or single read (\"no\") design\r\n  - ESSENTIAL_READLEN: read length of library\r\n  - ESSENTIAL_FRAGLEN: mean length of library inserts and also minimum peak size called by MACS2\r\n  - ESSENTIAL_THREADS: number of threads for parallel tasks\r\n  - ESSENTIAL_USE_BOWTIE1: if true use bowtie1 for read mapping, otherwise bowtie2 by default\r\n\r\n- additional (more specialized) parameter can be given in the var.groovy-files of the individual pipeline modules \r\n\r\nIf differential binding analysis is selected it is required additionally:\r\n\r\n- contrasts_diffbind.txt: indicate intended group comparisons for differential binding analysis, e.g. *KOvsWT=(KO-WT)* if targets.txt contains the groups *KO* and *WT*. Give 1 contrast per line.  
\r\n- targets_diffbind.txt: \r\n  - SampleID: IP sample name (as IPname in targets.txt)\r\n  - Condition: variable for sample grouping (as group in targets.txt)\r\n  - Replicate: number of replicate\r\n  - bamReads: bam file name of IP sample (as IP in targets.txt but with path relative to project directory)\r\n  - ControlID: input sample name (as INPUTname in targets.txt)\r\n  - bamControl: bam file name of corresponding input control sample (as INPUT in targets.txt but with path relative to project directory)\r\n  - Peaks: peak file name obtained from peak caller (path relative to project directory)\r\n  - PeakCaller: name of peak caller (e.g. macs)\r\n\r\n## Programs required\r\n- Bedtools\r\n- Bowtie2\r\n- deepTools\r\n- encodeChIPqc (provided by another project from imbforge)\r\n- FastQC\r\n- MACS2\r\n- MultiQC\r\n- Picard\r\n- R with packages ChIPSeeker, diffbind, GenomicAlignments, spp and genome annotation packages\r\n- Samtools\r\n- UCSC utilities\r\n","organization":"IMBforge","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/59?version=1","name":"Version 1","author":["Sergi Sayols"],"descriptor_type":[]}]},{"id":"60","url":"https://workflowhub.eu/workflows/60","name":"DNA-seq","description":"# DNA-Seq pipeline\r\nHere we provide the tools to perform paired end or single read DNA-Seq analysis including raw data quality control, read mapping, variant calling and variant filtering. 
\r\n\r\n\r\n## Pipeline Workflow\r\nAll analysis steps are illustrated in the pipeline [flowchart](https://www.draw.io/?lightbox=1\u0026highlight=0000ff\u0026edit=_blank\u0026layers=1\u0026nav=1\u0026title=NGSpipe2go_DNAseq_pipeline.html#R7R1bd5s489f4nObBPoDvj3ESO%2BnXZpuk3Wz3pUcG2WaDgQJ2Lr%2F%2B0wgJIxAY29jGabZ7TowAIc2MRnNXrXkxfxl5yJ19dQxs1TTFeKk1L2uaprY0rQb%2FK8Zr2NLtdsOGqWca7KFVw4P5hlmjwloXpoF94cHAcazAdMVG3bFtrAdCG%2FI851l8bOJY4lddNMWphgcdWenWR9MIZqxV7fRXN66xOZ2xT%2Fc0Nr8x0p%2BmnrOw2fdsx8bhnTni3bA5%2BjNkOM%2BxpuZVrXnhOU4Q%2Fpq%2FXGALwMohFr43zLgbDdnDdlDkhaV%2Be%2FfwuPi8uHz79%2FvN9%2BFPf%2FlvXeUYWCJrwYBR0zoW6XFgmEsAr2VObXqj83sBYx14FA7RJfk1ZX%2Fpa2Mv2ULGRPvirRQawSsH%2FiyYW%2BSXSu5ZaIytQQTTC8dyPPpQc0j%2FI4%2F4gec8RVgiUBxMHDtgJKV2YNzIn2GD9Uj7ia4mpmXFOr3qwL%2BoU36HIrE5mHrIMAlwE826Mzd1cqnAIxbyffY7wq8STTKOHIavJfYC%2FBJrYsgaYWeOA%2B%2BVPMLuaj32CltTmsJQ9byi0JbCCHQWI84OfxCxVTGN%2Bl6RB%2FnBKEROLf9i21w%2BXM8nj8355L63%2FPEjqNc7Kdxhg6wjdul4wcyZOjayrlatMQQAXP5bzF3%2B%2FBS5pGX11hfHcRme%2FsNB8MpwihaBQ5piVIJfzOAf6LDRZlc%2FWffw%2B%2FIlfvHKL2wCgNhLcPmT9wcXq9foFX8vTW4hDGDimQuRNfnOwtPZU%2F3P6rBl%2FfwyuhtMHr%2BPTHXx2Ku3GatD3hQHOXBvygnFwxYKzKU4jl2Qfq18nsxue4G1nN2Pb68fhr%2FerusfHGIvHMJzAoI8BwBX75fEMnrtrsAy6n0txTLUnpZmGS1F2516rkZfR%2BdB%2F9v8WRuN0C%2F%2F4n%2Bvn%2BvdnVlGaQyC35EziKyFvmZdS3fVZj%2B9sKUPthQpNznAUn97DC6%2FK%2F%2F8%2B3l4f%2BXNVeP76PUm5DOngKwsbr7i35tx862QXBDHzValcNxW3zuSsW2cg05ALnXgvsCLoXFoWnw4aToQtoQDEEXzWESRN2zJFg9bp0AtfFOHG3Wf4vucPKC23JecHT9quNBq54PL2%2FP6A%2F4NMDZdbJlk1yTtg7g4MC4kNISjE5tDoWTHAe827TrFrBmgMXk7NkmiIOOJaZt03ycosddMpRzg%2By6Cr%2BlMElk9qPt%2BPfAWto4CGFv8ss4oWNKpEht02HUhtFRyLmGnCKQhD0%2BEDmdBAMaHc1hY2nBqBkTybHiW27Chq6E5H08c0i35eTt6AAxrU4dc1IF2LWcMEi1egplkyNHvk9%2BE8n0g%2FKFhI%2FKjwe81iDjrLF%2FTY8x8jsMaSeC%2FAVqSknel1pDlIMMHKwtMH2wqjrGwAI4Ksg1YY75POLSJrJBbI8IMsXfKE8YvWF8EdIYxpkEIDbo17SllJ598fEb%2B0EYXmR6moKAA%2BeTSWwb2QSXbBhDlLNPUJ7MVulIsJYqg9nQ6aUMJ12LjWk9k%2FCp9P21J9tMECIgy6MJPoiy6RG2EyYcyFJdKerwhMjy2pODK3dALw1BNgUyTQIy3FZZK2Pe%2BOSYlK%2FaxutYWENYVO3AmEx%2BYbAIJ0aC3x
4vWO6bwy3%2F%2FFAThdcKvIPquJOEDaDicatdLswcSZn%2F42Ptr%2FB94AMjmAJYgUVh1pZxrToZPRC3KthTgWqG1RQnb6wFgEe51YvfIqgnqzLAF93QCPLq1rBNwv63YtmxHKibWusm2WeqpraZq4UkQ3uzJuXdcvJMJEceQlKJNvrFEnp8pKGU9toWclAL%2FfoAd9m3yBtdzlqaBC4g1Zh4RDSI56Za8KACDDibnu%2BFrdd%2FFujkhWrPwdeVT3giyGxw3aMjePCs6IRlatLWMXotxILYzxJg%2BN1mHOBo4ZJ%2BcWNRiMKFGgjj3fp6ZAX5wEWWVzx51WpQhtCS9O7IdWCa0KK2N%2BSa5jLHOTfZLmWHgoIIM%2F2BlJZmWgMYEbkqRZPqfvd7T01B%2Fsr4P6uhypGL1S11tv3dJRrDIlWq4LWyk61REromkhiHyg7sLysiLiBS%2FF4TRBTBCnWhjngNU5UxgwOiZDpuq1zBYsus%2BcdWsrN080tbhSdAIyeB%2F642wOdqg17LyZtz7mWblMvaTYO9km9eJ8vyFXl02OxlMnXoJWceKSPcl8Ps2d9Vzht9qphiV2pFwqs7mSmoRfi%2FlKmnu%2Fs58A6XykVZBPtKqHB8ZPJ4XZiKCiSxsuieMgwq6rkvWVUyoXO%2FZT%2FX1yceRMQufZb22fx4V6RvjZ7Q5j2odiEeVbDjj8QGcJ%2FW6R%2BVJubuwxDm1Xi%2BjVkqmnSlb6Pd%2FLQJ3AasJQgrJA6tFE1sxym89bykdT6EPyRiWVzQNM7SfhyZlGLfyiQkV3Kg%2BOv%2F%2BPwpuz0RU1MUAeRqrcpa7zhP6WYz%2BgSJNHVnnjOapySW5AtJKWDxsh4fxDNHctIBcr7G1xNDrQbS1bqsnrJRmP21jlqpr3BZdvsu2W7FloaRWhof9hRX41VwbS30SWw%2FPJsFiRPTQQsjVop6V8etqUVwj13KCVxdfwF1qFbm%2BOIuWzg%2FbnJjYGGGbPkTv%2Fxid5U12H1P7%2B%2B7hy1%2BX0LHueNj%2FWLRcOe92Gu1iy7YfPVn%2Bwu1VfuFazrSiq5aMrM7XrO6QNagHdJGG%2BxlGOqxivu1Vi8j3QNDtfkFy3kJeKxbeyT2o1TdDlan58bDNtZpf%2F1CaX0G3dHYod1VEaQ%2B7hCAqyn1EB9menWGQAAWKaQSRoT8z7ddfYUTQr7CZqIy%2F4EFyl8kuYXvjfm5IZB7JI5n%2BsaRTqdrTbtCnyN3GDFtE9PIb95nzlz%2Bb5yg8iNj2jYyIMKNoEcBS8Zx5TYwFgs2vUZP48b46hjkBXkLxSh9cCbb6wg9oV4jw2Vff9Gsx594qoCogiPVnZH36IfvRFBtjYNU5vrqq7K8HEiI73X6xPVfVSth0pVbajzD9TfbqTnqvlnvUDrRVFzbS3s8vF4ypFrHTfkXeE%2BWbc7KeSNfGwrVMiH81agd39Xhz8nV%2FYzNqiKuTN6M2O12Jnnl05456VL5x0i7jdlGBv3ou479DmeeKyPwbuHzawEDGD7ffahpYhqfMokW1blt3PAPZBGD0Hg70xiE4S%2BSgYVIcmKU35jBtpXaKHCbpPG42laNzGEk%2B88lH1zY3FkzSCe07iCqFHcq9Y5kVpGjvf6C9bLTnlQs4PNalRQlaO2O91GIQIh1o2xGCtm14%2FablIHZb%2F2F%2B%2BQ6Ip6%2Beex56jT3gQmijH%2Bs5EevY4lsJtz4z1WpY8Pmm2k0QXTiCgnGRxYNbkI%2FvsU727bHHSiwUlHtWb4GkA%2F2QP3c0iM6kLgfI2FQG519rkefwgHKPF5%2FT5rrVqYaoiHHS7WY6vveQcs%2B4uzBGzu0%2FLp7X7%2B77QV8fdervPha3TF2qV5jLHWu7yx32Vr6Tvacd3eMJJqgAnUyB8IM5LqrpVcOnAgGJwF4N%2FELtVVNhDkl
XBIvGuMeM2DRlTBZTmBxLWGKDKq%2B%2B7TaW%2BuQs%2FPaT7TzbtXhYE2Pgm0DkwHlAzIRvOTpl%2BdEGtC7Vaat0moplx4gWtbYkq0PtS2qfqWp%2FX%2Bu%2FLVn%2FCcDtNxUm4kB%2FbiqMVBFR04j4KEu3ix7Sl7hMpJDfVQ3ZaUH2s%2BOCs6o5jJ0XKGEAMeuUIY8dz8BenTRHTJutFVriALx67ehOvNRnGDXVxC34Fz3hIsOI%2BtYK1cMRPfKUmPIjHyTB9LzMTHq%2B2ZNJVnPA8%2FhgidxhWUzcFzpUUk9Fc449pWVPPBg7xmuqMbU9BQZvCUEiBjDDNk%2BomqAujP0KneSS4Abu%2FX6yzcA7W11PTJsm10YBqmFWFKJEJ0uaCr3jMd%2B5H2DXj%2Be1rkacnkPuNpx%2BDBrTEMmB0Yb9%2FyEDg0YZtUF7uGLKjVnYv1DU7ybqnPRbaamoJ9nh%2ByVEIsiLyZ1MecdtNd%2Btisltv%2FWqqqQm7Alox9G4jxJaeGMXiSxEzwYK0Jr99UhaMM2NXSmlyqc303WxcfDUgZQ2aqN5mOJHNsMlaJ20ct0kXXxKsLGuCRI74Ph9NHctrDbu1QaFcWP6Vu1xaluMs5wN7EhB7alCGBKlVVoHY1%2FFu3j13spESacTNCDzVojKrBArG6N5jJFRwZrlZYDQDLwtyhuG8JEoTI3uVXMH4lTojRs7PGXiHlOwzYHC6A3mlxHcIR%2BZidH6KZoT0uvtvn7k9phjyoSiN6R7WHdIOdYXfgBKseLhRwwto2H09zkJGzFBLFqWkcled4xVcUvCWp6M0ELPg98P6Fnl%2BQAwkY0dq%2F34%2Bjwlx6poYpcqkwd0rMrPnsjciVMb3m6FS7%2FgKa2gGhFxRsXUceZOAhu8iCTpkRHx8yVYE6cGJhI0B8lNaW4aBuWSMqoQOWcpG4p4iIQmqaYq20%2FKSHeQEsExE93NULMcOgmxrJKO1KpLQCUQZ9IEpvLA2DjXakmos1VCHQYpdWZnc0dK201AwKnHt8iVPkfxufBprQBGbFHO1bvlMD0xmllt94ohcW8sJq3xXYapBFQEYaFfBldTZB4C6hB4zygTnbtqpy8rpSBFWgkeeWnU1e4BqO8z6koKrKJKxUcp50jEi5VyJut8Yk4XHo%2FDqaLd5aBZ2iFAoB0ea4SXmbWZ5Q8dPeH6OBBzPYzmY8jizgKX5IkKwCoh1IjAk9moj1EsnFYPyARs4m62uJwCc3Z9a3qiD820d7wnOLCEgtRGUyrSrbHhb1BHe32Dx8Ih%2FQ2%2Bm1vUe7MBEkCghQXs3GQjYND3rYUnhJeAEdcEu7DtxMVdxV%2F%2FEUwL0tFTUtgRjbxbiJK0BJQXmteqeJfsbrI2Omy%2BfHFGL4aFKIQwzU%2FUqPWEqb17BRifiBamPfXPpBXRs9W1XDEo3%2FxUgbCFhIOnyesvr%2FXwdDYXK4pYmqRQ1CRqXEK6LC26M09A%2BwjuFJ0JaTScmJDf3VzKz0ou3NS9IKWzXlFFoHccwT%2BSqFl5twHs81%2BgIF3RLC7dcV%2FpDP0nypahlh1h0rVYFCFr5Ma8%2Fbsa5qYP0ZLIxs7Cp9IgnRwVjCOXgyZ1OmRj8eR8Dm1VtN51Osf1OUhL2h25us5GPIdcfMOeGR7RkuBDqsiHtk5t3craUDSKjTs7Dx%2FGJsX97gfcVwP3O%2Be3l4v6vI2oPMxvl9ScyC%2FtsOPRt0xSLiZsSoMls6TNhWe9DjykPwE01zFp8TD71BnzGyC6OFdv9UQQttqSKvKSXK0mP4m%2B9AJpR7UMV7hAWl7ds%2FXhJkc7x9xEw7vl893NTXPYt97uH2%2FsN62EnOs9lp84cPWJfaKdR0Mem08nikm0WJ5D1YpP0KjFeMhi8doToYSeqC%2BhIPgij4V0Hd9kNqb96yyxwhPRdPKio7LJ7OQUl
WRwVIsXgKtStS21QoGWlXKA5hVPWs%2Fvugfa5jat4AdHTvwYFeYnF2E2Y%2BxEi4UfZnKkT6k4IC9h49HpZPK5SRYST46bJKv3taXBE8fnJ39mLf%2FCBtEqM4br4odCZjOG1Ok2x%2BEL1xebF7c6UXtogjG0%2Bq0jMwZpdFz6EGBqt66tvKJZtouN4F0CQLs9MRmmIxHbZF6y1ubQLAa6E6xRwZLjh2GlhZwgiT%2BtFEUuMP7YMguVHVjtePUfSuBk%2FY64NXTSWRjSEOlma3dOJj%2FpKTsXZ328WSeZhhFbuhywsVAeALFs3aI5YMIe%2B%2FDHjYdn8n0m8Uz6WsklLWno5gYjTFBSBYJvkjuiPGFCk2yJJSRMyAlJkxBSAQFiq1CbfFL%2Bc2NtpNVctHzXN7bGzvPRdb8SjedlFvEufBh4Re3pzbaSIKnd%2FJ5Sn83u7vU4ylimzOnR3fZUJjvM6KSorN%2BLU9m%2BvTbFQgx3r2H1DspHCkfnSGlnLcVKgdssSLDCwSwHP8tg94NLKudMPlQNi52wHtpUD29CZcUCJkgXdZZV4vL69DDeENaxkBYj2KR%2FZsx9gCO7a1D1Jn6Awdq6BMIgsizByiYHg4sfOrwl%2BBcd46%2FChx5k02HJZuFCOltKtZCso%2BJmYVU7slk4u%2Fpvjm0zrTZ7dKCbVS4QrIwJJh1DBV3kg8j8ybOpa1pzSP%2BTcrx4EYNOchMUQ%2BniKdqk06sO%2FKtl5HNPPWSYBNmJZkJnUD6Sst8pFJPkrHiGDGoVUGpCzJ5SDjklaybw0BmxbKjEBKB223vab3OMSVm0k2kUom8trGSLtcqLQ8%2Bkx7D2ozR3H7xe8aReK5WSFesMsww%2FVsKMkCNBlx%2BI9R3yevg7OoEAnF20k5z38ibGjkFI%2B%2FMLjYO9nXL65bxMGvPGQ3rQF1ZYIyG11bBCT5MFEYtoaiRZUQH2QiDSKnErj2QOOygwo%2FiJh%2FEDH9KYKvgF6bTXsKbdKz%2BwHWjnug%2B78Y5WU%2BQd%2FXRsr9QWpm7hUoNQM8cJ4todmdbsqwPBb82r%2FwM%3D). In case of paired end reads, corresponding fastq files should be named using *.R1.fastq.gz* and *.R2.fastq.gz* suffixes. Specify the desired analysis details for your data in the *essential.vars.groovy* file (see below) and run the pipeline *dnaseq.pipeline.groovy* as described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go/-/blob/master/README.md). A markdown file *variantreport.Rmd* will be generated in the output reports folder after running the pipeline. Subsequently, the *variantreport.Rmd* file can be converted to a final html report using the *knitr* R-package.\r\nGATK requires chromosomes in bam files to be karyotypically ordered. 
Best you use an ordered genome fasta file as reference for the pipeline (assigned in *essential.vars.groovy*, see below).\r\n\r\n\r\n### The pipeline includes\r\n- quality control of rawdata with FastQC\r\n- Read mapping to the reference genome using BWA\r\n- identify and remove duplicate reads with Picard MarkDuplicates\r\n- Realign BAM files at Indel positions using GATK\r\n- Recalibrate Base Qualities in BAM files using GATK\r\n- Variant calling using GATK UnifiedGenotyper and GATK HaplotypeCaller\r\n- Calculate VQSLOD scores for further filtering variants using GATK VariantRecalibrator and ApplyRecalibration\r\n- Calculate the basic properties of variants as triplets for \"all\", \"known\" ,\"novel\" variants in comparison to dbSNP using GATK VariantEval\r\n\r\n\r\n### Pipeline parameter settings\r\n- essential.vars.groovy: essential parameter describing the experiment including: \r\n  - ESSENTIAL_PROJECT: your project folder name\r\n  - ESSENTIAL_BWA_REF: path to BWA indexed reference genome\r\n  - ESSENTIAL_CALL_REGION: path to bed file containing regions to limit variant calling to (optional)\r\n  - ESSENTIAL_PAIRED: either paired end (\"yes\") or single end (\"no\") design\r\n  - ESSENTIAL_KNOWN_VARIANTS: dbSNP from GATK resource bundle (crucial for BaseQualityRecalibration step)\r\n  - ESSENTIAL_HAPMAP_VARIANTS: variants provided by the GATK bundle (essential for Variant Score Recalibration)\r\n  - ESSENTIAL_OMNI_VARIANTS: variants provided by the GATK bundle (essential for Variant Score Recalibration)\r\n  - ESSENTIAL_MILLS_VARIANTS: variants provided by the GATK bundle (essential for Variant Score Recalibration)\r\n  - ESSENTIAL_THOUSAND_GENOMES_VARIANTS: variants provided by the GATK bundle (essential for Variant Score Recalibration)\r\n  - ESSENTIAL_THREADS: number of threads for parallel tasks\r\n- additional (more specialized) parameter can be given in the var.groovy-files of the individual pipeline modules \r\n\r\n\r\n## Programs required\r\n- 
Bedtools\r\n- BWA\r\n- FastQC\r\n- GATK\r\n- Picard\r\n- Samtools\r\n","organization":"IMBforge","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/60?version=1","name":"Version 1","author":["Sergi Sayols"],"descriptor_type":[]}]},{"id":"61","url":"https://workflowhub.eu/workflows/61","name":"scRNA-seq MARS-seq","description":"# scRNA-Seq pipelines\r\n\r\nHere we forge the tools to analyze single cell RNA-Seq experiments. The analysis workflow is based on the Bioconductor packages [*scater*](https://bioconductor.org/packages/devel/bioc/vignettes/scater/inst/doc/overview.html) and [*scran*](https://bioconductor.org/packages/devel/bioc/vignettes/scran/inst/doc/scran.html) as well as the Bioconductor workflows by Lun ATL, McCarthy DJ, \u0026 Marioni JC [*A step-by-step workflow for low-level analysis of single-cell RNA-seq data.*](http://doi.org/10.12688/f1000research.9501.1) F1000Res. 2016 Aug 31 [revised 2016 Oct 31];5:2122 and Amezquita RA, Lun ATL et al. [*Orchestrating Single-Cell Analysis with Bioconductor*](https://osca.bioconductor.org/index.html) Nat Methods. 2020 Feb;17(2):137-145.\r\n\r\n## Implemented protocols\r\n - MARS-Seq (massively parallel single-cell RNA-sequencing): The protocol is based on the publications of Jaitin DA, et al. (2014). *Massively parallel single-cell RNA-seq for marker-free decomposition of tissues into cell types.* Science (New York, N.Y.), 343(6172), 776–779. https://doi.org/10.1126/science.1247651 and Keren-Shaul H., et al. (2019). *MARS-seq2.0: an experimental and analytical pipeline for indexed sorting combined with single-cell RNA sequencing.* Nature Protocols. https://doi.org/10.1038/s41596-019-0164-4. The MARS-Seq library preparation protocol is given [here](https://github.com/imbforge/NGSpipe2go/blob/master/resources/MARS-Seq_protocol_Step-by-Step_MML.pdf). 
The sequencing reads are demultiplexed according to the respective pool barcodes before they are used as input for the analysis pipeline.  \r\n- Smart-seq2: Libraries are generated using the [Smart-seq2 kit](http://www.nature.com/nmeth/journal/v10/n11/full/nmeth.2639.html). \r\n\r\n## Pipeline Workflow\r\nAll analysis steps are illustrated in the pipeline [flowchart](https://www.draw.io/?lightbox=1\u0026highlight=0000ff\u0026edit=_blank\u0026layers=1\u0026nav=1\u0026title=scRNA-Seq#R7R3ZcpvK8mtUlTxIxSIh6dF2oiyVODm2Uzk5LykEI4kYAWGxrXz9nZ6FdUBIQoDjm%2BTeIwYYZqZ7eu%2BegXq1fXrn697ms2sie6BI5tNAfTNQFFmdTPB%2FoGVHW6bKjDasfctkDyUNt9YfxBol1hpZJgoyD4aua4eWl200XMdBRphp033ffcw%2BtnLt7Fc9fY0KDbeGbhdbv1tmuGGtsjZPbrxH1nrDPj1TpvTGUjfu174bOex7jusgemer827YHIONbrqPqSb17UC98l03pL%2B2T1fIhmXlK0bfW5TcjYfsIyes88J76eNqcz0L7YfNzfL6%2Fe3i55%2F3Qw6AB92O2FoMFM3GHV6a1gOsrm2tHXJD%2Bx3BUC99sgzxJf61Zv8lry39fAseEumLt5LFCHd87Tfh1sa%2FZHzP1pfIvoyX9Mq1XZ88pC7IH%2FxIEPrufQwkvIiXK9cJGUbJGoxbDzbIZD2SfuKrlWXbqU4R%2BRN3yu8QGKqXhru1DHwp4d9rWw8C9juGo0T6D%2FXQcmGFhnMpnl4aKgxQD8gP0VOqiUHpHXK3KPR3%2BBF2dzbR6CtsMw1nDAsfU6g5ZW2bFFaOJZVtCbYd1nHfCV7gHww1xGjyYFz%2Fc%2Fs9%2Bhi9%2BfPf3Ye7xY%2Fg4b%2Bh%2FKLx5K0Gf8vwZO3rpoWhfQz6NIAtykzKYEtMU1PYMpYE2KLxB0%2FBlj%2Ffwzd30r%2F%2FfVzcvPW3snn3bvdhOJELwEMmprfs0vXDjbt2Hd1%2Bm7SmIAALkzzzyXU9BpZfKAx3DIR6FLq4KYUU6MkK%2F4XXRxN29SN1580T65lc7PiFg%2Bebegkuf6TvJa%2BRq%2BQ98wLYDr40ALYAaWhcWDYfThEBMwhXCvnAjXwDVexFtsdD3V%2BjsOo59iAsfSUi%2BcjGNOwhywpFSEFexfPWd6kHPNdywiDV81doSPBzPJtk8VMZZ9lU7vnJbFb1PP5BR5CgZzyVWhj75e7L%2Fa%2FN7Ofvf%2B9CZfHz%2BrusLYez54KwRaw6PyKNZ00j0km8SN3DiRIgctYDlH8YEDBc4AdkzXsq50vwcK1exsJerpTBxeVWtxwAiuUh28LcADdeptkc%2FcY%2Bflh7JJXzCTzdybfpwAp8tMr0uglDkLYvADrKYm2FmMuOfNsbORg7lIW1Xa5cjCn45%2FW7W5iasnbxxRD%2Fb2m7S%2BDe6AH0ggWfN6YKi8C4ub4I0G%2F4udX9EP8c8fsjzL7dh1167JLBWGXSiFF%2BGPqRY%2Bgh3m2FeVb0Gq%2FrIlkAKfUvhocuEEVE61YCurzg0gU2VvbSDbwxYILGwV3e6UnQbh6sYvLQCJEZEmJshfoSv50iM1j%2FRivLsYhCokivlnDndaXA%2Fezmbbu6GRBeE3jIINwGa%2BEwUdC5XTOyAQ2f%2F%2FwQxnIntHRgh7pjxpMbwrStFRY4CdPXMU
9F%2FlETbgAHn5ARhbDeKRwsGUm5MncGHUgTqEBc20mrQPK4ARVILDMJhJTcEmBF0IOfWFH0sMoIk6cCI5fuZrwhtjyNhctVKSXVXkO5sGSKYMV422E6Q0HIHypZpWCa7cBdrQJgQjkgHCboiw0Zky4lff77R0bq36eaDtKKaaKnlqimTWoIslJX12xJQ%2FgWIP%2FL8heYgDGpBCtQPQ1g6T4BRbOcNaVpS9c3kT%2FEzSAaEEmBIT6heBLG8kl8J23jvSCihIrG8Dd%2BwtNNM%2B5b6Z8kRuEXjMKnjEHucOkr29FJEpeAI3GBpgi%2FcuDkmRXaZmeIUQdL8gaBTqpDqfBUDMPUU0o5IMOla%2B4KjQURKzRjPUXfejYaVAljycPF13mLxRsMSu14t5KDRYFUX1at%2FqGxOOaKWSx133DN5qcR9yu98tHvyPIB%2BzBwcf%2FS54ub2%2BEtIHL57GoO5OD5eq5rNz5Z2qn04U3ldF93Ml9MnxuELuuOTdWJtkvkdzIvsNU2PivSqfTKxry%2Fo2kZTWInnxbuNNo6HQOMEO7m5iZhfWkhk%2F%2BDn9IrA%2FMNn2zDR%2FhSJ3MEocJrbo6sOwn8EWtni5ywi0mNRqPmpqQ7IDW64QZ0XMnUQ%2F1cU4JGEUsn0hMRS9KabLWOoaSEX6aUpPQNUMksQ7cvmMc0BCXikvtPbbSCz7j4qZVNHEwr4lNKqxOPGytEt1iqgR4ffd2Lx3WaFq1kdDJ5pglUQpFOKM8PFuTxZUqWP0CBUzp11Tw3BY6D9NkocJ5Q%2FN%2Fi4VsOlfAlL9HZaPuQ7CC4p6XuYaQPh2xXXRAZ36GWsrJ4BN7wNTGnigxsy1pUx8u3bQpPHTVVSh7g5kysZl6mNK9ulM3Ycjl60P2gEaN%2FWZcnKaAFEJ0HIHkhy3cfLKLopC28IkSrZG%2BXsUH4Gr%2BYWRZmoyj9Ln0N5iq0JEuvXC8cHS4PpZZzL4dUqjnkqbyQKfyfyOuNxdlkueNYntbjjvIRQVnHc0eRc7xVuzP%2FYG8Nz%2BMsGM9geBZGmMjqy5Nbmgp4qh3xpPVEiollhIUehP9cEZJcR4D4HWHSF%2B6IDs7VU3cFA9YfybCJbxAGi%2Fnu%2FYB50nL8fBMtR3hvV%2FByxse3eHBA8Rex2xQeA88dvvHbGNHmmNvupeoUTqVUXUR3cpQ%2BS7hVrYS%2Bk0hO1rGURfgGCP1EygWgjYuUXtYEJEo73JlYh9ALQyy5ylVCTpC9dB9boCRKNSk5H0kY1yQJE6lpknBkEGTWQT3WpBxWnBbUKMSReDVfNo40DPpiwKqcg62a82ZTJGVvJUA%2FFYfU6bRRHBKKLT1yl%2B%2FBozKxRe1MbKlLo8Z9CdTO0ygljV%2F78XE8OT8%2Bys8mtaDRSO3aqNSWBFyAlqb52q%2FVxtve6xvVfHKt%2B2t1qPWHehzKhFoAljrvxb6fqrl9n%2BUr%2B59vOEHj9uHi8Z186%2F78cjn%2F8k19%2BrrZ3g97pD3%2FvXxo2gt8PJUPTWYn8aHauv3t3cVNbc3%2BBqvuxGjseRBkdE4dPsm3wBA%2FXIkfpyBzTiW%2B4QjguZxFmo6VdvHaMmk6ZZ0lfhMa3CR9TfwpOVoHvqzUQjNXVtYQAguZdy1vLdMkpDAHk6Tlhq2EOijJbiUJzsQBTskU2714WJNL%2FA%2Bv3hUw1wke5hW%2BlpNr%2FA8e98Mr18Fd6xYBLsII%2B4iCcFBMyG0ADdQZfDxrvSnggdBMr44PxoN6MtALlVi1ukyncQPNacmFWmGL1ncPkkQJ5iSUjnBFf4lCLwK2A%2FUrYINfFJgOvvnbqGI53fmeY7%2BiG0%2FDIhyJxTUm467jPzw9YCZNXnj%2B%2F0LfWjbg%2B3tkPyDoddBGZM2Uc6MKk7KQKh3BnWri%2BbRneC4VUN1HQWSHQT%2
BR3cAEOo3i4DS3gELbOwJWDFVFirYWVLGh6WpE%2FjPR0Iw8G%2BMdPF2dmtdqfMKL35Iaz3XZuyUPD3aruSVnvd%2BStrvu6X7EIxvyvWi4to0l81RIPdIN2I%2BcQ%2FULwc%2BAzBO1Rf4ilnqVLqXerkx%2FXJjdK%2FXOO7PTVo67N6SnKPX6CFTJnlKfVsMumUixiFdkEWwsZ%2FczMHxHD9Dvn%2FQGVu5%2FhiTRaBEYI9o2utmap0Vi5rsqDbDMByv2eXHII%2FjGaINsD%2FnB6ObkRRJ2WViYfsWqfsXjxUQv3mywJX0wQqYrTBAmO0qNOI4O%2Feya1gpoFsEM8mAiChtREEJXmr4FRuosAyrHJZGnwLlpoYMQ40ewwcQgoLROkRyETJRGN1F2Yy%2BYeVvS6nxWj8HLSgMcXlzk7fREjF%2FR1uPPM3NjazGOcf029uGkeNtZnTFn8vUXnXS84h93zk%2FqBaU06B%2BxnBXy0ROmfhbJRavrKjGBGmwptQG7MaEJPFadRkeSmEjbWvo6Q%2BSz%2B1JykznYrTJuKzbyvLGQqtKtW%2BX%2B92P4aG23MlI%2Baab0dbwcXg6nz0XVODayOkN7yvSVHEURAL5xIqNpWa5UiHyj5LBAZIrUapyteKtMzxNCN82hMx9w6bjyocCnhdzVJp1BtASDoYJl1nDnoeAo2klMlKQ2E0nrwaQMZKudh9qhmPk5HE4yJ38FyRxLPQwf597O%2Fjslj63Q26hZp7Yzs%2FGQ45OMcMqzib%2FrQ650bSirPYmT4gSDG31Z2fjSOKnc89q4VWb2k7rMDgyYIi%2BRiCmw3mMNIGXVT9xrZ%2BRn6fq1dCKHMzLtWTKyfEjVRCnWg2iTkYnjwV9gOYimTBHTmvRuJveC3k3lrEljMt8jvOeeH8%2FboXdLfXsFxkRy5ExNamdgwkJqLy2t9Xdr3UaSJx6mwYZ5OEWbPkuKlhfNJ1q3ormQolUndv5fMhceTZCmYcJVbUswr00lLkzz2%2BcPl7Ts4R3esBLkiv%2FOBl6IiEa6Lr1uEh8J7mjAnSdJIUXmu7kh6wMfIGndBLN1k5gEiBn1Rq6MAcqWwW9H0MLTwrIdm0noknEfTqNm7dOo3LlBqwn8HRSPCML3luTvIB91je9o5A%2FsY%2Fh%2BCNtm0lQNk5xIpwhKfHVuzv3rJbr%2BmnOztVEm8tHmXDWLZi2Zc%2FmA%2B2bOXSMHXbrmDouFSm2R0OcKMBMlMbl%2BIFQbeoMPImcNrvU2bLnwSSiOiAejHEyIJ9KgdUJ8BmFxPOtWWBRb%2BP5631ej0mLtzNx%2BWvg0tZ8WPiysHWbcw9hHKtXS0DAeS8%2FKGxUE2cCzrTA8d9pkSv7k8zlc5uwgeTInJVaJmmXiabkIegaZU5O6lTkrTVy9CYAtxt6Hvm7c9zT%2BlVqtYMpskMxC%2F2AFUNEsoEfk9j7VpAFszwfqjQUmprNF4gstHz2qnNB%2FCWFWW0JoWkA4KX6fD%2Fso8nX2gso3CGLnHAN4%2BTvkuJlzSiqkhO7pGpVPSKEHDAG8JZ6I%2B3GdmUQ%2B0n2F9DDyc6fxkW%2Bsw1XbMd2FKO2kDjAL0LZdmowYkCkCTlcVWx4cWQ64weq%2BTcgkSoZIT%2BW60dRSA1RavIOLpSEKC3fewr0xDXm5hXvFKnaPHDIxzzzIISO3yEDngiAacYJgvzLg5uV1H3p%2FplwSaRMnZxFMqlYVXtr5a3RJssUpBuT8LbxT2flb1GOWy3HjKUz3mJXDWUj8GvN2Urs%2FrlNA%2FWk6QTZRJWdq19DxW7vACsg6Iy84%2FbScc53k8%2F%2BBnX5Q0HM492fOSxrE4lCxZpI8E7D22fR0aix0AT7%2F7PH97rs6x6%2Fy02rrqaNd8c54mJ1Yzz44dbLH9cfc4WE9sp7xuIy4lswfy%2FOQ%2Bbpq
ZK3oio6%2BpaG3mIc9gE5IjpWnpj1PZ4dWIibC1M3NbVPXdV1bHt3IIxpBsv7T51EqR4zyWZtKc0q4ILq4ZR2c1%2BPtsRMA6odmEuF7RMaW%2BjYhYi%2FC1J9NWlTnNUtNnq%2FoTqdZXU3Y%2BqdtGvu5Y7KDGpWNhANMp4eFA0xnsxyKnSnhB2qf3LACLHVDAhi0QNag1mkiZWDKc2%2B6j84gqUkCE2oh14fVb4E5HBwGME%2Bjx%2FOJeMp76qfjolm8TU%2F9e%2BnjanM9C%2B2Hzc3y%2Bv3t4uef98NyR30BuYAPCPlc3mY1FrGzT2gNom2CvLS7AjtblrIzVrU5BSRhheV0VAZrKhR6Li3sLMKKLEVvhNNlCYlAVBvLFab%2FUxidEAmKpxwuYX8PUodbxgU5oa8VFUxsolTV8KoctxubWOlZdgvynZXegWPBSvPakY2v9CmOZS0vEqcsvnyFk0ay1iJtKVMxKlWJKgFY56LvkfLuIba%2FMyCXzGsE7cMudrBP89jVptOzEr1frs9TuCzP0BnGbO8L6tap2I4vze9VuRgv1ovT24F16F5qgMXMtWzmz0SQM8EjJDMcRjkTh%2BmyzL1FPRMLN2fa62Wg2wsw2OWRUyz%2FiLBTOxN2HlrxHSTfIR8ZQVHKkyp8DHV6EXfyIcSANNIR54k3grwWBSQfjaF5sUhrAZfLhIfCgDTxiN7QxEli%2F0E0XtGEoxSAd4sCLkh8RTXrL4yxEt9P1%2BjZnhBvnP3K%2FMb1rT94fnr8%2FVjGUJvZJAUlYVzcJMpsXtwkZ7Nq%2F%2FX5wEWbdrow7rratpC2cv%2BY2x%2FvrA9vl7e3T%2B7O3X388eXbsK6Rm5sQuq7ykjtdcpo9VXbv87ymx7mN3OyQCWKQ%2BATHYtQu9eJ6OwKP4B4%2BQU7UoPUZYjmBNXJx4YwW760VgA6iO8iN4JrNiljTYru3IrR8lyPbszN851N9px2XbBSegNrxMXIHEUJ88ZVUSibW64aPRT2eFAqSe6pIZvuhyULAF21izxPwchbwyplcvXUhX%2BkSbh%2F0wjjIjqu0ngv0x0L%2B0KDKuqggXPu2QiqrBlnLMB759u4S8m5hOvu4ZYId9Cqkqbnqm%2BH8wD1Xn72Op2ruUNaxoO6Qohb5qzptwPMgrhspdbu1zn4qa1UVtP26QGd1jIXD7lMd4x6f%2Fn4ayM9zMMnBMU6T3LklMyWNPnufV7STDn%2BvZ5GY9Acdz3K2XV1EElNWpReIlLcLqPuqxeafnyinIFJtO8JVFOqm7oW17Qe8JV2Y0Udb9wGsoKQrYpVluWyspk7ccRflHA02xao4unKUe3bWhHwYndrHKrN8l%2FY41j3EysS2r8Hu8eCklxf2nq1woyoyl%2B67C3wXBazkVjm1CoQeX8ZBJ6nCVwvyR8g508ut5eWzrFqVq7P1VoO%2FgxJnUb5gF2vGVM0yGBtfswMoCUvf6CbBAimrvzUU6jmf5tigKAJxKnD%2FqHzJGwcuZ8yl0UgHxBKStyI732LHns1%2F8m5EcoCS7dL8bohDxzwtQo5BE%2BNYkU6bnj5asfvtQiZX0nIHtIT0d0Xoj%2BfRzmm%2BePbIeILT%2BFcQZl2zVf0X5pSfkkGhwGbBvvvPFSVrmIKT4Rw5uWvX3%2BLP%2F9Ez2YPx2X0SCaGq19UHE3IQV8lKsIR681cUhOSIQZqCXzhjFE97BRuULuuD7lsQSUNSLS3HsEfJEhg7g0SCeRs9QK9rjusNySyV0BNeVJ%2BPjRUJ%2BHp1QcEaDm%2Bv38Zj%2Fvb54msChVpfubKjgMOC%2Bossuh67ZPhBtBx6rhfZvKLPIeu6o8zvPi4ym9Rx46tDh1Cz2wvHAfKE9g7vYJx6Y61IRSkoUkQX3kdBwHEiLq9AyjFAkMKQFGIi3JqNBAi%2Fd%2BgAcGORdOTDCnJc56%2FJAZ
hP8vpRkTEImb7aRBaA0ELbIzX8SKtQI2e%2FdFApfDzOWl20ec5OW79SeNYw3FalcD7gvlUK%2FxzZoYU5b%2B2suWiLSTZhoTHDRu0diFUzSW6vf6VV5b5A82rtmLJqtjOBfNz1CQpyp7ECXdb8PNRDKFw%2BRtt6U3SlapD9rPj5NZ2l5aysdeQXJNIeGXNo3U7%2BBZDlNz5aZT5QIKGYio982xs5kIFTSkaHCSk1qXa1oAsC7fDYiF4mVTZTySFM608aMdYPQz9yIOTVLE5D3CFfW73Oep9%2FdbMG6HZXHKsJ%2BhY0v0aWW9Bb1VqLJp4vHdv2gpDjMxtZjFxPPUC6OAK%2BpIHqhMC5Xf%2FedokdZ4u1xzXRf1PjFemB5d0e0YCVV8KSggO%2B%2B6q5AeKF0LHYCQyHjYCD3I78TFYbCJgklxor9oMk4F8K9n8EUBQr6sjAYoOftdW5BmjxKQDWmtfr2JYiuptUOY4TwuPtGr%2BIBcGAFT6mDelSyLSWY2JMoAiOf3x2A2poChHYG%2B4RAvxNFjFA5KyKpK5jyUxa3QZnTEw2fde746ISDK48Nf40yVvJCN4TVZSKK6oXNW0ge0csZosM07mVPCl9ec96lcuFpYv43NOX8SVmMWH6cYxgm8%2BuCSr12%2F8B). Specify desired analysis details for your data in the respective *essential.vars.groovy* file (see below) and run the selected pipeline *marsseq.pipeline.groovy* or *smartsseq.pipeline.groovy* as described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go/-/blob/master/README.md). The analysis allows further parameter fine-tuning subsequent the initial analysis e.g. for plotting and QC thresholding. Therefore, a customisable *sc.report.Rmd* file will be generated in the output reports folder after running the pipeline. Go through the steps and modify the default settings where appropriate. Subsequently, the *sc.report.Rmd* file can be converted to a final html report using the *knitr* R-package.\r\n\r\n### The pipelines includes:\r\n- FastQC, MultiQC and other tools for rawdata quality control\r\n- Adapter trimming with Cutadapt\r\n- Mapping to the genome using STAR\r\n- generation of bigWig tracks for visualisation of alignment\r\n- Quantification with featureCounts (Subread) and UMI-tools (if UMIs are used for deduplication)\r\n- Downstream analysis in R using a pre-designed markdown report file (*sc.report.Rmd*). Modify this file to fit your custom parameter and thresholds and render it to your final html report. 
The Rmd file uses, among others, the following tools and methods:\r\n  - QC: the [scater](http://bioconductor.org/packages/release/bioc/html/scater.html) package.\r\n  - Normalization: the [scran](http://bioconductor.org/packages/release/bioc/html/scran.html) package.\r\n  - Differential expression analysis: the [scde](http://bioconductor.org/packages/release/bioc/html/scde.html) package.\r\n  - Trajectory analysis (pseudotime): the [monocle](https://bioconductor.org/packages/release/bioc/html/monocle.html) package.\r\n\r\n### Pipeline parameter settings\r\n- essential.vars.groovy: essential parameter describing the experiment \r\n  - project folder name\r\n  - reference genome\r\n  - experiment design\r\n  - adapter sequence, etc.\r\n- additional (more specialized) parameter can be given in the var.groovy-files of the individual pipeline modules \r\n- targets.txt: comma-separated txt-file giving information about the analysed samples. The following columns are required \r\n  - sample: sample identifier. Must be a unique substring of the input sample file name (e.g. common prefixes and suffixes may be removed). These names are grepped against the count file names to merge targets.txt to the count data.\r\n  - plate: plate ID (number) \r\n  - row: plate row (letter)\r\n  - col: plate column (number)\r\n  - cells: 0c/1c/10c (control wells)\r\n  - group: default variable for cell grouping (e.g. by condition)\r\n  \r\n  for pool-based libraries like MARSseq required additionally:\r\n  - pool: the pool ID comprises all cells from 1 library pool (i.e. a set of unique cell barcodes; the cell barcodes are re-used in other pools). Must be a unique substring of the input sample file name. For pool-based design, the pool ID is grepped against the respective count data filename instead of the sample name as stated above.\r\n  - barcode: cell barcodes used as cell identifier in the count files. 
After merging the count data with targets.txt, the barcodes are replaced with sample IDs given in the sample column (i.e. here, sample names need not be a substring of input sample file name).\r\n\r\n### Programs required\r\n- FastQC\r\n- STAR\r\n- Samtools\r\n- Bedtools\r\n- Subread\r\n- Picard\r\n- UCSC utilities\r\n- RSeQC\r\n- UMI-tools\r\n- R\r\n\r\n## Resources\r\n- QC: the [scater](http://bioconductor.org/packages/release/bioc/html/scater.html) package.\r\n- Normalization: the [scran](http://bioconductor.org/packages/release/bioc/html/scran.html) package.\r\n- Trajectory analysis (pseudotime): the [monocle](https://bioconductor.org/packages/release/bioc/html/monocle.html) package.\r\n- A [tutorial](https://scrnaseq-course.cog.sanger.ac.uk/website/index.html) from Hemberg lab\r\n- Luecken and Theis 2019 [Current best practices in single‐cell RNA‐seq analysis: a tutorial](https://www.embopress.org/doi/10.15252/msb.20188746)\r\n\r\n\r\n","organization":"IMBforge","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/61?version=1","name":"Version 1","author":["Sergi Sayols"],"descriptor_type":[]}]},{"id":"62","url":"https://workflowhub.eu/workflows/62","name":"scRNA-seq Smart-seq 2","description":"# scRNA-Seq pipelines\r\n\r\nHere we forge the tools to analyze single cell RNA-Seq experiments. The analysis workflow is based on the Bioconductor packages [*scater*](https://bioconductor.org/packages/devel/bioc/vignettes/scater/inst/doc/overview.html) and [*scran*](https://bioconductor.org/packages/devel/bioc/vignettes/scran/inst/doc/scran.html) as well as the Bioconductor workflows by Lun ATL, McCarthy DJ, \u0026 Marioni JC [*A step-by-step workflow for low-level analysis of single-cell RNA-seq data.*](http://doi.org/10.12688/f1000research.9501.1) F1000Res. 2016 Aug 31 [revised 2016 Oct 31];5:2122 and Amezquita RA, Lun ATL et al. 
[*Orchestrating Single-Cell Analysis with Bioconductor*](https://osca.bioconductor.org/index.html) Nat Methods. 2020 Feb;17(2):137-145.\r\n\r\n## Implemented protocols\r\n - MARS-Seq (massively parallel single-cell RNA-sequencing): The protocol is based on the publications of Jaitin DA, et al. (2014). *Massively parallel single-cell RNA-seq for marker-free decomposition of tissues into cell types.* Science (New York, N.Y.), 343(6172), 776–779. https://doi.org/10.1126/science.1247651 and Keren-Shaul H., et al. (2019). *MARS-seq2.0: an experimental and analytical pipeline for indexed sorting combined with single-cell RNA sequencing.* Nature Protocols. https://doi.org/10.1038/s41596-019-0164-4. The MARS-Seq library preparation protocol is given [here](https://github.com/imbforge/NGSpipe2go/blob/master/resources/MARS-Seq_protocol_Step-by-Step_MML.pdf). The sequencing reads are demultiplexed according to the respective pool barcodes before they are used as input for the analysis pipeline.  \r\n- Smart-seq2: Libraries are generated using the [Smart-seq2 kit](http://www.nature.com/nmeth/journal/v10/n11/full/nmeth.2639.html). 
\r\n\r\n## Pipeline Workflow\r\nAll analysis steps are illustrated in the pipeline [flowchart](https://www.draw.io/?lightbox=1\u0026highlight=0000ff\u0026edit=_blank\u0026layers=1\u0026nav=1\u0026title=scRNA-Seq#R7R3ZcpvK8mtUlTxIxSIh6dF2oiyVODm2Uzk5LykEI4kYAWGxrXz9nZ6FdUBIQoDjm%2BTeIwYYZqZ7eu%2BegXq1fXrn697ms2sie6BI5tNAfTNQFFmdTPB%2FoGVHW6bKjDasfctkDyUNt9YfxBol1hpZJgoyD4aua4eWl200XMdBRphp033ffcw%2BtnLt7Fc9fY0KDbeGbhdbv1tmuGGtsjZPbrxH1nrDPj1TpvTGUjfu174bOex7jusgemer827YHIONbrqPqSb17UC98l03pL%2B2T1fIhmXlK0bfW5TcjYfsIyes88J76eNqcz0L7YfNzfL6%2Fe3i55%2F3Qw6AB92O2FoMFM3GHV6a1gOsrm2tHXJD%2Bx3BUC99sgzxJf61Zv8lry39fAseEumLt5LFCHd87Tfh1sa%2FZHzP1pfIvoyX9Mq1XZ88pC7IH%2FxIEPrufQwkvIiXK9cJGUbJGoxbDzbIZD2SfuKrlWXbqU4R%2BRN3yu8QGKqXhru1DHwp4d9rWw8C9juGo0T6D%2FXQcmGFhnMpnl4aKgxQD8gP0VOqiUHpHXK3KPR3%2BBF2dzbR6CtsMw1nDAsfU6g5ZW2bFFaOJZVtCbYd1nHfCV7gHww1xGjyYFz%2Fc%2Fs9%2Bhi9%2BfPf3Ye7xY%2Fg4b%2Bh%2FKLx5K0Gf8vwZO3rpoWhfQz6NIAtykzKYEtMU1PYMpYE2KLxB0%2FBlj%2Ffwzd30r%2F%2FfVzcvPW3snn3bvdhOJELwEMmprfs0vXDjbt2Hd1%2Bm7SmIAALkzzzyXU9BpZfKAx3DIR6FLq4KYUU6MkK%2F4XXRxN29SN1580T65lc7PiFg%2Bebegkuf6TvJa%2BRq%2BQ98wLYDr40ALYAaWhcWDYfThEBMwhXCvnAjXwDVexFtsdD3V%2BjsOo59iAsfSUi%2BcjGNOwhywpFSEFexfPWd6kHPNdywiDV81doSPBzPJtk8VMZZ9lU7vnJbFb1PP5BR5CgZzyVWhj75e7L%2Fa%2FN7Ofvf%2B9CZfHz%2BrusLYez54KwRaw6PyKNZ00j0km8SN3DiRIgctYDlH8YEDBc4AdkzXsq50vwcK1exsJerpTBxeVWtxwAiuUh28LcADdeptkc%2FcY%2Bflh7JJXzCTzdybfpwAp8tMr0uglDkLYvADrKYm2FmMuOfNsbORg7lIW1Xa5cjCn45%2FW7W5iasnbxxRD%2Fb2m7S%2BDe6AH0ggWfN6YKi8C4ub4I0G%2F4udX9EP8c8fsjzL7dh1167JLBWGXSiFF%2BGPqRY%2Bgh3m2FeVb0Gq%2FrIlkAKfUvhocuEEVE61YCurzg0gU2VvbSDbwxYILGwV3e6UnQbh6sYvLQCJEZEmJshfoSv50iM1j%2FRivLsYhCokivlnDndaXA%2Fezmbbu6GRBeE3jIINwGa%2BEwUdC5XTOyAQ2f%2F%2FwQxnIntHRgh7pjxpMbwrStFRY4CdPXMU9F%2FlETbgAHn5ARhbDeKRwsGUm5MncGHUgTqEBc20mrQPK4ARVILDMJhJTcEmBF0IOfWFH0sMoIk6cCI5fuZrwhtjyNhctVKSXVXkO5sGSKYMV422E6Q0HIHypZpWCa7cBdrQJgQjkgHCboiw0Zky4lff77R0bq36eaDtKKaaKnlqimTWoIslJX12xJQ%2FgWIP%2FL8heYgDGpBCtQPQ1g6T4BRbOcNaVpS9c3kT%2FEzSAaEEmBIT6heBLG8kl8J23jvSCihIrG8Dd%2
BwtNNM%2B5b6Z8kRuEXjMKnjEHucOkr29FJEpeAI3GBpgi%2FcuDkmRXaZmeIUQdL8gaBTqpDqfBUDMPUU0o5IMOla%2B4KjQURKzRjPUXfejYaVAljycPF13mLxRsMSu14t5KDRYFUX1at%2FqGxOOaKWSx133DN5qcR9yu98tHvyPIB%2BzBwcf%2FS54ub2%2BEtIHL57GoO5OD5eq5rNz5Z2qn04U3ldF93Ml9MnxuELuuOTdWJtkvkdzIvsNU2PivSqfTKxry%2Fo2kZTWInnxbuNNo6HQOMEO7m5iZhfWkhk%2F%2BDn9IrA%2FMNn2zDR%2FhSJ3MEocJrbo6sOwn8EWtni5ywi0mNRqPmpqQ7IDW64QZ0XMnUQ%2F1cU4JGEUsn0hMRS9KabLWOoaSEX6aUpPQNUMksQ7cvmMc0BCXikvtPbbSCz7j4qZVNHEwr4lNKqxOPGytEt1iqgR4ffd2Lx3WaFq1kdDJ5pglUQpFOKM8PFuTxZUqWP0CBUzp11Tw3BY6D9NkocJ5Q%2FN%2Fi4VsOlfAlL9HZaPuQ7CC4p6XuYaQPh2xXXRAZ36GWsrJ4BN7wNTGnigxsy1pUx8u3bQpPHTVVSh7g5kysZl6mNK9ulM3Ycjl60P2gEaN%2FWZcnKaAFEJ0HIHkhy3cfLKLopC28IkSrZG%2BXsUH4Gr%2BYWRZmoyj9Ln0N5iq0JEuvXC8cHS4PpZZzL4dUqjnkqbyQKfyfyOuNxdlkueNYntbjjvIRQVnHc0eRc7xVuzP%2FYG8Nz%2BMsGM9geBZGmMjqy5Nbmgp4qh3xpPVEiollhIUehP9cEZJcR4D4HWHSF%2B6IDs7VU3cFA9YfybCJbxAGi%2Fnu%2FYB50nL8fBMtR3hvV%2FByxse3eHBA8Rex2xQeA88dvvHbGNHmmNvupeoUTqVUXUR3cpQ%2BS7hVrYS%2Bk0hO1rGURfgGCP1EygWgjYuUXtYEJEo73JlYh9ALQyy5ylVCTpC9dB9boCRKNSk5H0kY1yQJE6lpknBkEGTWQT3WpBxWnBbUKMSReDVfNo40DPpiwKqcg62a82ZTJGVvJUA%2FFYfU6bRRHBKKLT1yl%2B%2FBozKxRe1MbKlLo8Z9CdTO0ygljV%2F78XE8OT8%2Bys8mtaDRSO3aqNSWBFyAlqb52q%2FVxtve6xvVfHKt%2B2t1qPWHehzKhFoAljrvxb6fqrl9n%2BUr%2B59vOEHj9uHi8Z186%2F78cjn%2F8k19%2BrrZ3g97pD3%2FvXxo2gt8PJUPTWYn8aHauv3t3cVNbc3%2BBqvuxGjseRBkdE4dPsm3wBA%2FXIkfpyBzTiW%2B4QjguZxFmo6VdvHaMmk6ZZ0lfhMa3CR9TfwpOVoHvqzUQjNXVtYQAguZdy1vLdMkpDAHk6Tlhq2EOijJbiUJzsQBTskU2714WJNL%2FA%2Bv3hUw1wke5hW%2BlpNr%2FA8e98Mr18Fd6xYBLsII%2B4iCcFBMyG0ADdQZfDxrvSnggdBMr44PxoN6MtALlVi1ukyncQPNacmFWmGL1ncPkkQJ5iSUjnBFf4lCLwK2A%2FUrYINfFJgOvvnbqGI53fmeY7%2BiG0%2FDIhyJxTUm467jPzw9YCZNXnj%2B%2F0LfWjbg%2B3tkPyDoddBGZM2Uc6MKk7KQKh3BnWri%2BbRneC4VUN1HQWSHQT%2BR3cAEOo3i4DS3gELbOwJWDFVFirYWVLGh6WpE%2FjPR0Iw8G%2BMdPF2dmtdqfMKL35Iaz3XZuyUPD3aruSVnvd%2BStrvu6X7EIxvyvWi4to0l81RIPdIN2I%2BcQ%2FULwc%2BAzBO1Rf4ilnqVLqXerkx%2FXJjdK%2FXOO7PTVo67N6SnKPX6CFTJnlKfVsMumUixiFdkEWwsZ%2FczMHxHD9Dvn%2FQGVu5%2FhiTRaBEYI9o2utmap0Vi5rsqDbDMByv2eXHII%2
FjGaINsD%2FnB6ObkRRJ2WViYfsWqfsXjxUQv3mywJX0wQqYrTBAmO0qNOI4O%2Feya1gpoFsEM8mAiChtREEJXmr4FRuosAyrHJZGnwLlpoYMQ40ewwcQgoLROkRyETJRGN1F2Yy%2BYeVvS6nxWj8HLSgMcXlzk7fREjF%2FR1uPPM3NjazGOcf029uGkeNtZnTFn8vUXnXS84h93zk%2FqBaU06B%2BxnBXy0ROmfhbJRavrKjGBGmwptQG7MaEJPFadRkeSmEjbWvo6Q%2BSz%2B1JykznYrTJuKzbyvLGQqtKtW%2BX%2B92P4aG23MlI%2Baab0dbwcXg6nz0XVODayOkN7yvSVHEURAL5xIqNpWa5UiHyj5LBAZIrUapyteKtMzxNCN82hMx9w6bjyocCnhdzVJp1BtASDoYJl1nDnoeAo2klMlKQ2E0nrwaQMZKudh9qhmPk5HE4yJ38FyRxLPQwf597O%2Fjslj63Q26hZp7Yzs%2FGQ45OMcMqzib%2FrQ650bSirPYmT4gSDG31Z2fjSOKnc89q4VWb2k7rMDgyYIi%2BRiCmw3mMNIGXVT9xrZ%2BRn6fq1dCKHMzLtWTKyfEjVRCnWg2iTkYnjwV9gOYimTBHTmvRuJveC3k3lrEljMt8jvOeeH8%2FboXdLfXsFxkRy5ExNamdgwkJqLy2t9Xdr3UaSJx6mwYZ5OEWbPkuKlhfNJ1q3ormQolUndv5fMhceTZCmYcJVbUswr00lLkzz2%2BcPl7Ts4R3esBLkiv%2FOBl6IiEa6Lr1uEh8J7mjAnSdJIUXmu7kh6wMfIGndBLN1k5gEiBn1Rq6MAcqWwW9H0MLTwrIdm0noknEfTqNm7dOo3LlBqwn8HRSPCML3luTvIB91je9o5A%2FsY%2Fh%2BCNtm0lQNk5xIpwhKfHVuzv3rJbr%2BmnOztVEm8tHmXDWLZi2Zc%2FmA%2B2bOXSMHXbrmDouFSm2R0OcKMBMlMbl%2BIFQbeoMPImcNrvU2bLnwSSiOiAejHEyIJ9KgdUJ8BmFxPOtWWBRb%2BP5631ej0mLtzNx%2BWvg0tZ8WPiysHWbcw9hHKtXS0DAeS8%2FKGxUE2cCzrTA8d9pkSv7k8zlc5uwgeTInJVaJmmXiabkIegaZU5O6lTkrTVy9CYAtxt6Hvm7c9zT%2BlVqtYMpskMxC%2F2AFUNEsoEfk9j7VpAFszwfqjQUmprNF4gstHz2qnNB%2FCWFWW0JoWkA4KX6fD%2Fso8nX2gso3CGLnHAN4%2BTvkuJlzSiqkhO7pGpVPSKEHDAG8JZ6I%2B3GdmUQ%2B0n2F9DDyc6fxkW%2Bsw1XbMd2FKO2kDjAL0LZdmowYkCkCTlcVWx4cWQ64weq%2BTcgkSoZIT%2BW60dRSA1RavIOLpSEKC3fewr0xDXm5hXvFKnaPHDIxzzzIISO3yEDngiAacYJgvzLg5uV1H3p%2FplwSaRMnZxFMqlYVXtr5a3RJssUpBuT8LbxT2flb1GOWy3HjKUz3mJXDWUj8GvN2Urs%2FrlNA%2FWk6QTZRJWdq19DxW7vACsg6Iy84%2FbScc53k8%2F%2BBnX5Q0HM492fOSxrE4lCxZpI8E7D22fR0aix0AT7%2F7PH97rs6x6%2Fy02rrqaNd8c54mJ1Yzz44dbLH9cfc4WE9sp7xuIy4lswfy%2FOQ%2BbpqZK3oio6%2BpaG3mIc9gE5IjpWnpj1PZ4dWIibC1M3NbVPXdV1bHt3IIxpBsv7T51EqR4zyWZtKc0q4ILq4ZR2c1%2BPtsRMA6odmEuF7RMaW%2BjYhYi%2FC1J9NWlTnNUtNnq%2FoTqdZXU3Y%2BqdtGvu5Y7KDGpWNhANMp4eFA0xnsxyKnSnhB2qf3LACLHVDAhi0QNag1mkiZWDKc2%2B6j84gqUkCE2oh14fVb4E5HBwGME%2Bjx%2FOJeMp76qfjolm8TU%2F9e%2
BnjanM9C%2B2Hzc3y%2Bv3t4uef98NyR30BuYAPCPlc3mY1FrGzT2gNom2CvLS7AjtblrIzVrU5BSRhheV0VAZrKhR6Li3sLMKKLEVvhNNlCYlAVBvLFab%2FUxidEAmKpxwuYX8PUodbxgU5oa8VFUxsolTV8KoctxubWOlZdgvynZXegWPBSvPakY2v9CmOZS0vEqcsvnyFk0ay1iJtKVMxKlWJKgFY56LvkfLuIba%2FMyCXzGsE7cMudrBP89jVptOzEr1frs9TuCzP0BnGbO8L6tap2I4vze9VuRgv1ovT24F16F5qgMXMtWzmz0SQM8EjJDMcRjkTh%2BmyzL1FPRMLN2fa62Wg2wsw2OWRUyz%2FiLBTOxN2HlrxHSTfIR8ZQVHKkyp8DHV6EXfyIcSANNIR54k3grwWBSQfjaF5sUhrAZfLhIfCgDTxiN7QxEli%2F0E0XtGEoxSAd4sCLkh8RTXrL4yxEt9P1%2BjZnhBvnP3K%2FMb1rT94fnr8%2FVjGUJvZJAUlYVzcJMpsXtwkZ7Nq%2F%2FX5wEWbdrow7rratpC2cv%2BY2x%2FvrA9vl7e3T%2B7O3X388eXbsK6Rm5sQuq7ykjtdcpo9VXbv87ymx7mN3OyQCWKQ%2BATHYtQu9eJ6OwKP4B4%2BQU7UoPUZYjmBNXJx4YwW760VgA6iO8iN4JrNiljTYru3IrR8lyPbszN851N9px2XbBSegNrxMXIHEUJ88ZVUSibW64aPRT2eFAqSe6pIZvuhyULAF21izxPwchbwyplcvXUhX%2BkSbh%2F0wjjIjqu0ngv0x0L%2B0KDKuqggXPu2QiqrBlnLMB759u4S8m5hOvu4ZYId9Cqkqbnqm%2BH8wD1Xn72Op2ruUNaxoO6Qohb5qzptwPMgrhspdbu1zn4qa1UVtP26QGd1jIXD7lMd4x6f%2Fn4ayM9zMMnBMU6T3LklMyWNPnufV7STDn%2BvZ5GY9Acdz3K2XV1EElNWpReIlLcLqPuqxeafnyinIFJtO8JVFOqm7oW17Qe8JV2Y0Udb9wGsoKQrYpVluWyspk7ccRflHA02xao4unKUe3bWhHwYndrHKrN8l%2FY41j3EysS2r8Hu8eCklxf2nq1woyoyl%2B67C3wXBazkVjm1CoQeX8ZBJ6nCVwvyR8g508ut5eWzrFqVq7P1VoO%2FgxJnUb5gF2vGVM0yGBtfswMoCUvf6CbBAimrvzUU6jmf5tigKAJxKnD%2FqHzJGwcuZ8yl0UgHxBKStyI732LHns1%2F8m5EcoCS7dL8bohDxzwtQo5BE%2BNYkU6bnj5asfvtQiZX0nIHtIT0d0Xoj%2BfRzmm%2BePbIeILT%2BFcQZl2zVf0X5pSfkkGhwGbBvvvPFSVrmIKT4Rw5uWvX3%2BLP%2F9Ez2YPx2X0SCaGq19UHE3IQV8lKsIR681cUhOSIQZqCXzhjFE97BRuULuuD7lsQSUNSLS3HsEfJEhg7g0SCeRs9QK9rjusNySyV0BNeVJ%2BPjRUJ%2BHp1QcEaDm%2Bv38Zj%2Fvb54msChVpfubKjgMOC%2Bossuh67ZPhBtBx6rhfZvKLPIeu6o8zvPi4ym9Rx46tDh1Cz2wvHAfKE9g7vYJx6Y61IRSkoUkQX3kdBwHEiLq9AyjFAkMKQFGIi3JqNBAi%2Fd%2BgAcGORdOTDCnJc56%2FJAZhP8vpRkTEImb7aRBaA0ELbIzX8SKtQI2e%2FdFApfDzOWl20ec5OW79SeNYw3FalcD7gvlUK%2FxzZoYU5b%2B2suWiLSTZhoTHDRu0diFUzSW6vf6VV5b5A82rtmLJqtjOBfNz1CQpyp7ECXdb8PNRDKFw%2BRtt6U3SlapD9rPj5NZ2l5aysdeQXJNIeGXNo3U7%2BBZDlNz5aZT5QIKGYio982xs5kIFTSkaHCSk1qXa1oAsC7fDYiF4mVTZTySFM608aMdYPQz9yIOT
VLE5D3CFfW73Oep9%2FdbMG6HZXHKsJ%2BhY0v0aWW9Bb1VqLJp4vHdv2gpDjMxtZjFxPPUC6OAK%2BpIHqhMC5Xf%2FedokdZ4u1xzXRf1PjFemB5d0e0YCVV8KSggO%2B%2B6q5AeKF0LHYCQyHjYCD3I78TFYbCJgklxor9oMk4F8K9n8EUBQr6sjAYoOftdW5BmjxKQDWmtfr2JYiuptUOY4TwuPtGr%2BIBcGAFT6mDelSyLSWY2JMoAiOf3x2A2poChHYG%2B4RAvxNFjFA5KyKpK5jyUxa3QZnTEw2fde746ISDK48Nf40yVvJCN4TVZSKK6oXNW0ge0csZosM07mVPCl9ec96lcuFpYv43NOX8SVmMWH6cYxgm8%2BuCSr12%2F8B). Specify desired analysis details for your data in the respective *essential.vars.groovy* file (see below) and run the selected pipeline *marsseq.pipeline.groovy* or *smartsseq.pipeline.groovy* as described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go/-/blob/master/README.md). The analysis allows further parameter fine-tuning subsequent the initial analysis e.g. for plotting and QC thresholding. Therefore, a customisable *sc.report.Rmd* file will be generated in the output reports folder after running the pipeline. Go through the steps and modify the default settings where appropriate. Subsequently, the *sc.report.Rmd* file can be converted to a final html report using the *knitr* R-package.\r\n\r\n### The pipelines includes:\r\n- FastQC, MultiQC and other tools for rawdata quality control\r\n- Adapter trimming with Cutadapt\r\n- Mapping to the genome using STAR\r\n- generation of bigWig tracks for visualisation of alignment\r\n- Quantification with featureCounts (Subread) and UMI-tools (if UMIs are used for deduplication)\r\n- Downstream analysis in R using a pre-designed markdown report file (*sc.report.Rmd*). Modify this file to fit your custom parameter and thresholds and render it to your final html report. 
The Rmd file uses, among others, the following tools and methods:\r\n  - QC: the [scater](http://bioconductor.org/packages/release/bioc/html/scater.html) package.\r\n  - Normalization: the [scran](http://bioconductor.org/packages/release/bioc/html/scran.html) package.\r\n  - Differential expression analysis: the [scde](http://bioconductor.org/packages/release/bioc/html/scde.html) package.\r\n  - Trajectory analysis (pseudotime): the [monocle](https://bioconductor.org/packages/release/bioc/html/monocle.html) package.\r\n\r\n### Pipeline parameter settings\r\n- essential.vars.groovy: essential parameter describing the experiment \r\n  - project folder name\r\n  - reference genome\r\n  - experiment design\r\n  - adapter sequence, etc.\r\n- additional (more specialized) parameter can be given in the var.groovy-files of the individual pipeline modules \r\n- targets.txt: comma-separated txt-file giving information about the analysed samples. The following columns are required \r\n  - sample: sample identifier. Must be a unique substring of the input sample file name (e.g. common prefixes and suffixes may be removed). These names are grebbed against the count file names to merge targets.txt to the count data.\r\n  - plate: plate ID (number) \r\n  - row: plate row (letter)\r\n  - col: late column (number)\r\n  - cells: 0c/1c/10c (control wells)\r\n  - group: default variable for cell grouping (e.g. by condition)\r\n  \r\n  for pool-based libraries like MARSseq required additionally:\r\n  - pool: the pool ID comprises all cells from 1 library pool (i.e. a set of unique cell barcodes; the cell barcodes are re-used in other pools). Must be a unique substring of the input sample file name. For pool-based design, the pool ID is grebbed against the respective count data filename instead of the sample name as stated above.\r\n  - barcode: cell barcodes used as cell identifier in the count files. 
After merging the count data with targets.txt, the barcodes are replaced with sample IDs given in the sample column (i.e. here, sample names need not be a substring of input sample file name).\r\n\r\n### Programs required\r\n- FastQC\r\n- STAR\r\n- Samtools\r\n- Bedtools\r\n- Subread\r\n- Picard\r\n- UCSC utilities\r\n- RSeQC\r\n- UMI-tools\r\n- R\r\n\r\n## Resources\r\n- QC: the [scater](http://bioconductor.org/packages/release/bioc/html/scater.html) package.\r\n- Normalization: the [scran](http://bioconductor.org/packages/release/bioc/html/scran.html) package.\r\n- Trajectory analysis (pseudotime): the [monocle](https://bioconductor.org/packages/release/bioc/html/monocle.html) package.\r\n- A [tutorial](https://scrnaseq-course.cog.sanger.ac.uk/website/index.html) from Hemberg lab\r\n- Luecken and Theis 2019 [Current best practices in single‐cell RNA‐seq analysis: a tutorial](https://www.embopress.org/doi/10.15252/msb.20188746)\r\n\r\n\r\n","organization":"IMBforge","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/62?version=1","name":"Version 1","author":["Sergi Sayols"],"descriptor_type":[]}]},{"id":"63","url":"https://workflowhub.eu/workflows/63","name":"COVID-19 PubSeq Pangenome Generate","description":"","organization":"COVID-19 PubSeq: Public SARS-CoV-2 Sequence Resource","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/63?version=1","name":"Version 1","author":["Tazro Inutano","Michael R. 
Crusoe","Stian Soiland-Reyes"],"descriptor_type":["CWL"]}]},{"id":"64","url":"https://workflowhub.eu/workflows/64","name":"Metagenomic Binning from Assembly","description":"  **Workflow for Metagenomics binning from assembly.\u003cbr\u003e**\r\n\r\n  Minimal inputs are: Identifier, assembly (fasta) and an associated sorted BAM file\r\n\r\n  Summary\u003cbr\u003e\r\n    - MetaBAT2 (binning)\u003cbr\u003e\r\n    - MaxBin2 (binning)\u003cbr\u003e\r\n    - SemiBin2 (binning)\u003cbr\u003e\r\n    - Binette (bin merging)\u003cbr\u003e\r\n    - EukRep (eukaryotic classification)\u003cbr\u003e\r\n    - CheckM2 (bin completeness and contamination)\u003cbr\u003e\r\n    - BUSCO (bin completeness)\u003cbr\u003e\r\n    - GTDB-Tk (bin taxonomic classification)\u003cbr\u003e\r\n    - CoverM (bin abundances)\u003cbr\u003e\r\n    \r\nIncluding:\u003cbr\u003e\r\n   **Bin annotation (workflow: https://workflowhub.eu/workflows/1170):**\u003cbr\u003e\r\n        - Bakta\u003cbr\u003e\r\n        - Interproscan\u003cbr\u003e\r\n        - Eggnog\u003cbr\u003e\r\n        - KOfamscan\u003cbr\u003e\r\n        - To RDF conversion with SAPP (optional, default on) --\u003e https://workflowhub.eu/workflows/1174/\u003cbr\u003e\r\n\r\n  Other UNLOCK workflows on WorkflowHub: https://workflowhub.eu/projects/16/workflows?view=default\u003cbr\u003e\r\n  \r\n  **All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\n    https://gitlab.com/m-unlock/cwl\u003cbr\u003e\r\n\r\n  **How to setup and use an UNLOCK workflow:**\u003cbr\u003e\r\n  https://docs.m-unlock.nl/docs/workflows/setup.html\u003cbr\u003e\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/64?version=1","name":"Version 1","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/64?version=2","name":"Version 2","author":["Jasper 
Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/64?version=3","name":"Version 3","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"4","url":"https://workflowhub.eu/workflows/64?version=4","name":"Version 4","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"5","url":"https://workflowhub.eu/workflows/64?version=5","name":"Version 5","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"6","url":"https://workflowhub.eu/workflows/64?version=6","name":"Version 6","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"7","url":"https://workflowhub.eu/workflows/64?version=7","name":"Version 7","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"8","url":"https://workflowhub.eu/workflows/64?version=8","name":"Version 8","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"9","url":"https://workflowhub.eu/workflows/64?version=9","name":"Version 9","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"10","url":"https://workflowhub.eu/workflows/64?version=10","name":"Version 10","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"11","url":"https://workflowhub.eu/workflows/64?version=11","name":"Version 11","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]},{"id":"12","url":"https://workflowhub.eu/workflows/64?version=12","name":"Version 12","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]}]},{"id":"65","url":"https://workflowhub.eu/workflows/65","name":"CLM-FATES_ALP1_simulation_5years","description":"Abstract CWL Automatically generated from the Galaxy workflow file: CLM-FATES_ ALP1 simulation (5 years)","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/65?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"66","url":"https://workflowhub.eu/workflows/66","name":"RNA-RNA interactome analysis using BWA-MEM","description":"RNA-RNA interactome analysis using ChiRA tools suite. The aligner used is BWA-MEM.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/66?version=1","name":"Version 1","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]}]},{"id":"67","url":"https://workflowhub.eu/workflows/67","name":"RNA-RNA interactome analysis using CLAN","description":"RNA-RNA interactome analysis using ChiRA tools suite. The aligner used is CLAN.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/67?version=1","name":"Version 1","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]}]},{"id":"68","url":"https://workflowhub.eu/workflows/68","name":"MC_COVID19like_Assembly_Reads","description":"This WF is based on the official Covid19-Galaxy assembly workflow as available from https://covid19.galaxyproject.org/genomics/2-assembly/ . It has been adapted to suit the needs of the analysis of metagenomics sequencing data. Prior to be submitted to INDSC databases, these data need to be cleaned from contaminant reads, including reads of possible human origin. \r\n\r\nThe assembly of the SARS-CoV-2 genome is performed using both the Unicycler and the SPAdes assemblers, similar to the original WV.\r\n\r\nTo facilitate the deposition of raw sequencing reads in INDSC databases, different fastq files are saved during the different steps of the WV. Which reflect different levels of stringency/filtration:\r\n\r\n(1) Initially fastq are filtered to remove human reads. 
\r\n(2) Subsequently, a similarity search is performed against the reference assembly of the SARS-CoV-2 genome, to retain only SARS-CoV-2 like reads. \r\n(3) Finally, SARS-CoV-2 reads are assembled, and the bowtie2 program is used to identify (and save in the corresponding fastq files) only reads that are completely identical to the final assembly of the genome.\r\n\r\nAny of the fastq files produced in (1), (2) or (3) are suitable for being submitted in  raw reads repositories. While the files filtered according to (1) are richer and contain more data, including for example genomic sequences of different microbes living in the oral cavity; files filtered according to (3) contain only the reads that are completely identical to the final assembly. This should guarantee that any re-analysis/re-assembly of these always produce consistent and identical results. File obtained at (2) include all the reads in the sequencing reaction that had some degree of similarity with the reference SARS-CoV-2 genome, these may include subgenomic RNAs, but also polymorphic regions/variants in the case of a coinfection by multiple SARS-CoV-2 strains. Consequently, reanalysis of these data is not guarateed to produce identical and consistent results, depending on the parameters used during the assembly. However, these data contain more information.\r\n\r\nPlease feel free to comment,  ask questions and/or add suggestions\r\n\r\n","organization":"Italy-Covid-data-Portal","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/68?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"69","url":"https://workflowhub.eu/workflows/69","name":"Scipion Tutorial example reaching 2.9A resolution","description":"Scipion is a workflow engine mostly for Cryo-Electron Microscopy image processing. 
In this extremely simple workflow, we load the Relion 3.0 tutorial data and process it to 2.9A resolution.","organization":"UX trial team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/69?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"73","url":"https://workflowhub.eu/workflows/73","name":"Scipion workflow for Cryo electron microscopy of SARS-CoV-2 stabilized spike in prefusion state","description":"Continuous flexibility analysis of SARS-CoV-2 Spike prefusion structures","organization":"UX trial team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/73?version=1","name":"Version 1","author":["Carlos Oscar Sorzano Sanchez"],"descriptor_type":[]}]},{"id":"75","url":"https://workflowhub.eu/workflows/75","name":"Indices builder from GBOL RDF (TTL)","description":"Workflow to build different indices for different tools from a genome and transcriptome. 
\r\n\r\nThis workflow expects an (annotated) genome in GBOL ttl format.\r\n\r\nSteps:\r\n  - SAPP: rdf2gtf (genome fasta)\r\n  - SAPP: rdf2fasta (transcripts fasta)\r\n  - STAR index (Optional for Eukaryotic origin)\r\n  - bowtie2 index\r\n  - kallisto index\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/75?version=1","name":"Version 1","author":["Bart Nijsse"],"descriptor_type":["CWL"]}]},{"id":"77","url":"https://workflowhub.eu/workflows/77","name":"NonSpliced RNAseq workflow","description":"Workflow for NonSpliced RNAseq data with multiple aligners.\r\n\r\nSteps:  \r\n    - workflow_quality.cwl:\r\n        - FastQC (control)\r\n        - fastp (trimming)\r\n    - bowtie2 (read mapping)\r\n    - sam_to_sorted-bam\r\n    - featurecounts (transcript read counts)\r\n    - kallisto (transcript [pseudo]counts)\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/77?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]}]},{"id":"95","url":"https://workflowhub.eu/workflows/95","name":"Spliced RNAseq workflow","description":"Workflow for Spliced RNAseq data\r\n**Steps:**\r\n\r\n* workflow_quality.cwl:\r\n\t* FastQC (Read Quality Control)\r\n\t* fastp (Read Trimming)\r\n* STAR (Read mapping)\r\n* featurecounts (transcript read counts)\r\n* kallisto (transcript [pseudo]counts)\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/95?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]}]},{"id":"98","url":"https://workflowhub.eu/workflows/98","name":"Example of setting up a simulation system","description":"CWL version of the 
md_list.cwl workflow for HPC.\r\n","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/98?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/98?version=2","name":"Version 2","author":[],"descriptor_type":["CWL"]}]},{"id":"99","url":"https://workflowhub.eu/workflows/99","name":"COVID-19: read pre-processing","description":"Galaxy version of pre-processing of reads from COVID-19 samples. \r\nQC + human read cleaning\r\nBased on https://github.com/Finn-Lab/Metagen-FastQC/blob/master/metagen-fastqc.sh","organization":"IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/99?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"100","url":"https://workflowhub.eu/workflows/100","name":"0: View complete virus identification","description":"Non-functional workflow to get a global view of possibilities for plant virus classification.","organization":"Integrated and Urban Plant Pathology Laboratory","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/100?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"101","url":"https://workflowhub.eu/workflows/101","name":"1: Plant virus detection with kraken2 (PE)","description":"Metagenomic dataset taxonomic classification using kraken2","organization":"Integrated and Urban Plant Pathology Laboratory","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/101?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"102","url":"https://workflowhub.eu/workflows/102","name":"2: 
Plant virus confirmation","description":"Mapping against all plant virus then make contig out of the mapped reads then blast them.","organization":"Integrated and Urban Plant Pathology Laboratory","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/102?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"103","url":"https://workflowhub.eu/workflows/103","name":"3: Plant virus exploration","description":"Just the cleaning then assembly of all reads. TO explore further follow one of the paths described in \"Global view\" (WF 0) ","organization":"Integrated and Urban Plant Pathology Laboratory","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/103?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"104","url":"https://workflowhub.eu/workflows/104","name":"ENA SARS-CoV-2 Nanopore Amplicon Sequencing Analysis Workflow","description":"A workflow for mapping and consensus generation of SARS-CoV2 whole genome amplicon nanopore data implemented in the Nextflow framework. Reads are mapped to a reference genome using Minimap2 after trimming the amplicon primers with a fixed length at both ends of the amplicons using Cutadapt. 
The consensus is called using Pysam based on a majority read support threshold per position of the Minimap2 alignment and positions with less than 30x coverage are masked using ‘N’.","organization":"SARS-CoV-2 Data Hubs","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/104?version=1","name":"Version 1","author":[],"descriptor_type":["NFL"]}]},{"id":"105","url":"https://workflowhub.eu/workflows/105","name":"ENA SARS-CoV2 Variant Calling","description":"A pipeline for mapping, calling, and annotation of SARS-CoV2 variants.","organization":"SARS-CoV-2 Data Hubs","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/105?version=1","name":"Version 1","author":[],"descriptor_type":["NFL"]}]},{"id":"106","url":"https://workflowhub.eu/workflows/106","name":"VariantCaller_GATK3.6","description":"Rare disease researchers workflow is that they submit their raw data (fastq), run the mapping and variant calling RD-Connect pipeline and obtain unannotated gvcf files to further submit to the RD-Connect GPAP or analyse on their own.\r\n\r\nThis demonstrator focuses on the variant calling pipeline. The raw genomic data is processed using the RD-Connect pipeline ([Laurie et al., 2016](https://www.ncbi.nlm.nih.gov/pubmed/27604516)) running on the standards (GA4GH) compliant, interoperable container orchestration platform.\r\n\r\nThis demonstrator will be aligned with the current implementation study on [Development of Architecture for Software Containers at ELIXIR and its use by EXCELERATE use-case communities](docs/Appendix%201%20-%20Project%20Plan%202018-biocontainers%2020171117.pdf) \r\n\r\nFor this implementation, different steps are required:\r\n\r\n1. Adapt the pipeline to CWL and dockerise elements \r\n2. 
Align with IS efforts on software containers to package the different components (Nextflow) \r\n3. Submit trio of Illumina NA12878 Platinum Genome or Exome to the GA4GH platform cloud (by Aspera or ftp server)\r\n4. Run the RD-Connect pipeline on the container platform\r\n5. Return corresponding gvcf files\r\n6. OPTIONAL: annotate and update to RD-Connect playground instance\r\n\r\nN.B: The demonstrator might have some manual steps, which will not be in production. \r\n\r\n## RD-Connect pipeline\r\n\r\nDetailed information about the RD-Connect pipeline can be found in [Laurie et al., 2016](https://www.ncbi.nlm.nih.gov/pubmed/?term=27604516)\r\n\r\n![alt text](https://raw.githubusercontent.com/inab/Wetlab2Variations/eosc-life/docs/RD-Connect_pipeline.jpg)\r\n\r\n## The applications\r\n\r\n**1\\. Name of the application: Adaptor removal**\r\nFunction: remove sequencing adaptors   \r\nContainer (readiness status, location, version): [cutadapt (v.1.18)](https://hub.docker.com/r/cnag/cutadapt)  \r\nRequired resources in cores and RAM: current container size 169MB  \r\nInput data (amount, format, directory..): raw fastq  \r\nOutput data: paired fastq without adaptors  \r\n\r\n**2\\. Name of the application: Mapping and bam sorting**\r\nFunction: align data to reference genome  \r\nContainer : [bwa-mem (v.0.7.17)](https://hub.docker.com/r/cnag/bwa) / [Sambamba (v. 0.6.8 )](https://hub.docker.com/r/cnag/sambamba)(or samtools)  \r\nResources :current container size 111MB / 32MB  \r\nInput data: paired fastq without adaptors  \r\nOutput data: sorted bam  \r\n\r\n**3\\. Name of the application: MarkDuplicates**  \r\nFunction: Mark (and remove) duplicates  \r\nContainer: [Picard (v.2.18.25)](https://hub.docker.com/r/cnag/picard)\r\nResources: current container size 261MB  \r\nInput data:sorted bam  \r\nOutput data: Sorted bam with marked (or removed) duplicates  \r\n\r\n**4\\. 
Name of the application: Base quality recalibration (BQSR)**  \r\nFunction: Base quality recalibration  \r\nContainer: [GATK (v.3.6-0)](https://hub.docker.com/r/cnag/gatk)\r\nResources: current container size 270MB  \r\nInput data: Sorted bam with marked (or removed) duplicates  \r\nOutput data: Sorted bam with marked duplicates \u0026 base quality recalculated  \r\n\r\n**5\\. Name of the application: Variant calling**  \r\nFunction: variant calling  \r\nContainer: [GATK (v.3.6-0)](https://hub.docker.com/r/cnag/gatk)\r\nResources: current container size 270MB  \r\nInput data:Sorted bam with marked duplicates \u0026 base quality recalculated  \r\nOutput data: unannotated gvcf per sample  \r\n\r\n**6\\. (OPTIONAL)Name of the application: Quality of the fastq**  \r\nFunction: report on the sequencing quality  \r\nContainer: [fastqc 0.11.8](https://hub.docker.com/r/cnag/fastqc)\r\nResources: current container size 173MB  \r\nInput data: raw fastq  \r\nOutput data: QC report \r\n\r\n## Licensing\r\n\r\nGATK declares that archived packages are made available for free to academic researchers under a limited license for non-commercial use. If you need to use one of these packages for commercial use. 
https://software.broadinstitute.org/gatk/download/archive ","organization":"EOSC-Life - Demonstrator 7: Rare Diseases","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/106?version=1","name":"Version 1","author":["José Mª Fernández"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/106?version=2","name":"Version 2","author":["José Mª Fernández"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/106?version=3","name":"Version 3","author":["José Mª Fernández"],"descriptor_type":["NFL"]}]},{"id":"107","url":"https://workflowhub.eu/workflows/107","name":"VariantCaller_GATK3.6","description":"Rare disease researchers workflow is that they submit their raw data (fastq), run the mapping and variant calling RD-Connect pipeline and obtain unannotated gvcf files to further submit to the RD-Connect GPAP or analyse on their own.\r\n\r\nThis demonstrator focuses on the variant calling pipeline. The raw genomic data is processed using the RD-Connect pipeline ([Laurie et al., 2016](https://www.ncbi.nlm.nih.gov/pubmed/27604516)) running on the standards (GA4GH) compliant, interoperable container orchestration platform.\r\n\r\nThis demonstrator will be aligned with the current implementation study on [Development of Architecture for Software Containers at ELIXIR and its use by EXCELERATE use-case communities](docs/Appendix%201%20-%20Project%20Plan%202018-biocontainers%2020171117.pdf) \r\n\r\nFor this implementation, different steps are required:\r\n\r\n1. Adapt the pipeline to CWL and dockerise elements \r\n2. Align with IS efforts on software containers to package the different components (Nextflow) \r\n3. Submit trio of Illumina NA12878 Platinum Genome or Exome to the GA4GH platform cloud (by Aspera or ftp server)\r\n4. Run the RD-Connect pipeline on the container platform\r\n5. Return corresponding gvcf files\r\n6. 
OPTIONAL: annotate and update to RD-Connect playground instance\r\n\r\nN.B: The demonstrator might have some manual steps, which will not be in production. \r\n\r\n## RD-Connect pipeline\r\n\r\nDetailed information about the RD-Connect pipeline can be found in [Laurie et al., 2016](https://www.ncbi.nlm.nih.gov/pubmed/?term=27604516)\r\n\r\n![alt text](https://raw.githubusercontent.com/inab/Wetlab2Variations/eosc-life/docs/RD-Connect_pipeline.jpg)\r\n\r\n## The applications\r\n\r\n**1\\. Name of the application: Adaptor removal**\r\nFunction: remove sequencing adaptors   \r\nContainer (readiness status, location, version): [cutadapt (v.1.18)](https://hub.docker.com/r/cnag/cutadapt)  \r\nRequired resources in cores and RAM: current container size 169MB  \r\nInput data (amount, format, directory..): raw fastq  \r\nOutput data: paired fastq without adaptors  \r\n\r\n**2\\. Name of the application: Mapping and bam sorting**  \r\nFunction: align data to reference genome  \r\nContainer : [bwa-mem (v.0.7.17)](https://hub.docker.com/r/cnag/bwa) / [Sambamba (v. 0.6.8 )](https://hub.docker.com/r/cnag/sambamba)(or samtools)  \r\nResources :current container size 111MB / 32MB  \r\nInput data: paired fastq without adaptors  \r\nOutput data: sorted bam  \r\n\r\n**3\\. Name of the application: MarkDuplicates**  \r\nFunction: Mark (and remove) duplicates  \r\nContainer: [Picard (v.2.18.25)](https://hub.docker.com/r/cnag/picard)\r\nResources: current container size 261MB  \r\nInput data:sorted bam  \r\nOutput data: Sorted bam with marked (or removed) duplicates  \r\n\r\n**4\\. Name of the application: Base quality recalibration (BQSR)**  \r\nFunction: Base quality recalibration  \r\nContainer: [GATK (v.3.6-0)](https://hub.docker.com/r/cnag/gatk)\r\nResources: current container size 270MB  \r\nInput data: Sorted bam with marked (or removed) duplicates  \r\nOutput data: Sorted bam with marked duplicates \u0026 base quality recalculated  \r\n\r\n**5\\. 
Name of the application: Variant calling**  \r\nFunction: variant calling  \r\nContainer: [GATK (v.3.6-0)](https://hub.docker.com/r/cnag/gatk)\r\nResources: current container size 270MB  \r\nInput data:Sorted bam with marked duplicates \u0026 base quality recalculated  \r\nOutput data: unannotated gvcf per sample  \r\n\r\n**6\\. (OPTIONAL)Name of the application: Quality of the fastq**  \r\nFunction: report on the sequencing quality  \r\nContainer: [fastqc 0.11.8](https://hub.docker.com/r/cnag/fastqc)\r\nResources: current container size 173MB  \r\nInput data: raw fastq  \r\nOutput data: QC report \r\n\r\n## Licensing\r\n\r\nGATK declares that archived packages are made available for free to academic researchers under a limited license for non-commercial use. If you need to use one of these packages for commercial use. https://software.broadinstitute.org/gatk/download/archive ","organization":"EOSC-Life - Demonstrator 7: Rare Diseases","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/107?version=1","name":"Version 1","author":["José Mª Fernández","Laura Rodriguez-Navas"],"descriptor_type":["CWL"]}]},{"id":"109","url":"https://workflowhub.eu/workflows/109","name":"sars-cov-2-variation-reporting/COVID-19-VARIATION-REPORTING","description":"This workflow takes a VCF dataset of variants produced by any of the *-variant-calling workflows in https://github.com/galaxyproject/iwc/tree/main/workflows/sars-cov-2-variant-calling and generates tabular lists of variants by Samples and by Variant, and an overview plot of variants and their allele-frequencies.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/109?version=1","name":"v0.1","author":["Wolfgang 
Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/109?version=2","name":"v0.1.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/109?version=3","name":"v0.1.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/109?version=4","name":"v0.1.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/109?version=5","name":"v0.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/109?version=6","name":"v0.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/109?version=7","name":"v0.3.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/109?version=8","name":"v0.3.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/109?version=9","name":"v0.3.4","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"110","url":"https://workflowhub.eu/workflows/110","name":"sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA","description":"COVID-19: variation analysis on ARTIC PE data\r\n---------------------------------------------\r\n\r\nThe workflow for Illumina-sequenced ampliconic data builds on the RNASeq workflow\r\nfor paired-end data using the same steps for mapping and variant calling, but\r\nadds extra logic for trimming amplicon primer sequences off reads with the ivar\r\npackage. 
In addition, this workflow uses ivar also to identify amplicons\r\naffected by primer-binding site mutations and, if possible, excludes reads\r\nderived from such \"tainted\" amplicons when calculating allele-frequencies\r\nof other variants.\r\n","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/110?version=1","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/110?version=2","name":"v0.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/110?version=3","name":"v0.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/110?version=4","name":"v0.4","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/110?version=5","name":"v0.4.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/110?version=6","name":"v0.4.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/110?version=7","name":"v0.5","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/110?version=8","name":"v0.5.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/110?version=9","name":"v0.5.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/110?version=10","name":"v0.5.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/110?version=11","name":"v0.5.4","author":["Wolfgang 
Maier"],"descriptor_type":["GALAXY"]}]},{"id":"111","url":"https://workflowhub.eu/workflows/111","name":"sars-cov-2-ont-artic-variant-calling/COVID-19-ARTIC-ONT","description":"COVID-19: variation analysis on ARTIC ONT data\n----------------------------------------------\n\nThis workflow for ONT-sequenced ARTIC data is modeled after the alignment/variant-calling steps of the [ARTIC pipeline](https://artic.readthedocs.io/en/latest/). It performs, essentially, the same steps as that pipeline’s minion command, i.e. read mapping with minimap2 and variant calling with medaka. Like the Illumina ARTIC workflow it uses ivar for primer trimming. Since ONT-sequenced reads have a much higher error rate than Illumina-sequenced reads and are therefor plagued more by false-positive variant calls, this workflow does make no attempt to handle amplicons affected by potential primer-binding site mutations.\n","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/111?version=1","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/111?version=2","name":"v0.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/111?version=3","name":"v0.2.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/111?version=4","name":"v0.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/111?version=5","name":"v0.3.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"112","url":"https://workflowhub.eu/workflows/112","name":"sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA","description":"This workflows performs single end read mapping with bowtie2 followed by sensitive variant calling 
across a wide range of AFs with lofreq","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/112?version=1","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/112?version=2","name":"v0.1.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/112?version=3","name":"v0.1.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/112?version=4","name":"v0.1.4","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/112?version=5","name":"v0.1.5","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/112?version=6","name":"v0.1.6","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"113","url":"https://workflowhub.eu/workflows/113","name":"sars-cov-2-pe-illumina-wgs-variant-calling/COVID-19-PE-WGS-ILLUMINA","description":"COVID-19: variation analysis on WGS PE data\r\n-------------------------------------------\r\n\r\nThis workflows performs paired end read mapping with bwa-mem followed by\r\nsensitive variant calling across a wide range of AFs with lofreq and variant\r\nannotation with snpEff 4.5covid19.\r\n","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/113?version=1","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/113?version=2","name":"v0.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/113?version=3","name":"v0.2.1","author":["Wolfgang 
Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/113?version=4","name":"v0.2.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"114","url":"https://workflowhub.eu/workflows/114","name":"Trinity RNA Assembly","description":"A porting of the Trinity RNA assembly pipeline, https://trinityrnaseq.github.io, that uses Nextflow to handle the underlying sub-tasks.\r\nThis enables additional capabilities to better use HPC resources, such as packing of tasks to fill up nodes and use of node-local disks to improve I/O.\r\nBy design, the pipeline separates the workflow logic (main file) and the cluster-specific configuration (config files), improving portability.\r\n\r\nBased on a pipeline by Sydney Informatics Hub: https://github.com/Sydney-Informatics-Hub/SIH-Raijin-Trinity","organization":"Australian BioCommons","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/114?version=1","name":"Version 1","author":["Marco De La Pierre"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/114?version=2","name":"Version 2","author":["Marco De La Pierre"],"descriptor_type":["NFL"]}]},{"id":"115","url":"https://workflowhub.eu/workflows/115","name":"Object tracking using CellProfiler","description":"Workflow for tracking objects in Cell Profiler:\r\nhttps://training.galaxyproject.org/training-material/topics/imaging/tutorials/object-tracking-using-cell-profiler/tutorial.html","organization":"Euro-BioImaging, IBISBA Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/115?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"117","url":"https://workflowhub.eu/workflows/117","name":"Workflow constructed from history 'test dwc from PNDB Data package EML DwC 
annotations'","description":"Workflow to take DataOne data packages (raw datasets + metadata written in Ecological Metadata Standard) as input and create a DwC occurence.csv file almost ready to put in a Dawrin core Archive using eml-annotations at the attribute level","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/117?version=1","name":"Version 1","author":["Yvan Le Bras"],"descriptor_type":["GALAXY"]}]},{"id":"118","url":"https://workflowhub.eu/workflows/118","name":"COSIFER","description":"# COnSensus Interaction Network InFErence Service\r\nInference framework for reconstructing networks using a consensus approach between multiple methods and data sources.\r\n\r\n![alt text](https://github.com/PhosphorylatedRabbits/cosifer/raw/master/docs/_static/logo.png)\r\n\r\n## Reference\r\n[Manica, Matteo, Charlotte, Bunne, Roland, Mathis, Joris, Cadow, Mehmet Eren, Ahsen, Gustavo A, Stolovitzky, and María Rodríguez, Martínez. 
\"COSIFER: a python package for the consensus inference of molecular interaction networks\".Bioinformatics (2020)](https://doi.org/10.1093/bioinformatics/btaa942).","organization":"iPC: individualizedPaediatricCure","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/118?version=1","name":"Version 1","author":["Laura Rodriguez-Navas","José Mª Fernández"],"descriptor_type":["CWL"]}]},{"id":"119","url":"https://workflowhub.eu/workflows/119","name":"COSIFER","description":"# COnSensus Interaction Network InFErence Service\r\nInference framework for reconstructing networks using a consensus approach between multiple methods and data sources.\r\n\r\n![alt text](https://raw.githubusercontent.com/PhosphorylatedRabbits/cosifer/master/docs/_static/logo.png)\r\n\r\n## Reference\r\n[Manica, Matteo, Charlotte, Bunne, Roland, Mathis, Joris, Cadow, Mehmet Eren, Ahsen, Gustavo A, Stolovitzky, and María Rodríguez, Martínez. \"COSIFER: a python package for the consensus inference of molecular interaction networks\".Bioinformatics (2020)](https://doi.org/10.1093/bioinformatics/btaa942).","organization":"iPC: individualizedPaediatricCure","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/119?version=1","name":"Version 1","author":["Laura Rodriguez-Navas","José Mª Fernández"],"descriptor_type":["NFL"]}]},{"id":"120","url":"https://workflowhub.eu/workflows/120","name":"Jupyter Notebook Protein MD Setup tutorial","description":"# Protein MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/120?version=1","name":"Version 1","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/120?version=2","name":"Version 2","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/120?version=3","name":"Version 3","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/120?version=4","name":"Version 4","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/120?version=5","name":"Version 5","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/120?version=6","name":"Version 6","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/120?version=7","name":"Version 
7","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"8","url":"https://workflowhub.eu/workflows/120?version=8","name":"Version 8","author":["Genís Bayarri","Adam Hospital"],"descriptor_type":[]}]},{"id":"121","url":"https://workflowhub.eu/workflows/121","name":"Molecular Dynamics Simulation","description":"CWL version of the md_list.py workflow for HPC. This performs a system setup and runs a molecular dynamics simulation on the structure passed to this workflow. This workflow uses the md\\_gather.cwl sub-workflow to gather the outputs together to return these.\r\nTo work with more than one structure this workflow can be called from either the md\\_launch.cwl workflow, or the md\\_launch\\_mutate.cwl workflow. These use scatter for parallelising the workflow. md\\_launch.cwl operates on a list of individual input molecule files. md\\_launch\\_mutate.cwl operates on a single input molecule file, and a list of mutations to apply to that molecule. Within that list of mutations, a value of 'WT' will indicate that the molecule should be simulated without any mutation being applied.\r\n","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/121?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"122","url":"https://workflowhub.eu/workflows/122","name":"SPARQL query (in a file) on graph database","description":"A workflow querying on an endpoint of a graph database by a file containing a SPARQL query.","organization":"EOSC-Life-WP6-Demos","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/122?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/122?version=2","name":"Version 
2","author":[],"descriptor_type":["CWL"]}]},{"id":"123","url":"https://workflowhub.eu/workflows/123","name":"Compute daily and monthly mean from meteorological station measurements","description":"This workflow is used to process timeseries from meteorological stations in Finland but can be applied to any timeseries according it follows the same format.\r\n\r\nTake a temperature timeseries from any meteorological station. Input format is csv and it must be standardized with 6 columns:\r\n\r\n1. Year\t(ex: 2021)\r\n2. month\t(ex: 1)\r\n3. day\t(ex: 15) \r\n4. Time\t(ex: 16:56)\r\n5. Time zone\t(such as UTC)\r\n6. Air temperature (degC)","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/123?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"124","url":"https://workflowhub.eu/workflows/124","name":"1: Plant virus detection with kraken2 (SE)","description":"Metagenomic dataset taxonomic classification using kraken2","organization":"Integrated and Urban Plant Pathology Laboratory","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/124?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"125","url":"https://workflowhub.eu/workflows/125","name":"MOLGENIS/VIP: Variant Interpretation Pipeline","description":"Variant Interpretation Pipeline (VIP) that annotates, filters and reports prioritized causal variants in humans, see https://github.com/molgenis/vip for more information.","organization":"MOLGENIS","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/125?version=1","name":"v7.8.0","author":[],"descriptor_type":[]}]},{"id":"126","url":"https://workflowhub.eu/workflows/126","name":"EJP-RD WP13 case-study CAKUT 
momix analysis","description":" Joint multi-omics dimensionality reduction approaches for CAKUT data using peptidome and proteome data\r\n \r\n **Brief description**\r\n In (Cantini et al. 2020), Cantini et al. evaluated 9 representative joint dimensionality reduction (jDR) methods for multi-omics integration and analysis and . The methods are Regularized Generalized Canonical Correlation Analysis (RGCCA), Multiple co-inertia analysis (MCIA), Multi-Omics Factor Analysis (MOFA), Multi-Study Factor Analysis (MSFA), iCluster, Integrative NMF (intNMF), Joint and Individual Variation Explained (JIVE), tensorial Independent Component Analysis (tICA), and matrix-tri-factorization (scikit-fusion) (Tenenhaus, Tenenhaus, and Groenen 2017; Bady et al. 2004; Argelaguet et al. 2018; De Vito et al. 2019; Shen, Olshen, and Ladanyi 2009; Chalise and Fridley 2017; Lock et al. 2013; Teschendorff et al. 2018; Žitnik and Zupan 2015).\r\n\r\nThe authors provided their benchmarking procedure, multi-omics  mix  (momix), as Jupyter Notebook on GitHub (https://github.com/ComputationalSystemsBiology/momix-notebook) and project environment through Conda. In momix, the factorization methods are called from an R script, and parameters of the methods are also set in that script. We did not modify the parameters of the methods in the provided script. 
We set factor number to 2.\r\n","organization":"EJPRD WP13 case-studies workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/126?version=1","name":"Version 1","author":["Ozan Ozisik","Juma Bayjan","Cenna Doornbos","Friederike Ehrhart","Matthias Haimel","Laura Rodriguez-Navas","José Mª Fernández"],"descriptor_type":["SMK"]}]},{"id":"127","url":"https://workflowhub.eu/workflows/127","name":"Jupyter Notebook Protein-ligand Docking tutorial (Cluster90)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorials aim to illustrate the process of **protein-ligand docking**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 
[675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/127?version=1","name":"Version 1","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/127?version=2","name":"Version 2","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/127?version=3","name":"Version 3","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/127?version=4","name":"Version 4","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/127?version=5","name":"Version 5","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/127?version=6","name":"Version 6","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/127?version=7","name":"Version 7","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]},{"id":"8","url":"https://workflowhub.eu/workflows/127?version=8","name":"Version 8","author":["Douglas Lowe","Genís Bayarri","Adam Hospital"],"descriptor_type":[]}]},{"id":"128","url":"https://workflowhub.eu/workflows/128","name":"Jupyter Notebook Protein-ligand 
Docking tutorial (PDBe REST API)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorials aim to illustrate the process of **protein-ligand docking**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A 
computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/128?version=1","name":"Version 1","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/128?version=2","name":"Version 2","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/128?version=3","name":"Version 3","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/128?version=4","name":"Version 4","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/128?version=5","name":"Version 5","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/128?version=6","name":"Version 6","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/128?version=7","name":"Version 7","author":["Adam Hospital","Douglas Lowe","Genís Bayarri"],"descriptor_type":[]}]},{"id":"129","url":"https://workflowhub.eu/workflows/129","name":"Jupyter Notebook Protein-ligand Docking tutorial (Fpocket)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorials aim to illustrate the process of **protein-ligand docking**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/129?version=1","name":"Version 1","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/129?version=2","name":"Version 
2","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/129?version=3","name":"Version 3","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/129?version=4","name":"Version 4","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/129?version=5","name":"Version 5","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/129?version=6","name":"Version 6","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/129?version=7","name":"Version 7","author":["Douglas Lowe","Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"130","url":"https://workflowhub.eu/workflows/130","name":"Jupyter Notebook Amber Protein MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in 
Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/130?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/130?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/130?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/130?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/130?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/130?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/130?version=7","name":"Version 7","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"131","url":"https://workflowhub.eu/workflows/131","name":"Jupyter Notebook Amber Protein Ligand Complex MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the 
**Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/131?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/131?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/131?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/131?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/131?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/131?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/131?version=7","name":"Version 7","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]}]},{"id":"132","url":"https://workflowhub.eu/workflows/132","name":"Jupyter Notebook Amber Constant pH MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/132?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/132?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/132?version=3","name":"Version 3","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/132?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/132?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/132?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/132?version=7","name":"Version 7","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"136","url":"https://workflowhub.eu/workflows/136","name":"ORSON: workflow for prOteome and tRanScriptome functiOnal aNnotation","description":"ORSON combine state-of-the-art tools for annotation processes within a Nextflow pipeline: sequence similarity search (PLAST, BLAST or Diamond), functional annotation retrieval (BeeDeeM) and functional prediction (InterProScan). When required, BUSCO completness evaluation and eggNOG Orthogroup annotation can be activated. While ORSON results can be analyzed through the command-line, it also offers the possibility to be compatible with BlastViewer or Blast2GO graphical tools.\r\n\r\n","organization":"SeBiMER","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/136?version=1","name":"Version 1","author":["Cyril Noel","Alexandre Cormier","Patrick Durand","Laura Leroi","Pierre Cuzin"],"descriptor_type":["NFL"]}]},{"id":"137","url":"https://workflowhub.eu/workflows/137","name":"parallel-accession-download/main","description":"Downloads fastq files for sequencing run accessions provided in a text file using fasterq-dump. 
Creates one job per listed run accession.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/137?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/137?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/137?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/137?version=4","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/137?version=5","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/137?version=6","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/137?version=7","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/137?version=8","name":"v0.1.13","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/137?version=9","name":"v0.1.14","author":[],"descriptor_type":["GALAXY"]}]},{"id":"138","url":"https://workflowhub.eu/workflows/138","name":"sars-cov-2-consensus-from-variation/COVID-19-CONSENSUS-CONSTRUCTION","description":"Build a consensus sequence from FILTER PASS variants with intrasample allele-frequency above a configurable consensus threshold.\nHard-mask regions with low coverage (but not consensus variants within them) and ambiguous sites.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/138?version=1","name":"v0.2","author":["Wolfgang 
Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/138?version=2","name":"v0.2.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/138?version=3","name":"v0.2.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/138?version=4","name":"v0.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/138?version=5","name":"v0.4","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/138?version=6","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/138?version=7","name":"v0.4.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/138?version=8","name":"v0.4.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/138?version=9","name":"v0.4.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"140","url":"https://workflowhub.eu/workflows/140","name":"microPIPE: a pipeline for high-quality bacterial genome construction using ONT and Illumina sequencing","description":"microPIPE was developed to automate high-quality complete bacterial genome assembly using Oxford Nanopore Sequencing in combination with Illumina sequencing.\r\n\r\nTo build microPIPE we evaluated the performance of several tools at each step of bacterial genome assembly, including basecalling, assembly, and polishing. Results at each step were validated using the high-quality ST131 Escherichia coli strain EC958 (GenBank: HG941718.1). 
After appraisal of each step, we selected the best combination of tools to achieve the most consistent and best quality bacterial genome assemblies.\r\n\r\nThe workflow below summarises the different steps of the pipeline (with each selected tool) and the approximate run time (using GPU basecalling, averaged over 12 E. coli isolates sequenced on a R9.4 MinION flow cell). Dashed boxes correspond to optional steps in the pipeline.\r\n\r\nMicropipe has been written in Nextflow and uses Singularity containers. It can use both GPU and CPU resources.\r\n\r\nFor more information please see our publication here: https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-021-07767-z\r\n\r\nInfrastructure\\_deployment\\_metadata: Zeus (Pawsey)","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/140?version=1","name":"Version 1","author":["Valentine Murigneux"],"descriptor_type":["NFL"]}]},{"id":"141","url":"https://workflowhub.eu/workflows/141","name":"HI-FRIENDS HI data cube source finding and characterization","description":"This repository contains the workflow used to find and characterize the HI sources in the data cube of the SKA Data Challenge 2. It was developed to process a simulated [SKA data cube](https://sdc2.astronomers.skatelescope.org/sdc2-challenge/data) data cube, but can be adapted for clean HI data cubes from other radio observatories.\r\n\r\nThe workflow is managed and executed using snakemake workflow management system. It uses [https://spectral-cube.readthedocs.io/en/latest/](http://) based on [https://dask.org/](http://) parallelization tool and [https://www.astropy.org/](http://) suite to divide the large cube in smaller pieces. On each of the subcubes, we execute [https://github.com/SoFiA-Admin/SoFiA-2](http://) for masking the subcubes, find sources and characterize their properties. 
Finally, the individual catalogs are cleaned, concatenated into a single catalog, and duplicates from the overlapping regions are eliminated. Some diagnostic plots are produced using Jupyter notebook.\r\n\r\nThe documentation can be found in the [Documentation page](https://hi-friends-sdc2.readthedocs.io/en/latest/index.html). The workflow and the results can be cited in the [Zenodo record](https://doi.org/10.5281/zenodo.5167659).","organization":"IAA-CSIC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/141?version=1","name":"Version 1","author":[],"descriptor_type":["SMK"]}]},{"id":"142","url":"https://workflowhub.eu/workflows/142","name":"16S_biodiversity_BIOM","description":"This is a Galaxy workflow that uses to convert the16S BIOM file to table and figures. It is part of the metaDEGalaxy workflow MetaDEGalaxy: Galaxy workflow for differential abundance analysis of 16s metagenomic data. ","organization":"Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/142?version=1","name":"Version 1","author":["Mike Thang"],"descriptor_type":["GALAXY"]}]},{"id":"143","url":"https://workflowhub.eu/workflows/143","name":"Germline-ShortV @ NCI-Gadi","description":"Germline-ShortV @ NCI-Gadi is an implementation of the BROAD Institute's best practice workflow for germline short variant discovery. This implementation is optimised for the National Compute Infrastucture's Gadi HPC, utilising scatter-gather parallelism to enable use of multiple nodes with high CPU or memory efficiency. This workflow requires sample BAM files, which can be generated using the [Fastq-to-bam @ NCI-Gadi](https://workflowhub.eu/workflows/146) pipeline. Germline-ShortV can be applied to model and non-model organisms (including non-diploid organisms). 
\r\n\r\nInfrastructure\\_deployment\\_metadata: Gadi (NCI)","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/143?version=1","name":"Version 1","author":["Tracy Chew","Cali Willet","Georgina Samaha","Rosemarie Sadsad"],"descriptor_type":[]}]},{"id":"144","url":"https://workflowhub.eu/workflows/144","name":"GATK4 Fastq to joint-called cohort VCF with Cromwell on SLURM","description":"# SLURM HPC Cromwell implementation of GATK4 germline variant calling pipeline\r\nSee the [GATK](https://gatk.broadinstitute.org/hc/en-us) website for more information on this toolset \r\n## Assumptions\r\n- Using hg38 human reference genome build\r\n- Running using HPC/SLURM scheduling. This repo was specifically tested on Pawsey Zeus machine, primarily running in the `/scratch` partition. \r\n- Starting from short-read Illumina paired-end fastq files as input\r\n\r\n### Dependencies\r\nThe following versions have been tested and work, but GATK and Cromwell are regularly updated and so one must consider whether they would like to use newer versions of these tools. \r\n- BWA/0.7.15\r\n- GATK v4.0.6.0\r\n- SAMtools/1.5\r\n- picard/2.9\r\n- Python/2.7\r\n- Cromwell v61\r\n\r\n## Quick start guide\r\n### Installing and preparing environment for GATK4 with Cromwell\r\n\r\n1. Clone repository\r\n```\r\ngit clone https://github.com/SarahBeecroft/slurmCromwellGATK4.git\r\ncd slurmCromwellGATK4\r\nchmod +x *.sh\r\n```\r\n\r\n2. Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) if you haven’t already. This is best placed in your `/group` directory to avoid filling your small `/home` directory, or being purged is placed in the `/scratch` directory.\r\n\r\n3. Create Conda environment using the supplied conda environment file\r\n\r\n```\r\nconda env create --file gatk4_pipeline.yml\r\n```\r\n\r\n3. 
Download the necessary .jar files\r\n    - The Cromwell workfow orchestration engine can be downloaded from https://github.com/broadinstitute/cromwell/releases/ \r\n    - GATK can be downloaded from https://github.com/broadinstitute/gatk/releases. Unzip the file with `unzip` \r\n    - Picard can be downloaded from https://github.com/broadinstitute/picard/releases/\r\n\r\n\r\n4. If you do not have the resource bundle files already, these need to be downloaded. In future they will be cached on Pawsey systems. The bundle data should be download from the [Google Cloud bucket](https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0;tab=objects?_ga=2.98248159.1769807612.1582055494-233304531.1578854612\u0026pli=1\u0026prefix=\u0026forceOnObjectsSortingFiltering=false) and not from the FTP site, which is missing various files. Refer to this handy [blog post](https://davetang.org/muse/2020/02/21/using-google-cloud-sdk-to-download-gatk-resource-bundle-files/) on how to download the resource files using Google Cloud SDK. There is a Slurm script (download_bundle.slurm) that can be used to download all hg38 files from the Google Cloud bucket. The files were downloaded in /scratch/pawsey0001/sbeecroft/hg38/v0, which needs to be moved before the data becomes purged after 30 days. 
Note that Homo_sapiens_assembly38.dbsnp138.vcf.gz was from the FTP bundle as this file could not be downloaded using the Conda version of Google Cloud SDK.\r\n\r\nNote that the `hg38_wgs_scattered_calling_intervals.txt` will need to be to generated using the following:\r\n\r\n```\r\ncd \u003cyour_resource_dir\u003e\r\nfind `pwd` -name \"scattered.interval_list\" -print | sort \u003e hg38_wgs_scattered_calling_intervals.txt\r\n```\r\n\r\nThese files are required for Multisample_Fastq_to_Gvcf_GATK4.\r\n\r\n```\r\nHomo_sapiens_assembly38.dict\r\nHomo_sapiens_assembly38.fasta\r\nHomo_sapiens_assembly38.fasta.fai\r\nHomo_sapiens_assembly38.fasta.64.alt\r\nHomo_sapiens_assembly38.fasta.64.amb\r\nHomo_sapiens_assembly38.fasta.64.ann\r\nHomo_sapiens_assembly38.fasta.64.bwt\r\nHomo_sapiens_assembly38.fasta.64.pac\r\nHomo_sapiens_assembly38.fasta.64.sa\r\nHomo_sapiens_assembly38.fasta.amb\r\nHomo_sapiens_assembly38.fasta.ann\r\nHomo_sapiens_assembly38.fasta.bwt\r\nHomo_sapiens_assembly38.fasta.pac\r\nHomo_sapiens_assembly38.fasta.sa\r\nHomo_sapiens_assembly38.dbsnp138.vcf.gz (needs to be gunzipped)\r\nHomo_sapiens_assembly38.dbsnp138.vcf.idx\r\nMills_and_1000G_gold_standard.indels.hg38.vcf.gz\r\nMills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi\r\nHomo_sapiens_assembly38.dbsnp138.vcf\r\nHomo_sapiens_assembly38.dbsnp138.vcf.idx\r\nHomo_sapiens_assembly38.known_indels.vcf.gz\r\nHomo_sapiens_assembly38.known_indels.vcf.gz.tbi\r\n```\r\n\r\nThese files are required for 
Multisample_jointgt_GATK4.\r\n\r\n```\r\nwgs_evaluation_regions.hg38.interval_list\r\nhg38.custom_100Mb.intervals\r\nHomo_sapiens_assembly38.dbsnp138.vcf\r\nHomo_sapiens_assembly38.dbsnp138.vcf.idx\r\n1000G_phase1.snps.high_confidence.hg38.vcf.gz\r\n1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi\r\n1000G_omni2.5.hg38.vcf.gz\r\n1000G_omni2.5.hg38.vcf.gz.tbi\r\nAxiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz\r\nAxiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz.tbi\r\nhapmap_3.3.hg38.vcf.gz\r\nhapmap_3.3.hg38.vcf.gz.tbi\r\n```\r\n\r\n\r\n5. Set up the config files. Files that you need to edit with the correct paths to your data/jar files or other specific configurations are:\r\n    - `Multisample_Fastq_to_Gvcf_GATK4_inputs_hg38.json`\r\n    - `Multisample_jointgt_GATK4_inputs_hg38.json`\r\n        - both json files will need the correct paths to your reference file locations, and the file specifying your inputs i.e. `samples.txt` or `gvcfs.txt`\r\n    - `samples.txt`\r\n    - `gvcfs.txt`\r\n        - These are the sample input files (tab seperated)\r\n        - The format for samples.txt is sampleID, sampleID_readgroup, path_to_fastq_R1_file, path_to_fastq_R2_file,\r\n        - The format for gvcfs.txt is sample ID, gvcf, gvcf .tbi index file\r\n        - Examples are included in this repo\r\n        - NOTE: Having tabs, not spaces, is vital for parsing the file. Visual studio code tends to introduce spaces, so if you are having issues, check the file with another text editor such as sublime. \r\n    - `launch_cromwell.sh`\r\n    - `launch_jointgt.sh`\r\n        - These are the scripts which launch the pipeline. \r\n        - `launch_cromwell.sh` launches the fastq to gvcf stage\r\n        - `launch_jointgt.sh` launched the gvcf joint genotyping to cohort vcf step. 
This is perfomed when you have run all samples through the fastq to gvcf stage.\r\n        - Check the paths and parameters make sense for your machine\r\n    - `slurm.conf`\r\n        - the main options here relate to the job scheduler. If you are running on Zeus at Pawsey, you should not need to alter these parameters.\r\n    - `cromwell.options`\r\n        - `cromwell.options` requires editing to provide the directory where you would like the final workflow outputs to be written\r\n    - `Multisample_Fastq_to_Gvcf_GATK4.wdl`\r\n    - `ruddle_fastq_to_gvcf_single_sample_gatk4.wdl`\r\n        - The paths to your jar files will need to be updated\r\n        - The path to your conda `activate` binary will need to be updated (e.g. `/group/projectID/userID/miniconda/bin/activate`)\r\n\r\n6. Launch the job using `sbatch launch_cromwell.sh`. When that has completed successfully, you can launch the second stage of the pipeline (joint calling) with `sbatch launch_jointgt.sh`.\r\n\r\n### Overview of the steps in `Multisample_Fastq_to_Gvcf_GATK4.wdl`\r\nThis part of the pipeline takes short-read, Illumina paired-end fastq files as the input. The outputs generated are sorted, duplicate marked bam files and their indices, duplicate metric information, and a GVCF file for each sample. The GVCF files are used as input for the second part of the pipeline (joint genotyping).\r\n\r\n```\r\nFastqToUbam\r\nGetBwaVersion\r\nSamToFastqAndBwaMem\r\nMergeBamAlignment\r\nSortAndFixTags\r\nMarkDuplicates\r\nCreateSequenceGroupingTSV\r\nBaseRecalibrator\r\nGatherBqsrReports\r\nApplyBQSR\r\nGatherBamFiles\r\nHaplotypeCaller\r\nMergeGVCFs\r\n```\r\n\r\n### Overview of the steps in `Multisample_jointgt_GATK4.wdl`\r\nThis part of the pipeline takes GVCF files (one per sample), and performs joint genotyping across all of the provided samples. This means that old previously generated GVCFs can be joint-called with new GVCFs whenever you need to add new samples. 
The key output from this is a joint-genotyped, cohort-wide VCF file.\r\n\r\n```\r\nGetNumberOfSamples\r\nImportGVCFs\r\nGenotypeGVCFs\r\nHardFilterAndMakeSitesOnlyVcf\r\nIndelsVariantRecalibrator\r\nSNPsVariantRecalibratorCreateModel\r\nSNPsVariantRecalibrator\r\nGatherTranches\r\nApplyRecalibration\r\nGatherVcfs\r\nCollectVariantCallingMetrics\r\nGatherMetrics\r\nDynamicallyCombineIntervals\r\n```\r\n","organization":"Australian BioCommons, Pawsey Supercomputing Research Centre","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/144?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"145","url":"https://workflowhub.eu/workflows/145","name":"Trinity @ NCI-Gadi","description":"Description: Trinity @ NCI-Gadi contains a staged [Trinity](https://github.com/trinityrnaseq/trinityrnaseq/wiki) workflow that can be run on the National Computational Infrastructure’s (NCI) Gadi supercomputer. Trinity performs de novo transcriptome assembly of RNA-seq data by combining three independent software modules Inchworm, Chrysalis and Butterfly to process RNA-seq reads. The algorithm can detect isoforms, handle paired-end reads, multiple insert sizes and strandedness. \r\n\r\nInfrastructure\\_deployment\\_metadata: Gadi (NCI)","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/145?version=1","name":"Version 1","author":["Georgina Samaha","Rosemarie Sadsad","Tracy Chew"],"descriptor_type":[]}]},{"id":"146","url":"https://workflowhub.eu/workflows/146","name":"Fastq-to-bam @ NCI-Gadi","description":"Fastq-to-BAM @ NCI-Gadi is a genome alignment workflow that takes raw FASTQ files, aligns them to a reference genome and outputs analysis ready BAM files. 
This workflow is designed for the National Computational Infrastructure's (NCI) Gadi supercompter, leveraging multiple nodes on NCI Gadi to run all stages of the workflow in parallel, either massively parallel using the scatter-gather approach or parallel by sample. It consists of a number of stages and follows the BROAD Institute's best practice recommendations. \r\n\r\nInfrastructure\\_deployment\\_metadata: Gadi (NCI)","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/146?version=1","name":"Version 1","author":["Cali Willet","Tracy Chew","Georgina Samaha","Rosemarie Sadsad"],"descriptor_type":[]}]},{"id":"147","url":"https://workflowhub.eu/workflows/147","name":"GATK4 Fastq to joint-called cohort VCF with Cromwell on local cluster (no job scheduler)","description":"# Local Cromwell implementation of GATK4 germline variant calling pipeline\r\nSee the [GATK](https://gatk.broadinstitute.org/hc/en-us) website for more information on this toolset \r\n## Assumptions\r\n- Using hg38 human reference genome build\r\n- Running 'locally' i.e. not using HPC/SLURM scheduling, or containers. This repo was specifically tested on Pawsey Nimbus 16 CPU, 64GB RAM virtual machine, primarily running in the `/data` volume storage partition. \r\n- Starting from short-read Illumina paired-end fastq files as input\r\n\r\n### Dependencies\r\nThe following versions have been tested and work, but GATK and Cromwell are regularly updated and so one must consider whether they would like to use newer versions of these tools. \r\n- BWA/0.7.15\r\n- GATK v4.0.6.0\r\n- SAMtools/1.5\r\n- picard/2.9\r\n- Python/2.7\r\n- Cromwell v61\r\n\r\n## Quick start guide\r\n### Installing and preparing environment for GATK4 with Cromwell\r\n\r\n1. 
Clone repository\r\n```\r\ngit clone https://github.com/SarahBeecroft/cromwellGATK4.git\r\ncd cromwellGATK4\r\nchmod 777 *.sh\r\n```\r\n\r\n2. Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) if you haven’t already. Create Conda environment using the supplied conda environment file\r\n\r\n```\r\nconda env create --file gatk4_pipeline.yml\r\n```\r\n\r\n3. Download the necessary .jar files\r\n    - The Cromwell workfow orchestration engine can be downloaded from https://github.com/broadinstitute/cromwell/releases/ \r\n    - GATK can be downloaded from https://github.com/broadinstitute/gatk/releases. Unzip the file with `unzip` \r\n    - Picard can be downloaded from https://github.com/broadinstitute/picard/releases/\r\n\r\n\r\n4. Upload the resource bundle file from IRDS using rclone or filezilla and unpack it with `tar xzvf resource.tar.gz`. Note that the `hg38_wgs_scattered_calling_intervals.txt` will need to be to generated using the following:\r\n\r\n```\r\ncd \u003cyour_resource_dir\u003e\r\nfind `pwd` -name \"scattered.interval_list\" -print | sort \u003e hg38_wgs_scattered_calling_intervals.txt\r\n```\r\n\r\n5. Set up the config files. Files that you need to edit with the correct paths to your data/jar files or other specific configurations are:\r\n    - `Multisample_Fastq_to_Gvcf_GATK4_inputs_hg38.json`\r\n    - `Multisample_jointgt_GATK4_inputs_hg38.json`\r\n        - both json files will need the correct paths to your reference file locations, and the file specifying your inputs i.e. `samples.txt` or `gvcfs.txt`\r\n    - `samples.txt`\r\n    - `gvcfs.txt`\r\n        - These are the sample input files (tab seperated)\r\n        - The format for samples.txt is sampleID, sampleID_readgroup, path_to_fastq_R1_file, path_to_fastq_R2_file,\r\n        - The format for gvcfs.txt is sample ID, gvcf, gvcf .tbi index file\r\n        - Examples are included in this repo\r\n        - NOTE: Having tabs, not spaces, is vital for parsing the file. 
Visual studio code tends to introduce spaces, so if you are having issues, check the file with another text editor such as sublime. \r\n    - `launch_cromwell.sh`\r\n    - `launch_jointgt.sh`\r\n        - These are the scripts which launch the pipeline. \r\n        - `launch_cromwell.sh` launches the fastq to gvcf stage\r\n        - `launch_jointgt.sh` launched the gvcf joint genotyping to cohort vcf step. This is perfomed when you have run all samples through the fastq to gvcf stage.\r\n        - Check the paths and parameters make sense for your machine\r\n    - `local.conf`\r\n        - the main tuneable parameters here are:\r\n        \t- `concurrent-job-limit = 5` this is the max number of concurrent jobs that can be spawned by cromwell. This depends on the computational resources available to you. 5 was determined to work reasonably well on a 16 CPU, 64GB RAM Nimbus VM (Pawsey). \r\n        \t- `call-caching enabled = true`. Setting this parameter to `false` will disable call caching (i.e. being able to resume if the job fails before completion). By default, call caching is enabled. \r\n    - `cromwell.options`\r\n        - `cromwell.options` requires editing to provide the directory where you would like the final workflow outputs to be written\r\n    - `Multisample_Fastq_to_Gvcf_GATK4.wdl`\r\n    - `ruddle_fastq_to_gvcf_single_sample_gatk4.wdl`\r\n        - The paths to your jar files will need to be updated\r\n        - The path to your conda `activate` binary will need to be updated (e.g. `/data/miniconda/bin/activate`)\r\n\r\n6. Launch the job within a `screen` or `tmux` session, using `./launch_cromwell.sh`. When that has completed successfully, you can launch the second stage of the pipeline (joint calling) with `./launch_jointgt.sh`. 
Ensure you pipe the stdout and stderr to a log file using (for example) `./launch_cromwell.sh \u0026\u003e cromwell.log`\r\n\r\n### Overview of the steps in `Multisample_Fastq_to_Gvcf_GATK4.wdl`\r\nThis part of the pipeline takes short-read, Illumina paired-end fastq files as the input. The outputs generated are sorted, duplicate marked bam files and their indices, duplicate metric information, and a GVCF file for each sample. The GVCF files are used as input for the second part of the pipeline (joint genotyping).\r\n\r\n```\r\nFastqToUbam\r\nGetBwaVersion\r\nSamToFastqAndBwaMem\r\nMergeBamAlignment\r\nSortAndFixTags\r\nMarkDuplicates\r\nCreateSequenceGroupingTSV\r\nBaseRecalibrator\r\nGatherBqsrReports\r\nApplyBQSR\r\nGatherBamFiles\r\nHaplotypeCaller\r\nMergeGVCFs\r\n```\r\n\r\n### Overview of the steps in `Multisample_jointgt_GATK4.wdl`\r\nThis part of the pipeline takes GVCF files (one per sample), and performs joint genotyping across all of the provided samples. This means that old previously generated GVCFs can be joint-called with new GVCFs whenever you need to add new samples. The key output from this is a joint-genotyped, cohort-wide VCF file. This file can be used for a GEMINI database after normalisation with VT and annotation with a tool such as VEP or SNPEFF. \r\n\r\nThe file `hg38.custom_100Mb.intervals` is required for this step of the pipeline to run. This is included in the git repo for convenience, but should be moved to your resource directory with all the other resource files. 
\r\n\r\n```\r\nGetNumberOfSamples\r\nImportGVCFs\r\nGenotypeGVCFs\r\nHardFilterAndMakeSitesOnlyVcf\r\nIndelsVariantRecalibrator\r\nSNPsVariantRecalibratorCreateModel\r\nSNPsVariantRecalibrator\r\nGatherTranches\r\nApplyRecalibration\r\nGatherVcfs\r\nCollectVariantCallingMetrics\r\nGatherMetrics\r\nDynamicallyCombineIntervals\r\n```\r\n","organization":"Australian BioCommons, Pawsey Supercomputing Research Centre","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/147?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"148","url":"https://workflowhub.eu/workflows/148","name":"Somatic-ShortV @ NCI-Gadi","description":"Somatic-ShortV @ NCI-Gadi is a variant calling pipeline that calls somatic short variants (SNPs and indels) from tumour and matched normal BAM files following [GATK's Best Practice Workflow](https://gatk.broadinstitute.org/hc/en-us/articles/360035894731-Somatic-short-variant-discovery-SNVs-Indels-). This workflow is designed for the National Computational Infrastructure's (NCI) Gadi supercompter, leveraging multiple nodes on NCI Gadi to run all stages of the workflow in parallel. \r\n\r\nInfrastructure\\_deployment\\_metadata: Gadi (NCI)","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/148?version=1","name":"Version 1","author":["Tracy Chew","Cali Willet","Rosemarie Sadsad"],"descriptor_type":[]}]},{"id":"149","url":"https://workflowhub.eu/workflows/149","name":"Flashlite-Trinity","description":"Flashlite-Trinity contains two workflows that run Trinity on the [University of Queensland's HPC, Flashlite](https://rcc.uq.edu.au/flashlite).  Trinity performs de novo transcriptome assembly of RNA-seq data by combining three independent software modules Inchworm, Chrysalis and Butterfly to process RNA-seq reads. 
The algorithm can detect isoforms, handle paired-end reads, multiple insert sizes and strandedness. Users can run Flashlite-Trinity on single samples, or smaller samples requiring \u003c500Gb of memory or staged Trinity which is recommended for global assemblies with multiple sample inputs. Both implementations make use of Singularity containers to install software. \r\n\r\nInfrastructure\\_deployment\\_metadata: FlashLite (QRISCloud)","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/149?version=1","name":"Version 1","author":["Tracy Chew","Rosemarie Sadsad","Georgina Samaha"],"descriptor_type":[]}]},{"id":"150","url":"https://workflowhub.eu/workflows/150","name":"Flashlite-Juicer","description":"Flashlite-Juicer is a PBS implementation of [Juicer](https://github.com/aidenlab/juicer) for University of Queensland's Flashlite HPC.\r\n\r\nInfrastructure\\_deployment\\_metadata: FlashLite (QRISCloud)","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/150?version=1","name":"Version 1","author":["Tracy Chew","Rosemarie Sadsad"],"descriptor_type":[]}]},{"id":"151","url":"https://workflowhub.eu/workflows/151","name":"Flashlite-Supernova","description":"The Flashlite-Supernova pipeline runs Supernova to generate phased whole-genome de novo assemblies from a Chromium prepared library on [University of Queensland's HPC, Flashlite](https://rcc.uq.edu.au/flashlite). 
\r\n\r\nInfrastructure\\_deployment\\_metadata: FlashLite (QRISCloud)","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/151?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"152","url":"https://workflowhub.eu/workflows/152","name":"RNASeq-DE @ NCI-Gadi","description":"RNASeq-DE @ NCI-Gadi processes RNA sequencing data (single, paired and/or multiplexed) for differential expression (raw FASTQ to counts). This pipeline consists of multiple stages and is designed for the National Computational Infrastructure's (NCI) Gadi supercomputer, leveraging multiple nodes to run each stage in parallel. \r\n\r\nInfrastructure\\_deployment\\_metadata: Gadi (NCI)","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/152?version=1","name":"Version 1","author":["Tracy Chew","Rosemarie Sadsad"],"descriptor_type":[]}]},{"id":"153","url":"https://workflowhub.eu/workflows/153","name":"Bootstrapping-for-BQSR @ NCI-Gadi","description":"Bootstrapping-for-BQSR @ NCI-Gadi is a pipeline for bootstrapping a variant resource to enable GATK base quality score recalibration (BQSR) for non-model organisms that lack a publicly available variant resource. This implementation is optimised for the National Computational Infrastructure's Gadi HPC. Multiple rounds of bootstrapping can be performed. Users can use [Fastq-to-bam @ NCI-Gadi](https://workflowhub.eu/workflows/146) and [Germline-ShortV @ NCI-Gadi](https://workflowhub.eu/workflows/143) to produce required input files for Bootstrapping-for-BQSR @ NCI-Gadi. 
\r\n\r\nInfrastructure\\_deployment\\_metadata: Gadi (NCI)\r\n\r\n","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/153?version=1","name":"Version 1","author":["Cali Willet","Tracy Chew"],"descriptor_type":[]}]},{"id":"154","url":"https://workflowhub.eu/workflows/154","name":"Quality assessment, amplicon classification and functional prediction","description":"Workflow for quality assessment of paired reads and classification using NGTax 2.0 and functional annotation using picrust2.\u003cbr\u003e\r\nIn addition files are exported to their respective subfolders for easier data management in a later stage.\u003cbr\u003e\u003cbr\u003e\r\n\r\nSteps:\r\n  - Quality plots (FastQC)\r\n  - NG-TAX 2 High-throughput Amplicon Analysis\r\n  - PICRUSt 2 - Function prediction from marker gene sequences\r\n  - Export module for ngtax","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/154?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/154?version=2","name":"Version 2","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]}]},{"id":"155","url":"https://workflowhub.eu/workflows/155","name":"sars-cov-2-pe-illumina-artic-ivar-analysis/SARS-COV-2-ILLUMINA-AMPLICON-IVAR-PANGOLIN-NEXTCLADE","description":"Find and annotate variants in ampliconic SARS-CoV-2 Illumina sequencing data and classify samples with pangolin and nextclade","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/155?version=1","name":"v0.1","author":["Peter van 
Heusden"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/155?version=2","name":"v0.2","author":["Peter van Heusden"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/155?version=3","name":"v0.2.1","author":["Peter van Heusden"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/155?version=4","name":"v0.2.2","author":["Peter van Heusden"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/155?version=5","name":"v0.2.3","author":["Peter van Heusden"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/155?version=6","name":"v0.3.1","author":["Peter van Heusden"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/155?version=7","name":"v0.4","author":["Peter van Heusden"],"descriptor_type":["GALAXY"]}]},{"id":"156","url":"https://workflowhub.eu/workflows/156","name":"SAMBA: Standardized and Automated MetaBarcoding Analyses workflow","description":"SAMBA is a FAIR scalable workflow integrating, into a unique tool, state-of-the-art bioinformatics and statistical methods to conduct reproducible eDNA analyses using Nextflow. SAMBA starts processing by verifying integrity of raw reads and metadata. Then all bioinformatics processing is done using commonly used procedure (QIIME 2 and DADA2) but adds new steps relying on dbOTU3 and microDecon to build high quality ASV count tables. Extended statistical analyses are also performed. Finally, SAMBA produces a full dynamic HTML report including resources used, commands executed, intermediate results, statistical analyses and figures.\r\n\r\nThe SAMBA pipeline can run tasks across multiple compute infrastructures in a very portable manner. 
It comes with singularity containers making installation trivial and results highly reproducible.","organization":"SeBiMER","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/156?version=1","name":"Version 1","author":["Cyril Noel","Alexandre Cormier","Laura Leroi","Patrick Durand"],"descriptor_type":["NFL"]}]},{"id":"160","url":"https://workflowhub.eu/workflows/160","name":"Cryo electron microscopy of SARS-CoV-2 spike in prefusion state","description":"Continuous flexibility analysis of SARS-CoV-2 Spike prefusion structures","organization":"UX trial team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/160?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"183","url":"https://workflowhub.eu/workflows/183","name":"testEntryTitleNew","description":"Cryo-EM processing workflow","organization":"UX trial team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/183?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"188","url":"https://workflowhub.eu/workflows/188","name":"entryTitleTest","description":"Cryo-EM processing workflow","organization":"UX trial team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/188?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"189","url":"https://workflowhub.eu/workflows/189","name":"BioTranslator Workflow","description":"BioTranslator performs sequentially pathway analysis and gene prioritization: A specific operator is executed for each task to translate the input gene set into semantic terms and pinpoint the pivotal-role genes on the derived semantic network. 
The output consists of the set of statistically significant semantic terms and the associated hub genes (the gene signature), prioritized according to their involvement in the underlying semantic topology.","organization":"CO2MICS Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/189?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/189?version=2","name":"Version 2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/189?version=3","name":"Version 3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"192","url":"https://workflowhub.eu/workflows/192","name":"ASPICov","description":"ASPICov was developed to provide a rapid, reliable and complete analysis of NGS SARS-Cov2 samples to the biologist. This broad application tool allows to process samples from either capture or amplicon strategy and Illumina or Ion Torrent technology. To ensure FAIR data analysis, this Nextflow pipeline follows nf-core guidelines and use Singularity containers. 
\r\n\r\nAvailability and Implementation: https://gitlab.com/vtilloy/aspicov\r\n\r\nCitation: Valentin Tilloy, Pierre Cuzin, Laura Leroi, Emilie Guérin, Patrick Durand, Sophie Alain\r\n\t\t\t\t\t\t\tASPICov: An automated pipeline for identification of SARS-Cov2 nucleotidic variants\r\n\t\t\t\t\t\t\tPLoS One 2022 Jan 26;17(1):e0262953: https://pubmed.ncbi.nlm.nih.gov/35081137/","organization":"CHU Limoges - UF9481 Bioinformatique / CNR Herpesvirus","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/192?version=1","name":"Version 1","author":["Valentin Tilloy","Pierre Cuzin","Laura Leroi","Patrick Durand"],"descriptor_type":["NFL"]}]},{"id":"193","url":"https://workflowhub.eu/workflows/193","name":"Workflow of BioTranslator Comparative Analysis","description":"This workflow is based on the idea of comparing different gene sets through their semantic interpretation. In many cases, the user studies a specific phenotype (e.g. disease) by analyzing lists of genes resulting from different samples or patients. Their pathway analysis could result in different semantic networks, revealing mechanistic and phenotypic divergence between these gene sets. The workflow of BioTranslator Comparative Analysis compares quantitatively the outputs of pathway analysis, based on the topology of the underlying ontological graph, in order to derive a semantic similarity value for each pair of the initial gene sets. The workflow is available in a Galaxy application and can be used for 14 species. The algorithm accepts as input a batch of gene sets, such as BioTranslator, for the same organism. 
It performs pathway analysis according to the user-selected ontology and then it compares the derived semantic networks and extracts a matrix with their distances, as well as a respective heatmap.","organization":"CO2MICS Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/193?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"194","url":"https://workflowhub.eu/workflows/194","name":"Protein MD Setup tutorial using BioExcel Building Blocks (biobb) in Galaxy","description":"Galaxy workflow example that illustrate the process of setting up a simulation system containing a protein, step by step, using the [BioExcel Building Blocks](/projects/11) library (biobb). The particular example used is the Lysozyme protein (PDB code 1AKI). This workflow returns a resulting protein structure and simulated 3D trajectories.\r\n\r\nDesigned for running on the https://dev.usegalaxy.es Galaxy instance.","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/194?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"195","url":"https://workflowhub.eu/workflows/195","name":"Jupyter Notebook Structural DNA helical parameters tutorial","description":"# Structural DNA helical parameters from MD trajectory tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the [NAFlex](https://mmb.irbbarcelona.org/NAFlex) server and in particular in its [Nucleic Acids Analysis section](https://mmb.irbbarcelona.org/NAFlex/help.php?id=tutorialAnalysisNA).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **extracting structural and dynamical properties** from a **DNA MD trajectory helical parameters**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular example used is the **Drew Dickerson Dodecamer** sequence -CGCGAATTCGCG- (PDB code [1BNA](https://www.rcsb.org/structure/1BNA)). The trajectory used is a  500ns-long MD simulation taken from the [BigNASim](https://mmb.irbbarcelona.org/BIGNASim/) database ([NAFlex_DDD_II](https://mmb.irbbarcelona.org/BIGNASim/getStruc.php?idCode=NAFlex_DDD_II) entry).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/195?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/195?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/195?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/195?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/195?version=5","name":"Version 
5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/195?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/195?version=7","name":"Version 7","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"196","url":"https://workflowhub.eu/workflows/196","name":"Jupyter Notebook ABC MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/196?version=1","name":"Version 2","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/196?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/196?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/196?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/196?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/196?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"7","url":"https://workflowhub.eu/workflows/196?version=7","name":"Version 7","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"199","url":"https://workflowhub.eu/workflows/199","name":"lncRNA","description":"Analysis of RNA-seq data starting from BAM and focusing on mRNA, lncRNA and miRNA","organization":"CO2MICS Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/199?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"200","url":"https://workflowhub.eu/workflows/200","name":"Protein MD Setup HPC tutorial using BioExcel Building Blocks (biobb) in PyCOMPSs","description":"This PyCOMPSs workflow tutorial aims to illustrate the process of setting up a simulation system containing a protein, step by step, using the BioExcel Building Blocks library (biobb) in PyCOMPSs for execution on HPC. Three variants of the MD Setup workflows are included, supporting a list of structures, a list of mutations, or a cumulative set of mutations. 
","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/200?version=1","name":"Version 1","author":["Adam Hospital","Pau Andrio"],"descriptor_type":[]}]},{"id":"201","url":"https://workflowhub.eu/workflows/201","name":"Protein MD Setup tutorial using BioExcel Building Blocks (biobb) in KNIME","description":"This is an experimental KNIME workflow of using the BioExcel building blocks to implement the Protein MD Setup tutorial for molecular dynamics with GROMACS.\r\n\r\nNote that this workflow won't import in KNIME without the [experimental KNIME nodes](https://bioexcel.eu/research/projects/biobb_knime/) for BioBB - contact Adam Hospital for details.","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/201?version=1","name":"Protein MD Setup tutorial using BioExcel Building Blocks (biobb) in KNIME","author":["Adam Hospital"],"descriptor_type":[]}]},{"id":"216","url":"https://workflowhub.eu/workflows/216","name":"Preparing a data set for Deep Learning from zipped ABR raw data files","description":"This notebook is about pre-processing the Auditory Brainstem Response (ABR) raw data files provided by [Ingham et. 
al](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3000194) to create a data set for Deep Learning models.\r\n\r\nThe unprocessed ABR data files are available at [Dryad](https://datadryad.org/stash/dataset/doi:10.5061/dryad.cv803rv).\r\n\r\nSince the ABR raw data are available as zip-archives, these have to be unzipped and the extracted raw data files parsed so that the time series corresponding to the ABR audiograms can be saved in a single csv file.\r\n\r\nThe final data set contains the ABR time series, an individual mouse identifier, stimulus frequency, stimulus sound pressure level (SPL) and a manually determined hearing threshold. For each mouse there are different time series corresponding to six different sound stimuli: broadband click, 6, 12, 18, 24, and 30 kHz, each of which was measured for a range of sound pressure levels. The exact range of sound levels can vary between the different mice and stimuli. \r\n\r\nThe following is done: \r\n\r\n* The zip archives are unpacked.\r\n* The extracted ABR raw data files are parsed and collected in one csv file per archive.\r\n* The csv files are merged into a data set of time series. Each time series corresponds to an ABR audiogram measured for a mouse at a specific frequency and sound level.\r\n* The mouse phenotyping data are available in Excel format. The individual data sheets are combined into one mouse phenotyping data set, maintaining the mouse pipeline and the cohort type mapping. 
In addition, the hearing thresholds are added to the ABR audiogram data set.\r\n* The data sets are curated: \r\n\r\n\t* there is a single curve per mouse, stimulus frequency and sound level,\r\n\t* each sound level is included in the list of potential sound pressure levels,\r\n\t* for each mouse for which an ABR audiogram has been measured, mouse phenotyping data are also provided.","organization":"Applied Computational Biology at IEG/HMGU","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/216?version=1","name":"Version 1","author":["Elida Schneltzer"],"descriptor_type":[]}]},{"id":"220","url":"https://workflowhub.eu/workflows/220","name":"BAM to FASTQ + QC v1.0","description":"# BAM-to-FASTQ-QC\r\n\r\n## General recommendations for using BAM-to-FASTQ-QC\r\nPlease see the [`Genome assembly with hifiasm on Galaxy Australia`](https://australianbiocommons.github.io/how-to-guides/genome_assembly/hifi_assembly) guide.\r\n\r\n## Acknowledgements\r\n\r\nThe workflow \u0026 the [doc_guidelines template used](https://github.com/AustralianBioCommons/doc_guidelines) are supported by the Australian BioCommons via Bioplatforms Australia funding, the Australian Research Data Commons (https://doi.org/10.47486/PL105) and the Queensland Government RICF programme. 
Bioplatforms Australia and the Australian Research Data Commons are enabled by the National Collaborative Research Infrastructure Strategy (NCRIS).\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/220?version=1","name":"Version 1","author":["Gareth Price"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/220?version=2","name":"main @ 1d969fc","author":["Gareth Price"],"descriptor_type":["GALAXY"]}]},{"id":"221","url":"https://workflowhub.eu/workflows/221","name":"PacBio HiFi genome assembly using hifiasm v2.1","description":"# PacBio HiFi genome assembly using hifiasm v2.1\r\n\r\n## General usage recommendations\r\nPlease see the [Genome assembly with hifiasm on Galaxy Australia](https://australianbiocommons.github.io/how-to-guides/genome_assembly/hifi_assembly) guide.\r\n\r\n## See [change log](./change_log.md)\r\n\r\n## Acknowledgements\r\n\r\nThe workflow \u0026 the [doc_guidelines template used](https://github.com/AustralianBioCommons/doc_guidelines) are \r\nsupported by the Australian BioCommons via Bioplatforms Australia funding, the Australian Research Data Commons \r\n(https://doi.org/10.47486/PL105) and the Queensland Government RICF programme. 
Bioplatforms Australia and the \r\nAustralian Research Data Commons are enabled by the National Collaborative Research Infrastructure Strategy (NCRIS).\r\n\r\n\r\n\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/221?version=1","name":"Version 1","author":["Gareth Price","Katherine Farquharson"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/221?version=2","name":"master @ c4797d8","author":["Gareth Price","Katherine Farquharson"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/221?version=3","name":"v2.1.0","author":["Gareth Price","Katherine Farquharson"],"descriptor_type":["GALAXY"]}]},{"id":"222","url":"https://workflowhub.eu/workflows/222","name":"Data QC","description":"Data QC step, can run alone or as part of a combined workflow for large genome assembly. \r\n\r\n* What it does: Reports statistics from sequencing reads.\r\n* Inputs: long reads (fastq.gz format), short reads (R1 and R2) (fastq.gz format).\r\n* Outputs: For long reads: a nanoplot report (the HTML report summarizes all the information). For short reads: a MultiQC report.\r\n* Tools used: Nanoplot, FastQC, MultiQC.\r\n* Input parameters: None required.\r\n* Workflow steps: Long reads are analysed by Nanoplot; Short reads (R1 and R2) are analysed by FastQC; the resulting reports are processed by MultiQC.\r\n* Options: see the tool settings options at runtime and change as required. 
Alternative tool option: fastp\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/222?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"223","url":"https://workflowhub.eu/workflows/223","name":"kmer counting - meryl","description":"Kmer counting step, can run alone or as part of a combined workflow for large genome assembly. \r\n\r\n* What it does: Estimates genome size and heterozygosity based on counts of kmers\r\n* Inputs: One set of short reads: e.g. R1.fq.gz\r\n* Outputs: GenomeScope graphs\r\n* Tools used: Meryl, GenomeScope\r\n* Input parameters: None required\r\n* Workflow steps: The tool meryl counts kmers in the input reads (k=21), then converts this into a histogram. GenomeScope: runs a model on the histogram; reports estimates. k-mer size set to 21. \r\n* Options: Use a different kmer counting tool. e.g. khmer.\r\n\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/223?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"224","url":"https://workflowhub.eu/workflows/224","name":"Trim and filter reads - fastp","description":"Trim and filter reads; can run alone or as part of a combined workflow for large genome assembly. \r\n\r\n* What it does: Trims and filters raw sequence reads according to specified settings. 
\r\n* Inputs: Long reads (format fastq); Short reads R1 and R2 (format fastq) \r\n* Outputs: Trimmed and filtered reads: fastp_filtered_long_reads.fastq.gz (But note: no trimming or filtering is on by default), fastp_filtered_R1.fastq.gz, fastp_filtered_R2.fastq.gz\r\n* Reports: fastp report on long reads, html; fastp report on short reads, html\r\n* Tools used: fastp (Note. The latest version (0.20.1) of fastp has an issue displaying plot results. Using version 0.19.5 here instead until this is rectified). \r\n* Input parameters: None required, but recommend removing the long reads from the workflow if not using any trimming/filtering settings. \r\n\r\nWorkflow steps:\r\n\r\nLong reads: fastp settings: \r\n* These settings have been changed from the defaults (so that all filtering and trimming settings are now disabled). \r\n* Adapter trimming options: Disable adapter trimming: yes\r\n* Filter options: Quality filtering options: Disable quality filtering: yes\r\n* Filter options: Length filtering options: Disable length filtering: yes\r\n* Read modification options: PolyG tail trimming: Disable\r\n* Output options: output JSON report: yes\r\n\r\nShort reads: fastp settings:\r\n* adapter trimming (default setting: adapters are auto-detected)\r\n* quality filtering (default: phred quality 15), unqualified bases limit (default = 40%), number of Ns allowed in a read (default = 5)\r\n* length filtering (default length = min 15)\r\n* polyG tail trimming (default = on for NextSeq/NovaSeq data which is auto detected)\r\n* Output options: output JSON report: yes\r\n\r\nOptions:\r\n* Change any settings in fastp for any of the input reads. \r\n* Adapter trimming: input the actual adapter sequences. (Alternative tool for long read adapter trimming: Porechop.) \r\n* Trimming n bases from ends of reads if quality less than value x  (Alternative tool for trimming long reads: NanoFilt.)\r\n* Discard post-trimmed reads if length is \u003c x (e.g. 
for long reads, 1000 bp)\r\n* Example filtering/trimming that you might do on long reads: remove adapters (can also be done with Porechop), trim bases from ends of the reads with low quality (can also be done with NanoFilt), after this can keep only reads of length x (e.g. 1000 bp) \r\n\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/224?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"225","url":"https://workflowhub.eu/workflows/225","name":"Assembly with Flye","description":"Assembly with Flye; can run alone or as part of a combined workflow for large genome assembly. \r\n\r\n* What it does: Assembles long reads with the tool Flye\r\n* Inputs: long reads (may be raw, or filtered, and/or corrected); fastq.gz format\r\n* Outputs: Flye assembly fasta; Fasta stats on assembly.fasta; Assembly graph image from Bandage; Bar chart of contig sizes; Quast reports of genome assembly\r\n* Tools used: Flye, Fasta statistics, Bandage, Bar chart, Quast\r\n* Input parameters: None required, but recommend setting assembly mode to match input sequence type\r\n\r\nWorkflow steps:\r\n* Long reads are assembled with Flye, using default tool settings. Note: the default setting for read type (\"mode\") is nanopore raw. Change this at runtime if required. \r\n* Statistics are computed from the assembly.fasta file output, using Fasta Statistics and Quast (is genome large: Yes; distinguish contigs with more that 50% unaligned bases: no)\r\n* The graphical fragment assembly file is visualized with the tool Bandage. \r\n* Assembly information sent to bar chart to visualize contig sizes\r\n\r\nOptions\r\n* See other Flye options. \r\n* Use a different assembler (in a different workflow). 
\r\n* Bandage image options - change size (max size is 32767), labels - add (e.g. node lengths). You can also install Bandage on your own computer and download the \"graphical fragment assembly\" file to view in greater detail. \r\n\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)\r\n\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/225?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"226","url":"https://workflowhub.eu/workflows/226","name":"Assembly polishing","description":"Assembly polishing; can run alone or as part of a combined workflow for large genome assembly. \r\n\r\n* What it does: Polishes (corrects) an assembly, using long reads (with the tools Racon and Medaka) and short reads (with the tool Racon). (Note: medaka is only for nanopore reads, not PacBio reads). \r\n* Inputs:  assembly to be polished:  assembly.fasta; long reads - the same set used in the assembly (e.g. may be raw or filtered) fastq.gz format; short reads, R1 only, in fastq.gz format\r\n* Outputs: Racon+Medaka+Racon polished_assembly. fasta; Fasta statistics after each polishing tool\r\n* Tools used: Minimap2, Racon, Fasta statistics, Medaka\r\n* Input parameters:  None required. The Medaka model is set to r941_min_hac_g507 (R9.4.1 pore, MinION, Guppy 5 High Accuracy Calling). To use a different model, edit the workflow before importing. \r\n\r\nWorkflow steps:\r\n\r\n-1-  Polish with long reads: using Racon\r\n* Long reads and assembly contigs =\u003e Racon polishing (subworkflow): \r\n* minimap2 : long reads are mapped to assembly =\u003e overlaps.paf. 
\r\n* overlaps, long reads, assembly =\u003e Racon =\u003e polished assembly 1\r\n* using polished assembly 1 as input; repeat minimap2 + racon =\u003e polished assembly 2\r\n* using polished assembly 2 as input, repeat minimap2 + racon =\u003e polished assembly 3\r\n* using polished assembly 3 as input, repeat minimap2 + racon =\u003e polished assembly 4\r\n* Racon long-read polished assembly =\u003e Fasta statistics\r\n* Note: The Racon tool panel can be a bit confusing and is under review for improvement. Presently it requires sequences (= long reads), overlaps (= the paf file created by minimap2), and target sequences (= the contigs to be polished) as per \"usage\" described here https://github.com/isovic/racon/blob/master/README.md\r\n* Note: Racon: the default setting for \"output unpolished target sequences?\" is No. This has been changed to Yes for all Racon steps in these polishing workflows.  This means that even if no polishes are made in some contigs, they will be part of the output fasta file. \r\n* Note: the contigs output by Racon have new tags in their headers. For more on this see https://github.com/isovic/racon/issues/85.\r\n\r\n-2-  Polish with long reads: using Medaka\r\n* Racon polished assembly + long reads =\u003e medaka polishing X1 =\u003e medaka polished assembly\r\n* Medaka polished assembly =\u003e Fasta statistics\r\n\r\n-3-  Polish with short reads: using Racon\r\n* Short reads and Medaka polished assembly =\u003eRacon polish (subworkflow):\r\n* minimap2: short reads (R1 only) are mapped to the assembly =\u003e overlaps.paf. 
Minimap2 setting is for short reads.\r\n* overlaps + short reads + assembly =\u003e Racon =\u003e polished assembly 1\r\n* using polished assembly 1 as input; repeat minimap2 + racon =\u003e polished assembly 2\r\n* Racon short-read polished assembly =\u003e Fasta statistics\r\n\r\nOptions\r\n* Change settings for Racon long read polishing if using PacBio reads:  The default profile setting for Racon long read polishing: minimap2 read mapping is \"Oxford Nanopore read to reference mapping\", which is specified as an input parameter to the whole Assembly polishing workflow, as text: map-ont. If you are not using nanopore reads and/or need a different setting, change this input. To see the other available settings, open the minimap2 tool, find \"Select a profile of preset options\", and click on the drop down menu. For each described option, there is a short text in brackets at the end (e.g. map-pb). This is the text to enter into the assembly polishing workflow at runtime instead of the default (map-ont).\r\n* Other options: change the number of polishes (in Racon and/or Medaka). There are ways to assess how much improvement in assembly quality has occurred per polishing round (for example, the number of corrections made; the change in Busco score - see section Genome quality assessment for more on Busco).\r\n* Option: change polishing settings for any of these tools. Note: for Racon - these will have to be changed within those subworkflows first. Then, in the main workflow, update the subworkflows, and re-save. 
\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/226?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/226?version=2","name":"Version 2","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"227","url":"https://workflowhub.eu/workflows/227","name":"Racon polish with long reads, x4","description":"Assembly polishing subworkflow: Racon polishing with long reads\r\n\r\nInputs: long reads and assembly contigs\r\n\r\nWorkflow steps:\r\n* minimap2 : long reads are mapped to assembly =\u003e overlaps.paf. \r\n* overlaps, long reads, assembly =\u003e Racon =\u003e polished assembly 1\r\n* using polished assembly 1 as input; repeat minimap2 + racon =\u003e polished assembly 2\r\n* using polished assembly 2 as input, repeat minimap2 + racon =\u003e polished assembly 3\r\n* using polished assembly 3 as input, repeat minimap2 + racon =\u003e polished assembly 4\r\n* Racon long-read polished assembly =\u003e Fasta statistics\r\n* Note: The Racon tool panel can be a bit confusing and is under review for improvement. Presently it requires sequences (= long reads), overlaps (= the paf file created by minimap2), and target sequences (= the contigs to be polished) as per \"usage\" described here https://github.com/isovic/racon/blob/master/README.md\r\n* Note: Racon: the default setting for \"output unpolished target sequences?\" is No. This has been changed to Yes for all Racon steps in these polishing workflows.  This means that even if no polishes are made in some contigs, they will be part of the output fasta file. \r\n* Note: the contigs output by Racon have new tags in their headers. 
For more on this see https://github.com/isovic/racon/issues/85.\r\n\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/227?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"228","url":"https://workflowhub.eu/workflows/228","name":"Racon polish with Illumina reads, x2","description":"Assembly polishing subworkflow: Racon polishing with short reads\r\n\r\nInputs: short reads and assembly (usually pre-polished with other tools first, e.g. Racon + long reads; Medaka)\r\n\r\nWorkflow steps: \r\n* minimap2: short reads (R1 only) are mapped to the assembly =\u003e overlaps.paf. Minimap2 setting is for short reads.\r\n* overlaps + short reads + assembly =\u003e Racon =\u003e polished assembly 1\r\n* using polished assembly 1 as input; repeat minimap2 + racon =\u003e polished assembly 2\r\n* Racon short-read polished assembly =\u003e Fasta statistics\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/228?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"229","url":"https://workflowhub.eu/workflows/229","name":"Assess genome quality","description":"Assess genome quality; can run alone or as part of a combined workflow for large genome assembly. \r\n\r\n* What it does: Assesses the quality of the genome assembly: generate some statistics and determine if expected genes are present; align contigs to a reference genome.\r\n* Inputs: polished assembly;  reference_genome.fasta (e.g. of a closely-related species, if available). 
\r\n* Outputs:  Busco table of genes found; Quast HTML report, and link to Icarus contigs browser,  showing contigs aligned to a reference genome\r\n* Tools used: Busco, Quast\r\n* Input parameters: None required\r\n\r\nWorkflow steps: \r\n\r\nPolished assembly =\u003e Busco\r\n* First: predict genes in the assembly: using Metaeuk\r\n* Second: compare the set of predicted genes to the set of expected genes in a particular lineage. Default setting for lineage: Eukaryota\r\n\r\nPolished assembly and a reference genome =\u003e Quast\r\n* Contigs/scaffolds file: polished assembly\r\n* Type of assembly: Genome\r\n* Use a reference genome: Yes\r\n* Reference genome: Arabidopsis genome\r\n* Is the genome large (\u003e 100Mbp)? Yes. \r\n* All other settings as defaults, except second last setting: Distinguish contigs with more than 50% unaligned bases as a separate group of contigs?: change to No\r\n\r\nOptions\r\n\r\nGene prediction: \r\n* Change tool used by Busco to predict genes in the assembly: instead of Metaeuk, use Augustus. \r\n* To do this: select: Use Augustus; Use another predefined species model; then choose from the drop down list.\r\n* Select from a database of trained species models. list here:  https://github.com/Gaius-Augustus/Augustus/tree/master/config/species\r\n* Note: if using Augustus: it may fail if the input assembly is too small (e.g. a test-size data assembly). It can't do the training part properly. \r\n\r\nCompare genes found to other lineage: \r\n* Busco has databases of lineages and their expected genes. Option to change lineage. \r\n* Not all lineages are available - there is a mix of broader and narrower lineages. - list of lineages here: https://busco.ezlab.org/list_of_lineages.html. \r\n* To see the groups in taxonomic hierarchies: Eukaryotes:  https://busco.ezlab.org/frames/euka.htm\r\n* For example,  if you have a plant species from Fabales, you could set that as the lineage. 
\r\n* The narrower the taxonomic group, the more total genes are expected. \r\n\r\n\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/229?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"230","url":"https://workflowhub.eu/workflows/230","name":"Combined workflows for large genome assembly","description":"Combined workflow for large genome assembly\r\n\r\nThe tutorial document for this workflow is here: https://doi.org/10.5281/zenodo.5655813\r\n\r\n\r\nWhat it does:  A workflow for genome assembly, containing subworkflows:\r\n* Data QC\r\n* Kmer counting\r\n* Trim and filter reads\r\n* Assembly with Flye\r\n* Assembly polishing\r\n* Assess genome quality\r\n\r\nInputs: \r\n* long reads and short reads in fastq format\r\n* reference genome for Quast\r\n\r\nOutputs: \r\n* Data information - QC, kmers\r\n* Filtered, trimmed reads\r\n* Genome assembly, assembly graph, stats\r\n* Polished assembly, stats\r\n* Quality metrics - Busco, Quast\r\n\r\nOptions\r\n* Omit some steps - e.g. Data QC and kmer counting\r\n* Replace a module with one using a different tool - e.g. 
change assembly tool\r\n\r\nInfrastructure_deployment_metadata: Galaxy Australia (Galaxy)","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/230?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"232","url":"https://workflowhub.eu/workflows/232","name":"16S_biodiversity_for_overlap_paired_end","description":"MetaDEGalaxy: Galaxy workflow for differential abundance analysis of 16s metagenomic data","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/232?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"233","url":"https://workflowhub.eu/workflows/233","name":"16S_biodiversity_for_nonoverlap_paired_end","description":"","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/233?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"235","url":"https://workflowhub.eu/workflows/235","name":"Hecatomb","description":"A hecatomb is a great sacrifice or an extensive loss. Heactomb the software empowers an analyst to make data driven decisions to 'sacrifice' false-positive viral reads from metagenomes to enrich for true-positive viral reads. 
This process frequently results in a great loss of suspected viral sequences / contigs.\r\n\r\nFor information about installation, usage, tutorial etc please refer to the documentation: https://hecatomb.readthedocs.io/en/latest/\r\n\r\n### Quick start guide\r\n\r\nInstall Hecatomb from Bioconda\r\n```bash\r\n# create an env called hecatomb and install Hecatomb in it\r\nconda create -n hecatomb -c conda-forge -c bioconda hecatomb\r\n\r\n# activate conda env\r\nconda activate hecatomb\r\n\r\n# check the installation\r\nhecatomb -h\r\n\r\n# download the databases - you only have to do this once\r\nhecatomb install\r\n\r\n# Run the test dataset\r\nhecatomb run --test\r\n```","organization":"HecatombDevelopment","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/235?version=1","name":"Version 1","author":["Michael Roach"],"descriptor_type":["SMK"]}]},{"id":"236","url":"https://workflowhub.eu/workflows/236","name":"Janis Germline Variant-Calling Workflow (GATK)","description":"This is a genomics pipeline to do a single germline sample variant-calling, adapted from GATK Best Practice Workflow.\r\n\r\nThis workflow is a reference pipeline for using the Janis Python framework (pipelines assistant).\r\n- Alignment: bwa-mem\r\n- Variant-Calling: GATK HaplotypeCaller\r\n- Outputs the final variants in the VCF format.\r\n\r\n**Resources**\r\n\r\nThis pipeline has been tested using the HG38 reference set, available on Google Cloud Storage through:\r\n\r\n- https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\r\n\r\nThis pipeline expects the assembly references to be as they appear in that storage     (\".fai\", \".amb\", \".ann\", \".bwt\", \".pac\", \".sa\", \"^.dict\").\r\nThe known sites (snps_dbsnp, snps_1000gp, known_indels, mills_indels) should be gzipped and tabix indexed.\r\n\r\n\r\nInfrastructure_deployment_metadata: Spartan 
(Unimelb)","organization":"Janis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/236?version=1","name":"Version 1","author":["Richard Lupat"],"descriptor_type":[]}]},{"id":"237","url":"https://workflowhub.eu/workflows/237","name":"Purge duplicates from hifiasm assembly v1.0","description":"# Purge-duplicates-from-hifiasm-assembly\r\n\r\n## General recommendations for using `Purge-duplicates-from-hifiasm-assembly`\r\n\r\nPlease see the [`Genome assembly with hifiasm on Galaxy Australia`](https://australianbiocommons.github.io/how-to-guides/genome_assembly/hifi_assembly) guide.\r\n\r\n## Acknowledgements\r\n\r\nThe workflow \u0026 the [doc_guidelines template used](https://github.com/AustralianBioCommons/doc_guidelines) are \r\nsupported by the Australian BioCommons via Bioplatforms Australia funding, the Australian Research Data Commons \r\n(https://doi.org/10.47486/PL105) and the Queensland Government RICF programme. 
Bioplatforms Australia and the \r\nAustralian Research Data Commons are enabled by the National Collaborative Research Infrastructure Strategy (NCRIS).\r\n\r\n\r\n\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/237?version=1","name":"Version 1","author":["Gareth Price"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/237?version=2","name":"v1.0.0","author":["Gareth Price"],"descriptor_type":["GALAXY"]}]},{"id":"238","url":"https://workflowhub.eu/workflows/238","name":"Calculate the half maximal inhibitory concentration (IC50) for each compound used in a SARS-CoV-2 study","description":"# Summary\r\nThis notebook demonstrates how to retrieve metadata associated to the paper [A SARS-CoV-2 cytopathicity dataset generated by high-content screening of a large drug repurposing collection](https://doi.org/10.1038/s41597-021-00848-4) and available in IDR at [idr0094-ellinger-sarscov2](https://idr.openmicroscopy.org/search/?query=Name:idr0094).\r\nOver 300 compounds were used in this investigation. This notebook allows the user to calculate the half maximal inhibitory concentration (IC50) for each compound. IC50 is a measure of the potency of a substance in inhibiting a specific biological or biochemical function. IC50 is a quantitative measure that indicates how much of a particular inhibitory substance (e.g. 
drug) is needed to inhibit, in vitro, a given biological process or biological component by 50%.\r\nUser can download the IC50 for each compound used in that study\r\n\r\nThe notebook can be launched in [My Binder](https://mybinder.org/v2/gh/IDR/idr0094-ellinger-sarscov2/master?urlpath=notebooks%2Fnotebooks%2Fidr0094-ic50.ipynb%3FscreenId%3D2603).\r\n\r\nA shiny app is also available for dynamic plotting of the IC50 curve for each compound.\r\nThis R shiny app can be launched in [My Binder](https://mybinder.org/v2/gh/IDR/idr0094-ellinger-sarscov2/master?urlpath=shiny/apps/)\r\n\r\n\r\n# Inputs\r\nParameters needed to configure the workflow:\r\n\r\n**screenId**: Identifier of a screen in IDR.\r\n\r\n# Outputs\r\nOutput file generated:\r\n\r\n**ic50.csv**: Comma separated value file containing the IC50 for each compound.\r\n\r\n","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/238?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"239","url":"https://workflowhub.eu/workflows/239","name":"exome-alignment","description":"Exome Alignment Workflow\r\n","organization":"iPC: individualizedPaediatricCure","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/239?version=1","name":"Version 1","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]}]},{"id":"240","url":"https://workflowhub.eu/workflows/240","name":"exome-samtools","description":"Exome SAMtools Workflow","organization":"iPC: individualizedPaediatricCure","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/240?version=1","name":"Version 1","author":["Laura 
Rodriguez-Navas"],"descriptor_type":["NFL"]}]},{"id":"241","url":"https://workflowhub.eu/workflows/241","name":"atavide","description":"`atavide` is a complete workflow for metagenomics data analysis, including QC/QA, optional host removal, assembly and cross-assembly, and individual read based annotations. We have also built in some advanced analytics including tools to assign annotations from reads to contigs, and to generate metagenome-assembled genomes in several different ways, giving you the power to explore your data!\r\n\r\n`atavide` is 100% snakemake and conda, so you only need to install the snakemake workflow, and then everything else will be installed with conda.\r\n\r\nSteps:\r\n1. QC/QA with [prinseq++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus)\r\n2. optional host removal using bowtie2 and samtools, [as described previously](https://edwards.flinders.edu.au/command-line-deconseq/). To enable this, you need to provide a path to the host db and a host db.\r\n\r\nMetagenome assembly\r\n1. pairwise assembly of each sample using [megahit](https://github.com/voutcn/megahit)\r\n2. extraction of all reads that do not assemble using samtools flags\r\n3. assembly of all unassembled reads using [megahit](https://github.com/voutcn/megahit)\r\n4. compilation of _all_ contigs into a single unified set using [Flye](https://github.com/fenderglass/Flye)\r\n5. comparison of reads -\u003e contigs to generate coverage\r\n\r\nMAG creation\r\n1. [metabat](https://bitbucket.org/berkeleylab/metabat/src/master/)\r\n2. [concoct](https://github.com/BinPro/CONCOCT)\r\n3. Pairwise comparisons using [turbocor](https://github.com/dcjones/turbocor) followed by clustering\r\n\r\nRead-based annotations\r\n1. [Kraken2](https://ccb.jhu.edu/software/kraken2/)\r\n2. [singlem](https://github.com/wwood/singlem)\r\n3. [SUPER-focus](https://github.com/metageni/SUPER-FOCUS)\r\n4. [FOCUS](https://github.com/metageni/FOCUS)\r\n\r\nWant something else added to the suite? 
File an issue on github and we'll add it ASAP!\r\n\r\n### Installation\r\n\r\nYou will need to install\r\n1. The NCBI taxonomy database somewhere\r\n2. The superfocus databases somewhere, and set the SUPERFOCUS_DB environmental variable\r\n\r\nEverything else should install automatically.","organization":"FAME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/241?version=1","name":"Version 1","author":[],"descriptor_type":["SMK"]}]},{"id":"242","url":"https://workflowhub.eu/workflows/242","name":"Diabetes related genes expressed in pancreas","description":"# Summary\r\nThis notebook shows how to integrate genomic and image data resources. This notebook looks at the question **Which diabetes related genes are expressed in the pancreas?** \r\n\r\nSteps:\r\n\r\n* Query humanmine.org, an integrated database of Homo sapiens genomic data using the intermine API to find the genes.\r\n* Using the list of found genes, search in the Image Data Resource (IDR) for images linked to the genes, tissue and disease.\r\n* \r\nWe use the [intermine Python API](https://github.com/intermine/intermine-ws-python) and the IDR Python API.\r\n\r\nThe notebook can be opened in [Colab](https://colab.research.google.com/github/IDR/idr-notebooks/blob/master/humanmine.ipynb)\r\n\r\n# Inputs\r\nParameters needed to configure the workflow:\r\n\r\n* TISSUE = \"Pancreas\" \r\n* DISEASE = \"diabetes\"\r\n\r\n# Ouputs\r\n* List of genes found using [HumanMine](https://pubmed.ncbi.nlm.nih.gov/35820040/)\r\n* List of images from IDR for one of the gene found","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/242?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"244","url":"https://workflowhub.eu/workflows/244","name":"OpenEBench TCGA Cancer Driver Genes 
benchmarking workflow","description":"## Description\r\n\r\nThe workflow takes an input file with Cancer Driver Genes predictions (i.e. the results provided by a participant), computes a set of metrics, and compares them against the data currently stored in OpenEBench within the TCGA community. Two assessment metrics are provided for that predictions. Also, some plots (which are optional) that allow to visualize the performance of the tool are generated. The workflow consists in three standard steps, defined by OpenEBench. The tools needed to run these steps are containerised in three Docker images, whose recipes are available in the [TCGA_benchmarking_dockers](https://github.com/inab/TCGA_benchmarking_dockers ) repository and the images are stored in the [INB GitLab container registry](https://gitlab.bsc.es/inb/elixir/openebench/workflows/tcga_benchmarking_dockers/container_registry) . Separated instances are spawned from these images for each step:\r\n1. **Validation**: the input file format is checked and, if required, the content of the file is validated (e.g check whether the submitted gene IDs exist)\r\n2. **Metrics Generation**: the predictions are compared with the 'Gold Standards' provided by the community, which results in two performance metrics - precision (Positive Predictive Value) and recall(True Positive Rate).\r\n3. **Consolidation**: the benchmark itself is performed by merging the tool metrics with the rest of TCGA data. The results are provided in JSON format and SVG format (scatter plot).\r\n\r\n![OpenEBench benchmarking workflow](https://raw.githubusercontent.com/inab/TCGA_benchmarking_workflow/1.0.8/workflow_schema.jpg)\r\n\r\n## Data\r\n\r\n* [TCGA_sample_data](./TCGA_sample_data) folder contains all the reference data required by the steps. 
It is derived from the manuscript:\r\n[Comprehensive Characterization of Cancer Driver Genes and Mutations](https://www.cell.com/cell/fulltext/S0092-8674%2818%2930237-X?code=cell-site), Bailey et al, 2018, Cell [![doi:10.1016/j.cell.2018.02.060](https://img.shields.io/badge/doi-10.1016%2Fj.cell.2018.02.060-green.svg)](https://doi.org/10.1016/j.cell.2018.02.060) \r\n* [TCGA_sample_out](./TCGA_sample_out) folder contains an example output for a workflow run, with two cancer types / challenges selected (ACC, BRCA). Results obtained from the default execution should be similar to those ones available in this directory. Results found in [TCGA_sample_out/results](./TCGA_sample_out/results) can be visualized in the browser using [`benchmarking_workflows_results_visualizer` javascript library](https://github.com/inab/benchmarking_workflows_results_visualizer).\r\n\r\n## Requirements\r\nThis workflow depends on three tools that have to be installed before you can run it:\r\n* [Git](https://git-scm.com/downloads): Used to download the workflow from GitHub.\r\n* [Docker](https://docs.docker.com/get-docker/): The Docker Engine is used under the hood to execute the containerised steps of the benchmarking workflow.\r\n* [Nextflow](https://www.nextflow.io/): Is the technology used to write and execute the benchmarking workflow. Note that it depends on Bash (\u003e=3.2) and Java (\u003e=8 , \u003c=17). 
We provide the script [run_local_nextflow.bash](run_local_nextflow.bash) that automates their installation for local testing.\r\n\r\nCheck that these tools are available in your environment:\r\n```\r\n# Git\r\n\u003e which git\r\n/usr/bin/git\r\n\u003e git --version\r\ngit version 2.26.2\r\n\r\n# Docker\r\n\u003e which docker\r\n/usr/bin/docker\r\n\u003e docker --version\r\nDocker version 20.10.9-ce, build 79ea9d308018\r\n\r\n# Nextflow\r\n\u003e which nextflow\r\n/home/myuser/bin/nextflow\r\n\u003e nextflow -version\r\n\r\n      N E X T F L O W\r\n      version 21.04.1 build 5556\r\n      created 14-05-2021 15:20 UTC (17:20 CEST)\r\n      cite doi:10.1038/nbt.3820\r\n      http://nextflow.io\r\n```\r\nIn the case of docker, apart from being installed the daemon has to be running. On Linux distributions that use `Systemd` for service management, which includes the most popular ones as of 2021 (Ubuntu, Debian, CentOs, Red Hat, OpenSuse), the `systemctl` command can be used to check its status and manage it:\r\n\r\n```\r\n# Check status of docker daemon\r\n\u003e sudo systemctl status docker\r\n● docker.service - Docker Application Container Engine\r\n   Loaded: loaded (/usr/lib/systemd/system/docker.service; disabled; vendor preset: disabled)\r\n   Active: inactive (dead)\r\n     Docs: http://docs.docker.com\r\n\r\n# Start docker daemon\r\n\u003e sudo systemctl start docker\r\n```\r\n\r\n### Download workflow\r\nSimply clone the repository and check out the latest tag (currently `1.0.8`):\r\n\r\n```\r\n# Clone repository\r\n\u003e git clone https://github.com/inab/TCGA_benchmarking_dockers.git\r\n\r\n# Move to new directory\r\ncd TCGA_benchmarking_workflow/\r\n\r\n# Checkout version 1.0.8\r\n\u003e git checkout 1.0.8 -b 1.0.8\r\n```\r\n\r\n## Usage\r\nThe workflow can be run in two different ways:\r\n* Standard: `nextflow run main.nf -profile docker`\r\n* Using the bash script that installs Java and Nextflow:`./run_local_nextflow.bash run main.nf -profile 
docker`.\r\n\r\nArguments specifications:\r\n```\r\nUsage:\r\nRun the pipeline with default parameters:\r\nnextflow run main.nf -profile docker\r\n\r\nRun with user parameters:\r\nnextflow run main.nf -profile docker --predictionsFile {driver.genes.file} --public_ref_dir {validation.reference.file} --participant_name {tool.name} --metrics_ref_dir {gold.standards.dir} --cancer_types {analyzed.cancer.types} --assess_dir {benchmark.data.dir} --results_dir {output.dir}\r\n\r\nMandatory arguments:\r\n\t--input                 List of cancer genes prediction\r\n\t--community_id          Name or OEB permanent ID for the benchmarking community\r\n\t--public_ref_dir        Directory with list of cancer genes used to validate the predictions\r\n\t--participant_id        Name of the tool used for prediction\r\n\t--goldstandard_dir      Dir that contains metrics reference datasets for all cancer types\r\n\t--challenges_ids        List of types of cancer selected by the user, separated by spaces\r\n\t--assess_dir            Dir where the data for the benchmark are stored\r\n\r\nOther options:\r\n\t--validation_result     The output directory where the results from validation step will be saved\r\n\t--augmented_assess_dir  Dir where the augmented data for the benchmark are stored\r\n\t--assessment_results    The output directory where the results from the computed metrics step will be saved\r\n\t--outdir                The output directory where the consolidation of the benchmark will be saved\r\n\t--statsdir              The output directory with nextflow statistics\r\n\t--data_model_export_dir The output dir where json file with benchmarking data model contents will be saved\r\n\t--otherdir              The output directory where custom results will be saved (no directory inside)\r\nFlags:\r\n\t--help                  Display this message\r\n```\r\n\r\nDefault input parameters and Docker images to use for each step can be specified in the [config](./nextflow.config) 
file.\r\n\r\n**NOTE: In order to make your workflow compatible with the [OpenEBench VRE Nextflow Executor](https://github.com/inab/vre-process_nextflow-executor), please make sure to use the same parameter names in your workflow.**\r\n","organization":"OpenEBench","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/244?version=1","name":"Version 1","author":["José Mª Fernández","Asier Gonzalez-Uriarte"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/244?version=2","name":"Version 2","author":["José Mª Fernández","Asier Gonzalez-Uriarte"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/244?version=3","name":"Version 3","author":["José Mª Fernández","Asier Gonzalez-Uriarte"],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/244?version=4","name":"Version 4","author":["José Mª Fernández","Asier Gonzalez-Uriarte"],"descriptor_type":["NFL"]}]},{"id":"245","url":"https://workflowhub.eu/workflows/245","name":"De novo digitisation","description":"","organization":"Specimen Data Refinery","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/245?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"246","url":"https://workflowhub.eu/workflows/246","name":"fragment-based-docking-scoring/main","description":"Virtual screening of the SARS-CoV-2 main protease with rDock and pose scoring","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/246?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/246?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/246?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/246?version=4","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]}]},{"id":"247","url":"https://workflowhub.eu/workflows/247","name":"protein-ligand-complex-parameterization/main","description":"# Protein-ligand complex parameterization\n\nParameterizes an input protein (PDB) and ligand (SDF) file prior to molecular\ndynamics simulation with GROMACS.\n\nThis is a simple workflow intended for use as a subworkflow in more complex\nMD workflows. It is used as a subworkflow by the GROMACS MMGBSA and dcTMD\nworkflows. \n","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/247?version=1","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/247?version=2","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"248","url":"https://workflowhub.eu/workflows/248","name":"gromacs-mmgbsa/main","description":"MMGBSA simulation and calculation","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/248?version=1","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/248?version=2","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/248?version=3","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/248?version=4","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/248?version=5","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/248?version=6","name":"v0.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/248?version=7","name":"v0.1.8","author":[],"descriptor_type":["GALAXY"]}]},{"id":"249","url":"https://workflowhub.eu/workflows/249","name":"gromacs-dctmd/main","description":"Perform dcTMD free energy simulations and calculations","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/249?version=1","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/249?version=2","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/249?version=3","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/249?version=4","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/249?version=5","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]}]},{"id":"250","url":"https://workflowhub.eu/workflows/250","name":"multiAffinity","description":"\u003cbr\u003e\r\n\r\n\u003cimg 
src=\"https://github.com/marbatlle/multiAffinity/raw/main/docs/img/multiAffinty-logo.png\" alt=\"drawing\" width=\"400\"/\u003e\r\n\r\n\u003cbr\u003e\r\n\r\nMultiAffinity enables the study of how gene dysregulation propagates on a multilayer network on a disease of interest, uncovering key genes. Find the detailed documentation for the tool [here](https://marbatlle.github.io/multiAffinity/).\r\n\r\n![alt](https://github.com/marbatlle/multiAffinity/raw/main/docs/img/multiAffinity_workflow.png)","organization":"iPC: individualizedPaediatricCure","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/250?version=1","name":"Version 1","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]}]},{"id":"251","url":"https://workflowhub.eu/workflows/251","name":"Investigation of lockdown effect on air quality between January 2019 to May 2021.","description":"This workflow extracts 5 different time periods e.g. January- June 2019, 2020 and 2021, July-December 2019 and 2020 over a single selected location. Then statistics (mean, minimum, maximum) are computed. 
The final products are maximum, minimum and mean.","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/251?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"252","url":"https://workflowhub.eu/workflows/252","name":"Pangeo 101 for everyone - introduction to Xarray","description":"Abstract CWL Automatically generated from the Galaxy workflow file: GTN 'Pangeo 101 for everyone - Introduction to Xarray'.\r\n\r\nIn this tutorial, we analyze particle matter \u003c 2.5 μm/m3 data from Copernicus Atmosphere Monitoring Service to understand Xarray Galaxy Tools:\r\n- Understand how an Xarray dataset is organized;\r\n- Get metadata from Xarray dataset such as variable names, units, coordinates (latitude, longitude, level), etc;\r\n- Plot an Xarray dataset on a geographical map and learn to customize it;\r\n- Select/Subset an Xarray dataset from coordinates values such as time selection or a subset over a geographical area;\r\n- Mask an Xarray dataset with a Where statement, for instance to only see PM2.5 \u003e 30 μm/m and highlight on a map regions with \"high\" values;\r\n- Convert an Xarray dataset to Tabular data (pandas dataframe);\r\n- Plot tabular data to visualize the forecast PM2.5 over a single point (here Naples) using a scatterplot and/or climate stripes.","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/252?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"253","url":"https://workflowhub.eu/workflows/253","name":"Nanopore Guppy Basecalling Assembly Workflow","description":"### - deprecated - \r\n\r\nWorkflow for sequencing with ONT Nanopore, from basecalling to assembly.\r\n  - Guppy (basecalling of raw reads)\r\n  - MinIONQC (quality check)\r\n  - FASTQ 
merging from multi into one file\r\n  - Kraken2 (taxonomic classification)\r\n  - Krona (classification visualization)\r\n  - Flye (de novo assembly)\r\n  - Medaka (assembly polishing)\r\n  - QUAST (assembly quality reports)\r\n\r\n**All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\n  Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl\u003cbr\u003e\r\n  Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows\u003cbr\u003e\r\n\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/253?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst","Germán Royval"],"descriptor_type":["CWL"]}]},{"id":"254","url":"https://workflowhub.eu/workflows/254","name":"Nanopore Assembly Workflow - Deprecated -","description":"#### - Deprecated -\r\n#### See our updated hybrid assembly workflow: https://workflowhub.eu/workflows/367\r\n#### And other workflows: https://workflowhub.eu/projects/16#workflows\r\n# \r\n**Workflow for sequencing with ONT Nanopore data, from basecalled reads to (meta)assembly and binning**\r\n- Workflow Nanopore Quality\r\n- Kraken2 taxonomic classification of FASTQ reads\r\n- Flye (de-novo assembly)\r\n- Medaka (assembly polishing)\r\n- metaQUAST (assembly quality reports)\r\n\r\n**When Illumina reads are provided:** \r\n  - Workflow Illumina Quality: https://workflowhub.eu/workflows/336?version=1\t\r\n  - Assembly polishing with Pilon\u003cbr\u003e\r\n  - Workflow binnning https://workflowhub.eu/workflows/64?version=11\r\n      - Metabat2\r\n      - CheckM\r\n      - BUSCO\r\n      - GTDB-Tk\r\n\r\n**All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\n  Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl\u003cbr\u003e\r\n  Workflows: 
https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows\u003cbr\u003e","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/254?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst","Germán Royval"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/254?version=2","name":"Version 2","author":["Bart Nijsse","Jasper Koehorst","Germán Royval"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/254?version=3","name":"Version 3","author":["Bart Nijsse","Jasper Koehorst","Germán Royval"],"descriptor_type":["CWL"]}]},{"id":"255","url":"https://workflowhub.eu/workflows/255","name":"CWL GMX Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand parameterization tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. 
With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/255?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/255?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/255?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"257","url":"https://workflowhub.eu/workflows/257","name":"Protein-ligand docking (fpocket)","description":"This workflow performs the process of protein-ligand docking, step by step, using the BioExcel Building Blocks library (biobb).","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/257?version=1","name":"Version 1","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":["CWL"]}]},{"id":"258","url":"https://workflowhub.eu/workflows/258","name":"CWL Protein Ligand Complex MD Setup tutorial","description":"# Protein Ligand Complex MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/complex/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation system** containing a **protein in complex with a ligand**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **T4 lysozyme** L99A/M102Q protein (PDB code 3HTB), in complex with the **2-propylphenol** small molecule (3-letter Code JZ4). \r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/258?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/258?version=2","name":"Version 2","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/258?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"259","url":"https://workflowhub.eu/workflows/259","name":"CWL Protein-ligand Docking tutorial (Fpocket)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorials aim to illustrate the process of **protein-ligand docking**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 
2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/259?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/259?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/259?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"260","url":"https://workflowhub.eu/workflows/260","name":"CWL Amber Protein MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), 
see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/260?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/260?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/260?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"261","url":"https://workflowhub.eu/workflows/261","name":"CWL Amber Protein Ligand Complex MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for 
details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/261?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/261?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/261?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"262","url":"https://workflowhub.eu/workflows/262","name":"CWL ABC MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for 
details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/262?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/262?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/262?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"263","url":"https://workflowhub.eu/workflows/263","name":"polya_liftover","description":"# polya_liftover - sc/snRNAseq Snakemake Workflow\r\n\r\nA [Snakemake][sm] workflow for using PolyA_DB and UCSC Liftover with Cellranger.\r\n\r\nSome genes are not accurately annotated in the reference genome.\r\nHere,\r\nwe use information provide by the [PolyA_DB v3.2][polya] to update the coordinates,\r\nthen the [USCS Liftover][liftover] tool to update to a more recent genome.\r\nNext,\r\nwe use [Cellranger][cr] to create the reference and count matrix.\r\nFinally,\r\nby taking advantage of the integrated [Conda][conda] and [Singularity][sing] support,\r\nwe can run the whole thing in an isolated environment.\r\n\r\nPlease see our [README][readme] for the full details!\r\n\r\n\r\n[sm]: https://snakemake.readthedocs.io/en/stable/index.html \"Snakemake\"\r\n[polya]: https://exon.apps.wistar.org/polya_db/v3/index.php \"PolyA_DB\"\r\n[liftover]: https://genome.ucsc.edu/cgi-bin/hgLiftOver \"Liftover\"\r\n[cr]: https://github.com/alexdobin/STAR \"Cellranger\"\r\n[conda]: https://docs.conda.io/en/latest/ \"Conda\"\r\n[sing]: https://sylabs.io/singularity/ \"Singularity\"\r\n[readme]: 
https://github.com/IMS-Bio2Core-Facility/polya_liftover/blob/main/README.md","organization":"Bioinformatics and Biostatistics (BIO2 ) Core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/263?version=1","name":"Version 1","author":["Ryan Patterson-Cross"],"descriptor_type":["SMK"]}]},{"id":"264","url":"https://workflowhub.eu/workflows/264","name":"RNA sequencing data obtained from organisms with a reference genome and annotation followed by a prediction step of editing sites using RDDpred","description":"## Introduction\r\n\r\n**vibbits/rnaseq-editing** is a bioinformatics pipeline that can be used to analyse RNA sequencing data obtained from organisms with a reference genome and annotation followed by a prediction step of editing sites using RDDpred.\r\n\r\nThe pipeline is largely based on the [nf-core RNAseq pipeline](https://nf-co.re/rnaseq/).\r\n\r\nThe initial nf-core pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\n## Pipeline summary\r\n\r\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\r\n2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\r\n3. Adapter and quality trimming ([`Trimmomatics`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))\r\n4. 
Use of STAR for multiple alignment and quantification: [`STAR`](https://github.com/alexdobin/STAR)\r\n5. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\r\n6. Prediction of editing sites using RDDpred ([`RDDpred`](https://github.com/vibbits/RDDpred))\r\n7. Extensive quality control:\r\n    1. [`RSeQC`](http://rseqc.sourceforge.net/)\r\n    2. [`Qualimap`](http://qualimap.bioinfo.cipf.es/)\r\n    3. [`dupRadar`](https://bioconductor.org/packages/release/bioc/html/dupRadar.html)\r\n8. Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/))\r\n\r\n## Quick Start\r\n\r\n1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`\u003e=21.04.0`)\r\n\r\n2. Install [`Docker`](https://docs.docker.com/engine/installation/) on a Linux operating system.\r\n   Note: This pipeline does not currently support running with macOS.\r\n\r\n3. Download the pipeline via git clone, download the associated training data files for RDDpred into the assets folder, download the reference data to \r\n\r\n    ```console\r\n    git clone https://github.com/vibbits/rnaseq-editing.git\r\n    cd $(pwd)/rnaseq-editing/assets\r\n    # download training data file for RDDpred\r\n    wget -c \r\n    # download reference data for your genome, we provide genome and indexed genome for STAR 2.7.3a\r\n    \r\n    ```\r\n\r\n    \u003e * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile \u003cinstitute\u003e` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.\r\n\r\n4. 
Start running your own analysis using Docker locally!\r\n\r\n    ```console\r\n    nextflow run vibbits/rnaseq-editing \\\r\n        --input samplesheet.csv \\\r\n        --genome hg19 \\\r\n        -profile docker\r\n    ```\r\n\r\n    * An executable Python script called [`fastq_dir_to_samplesheet.py`](https://github.com/nf-core/rnaseq/blob/master/bin/fastq_dir_to_samplesheet.py) has been provided if you would like to auto-create an input samplesheet based on a directory containing FastQ files **before** you run the pipeline (requires Python 3 installed locally) e.g.\r\n\r\n        ```console\r\n        wget -L https://raw.githubusercontent.com/nf-core/rnaseq/master/bin/fastq_dir_to_samplesheet.py\r\n        ./fastq_dir_to_samplesheet.py \u003cFASTQ_DIR\u003e samplesheet.csv --strandedness reverse\r\n        ```\r\n\r\n    * The final analysis has been executed on the Azure platform using Azure Kubernetes Services (AKS). AKS has to be set up on the Azure platform by defining a standard node pool called sys next to the scalable node pool cpumem using Standard_E8ds_v4 as node size for calculation.\r\n      Furthermore, persistent volume claims (PVCs) have been setup for input and work folders of the nextflow runs. In the PVC `input` the reference data as well as the fastqc files have been stored where the PVC `work`, the temporary nextflow files for the individual runs as well as the output files have been stored.\r\n    * The config file for the final execution run for [RNAseq editing for the human samples and reference genome hg19](https://github.com/vibbits/rnaseq-editing/blob/master/nextflow.config.as-executed).    
\r\n\r\n## Documentation\r\n\r\nThe nf-core/rnaseq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/rnaseq/usage), [parameters](https://nf-co.re/rnaseq/parameters) and [output](https://nf-co.re/rnaseq/output).\r\n\r\n## Credits\r\nThese scripts were written to provide a reproducible data analysis pipeline until the downstream processing using dedicated R scripts for exploratory analysis and plotting. The general structure of pipeline is based on the data analysis steps of the our recent paper [ADAR1 interaction with Z-RNA promotes editing of endogenous double-stranded RNA and prevents MDA5-dependent immune activation](https://pubmed.ncbi.nlm.nih.gov/34380029/).\r\n\r\nNote: The nf-core scripts this pipeline is based on were originally written for use at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se), part of [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden, by Phil Ewels ([@ewels](https://github.com/ewels)) and Rickard Hammarén ([@Hammarn](https://github.com/Hammarn)).\r\n\r\nThe RNAseq pipeline was re-written in Nextflow DSL2 by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [The Bioinformatics \u0026 Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London.\r\n\r\n## Citations\r\n\r\nThe `nf-core` publication is cited here as follows:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"VIB Bioinformatics Core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/264?version=1","name":"master @ 6b921d6","author":[],"descriptor_type":["NFL"]}]},{"id":"265","url":"https://workflowhub.eu/workflows/265","name":"FAIR CRCC - send data","description":"# Snakemake workflow: FAIR CRCC - send data\r\n\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥6.3.0-brightgreen.svg)](https://snakemake.github.io)\r\n[![GitHub actions status](https://github.com/crs4/fair-crcc-send-data/workflows/Tests/badge.svg?branch=main)](https://github.com/crs4/fair-crcc-send-data/actions?query=branch%3Amain+workflow%3ATests)\r\n\r\n\r\nA Snakemake workflow for securely sharing Crypt4GH-encrypted sensitive data from\r\nthe [CRC\r\nCohort](https://www.bbmri-eric.eu/scientific-collaboration/colorectal-cancer-cohort/)\r\nto a destination approved through a successful [access\r\nrequest](https://www.bbmri-eric.eu/services/access-policies/).\r\n\r\nThe recommendation is to create a directory for the request that has been\r\napproved;  it will be used as the working directory for the run.  Copy there the\r\nrecipient's crypt4gh key and prepare the run configuration.  The configuration\r\nwill specify the repository, the destination of the data, and the list of\r\nfiles/directories to transfer.\r\n\r\n\r\n## What's the CRC Cohort?\r\n\r\nThe CRC Cohort is a collection of clinical data and digital high-resolution\r\ndigital pathology images pertaining to tumor cases.  The collection has been\r\nassembled from a number of participating biobanks and other partners through the\r\n[ADOPT BBMRI-ERIC](https://www.bbmri-eric.eu/scientific-collaboration/adopt-bbmri-eric/) project.\r\n\r\nResearchers interested in using the data for science can file an application for\r\naccess.  
If approved, the part of the dataset required for the planned and\r\napproved work can be copied to the requester's selected secure storage location\r\n(using this workflow).\r\n\r\n\r\n## Usage\r\n\r\n### Example\r\n\r\n    mkdir request_1234 \u0026\u0026 cd request_1234\r\n    # Now write the configuration, specifying crypt4gh keys, destination and files to send.\r\n    # Finally, execute workflow.\r\n    snakemake --snakefile ../fair-crcc-send-data/workflow/Snakefile --profile ../profile/ --configfile config.yml --use-singularity --cores\r\n\r\n\r\n#### Run configuration example\r\n\r\n```\r\nrecipient_key: ./recipient_key\r\nrepository:\r\n  path: \"/mnt/rbd/data/sftp/fair-crcc/\"\r\n  private_key: bbmri-key\r\n  public_key: bbmri-key.pub\r\nsources:\r\n  glob_extension: \".tiff.c4gh\"\r\n  items:\r\n  - some/directory/to/glob\r\n  - another/individual/file.tiff.c4gh\r\ndestination:\r\n  type: \"S3\"\r\n  root_path: \"my-bucket/prefix/\"\r\n  connection:  # all elements will be passed to the selected snakemake remote provider\r\n    access_key_id: \"MYACCESSKEY\"\r\n    secret_access_key: \"MYSECRET\"\r\n    host: http://localhost:9000\r\n    verify: false # don't verify ssl certificates\r\n```\r\n\r\n\r\nTODO\r\n\r\nThe usage of this workflow is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?usage=crs4%2Ffair-crcc-send-data).\r\n\r\nIf you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this (original) repository and its DOI (see above).\r\n","organization":"CRC Cohort","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/265?version=1","name":"Version 1","author":["Luca Pireddu"],"descriptor_type":["SMK"]}]},{"id":"266","url":"https://workflowhub.eu/workflows/266","name":"FAIR CRCC - image conversion","description":"# Snakemake workflow: FAIR CRCC - image 
conversion\r\n\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥6.3.0-brightgreen.svg)](https://snakemake.github.io)\r\n[![GitHub actions status](https://github.com/crs4/fair-crcc-img-convert/workflows/Tests/badge.svg?branch=main)](https://github.com/crs4/fair-crcc-img-convert/actions?query=branch%3Amain+workflow%3ATests)\r\n\r\n\r\nA Snakemake workflow for converting whole-slide images (WSI) from the [CRC\r\nCohort](https://www.bbmri-eric.eu/scientific-collaboration/colorectal-cancer-cohort/)\r\nfrom vendor-specific image formats to open image formats (at the moment,\r\nOME-TIFF).  The workflow also encrypts the new image files with\r\n[Crypt4GH](https://doi.org/10.1093/bioinformatics/btab087).\r\n\r\n\r\n## What's the CRC Cohort?\r\n\r\nThe CRC Cohort is a collection of clinical data and digital high-resolution\r\ndigital pathology images pertaining to tumor cases.  The collection has been\r\nassembled from a number of participating biobanks and other partners through the\r\n[ADOPT BBMRI-ERIC](https://www.bbmri-eric.eu/scientific-collaboration/adopt-bbmri-eric/) project.\r\n\r\nResearchers interested in using the data for science can [apply for\r\naccess](https://www.bbmri-eric.eu/services/access-policies/).\r\n\r\n\r\n## Usage\r\n\r\nThe usage of this workflow is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?usage=crs4%2Ffair-crcc-img-convert).\r\n\r\nIf you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this repository and its DOI (see above).\r\n","organization":"CRC Cohort","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/266?version=1","name":"Version 1","author":["Luca Pireddu"],"descriptor_type":["SMK"]}]},{"id":"267","url":"https://workflowhub.eu/workflows/267","name":"Cell Lineage in the adult mouse corpus callosum","description":"\r\n# Summary 
\r\n\r\nThis notebook demonstrates how to recreate lineages published in the paper [Live imaging of remyelination in the adult mouse corpus callosum](https://www.pnas.org/content/118/28/e2025795118) and available at [idr0113-bottes-opcclones](https://idr.openmicroscopy.org/search/?query=Name:idr0113).\r\n\r\nThe lineage is created from the metadata associated to the specified image.\r\n\r\nTo load the data from the Image Data Resource, we use:\r\n\r\n* the [Python API](https://docs.openmicroscopy.org/omero/latest/developers/Python.html)\r\n* the [JSON API](https://docs.openmicroscopy.org/omero/latest/developers/json-api.html)\r\n\r\nLPC-induced focal demyelination and in vivo imaging of genetically targeted OPCs and their progeny to describe the cellular dynamics of OPC-mediated remyelination in the CC.\r\n\r\nLongitudinal observation of OPCs and their progeny for up to two months reveals functional inter- and intraclonal heterogeneity and provides insights into the cell division capacity and the migration/differentiation dynamics of OPCs and their daughter cells in vivo.\r\n\r\nThe majority of the clones remained quiescent or divided only few times. Some OPCs were highly proliferative. Large clones showed longer times between consecutive divisions compared to low proliferating clones.\r\n\r\nOPCs show distinct modes of cell division: from symmetric proliferative, to symmetric differentiating and also asymmetric cell division, where the OPC is self-renewed while the other daughter cell differentiates.\r\n\r\nOnly 16.46% of OPC-derived cells differentiated into mature, remyelinating oligodendrocytes, with OPCs born at early divisions showing a higher probability to survive and to terminally differentiate.\r\n\r\nCell death was associated with distinct cell division histories of different clones, with higher probability of death when generated at later divisions.\r\n\r\nMigratory behaviour was restricted to progenitors. 
Successfully differentiating progenitors moved shorter distances per day compared to dying cells.\r\n\r\n# Inputs\r\nParameters needed to configure the workflow:\r\n\r\n**imageId**: Identifier of an image in IDR.\r\n\r\n# Outputs\r\nOutput file generated:\r\n\r\n**lineage_imageId.pdf**: A PDF with the generated lineage. Options to save as `png` or `svg` are also available.\r\n\r\n","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/267?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"274","url":"https://workflowhub.eu/workflows/274","name":"Galaxy workflow demonstrating the usage of EODIE Galaxy Tool","description":"This workflow demonstrates the usage of EODIE, a toolkit to extract object based timeseries information from Earth Observation data.\r\n\r\nEODIE is a toolkit to extract object based timeseries information from Earth Observation data.\r\n\r\nThe EODIE code can be found on [Gitlab](https://gitlab.com/fgi_nls/public/EODIE) .\r\n\r\nThe goal of EODIE is to ease the extraction of time series information at object level. Today, vast amounts of Earth Observation data are available to the users via for example earth explorer or scihub. Often, not the whole images are needed for exploitation, but only the timeseries of a certain feature on object level. Objects may be polygons depicting agricultural field parcels, forest plots, or areas of a certain land cover type.\r\n\r\nEODIE takes the objects in as polygons in a shapefile as well as the timeframe of interest and the features (eg vegetation indices) to be extracted. 
The output is a per polygon timeseries of the selected features over the timeframe of interest.\r\n\r\n**Online documentation**\r\nEODIE documentation can be found [here](https://eodie.readthedocs.io/en/latest/).\r\n\r\n**Abstract CWL**\r\nAutomatically generated from the Galaxy workflow file: Workflow constructed from history 'EODIE Sentinel'","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/274?version=1","name":"Version 1","author":["Anne Fouilloux"],"descriptor_type":["GALAXY"]}]},{"id":"276","url":"https://workflowhub.eu/workflows/276","name":"Python Protein MD Setup tutorial","description":"# Protein MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/276?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/276?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/276?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/276?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/276?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"277","url":"https://workflowhub.eu/workflows/277","name":"Galaxy Protein MD Setup tutorial","description":"# Protein MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n***\r\n## This workflow must be run in 
**biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/gmx-protein-md-setup).\r\n***\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/277?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/277?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/277?version=3","name":"Version 3","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"278","url":"https://workflowhub.eu/workflows/278","name":"CNV_pipeline","description":"# StructuralVariants Workflow\r\n","organization":"TransBioNet","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/278?version=1","name":"1.0.1","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/278?version=2","name":"1.0.2","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/278?version=3","name":"1.0.3","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"4","url":"https://workflowhub.eu/workflows/278?version=4","name":"1.0.4","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"5","url":"https://workflowhub.eu/workflows/278?version=5","name":"1.0.5","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"6","url":"https://workflowhub.eu/workflows/278?version=6","name":"1.0.6","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"7","url":"https://workflowhub.eu/workflows/278?version=7","name":"1.0.7","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"8","url":"https://workflowhub.eu/workflows/278?version=8","name":"1.0.8","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"9","url":"https://workflowhub.eu/workflows/278?version=9","name":"1.0.9","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"10","url":"https://workflowhub.eu/workflows/278?version=10","name":"1.1.0","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]},{"id":"11","url":"https://workflowhub.eu/workflows/278?version=11","name":"1.1.3","author":["Laura Rodriguez-Navas"],"descriptor_type":["CWL"]}]},{"id":"279","url":"https://workflowhub.eu/workflows/279","name":"CWL Protein MD Setup tutorial","description":"# Protein MD Setup 
tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/279?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/279?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/279?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"280","url":"https://workflowhub.eu/workflows/280","name":"Python GMX Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand 
parameterization tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/280?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/280?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/280?version=3","name":"Version 
3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/280?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/280?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/280?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"281","url":"https://workflowhub.eu/workflows/281","name":"Python Protein Ligand Complex MD Setup tutorial","description":"# Protein Ligand Complex MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/complex/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation system** containing a **protein in complex with a ligand**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **T4 lysozyme** L99A/M102Q protein (PDB code 3HTB), in complex with the **2-propylphenol** small molecule (3-letter Code JZ4). 
\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/281?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/281?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/281?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/281?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/281?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"282","url":"https://workflowhub.eu/workflows/282","name":"Python Protein-ligand Docking tutorial (Fpocket)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorials aim to illustrate the process of **protein-ligand docking**, step by 
step, using the **BioExcel Building Blocks library (biobb)**. The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/282?version=1","name":"Version 1","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/282?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/282?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/282?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/282?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"283","url":"https://workflowhub.eu/workflows/283","name":"Python Amber Protein MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png 
\"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/283?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/283?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/283?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/283?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/283?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"284","url":"https://workflowhub.eu/workflows/284","name":"Python Amber Protein Ligand Complex MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in 
Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/284?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/284?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/284?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/284?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/284?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"285","url":"https://workflowhub.eu/workflows/285","name":"Python ABC MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 
[823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/285?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/285?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/285?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/285?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/285?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"286","url":"https://workflowhub.eu/workflows/286","name":"Python Structural DNA helical parameters tutorial","description":"# Structural DNA helical parameters from MD trajectory tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the [NAFlex](https://mmb.irbbarcelona.org/NAFlex) server and in particular in its [Nucleic Acids Analysis section](https://mmb.irbbarcelona.org/NAFlex/help.php?id=tutorialAnalysisNA).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **extracting structural and dynamical properties** from a **DNA MD trajectory helical parameters**, 
step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Drew Dickerson Dodecamer** sequence -CGCGAATTCGCG- (PDB code [1BNA](https://www.rcsb.org/structure/1BNA)). The trajectory used is a  500ns-long MD simulation taken from the [BigNASim](https://mmb.irbbarcelona.org/BIGNASim/) database ([NAFlex_DDD_II](https://mmb.irbbarcelona.org/BIGNASim/getStruc.php?idCode=NAFlex_DDD_II) entry).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/286?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/286?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/286?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/286?version=4","name":"Version 4","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/286?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/286?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"287","url":"https://workflowhub.eu/workflows/287","name":"Python Protein MD Analysis tutorial","description":"# Protein MD Analysis tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis workflow computes a set of Quality Control (QC) analyses on top of an uploaded MD trajectory. QC analyses include positional divergence (RMSd), change of shape (\u003cstrong\u003eRadius of Gyration\u003c/strong\u003e), identification of flexible regions (\u003cstrong\u003eatomic/residue fluctuations\u003c/strong\u003e), and identification of different molecular conformations (\u003cstrong\u003etrajectory clustering\u003c/strong\u003e).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/287?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/287?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/287?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/287?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"289","url":"https://workflowhub.eu/workflows/289","name":"CWL Protein MD Setup tutorial with mutations","description":"# Mutations Protein MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/289?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/289?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/289?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"290","url":"https://workflowhub.eu/workflows/290","name":"Python Protein MD Setup tutorial with mutations","description":"# Mutations Protein MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/290?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/290?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/290?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/290?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"291","url":"https://workflowhub.eu/workflows/291","name":"Python GMX OPLS/AA Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand parameterization tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the 
**BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2022 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2022 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/291?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/291?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"292","url":"https://workflowhub.eu/workflows/292","name":"Python Amber Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand parameterization tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand 
parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2022 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2022 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/292?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/292?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"293","url":"https://workflowhub.eu/workflows/293","name":"Python CNS/XPLOR MD Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand parameterization tutorial using BioExcel Building Blocks 
(biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2022 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2022 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/293?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/293?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"294","url":"https://workflowhub.eu/workflows/294","name":"Galaxy GMX Automatic Ligand Parameterization tutorial","description":"# Automatic 
Ligand parameterization tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n## This workflow must be run in **biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/gmx-ligand-parameterization).\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/294?version=1","name":"Version 1","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/294?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/294?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"295","url":"https://workflowhub.eu/workflows/295","name":"Galaxy Protein Ligand Complex MD Setup","description":"# Protein Ligand Complex MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n***\r\n## This workflow must be run in **biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/gmx-protein-ligand-complex-md-setup).\r\n***\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/complex/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation system** containing a **protein in complex with a ligand**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **T4 lysozyme** L99A/M102Q protein (PDB code 3HTB), in complex with the **2-propylphenol** small molecule (3-letter Code JZ4). 
\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2022 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2022 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/295?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/295?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/295?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"296","url":"https://workflowhub.eu/workflows/296","name":"Galaxy Protein-ligand Docking tutorial (Fpocket)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n***\r\n## This workflow must be run in **biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/protein-ligand-docking).\r\n***\r\n\r\nThis tutorials aim to illustrate the process of **protein-ligand docking**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/296?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/296?version=2","name":"Version 
2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/296?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"297","url":"https://workflowhub.eu/workflows/297","name":"Galaxy Amber Protein MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n***\r\n## This workflow must be run in **biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/amber-protein-md-setup).\r\n***\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/297?version=1","name":"Version 1","author":["Adam 
Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/297?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/297?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"298","url":"https://workflowhub.eu/workflows/298","name":"Galaxy Amber Protein Ligand Complex MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n***\r\n## This workflow must be run in **biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/amber-protein-ligand-complex-md-setup).\r\n***\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building 
Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/298?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/298?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/298?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"299","url":"https://workflowhub.eu/workflows/299","name":"Galaxy ABC MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n***\r\n## This workflow must be run in **biobb.usegalaxy.es**. Please, [click here to access](https://biobb.usegalaxy.es/u/gbayarri/w/abcix-md-setup).\r\n***\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for 
details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/299?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/299?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/299?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"300","url":"https://workflowhub.eu/workflows/300","name":"eQTL-Catalogue/qtlmap","description":"# eQTL-Catalogue/qtlmap\r\n**Portable eQTL analysis and statistical fine mapping workflow used by the eQTL Catalogue**\r\n\r\n### Introduction\r\n\r\n**eQTL-Catalogue/qtlmap** is a bioinformatics analysis pipeline used for QTL Analysis.\r\n\r\nThe workflow takes phenotype count matrix (normalized and quality controlled) and genotype data as input, and finds associations between them with the help of sample metadata and phenotype metadata files (See [Input formats and preparation](docs/inputs_expl.md) for required input file details). To map QTLs, pipeline uses [QTLTools's](https://qtltools.github.io/qtltools/) PCA and RUN methods. For manipulation of files [BcfTools](https://samtools.github.io/bcftools/bcftools.html), [Tabix](http://www.htslib.org/doc/tabix.html) and custom [Rscript](https://www.rdocumentation.org/packages/utils/versions/3.5.3/topics/Rscript) scripts are used.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a bioinformatics workflow tool to run tasks across multiple compute infrastructures in a very portable manner. 
It comes with docker / singularity containers making installation trivial and results highly reproducible.\r\n\r\n\r\n### Documentation\r\nThe eQTL-Catalogue/qtlmap pipeline comes with documentation about the pipeline, found in the `docs/` directory:\r\n\r\n1. [Installation](docs/installation.md)\r\n2. Pipeline configuration\r\n    * [Local installation](docs/configuration/local.md)\r\n    * [Adding your own system](docs/configuration/adding_your_own.md)\r\n3. [Input formats and preparation](docs/inputs_expl.md)\r\n4. [Running the pipeline](docs/usage.md)\r\n5. [Troubleshooting](docs/troubleshooting.md)\r\n\r\n\u003c!-- TODO nf-core: Add a brief overview of what the pipeline does and how it works --\u003e\r\n\r\n### Pipeline Description\r\nMapping QTLs is a process of finding statistically significant associations between phenotypes and genetic variants located nearby (within a specific window around phenotype, a.k.a cis window)\r\nThis pipeline is designed to perform QTL mapping. It is intended to add this pipeline to the nf-core framework in the future.\r\nHigh level representation of the pipeline is shown below:\r\n\r\n### Results\r\nThe output directory of the workflow contains the following subdirectories:\r\n\r\n1. PCA - genotype and gene expression PCA values used as covariates for QTL analysis.\r\n2. sumstats - QTL summary statistics from nominal and permutation passes.\r\n3. susie - SuSiE fine mapping credible sets.\r\n4. susie_full - full set of susie results for all tested variants (very large files).\r\n5. 
susie_merged - susie credible sets merged with summary statistics from univariate QTL analysis.\r\n\r\nColumn names of the output files are explained [here](https://github.com/eQTL-Catalogue/eQTL-Catalogue-resources/blob/master/tabix/Columns.md).\r\n\r\n\r\n# Contributors\r\n* Nurlan Kerimov\r\n* Kaur Alasoo\r\n* Masahiro Kanai\r\n* Ralf Tambets\r\n","organization":"CINECA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/300?version=1","name":"master @ 48318a7","author":[],"descriptor_type":["NFL"]}]},{"id":"301","url":"https://workflowhub.eu/workflows/301","name":"V-pipe (main multi-virus version)","description":"\u003c!-- markdownlint-disable MD013 MD041 --\u003e\r\n\r\n![Logo](https://cbg-ethz.github.io/V-pipe/img/logo.svg)\r\n\r\n[![bio.tools](https://img.shields.io/badge/bio-tools-blue.svg)](https://bio.tools/V-Pipe)\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥7.11.0-blue.svg)](https://snakemake.github.io/snakemake-workflow-catalog/?usage=cbg-ethz/V-pipe)\r\n[![Deploy Docker image](https://github.com/cbg-ethz/V-pipe/actions/workflows/deploy-docker.yaml/badge.svg)](https://github.com/cbg-ethz/V-pipe/pkgs/container/v-pipe)\r\n[![Tests](https://github.com/cbg-ethz/V-pipe/actions/workflows/run_regression_tests.yaml/badge.svg)](https://github.com/cbg-ethz/V-pipe/actions/workflows/run_regression_tests.yaml)\r\n[![Mega-Linter](https://github.com/cbg-ethz/V-pipe/actions/workflows/mega-linter.yml/badge.svg)](https://github.com/cbg-ethz/V-pipe/actions/workflows/mega-linter.yml)\r\n[![License: Apache-2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)\r\n\r\nV-pipe is a workflow designed for the analysis of next generation sequencing (NGS) data from viral pathogens. 
It produces a number of results in a curated format (e.g., consensus sequences, SNV calls, local/global haplotypes).\r\nV-pipe is written using the Snakemake workflow management system.\r\n\r\n## Usage\r\n\r\nDifferent ways of initializing V-pipe are presented below. We strongly encourage you to deploy it [using the quick install script](#using-quick-install-script), as this is our preferred method.\r\n\r\nTo configure V-pipe refer to the documentation present in [config/README.md](config/README.md).\r\n\r\nV-pipe expects the input samples to be organized in a [two-level](config/README.md#samples) directory hierarchy,\r\nand the sequencing reads must be provided in a sub-folder named `raw_data`. Further details can be found on the [website](https://cbg-ethz.github.io/V-pipe/usage/).\r\nCheck the utils subdirectory for [mass-importers tools](utils/README.md#samples-mass-importers) that can assist you in generating this hierarchy.\r\n\r\nWe provide [virus-specific base configuration files](config/README.md#virus-base-config) which contain handy defaults for, e.g., HIV and SARS-CoV-2. 
Set the virus in the general section of the configuration file:\r\n\r\n```yaml\r\ngeneral:\r\n  virus_base_config: hiv\r\n```\r\n\r\nAlso see [snakemake's documentation](https://snakemake.readthedocs.io/en/stable/executing/cli.html) to learn more about the command-line options available when executing the workflow.\r\n\r\n\r\n### Tutorials\r\n\r\nTutorials for your first steps with V-pipe for different scenarios are available in the [docs/](docs/README.md) subdirectory.\r\n\r\n\r\n### Using quick install script\r\n\r\nTo deploy V-pipe, use the [installation script](utils/README.md#quick-installer) with the following parameters:\r\n\r\n```bash\r\ncurl -O 'https://raw.githubusercontent.com/cbg-ethz/V-pipe/master/utils/quick_install.sh'\r\n./quick_install.sh -w work\r\n```\r\n\r\nThis script will download and install miniconda, checkout the V-pipe git repository (use `-b` to specify which branch/tag) and setup a work directory (specified with `-w`) with an executable script that will execute the workflow:\r\n\r\n```bash\r\ncd work\r\n# edit config.yaml and provide samples/ directory\r\n./vpipe --jobs 4 --printshellcmds --dry-run\r\n```\r\n\r\nTest data to test your installation is available with the tutorials provided in the [docs/](docs/README.md) subdirectory.\r\n\r\n### Using Docker\r\n\r\nNote: the [docker image](https://github.com/cbg-ethz/V-pipe/pkgs/container/v-pipe) is only setup with components to run the workflow for HIV and SARS-CoV-2 virus base configurations.\r\nUsing V-pipe with other viruses or configurations might require internet connectivity for additional software components.\r\n\r\nCreate `config.yaml` or `vpipe.config` and then populate the `samples/` directory.\r\nFor example, the following config file could be used:\r\n\r\n```yaml\r\ngeneral:\r\n  virus_base_config: hiv\r\n\r\noutput:\r\n  snv: true\r\n  local: true\r\n  global: false\r\n  visualization: true\r\n  QA: true\r\n```\r\n\r\nThen execute:\r\n\r\n```bash\r\ndocker run --rm -it -v 
$PWD:/work ghcr.io/cbg-ethz/v-pipe:master --jobs 4 --printshellcmds --dry-run\r\n```\r\n\r\n### Using Snakedeploy\r\n\r\nFirst install [mamba](https://github.com/conda-forge/miniforge#mambaforge), then create and activate an environment with Snakemake and Snakedeploy:\r\n\r\n```bash\r\nmamba create -c conda-forge -c bioconda --name snakemake snakemake snakedeploy\r\nconda activate snakemake\r\n```\r\n\r\nSnakemake's [official workflow installer Snakedeploy](https://snakemake.github.io/snakemake-workflow-catalog/?usage=cbg-ethz/V-pipe) can now be used:\r\n\r\n```bash\r\nsnakedeploy deploy-workflow https://github.com/cbg-ethz/V-pipe --tag master .\r\n# edit config/config.yaml and provide samples/ directory\r\nsnakemake --use-conda --jobs 4 --printshellcmds --dry-run\r\n```\r\n\r\n## Dependencies\r\n\r\n- **[Conda](https://conda.io/docs/index.html)**\r\n\r\n  Conda is a cross-platform package management system and an environment manager application. Snakemake uses mamba as a package manager.\r\n\r\n- **[Snakemake](https://snakemake.readthedocs.io/)**\r\n\r\n  Snakemake is the central workflow and dependency manager of V-pipe. It determines the order in which individual tools are invoked and checks that programs do not exit unexpectedly.\r\n\r\n- **[VICUNA](https://www.broadinstitute.org/viral-genomics/vicuna)**\r\n\r\n  VICUNA is a _de novo_ assembly software designed for populations with high mutation rates. It is used to build an initial reference for mapping reads with ngshmmalign aligner when a `references/cohort_consensus.fasta` file is not provided. 
Further details can be found in the [wiki](https://github.com/cbg-ethz/V-pipe/wiki/getting-started#input-files) pages.\r\n\r\n### Computational tools\r\n\r\nOther dependencies are managed by using isolated conda environments per rule, and below we list some of the computational tools integrated in V-pipe:\r\n\r\n- **[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)**\r\n\r\n  FastQC gives an overview of the raw sequencing data. Flowcells that have been overloaded or otherwise fail during sequencing can easily be determined with FastQC.\r\n\r\n- **[PRINSEQ](http://prinseq.sourceforge.net/)**\r\n\r\n  Trimming and clipping of reads is performed by PRINSEQ. It is currently the most versatile raw read processor with many customization options.\r\n\r\n- **[ngshmmalign](https://github.com/cbg-ethz/ngshmmalign)**\r\n\r\n  We perform the alignment of the curated NGS data using our custom ngshmmalign that takes structural variants into account. It produces multiple consensus sequences that include either majority bases or ambiguous bases.\r\n\r\n- **[bwa](https://github.com/lh3/bwa)**\r\n\r\n  In order to detect specific cross-contaminations with other probes, the Burrows-Wheeler aligner is used. It quickly yields estimates for foreign genomic material in an experiment.\r\n  Additionally, It can be used as an alternative aligner to ngshmmalign.\r\n\r\n- **[MAFFT](http://mafft.cbrc.jp/alignment/software/)**\r\n\r\n  To standardise multiple samples to the same reference genome (say HXB2 for HIV-1), the multiple sequence aligner MAFFT is employed. The multiple sequence alignment helps in determining regions of low conservation and thus makes standardisation of alignments more robust.\r\n\r\n- **[Samtools and bcftools](https://www.htslib.org/)**\r\n\r\n  The Swiss Army knife of alignment postprocessing and diagnostics. 
bcftools is also used to generate consensus sequence with indels.\r\n\r\n- **[SmallGenomeUtilities](https://github.com/cbg-ethz/smallgenomeutilities)**\r\n\r\n  We perform genomic liftovers to standardised reference genomes using our in-house developed python library of utilities for rewriting alignments.\r\n\r\n- **[ShoRAH](https://github.com/cbg-ethz/shorah)**\r\n\r\n  ShoRAh performs SNV calling and local haplotype reconstruction by using bayesian clustering.\r\n\r\n- **[LoFreq](https://csb5.github.io/lofreq/)**\r\n\r\n  LoFreq (version 2) is SNVs and indels caller from next-generation sequencing data, and can be used as an alternative engine for SNV calling.\r\n\r\n- **[SAVAGE](https://bitbucket.org/jbaaijens/savage) and [Haploclique](https://github.com/cbg-ethz/haploclique)**\r\n\r\n  We use HaploClique or SAVAGE to perform global haplotype reconstruction for heterogeneous viral populations by using an overlap graph.\r\n\r\n## Citation\r\n\r\nIf you use this software in your research, please cite:\r\n\r\nFuhrmann, L., Jablonski, K. P., Topolsky, I., Batavia, A. A., Borgsmueller, N., Icer Baykal, P., Carrara, M. ... 
\u0026 Beerenwinkel, N. (2023).
For further enquiries, you can also contact the V-pipe Dev Team \u003cv-pipe@bsse.ethz.ch\u003e.\r\n","organization":"V-Pipe","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/301?version=1","name":"master @ 5054a3a","author":["Ivan Topolsky","Kim Philipp Jablonski"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/301?version=2","name":"v2.99.2","author":["Ivan Topolsky","Kim Philipp Jablonski"],"descriptor_type":["SMK"]},{"id":"3","url":"https://workflowhub.eu/workflows/301?version=3","name":"v2.99.3","author":["Ivan Topolsky","Kim Philipp Jablonski"],"descriptor_type":["SMK"]},{"id":"4","url":"https://workflowhub.eu/workflows/301?version=4","name":"v3.0.0.pre0","author":["Ivan Topolsky","Kim Philipp Jablonski"],"descriptor_type":["SMK"]},{"id":"5","url":"https://workflowhub.eu/workflows/301?version=5","name":"v3.0.0.pre1","author":["Ivan Topolsky","Kim Philipp Jablonski"],"descriptor_type":["SMK"]}]},{"id":"303","url":"https://workflowhub.eu/workflows/303","name":"seurat scRNA-seq","description":"","organization":"Single Cell Unit","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/303?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/303?version=2","name":"master @ f44e9e1","author":[],"descriptor_type":["CWL"]}]},{"id":"309","url":"https://workflowhub.eu/workflows/309","name":"VGP genome profile analysis","description":"Create Meryl Database used for the estimation of assembly parameters and quality control with Merqury. 
Part of the VGP pipeline.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/309?version=1","name":"Version 1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"318","url":"https://workflowhub.eu/workflows/318","name":"VGP HiFi phased assembly with hifiasm and HiC data","description":"Performs Long Read assembly using PacBio data and Hifiasm. Part of VGP assembly pipeline. This workflow generate a phased assembly.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/318?version=1","name":"Version 1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"321","url":"https://workflowhub.eu/workflows/321","name":"VGP purge assembly with purge_dups pipeline","description":"Purge Phased assembly of duplications and overlaps. Include purge steps for Primary and Alternate assemblies.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/321?version=1","name":"Version 1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"322","url":"https://workflowhub.eu/workflows/322","name":"VGP hybrid scaffolding with Bionano optical maps","description":"Performs scaffolding using Bionano Data. Part of VGP assembly pipeline.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/322?version=1","name":"Version 1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"324","url":"https://workflowhub.eu/workflows/324","name":"VGP hybrid scaffolding with HiC data","description":"Performs scaffolding using HiC Data. 
Part of VGP assembly pipeline. The scaffolding can be performed on long read assembly contigs or on scaffolds (e.g.: Bionano scaffolds).","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/324?version=1","name":"Version 1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"325","url":"https://workflowhub.eu/workflows/325","name":"VGP HiFi phased assembly with hifiasm and HiC data","description":"Performs Long Read assembly using PacBio data and Hifiasm. Part of VGP assembly pipeline. This workflow generate a phased assembly.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/325?version=1","name":"Version 1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"326","url":"https://workflowhub.eu/workflows/326","name":"BridgeDb tutorial: Gene HGNC name to Ensembl identifier","description":"\r\n# BridgeDb tutorial: Gene HGNC name to Ensembl identifier\r\n\r\nThis tutorial explains how to use the BridgeDb identifier mapping service to translate HGNC names to Ensembl identifiers. This step is part of the OpenRiskNet use case to link Adverse Outcome Pathways to [WikiPathways](https://wikipathways.org/).\r\n\r\nFirst we need to load the Python library to allow calls to the [BridgeDb REST webservice](http://bridgedb.prod.openrisknet.org/swagger/):\r\n\r\n\r\n```python\r\nimport requests\r\n```\r\n\r\nLet's assume we're interested in the gene with HGNC MECP2 (FIXME: look up a gene in AOPWiki), the API call to make mappings is given below as `callUrl`. 
Here, the `H` indicates that the query (`MECP2`) is an HGNC symbol:\r\n\r\n\r\n```python\r\ncallUrl = 'http://bridgedb.prod.openrisknet.org/Human/xrefs/H/MECP2'\r\n```\r\n\r\nThe default call returns all identifiers, not just for Ensembl:\r\n\r\n\r\n```python\r\nresponse = requests.get(callUrl)\r\nresponse.text\r\n```\r\n\r\n\r\n\r\n\r\n    'GO:0001964\\tGeneOntology\\nuc065cav.1\\tUCSC Genome Browser\\n312750\\tOMIM\\nGO:0042551\\tGeneOntology\\nuc065car.1\\tUCSC Genome Browser\\nA0A087X1U4\\tUniprot-TrEMBL\\n4204\\tWikiGenes\\nGO:0043524\\tGeneOntology\\nILMN_1702715\\tIllumina\\n34355_at\\tAffy\\nGO:0007268\\tGeneOntology\\nMECP2\\tHGNC\\nuc065caz.1\\tUCSC Genome Browser\\nA_33_P3339036\\tAgilent\\nGO:0006576\\tGeneOntology\\nuc065cbg.1\\tUCSC Genome Browser\\nGO:0006342\\tGeneOntology\\n300496\\tOMIM\\nGO:0035176\\tGeneOntology\\nuc065cbc.1\\tUCSC Genome Browser\\nGO:0033555\\tGeneOntology\\nGO:0045892\\tGeneOntology\\nA_23_P114361\\tAgilent\\nGO:0045893\\tGeneOntology\\nENSG00000169057\\tEnsembl\\nGO:0090063\\tGeneOntology\\nGO:0005515\\tGeneOntology\\nGO:0002087\\tGeneOntology\\nGO:0005634\\tGeneOntology\\nGO:0007416\\tGeneOntology\\nGO:0008104\\tGeneOntology\\nGO:0042826\\tGeneOntology\\nGO:0007420\\tGeneOntology\\nGO:0035067\\tGeneOntology\\n300005\\tOMIM\\nNP_001104262\\tRefSeq\\nA0A087WVW7\\tUniprot-TrEMBL\\nNP_004983\\tRefSeq\\nGO:0046470\\tGeneOntology\\nGO:0010385\\tGeneOntology\\n11722682_at\\tAffy\\nGO:0051965\\tGeneOntology\\nNM_001316337\\tRefSeq\\nuc065caw.1\\tUCSC Genome Browser\\nA0A0D9SFX7\\tUniprot-TrEMBL\\nA0A140VKC4\\tUniprot-TrEMBL\\nGO:0003723\\tGeneOntology\\nGO:0019233\\tGeneOntology\\nGO:0001666\\tGeneOntology\\nGO:0003729\\tGeneOntology\\nGO:0021591\\tGeneOntology\\nuc065cas.1\\tUCSC Genome Browser\\nGO:0019230\\tGeneOntology\\nGO:0003682\\tGeneOntology\\nGO:0001662\\tGeneOntology\\nuc065cbh.1\\tUCSC Genome Browser\\nX99687_at\\tAffy\\nGO:0008344\\tGeneOntology\\nGO:0009791\\tGeneOntology\\nuc065cbd.1\\tUCSC Genome 
Browser\\nGO:0019904\\tGeneOntology\\nGO:0030182\\tGeneOntology\\nGO:0035197\\tGeneOntology\\n8175998\\tAffy\\nGO:0016358\\tGeneOntology\\nNM_004992\\tRefSeq\\nGO:0003714\\tGeneOntology\\nGO:0005739\\tGeneOntology\\nGO:0005615\\tGeneOntology\\nGO:0005737\\tGeneOntology\\nuc004fjv.3\\tUCSC Genome Browser\\n202617_s_at\\tAffy\\nGO:0050905\\tGeneOntology\\nGO:0008327\\tGeneOntology\\nD3YJ43\\tUniprot-TrEMBL\\nGO:0003677\\tGeneOntology\\nGO:0006541\\tGeneOntology\\nGO:0040029\\tGeneOntology\\nA_33_P3317211\\tAgilent\\nNP_001303266\\tRefSeq\\n11722683_a_at\\tAffy\\nGO:0008211\\tGeneOntology\\nGO:0051151\\tGeneOntology\\nNM_001110792\\tRefSeq\\nX89430_at\\tAffy\\nGO:2000820\\tGeneOntology\\nuc065cat.1\\tUCSC Genome Browser\\nGO:0003700\\tGeneOntology\\nGO:0047485\\tGeneOntology\\n4204\\tEntrez Gene\\nGO:0009405\\tGeneOntology\\nA0A0D9SEX1\\tUniprot-TrEMBL\\nGO:0098794\\tGeneOntology\\n3C2I\\tPDB\\nHs.200716\\tUniGene\\nGO:0000792\\tGeneOntology\\nuc065cax.1\\tUCSC Genome Browser\\n300055\\tOMIM\\n5BT2\\tPDB\\nGO:0006020\\tGeneOntology\\nGO:0031175\\tGeneOntology\\nuc065cbe.1\\tUCSC Genome Browser\\nGO:0008284\\tGeneOntology\\nuc065cba.1\\tUCSC Genome Browser\\nGO:0060291\\tGeneOntology\\n202618_s_at\\tAffy\\nGO:0016573\\tGeneOntology\\n17115453\\tAffy\\nA0A1B0GTV0\\tUniprot-TrEMBL\\nuc065cbi.1\\tUCSC Genome Browser\\nGO:0048167\\tGeneOntology\\nGO:0007616\\tGeneOntology\\nGO:0016571\\tGeneOntology\\nuc004fjw.3\\tUCSC Genome Browser\\nGO:0007613\\tGeneOntology\\nGO:0007612\\tGeneOntology\\nGO:0021549\\tGeneOntology\\n11722684_a_at\\tAffy\\nGO:0001078\\tGeneOntology\\nX94628_rna1_s_at\\tAffy\\nGO:0007585\\tGeneOntology\\nGO:0010468\\tGeneOntology\\nGO:0031061\\tGeneOntology\\nA_24_P237486\\tAgilent\\nGO:0050884\\tGeneOntology\\nGO:0000930\\tGeneOntology\\nGO:0005829\\tGeneOntology\\nuc065cau.1\\tUCSC Genome Browser\\nH7BY72\\tUniprot-TrEMBL\\n202616_s_at\\tAffy\\nGO:0006355\\tGeneOntology\\nuc065cay.1\\tUCSC Genome 
Browser\\nGO:0010971\\tGeneOntology\\n300673\\tOMIM\\nGO:0008542\\tGeneOntology\\nGO:0060079\\tGeneOntology\\nuc065cbf.1\\tUCSC Genome Browser\\nGO:0006122\\tGeneOntology\\nuc065cbb.1\\tUCSC Genome Browser\\nGO:0007052\\tGeneOntology\\nC9JH89\\tUniprot-TrEMBL\\nB5MCB4\\tUniprot-TrEMBL\\nGO:0032048\\tGeneOntology\\nGO:0050432\\tGeneOntology\\nGO:0001976\\tGeneOntology\\nI6LM39\\tUniprot-TrEMBL\\nGO:0005813\\tGeneOntology\\nILMN_1682091\\tIllumina\\nP51608\\tUniprot-TrEMBL\\n1QK9\\tPDB\\nGO:0006349\\tGeneOntology\\nGO:1900114\\tGeneOntology\\nGO:0000122\\tGeneOntology\\nGO:0006351\\tGeneOntology\\nGO:0008134\\tGeneOntology\\nILMN_1824898\\tIllumina\\n300260\\tOMIM\\n0006510725\\tIllumina\\n'\r\n\r\n\r\n\r\nYou can also see the results are returned as a TSV file, consisting of two columns, the identifier and the matching database.\r\n\r\nWe will want to convert this reply into a Python dictionary (with the identifier as key, as one database may have multiple identifiers):\r\n\r\n\r\n```python\r\nlines = response.text.split(\"\\n\")\r\nmappings = {}\r\nfor line in lines:\r\n    if ('\\t' in line):\r\n        tuple = line.split('\\t')\r\n        identifier = tuple[0]\r\n        database = tuple[1]\r\n        if (database == \"Ensembl\"):\r\n            mappings[identifier] = database\r\n\r\nprint(mappings)\r\n```\r\n\r\n    {'ENSG00000169057': 'Ensembl'}\r\n\r\n\r\nAlternatively, we can restrivct the return values from the BridgeDb webservice to just return Ensembl identifiers (system code `En`). 
For this, we add the `?dataSource=En` call parameter:\r\n\r\n\r\n```python\r\ncallUrl = 'http://bridgedb-swagger.prod.openrisknet.org/Human/xrefs/H/MECP2?dataSource=En'\r\nresponse = requests.get(callUrl)\r\nresponse.text\r\n```\r\n\r\n\r\n\r\n\r\n    'ENSG00000169057\\tEnsembl\\n'\r\n","organization":"Toxicology community","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/326?version=1","name":"master @ 5f34ac1","author":["Marvin Martens"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/326?version=2","name":"master @ 5f34ac1","author":["Marvin Martens"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/326?version=3","name":"master @ 0f98fd8","author":["Marvin Martens"],"descriptor_type":[]}]},{"id":"327","url":"https://workflowhub.eu/workflows/327","name":"Shotgun-Metagenomics-Analysis","description":"# Shotgun Metagenomics Analysis\r\nAnalysis of metagenomic shotgun sequences including assembly, speciation, ARG discovery and more\r\n\r\n## Description\r\nThe input for this analysis is paired end next generation sequencing data from metagenomic samples. The workflow is designed to be modular, so that individual modules can be run depending on the nature of the metagenomics project at hand. More modules will be added as we develop them - this repo is a work in progress!\r\n\r\nThese scripts have been written specifically for NCI Gadi HPC, wich runs PBS Pro, however feel free to use and modify for anothre system if you are not a Gadi user. \r\n\r\n### Part 1. Setup and QC\r\nDownload the repo. You will see directories for `Fastq`, `Inputs`, `Reference` and `Logs`. 
You will need to copy or symlink your fastq to `Fastq`, sample configuration file (see below) to `Inputs` and the reference genome sequence of your host species (if applicable) to `Reference` for host contamination removal.\r\n \r\n\r\n#### Fastq inputs\r\nThe scripts assume all fastq files are paired, gzipped, and all in the one directory named 'Fastq'. If your fastq are within a convoluted directory structure (eg per-sample directories) or you would simply like to link them from an alternate location, please use the script `setup_fastq.sh`.\r\n\r\nTo use this script, parse the path name of your fastq as first argument on the command line, and run the script from the base working directory (\u003cyour_path\u003e/Shotgun-Metagenomics-Analysis) which will from here on be referred to as `workdir`. Note that this script looks for `f*q.gz` files (ie fastq.gz or fq.gz) - if yours differ in suffix, please adjust the script accordingly.\r\n\r\n```\r\nbash ./Scripts/setup_fastq.sh \u003c/path/to/your/parent/fastq/directory\u003e\r\n```\r\n\r\n#### Configuration/sample info\r\nThe only required input configuration file should be named \u003ccohort\u003e.config, where \u003ccohort\u003e is the name of the current batch of samples you are processing, or some other meaningful name to your project; it will be used to name output files. The config file should be placed inside the $workdir/Inputs directory, and include the following columns, in this order:\r\n\r\n```\r\n1. Sample ID - used to identify the sample, eg if you have 3 lanes of sequencing per sample, erach of those 6 fastq files should contain this ID that si in column 1\r\n2. Lab Sample ID - can be the same as column 1, or different if you have reason to change the IDs eg if the seq centre applies an in-house ID. Please make sure IDs are unique within column 1 and unique within column 2\r\n3. Group - eg different time points or treatment groups. 
If no specific group structure is relevant, please set this to 1 (do not leave blank!) \r\n3. Platform - should be Illumina; other sequencing platforms are not tested on this workflow\r\n4. Sequencing centre name\r\n5. Library - eg if you have 2 sequencing libraries for the same sample. Can be left blank, or assigned to 1. Blank will be assigned libray ID of 1 during processing.\r\n```\r\n\r\nPlease do not have spaces in any of the values for the config file. \r\n\r\n\r\n#### General setup\r\n\r\nAll scripts will need to be edited to reflect your NCI project code at the `-P \u003cproject\u003e` and `-l \u003cstorage\u003e directive. Please run the script create_project.sh and follow the prompts to complete some of the setup for you. \r\n\r\nNote that you will need to manually edit the PDS resource requests for each PBS script; guidelines/example resources will be given at each step to help you do this. As the 'sed' commands within this script operate on .sh and .pbs files, this setup script has been intentionally named .bash (easiest solution).\r\n\r\nRemember to submit all scripts from your `workdir`. \r\n\r\n`bash ./Scripts/create_project.sh`\r\n\r\nFor jobs that execute in parallel, there are 3 scripts: one to make the 'inputs' file listing hte details of each parallel task, one job execution shell script that is run over each task in parallel, and one PBS launcher script. The process is to submit the make input script, check it to make sure your job details are correct, edit the resources directives depending on the number and size of your parallel tasks, then submit the PBS launcher script with `qsub`. \r\n\r\n#### QC\r\n\r\nRun fastQC over each fastq file in parallel. Adjust the resources as per your project. To run all files in parallel, set the number of NCPUS requested equal to the number of fastq files (remember that Gadi can only request \u003c1 node or multiples of whole nodes). 
The make input script sorts the fastq files largest to smallest, so if you have a discrepancy in file size, optimal efficiency can be achieved by requesting fewer nodes than the total required to run all your fastq in parallel.
Please install locally and make \"module loadable\", or else edit the scripts to point directly to your local BBtools installation.\r\n\r\nBBtools repeat masking will use all available threads on machine and 85% of available mem by default. For a mammalian genome, 2 hours on one Gadi 'normal' node is sufficient for repeat masking. \r\n\r\nUpdate the name of your reference fastq in the `bbmap_prep.pbs` script (and BBtools, see note above), then run:\r\n`qsub ./Scripts/bbmap_prep.pbs`\r\n\r\n#### Host contamination removal\r\n\r\nTBC 1/4/22... \r\n","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/327?version=1","name":"main @ 4e26b8b","author":["Cali Willet","Rosemarie Sadsad","Tracy Chew"],"descriptor_type":[]}]},{"id":"328","url":"https://workflowhub.eu/workflows/328","name":"Python Mutation Free Energy Calculations using BioExcel Building Blocks (biobb)","description":"# Mutation Free Energy Calculations using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\n**Based on the official [pmx tutorial](http://pmx.mpibpc.mpg.de/sardinia2018_tutorial1/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate how to compute a **fast-growth mutation free energy** calculation, step by step, using the BioExcel **Building Blocks library (biobb)**. The particular example used is the **Staphylococcal nuclease** protein (PDB code 1STN), a small, minimal protein, appropriate for a short tutorial.\r\n\r\nThe **non-equilibrium free energy calculation** protocol performs a **fast alchemical transition** in the direction **WT-\u003eMut** and back **Mut-\u003eWT**. The two equilibrium trajectories needed for the tutorial, one for **Wild Type (WT)** and another for the **Mutated (Mut)** protein (Isoleucine 10 to Alanine -I10A-), have already been generated and are included in this example. 
We will name **WT as stateA** and **Mut as stateB**.\r\n\r\n![](https://raw.githubusercontent.com/bioexcel/biobb_wf_pmx_tutorial/master/biobb_wf_pmx_tutorial/notebooks/schema.png)\r\n\r\nThe tutorial calculates the **free energy difference** in the folded state of a protein. Starting from **two 1ns-length independent equilibrium simulations** (WT and mutant), snapshots are selected to start **fast (50ps) transitions** driving the system in the **forward** (WT to mutant) and **reverse** (mutant to WT) directions, and the **work values** required to perform these transitions are collected. With these values, **Crooks Gaussian Intersection** (CGI), **Bennett Acceptance Ratio** (BAR) and **Jarzynski estimator** methods are used to calculate the **free energy difference** between the two states.\r\n\r\n*Please note that for the sake of disk space this tutorial is using 1ns-length equilibrium trajectories, whereas in the [original example](http://pmx.mpibpc.mpg.de/sardinia2018_tutorial1/eq.mdp) the equilibrium trajectories used were obtained from 10ns-length simulations.*\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A 
computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/328?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/328?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/328?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/328?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/328?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"6","url":"https://workflowhub.eu/workflows/328?version=6","name":"Version 6","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"330","url":"https://workflowhub.eu/workflows/330","name":"EJP-RD WP13 case-study: CAKUT peptidome and miRNome data analysis using the DIABLO and PLS-DA methods from the mixOmics R package","description":"For integrative analysis of CAKUT multi-omics data DIABLO method of the mixOmics package (version 6.10.9. Singh et. al. 
2019) was used with sPLS-DA (sparse Partial Least Squares Discriminant Analysis) and PLS-DA classification.","organization":"EJPRD WP13 case-studies workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/330?version=1","name":"Version 1","author":["Juma Bayjan","Ozan Ozisik","Cenna Doornbos","Friederike Ehrhart"],"descriptor_type":["SMK"]}]},{"id":"331","url":"https://workflowhub.eu/workflows/331","name":"EJP-RD WP13 case-study: CAKUT proteome, peptidome and miRNome data analysis using WikiPathways","description":"In this analysis, we created an extended pathway, using the WikiPathways repository (Version 20210110) and the three -omics datasets. For this, each of the three -omics datasets was first analyzed to identify differentially expressed elements, and pathways associated with the significant miRNA-protein links were detected. A miRNA-protein link is deemed significant, and may possibly be implying causality, if both a miRNA and its target are significantly differentially expressed. \r\n\r\nThe peptidome and the proteome datasets were quantile normalized and log2 transformed (Pan and Zhang 2018; Zhao, Wong, and Goh 2020). Before transformation, peptide IDs were mapped to protein IDs, using the information provided by the data uploaders, and were summarized into single protein-level values using geometric mean. The miRNome dataset was already normalized and transformed; thus, the information of their targeting genes was simply added to each miRNA ID, using the information provided by miTaRBase (Huang et al. 2019). As a result, all three datasets had been mapped to their appropriate gene product-level (or, protein-level) identifiers. 
","organization":"EJPRD WP13 case-studies workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/331?version=1","name":"Version 1","author":["Woosub Shin","Friederike Ehrhart","Juma Bayjan","Cenna Doornbos","Ozan Ozisik"],"descriptor_type":["SMK"]}]},{"id":"334","url":"https://workflowhub.eu/workflows/334","name":"Assemblosis","description":"## CWL based workflow to assemble haploid/diploid eukaryote genomes of non-model organisms\r\nThe workflow is designed to use both PacBio long-reads and Illumina short-reads. The workflow first extracts, corrects, trims and decontaminates the long reads. Decontaminated trimmed reads are then used to assemble the genome and raw reads are used to polish it. Next, Illumina reads are cleaned and used to further polish the resultant assembly. Finally, the polished assembly is masked using inferred repeats and haplotypes are eliminated. The workflow uses BioConda and DockerHub to install required software and is therefore fully automated. In addition to final assembly, the workflow produces intermediate assemblies before and after polishing steps. 
The workflow follows the syntax for CWL v1.0.\r\n\r\n### Dependencies\r\n# Programs\r\nThe pipeline can be run either using [Cromwell](https://cromwell.readthedocs.io/en/stable) or [cwltool reference](https://github.com/common-workflow-language/cwltool) implementation and docker containers can be run either using [Singularity](https://singularity.lbl.gov) or [udocker](https://github.com/indigo-dc/udocker).\r\n\r\nCromwell implementation\r\n* [cromwell v44](https://github.com/broadinstitute/cromwell/releases/tag/44)\r\n* [java-jdk v8.0.112](https://www.java.com/en)\r\n\r\nReference implementation\r\n* [cwltool v1.0.20181012180214](https://github.com/common-workflow-language/cwltool)\r\n* [nodejs v10.4.1 required by cwltool](https://nodejs.org/en)\r\n* [Python library galaxy-lib v18.5.7](https://pypi.org/project/galaxy-lib)\r\n\r\nSingularity software packages have to be installed server-wide by administrator\r\n* [Singularity v3.2.1](https://singularity.lbl.gov)\r\n* [squashfs-tools v4.3.0](https://github.com/plougher/squashfs-tools)\r\n\r\nUdocker software package can be installed locally\r\n* [udocker v1.1.2](https://github.com/indigo-dc/udocker)\r\n\r\n# Data\r\n* [Illumina adapters converted to FASTA format](http://sapac.support.illumina.com/downloads/illumina-adapter-sequences-document-1000000002694.html)\r\n* [NCBI nucleotide non-redundant sequences for decontamination with Centrifuge](http://www.ccb.jhu.edu/software/centrifuge)\r\n* [RepBase v17.02 file RMRBSeqs.embl](https://www.girinst.org/repbase)\r\n\r\n### Installation\r\nInstall miniconda using installation script ```installConda.sh```.\r\nTo install CWL, use either installation script ```installCromwell.sh``` or ```installCwltool.sh```.\r\nTo install udocker, use installation script ```installUdocker.sh```.\r\nTo install singularity, ask your system administrator.\r\n\r\n```\r\n# First confirm that you have the program 'git' installed in your system\r\n\u003e cd\r\n\u003e git clone -b 'v0.1.3-beta' 
--single-branch --depth 1 https://github.com/vetscience/Assemblosis\r\n\u003e cd Assemblosis\r\n\u003e bash installConda.sh\r\n\u003e bash installCromwell.sh # or bash installCwltool.sh\r\n\u003e bash installUdocker.sh # if singularity cannot be installed or does not run\r\n\r\n```\r\nFor data dependencies: download and extract [RepBase database](https://www.girinst.org/repbase), download Centrifuge version of [NCBI nt database](http://www.ccb.jhu.edu/software/centrifuge) and create [Illumina adapter FASTA file](http://sapac.support.illumina.com/downloads/illumina-adapter-sequences-document-1000000002694.html) to your preferred locations. If your reads are clean from adapters, the adapter FASTA file can be empty.\r\nGive the location of these data in the configuration (.yml) file (see **Usage**).\r\n\r\n### Usage\r\nYou have to create a YAML (.yml) file for each assembly. This file defines the required parameters and the location for both PacBio and Illumina raw-reads.\r\n```\r\n\u003e cd\r\n\u003e export PATH=~/miniconda3/bin:$PATH\r\n\u003e cd Assemblosis/Run\r\n\u003e cp ../Examples/assemblyCele.yml .\r\n\r\n\"Edit assemblyCele.yml to fit your computing environment and to define the location for the read files, databases and Illumina adapters\"\r\n\r\n\"Running docker images using Cromwell and singularity:\"\r\n\u003e java -Dconfig.file=cromwell.singularity.conf -jar cromwell-44.jar run -t CWL -v v1.0 assembly.cwl -i assemblyCele.yml\r\n\r\n\"Running docker images using Cromwell and udocker:\"\r\n\u003e java -Dconfig.file=cromwell.udocker.conf -jar cromwell-44.jar run -t CWL -v v1.0 assembly.cwl -i assemblyCele.yml\r\n\r\n\"Running docker images using Cwltool and singularity:\"\r\n\u003e cwltool --tmpdir-prefix /home/\u003cusername\u003e/Tmp --beta-conda-dependencies --cachedir /home/\u003cusername\u003e/Cache --singularity --leave-tmpdir assembly.cwl assemblyCele.yml\r\n\r\n\"Running docker images using Cwltool and udocker:\"\r\n\u003e cwltool --tmpdir-prefix 
/home/\u003cusername\u003e/Tmp --beta-conda-dependencies --cachedir /home/\u003cusername\u003e/Cache --user-space-docker-cmd udocker --leave-tmpdir assembly.cwl assemblyCele.yml\r\n```\r\n\r\nAn annotated example of the YAML file for Caenorhabditis elegans assembly.\r\n```\r\n## Directory, which contains the PacBio raw data\r\n# NOTE! The software looks for all .h5 files (or bam files if pacBioInBam below is defined true) in given directory\r\npacBioDataDir:\r\n  class: Directory\r\n  location: /home/\u003cusername\u003e/Dna\r\n\r\n## PacBio files are in bam format as returned from Sequel platform\r\npacBioInBam: true\r\n\r\n## Prefix for the resultant assembly files\r\nprefix: cele\r\n\r\n## Maximum number of threads used in the pipeline\r\nthreads: 24\r\n\r\n## Minimum number of threads per job used in canu assembler\r\nminThreads: 4\r\n\r\n## Number of concurrent jobs in canu assembler (recommended to use threads / minThreads)\r\ncanuConcurrency: 6\r\n\r\n### Parameters for the program Canu are described in https://canu.readthedocs.io/en/latest/parameter-reference.html\r\n## Expected genome size. This parameter is forwarded to Canu assembler.\r\ngenomeSize: 100m\r\n\r\n## Minimum length for the PacBio reads used for the assembly. This parameter is forwarded to Canu assembler.\r\n# The maximum resolvable repeat regions becomes 2 x minReadLength\r\nminReadLen: 6000\r\n\r\n## Parameter for Canu assembler to adjust to GC-content. Should be 0.15 for high or low GC content.\r\ncorMaxEvidenceErate: 0.20\r\n\r\n### Parameters for the program Trimmomatic are described in http://www.usadellab.org/cms/?page=trimmomatic\r\n## Paired-end (PE) reads of Illumina raw data. These files are given to the program Trimmomatic.\r\n# NOTE! 
Data for two paired libraries is given below.\r\nreadsPe1:\r\n  - class: File\r\n    format: edam:format_1930  # fastq\r\n    path: /home/\u003cusername\u003e/Dna/SRR2598966_1.fastq.gz\r\n  - class: File\r\n    format: edam:format_1930  # fastq\r\n    path: /home/\u003cusername\u003e/Dna/SRR2598967_1.fastq.gz\r\nreadsPe2:\r\n  - class: File\r\n    format: edam:format_1930  # fastq\r\n    path: /home/\u003cusername\u003e/Dna/SRR2598966_2.fastq.gz\r\n  - class: File\r\n    format: edam:format_1930  # fastq\r\n    path: /home/\u003cusername\u003e/Dna/SRR2598967_2.fastq.gz\r\n\r\n## Phred coding of Illumina data. This parameter is forwarded to Trimmomatic.\r\n# NOTE! Each read-pair needs one phred value.\r\nphredsPe: ['33','33']\r\n\r\n## Sliding window and illuminaClip parameters for Trimmomatic\r\nslidingWindow:\r\n    windowSize: 4\r\n    requiredQuality: 25\r\nilluminaClip:\r\n    adapters:\r\n        class: File\r\n        path: \u003cpath to Illumina adapter file\u003e\r\n    seedMismatches: 2\r\n    palindromeClipThreshold: 30\r\n    simpleClipThreshold: 10\r\n    minAdapterLength: 20\r\n    keepBothReads: true\r\n## Further parameters for Trimmomatic\r\n# Required phred-quality for leading 5 nucleotides\r\nleading: 25\r\n# Required phred-quality for trailing 5 nucleotides\r\ntrailing: 25\r\n# Minimum accepted read-length to keep the read after trimming\r\nminlen: 40\r\n\r\n### Parameters for the program bowtie2 are described in http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml\r\n## Illumina PE fragment length. Program bowtie2 parameter -X.\r\n# NOTE! Each read-pair needs one phred value.\r\nmaxFragmentLens: [500, 600]\r\n# Orientation of pair-end reads e.g. 'fr', 'rf', 'ff': Program bowtie2 parameters --fr, --rf or --ff\r\norientation: 'fr'\r\n\r\n### Parameters for the program Pilon are described in https://github.com/broadinstitute/pilon/wiki/Requirements-\u0026-Usage\r\n# Prefix for the resultant pilon polished assembly. 
Pilon parameter --output\r\npolishedAssembly: celePilon\r\n# This is set 'true' for an organism with diploid genome: Pilon parameter --diploid\r\ndiploidOrganism: true\r\n# Value 'bases' fixes snps and indels: Pilon parameter --fix\r\nfix: bases\r\n\r\n### Parameters for the program centrifuge are described in http://www.ccb.jhu.edu/software/centrifuge/manual.shtml\r\n# Path to the directory, that contains NCBI nt database in nt.?.cf files. Centrifuge parameter -x\r\ndatabase:\r\n  class: Directory\r\n  path:  /home/\u003cusername\u003e/ntDatabase\r\n# Length of the identical match in nucleotides required to infer a read as contaminant. Centrifuge parameter --min-hitlen\r\npartialMatch: 100\r\n# NCBI taxon root identifiers for the species considered contaminants: e.g. bacteria (=2), viruses (=10239), fungi (=4751), mammals (=40674), artificial seqs (=81077). Pipeline specific parameter.\r\ntaxons: [2,10239,4751,40674,81077]\r\n\r\n## Parameters for the RepeatModeler and RepeatMasker are described in http://www.repeatmasker.org\r\nrepBaseLibrary:\r\n  class: File\r\n  # This is the RepBase file from https://www.girinst.org/repbase. 
RepeatMasker parameter -lib\r\n  path: /home/\u003cusername\u003e/RepBaseLibrary/RMRBSeqs.embl\r\n# Constant true and false values for repeat masker\r\ntrueValue: true\r\nfalseValue: false\r\n\r\n```\r\n### Runtimes and hardware requirements\r\nThe workflow was tested in Linux environment (CentOS Linux release 7.2.1511) in a server with 24 physical CPUs (48 hyperthreaded CPUs) and 512 GB RAM.\r\n\r\n| Assembly | Runtime in CPU hours | RAM usage (GB) |\r\n| --- | --- | --- |\r\n| *Caenorhabditis elegans* | 1537 | 134.1 |\r\n| *Drosophila melanogaster* | 6501 | 134.1 |\r\n| *Plasmodium falciparum* | 424 | 134.1 |\r\n\r\nMaximum memory usage of 134.1 GB was claimed by the program Centrifuge for each assembly.\r\n\r\n### Software tools used in this pipeline\r\n* [Dextractor v1.0](https://github.com/thegenemyers/DEXTRACTOR)\r\n* [Trimmomatic v0.36](http://www.usadellab.org/cms/?page=trimmomatic)\r\n* [Centrifuge v1.0.3](http://www.ccb.jhu.edu/software/centrifuge)\r\n* [Canu v1.8](http://canu.readthedocs.io/en/latest/index.html)\r\n* [Arrow in SmrtLink v7.0.1](https://www.pacb.com/support/software-downloads)\r\n* [Bowtie 2 v2.2.8](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)\r\n* [SAMtools v1.6](http://samtools.sourceforge.net)\r\n* [Pilon v1.22](https://github.com/broadinstitute/pilon)\r\n* [RepeatMasker v4.0.6](http://www.repeatmasker.org)\r\n* [RepeatModeler v1.0.11](http://www.repeatmasker.org)\r\n* [RepBase v17.02](https://www.girinst.org/repbase)\r\n* [HaploMerger2 build_20160512](https://github.com/mapleforest/HaploMerger2)\r\n\r\n### Cite\r\nIf you use the pipeline, please cite:\r\nKorhonen, Pasi K., Ross S. Hall, Neil D. Young, and Robin B. Gasser. \"Common Workflow Language (CWL)-based software pipeline for de novo genome assembly from long-and short-read data.\" GigaScience 8, no. 
4 (2019): giz014.\r\n\r\n","organization":"Workflows Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/334?version=1","name":"v0.1.3-beta","author":["Pasi Korhonen"],"descriptor_type":["CWL"]}]},{"id":"335","url":"https://workflowhub.eu/workflows/335","name":"Escalibur","description":"# ESCALIBUR\r\n\r\nEscalibur Population Genomic Analysis Pipeline is able to explore key aspects centering the population genetics of organisms, and automates three key bioinformatic components in population genomic analysis using Workflow Definition Language (WDL: https://openwdl.org/), and customised R, Perl, Python and Unix shell scripts. Associated programs are packaged into a platform independent singularity image, for which the definition file is provided.\r\n\r\nThe workflow for analysis using Escalibur consists of three steps - each step can be run in a separate workflow in a sequential manner; step 2 is optional.\r\n\r\n    1. Trimming and mapping the raw data - selection of the best reference genome;\r\n    2. Removing the contamination from mapped data;\r\n    3. Recalibration, variant calling and filtering;\r\n\r\nThis implementation runs both locally and in a distributed environment that uses SLURM job scheduler.\r\n\r\n## Dependencies\r\nFollowing software dependencies are required:\r\n\r\n* Git\r\n* SLURM scheduler required for distributed HPC environment (https://slurm.schedmd.com/documentation.html)\r\n* Python3.7: (https://www.python.org/)\r\n* Perl 5.26.2: (https://www.perl.org/)\r\n* Java 1.8\r\n* Singularity 3.7.3: (https://sylabs.io/singularity/)\r\n\r\n## Step 1: Installation\r\n\r\nTypically, the installation of Singularity requires root rights. You should therefore contact your administrator to get it correctly installed. 
Minimum Linux kernel version requirement is 3.8, though \u003e= 3.18 would be preferred (https://sylabs.io/guides/3.5/admin-guide/installation.html).\r\n\r\nClone the git repository to a directory on your cluster or stand-alone server.\r\n```\r\n\u003e git clone --depth 1 -b v0.3-beta https://gitlab.unimelb.edu.au/bioscience/escalibur.git\r\n\u003e cd escalibur\r\n```\r\n\r\n### Description of Files\r\n* `workflow-runtime.local.config`: main configuration file for stand alone server runtime environment\r\n* `workflow-runtime.slurm.config`: main configuration file for HPC runtime environment that support Slurm job scheduler\r\n* `workflow-mapping.json`: defines location of input files, has behavioral settings and sets resource allocations\r\n* `workflow-cleaning.json`:  defines location of input files and sets resource allocations\r\n* `workflow-variants.json`:  defines location of input files, has behavioral settings and sets resource allocations\r\n* `workflow-mapping.wdl`: main workflow file to trim and map PE reads into the genome\r\n* `workflow-cleaning.wdl`: main workflow file to clean contamination from mapped PE reads against genomes representing putative contamination\r\n* `workflow-variants.wdl`: main workflow file to call variants using mapped and cleaned reads\r\n* `workflow-mapping.outputs.json`: defines location for resultant outputs and logs from mapping workflow\r\n* `workflow-cleaning.outputs.json`: defines location for resultant outputs and logs from cleaning workflow\r\n* `workflow-variants.outputs.json`: defines location for resultant outputs and logs from variants workflow\r\n* `inputReads.txt`: example input file for fastq read files to mapping step\r\n* `cleanup.conf`: example configuration file for putative host contamination to cleaning step\r\n* `inputBams.txt`: example input file for resultant BAM files to variant calling step\r\n* `references.txt`: contains list of example references genomes\r\n* `perl_scripts`: contains Perl scripts used by 
the pipeline\r\n* `scripts`: contains Python scripts used by the pipeline\r\n* `R_scripts`: contains R scripts used by the pipeline\r\n* `sub_workflows`: sub-workflows, one for each of the workflow steps\r\n* `tasks`: workflow tasks\r\n* `cromwell-50.jar`: java archive file required to run the workflow.\r\n\r\nTwo config files have been created. One for stand alone server (`workflow-runtime.local.config`) and another one for HPC environment that supports Slurm scheduler (`workflow-runtime.slurm.config`).\r\nThese files have already been optimised. For slurm configuration you only need to define the HPC partition in line 35: \"String rt_queue\"\r\nChange this to the partition you have access to on HPC environment.\r\n\r\nFiles `workflow-mapping.outputs.json`, `workflow-cleaning.outputs.json` and `workflow-variants.outputs.json` define the directories to copy the result files to. Modify if you want to change default output directories `outputMapping`, `outputCleaning` and `outputVariants`. These output directories are generated to the directory `escalibur`.\r\n#### NOTE: delete output directories from previous runs. If you have files there already and a name matches during the copy, the workflow may fail.\r\n\r\n`Singularity` directory contains the definition file for the software used in Escalibur. 
Pre-built singularity image can be downloaded from `library://pakorhon/workflows/escalibur:0.0.1-beta`.\r\n```\r\n\u003e singularity pull escalibur.sif library://pakorhon/workflows/escalibur:0.0.1-beta\r\n```\r\n\r\n## Step 2: Test run\r\n\r\nTo confirm correct function of the workflows (`mapping`, `cleaning` and `variant calling`), fix the required absolute paths, marked by three dots `...` in `workflow-mapping.json`, `workflow-cleaning.json` and `workflow-variants.json` and configuration files `cleanup.conf` and `inputBams.txt`, and run the workflow with the provided test and configuration files, and parameter settings.\r\n```\r\n\u003e java -Dconfig.file=./workflow-runtime.local.config  -jar ./cromwell-50.jar run workflow-mapping.wdl -i workflow-mapping.json -o workflow-mapping.outputs.json \u003e out.mapping 2\u003e err.mapping\r\n\u003e java -Dconfig.file=./workflow-runtime.local.config  -jar ./cromwell-50.jar run workflow-cleaning.wdl -i workflow-cleaning.json -o workflow-cleaning.outputs.json \u003e out.cleaning 2\u003e err.cleaning\r\n\u003e java -Dconfig.file=./workflow-runtime.local.config  -jar ./cromwell-50.jar run workflow-variants.wdl -i workflow-variants.json -o workflow-variants.outputs.json \u003e out.variants 2\u003e err.variants\r\n```\r\nSlurm file templates `runMapping.slurm`, `runCleaning.slurm` and `runVariants.slurm` are available for each workflow.\r\n#### NOTE: default parameter settings for run-times, memory usage and module loading may require adjustment in these files if run in HPC environment using slurm. Current settings should account for the test run.\r\n\r\nAfter the runs are complete, the results will be at the output directories: `outputMapping`, `outputCleaning` and `outputVariants`.\r\nYou can compare the result of `outputVariants/full_genotype_output.vcf` to that or pre-run `TestResults/full_genotype_output.vcf`.\r\n\r\n## Step 3: Mapping\r\n\r\nMake a directory for your fastq files e.g. 
`Reads` and copy your paired end raw data in there.\r\n```\r\n\u003e mkdir Reads\r\n```\r\n\r\nIt should look something like below\r\n```\r\n\u003e ls TestReads/\r\n1-1_r1.fastq.gz  32-1_r1.fastq.gz  44-1_r1.fastq.gz\r\n1-1_r2.fastq.gz  32-1_r2.fastq.gz  44-1_r2.fastq.gz\r\n```\r\nRun the python script to create a file of your input samples and edit the resulting file to match your sample identifiers and libraries.\r\n```\r\n\u003e python3 scripts/inputArgMaker.py -d Reads/ -p -ps 33 -pq 20 -pl ILLUMINA -ml 50 -o inputReads.txt \r\n```\r\n\r\nThe edited output file is shown below. The script will automatically sort the files by size.\r\n```\r\n\u003e cat inputReads.txt\r\n# Prefix PE/SE\tMinLen\tPhredS\tSequencer\tPhredQ\tLibrary\tRead Group ID\tSample\tPlatform Unit\tFirst pair of PE reads\t\tSecond pair of PE reads\r\ntest1\t PE\t50\t33\tILLUMINA\t28\tLIB1\tCL100082180L1\tSM1\tCL100082180L1\t./TestReads/1-1_r1.fastq.gz\t./TestReads/1-1_r2.fastq.gz\r\ntest2\t PE\t50\t33\tILLUMINA\t20\tLIB2\tCL100082180L1\tSM2\tCL100082180L1\t./TestReads/44-1_r1.fastq.gz\t./TestReads/44-1_r2.fastq.gz\r\ntest3\t PE\t50\t33\tILLUMINA\t20\tLIB3\tCL100034574L1\tSM2\tCL100034574L1\t./TestReads/32-1_r1.fastq.gz\t./TestReads/32-1_r2.fastq.gz\r\n```\r\n#### NOTE: If several libraries are embedded in a single read file, library-specific reads have to be separated into own files before create the inputReads.txt file. 
In contrast, inputReads.txt file format can accommodate multiple library files to a single sample.\r\n\r\n* `Prefix`: Prefix for the resultant files from trimming.\r\n* `PE/SE`: Paired-End/Single-End reads as input.\r\n* `MinLen`: Minimum Length of reads after trimming.\r\n* `PhredS`: Used Phred coding by the sequencer (33 or 64).\r\n* `Sequencer`: Name of the sequencer.\r\n* `PhredQ`: Phred cut-off score used in trimming.\r\n* `Library`: Identifier for the library.\r\n* `Read Group ID`: Identifier for the read groups required by GATK (inputArgMaker tries to find this from FASTQ reads). Refer to (https://gatk.broadinstitute.org/hc/en-us/articles/360035890671-Read-groups).\r\n* `Sample`: Identifier for the sample. Defined prefix for resultant sample specific files.\r\n* `Platform Unit (optional)`: Information about flow cell, lane and sample. Helps GATK in recalibration (inputArgMaker copies Read Group ID here). Refer to (https://gatk.broadinstitute.org/hc/en-us/articles/360035890671-Read-groups).\r\n* `First pair of PE reads`: Relative path to the forward pair of PE reads.\r\n* `Second pair of PE reads`: Relative path to the reverse pair of PE reads.\r\n\r\nCreate a file listing reference genomes and configure `workflow-mapping.json` file.\r\nAn example reference file (`references.txt`) has been created for you. Use this as an example to create your own.\r\nEnsure there are no whitespaces at the end of the line or else the cromwell engine will throw an error.\r\nReads are mapped to these reference files and the best matching reference will be selected for variant calling.\r\n```\r\n\u003e cat references.txt\r\nscf00001\t./TestReferences/scf00001.fa\r\nscf00013\t./TestReferences/scf00013.fa\r\n```\r\n#### NOTE: Reference label (e.g. 
`scf00001`) must be a substring found in the reference fasta file (`scf00001.fa`)\r\n\r\nThe figure below illustrates the flow of the information, and appearance of labels (`Prefix`, `Sample`, `Label`) in file names, as defined in `inputReads.txt` and `references.txt`.\r\n![](figures/labelFlow.png)\r\n\r\n### workflow-mapping.json config file\r\nAdd the path of your fastq and reference genome input files and change parameters as appropriate, and adjust the absolute paths for singularity image. If `mapping_workflow.readQc` is set to `yes`, reads are trimmed both for quality and the adapters. Adapters to trim are given in `mapping_workflow.pe_filtering_workflow.trimmomatic_pe_task.truseq_pe_adapter`. If you want to use custom adapters, copy them to `adapters` directory and instead of default `TruSeq3-PE.fa`, refer to your custom file. If you don't want to use adapters, use `empty.fa` file instead. For BGISEQ adapters, refer to (https://en.mgitech.cn/Download/download_file/id/71).\r\n```\r\n{\r\n  \"## CONFIG FILE\": \"WDL\",\r\n  \"mapping_workflow.inputSampleFile\": \"./inputReads.txt\",\r\n  \"mapping_workflow.inputReferenceFile\": \"./references.txt\",\r\n\r\n  \"## Parameters for samtools read filtering\": \"-F 4 does filters unmapped reads from resultant files\",\r\n  \"mapping_workflow.samtoolsParameters\": \"-F 4\",\r\n  \r\n  \"## Is read QC required\": \"yes or no\",\r\n  \"mapping_workflow.readQc\": \"yes\",\r\n  \"## What is the ploidy of given genome\": \"1 for haploid, 2 for diploid, etc.\",\r\n  \"mapping_workflow.ploidy\": 2,\r\n  \r\n  \"## Singularity parameters\": \"absolute paths to the container and the directory to bind visible inside singularity\",\r\n  \"mapping_workflow.singularityContainerPath\": \"/home/.../escalibur/escalibur.sif\",\r\n  \"mapping_workflow.singularityBindPath\": \"/home/.../escalibur/\",\r\n\r\n  \"## trimmomatic adapters\": \"\",\r\n  
\"mapping_workflow.pe_filtering_workflow.trimmomatic_pe_task.truseq_pe_adapter\":\"./adapters/TruSeq3-PE.fa\",\r\n  \"mapping_workflow.pe_filtering_workflow.trimmomatic_se_task.truseq_se_adapter\":\"./adapters/TruSeq3-SE.fa\",\r\n  \r\n  \"## Indexing sub workflow task parameters\": \"Samtools index run time parameters\",\r\n  \"mapping_workflow.index_sub_workflow.indexing_sam_task.IST_minutes\": 300,\r\n  \"mapping_workflow.index_sub_workflow.indexing_sam_task.IST_threads\": 16,\r\n  \"mapping_workflow.index_sub_workflow.indexing_sam_task.IST_mem\": 30000,\r\n  .\r\n  .\r\n  .\r\n}\r\n```\r\n\r\nRun the mapping workflow.\r\n```\r\n\u003e java -Dconfig.file=./workflow-runtime.local.config  -jar ./cromwell-50.jar run workflow-mapping.wdl -i workflow-mapping.json -o workflow-mapping.outputs.json \u003e out.mapping 2\u003e err.mapping\r\n```\r\nThe resultant BAM files will be copied to `outputMapping` directory.\r\n\r\n## Step 4 (optional): Cleaning\r\n\r\nIf you suspect 'host' contamination in your data, you can remove that using the cleaning workflow.\r\nDefine the file representing the contamination. 
First column defines the sample identifier, second the resultant BAM file from mapping workflow and third the putative contaminant genome assembly.\r\n```\r\n\u003e cat cleanup.conf\r\nSM1\t/home/.../escalibur/outputMapping/SM1.scf00001.MarkDup.bam\t/home/.../escalibur/Hosts/host1.fa\r\nSM2\t/home/.../escalibur/outputMapping/SM2.scf00001.MarkDup.bam\t/home/.../escalibur/Hosts/host1.fa\r\n```\r\n#### NOTE: you have to use absolute paths both to BAM files and the contaminant reference genome (here `host1.fa` and `host2.fa`).\r\n\r\n### workflow-cleaning.json config file\r\nAdd the path of your cleaning config file (here `cleanup.conf`) and adjust the absolute paths for singularity image.\r\n```\r\n{\r\n  \"## CONFIG FILE\": \"WDL\",\r\n  \"cleaning_workflow.inputContaminantFile\": \"./cleanup.conf\",\r\n  \r\n  \"## Singularity parameters\": \"absolute paths to the container and the directory to bind visible inside singularity\",\r\n  \"cleaning_workflow.singularityContainerPath\": \"/home/.../escalibur/escalibur.sif\",\r\n  \"cleaning_workflow.singularityBindPath\": \"/home/.../escalibur/\",\r\n\r\n  \"cleaning_workflow.indexing_bwa_task.IBT_minutes\": 60,\r\n  \"cleaning_workflow.indexing_bwa_task.IBT_threads\": 1,\r\n  \"cleaning_workflow.indexing_bwa_task.IBT_mem\": 16000,\r\n\r\n  \"######################################\":\"########################################\",\r\n  \"CLEANING\":\"PARAMETERS\",\r\n  \"######################################\":\"########################################\",\r\n  \"cleaning_workflow.clean_bams_workflow.cleanBams_task.CLEAN_BAMS_minutes\": 600,\r\n  \"cleaning_workflow.clean_bams_workflow.cleanBams_task.CLEAN_BAMS_threads\": 4,\r\n  \"cleaning_workflow.clean_bams_workflow.cleanBams_task.CLEAN_BAMS_mem\": 32000,\r\n\r\n  \"cleaning_workflow.create_cleaned_bams_workflow.createCleanedBams_task.CREATE_CLEAN_BAMS_minutes\": 300,\r\n  \"cleaning_workflow.create_cleaned_bams_workflow.createCleanedBams_task.CREATE_CLEAN_BAMS_threads\": 
4,\r\n  \"cleaning_workflow.create_cleaned_bams_workflow.createCleanedBams_task.CREATE_CLEAN_BAMS_mem\": 32000,\r\n\r\n  \"cleaning_workflow.refsBySample.RBS_minutes\": 5,\r\n  \"cleaning_workflow.refsBySample.RBS_threads\": 1,\r\n  \"cleaning_workflow.refsBySample.RBS_mem\": 4000\r\n}\r\n```\r\n\r\nRun the cleaning workflow.\r\n```\r\n\u003e java -Dconfig.file=./workflow-runtime.local.config  -jar ./cromwell-50.jar run workflow-cleaning.wdl -i workflow-cleaning.json -o workflow-cleaning.outputs.json \u003e out.cleaning 2\u003e err.cleaning\r\n```\r\nThe resultant cleaned BAM files will be copied to `outputCleaning` directory. You can repeat the workflow if you suspect that there may be more than one contaminant genomes per each sample. In that case you have to take care of the properly configured `cleanup.conf` file that should describe the BAM files from previous cleaning round but also define new output directory for each round in `workflow-cleaning.outputs.json` file.\r\n\r\n## Step 5: Variant calling\r\n\r\nDefine the file listing the BAM files used for variant calling. First column defines the sample identifier, and second the resultant BAM file either from mapping of cleaning workflow.\r\n```\r\n\u003e cat inputBams.txt\r\nSM1\t/home/.../escalibur/outputMapping/SM1.scf00001.MarkDup.bam\r\nSM2\t/home/.../escalibur/outputCleaned/SM2.scf00001.MarkDup.cleaned.bam\r\n```\r\n\r\n### workflow-variants.json config file\r\nAdd the path of your file listing the locations of BAM files (here `inputBams.txt`), and add the location to selected reference genome (found in `outputMapping/best.ref`) and it's label, as defined in `references.txt` file. 
Adjust the absolute paths for singularity image and adjust other parameters, especially define if you want to recalibrate the BAM files by selecting value \"independent\" to \"variants_workflow.call_type\".\r\n```\r\n{\r\n  \"## CONFIG FILE\": \"WDL\",\r\n  \"variants_workflow.inputSampleFile\": \"./inputBams.txt\",\r\n  \"variants_workflow.selectedRefFile\": \"TestReferences/scf00001.fa\",\r\n  \"variants_workflow.selectedRefLabel\": \"scf00001\",\r\n  \r\n  \"## Singularity parameters\": \"absolute paths to the container and the directory to bind visible inside singularity\",\r\n  \"variants_workflow.singularityContainerPath\": \"/home/.../escalibur/escalibur.sif\",\r\n  \"variants_workflow.singularityBindPath\": \"/home/.../escalibur/\",\r\n\r\n  \"## Which variant call workflow to use\": \"fast or independent\",\r\n  \"variants_workflow.call_type\": \"fast\",\r\n  \r\n  \"## Variant filtering expressions\": \"For SNPs and INDELs\",\r\n  \"variants_workflow.SNP_filt_exp\": \"QD \u003c 2.0 || FS \u003e 60.0 || MQ \u003c 40.0 || MQRankSum \u003c -12.5 || ReadPosRankSum \u003c -8.0\",\r\n  \"variants_workflow.INDEL_filt_exp\": \"QD \u003c 2.0 || FS \u003e 200.0 || ReadPosRankSum \u003c -20.0\",\r\n\r\n  \"## Variant Filter params\": \"Variant filter, indel, snps, report making: Safe to leave as default\",\r\n  \"variants_workflow.ploidy\": 2,\r\n  \"variants_workflow.maxIndelSize\": 60,\r\n  \"variants_workflow.scafNumLim\": 95,\r\n  \"variants_workflow.scafNumCo\": 2,\r\n  \"variants_workflow.scafLenCutOff\": 0,\r\n  \"variants_workflow.ldWinSize\": 10,\r\n  \"variants_workflow.ldWinStep\": 5,\r\n  \"variants_workflow.ldCutOff\": 0.3,\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.indelFilterName\": \"Indel_filter\",\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.indelFilterExpression\": \"QD \u003c 2.0 || FS \u003e 200.0 || ReadPosRankSum \u003c -20.0\",\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.snpFilterName\": 
\"Snp_filter\",\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.snpFilterExpression\": \"QD \u003c 2.0 || FS \u003e 60.0 || MQ \u003c 40.0 || MQRankSum \u003c -12.5 || ReadPosRankSum \u003c -8.0\",\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.vfindel_tk.selectType\": \"\",\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.vfsnp_tk.selectType\": \"\",\r\n\r\n  \"## Build chromosome map\":\"map_def_scf_lim_task\",\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.map_def_scf_lim_task.scafLenCutOff\": 1000000,\r\n  \"variants_workflow.snp_indel_var_filtering_workflow.map_def_scf_lim_task.scafNumCo\": 3,\r\n\r\n  \"## Indexing sub workflow task parameters\": \"Samtools index run time parameters\",\r\n  \"variants_workflow.ref_index.IST_minutes\": 300,\r\n  \"variants_workflow.ref_index.IST_threads\": 2,\r\n  \"variants_workflow.ref_index.IST_mem\": 8000,\r\n  .\r\n  .\r\n  .\r\n}\r\n```\r\n\r\nRun the variant calling workflow.\r\n```\r\n\u003e java -Dconfig.file=./workflow-runtime.local.config  -jar ./cromwell-50.jar run workflow-variants.wdl -i workflow-variants.json -o workflow-variants.outputs.json \u003e out.variants 2\u003e err.variants\r\n```\r\nThe resultant files will be copied to `outputVariants` directory. That includes filtered variants calls (`full_genotype_output.vcf`) and recalibrated BAM files (if independent call_type is selected).\r\n\r\n## Other considerations\r\n\r\n### Resource allocation in HPC environment\r\nWall time, memory usage and thread count (`_minutes`, `_mem`, `_threads`) given in `.json` files for each workflow can vary substantially and may require adjusting in HPC environment and slurm. This may lead to frequent restarting of the workflow after each adjustment. We have automated this task by providing scripts that automatically check the failed resource allocations and double them for each round. 
These scripts are located in `Automation` directory and can be run as follows:\r\n```\r\n\u003e cd Automation\r\n\u003e sh init.sh # Copies the content of ../tasks directory to tasksOrig directory\r\n\u003e sbatch runMapping.slurm # Runs runLoopMapping.sh in a worker node\r\n\u003e sbatch runCleaning.slurm # Runs runLoopCleaning.sh in a worker node\r\n\u003e sbatch runVariants.slurm # Runs runLoopVariants.sh in a worker node\r\n```\r\nScripts `runLoop*.sh` copy resource allocations from collective `runtimes.json` file to the files in `../tasks` directory, run the workflow and double the failed resource allocations in `../tasks` files, and reruns the workflow until it succeeds or until ten rounds have passed. Copying of resource allocations directly to the files in `../tasks` directory is necessary to guarantee proper function of call-caching.\r\n#### NOTE: automated resource allocation adjustment is experimental, should be monitored when running and may require modifications to scripts to function properly.\r\n\r\n### Disk usage\r\nCromwell will create duplicate copies of files while running the workflows. It is therefore recommended to remove `cromwell-executions` directory after each workflow is run, if disk space is getting sparse.\r\n```\r\n\u003e rm -r cromwell-executions\r\n```\r\nEspecially, if there are hundreds of samples that may sum up to terabytes of data, disk space might become an issue if unused files are not removed.\r\n\r\n### Troubleshooting\r\nIf the output text does not reveal the error, you can try to find an error message using command(s):\r\n```\r\n\u003e find cromwell-executions/ -name stderr -exec cat {} \\; | grep -i fatal\r\n\u003e find cromwell-executions/ -name stderr -exec cat {} \\; | less\r\n```\r\n\r\nMost commonly encountered error cases:\r\n\r\n* Singularity is not running correctly. 
Typically you require help from your administrator to get singularity properly installed.\r\n* Singularity image `escalibur.sif` was not downloaded\r\n* Check that you are using correct runtime configuration file `workflow-runtime.local.config` or `workflow-runtime.slurm.config` when calling `cromwell-50.jar`\r\n* Absolute file paths for Singularity/Trimmomatic, input files or contaminant genomes are not updated or are wrong in `workflow-*.json`, `inputBams.txt` or `cleanup.conf` configuration files, respectively.\r\n* Defined run-time and memory requirements for some tasks are not sufficient in `.json` configuration files to run the pipeline in HPC environment.\r\n* If you are using slurm job scheduler and want to run the pipeline in HPC environment, you have to create the related configuration file yourselves.\r\n* Pipeline has not been tested in other environments but Linux and we expect that users encounter challenges if trying to run the pipeline e.g. in Mac environment.\r\n\r\n","organization":"Workflows Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/335?version=1","name":"v0.3-beta","author":[],"descriptor_type":[]}]},{"id":"336","url":"https://workflowhub.eu/workflows/336","name":"Short read quality control, trimming and contamination filter","description":"**Workflow for short paired end reads quality control, trimming and filtering.**\u003cbr /\u003e\r\nMultiple paired datasets will be merged into single paired dataset.\u003cbr /\u003e\r\nSummary:\r\n- Sequali QC on raw data files\u003cbr /\u003e\r\n- fastp for read quality trimming\u003cbr /\u003e\r\n- BBduk for phiX and rRNA filtering (optional)\u003cbr /\u003e\r\n- Filter human reads using Hostile (optional)\u003cbr /\u003e\r\n- Custom read filtering using Hostile (optional)\u003cbr /\u003e\r\n- Sequali QC on filtered (merged) data\u003cbr /\u003e\r\n\r\nOther UNLOCK workflows on WorkflowHub: 
https://workflowhub.eu/projects/16/workflows?view=default\u003cbr\u003e\u003cbr\u003e\r\n\r\n**All tool CWL files and other workflows can be found at:**\u003cbr\u003e\r\nhttps://gitlab.com/m-unlock/cwl\r\n\r\n**How to setup and use an UNLOCK workflow:**\u003cbr\u003e\r\nhttps://docs.m-unlock.nl/docs/workflows/setup.html\u003cbr\u003e\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/336?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst","Changlin Ke"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/336?version=2","name":"Version 2","author":["Bart Nijsse","Jasper Koehorst","Changlin Ke"],"descriptor_type":["CWL"]}]},{"id":"337","url":"https://workflowhub.eu/workflows/337","name":"LongRead Quality Control and Filtering","description":"### Workflow for LongRead Quality Control and Filtering\r\n\r\n- NanoPlot  (read quality control) before and after filtering\r\n- Filtlong  (read trimming)\r\n- Kraken2 taxonomic read classification before and after filtering\r\n- Minimap2 read filtering based on given references\u003cbr\u003e\u003cbr\u003e\r\n\r\nOther UNLOCK workflows on WorkflowHub: https://workflowhub.eu/projects/16/workflows?view=default\u003cbr\u003e\u003cbr\u003e\r\n\r\n**All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\nhttps://gitlab.com/m-unlock/cwl/workflows\r\n\r\n**How to setup and use an UNLOCK workflow:**\u003cbr\u003e\r\nhttps://m-unlock.gitlab.io/docs/setup/setup.html\u003cbr\u003e\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/337?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst","Germán Royval"],"descriptor_type":["CWL"]}]},{"id":"338","url":"https://workflowhub.eu/workflows/338","name":"Biomarker screening 
in preeclampsia","description":"Objective. Biomarkers have become important for the prognosis and diagnosis of various diseases. High-throughput methods such as RNA-sequencing facilitate the detection of differentially expressed genes (DEGs), hence potential biomarker candidates. Individual studies suggest long lists of DEGs, hampering the identification of clinically relevant ones. Concerning preeclampsia, a major obstetric burden with high risk for adverse maternal and/or neonatal outcomes, limitations in diagnosis and prediction are still important issues. Therefore, we developed a workflow to facilitate the screening for biomarkers.\r\nMethods. Based on the tool DESeq2, we established a comprehensive workflow for the identification of DEGs, analyzing data from multiple publicly available RNA-sequencing studies. We applied it to four RNA-sequencing datasets (one blood, three placenta) analyzing patients with preeclampsia and normotensive controls. We compared our results with other published approaches and evaluated their performance. \r\nResults. We identified 110 genes dysregulated in preeclampsia, observed in ≥3 of the analyzed studies, six even in all four studies. Among them were FLT-1, TREM-1, and FN1 which either represent established biomarkers on protein level, or promising candidates based on recent studies. In comparison, using a published meta-analysis approach we obtained 5,240 DEGs.\r\nConclusions. We present a data analysis workflow for preeclampsia biomarker screening, capable of identifying significant biomarker candidates, while drastically decreasing the numbers of candidates. Moreover, we were also able to confirm its performance for heart failure. 
Our approach can be applied to additional diseases for biomarker identification and the set of identified DEGs in preeclampsia represents a resource for further studies.\r\n","organization":"Gyn Department","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/338?version=1","name":"Version 1","author":["Marlene Rezk"],"descriptor_type":["GALAXY"]}]},{"id":"339","url":"https://workflowhub.eu/workflows/339","name":"GermlineShortV_biovalidation","description":"# GermlineShortV_biovalidation\r\n\r\n - [Description](#description)\r\n  - [Diagram](#diagram)\r\n  - [User guide](#user-guide)\r\n      - [Quick start guide](#quick-start-guide)\r\n  - [Benchmarking](#benchmarking)\r\n  - [Workflow summaries](#workflow-summaries)\r\n      - [Metadata](#metadata)\r\n      - [Component tools](#component-tools)\r\n      - [Required (minimum)\r\n        inputs/parameters](#required-minimum-inputsparameters)  \r\n        [Preparing your own input files](#preparing-input-files)\r\n  - [Additional notes](#additional-notes)\r\n      - [Understanding your outputs](#understanding-your-outputs)  \r\n      - [Performance metrics explained](#performance-metrics-explained)   \r\n  - [Help/FAQ/Troubleshooting](#helpfaqtroubleshooting)\r\n  - [Acknowledgements/citations/credits](#acknowledgementscitationscredits)\r\n\r\n## Description \r\nPopulation-scale WGS cohorts are essential resources for genetic analyses including heritable diseases, evolutionary genomics, conservation biology, and population genomics. Processing raw reads into analysis-ready variants remains challenging. Various mapping and variant calling pipelines have been made publicly available in recent decades. 
Designing a mapping and variant calling pipeline to meet your needs is dependent on the compute infrastructure you’re working on, the types of variants you’re primarily interested in, and the sequencing technology you use to generate raw sequencing data. Keep in mind that the tools you use to build your pipeline can affect variant calling accuracy. Further, optimisation and customisation of these tools’ commands can also affect their performance. Best-practice recommendations for variant calling pipelines vary dramatically between species and research questions, depending on the availability of genomic resources for the population of interest, genome structure, and clinical relevance of the resulting variant dataset. It is important to not only design a robust variant calling pipeline but also fine-tune it to achieve optimal performance for your dataset and research question. \r\n\r\nThere are various measurements that you can apply to evaluate the biological accuracy of your germline variant calling pipeline. Currently, no best practice methods for interrogating joint-called variant sets exist in the literature. A number of publicly available, human ‘gold standard’ truth datasets including Platinum Genomes and Genome in a Bottle (GIAB) are useful for benchmarking across high confidence regions of the genome and evaluating the recall and precision of the pipeline. We recommend individuals working with human datasets benchmark their germline variant calling pipelines using one of these datasets. Unfortunately, these resources are not typically available for non-human organisms. \r\n\r\nHere, we present protocols for benchmarking and validating germline short variant (SNVs and indels) datasets using a combination of methods that can capture the quality of your variant sets for human, non-human model, and non-model organisms. The process you can apply will depend on the organism you’re working with and the genomic resources available to that organism. 
\r\n\r\n## Diagram \r\n\r\n\u003cp align=\"center\"\u003e \r\n\u003cimg src=\"https://github.com/Sydney-Informatics-Hub/GermlineShortV_biovalidation/blob/main/Benchmarking%20and%20validation%20protocol.png\" width=\"70%\" height=\"70%\"\u003e  \r\n\u003c/p\u003e \r\n\r\n## User guide \r\n###  Quick start guide \r\n\r\nThese bash scripts were written for the University of Sydney’s high performance computer, Artemis. They can be run on the command line or submitted as PBS jobs. These scripts assume your input is a gzipped multi-sample (cohort) VCF file. Before running, edit the PBS project directive and define the variables at the top of the script. All software used in this protocol is installed on Artemis- to use alternate versions or run on a different compute infrastructure, edit the modules according to your needs.  \r\n\r\n#### Human datasets \r\nFor human datasets, we recommend you benchmark your germline variant calling pipeline using a gold standard dataset such as Platinum Genomes. Raw sequence data in FASTQ format for these datasets can be downloaded along with their high confidence variant calls and regions from public repositories. See [Preparing input files]() for more information on how to download and prepare these files.    \r\n\r\n##### 1. Collect vcf summary metrics  \r\nEdit the PBS -P directive and variables for your dataset in `vcfstat.sh`. Then run script with: \r\n\r\n```\r\nqsub vcfstat.sh (or bash vcfstat.sh)\r\n```\r\nThis will produce summary and quality metrics reports and plots for your cohort. It will also produce summary and detail files for known variant representation. BCFtools stats plots will be housed in a directory labelled `${cohort}_vcfplots`. \r\n\r\n##### 2. Biological benchmarking using a truth set  \r\n\r\nEdit the PBS -P directive and variables for your files. 
Then run script with:  \r\n\r\n```\r\nqsub run_happy.sh\r\n```\r\nThis script will subset your multi-sample VCF into individual samples, prepare them for hap.py, and output a number of files including summary metrics (including recall, precision and F1-score) and ROC count files that can be used to produce ROC curves, separately for SNVs and indels. See the [hap.py user guide](https://github.com/Illumina/hap.py/blob/master/doc/happy.md) for more information on how to interpret hap.py output. ROC curves of Hap.py runs can be plotted using the script [rocplot.Rscript](https://github.com/Illumina/hap.py/blob/master/src/R/rocplot.Rscript).   \r\n\r\n#### Non-human model organism datasets\r\n\r\n##### 1. Collect vcf summary metrics  \r\nEdit the PBS -P directive and variables for your dataset in `vcfstat.sh`. We recommend you use the set of known variants used for base quality score recalibration to validate population level variants. If you used trio data, unhash the Mendelian error command within the script. Then run script with: \r\n\r\n```\r\nqsub vcfstat.sh (or bash vcfstat.sh)\r\n```\r\nThis will produce summary and quality metrics reports and plots for your cohort. It will also produce summary and detail files for known variant representation. BCFtools stats plots will be housed in a directory labelled `${cohort}_vcfplots`.  \r\n#### Non-model organism datasets \r\n\r\n##### 1. Collect vcf summary metrics  \r\n\r\nEdit the PBS -P directive and variables for your dataset in `vcfstat_nonmodel.sh`. Then run script with: \r\n\r\n```\r\nqsub vcfstat_nonmodel.sh (or bash vcfstat_nonmodel.sh)\r\n```\r\n\r\nThis will produce summary and quality metrics reports and plots for your cohort. It will also produce summary and detail files for known variant representation. BCFtools stats plots will be housed in a directory labelled `${cohort}_vcfplots`. \r\n\r\n## Benchmarking \r\nComing soon!  
\r\n\r\n## Workflow summaries \r\n### Metadata \r\n|metadata field     | workflow_name / workflow_version  |\r\n|-------------------|:---------------------------------:|\r\n|Version            | 1.0                 |\r\n|Maturity           | stable                            |\r\n|Creators           | Georgie Samaha, Tracy Chew, Cali Willet                 |\r\n|Source             | NA                                |\r\n|License            | NA                                |\r\n|Workflow manager   | NA                          |\r\n|Container          | None                              |\r\n|Install method     | Manual                            |\r\n|GitHub             | NA                                |\r\n|bio.tools \t        | NA                                |\r\n|BioContainers      | NA                                | \r\n|bioconda           | NA                                |\r\n\r\n### Component tools \r\n\r\nbcftools/1.14  \r\nhtslib/1.14  \r\npython/3.8.2  \r\nR/4.1.1  \r\nhap.py/0.3.14  \r\n\r\n### Required (minimum) inputs/parameters \r\n\r\n- Multi-sample or single sample VCF file (VCF.gz format)\r\n- List of sample IDs that match the VCF (.txt format)\r\n- Known variant dataset (VCF format. Human and non-human model organisms only)\r\n- Pedigree file (format: mother,father,offspring. Trios or Platinum Genomes only)\r\n- Truth set variant calls (VCF.gz format. Human, Platinum Genomes only)\r\n- High confidence call regions (BED format. Human, Platinum Genomes only)\r\n\r\n### Preparing input files \r\n\r\n#### Gold standard variant truth sets  \r\n\r\nThe benchmarking protocol for human datasets assumes you have performed mapping and germline variant calling on a gold standard truth set. These datasets contain millions of variants that have been confirmed using orthologous technologies [Eberle et al. 2017](https://doi.org/10.1101/gr.210500.116).   
\r\n\r\nWe recommend you use the Platinum Genomes dataset for benchmarking germline variant calling pipelines that include joint genotyping of multiple samples. Six members, comprising two trios, of the Platinum Genomes dataset can be downloaded from the Illumina BaseSpace Sequence Hub, the ENA, or dbGaP. The Platinum Genomes dataset contains multiple files including the following files you will need for running `run_happy.sh`: \r\n- Paired-end FASTQ files for each sample\r\n- High-confidence germline variant VCF files for each sample\r\n- High-confidence genomic regions (BED format)\r\n\r\nCurrently, these files are available for Hg19 (GRCh37) and Hg38 (GRCh38) . Links to raw data are [here](https://github.com/Illumina/PlatinumGenomes). BaseSpace offers a command line tool for downloading files, see [here](https://developer.basespace.illumina.com/docs/content/documentation/cli/cli-examples) for instructions. \r\n\r\n#### Providing your own ‘truth set’ \r\n*A word of caution*- testing the performance of your pipeline using a truth set is only intended to estimate the overall quality of your pipeline and detect any potential sources of error in your method. It is not intended to test the truthfulness of your variant set. See [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531572-Evaluating-the-quality-of-a-germline-short-variant-callset) for further discussion of the assumptions we make about truth sets. Most non-human organisms do not have access to gold standard truth set resources like the Platinum Genomes dataset. However there are a few alternative options you could try: \r\n - Genotyping arrays: if you have genotyping data for the same samples you tested your germline variant calling pipeline with, you can reformat these to VCF using a tool like [PLINK’s recode](https://www.cog-genomics.org/plink/1.9/data#recode) and use it as a truth set. 
\r\n - Known variant datasets: if your organism of interest has a set of known population-level variants you can use these as a truth-set. Just remember that these variants might not always be validated (i.e. dbSNP). \r\n\r\nUsing this method you will need to also provide your own high-confidence regions file in BED format. The location and size of these regions will depend on your dataset, organism, reference assembly and sequencing method. Typically these regions would exclude centromeres, telomeres and repetitive parts of the genome that are likely to complicate variant calling.   \r\n\r\n\r\n## Additional notes \r\n\r\nTest data for Hap.py can be found [here](https://github.com/Illumina/hap.py/blob/master/doc/microbench.md)  \r\n\r\nInstructions on how to install Hap.py can be found [here](https://github.com/Illumina/hap.py#installation)   \r\n\r\nThis warning may be thrown by Hap.py and can be ignored: `WARNING  No reference file found at default locations. You can set the environment variable 'HGREF' or 'HG19' to point to a suitable Fasta file.`  \r\n\r\n\r\n### Understanding your outputs \r\nThe following files will be produced and stored in your designated working directory. They will all be labelled with your specified cohort name.  \r\n\r\n#### Variant based metrics \r\nProduced by BCFtools stats command. Output file:\r\n- ${cohort}.bcftools.metrics  \r\n- ${cohort}_bcftools.metrics_vcfstatplots (directory and files)  \r\n\r\n#### Sample based metrics   \r\nProduced by BCFtools smplstats and mendelian commands. Output files:\r\n- ${cohort}.smplstats\r\n- ${cohort}.smplstats.pdf\r\n- ${cohort}.Mendelianerr\r\n\r\n#### Known variant concordance \r\nProduced by GATK CollectVariantCallingMetrics command. Output files:\r\n- ${cohort}.known.variant_calling_summary_metrics\r\n- ${cohort}.known.variant_calling_detail_metrics\r\n\r\n#### Biological validation using a truth set \r\nProduced by Hap.py. 
Output files:\r\n- ${sample}.happy.metrics.json.gz\r\n- ${sample}.happy.roc.all.csv.gz\r\n- ${sample}.happy.roc.Locations.INDEL.csv.gz\r\n- ${sample}.happy.roc.Locations.INDEL.PASS.csv.gz\r\n- ${sample}.happy.roc.Locations.SNP.csv.gz\r\n- ${sample}.happy.roc.Locations.SNP.PASS.csv.gz\r\n- ${sample}.happy.roc.tsv\r\n- ${sample}.happy.runinfo.json\r\n- ${sample}.happy.summary.csv\r\n\r\n### Performance metrics explained  \r\n\r\n|Metric                                |Expected/ideal value                                |Tool           |Relevance                                                                                                      |\r\n|--------------------------------------|----------------------------------------------------|---------------|---------------------------------------------------------------------------------------------------------------|\r\n|Number of SNVs and indels (per sample)|Human WGS: ~4.4M, Human WES: ~41k, Species dependent|bcftools stats |Population, sequencing approach, and genomic region dependent. Alone, this metric cannot indicate data quality.|\r\n|Indel length distribution             |Indel length range is 1-10,000bp.                   |bcftools stats |Increased length is conflated with reduced mapping quality. Distribution is dataset dependent. Recommend filtering for high quality.|\r\n|Depth of coverage                     |Depends on the sequencing coverage of samples.      |bcftools stats |Dramatic deviation from expected distribution can indicate artifactual bias.                                   |\r\n|Substitution type counts              |See TiTv ratio.                                     |bcftools stats |Twice as many possible transversions as transitions. See [here](https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtu668)  |\r\n|TiTv ratio (genome wide)              |For mammals: WGS: 2.0-2.1, WES: 3.0-3.3             |bcftools stats |Dramatic deviation from expected ratio can indicate artifactual bias. 
Typically elevated in coding regions where transversions are more likely to occur. |\r\n|Base quality distribution             |Dataset dependent.                                  |bcftools stats |This will reflect the quality based filtering you performed. Dramatic deviation from expected ratio can indicate artifactual bias.|\r\n|Indel ratio                           |Common: ~1.0, Rare: 0.2-0.5                         |GATK CollectVariantCallingMetrics|This should be evaluated after custom filtering variants for your needs. Dramatic deviation from expected ratio can indicate artifactual bias.|\r\n|Het/hom(non-ref)                      |~2.0 assuming Hardy-Weinberg equilibrium.           |GATK CollectVariantCallingMetrics|Ancestry dependent, can vary dramatically. See [Wang et al. 2015](https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtu668)|\r\n|Mendelian error                       |0                                                   |BCFtools +mendelian|Mendelian inheritance errors are likely erroneous genotype calls. See [Pilipenko et al. 2014](https://dx.doi.org/10.1186%2F1753-6561-8-S1-S21)|\r\n|True positives                        |Dataset dependent.                                  |Hap.py         |Number of query variants that are present in the truth set.                                                    |\r\n|False negatives                       |Dataset dependent.                                  |Hap.py         |Number of variants in truth set, not present in query VCF.                                                     |\r\n|False positives                       |Dataset dependent.                                  |Hap.py         |Number of variants in query VCF, not present in truth set.                                                     |\r\n|Recall                                |1                                                   |Hap.py         |Absence of false negatives. See [Krusche et al. 
2019](https://doi.org/10.1038/s41587-019-0054-x)               |\r\n|Precision                             |1                                                   |Hap.py         |Absence of false positives. See [Krusche et al. 2019](https://doi.org/10.1038/s41587-019-0054-x)               |\r\n|F1-score                              |1                                                   |Hap.py         |Harmonic mean of recall and precision. See [Krusche et al. 2019](https://doi.org/10.1038/s41587-019-0054-x)    |\r\n|Genotype errors (FP.GT)               |Dataset dependent.                                  |Hap.py         |Number of query variants with incorrect genotype                                                               |\r\n\r\n### Resources and references \r\n\r\nEberle, M. A., Fritzilas, E., Krusche, P., Källberg, M., Moore, B. L., Bekritsky, M. A., Iqbal, Z., Chuang, H. Y., Humphray, S. J., Halpern, A. L., Kruglyak, S., Margulies, E. H., McVean, G., \u0026 Bentley, D. R. (2017). A reference data set of 5.4 million phased human variants validated by genetic inheritance from sequencing a three-generation 17-member pedigree. Genome research, 27(1), 157–164. https://doi.org/10.1101/gr.210500.116   \r\n\r\nKoboldt, D.C. Best practises for variant calling in clinical sequencing. Genome Med 12, 91 (2020). https://doi.org/10.1186/s13073-020-00791-w  \r\n\r\nKrusche, P., Trigg, L., Boutros, P.C. et al. Best practices for benchmarking germline small-variant calls in human genomes. Nat Biotechnol 37, 555–560 (2019). https://doi.org/10.1038/s41587-019-0054-x  \r\n\r\nMarshall, C.R., Chowdhury, S., Taft, R.J. et al. Best practices for the analytical validation of clinical whole-genome sequencing intended for the diagnosis of germline disease. npj Genom. Med. 5, 47 (2020). https://doi.org/10.1038/s41525-020-00154-9   \r\n\r\nPilipenko, V.V., He, H., Kurowski, B.G. et al. 
Using Mendelian inheritance errors as quality control criteria in whole genome sequencing data set. BMC Proc 8, S21 (2014). https://doi.org/10.1186/1753-6561-8-S1-S21   \r\n\r\nWang, J., Raskin, J., Samuels, D., Shyr, Y., Guo, Y., Genome measures used for quality control are dependent on gene function and ancestry, Bioinformatics 31, 318–323 (2015)  https://doi.org/10.1093/bioinformatics/btu668  \r\n\r\n\r\n## Help/FAQ/Troubleshooting\r\n\r\nIf Hap.py throws an error, search the [issues at Hap.py GitHub repository](https://github.com/Illumina/hap.py/issues) and attempt to resolve it before submitting an issue here.    \r\n\r\n## Acknowledgements/citations/credits  \r\n\r\n### Authors \r\n- Georgie Samaha (Sydney Informatics Hub, University of Sydney)   \r\n- Tracy Chew (Sydney Informatics Hub, University of Sydney)  \r\n- Cali Willet (Sydney Informatics Hub, University of Sydney)  \r\n- Nandan Deshpande (Sydney Informatics Hub, University of Sydney)\r\n\r\nAcknowledgements (and co-authorship, where appropriate) are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub and national compute facilities. We suggest including the following acknowledgement in any publications that follow from this work:  \r\n\r\nThe authors acknowledge the technical assistance provided by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney and the Australian BioCommons which is enabled by NCRIS via Bioplatforms Australia.  
\r\n","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/339?version=1","name":"main @ 6865e5d","author":["Georgina Samaha","Tracy Chew","Cali Willet"],"descriptor_type":[]}]},{"id":"340","url":"https://workflowhub.eu/workflows/340","name":"HiFi de novo genome assembly workflow","description":"# HiFi *de novo* genome assembly workflow\r\n\r\nHiFi-assembly-workflow is a bioinformatics pipeline that can be used to analyse Pacbio CCS reads for *de novo* genome assembly using PacBio Circular Consensus Sequencing (CCS)  reads. This workflow is implemented in Nextflow and has 3 major sections. \r\n \r\nPlease refer to the following documentation for detailed description of each workflow section:\r\n \r\n- [Pre-assembly quality control (QC)](https://github.com/AusARG/hifi-assembly-workflow/blob/master/recommendations.md#stage-1-pre-assembly-quality-control)\r\n- [Assembly](https://github.com/AusARG/hifi-assembly-workflow/blob/master/recommendations.md#stage-2-assembly)\r\n- [Post-assembly QC](https://github.com/AusARG/hifi-assembly-workflow/blob/master/recommendations.md#stage-3-post-assembly-quality-control)\r\n\r\n## HiFi assembly workflow flowchart\r\n\r\n![](https://github.com/AusARG/hifi-assembly-workflow/blob/master/workflow.png?raw=true)\r\n\r\n# Quick Usage:\r\nThe pipeline has been tested  on NCI Gadi and AGRF balder cluster. If needed to run on AGRF cluster, please contact us at bioinformatics@agrf.org.au.\r\nPlease note for running this on NCI Gadi you need access. 
Please refer to Gadi guidelines for account creation and usage: these can be found at https://opus.nci.org.au/display/Help/Access.\r\n\r\nHere is an example that can be used to run a phased assembly on Gadi:\r\n\r\n```\r\nmodule load nextflow/21.04.3\r\nnextflow run Hifi_assembly.nf --bam_folder \u003cPATH TO THE BAM FOLDER\u003e -profile gadi \r\n\r\nThe workflow accepts 2 mandatory arguments:\r\n--bam_folder     --    Full Path to the CCS bam files\r\n-profile         --    gadi/balder/local\r\n```\r\n\r\nPlease note that you can either run jobs interactively or submit jobs to the cluster. This is determined by the -profile flag. By passing the gadi tag to the profile argument, the jobs are submitted and run on the cluster.\r\n\r\n# General recommendations for using the HiFi *de novo* genome assembly workflow\r\n\r\n## Example local profile usage\r\n\r\n```\r\nStart a screen, submit a job, and run the workflow \r\nscreen -S 'name'\r\n\r\nqsub -I -qnormal -Pwz54 -lwalltime=48:00:00,ncpus=4,mem=200GB,storage=scratch/wz54+gdata/wz54,wd\r\nexport MODULEPATH=/apps/Modules/modulefiles:/g/data/wz54/groupResources/modules\r\n\r\nmodule load nextflow/21.04.3\r\nnextflow run /g/data/wz54/groupResources/scripts/pl/hifi_assembly.nf  --bam_folder  \u003cbam-folder_path\u003e -profile local\r\n\r\n#This loads the scripts directory to the environment PATH and loads the nextflow module\r\nmodule load hifi_assembly/1.0.0 \r\n```\r\n\r\n# Outputs\r\n\r\nPipeline generates various files and folders; here is a brief description: \r\nThe pipeline creates a folder called `secondary_analysis` that contains two sub folders named:\r\n\r\n- `exeReport`     \r\n- `Results`       -- Contains preQC, assembly and postQC analysis files\r\n\r\n## exeReport\r\nThis folder contains a computation resource usage summary in various charts and a text file. \r\n`report.html` provides a comprehensive summary.\r\n\r\n## Results\r\nThe `Results` folder contains three sub-directories preQC, assembly and postqc. 
As the name suggests, outputs from the respective workflow sections are placed in each of these folders.\r\n\r\n### preQC\r\nThe following table contains list of files and folder from preQC results\r\n\r\n| Output folder/file | File             | Description                                                                    |\r\n| ------------------ | ---------------- | ------------------------------------------------------------------------------ |\r\n| \u003csample\u003e.fa        |                  | Bam files converted to fasta format                                            |\r\n| kmer\\_analysis     |                  | Folder containing kmer analysis outputs                                        |\r\n|                    | \u003csample\u003e.jf      | k-mer counts from each sample                                                  |\r\n|                    | \u003csample\u003e.histo   | histogram of k-mer occurrence                                                  |\r\n| genome\\_profiling  |                  | genomescope profiling outputs                                                  |\r\n|                    | summary.txt      | Summary metrics of genome scope outputs                                        |\r\n|                    | linear\\_plot.png | Plot showing no. of times a k-mer observed by no. 
of k-mers with that coverage |\r\n\r\n\r\n### Assembly\r\nThis folder contains final assembly results in \u003cFASTA\u003e format.\r\n\r\n- `\u003csample\u003e_primary.fa` - Fasta file containing primary contigs\r\n- `\u003csample\u003e_associate.fa` - Fasta file containing associated contigs\r\n\r\n### postqc\r\n \r\nThe postqc folder contains two sub folders \r\n\r\n- `assembly_completeness`\r\n- `assembly_evaluation`\r\n\r\n#### assembly_completeness\r\nThis contains BUSCO evaluation results for primary and associate contig.\r\n\r\n#### assembly_evaluation\r\nAssembly evaluation folder contains various file formats, here is a brief description for each of the outputs.\r\n\r\n| File        | Description                                                                               |\r\n| ----------- | ----------------------------------------------------------------------------------------- |\r\n| report.txt  | Assessment summary in plain text format                                                   |\r\n| report.tsv  | Tab-separated version of the summary, suitable for spreadsheets (Google Docs, Excel, etc) |\r\n| report.tex  | LaTeX version of the summary                                                              |\r\n| icarus.html | Icarus main menu with links to interactive viewers                                        |\r\n| report.html | HTML version of the report with interactive plots inside                                  |\r\n\r\n\r\n# Infrastructure usage and recommendations\r\n\r\n### NCI facility access\r\nOne should have a user account set with NCI to access gadi high performance computational facility. 
Setting up a NCI account is mentioned in detail at the following URL: https://opus.nci.org.au/display/Help/Setting+up+your+NCI+Account \r\n  \r\nDocumentation for a specific infrastructure should go into a infrastructure documentation template\r\nhttps://github.com/AustralianBioCommons/doc_guidelines/blob/master/infrastructure_optimisation.md\r\n\r\n\r\n## Compute resource usage across tested infrastructures\r\n\r\n|                                       | Computational resource for plant case study |\r\n| ------------------------------------- | ------------------------------------------- |\r\n|                                       | Time                                        | CPU | Memory | I/O |\r\n| Process                               | duration                                    | realtime | %cpu | peak\\_rss | peak\\_vmem | rchar | wchar |\r\n| Converting bam to fasta for sample    | 12m 54s                                     | 12m 48s | 99.80% | 5.2 MB | 197.7 MB | 43.3 GB | 50.1 GB |\r\n| Generating k-mer counts and histogram | 26m 43s                                     | 26m 36s | 1725.30% | 19.5 GB | 21 GB | 77.2 GB | 27.1 GB |\r\n| Profiling genome characteristics      | 34.7s                                       | 13.2s | 89.00% | 135 MB | 601.2 MB | 8.5 MB | 845.9 KB |\r\n| Denovo assembly                       | 6h 51m 15s                                  | 6h 51m 11s | 4744.40% | 84.7 GB | 225.6 GB | 1.4 TB | 456 GB |\r\n| evaluate\\_assemblies                  | 5m 18s                                      | 4m 54s | 98.20% | 1.6 GB | 1.9 GB | 13.6 GB | 2.8 GB |\r\n| assemblies\\_completeness              | 25m 57s                                     | 25m 53s | 2624.20% | 22 GB | 25.2 GB | 624.9 GB | 2.9 GB |\r\n\r\n\r\n|                                       | Computational resource for bird case study |\r\n| ------------------------------------- | ------------------------------------------ |\r\n|                                       | Time 
                                      | CPU | Memory | I/O |\r\n| Process                               | duration                                   | realtime | %cpu | peak\\_rss | peak\\_vmem | rchar | wchar |\r\n| Converting bam to fasta for sample    | 12m 54s                                    | 7m 9s | 86.40% | 5.2 MB | 197.8 MB | 21.5 GB | 27.4 GB |\r\n| Generating k-mer counts and histogram | 26m 43s                                    | 15m 34s | 1687.70% | 10.1 GB | 11.7 GB | 44 GB | 16.6 GB |\r\n| Profiling genome characteristics      | 34.7s                                      | 1m 15s | 15.30% | 181.7 MB | 562.2 MB | 8.5 MB | 819.1 KB |\r\n| De novo assembly                      | 6h 51m 15s                                 | 9h 2m 47s | 1853.50% | 67.3 GB | 98.4 GB | 1 TB | 395.6 GB |\r\n| evaluate assemblies                   | 5m 18s                                     | 2m 48s | 97.50% | 1.1 GB | 1.4 GB | 8.7 GB | 1.8 GB |\r\n| assemblies completeness               | 25m 57s                                    | 22m 36s | 2144.00% | 22.2 GB | 25 GB | 389.7 GB | 1.4 GB |\r\n\r\n\r\n# Workflow summaries\r\n\r\n## Metadata\r\n\r\n| Metadata field   | Pre-assembly quality control                                                      | Primary assembly   | Post-assembly quality control |\r\n| ---------------- | --------------------------------------------------------------------------------- | ------------------ | ----------------------------- |\r\n| Version          | 1.0                                                                               | 1.0                | 1.0                           |\r\n| Maturity         | Production                                                                        | Production         | production                    |\r\n| Creators         | Naga, Kenneth                                                                     | Naga, Kenneth      | Naga, Kenneth                 |\r\n| Source           | 
[AusARG/hifi-assembly-workflow](https://github.com/AusARG/hifi-assembly-workflow) |\r\n| License          |  MIT License                                                                       | MIT License         | MIT License                     |\r\n| Workflow manager | NextFlow                                                                          | NextFlow           | NextFlow                      |\r\n| Container        | No containers used                                                                | No containers used | No containers used            |\r\n| Install method   | Manual                                                                            | Manual             | Manual                        |\r\n\r\n\r\n## Component tools\r\n​\r\n| Workflow element                  | Workflow element version | Workflow title                |\r\n| --------------------------------- | ------------------------ | ----------------------------- |\r\n| Samtools, jellyfish, genomescope  | 1.0                      | Pre-assembly quality control  |\r\n| Improved phased assembler (pbipa) | 1.0                      | Primary assembly              |\r\n| Quast and busco                   | 1.0                      | Post-assembly quality control |\r\n\r\n\r\n## Required (minimum) inputs/parameters\r\n \r\nPATH to HIFI bam folder is the minimum requirement for the processing the pipeline.\r\n\r\n## Third party tools / dependencies\r\n\r\nThe following packages are used by the pipeline.\r\n\r\n- `nextflow/21.04.3`\r\n- `samtools/1.12`\r\n- `jellyfish/2.3.0`\r\n- `genomescope/2.0`\r\n- `ipa/1.3.1`\r\n- `quast/5.0.2`\r\n- `busco/5.2.2`\r\n\r\nThe following paths contain all modules required for the pipeline.\r\n\r\n- `/apps/Modules/modulefiles`\r\n- `/g/data/wz54/groupResources/modules`\r\n\r\n---\r\n\r\n# Help/FAQ/Troubleshooting\r\n\r\nDirect training and help is available if you are new to HPC and/or new to NCI/Gadi.\r\n\r\n- Basic information to get started with the 
NCI Gadi for bioinformatics can be found at https://github.com/AusARG/ABLeS/wiki/temppage.\r\n- For NCI support, contact the NCI helpdesk directly at https://www.nci.org.au/users/nci-helpdesk\r\n- Queue limits and structure explained at https://opus.nci.org.au/display/Help/4.+PBS+Jobs\r\n\r\n---\r\n\r\n# 3rd party Tutorials \r\n\r\nA tutorial by Andrew Severin on running GenomeScope 1.0 is available here:\r\nhttps://github.com/AusARG/hifi-assembly-workflow.git\r\n\r\nImproved Phased Assembler tutorial is available at \r\nhttps://github.com/PacificBiosciences/pbbioconda/wiki/Improved-Phased-Assembler\r\n\r\nBusco tutorial\r\nhttps://wurmlab.com/genomicscourse/2016-SIB/practicals/busco/busco_tutorial\r\n\r\n---\r\n\r\n# Licence(s)\r\n\r\nMIT License\r\n\r\nCopyright (c) 2022 AusARG\r\n\r\nPermission is hereby granted, free of charge, to any person obtaining a copy\r\nof this software and associated documentation files (the \"Software\"), to deal\r\nin the Software without restriction, including without limitation the rights\r\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\r\ncopies of the Software, and to permit persons to whom the Software is\r\nfurnished to do so, subject to the following conditions:\r\n\r\nThe above copyright notice and this permission notice shall be included in all\r\ncopies or substantial portions of the Software.\r\n\r\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\r\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\r\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\r\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r\nSOFTWARE.\r\n\r\n---\r\n\r\n# Acknowledgements/citations/credits\r\n\r\n\u003e Jung, H. et al. 
Twelve quick steps for genome assembly and annotation in the classroom. PLoS Comput. Biol. 16, 1–25 (2020).\r\n\r\n\u003e 2020, G. A. W. No Title. https://ucdavis-bioinformatics-training.github.io/2020-Genome_Assembly_Workshop/kmers/kmers.\r\n\r\n\u003e Sović, I. et al. Improved Phased Assembly using HiFi Data. (2020).\r\n\r\n\u003e Gurevich, A., Saveliev, V., Vyahhi, N. \u0026 Tesler, G. QUAST: Quality assessment tool for genome assemblies. Bioinformatics 29, 1072–1075 (2013).\r\n\r\n\u003e Waterhouse, R. M. et al. BUSCO applications from quality assessments to gene prediction and phylogenomics. Mol. Biol. Evol. 35, 543–548 (2018).\r\n\r\n---\r\n","organization":"AGRF BIO","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/340?version=1","name":"master @ d3d3d39","author":[],"descriptor_type":["NFL"]}]},{"id":"343","url":"https://workflowhub.eu/workflows/343","name":"MaCProQC","description":"A workflow for the quality assessment of mass spectrometry (MS) based proteomics analyses","organization":"Medizinisches Proteom-Center, Medical Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/343?version=1","name":"v1.0.0","author":["Julian Uszkoreit"],"descriptor_type":[]}]},{"id":"344","url":"https://workflowhub.eu/workflows/344","name":"Kallisto RNAseq Workflow","description":"### Workflow Kallisto RNAseq \r\n**(pseudoalignment on transcripts)**\r\n  - Workflow Illumina Quality: https://workflowhub.eu/workflows/336?version=1\t\r\n  - kallisto\r\n\r\n**All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\n  Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl\u003cbr\u003e\r\n  Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows\r\n\r\n**How to setup and use an UNLOCK 
workflow:**\u003cbr\u003e\r\nhttps://m-unlock.gitlab.io/docs/setup/setup.html\r\n\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/344?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]}]},{"id":"346","url":"https://workflowhub.eu/workflows/346","name":"QC of RADseq reads","description":"# workflow-qc-of-radseq-reads\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\n## Inputs\r\n* demultiplexed reads in fastq format, in a collection\r\n* two adapter sequences in fasta format, for input into cutadapt\r\n\r\n## Steps and outputs\r\n\r\nThe workflow can be modified to suit your own parameters. \r\n\r\nThe workflow steps are:\r\n* Run FastQC to get statistics on the raw reads, send to MultiQC to create a nice output. This is tagged as \"Report 1\" in the Galaxy history. \r\n* Run Cutadapt on the reads to cut adapters - enter two files with adapter sequence at the workflow option for \"Choose file containing 3' adapters\". The default settings are on except that the \"Maximum error rate\" for the adapters is set to 0.2 instead of 0.1. Send output statistics to MulitQC, this is \"Report 2\" in the Galaxy history. Note that you may have different requirements here in terms of how many adapter sequences you want to enter. We recommend copying the workflow and modifying as needed. \r\n* Send these reads to fastp for additional filtering or trimming. Default settings are on but can be modified as needed. Send output statistics to MultiQC, this is \"Report 3\" in the Galaxy history. \r\n* The filtered and trimmed reads are then ready for the stacks workflows. 
\r\n\r\n![qc-wf](wf-image-qc.png)\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/346?version=1","name":"v1.0","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"347","url":"https://workflowhub.eu/workflows/347","name":"Stacks RAD-seq reference-guided workflow","description":"# workflow-ref-guided-stacks\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\n## Inputs\r\n* demultiplexed reads in fastq format, may be output from the QC workflow. Files are in a collection. \r\n* population map in text format\r\n* reference genome in fasta format\r\n\r\n## Steps and outputs\r\n\r\nBWA MEM 2:\r\n* The reads are mapped to the reference genome; output in BAM format\r\n* The collection of bam files is named something like Map with BWA-MEM on collection 5 (mapped reads in BAM format)\r\n* Each of the bam files in the collection is named something like sample_CAAC\r\n\r\nSamtools stats before filtering:\r\n* These bam files are sent to Samtools stats to get statistics; these are then sent to MultiQC to provide a nice output. This is tagged as \"bam stats before filtering\" in the Galaxy history. \r\n* The \"General Statistics\" show how many reads were mapped - if there is a low mapping rate, it may be worth re-checking or repeating QC on the raw reads, or considering a different reference genome, or using a de novo approach. To see if many reads have been soft-clipped by Bwa mem (which may affect how well gstacks can work), look at the \"Alignment Metrics\" section, and the row with \"Mapped bases (Cigar)\". 
Hover over the dots to see sample names especially towards the left of the row - these have the least mapped reads.\r\n\r\nSamtools view:\r\n* This step filters out certain reads from the bam files. The default settings are to exclude reads if they are unmapped, if the alignment is not primary or is supplementary, if the read fails platform/vendor quality checks, and if the read is a PCR or optical duplicate. \r\n* The output bams are tagged with \"filtered bams\" in the Galaxy history.\r\n\r\nSamtools stats after filtering:\r\n* Filtered bams are sent again to samtools stats, and statistics to MultiQC, with the report tagged as \"bam stats after filtering\" in the Galaxy history. \r\n\r\ngstacks:\r\n* Filtered bams and a population map are sent to gstacks. The outputs are:\r\n* Catalog of loci in fasta format\r\n* Variant calls in VCF format\r\n* Note: some bam files cause errors here with gstacks. For example, the log file may say \"Error, all records discard with file SampleXYZ.FASTQ.bam, Aborted\". If this occurs, check the bam stats (as described above). Some of the options are to re-do QC on the raw reads, change settings for mapping reads in BWA MEM, and/or delete this sample/s from the population map and proceed to gstacks. \r\nThe sample can still remain in the list of bam files but gstacks will only consider what is listed in the pop map. \r\n\r\npopulations:\r\n* gstacks outputs and a population map are sent to the \"populations\" module. 
The outputs are:\r\n* Locus consensus sequences in fasta format\r\n* Snp calls, in VCF format\r\n* Haplotypes, in VCF format\r\n* Summary statistics\r\n\r\n![qc-wf](wf-ref-guided.png)\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/347?version=1","name":"v1.1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"348","url":"https://workflowhub.eu/workflows/348","name":"Stacks RAD-seq de novo workflow","description":"# workflow-denovo-stacks\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\n## Inputs\r\n* demultiplexed reads in fastq format, may be output from the QC workflow. Files are in a collection. \r\n* population map in text format\r\n\r\n\r\n## Steps and outputs\r\n\r\nustacks:\r\n* input reads go to ustacks. \r\n* ustacks assembles the reads into matching stacks (hypothetical alleles). \r\n* The outputs are in a collection called something like: Stacks2: ustacks  on data 21, data 20, and others Loci and polymorphism. Click on this to see the files:\r\n* for each sample, assembled loci (tsv format), named e.g. sample_CAAC.tags\r\n* for each sample, model calls from each locus (tsv format), named e.g. sample_CAAC.snps\r\n* for each sample, haplotypes/alleles recorded from each locus (tsv format), named e.g. sample_CAAC.alleles\r\n* Please see sections 6.1 to 6.4 in https://catchenlab.life.illinois.edu/stacks/manual/#ufiles for a full description. \r\n\r\ncstacks:\r\n* cstacks will merge stacks into a catalog of consensus loci. \r\n* The outputs are in a collection called something like Stacks2: cstacks  on data 3, data 71, and others Catalog of loci. 
Click on this to see the three files, each in tsv format:\r\ncatalog.tags\r\ncatalog.snps\r\ncatalog.alleles\r\n\r\n\r\nsstacks:\r\n* sstacks will compare each sample to the loci in the catalog. \r\n* The outputs are in a collection called something like Stacks2: sstacks  on data 3, data 76, and others Matches to the catalog.Click on this to see the files:\r\nThere is one file for each sample, named e.g. sample_CAAC.matches, in tsv format. \r\n\r\ntsv2bam:\r\n* Conversion to BAM format\r\n* Reads from each sample are now aligned to each locus, and the tsv2bam tool will convert this into a bam file for each sample. \r\n* The outputs are in a collection called something like Stacks2: tsv2bam  on data 3, data 94, and others Matches to the catalog.Click on this to see the files:\r\nThere is one file for each sample, named e.g sample_CAAC.matches, in BAM format. \r\n\r\ngstacks:\r\n* Catalog of loci in fasta format\r\n* Variant calls in VCF format\r\n\r\npopulations:\r\n* Locus consensus sequences in fasta format\r\n* Snp calls, in VCF format\r\n* Haplotypes, in VCF format\r\n* Summary statistics\r\n\r\n![denovo](wf-denovo.png)\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/348?version=1","name":"v1.0","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"349","url":"https://workflowhub.eu/workflows/349","name":"Partial de novo workflow: ustacks only","description":"# workflow-partial-ustacks-only\r\n\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\n\r\nFor the full de novo workflow see https://workflowhub.eu/workflows/348\r\n\r\nYou may want to run ustacks with different batches of samples. 
\r\n* To be able to combine these later, there are some necessary steps - we need to keep track of how many samples have already run in ustacks, so that new samples can be labelled with different identifying numbers.  \r\n* In ustacks, under \"Processing options\" there is an option called \"Start identifier at\". \r\n* The default for this is 1, which can be used for the first batch of samples. These will then be labelled as sample 1, sample 2 and so on. \r\n* For any new batches of samples to process in ustacks, we will want to start numbering these at the next available number. e.g. if there were 10 samples in batch 1, this should then be set to start at 11. \r\n\r\nTo combine multiple outputs from ustacks, providing these have been given appropriate starting identifiers:\r\n* Find the ustacks output in the Galaxy history. This will be a list of samples. \r\n* Click on the cross button next to the filename to delete, but select \"Collection only\". This releases the items from the list, but they will now be hidden in the Galaxy history.\r\n* In the history panel, click on \"hidden\" to reveal any hidden files. Unhide the samples. \r\n* Do this for all the batches of ustacks outputs that are needed. \r\n* Click on the tick button, tick all the samples needed, then \"For all selected\" choose \"Build dataset list\"\r\n* This is now a combined set of samples for input into cstacks. 
\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/349?version=1","name":"v1.0","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"350","url":"https://workflowhub.eu/workflows/350","name":"Partial de novo workflow: c-s-g-pops only","description":"# workflow-partial-cstacks-sstacks-gstacks\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\nThis workflow takes in ustacks output, and runs cstacks, sstacks and gstacks. \r\n\r\nTo generate ustacks output see https://workflowhub.eu/workflows/349\r\n\r\nFor the full de novo workflow see https://workflowhub.eu/workflows/348\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/350?version=1","name":"v1.0","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"351","url":"https://workflowhub.eu/workflows/351","name":"Partial ref-guided workflow - bwa mem only","description":"# workflow-partial-bwa-mem\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\nThis workflow is part of the reference-guided stacks workflow, https://workflowhub.eu/workflows/347\r\n\r\nInputs\r\n* demultiplexed reads in fastq format, may be output from the QC workflow. Files are in a collection. 
\r\n* reference genome in fasta format\r\n\r\nOutputs\r\n* A set of filtered bam files, ready for the next part of the stacks workflow (e.g. gstacks). \r\n* Statistics on the bam files. \r\n\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/351?version=1","name":"v1.0","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"352","url":"https://workflowhub.eu/workflows/352","name":"Partial ref-guided workflow - gstacks and pops","description":"# workflow-partial-gstacks-populations\r\n\r\nThese workflows are part of a set designed to work for RAD-seq data on the Galaxy platform, using the tools from the Stacks program. \r\n\r\nGalaxy Australia: https://usegalaxy.org.au/\r\n\r\nStacks: http://catchenlab.life.illinois.edu/stacks/\r\n\r\nThis workflow is part of the reference-guided stacks workflow, https://workflowhub.eu/workflows/347\r\n\r\n This workflow takes in bam files and a population map. \r\n\r\nTo generate bam files see: https://workflowhub.eu/workflows/351\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/352?version=1","name":"v1.0","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"353","url":"https://workflowhub.eu/workflows/353","name":"Generic variation analysis on WGS PE data","description":"# Generic variant calling\r\n\r\n\r\nA generic workflow for identification of variants in a haploid genome such as genomes of bacteria or viruses. It can be readily used on MonkeyPox. The workflow accepts two inputs:\r\n\r\n- A genbank file with the reference genomes\r\n- A collection of paired fastqsanger files\r\n\r\nThe workflow outputs a collection of VCF files for each sample (each fastq pair). 
These VCF files serve as input to the [Reporting workflow](https://workflowhub.eu/workflows/354). \r\n\r\nWorkflow can be accessed directly on [usegalaxy.org](https://usegalaxy.org/u/aun1/w/generic-variation-analysis-on-wgs-pe-data)\r\n\r\nThe general idea of the workflow is:\r\n\r\n![](https://i.imgur.com/rk40Y4t.png)","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/353?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"354","url":"https://workflowhub.eu/workflows/354","name":"Generic variation analysis reporting","description":"# Generic variation analysis reporting\r\n\r\nThis workflow generates reports from a list of variants generated by [Variant Calling Workflow](https://workflowhub.eu/workflows/353). \r\n\r\nThe workflow accepts a single input:\r\n\r\n- A collection of VCF files\r\n\r\nThe workflow produces two outputs (format description below):\r\n\r\n1. A list of variants grouped by Sample\r\n2. A list of variants grouped by Variant\r\n\r\nHere is example of output **by sample**. 
In this table all variants in all samples are explicitly listed:\r\n\r\n| Sample\t| POS\t| FILTER\t| REF\t| ALT\t| DP\t| AF\t| AFcaller\t| SB\t| DP4\t| IMPACT | FUNCLASS\t| EFFECT\t| GENE\t| CODON\t| AA\t| TRID\t| min(AF)\t| max(AF)\t| countunique(change)\t| countunique(FUNCLASS)\t| change |\r\n|----------|------|----------|---------|-----|-----|------|-----------|-----|-------|----------|---------------|-------------|--------|-------------| ---|--------|----------|-----------|-------------------------|------------------------------|------------|\r\n| ERR3485786\t| 11644\t| PASS\t| A\t| G\t| 97\t| 0.979381\t| 0.907216\t| 0\t| 1,1,49,46\t| LOW\t| SILENT\t| SYNONYMOUS_CODING\t| D7L\t| tgT/tgC\t| C512\t| AKG51361.1\t| 0.979381\t| 1\t| 1\t| 1\t| A\u003eG |\r\n| ERR3485786\t| 11904\t| PASS\t| T\t| C\t| 102\t| 0.990196\t| 0.95098\t| 0\t| 0,0,51,50\t| MODERATE\t| MISSENSE\t| NON_SYNONYMOUS_CODING\t| D7L\t| Act/Gct\t| T426A\t| AKG51361.1\t| 0.990196\t| 1\t| 1\t| 1\t| T\u003eC |\r\n\r\n\u003e **Note** the two alternative allele frequency fields: \"AFcaller\" and \"AF\". LoFreq reports AF values listed in \"AFcaller\". They are incorrect due to the known LoFreq [bug](https://github.com/CSB5/lofreq/issues/80). To correct for this we are recomputing AF values from DP4 and DP fields as follows: `AF == (DP4[2] + DP4[3]) / DP.`\r\n\r\nHere is an example of output **by variant**. 
In this table data is aggregated by variant across all samples in which this variant is present:\r\n\r\n| POS\t| REF\t| ALT\t| IMPACT\t| FUNCLASS\t| EFFECT\t| GENE\t| CODON\t| AA\t| TRID\t| countunique(Sample)\t| min(AF)\t| max(AF)\t| SAMPLES(above-thresholds)\t| SAMPLES(all)\t| AFs(all)\t| change |\r\n|-----|-------|-----|-----------|----------------|------------|----------|-----------|------|--------|------------------------|----------|-----------|------------------------------------|------------------|----------|---------|\r\n| 11644\t| A\t| G\t| LOW\t| SILENT\t| SYNONYMOUS_CODING\t| D7L\t| tgT/tgC\t| C512\t| AKG51361.1\t| 11\t| 0.979381\t| 1\t| ERR3485786,ERR3485787... | \tERR3485786,ERR3485787,ERR3485789 ... \t| 0.979381,1.0...\t| A\u003eG |\r\n| 11904\t| T\t| C\t| MODERATE\t| MISSENSE\t| NON_SYNONYMOUS_CODING\t| D7L\t| Act/Gct\t| T426A\t| AKG51361.1\t| 12\t| 0.990196\t| 1\t| ERR3485786,ERR3485787... | \tERR3485786,ERR3485787,ERR3485789... | \t0.990196,1.0,1.0... | \tT\u003eC | \r\n\r\nThe workflow can be accessed at [usegalaxy.org](https://usegalaxy.org/u/aun1/w/genetic-variation-analysis-reporting)\r\n\r\nThe general idea of the workflow is:\r\n\r\n![](https://i.imgur.com/k2cIZK5.png)\r\n\r\n","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/354?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"356","url":"https://workflowhub.eu/workflows/356","name":"Generic consensus construction from VCF calls","description":"# Generic consensus building\r\n\r\nThis workflow generates consensus sequences using a list of variants generated by [Variant Calling Workflow](https://workflowhub.eu/workflows/353). 
\r\n\r\nThe workflow accepts a single input:\r\n\r\n- A collection of VCF files\r\n\r\nThe workflow produces a single output:\r\n\r\n- Consensus sequence for each input VCF file\r\n\r\nThe workflow can be accessed at [usegalaxy.org](https://usegalaxy.org/u/aun1/w/consensus-construction)","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/356?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"357","url":"https://workflowhub.eu/workflows/357","name":"generic-variant-calling-wgs-pe/main","description":"Generic variation analysis on WGS PE data\n-------------------------------------------\n\nThis workflows performs paired end read mapping with bwa-mem followed by\nsensitive variant calling across a wide range of AFs with lofreq and variant\nannotation with snpEff. The reference genome can be provided as a GenBank file.\n","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/357?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"358","url":"https://workflowhub.eu/workflows/358","name":"preparing genomic data for phylogeny recostruction (GTN)","description":"This workflow begins from a set of genome assemblies of different samples, strains, species. The genome is first annotated with Funnanotate. Predicted proteins are furtner annotated with Busco. Next, 'ProteinOrtho' finds orthologs across the samples and makes orthogroups. Orthogroups where all samples are represented are extracted. Orthologs in each orthogroup are aligned with ClustalW. 
Test dataset: https://zenodo.org/record/6610704#.Ypn3FzlBw5k","organization":"usegalaxy.be workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/358?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"359","url":"https://workflowhub.eu/workflows/359","name":"ML phylogenetic reconstruction","description":"Phylogenetic reconstruction using genome-wide and single-gene alignment data. Here we use maximum likelihood reconstruction program IQTree. \r\nData can be prepared using the [phylogenetic data preparation workflow](http://workflowhub.eu/workflows/358) prior to phylogenetic reconstruction.\r\nResulting trees can be viewed interactively using Galaxy's 'Phyloviz' or 'Phylogenetic Tree Visualization'","organization":"usegalaxy.be workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/359?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"360","url":"https://workflowhub.eu/workflows/360","name":"MGnify - assembly analysis pipeline","description":"MGnify (http://www.ebi.ac.uk/metagenomics) provides a free to use platform for the assembly, analysis and archiving of microbiome data derived from sequencing microbial populations that are present in particular environments. Over the past 2 years, MGnify (formerly EBI Metagenomics) has more than doubled the number of publicly available analysed datasets held within the resource. Recently, an updated approach to data analysis has been unveiled (version 5.0), replacing the previous single pipeline with multiple analysis pipelines that are tailored according to the input data, and that are formally described using the Common Workflow Language, enabling greater provenance, reusability, and reproducibility. 
MGnify's new analysis pipelines offer additional approaches for taxonomic assertions based on ribosomal internal transcribed spacer regions (ITS1/2) and expanded protein functional annotations. Biochemical pathways and systems predictions have also been added for assembled contigs. MGnify's growing focus on the assembly of metagenomic data has also seen the number of datasets it has assembled and analysed increase six-fold. The non-redundant protein database constructed from the proteins encoded by these assemblies now exceeds 1 billion sequences. Meanwhile, a newly developed contig viewer provides fine-grained visualisation of the assembled contigs and their enriched annotations.\r\n\r\nDocumentation: https://docs.mgnify.org/en/latest/analysis.html#assembly-analysis-pipeline\r\n","organization":"HoloFood at MGnify, MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/360?version=1","name":"master @ 981aafc","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/360?version=2","name":"v5.0.7","author":[],"descriptor_type":["CWL"]}]},{"id":"361","url":"https://workflowhub.eu/workflows/361","name":"MGnify - amplicon analysis pipeline","description":"MGnify (http://www.ebi.ac.uk/metagenomics) provides a free to use platform for the assembly, analysis and archiving of microbiome data derived from sequencing microbial populations that are present in particular environments. Over the past 2 years, MGnify (formerly EBI Metagenomics) has more than doubled the number of publicly available analysed datasets held within the resource. 
Recently, an updated approach to data analysis has been unveiled (version 5.0), replacing the previous single pipeline with multiple analysis pipelines that are tailored according to the input data, and that are formally described using the Common Workflow Language, enabling greater provenance, reusability, and reproducibility. MGnify's new analysis pipelines offer additional approaches for taxonomic assertions based on ribosomal internal transcribed spacer regions (ITS1/2) and expanded protein functional annotations. Biochemical pathways and systems predictions have also been added for assembled contigs. MGnify's growing focus on the assembly of metagenomic data has also seen the number of datasets it has assembled and analysed increase six-fold. The non-redundant protein database constructed from the proteins encoded by these assemblies now exceeds 1 billion sequences. Meanwhile, a newly developed contig viewer provides fine-grained visualisation of the assembled contigs and their enriched annotations.\r\n\r\nDocumentation: https://docs.mgnify.org/en/latest/analysis.html#amplicon-analysis-pipeline\r\n","organization":"MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/361?version=1","name":"v5.0.7","author":[],"descriptor_type":["CWL"]}]},{"id":"362","url":"https://workflowhub.eu/workflows/362","name":"MGnify - raw-reads analysis pipeline","description":"MGnify (http://www.ebi.ac.uk/metagenomics) provides a free to use platform for the assembly, analysis and archiving of microbiome data derived from sequencing microbial populations that are present in particular environments. Over the past 2 years, MGnify (formerly EBI Metagenomics) has more than doubled the number of publicly available analysed datasets held within the resource. 
Recently, an updated approach to data analysis has been unveiled (version 5.0), replacing the previous single pipeline with multiple analysis pipelines that are tailored according to the input data, and that are formally described using the Common Workflow Language, enabling greater provenance, reusability, and reproducibility. MGnify's new analysis pipelines offer additional approaches for taxonomic assertions based on ribosomal internal transcribed spacer regions (ITS1/2) and expanded protein functional annotations. Biochemical pathways and systems predictions have also been added for assembled contigs. MGnify's growing focus on the assembly of metagenomic data has also seen the number of datasets it has assembled and analysed increase six-fold. The non-redundant protein database constructed from the proteins encoded by these assemblies now exceeds 1 billion sequences. Meanwhile, a newly developed contig viewer provides fine-grained visualisation of the assembled contigs and their enriched annotations.\r\n\r\nDocumentation: https://docs.mgnify.org/en/latest/analysis.html#raw-reads-analysis-pipeline","organization":"MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/362?version=1","name":"v5.0.7","author":["Ekaterina Sakharova","Varsha Kale","Martin Beracochea"],"descriptor_type":["CWL"]}]},{"id":"363","url":"https://workflowhub.eu/workflows/363","name":"Trio Analysis","description":"To discover causal mutations of inherited diseases it’s common practice to do a trio analysis. In a trio analysis DNA is sequenced of both the patient and parents. Using this method, it’s possible to identify multiple inheritance patterns. Some examples of these patterns are autosomal recessive, autosomal dominant, and de-novo variants, which are represented in the figure below. 
To elaborate, the most left tree shows an autosomal dominant inhertitance pattern where the offspring inherits a faulty copy of the gene from one of the parents.\r\n\r\nTo discover these mutations either whole exome sequencing (WES) or whole genome sequencing (WGS) can be used. With these technologies it is possible to uncover the DNA of the parents and offspring to find (shared) mutations in the DNA. These mutations can include insertions/deletions (indels), loss of heterozygosity (LOH), single nucleotide variants (SNVs), copy number variations (CNVs), and fusion genes.\r\n\r\nIn this workflow  we will also make use of the HTSGET protocol, which is a program to download our data securely and savely. This protocol has been implemented in the EGA Download Client Tool: toolshed.g2.bx.psu.edu/repos/iuc/ega_download_client/pyega3/4.0.0+galaxy0 tool, so we don’t have to leave Galaxy to retrieve our data.\r\n\r\nWe will not start our analysis from scratch, since the main goal of this tutorial is to use the HTSGET protocol to download variant information from an online archive and to find the causative variant from those variants. 
If you want to learn how to do the analysis from scratch, using the raw reads, you can have a look at the Exome sequencing data analysis for diagnosing a genetic disease tutorial.","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/363?version=1","name":"Version 1","author":["Jasper Ouwerkerk"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/363?version=2","name":"Version 2","author":["Jasper Ouwerkerk"],"descriptor_type":["GALAXY"]}]},{"id":"364","url":"https://workflowhub.eu/workflows/364","name":"Workflow for running the Community Earth System Model in fully coupled mode","description":"This workflow demonstrates the usage of the [Community Earth System Model](https://www.cesm.ucar.edu/) on Galaxy Europe. \r\n\r\nA fully coupled B1850 compset with resolution f19_g17 is run for 1 month.\r\n\r\n![](https://nordicesmhub.github.io/GEO4962/fig/newcase.png)","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/364?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"365","url":"https://workflowhub.eu/workflows/365","name":"VGP-meryldb-creation-trio/main","description":"# VGP Workflow #1\n\nThis workflow collects the metrics on the properties of the genome under consideration by analyzing the k-mer frequencies. It provides information about the genomic complexity, such as the genome size and levels of heterozygosity and repeat content, as well about the data quality. 
It uses reads from two parental genomes to partition long reads from the offspring into haplotype-specific k-mer databases.\n\n### Inputs\n\n-   Collection of Hifi long reads in FASTQ format\n-   Paternal short-read Illumina sequencing reads in FASTQ format\n-   Maternal short-read Illumina sequencing reads in FASTQ format\n\n### Outputs\n\n-   Meryl databases of k-mer counts\n    - Child\n    - Paternal haplotype\n    - Maternal haplotype\n-   GenomeScope metrics of child and parental genomes\n    -   Linear plot\n    -   Log plot\n    -   Transformed linear plot\n    -   Transformed log plot\n    -   Summary\n    -   Model\n    -   Model parameteres","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/365?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"366","url":"https://workflowhub.eu/workflows/366","name":"VGP-meryldb-creation/main","description":"# VGP Workflow #1\n\nThis workflow produces a Meryl database and Genomescope outputs that will be used to determine parameters for following workflows, and assess the quality of genome assemblies. 
Specifically, it provides information about the genomic complexity, such as the genome size and levels of heterozygosity and repeat content, as well about the data quality.\n\n### Inputs\n\n-   Collection of Hifi long reads in FASTQ format\n\n### Outputs\n\n-   Meryl Database of kmer counts\n-   GenomeScope\n    -   Linear plot\n    -   Log plot\n    -   Transformed linear plot\n    -   Transformed log plot\n    -   Summary\n    -   Model\n    -   Model parameteres","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/366?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"367","url":"https://workflowhub.eu/workflows/367","name":"(Hybrid) Metagenomics workflow","description":"**Workflow (hybrid) metagenomic assembly and binning**\u003cbr\u003e\r\n  - Workflow Illumina Quality: \r\n    - Sequali (control)\r\n    - hostile contamination filter\r\n    - fastp (quality trimming)\r\n  - Workflow Longread Quality:\t\r\n    - NanoPlot (control)\r\n    - fastplong (quality trimming)\r\n    - hostile contamination filter\r\n  - Kraken2 taxonomic classification of FASTQ reads\r\n  - SPAdes/Flye (Assembly)\r\n  - Medaka/PyPolCA (Assembly polishing)\r\n  - QUAST (Assembly quality report)\r\n\r\n  (optional)\r\n  - Workflow binnning\r\n    - Metabat2/MaxBin2/SemiBin\r\n    - Binette\r\n    - BUSCO\r\n    - GTDB-Tk\r\n\r\n  (optional)\r\n  - Workflow Genome-scale metabolic models https://workflowhub.eu/workflows/372\r\n    - CarveMe (GEM generation)\r\n    - MEMOTE (GEM test suite)\r\n    - SMETANA (Species METabolic interaction ANAlysis)\r\n\r\nOther UNLOCK workflows on WorkflowHub: https://workflowhub.eu/projects/16/workflows?view=default\u003cbr\u003e\u003cbr\u003e\r\n\r\n**All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\n  https://gitlab.com/m-unlock/cwl/ 
\u003cbr\u003e\r\n\r\n**How to setup and use an UNLOCK workflow:**\u003cbr\u003e\r\nhttps://docs.m-unlock.nl/docs/workflows/setup.html\u003cbr\u003e\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/367?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst","Changlin Ke"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/367?version=2","name":"WFP","author":["Bart Nijsse","Jasper Koehorst","Changlin Ke"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/367?version=3","name":"Version 2","author":["Bart Nijsse","Jasper Koehorst","Changlin Ke"],"descriptor_type":["CWL"]}]},{"id":"372","url":"https://workflowhub.eu/workflows/372","name":"Metagenomic GEMs from Assembly","description":"### Workflow for Metagenomics from bins to metabolic models (GEMs)\r\n\r\n**Summary**\r\n  - Prodigal gene prediction\r\n  - CarveMe genome scale metabolic model reconstruction\r\n  - MEMOTE for metabolic model testing\r\n  - SMETANA Species METabolic interaction ANAlysis\r\n\r\nOther UNLOCK workflows on WorkflowHub: https://workflowhub.eu/projects/16/workflows?view=default\u003cbr\u003e\r\n\r\n**All tool CWL files and other workflows can be found here:**\u003cbr\u003e\r\nTools: https://gitlab.com/m-unlock/cwl\u003cbr\u003e\r\nWorkflows: https://gitlab.com/m-unlock/cwl/workflows\r\n\r\n**How to setup and use an UNLOCK workflow:**\u003cbr\u003e\r\nhttps://m-unlock.gitlab.io/docs/setup/setup.html\u003cbr\u003e\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/372?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]}]},{"id":"373","url":"https://workflowhub.eu/workflows/373","name":"De novo digitisation","description":"An example 
workflow to allow users to run the Specimen Data Refinery tools on data provided in an input CSV file.","organization":"Specimen Data Refinery","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/373?version=1","name":"Version 1","author":["Paul Brack","Oliver Woolland","Laurence Livermore"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/373?version=2","name":"Version 2","author":["Paul Brack","Oliver Woolland","Laurence Livermore"],"descriptor_type":["GALAXY"]}]},{"id":"374","url":"https://workflowhub.eu/workflows/374","name":"DLA-Collections-test","description":"An example workflow for the Specimen Data Refinery tool, allowing an individual tool to be used","organization":"Specimen Data Refinery","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/374?version=1","name":"Version 1","author":["Laurence Livermore","Oliver Woolland"],"descriptor_type":["GALAXY"]}]},{"id":"375","url":"https://workflowhub.eu/workflows/375","name":"HTR-Collections-test","description":"An example workflow for the Specimen Data Refinery tool, allowing an individual tool to be used","organization":"Specimen Data Refinery","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/375?version=1","name":"Version 1","author":["Laurence Livermore","Oliver Woolland"],"descriptor_type":["GALAXY"]}]},{"id":"376","url":"https://workflowhub.eu/workflows/376","name":"ABR Threshold Detection","description":"# ABR\\_Threshold_Detection\r\n\r\n## What is this?\r\n\r\nThis code can be used to automatically determine hearing thresholds from ABR hearing curves. 
\r\n\r\nOne of the following methods can be used for this purpose:\r\n \r\n+ neural network (NN) training, \r\n+ calibration of a self-supervised sound level regression (SLR) method \r\n\r\non given data sets with manually determined hearing thresholds.\r\n\r\n## Installation:\r\n\r\nRun inside the [src](./src) directory:\r\n\r\n### Installation as python package\r\n\r\n```\r\npip install -e ./src        (Installation as python package)\r\n```\r\n\r\n### Installation as conda virtual environment\r\n```\r\nconda create -n abr_threshold_detection python=3.7\r\nconda activate abr_threshold_detection\r\nconda install pip\r\npip install -e ./src\r\n```\r\n\r\n## Usage:\r\nData files can be downloaded here: [https://zenodo.org/deposit/5779876](https://zenodo.org/deposit/5779876).\r\n\r\nFor the Jupyter Notebooks (see the [`notebooks`](./notebooks) directory) to run, the path to the data has to be defined. For this, see the corresponding documentation of the respective notebooks.\r\n\r\n### Using NNs (`./src/ABR_ThresholdFinder_NN`)\r\n\r\nThe neural network models were trained in `./src/notebooks/GMCtrained_NN*_training.ipynb` with GMC data and in `./src/notebooks/INGtrained_NN*_training.ipynb` with ING data.\r\n\r\n```\r\nimport ABR_ThresholdFinder_NN.data_preparation as dataprep\r\nfrom ABR_ThresholdFinder_NN.models import create_model_1, compile_model_1\r\n```\r\nFor automatic threshold detection based on NNs, `GMCtrained_NN_threshold_detection.ipynb` and `INGtrained_NN_threshold_detection.ipynb` in `./src/notebooks` can be used.\r\n\r\n```\r\nimport ABR_ThresholdFinder_NN.data_preparation as dataprep\r\nimport ABR_ThresholdFinder_NN.thresholder as abrthr\r\n```\r\n\r\n### Using the SLR method (`./src/ABR_ThresholdFinder_SLR`)\r\n\r\nIn `./src/notebooks/GMCcalibrated_SLR_threshold_detection.ipynb` and `./src/notebooks/INGcalibrated_SLR_threshold_detection.ipynb` it is shown how to use the module to:\r\n\r\n+ train a threshold detector on a data set and estimate the 
thresholds\r\n+ save a trained model\r\n+ load a model\r\n+ apply a trained threshold estimator to a data set\r\n+ evaluate thresholds by comparing it to a ground truth\r\n+ evaluate thresholds by analysing signal averages\r\n\r\n```\r\nimport pandas as pd\r\nimport numpy as np\r\n\r\nfrom ABR_ThresholdFinder_SLR import ABR_Threshold_Detector_multi_stimulus\r\nfrom ABR_ThresholdFinder_SLR.evaluations import evaluate_classification_against_ground_truth, plot_evaluation_curve_for_specific_stimulus\r\n```\r\n\r\n##### Evaluate thresholds by comparing it with a 'ground truth' (a human set threshold in this case)\r\n\r\nFor example:\r\n\r\n```\r\n# 5dB buffer\r\nevaluation = evaluate_classification_against_ground_truth(GMC_data2, 5, \r\n                                 frequency = 'frequency',\r\n                                 mouse_id = 'mouse_id',\r\n                                 sound_level = 'sound_level',\r\n                                 threshold_estimated = 'slr_estimated_thr',\r\n                                 threshold_ground_truth = 'threshold')\r\n```     \r\n### Compute and plot evaluation curves that allow to judge the quality of a thresholding\r\n\r\nFour threshold types are evaluated and compared:\r\n\r\n+ the threshols predicted with neural networks ('threshold NN')\r\n+ the thresholds estimated by a sound level regression method ('threshold SLR')\r\n+ the human ground truth ('threshold manual')\r\n+ a constant threshold ('50')\r\n\r\nFor more details, please see `Evaluation_of_ML_detected_thresholds.ipynb` in `./src/notebooks`.\r\n\r\n## Folder structure:\r\n\r\n### [`data`](./data)\r\nContains the preprocessed ABR and mouse phenotyping datasets from GMC and Ingham et al. 
in csv format, as well as the mouse ID distributions stored as numpy arrays for neural networks training, validation and testing.\r\n\r\n### [`models`](./models)\r\nContains the trained models of the two neural networks and the SLR method, but also the predictions of the first neural network with which the second neural network was fed.\r\n\r\n### [`models_cross-validation`](./models_cross-validation)\r\nContains the models that resulted from the cross-validation of the neural networks.\r\n\r\n### [`notebooks`](./notebooks)\r\nContains the Jupyter notebooks used for training, testing and evaluation of the neural networks and the SLR method, as well as those used for the hearing curve analysis.\r\n\r\n### [`notebooks_reports`](./notebooks_reports)\r\nContains the contents of Jupyter notebooks in html format.\r\n\r\n### [`results`](./results)\r\nContains the predictions or estimates made by the neural networks or the SLR method for the two data sets from GMC and Ingham et al. but also all the plots made to analyse the results.\r\n\r\n### [`src`](./src)\r\nContains the Python scripts used in the Jupyter notebooks.","organization":"Applied Computational Biology at IEG/HMGU","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/376?version=1","name":"master @ edbcd7b","author":[],"descriptor_type":[]}]},{"id":"379","url":"https://workflowhub.eu/workflows/379","name":"Lysozyme in Water COMPSs workflow","description":"Lysozyme in Water simplest version, from COMPSs Tutorial. 
The original idea of this worklfow comes from http://www.mdtutorials.com/gmx/lysozyme/index.html","organization":"Cluster Emergent del Cervell Humà, Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/379?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"380","url":"https://workflowhub.eu/workflows/380","name":"Air Quality Prediction Prototype","description":"A prototype implementation of the Air Quality Prediction pipeline in Galaxy, using CWL tools.","organization":"Air Quality Prediction","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/380?version=1","name":"Version 1","author":["Oliver Woolland","Douglas Lowe"],"descriptor_type":["GALAXY"]}]},{"id":"382","url":"https://workflowhub.eu/workflows/382","name":"dna-seq-varlociraptor workflow","description":"# Snakemake workflow: dna-seq-varlociraptor\r\n\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥6.3.0-brightgreen.svg)](https://snakemake.github.io)\r\n[![GitHub actions status](https://github.com/snakemake-workflows/dna-seq-varlociraptor/workflows/Tests/badge.svg?branch=master)](https://github.com/snakemake-workflows/dna-seq-varlociraptor/actions?query=branch%3Amaster+workflow%3ATests)\r\n[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4675661.svg)](https://doi.org/10.5281/zenodo.4675661)\r\n\r\n\r\nA Snakemake workflow for calling small and structural variants under any kind of scenario (tumor/normal, tumor/normal/relapse, germline, pedigree, populations) via the unified statistical model of [Varlociraptor](https://varlociraptor.github.io).\r\n\r\n\r\n## Usage\r\n\r\nThe usage of this workflow is described in the [Snakemake Workflow 
Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?usage=snakemake-workflows%2Fdna-seq-varlociraptor).\r\n\r\nIf you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this (original) repository and its DOI (see above).\r\n","organization":"Snakemake-Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/382?version=1","name":"master @ e6b98b8","author":["Johannes Köster"],"descriptor_type":["SMK"]}]},{"id":"383","url":"https://workflowhub.eu/workflows/383","name":"CNV_pipeline","description":"# StructuralVariants Workflow\r\n","organization":"TransBioNet","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/383?version=1","name":"1.1.0","author":["Laura Rodriguez-Navas"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/383?version=2","name":"1.1.1","author":["Laura Rodriguez-Navas"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/383?version=3","name":"1.1.3","author":["Laura Rodriguez-Navas"],"descriptor_type":["NFL"]}]},{"id":"384","url":"https://workflowhub.eu/workflows/384","name":"A workflow for marine Genomic Observatories data analysis","description":"# metaGOflow: A workflow for marine Genomic Observatories' data analysis\r\n\r\n![logo](https://raw.githubusercontent.com/hariszaf/metaGOflow-use-case/gh-pages/assets/img/metaGOflow_logo_italics.png)\r\n\r\n\r\n## An EOSC-Life project\r\n\r\nThe workflows developed in the framework of this project are based on `pipeline-v5` of the MGnify resource.\r\n\r\n\u003e This branch is a child of the [`pipeline_5.1`](https://github.com/hariszaf/pipeline-v5/tree/pipeline_5.1) branch\r\n\u003e that contains all CWL descriptions of the MGnify pipeline version 5.1.\r\n\r\n## Dependencies\r\n\r\nTo run metaGOflow you need to 
make sure you have the following set on your computing environmnet first:\r\n\r\n- python3 [v 3.8+]\r\n- [Docker](https://www.docker.com) [v 19.+] or [Singularity](https://apptainer.org) [v 3.7.+]/[Apptainer](https://apptainer.org) [v 1.+]\r\n- [cwltool](https://github.com/common-workflow-language/cwltool) [v 3.+]\r\n- [rdflib](https://rdflib.readthedocs.io/en/stable/) [v 6.+]\r\n- [rdflib-jsonld](https://pypi.org/project/rdflib-jsonld/) [v 0.6.2]\r\n- [ro-crate-py](https://github.com/ResearchObject/ro-crate-py) [v 0.7.0]\r\n- [pyyaml](https://pypi.org/project/PyYAML/) [v 6.0]\r\n- [Node.js](https://nodejs.org/) [v 10.24.0+]\r\n- Available storage ~235GB for databases\r\n\r\n### Storage while running\r\n\r\nDepending on the analysis you are about to run, disk requirements vary.\r\nIndicatively, you may have a look at the metaGOflow publication for computing resources used in various cases.\r\n\r\n## Installation\r\n\r\n### Get the EOSC-Life marine GOs workflow\r\n\r\n```bash\r\ngit clone https://github.com/emo-bon/MetaGOflow\r\ncd MetaGOflow\r\n```\r\n\r\n### Download necessary databases (~235GB)\r\n\r\nYou can download databases for the EOSC-Life GOs workflow by running the\r\n`download_dbs.sh` script under the `Installation` folder.\r\n\r\n```bash\r\nbash Installation/download_dbs.sh -f [Output Directory e.g. 
ref-dbs] \r\n```\r\nIf you have one or more already in your system, then create a symbolic link pointing\r\nat the `ref-dbs` folder or at one of its subfolders/files.\r\n\r\nThe final structure of the DB directory should be like the following:\r\n\r\n````bash\r\nuser@server:~/MetaGOflow: ls ref-dbs/\r\ndb_kofam/  diamond/  eggnog/  GO-slim/  interproscan-5.57-90.0/  kegg_pathways/  kofam_ko_desc.tsv  Rfam/  silva_lsu/  silva_ssu/\r\n````\r\n\r\n## How to run\r\n\r\n### Ensure that `Node.js` is installed on your system before running metaGOflow\r\n\r\nIf you have root access on your system, you can run the commands below to install it:\r\n\r\n##### DEBIAN/UBUNTU\r\n```bash\r\nsudo apt-get update -y\r\nsudo apt-get install -y nodejs\r\n```\r\n\r\n##### RH/CentOS\r\n```bash\r\nsudo yum install rh-nodejs\u003cstream version\u003e (e.g. rh-nodejs10)\r\n```\r\n\r\n### Set up the environment\r\n\r\n#### Run once - Setup environment\r\n\r\n- ```bash\r\n  conda create -n EOSC-CWL python=3.8\r\n  ```\r\n\r\n- ```bash\r\n  conda activate EOSC-CWL\r\n  ```\r\n\r\n- ```bash\r\n  pip install cwlref-runner cwltool[all] rdflib-jsonld rocrate pyyaml\r\n\r\n  ```\r\n\r\n#### Run every time\r\n\r\n```bash\r\nconda activate EOSC-CWL\r\n``` \r\n\r\n### Run the workflow\r\n\r\n- Edit the `config.yml` file to set the parameter values of your choice. For selecting all the steps, then set to `true` the variables in lines [2-6].\r\n\r\n#### Using Singularity\r\n\r\n##### Standalone\r\n- run:\r\n   ```bash\r\n   ./run_wf.sh -s -n osd-short -d short-test-case -f test_input/wgs-paired-SRR1620013_1.fastq.gz -r test_input/wgs-paired-SRR1620013_2.fastq.gz\r\n   ``\r\n\r\n##### Using a cluster with a queueing system (e.g. SLURM)\r\n\r\n- Create a job file (e.g., SBATCH file)\r\n\r\n- Enable Singularity, e.g. 
module load Singularity \u0026 all other dependencies \r\n\r\n- Add the run line to the job file\r\n\r\n\r\n#### Using Docker\r\n\r\n##### Standalone\r\n- run:\r\n    ``` bash\r\n    ./run_wf.sh -n osd-short -d short-test-case -f test_input/wgs-paired-SRR1620013_1.fastq.gz -r test_input/wgs-paired-SRR1620013_2.fastq.gz\r\n  ```\r\n  HINT: If you are using Docker, you may need to run the above command without the `-s` flag.\r\n\r\n## Testing samples\r\nThe samples are available in the `test_input` folder.\r\n\r\nWe provide metaGOflow with partial samples from the Human Metagenome Project ([SRR1620013](https://www.ebi.ac.uk/ena/browser/view/SRR1620013) and [SRR1620014](https://www.ebi.ac.uk/ena/browser/view/SRR1620014))\r\nThey are partial as only a small part of their sequences have been kept, so that the pipeline can be tested in a fast way. \r\n\r\n\r\n## Hints and tips\r\n\r\n1. In case you are using Docker, it is strongly recommended to **avoid** installing it through `snap`.\r\n\r\n2. `RuntimeError`: slurm currently does not support shared caching, because it does not support cleaning up a worker\r\n   after the last job finishes.\r\n   Set the `--disableCaching` flag if you want to use this batch system.\r\n\r\n3. In case you are having errors like:\r\n\r\n```\r\ncwltool.errors.WorkflowException: Singularity is not available for this tool\r\n```\r\n\r\nYou may run the following command:\r\n\r\n```\r\nsingularity pull --force --name debian:stable-slim.sif docker://debian:stable-slim\r\n```\r\n\r\n## Contribution\r\n\r\nTo make contribution to the project a bit easier, all the MGnify `conditionals` and `subworkflows` under\r\nthe `workflows/` directory that are not used in the metaGOflow framework, have been removed.   
\r\nHowever, all the MGnify `tools/` and `utils/` are available in this repo, even if they are not invoked in the current\r\nversion of metaGOflow.\r\nThis way, we hope we encourage people to implement their own `conditionals` and/or `subworkflows` by exploiting the\r\ncurrently supported `tools` and `utils` as well as by developing new `tools` and/or `utils`.\r\n\r\n\r\n\u003c!-- cwltool --print-dot my-wf.cwl | dot -Tsvg \u003e my-wf.svg --\u003e\r\n","organization":"emo-bon","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/384?version=1","name":"eosc-life-gos @ 28122db","author":["Haris Zafeiropoulos","Martin Beracochea"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/384?version=2","name":"eosc-life-gos @ deb5427","author":["Haris Zafeiropoulos","Martin Beracochea"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/384?version=3","name":"eosc-life-gos @ deb5427","author":["Haris Zafeiropoulos","Martin Beracochea"],"descriptor_type":["CWL"]}]},{"id":"386","url":"https://workflowhub.eu/workflows/386","name":"BackTrackBB: Multi-band array detection and location of seismic sources","description":"BackTrackBB is a program for detection and space-time location of seismic sources based on multi-scale, frequency-selective statistical coherence of the wave field recorded by dense large-scale seismic networks and local antennas. The method is designed to enhance coherence of the signal statistical features across the array of sensors and consists of three steps. 
They are signal processing, space-time imaging and detection and location.\r\n\r\nSource with inputs and outputs included (too big for WorkflowHub): [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7788030.svg)](https://doi.org/10.5281/zenodo.7788030)\r\n\r\nMore information: https://backtrackbb.github.io/","organization":"Cluster Emergent del Cervell Humà, Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/386?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/386?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"387","url":"https://workflowhub.eu/workflows/387","name":"DRC_template_toxicity","description":"Generates Dose-response curve fits on cell-based toxicity data. Outliers of replicate data-sets can be removed by setting a threshold for standard deviation (here set to 25). Curve fits for compounds showing low response can be removed by setting a threshold for minimum activity (here set to 75% confluence).\r\nThis workflow needs R-Server to run in the back-end. Start R and run the following command: library(Rserve); Rserve(args = \"--vanilla\")","organization":"EU-Openscreen","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/387?version=1","name":"Version 1","author":["Jeanette Reinshagen"],"descriptor_type":[]}]},{"id":"388","url":"https://workflowhub.eu/workflows/388","name":"DRC_cellbased_OutlierDetection","description":"This workflow can be used to fit dose-response curves from normalised cell-based assay data (%confluence) using the KNIME HCS extension. The workflow expects triplicates for each of eight test concentrations. This workflow needs R-Server to run in the back-end. 
Start R and run the following command: library(Rserve); Rserve(args = \"--vanilla\"). \r\nThree types of outliers can be removed: 1 - Outliers from triplicate measurement (standard deviation cut-off can be selected), 2 - inactive and weekly active compounds (% confluence cut-offs can be selected), 3 - toxic concentrations (cut-off for reduction in confluence with stepwise increasing concentration can be selected)\r\nOutput are two dose-response curve fits per compound for pre and post outlier removal with graphical representation and numerical fit parameters. \r\n","organization":"EU-Openscreen","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/388?version=1","name":"Version 1","author":["Jeanette Reinshagen"],"descriptor_type":[]}]},{"id":"389","url":"https://workflowhub.eu/workflows/389","name":"DRC_biochemical_toECBD","description":"This workflow can be used to fit dose-response curves from normalised biochemical assay data (%Inhibition) using the HCS extension. This workflow needs R-Server to run in the back-end. 
Start R and run the following command: library(Rserve); Rserve(args = \"--vanilla\")\r\nIC50 values will not be extrapolated outside the tested concentration range\r\nFor activity classification the following criteria are applied:\r\n- maximum (average % inhibition) \u003e25 % and slope is \u003e0 and IC50 \u003e 5 µM or\r\n- minimum (average % inhibition) \u003e75 %\r\nResults are formatted for upload to the European Chemical Biology Database (ECBD)","organization":"EU-Openscreen","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/389?version=1","name":"Version 1","author":["Jeanette Reinshagen"],"descriptor_type":[]}]},{"id":"390","url":"https://workflowhub.eu/workflows/390","name":"CroMaSt: A workflow for assessing protein domain classification by cross-mapping of structural instances between domain databases and structural alignment","description":"# CroMaSt: A workflow for assessing protein domain classification by cross-mapping of structural instances between domain databases and structural alignment\r\n\r\nCroMaSt (\u003cspan style=\"color:red\"\u003e**Cro**\u003c/span\u003ess \u003cspan style=\"color:red\"\u003e**Ma**\u003c/span\u003epper of domain \u003cspan style=\"color:red\"\u003e**St**\u003c/span\u003eructural instances) is an automated iterative workflow to clarify  the assignment of protein domains to a given domain type of interest, based on their 3D structure and by cross-mapping of domain structural instances between domain databases. CroMaSt (for Cross-Mapper of domain Structural instances) will classify all structural instances of a given domain type into 4 different categories (**Core**, **True**, **Domain-like**, and **Failed**). \r\n\r\n\r\n## Requirements\r\n1. [Conda](https://docs.conda.io/projects/conda/en/latest/) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html)\r\n2. 
[Kpax](http://kpax.loria.fr/download.php)  \r\nDownload and install conda (or Miniconda) and Kpax by following the instructions from their official site.\r\n\r\n\r\n## Get it running \r\n(Considering the requirements are already met)\r\n\r\n1. Clone the repository and change the directory\r\n\r\n```\r\ngit clone https://gitlab.inria.fr/capsid.public_codes/CroMaSt.git\r\ncd CroMaSt\r\n```\r\n\r\n2. Create the conda environment for the workflow\r\n```\r\nconda env create --file yml/environment.yml\r\nconda activate CroMaSt\r\n```\r\n\r\n3. Change the path of variables in parameter file\r\n```\r\nsed -i 's/\\/home\\/hdhondge\\/CroMaSt\\//\\/YOUR\\/PATH\\/TO_CroMaSt\\//g' yml/CroMaSt_input.yml \r\n```\r\n\r\n4. Create the directory to store files from PDB and SIFTS (if not already)\r\n```\r\nmkdir PDB_files SIFTS\r\n```\r\n\r\n5. Download the source input data\r\n```\r\ncwl-runner Tools/download_data.cwl yml/download_data.yml\r\n```\r\n\r\n## Basic example\r\n\r\n### 1. First, we will run the workflow for the KH domain with family identifiers `RRM_1` and `RRM` in Pfam and CATH, respectively.\r\nRun the workflow -\r\n\r\n```\r\ncwl-runner --parallel  --outdir=Results/  CroMaSt.cwl yml/CroMaSt_input.yml\r\n```\r\n\r\n### 2.  Once the iteration is complete, check the `new_param.yml` file from the `outputdir` (Results), if there is any family identifier in either `pfam` or `cath`; run the next iteration using following command (Until there is no new families explored by workflow) -\r\n\r\n```\r\ncwl-runner --parallel  --outdir=Results/  CroMaSt.cwl Results/new_param.yml\r\n```\r\n  \r\n### **Extra:** Start the workflow with multiple families from one or both databases  \r\nIf you would like to start the workflow with multiple families from one or both databases, then simply add a comma in between two family identifiers. 
\r\n```\r\npfam: ['PF00076', 'PF08777']\r\ncath: ['3.30.70.330']\r\n```\r\n\r\n- **Pro Tip**: Don't forget to give different path to `--outdir` option while running the workflow multiple times or at least move the results to some other location after first run.\r\n\r\n## Run the workflow for protein domain of your choice  \r\n### 1. You can run the workflow for the domain of your choice by simply changing the family identifiers in `yml/CroMaSt_input.yml` file.\r\n\r\nSimply replace the following values of family identifiers (for pfam and cath) with the family identifiers of your choice in `yml/CroMaSt_input.yml` file. \r\n```\r\npfam: ['PF00076']\r\ncath: ['3.30.70.330']\r\n```\r\n\r\n\r\n\r\n## Data files used in current version are as follows:\r\n**Files in Data directory can be downloaded as follows**:\r\n\r\n1. File used from Pfam database: [pdbmap.gz](http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam35.0/pdbmap.gz)\r\n\r\n2. File used from CATH database: [cath-domain-description-file.txt](ftp://orengoftp.biochem.ucl.ac.uk:21/cath/releases/latest-release/cath-classification-data/cath-domain-description-file.txt)  \r\n\r\n3. Obsolete entries from RCSB PDB\r\n[obsolete_PDB_entry_ids.txt](https://data.rcsb.org/rest/v1/holdings/removed/entry_ids)  \r\n\r\n\r\nCATH Version - 4.3.0 (Ver_Date - 11-Sep-2019) [FTP site](ftp://orengoftp.biochem.ucl.ac.uk/cath/releases/latest-release/cath-classification-data/)\r\nPfam Version - 35.0 (Ver_Date - November-2021) [FTP site](http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam35.0/)\r\n\r\n## Reference\r\n```\r\nPoster - \r\n1. Hrishikesh Dhondge, Isaure Chauvot de Beauchêne, Marie-Dominique Devignes. CroMaSt: A workflow for domain family curation through cross-mapping of structural instances between protein domain databases. 21st European Conference on Computational Biology, Sep 2022, Sitges, Spain. 
⟨hal-03789541⟩\r\n\r\n```\r\n\r\n## Acknowledgements\r\nThis  project  has  received  funding  from  the  Marie  Skłodowska-Curie Innovative Training Network (MSCA-ITN) RNAct supported by European Union’s Horizon 2020 research and innovation programme under grant agreement No 813239.\r\n","organization":"CAPSID","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/390?version=1","name":"main @ 9f38328","author":["Hrishikesh Dhondge","Marie-Dominique Devignes"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/390?version=2","name":"v1.1","author":["Hrishikesh Dhondge","Marie-Dominique Devignes"],"descriptor_type":["CWL"]}]},{"id":"393","url":"https://workflowhub.eu/workflows/393","name":"IndexReferenceFasta-nf","description":"# IndexReferenceFasta-nf\r\n===========\r\n\r\n  - [Description](#description)\r\n  - [Diagram](#diagram)\r\n  - [User guide](#user-guide)\r\n  - [Benchmarking](#benchmarking)\r\n  - [Workflow summaries](#workflow-summaries)\r\n      - [Metadata](#metadata)\r\n      - [Component tools](#component-tools)\r\n      - [Required (minimum)\r\n        inputs/parameters](#required-minimum-inputsparameters)\r\n  - [Additional notes](#additional-notes)\r\n  - [Help/FAQ/Troubleshooting](#helpfaqtroubleshooting)\r\n  - [Acknowledgements/citations/credits](#acknowledgementscitationscredits)\r\n\r\n---\r\n\r\n## Description\r\nThis is a flexible pipeline for generating common reference genome index files for WGS data analysis. 
IndexReferenceFasta-nf is a Nextflow (DSL2) pipeline that runs the following tools using Singularity containers:\r\n* Samtools faidx\r\n* BWA index\r\n* GATK CreateSequenceDictionary \r\n\r\n## Diagram\r\n\u003cp align=\"center\"\u003e \r\n\u003cimg src=\"https://user-images.githubusercontent.com/73086054/189310509-375fea4f-11fb-41ca-ba52-90760e9a5aa3.png\" width=\"80%\"\u003e\r\n\u003c/p\u003e \r\n\r\n## User guide\r\n**1. Set up**\r\n\r\nClone this repository by running:\r\n```\r\ngit clone https://github.com/Sydney-Informatics-Hub/IndexReferenceFasta-nf.git\r\ncd IndexReferenceFasta-nf\r\n``` \r\n\r\n**2. Generate indexes**  \r\n\r\nUsers can specify which index files to create by using the `--samtools`, `--bwa`, and/or `--gatk` flags. All are optional. Run the pipeline with:\r\n\r\n```\r\nnextflow run main.nf /path/to/ref.fasta --bwa --samtools --gatk \r\n```\r\n\r\n## Benchmarking\r\n\r\n### Human hg38 reference assembly @ Pawsey's Nimbus (NCPU/task = 1)\r\n|task_id|hash     |native_id|name          |status   |exit|submit |duration  |realtime  |%cpu   |peak_rss|peak_vmem|rchar  |wchar  |\r\n|-------|---------|---------|--------------|---------|----|-------|----------|----------|-------|--------|---------|-------|-------|\r\n|3      |27/33fffc|131621   |samtools_index|COMPLETED|0   |55:44.9|12.2s     |12s       |99.20% |6.3 MB  |11.8 MB  |3 GB   |19.1 KB|\r\n|1      |80/f03e46|131999   |gatk_index    |COMPLETED|0   |55:46.7|22.6s     |22.3s     |231.90%|3.8 GB  |37.1 GB  |3.1 GB |726 KB |\r\n|2      |ea/e29535|131594   |bwa_index     |COMPLETED|0   |55:44.9|1h 50m 16s|1h 50m 15s|99.50% |4.5 GB  |4.5 GB   |12.1 GB|8.2 GB |\r\n\r\n## Workflow summaries\r\n\r\n### Metadata\r\n|metadata field     | workflow_name / workflow_version  |\r\n|-------------------|:---------------------------------:|\r\n|Version            | workflow_version                  |\r\n|Maturity           | under development                 |\r\n|Creators           | Georgie Samaha              
      |\r\n|Source             | NA                                |\r\n|License            | GPL-3.0 license                   |\r\n|Workflow manager   | NextFlow                          |\r\n|Container          | None                              |\r\n|Install method     | Manual                            |\r\n|GitHub             | Sydney-Informatics-Hub/IndexReferenceFasta-nf                                |\r\n|bio.tools          | NA                                |\r\n|BioContainers      | NA                                | \r\n|bioconda           | NA                                |\r\n\r\n### Component tools\r\n\r\n* samtools/1.15.1\r\n* gatk/4.2.6.1 \r\n* bwa/0.7.17\r\n\r\n### Required (minimum) inputs/parameters\r\n\r\n* A reference genome file in fasta format.\r\n\r\n## Additional notes\r\n\r\n### Help/FAQ/Troubleshooting\r\n\r\n## Acknowledgements/citations/credits\r\n### Authors \r\n- Georgie Samaha (Sydney Informatics Hub, University of Sydney)   \r\n\r\n### Acknowledgements \r\n\r\n- This pipeline was built using the [Nextflow DSL2 template](https://github.com/Sydney-Informatics-Hub/Nextflow_DSL2_template).  \r\n- Documentation was created following the [Australian BioCommons documentation guidelines](https://github.com/AustralianBioCommons/doc_guidelines).  \r\n\r\n### Cite us to support us! \r\nAcknowledgements (and co-authorship, where appropriate) are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub and national compute facilities. We suggest including the following acknowledgement in any publications that follow from this work:  \r\n\r\nThe authors acknowledge the technical assistance provided by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney and the Australian BioCommons which is enabled by NCRIS via Bioplatforms Australia. 
\r\n","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/393?version=1","name":"main @ ac2d99e","author":["Georgina Samaha"],"descriptor_type":["NFL"]}]},{"id":"395","url":"https://workflowhub.eu/workflows/395","name":"cutandrun/main","description":"This workflow take as input a collection of paired fastq. Remove adapters with cutadapt, map pairs with bowtie2 allowing dovetail. Keep MAPQ30 and concordant pairs. BAM to BED. MACS2 with \"ATAC\" parameters.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/395?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/395?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/395?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/395?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/395?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/395?version=6","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/395?version=7","name":"v0.6.1","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/395?version=8","name":"v0.10","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/395?version=9","name":"v0.11","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/395?version=10","name":"v0.12","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url"
:"https://workflowhub.eu/workflows/395?version=11","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/395?version=12","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/395?version=13","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/395?version=14","name":"v0.13","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/395?version=15","name":"v0.14","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/395?version=16","name":"v0.15","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/395?version=17","name":"v0.16","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/395?version=18","name":"v0.17","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/395?version=19","name":"v0.18","author":[],"descriptor_type":["GALAXY"]}]},{"id":"397","url":"https://workflowhub.eu/workflows/397","name":"chipseq-sr/main","description":"Complete ChIP-seq analysis for single-end sequencing data. Processes raw FASTQ files through adapter removal (cutadapt), alignment to reference genome (Bowtie2), and quality filtering (MAPQ \u0026gt;= 30). Peak calling with MACS2 uses either a fixed extension parameter or built-in model to identify protein-DNA binding sites. 
Generates alignment files, peak calls, and quality metrics for downstream analysis.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/397?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/397?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/397?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/397?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/397?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/397?version=6","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/397?version=7","name":"v0.10","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/397?version=8","name":"v0.11","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/397?version=9","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/397?version=10","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/397?version=11","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/397?version=12","name":"v0.12","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/397?version=13","name":"v0.13","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/397?version=14","name":"v0.14","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/397?
version=15","name":"v0.15","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/397?version=16","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/397?version=17","name":"v1.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"398","url":"https://workflowhub.eu/workflows/398","name":"chipseq-pe/main","description":"Complete ChIP-seq analysis for paired-end sequencing data. Processes raw FASTQ files through adapter removal (cutadapt), alignment to reference genome (Bowtie2), and stringent quality filtering (MAPQ \u0026gt;= 30, concordant pairs only). Peak calling with MACS2 optimized for paired-end reads identifies protein-DNA binding sites. Generates alignment files, peak calls, and quality metrics for downstream analysis.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/398?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/398?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/398?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/398?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/398?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/398?version=6","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/398?version=7","name":"v0.10","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/398?version=8","name":"v0.11","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflow
s/398?version=9","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/398?version=10","name":"v0.6.1","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/398?version=11","name":"v0.7.0","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/398?version=12","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/398?version=13","name":"v0.12","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/398?version=14","name":"v0.13","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/398?version=15","name":"v0.14","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/398?version=16","name":"v0.15","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/398?version=17","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/398?version=18","name":"v1.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"399","url":"https://workflowhub.eu/workflows/399","name":"atacseq/main","description":"This workflow take as input a collection of paired fastq. It will remove bad quality and adapters with cutadapt. Map with Bowtie2 end-to-end. Will remove reads on MT and unconcordant pairs and pairs with mapping quality below 30 and PCR duplicates. Will compute the pile-up on 5' +- 100bp. Will call peaks and count the number of reads falling in the 1kb region centered on the summit. 
Will plot the number of reads for each fragment length.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/399?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/399?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/399?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/399?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/399?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/399?version=6","name":"v0.5.1","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/399?version=7","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/399?version=8","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/399?version=9","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/399?version=10","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/399?version=11","name":"v0.14","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/399?version=12","name":"v0.15","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/399?version=13","name":"v0.16","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/399?version=14","name":"v0.10","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/399?version=15","name":"v0.11",
"author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/399?version=16","name":"v0.12","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/399?version=17","name":"v0.13","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/399?version=18","name":"v0.17","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/399?version=19","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"20","url":"https://workflowhub.eu/workflows/399?version=20","name":"v2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"400","url":"https://workflowhub.eu/workflows/400","name":"rnaseq-sr/main","description":"This workflow takes as input a list of single-read fastqs. Adapters and bad quality bases are removed with cutadapt. Reads are mapped with STAR with ENCODE parameters and genes are counted simultaneously. The counts are reprocess to be similar to HTSeq-count output. FPKM are computed with cufflinks. 
Coverage (per million mapped reads) are computed with bedtools on uniquely mapped reads.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/400?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/400?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/400?version=3","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/400?version=4","name":"v0.4.1","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/400?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/400?version=6","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/400?version=7","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/400?version=8","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/400?version=9","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/400?version=10","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/400?version=11","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/400?version=12","name":"v1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/400?version=13","name":"v1.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"401","url":"https://workflowhub.eu/workflows/401","name":"rnaseq-pe/main","description":"This workflow takes as input a list of paired-end fastqs. 
Adapters and bad quality bases are removed with cutadapt. Reads are mapped with STAR with ENCODE parameters and genes are counted simultaneously. The counts are reprocess to be similar to HTSeq-count output. FPKM are computed with cufflinks. Coverage (per million mapped reads) are computed with bedtools on uniquely mapped reads (with R2 orientation inverted).","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/401?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/401?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/401?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/401?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/401?version=5","name":"v0.4.1","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/401?version=6","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/401?version=7","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/401?version=8","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/401?version=9","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/401?version=10","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/401?version=11","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/401?version=12","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://
workflowhub.eu/workflows/401?version=13","name":"v1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/401?version=14","name":"v1.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"402","url":"https://workflowhub.eu/workflows/402","name":"X-omics ACTIONdemonstrator analysis workflow","description":"This workflow is designed to analyze to a multi-omics data set that comprises genome-wide DNA methylation profiles, targeted metabolomics, and behavioral data of two cohorts that participated in the ACTION Biomarker Study (ACTION, Aggression in Children: Unraveling gene-environment interplay to inform Treatment and InterventiON strategies. (Boomsma 2015, Bartels 2018, Hagenbeek 2020, van Dongen 2021, Hagenbeek 2022). The ACTION-NTR cohort consists of twins that are either longitudinally concordant or discordant for childhood aggression. The ACTION-Curium-LUMC cohort consists of children referred to the Dutch LUMC Curium academic center for child and youth psychiatry. With the joint analysis of multi-omics data and behavioral data, we aim to identify substructures in the ACTION-NTR cohort and link them to aggressive behavior. First, the individuals are clustered using Similarity Network Fusion (SNF, Wang 2014), and latent feature dimensions are uncovered using different unsupervised methods including Multi-Omics Factor Analysis (MOFA) (Argelaguet 2018) and Multiple Correspondence Analysis (MCA, Lê 2008, Husson 2017). In a second step, we determine correlations between -omics and phenotype dimensions, and use them to explain the subgroups of individuals from the ACTION-NTR cohort. 
In order to validate the results, we project data of the ACTION-Curium-LUMC cohort onto the latent dimensions and determine if correlations between omics and phenotype data can be reproduced.","organization":"X-omics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/402?version=1","name":"Version 1","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/402?version=2","name":"Version 1.1","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/402?version=3","name":"Version 1.2","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/402?version=4","name":"Version 4","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/402?version=5","name":"Version 2.0","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/402?version=6","name":"Version 2.1","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/402?version=7","name":"Version 2.2","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/402?version=8","name":"Version 2.3","author":["Casper de Visser","Anna Niehues"],"descriptor_type":["NFL"]}]},{"id":"403","url":"https://workflowhub.eu/workflows/403","name":"Genome-assessment-post-assembly","description":"Post-genome assembly quality control workflow using Quast, BUSCO, Meryl, Merqury and Fasta Statistics, with updates November 2024.\r\n\r\nWorkflow inputs: reads as fastqsanger.gz (not fastq.gz), and primary assembly.fasta. 
(To change reads format: click on the pencil icon next to the file in the Galaxy history, then \"Datatypes\", then set \"New type\" as fastqsanger.gz). Note: the reads should be those that were used for the assembly (i.e., the filtered/cleaned reads), not the raw reads. \r\n\r\nWhat it does:\r\nComputes read coverage. \r\nRuns Quast. \r\nRuns Fasta Statistics.\r\nRuns Meryl and Merqury. \r\nRuns Busco. (New default settings for BUSCO: lineage = eukaryota; for Quast: lineage = eukaryotes, genome = large.)\r\n\r\nWorkflow outputs:\r\nReports assembly stats into a table called metrics.tsv, including selected metrics from Fasta Stats, and read coverage; reports BUSCO versions and dependencies; and displays these tables in the workflow report.\r\n\r\nNote: a known bug is that sometimes the workflow report text resets to default text. \r\n\r\nTo check and restore: open the workflow in Galaxy for editing.\r\n\r\nClick on the \"Edit Report\" icon (top right, pencil icon). \r\n\r\nCopy and paste the following text into the workflow report, then exit this report page, then save the workflow.\r\n\r\n# Workflow Execution Report\r\n\r\nWorkflow name: Genome assessment post assembly\r\n\r\n## Genome assembly metrics\r\n\r\nSelected statistics from the workflow outputs. Additional metrics are available in other outputs in the history.\r\n\r\n```galaxy\r\nhistory_dataset_display(output=\"Genome assembly metrics\")\r\n```\r\n## Software\r\n\r\nBusco version and dependencies:\r\n\r\n```galaxy\r\nhistory_dataset_display(output=\"Busco and dependencies version\")\r\n```\r\n## Galaxy Australia\r\n\r\nThanks for using Galaxy! When you use Galaxy Australia to support your publication or project, please acknowledge its use with the following statement: \"This work is supported by Galaxy Australia, a service provided by the Australian Biocommons and its partners. 
The service receives NCRIS funding through Bioplatforms Australia and the Australian Research Data Commons (https://doi.org/10.47486/PL105), as well as The University of Melbourne and Queensland Government RICF funding.\"\r\n","organization":"Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/403?version=1","name":"v1.0.0","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/403?version=2","name":"v1.1.0","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/403?version=3","name":"v2.0.2","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/403?version=4","name":"v2.0.4","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/403?version=5","name":"v2.0.5","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/403?version=6","name":"v2.0.6 - ignore","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/403?version=7","name":"v2.0.6 - ignore","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/403?version=8","name":"master @ 0154e28 - ignore","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/403?version=9","name":"v2.0.6","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/403?version=10","name":"v2.0.7","author":["Gareth Price","Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"404","url":"https://workflowhub.eu/workflows/404","name":"GBIF data Quality check and filtering workflow 
Feb-2020","description":"This Galaxy-E workflow was made from the [\"Cleaning GBIF data for the use in biogeography\" tutorial](https://ropensci.github.io/CoordinateCleaner/articles/Cleaning_GBIF_data_with_CoordinateCleaner.html) and allows you to:\r\n- Use CoordinateCleaner to automatically flag problematic records\r\n- Use GBIF provided meta-data to improve coordinate quality, tailored to your downstream analyses\r\n- Use automated cleaning algorithms of CoordinateCleaner to identify problematic contributing datasets\r\n- Visualize data on a map","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/404?version=1","name":"Version 1","author":["Yvan Le Bras"],"descriptor_type":["GALAXY"]}]},{"id":"406","url":"https://workflowhub.eu/workflows/406","name":"GTN Training - Antibiotic Resistance Detection","description":"Workflow for the GTN training \"Antibiotic resistance detection\"","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/406?version=1","name":"Version 1","author":["Saskia Hiltemann","Willem de Koning"],"descriptor_type":["GALAXY"]}]},{"id":"407","url":"https://workflowhub.eu/workflows/407","name":"Workflow 3: AMR - SeqSero2/SISTR","description":"With this galaxy pipeline you can use Salmonella sp. next generation sequencing results to predict bacterial AMR phenotypes and compare the results against gold standard Salmonella sp. phenotypes obtained from food.\r\n\r\nThis pipeline is based on the work of the National Food Agency of Canada.  
\r\nDoi: [10.3389/fmicb.2020.00549](https://doi.org/10.3389/fmicb.2020.00549)\r\n\r\n| tool | version | license |\r\n| -- | -- | -- |\r\n| SeqSero2 | 1.2.1 | [GNU GPL v2.0](https://github.com/denglab/SeqSero2/blob/master/LICENSE) |\r\n| BBTools | 39.01 | [MIT License](https://github.com/kbaseapps/BBTools/blob/master/LICENSE) |\r\n| SRST2 | 0.2.0 | [BSD License](https://github.com/katholt/srst2/blob/master/LICENSE.txt) |\r\n| hamronize | 1.0.3 | [GNU LGPL v3.0](https://github.com/pha4ge/hAMRonization/blob/master/LICENSE.txt) |\r\n| SPAdes | v3.15.5 | [GNU GPL v2.0](https://github.com/ablab/spades/blob/main/LICENSE) |\r\n| SKESA | 3.0.0 | [Public Domain](https://github.com/ncbi/SKESA/blob/master/LICENSE) |\r\n| pilon | 1.1.0 | [GNU GPL v2.0](https://github.com/broadinstitute/pilon/blob/master/LICENSE) |\r\n| shovill | 1.0.4 | [GPL-3.0 license](https://github.com/tseemann/shovill/blob/master/LICENSE) |\r\n| sistr | 1.1.1 | [Apache-2.0 license](https://github.com/phac-nml/sistr_cmd/blob/master/LICENSE) |\r\n| MOB-Recon | 3.0.3 | [Apache-2.0 license](https://github.com/phac-nml/mob-suite/blob/master/LICENSE) |","organization":"Seq4AMR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/407?version=1","name":"Version 0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"409","url":"https://workflowhub.eu/workflows/409","name":"EmpiricalReads2Map","description":"[![Development](https://img.shields.io/badge/development-active-blue.svg)](https://img.shields.io/badge/development-active-blue.svg)\r\n[![Reads2Map](https://circleci.com/gh/Cristianetaniguti/Reads2Map.svg?style=svg)](https://app.circleci.com/pipelines/github/Cristianetaniguti/Reads2Map)\r\n\r\n## Reads2Map \r\n\r\nReads2Map presents a collection of [WDL workflows](https://openwdl.org/)  to build linkage maps from sequencing reads. 
Each workflow release is described in the [Read2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases). \r\n\r\nThe main workflows are the `EmpiricalReads2Map.wdl` and the `SimulatedReads2Map.wdl`. The `EmpiricalReads2Map.wdl` is composed by the `EmpiricalSNPCalling.wdl` that performs the SNP calling, and the `EmpiricalMaps.wdl` that performs the genotype calling and map building in empirical reads. The `SimulatedReads2Map.wdl` simulates Illumina reads for RADseq, exome, or WGS data and performs the SNP and genotype calling and genetic map building.\r\n\r\nBy now, [GATK](https://github.com/broadinstitute/gatk), [Freebayes](https://github.com/ekg/freebayes) are included for SNP calling; [updog](https://github.com/dcgerard/updog), [polyRAD](https://github.com/lvclark/polyRAD), [SuperMASSA](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0030906) for dosage calling; and [OneMap](https://github.com/augusto-garcia/onemap), and [GUSMap](https://github.com/tpbilton/GUSMap) for linkage map build.\r\n\r\n![math_meth2](https://user-images.githubusercontent.com/7572527/203172239-e4d2d857-84e2-48c5-bb88-01052a287004.png)\r\n\r\n## How to use\r\n\r\nMultiple systems are available to run WDL workflows such as Cromwell, miniWDL, and dxWDL. 
See further information in the [openwdl documentation](https://github.com/openwdl/wdl#execution-engines).\r\n\r\nTo run a pipeline, first navigate to [Reads2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases), search for the pipeline tag you wish to run, and download the pipeline’s assets (the WDL workflow, the JSON, and the ZIP with accompanying dependencies).\r\n\r\n## Documentation\r\n\r\nCheck the description of the inputs for the pipelines:\r\n\r\n* [EmpiricalReads2Map (EmpiricalSNPCalling and EmpiricalMaps)](https://cristianetaniguti.github.io/Tutorials/Reads2Map/EmpiricalReads.html)\r\n\r\n* [SimulatedReads2Map](https://cristianetaniguti.github.io/Tutorials/Reads2Map/simulatedreads.html)\r\n\r\nCheck how to evaluate the workflows results in Reads2MapApp Shiny:\r\n\r\n* [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp)\r\n\r\nOnce you selected the best pipeline using a subset of your data, you can build a complete high-density linkage map:\r\n\r\n* [A Guide to Build High-Density Linkage Maps](https://cristianetaniguti.github.io/Tutorials/onemap/Quick_HighDens/High_density_maps.html)\r\n\r\nCheck more information and examples of usage in:\r\n\r\n* [Taniguti, C. H., Taniguti, L. M., Amadeu, R. R., Mollinari, M., Da, G., Pereira, S., Riera-Lizarazu, O., Lau, J., Byrne, D., de Siqueira Gesteira, G., De, T., Oliveira, P., Ferreira, G. C., \u0026#38; Franco Garcia, A. A.  Developing best practices for genotyping-by-sequencing analysis using linkage maps as benchmarks. BioRxiv. 
https://doi.org/10.1101/2022.11.24.517847](https://www.biorxiv.org/content/10.1101/2022.11.24.517847v2)\r\n\r\n## Third-party software and images\r\n\r\n- [BWA](https://github.com/lh3/bwa) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Used to align simulated reads to reference;\r\n- [cutadapt](https://github.com/marcelm/cutadapt) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): Trim simulated reads;\r\n- [ddRADseqTools](https://github.com/GGFHF/ddRADseqTools) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): Set of applications useful to in silico design and testing of double digest RADseq (ddRADseq) experiments;\r\n- [Freebayes](https://github.com/ekg/freebayes) in [Cristaniguti/freebayes:0.0.1](): Variant call step;\r\n- [GATK](https://github.com/broadinstitute/gatk) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Variant call step using Haplotype Caller, GenomicsDBImport and GenotypeGVCFs;\r\n- [PedigreeSim](https://github.com/PBR/pedigreeSim?files=1) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Simulates progeny genotypes from parents genotypes for different types of populations;\r\n- [picard](https://github.com/broadinstitute/picard) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Process alignment files;\r\n- [pirs](https://github.com/galaxy001/pirs) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): To generate simulates paired-end reads from a 
reference genome;\r\n- [samtools](https://github.com/samtools/samtools) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Process alignment files;\r\n- [SimuSCoP](https://github.com/qasimyu/simuscop) in [cristaniguti/simuscopr:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/simuscopr): Exome and WGS Illumina reads simulations;\r\n- [RADinitio](http://catchenlab.life.illinois.edu/radinitio/) in [\tcristaniguti/radinitio:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/radinitio): RADseq Illumina reads simulation;\r\n- [SuperMASSA](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0030906) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Efficient Exact Maximum a Posteriori Computation for Bayesian SNP Genotyping in Polyploids;\r\n- [bcftools](https://github.com/samtools/bcftools) in [lifebitai/bcftools:1.10.2](https://hub.docker.com/r/lifebitai/bcftools): utilities for variant calling and manipulating VCFs and BCFs;\r\n- [vcftools](http://vcftools.sourceforge.net/) in [cristaniguti/split_markers:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/split_markers): program package designed for working with VCF files.\r\n- [MCHap](https://github.com/PlantandFoodResearch/MCHap) in [cristaniguti/mchap:0.7.0](https://hub.docker.com/repository/docker/cristaniguti/mchap): Polyploid micro-haplotype assembly using Markov chain Monte Carlo simulation.\r\n\r\n### R packages\r\n\r\n- [OneMap](https://github.com/augusto-garcia/onemap) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Is a software for constructing genetic maps in experimental crosses: full-sib, RILs, F2 and backcrosses;\r\n- [Reads2MapTools](https://github.com/Cristianetaniguti/Reads2MapTools) in 
[cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Support package to perform mapping populations simulations and genotyping for OneMap genetic map building\r\n- [GUSMap](https://github.com/tpbilton/GUSMap): Genotyping Uncertainty with Sequencing data and linkage MAPping\r\n- [updog](https://github.com/dcgerard/updog) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Flexible Genotyping of Polyploids using Next Generation Sequencing Data\r\n- [polyRAD](https://github.com/lvclark/polyRAD) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Genotype Calling with Uncertainty from Sequencing Data in Polyploids\r\n- [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp) in [cristaniguti/reads2mapApp:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Shiny app to evaluate Reads2Map workflows results\r\n- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations.","organization":"Read2Map","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/409?version=1","name":"main @ 09cc213","author":["Cristiane Taniguti"],"descriptor_type":[]}]},{"id":"410","url":"https://workflowhub.eu/workflows/410","name":"SimulatedReads2Map","description":"[![Development](https://img.shields.io/badge/development-active-blue.svg)](https://img.shields.io/badge/development-active-blue.svg)\r\n[![Reads2Map](https://circleci.com/gh/Cristianetaniguti/Reads2Map.svg?style=svg)](https://app.circleci.com/pipelines/github/Cristianetaniguti/Reads2Map)\r\n\r\n## Reads2Map \r\n\r\nReads2Map presents a collection of [WDL workflows](https://openwdl.org/)  to build linkage maps from sequencing reads. 
Each workflow release is described in the [Read2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases). \r\n\r\nThe main workflows are the `EmpiricalReads2Map.wdl` and the `SimulatedReads2Map.wdl`. The `EmpiricalReads2Map.wdl` is composed by the `EmpiricalSNPCalling.wdl` that performs the SNP calling, and the `EmpiricalMaps.wdl` that performs the genotype calling and map building in empirical reads. The `SimulatedReads2Map.wdl` simulates Illumina reads for RADseq, exome, or WGS data and performs the SNP and genotype calling and genetic map building.\r\n\r\nBy now, [GATK](https://github.com/broadinstitute/gatk), [Freebayes](https://github.com/ekg/freebayes) are included for SNP calling; [updog](https://github.com/dcgerard/updog), [polyRAD](https://github.com/lvclark/polyRAD), [SuperMASSA](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0030906) for dosage calling; and [OneMap](https://github.com/augusto-garcia/onemap), and [GUSMap](https://github.com/tpbilton/GUSMap) for linkage map build.\r\n\r\n![math_meth2](https://user-images.githubusercontent.com/7572527/203172239-e4d2d857-84e2-48c5-bb88-01052a287004.png)\r\n\r\n## How to use\r\n\r\nMultiple systems are available to run WDL workflows such as Cromwell, miniWDL, and dxWDL. 
See further information in the [openwdl documentation](https://github.com/openwdl/wdl#execution-engines).\r\n\r\nTo run a pipeline, first navigate to [Reads2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases), search for the pipeline tag you wish to run, and download the pipeline’s assets (the WDL workflow, the JSON, and the ZIP with accompanying dependencies).\r\n\r\n## Documentation\r\n\r\nCheck the description of the inputs for the pipelines:\r\n\r\n* [EmpiricalReads2Map (EmpiricalSNPCalling and EmpiricalMaps)](https://cristianetaniguti.github.io/Tutorials/Reads2Map/EmpiricalReads.html)\r\n\r\n* [SimulatedReads2Map](https://cristianetaniguti.github.io/Tutorials/Reads2Map/simulatedreads.html)\r\n\r\nCheck how to evaluate the workflows results in Reads2MapApp Shiny:\r\n\r\n* [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp)\r\n\r\nOnce you selected the best pipeline using a subset of your data, you can build a complete high-density linkage map:\r\n\r\n* [A Guide to Build High-Density Linkage Maps](https://cristianetaniguti.github.io/Tutorials/onemap/Quick_HighDens/High_density_maps.html)\r\n\r\nCheck more information and examples of usage in:\r\n\r\n* [Taniguti, C. H., Taniguti, L. M., Amadeu, R. R., Mollinari, M., Da, G., Pereira, S., Riera-Lizarazu, O., Lau, J., Byrne, D., de Siqueira Gesteira, G., De, T., Oliveira, P., Ferreira, G. C., \u0026#38; Franco Garcia, A. A.  Developing best practices for genotyping-by-sequencing analysis using linkage maps as benchmarks. BioRxiv. 
https://doi.org/10.1101/2022.11.24.517847](https://www.biorxiv.org/content/10.1101/2022.11.24.517847v2)\r\n\r\n## Third-party software and images\r\n\r\n- [BWA](https://github.com/lh3/bwa) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Used to align simulated reads to reference;\r\n- [cutadapt](https://github.com/marcelm/cutadapt) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): Trim simulated reads;\r\n- [ddRADseqTools](https://github.com/GGFHF/ddRADseqTools) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): Set of applications useful to in silico design and testing of double digest RADseq (ddRADseq) experiments;\r\n- [Freebayes](https://github.com/ekg/freebayes) in [Cristaniguti/freebayes:0.0.1](): Variant call step;\r\n- [GATK](https://github.com/broadinstitute/gatk) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Variant call step using Haplotype Caller, GenomicsDBImport and GenotypeGVCFs;\r\n- [PedigreeSim](https://github.com/PBR/pedigreeSim?files=1) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Simulates progeny genotypes from parents genotypes for different types of populations;\r\n- [picard](https://github.com/broadinstitute/picard) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Process alignment files;\r\n- [pirs](https://github.com/galaxy001/pirs) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): To generate simulates paired-end reads from a 
reference genome;\r\n- [samtools](https://github.com/samtools/samtools) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Process alignment files;\r\n- [SimuSCoP](https://github.com/qasimyu/simuscop) in [cristaniguti/simuscopr:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/simuscopr): Exome and WGS Illumina reads simulations;\r\n- [RADinitio](http://catchenlab.life.illinois.edu/radinitio/) in [\tcristaniguti/radinitio:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/radinitio): RADseq Illumina reads simulation;\r\n- [SuperMASSA](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0030906) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Efficient Exact Maximum a Posteriori Computation for Bayesian SNP Genotyping in Polyploids;\r\n- [bcftools](https://github.com/samtools/bcftools) in [lifebitai/bcftools:1.10.2](https://hub.docker.com/r/lifebitai/bcftools): utilities for variant calling and manipulating VCFs and BCFs;\r\n- [vcftools](http://vcftools.sourceforge.net/) in [cristaniguti/split_markers:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/split_markers): program package designed for working with VCF files.\r\n- [MCHap](https://github.com/PlantandFoodResearch/MCHap) in [cristaniguti/mchap:0.7.0](https://hub.docker.com/repository/docker/cristaniguti/mchap): Polyploid micro-haplotype assembly using Markov chain Monte Carlo simulation.\r\n\r\n### R packages\r\n\r\n- [OneMap](https://github.com/augusto-garcia/onemap) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Is a software for constructing genetic maps in experimental crosses: full-sib, RILs, F2 and backcrosses;\r\n- [Reads2MapTools](https://github.com/Cristianetaniguti/Reads2MapTools) in 
[cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Support package to perform mapping populations simulations and genotyping for OneMap genetic map building\r\n- [GUSMap](https://github.com/tpbilton/GUSMap): Genotyping Uncertainty with Sequencing data and linkage MAPping\r\n- [updog](https://github.com/dcgerard/updog) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Flexible Genotyping of Polyploids using Next Generation Sequencing Data\r\n- [polyRAD](https://github.com/lvclark/polyRAD) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Genotype Calling with Uncertainty from Sequencing Data in Polyploids\r\n- [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp) in [cristaniguti/reads2mapApp:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Shiny app to evaluate Reads2Map workflows results\r\n- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations.","organization":"Read2Map","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/410?version=1","name":"main @ 09cc213","author":["Cristiane Taniguti"],"descriptor_type":[]}]},{"id":"411","url":"https://workflowhub.eu/workflows/411","name":"ParslRNA-seq Scientific Workflow","description":"# RNA-seq Scientific Workflow\r\nWorkflow for RNA sequencing using the Parallel Scripting Library - Parsl.\r\n\r\n**Reference:** Cruz, L., Coelho, M., Terra, R., Carvalho, D., Gadelha, L., Osthoff, C., \u0026 Ocaña, K. (2021). *Workflows* Científicos de RNA-Seq em Ambientes Distribuídos de Alto Desempenho: Otimização de Desempenho e Análises de Dados de Expressão Diferencial de Genes. 
In *Anais do XV Brazilian e-Science Workshop*, p. 57-64. Porto Alegre: SBC. DOI: https://doi.org/10.5753/bresci.2021.15789\r\n\r\n## Requirements\r\n\r\nIn order to use RNA-seq Workflow the following tools must be available:\r\n\r\n- [Bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)\r\n\r\nYou can install Bowtie2 by running:\r\n\r\n\u003e bowtie2-2.3.5.1-linux-x86_64.zip\r\n\r\nOr\r\n\r\n\u003e sudo yum install bowtie2-2.3.5-linux-x86_64\r\n\r\n- [Samtools](http://www.htslib.org/)\r\n\r\nSamtools is a suite of programs for interacting with high-throughput sequencing data.\r\n\r\n- [Picard](https://github.com/broadinstitute/picard)\r\n\r\nPicard is a set of Java command line tools for manipulating high-throughput sequencing (HTS) data and formats.\r\n\r\n- [HTSeq](https://htseq.readthedocs.io/en/master/)\r\n\r\nHTSeq is a native Python library that follows conventions of many Python packages. You can install it by running:\r\n\r\n\u003e pip install HTSeq\r\n\r\nHTSeq uses [NumPy](https://numpy.org/), [Pysam](https://github.com/pysam-developers/pysam) and [matplotlib](https://matplotlib.org/). Be sure these tools are installed.\r\n\r\n- [R](https://www.r-project.org/)\r\n\r\nTo use [DESEq2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) script make sure R language is also installed. You can install it by running:\r\n\r\n\r\n\u003e sudo apt install r-base\r\n\r\n- [Parsl - Parallel Scripting Library](https://parsl.readthedocs.io/en/stable/index.html)\r\n\r\nThe recommended way to install Parsl is the suggested approach from Parsl's documentation:\r\n\r\n\r\n\u003e python3 -m pip install parsl\r\n\r\n- [Python (version \u003e= 3.5)](https://www.python.org/)\r\n\r\nTo use Parsl, you need Python 3.5 or above. You also need Python to use HTSeq, so you should load only one Python version.\r\n\r\n## Workflow invocation\r\n\r\nFirst of all, make a Comma Separated Values (CSV) file. So, onto the first line type: ``sampleName,fileName,condition``. 
**Remember, there must be no spaces between items**. You can use the file *\"table.csv\"* in this repository as an example. Your CSV file will be like this:\r\n\r\n   |    sampleName    |     fileName     |condition|\r\n   |------------------|------------------|---------|\r\n   | tissue control 1 | SRR5445794.merge.count | control |\r\n   | tissue control 2 | SRR5445795.merge.count | control |\r\n   | tissue control 3 | SRR5445796.merge.count | control |\r\n   | tissue wntup 1   | SRR5445797.merge.count | wntup   |\r\n   | tissue wntup 2   | SRR5445798.merge.count | wntup   |\r\n   | tissue wntup 3   | SRR5445799.merge.count | wntup   |\r\n\r\nThe list of command line arguments passed to Python script, beyond the script's name, must be: \r\n\r\n 1. The indexed genome; \r\n 2. The number of threads for bowtie task, sort task, number of splitted files for split_picard task and number of CPU running in htseq task; \r\n 3. Path to read fastq file, which is the path of the input files; \r\n 4. Directory's name where the output files must be placed;  \r\n 5. GTF file;\r\n 6. and, lastly the DESeq script. \r\n \r\nMake sure all the files necessary to run the workflow are in the same directory and the fastq files in a dedicated folder, as an input directory. 
The command line will be like this:\r\n\r\n\u003e python3 rna-seq.py ../mm9/mm9 24 ../inputs/ ../outputs ../Mus_musculus.NCBIM37.67.gtf ../DESeq.R\r\n\r\n**Remember to adjust the parameter multithreaded and multicore according to your computational environment.** \r\nExample: If your machine has 8 cores, you should set the parameter to 8.\r\n","organization":"ParslRNA-Seq: an efficient and scalable RNAseq analysis workflow for studies of differentiated gene expression","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/411?version=1","name":"master @ 22ad96e","author":["Lucas Cruz","Luiz Gadelha","Kary Ocaña"],"descriptor_type":[]}]},{"id":"412","url":"https://workflowhub.eu/workflows/412","name":"RNAseq_UMG_SDumont_v1","description":"RNAseq workflow UMG: Here we introduce a scientific workflow implementing several open-source software executed by Galaxy parallel scripting language in a high-performance computing environment. We have applied the workflow to a single-cardiomyocyte RNA-seq data retrieved from Gene Expression Omnibus database. The workflow allows for the analysis (alignment, QC, sort and count reads, statistics generation) of raw RNA-seq data and seamless integration of differential expression results into a configurable script code.\r\n","organization":"ParslRNA-Seq: an efficient and scalable RNAseq analysis workflow for studies of differentiated gene expression","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/412?version=1","name":"Version 1","author":["Kary Ocaña"],"descriptor_type":["GALAXY"]}]},{"id":"413","url":"https://workflowhub.eu/workflows/413","name":"Mothra","description":"Example workflow which allows the use of Mothra\r\n\r\nAccepts (e.g.) 
[these](https://github.com/machine-shop/mothra-data/tree/main/test_images) input files, bundled as a collection.","organization":"Specimen Data Refinery","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/413?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"414","url":"https://workflowhub.eu/workflows/414","name":"ReMM score","description":"The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (GRCh37) in terms of Mendelian diseases. This project updates the ReMM score for the genome build GRCh38 and combines GRCh37 and GRCh38 into one workflow.\r\n\r\n## Pre-requirements\r\n\r\n### Conda\r\nWe use Conda as software and dependency management tool. Conda installation guidelines can be found here:\r\n\r\nhttps://conda.io/projects/conda/en/latest/user-guide/install/index.html\r\n\r\n### Additional programs\r\nThese programs are used during the workflow. They usually need to be compiled, however, the repository already contains the executables or generated files.\r\n\r\n- [AttributeDB](https://github.com/visze/attributedb)\r\n- [Jannovar](https://github.com/charite/jannovar) \r\n- [parSMURF](https://github.com/AnacletoLAB/parSMURF)\r\n\r\n### Snakemake\r\n\r\nThe workflow is managed by Snakemake - a workflow management system used to create reproducible and scalable data analyses. To install Snakemake as well as all other required packages, you need to create a working environment according to the description in the file env/ReMM.yaml. 
For that, first\r\n\r\nClone the repository\r\n```\r\ngit clone https://github.com/kircherlab/ReMM\r\ncd ReMM\r\n```\r\n\r\nCreate a working environment and activate it\r\n\r\n```\r\nconda env create -n ReMM --file workflow/envs/ReMM.yaml\r\nconda activate ReMM\r\n```\r\n\r\nAll paths are relative to the Snakemake file so you do not need to change any path variables. Additionally, Snakemake creates all missing directories, so no need to create any aditional folders either.\r\n\r\n## Workflow\r\n\r\nThe workflow consists of four main parts:\r\n\r\n- Download of feature data\r\n- Data processing and cleaning\r\n- Model training and validation\r\n- Calculation of ReMM for the whole genome\r\n\r\nThe `workflow` folder contains a graph of the workflow and more detailed information on the most important steps.\r\n\r\nTo launch a snakemake workflow, you need to tell snakemake which file you want to generate. We defined all rules for multiple steps. They can be found here: `workflow/Snakefile`. For example, you want to generate all feature sets defined in a config file you can run:\r\n\r\n```\r\nsnakemake -c1 all_feature_sets\r\n```\r\n\r\nTo execute any step separately (see `README.md` in the `workflow` folder for details on workflow steps), you need to look up the name of the desired output file in the scripts and call Snakemake with the exact name. Using a flag `-n`, you can initiate a 'dry run': Snakemake will check the consistency of all rules and files and show the number of steps. However, a clean dry run does not necessarily mean that no errors will occur during a normal run. ReMM score is not allele-specific so that you get only one score independent of the variant itself. The workflow from the download of data up to computing the scores may take several days or weeks depending on the computing power and internet connection.\r\n\r\n\r\n### The config files\r\n\r\nThe main config file can be found in `config/config.yaml`. 
This config file was used to generate the ReMM score. Here most of the configuration magic happens. There is a second config file `config/features.yaml` where all features are listed (with additional description). Config files are controled via [json-schema](http://json-schema.org). \r\n\r\nWe also provide a slurm config file for runtimes, memory and number of threads per rule: `config/slurm.yaml`.\r\n","organization":"KircherLab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/414?version=1","name":"master @ b1fbe1e","author":[],"descriptor_type":["SMK"]}]},{"id":"415","url":"https://workflowhub.eu/workflows/415","name":"Gravitational Wave source Cone Search","description":"","organization":"ODA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/415?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"416","url":"https://workflowhub.eu/workflows/416","name":"GEN-ERA toolbox","description":"\r\n# Github: https://github.com/Lcornet/GENERA\r\n\r\n# BCCM GEN-ERA tools repository\r\n\r\nPlease visit the wiki for tutorials and access to the tools:\r\nhttps://github.com/Lcornet/GENERA/wiki  \r\n\r\n# NEWS\r\nMantis is now installed in a singularity container for the Metabolic workflow (install is no longer necessary).  \r\n\r\n# Information about the GEN-ERA project\r\nPlease visit  \r\nhttps://bccm.belspo.be/content/bccm-collections-genomic-era  \r\n\r\n# Publications\r\n1. ToRQuEMaDA: tool for retrieving queried Eubacteria, metadata and dereplicating assemblies.  \r\n   Léonard, R. R., Leleu, M., Vlierberghe, M. V., Cornet, L., Kerff, F., and Baurain, D. (2021).  \r\n   PeerJ 9, e11348. doi:10.7717/peerj.11348.  \r\n   https://peerj.com/articles/11348/  \r\n2. The taxonomy of the Trichophyton rubrum complex: a phylogenomic approach.  
\r\n   Cornet, L., D’hooge, E., Magain, N., Stubbe, D., Packeu, A., Baurain, D., and Becker P. (2021).  \r\n   Microbial Genomics 7, 000707. doi:10.1099/mgen.0.000707.  \r\n   https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000707  \r\n3. ORPER: A Workflow for Constrained SSU rRNA Phylogenies.  \r\n   Cornet, L., Ahn, A.-C., Wilmotte, A., and Baurain, D. (2021).  \r\n   Genes 12, 1741. doi:10.3390/genes12111741.  \r\n   https://www.mdpi.com/2073-4425/12/11/1741/html  \r\n4. AMAW: automated gene annotation for non-model eukaryotic genomes.  \r\n   Meunier, L., Baurain, D., Cornet, L. (2021)  \r\n   https://www.biorxiv.org/content/10.1101/2021.12.07.471566v1  \r\n5. Phylogenomic analyses of Snodgrassella isolates from honeybees and bumblebees reveals taxonomic and functional diversity.  \r\n   Cornet, L.,  Cleenwerck, I., Praet, J., Leonard, R., Vereecken, N.J., Michez, D., Smagghe, G., Baurain, D., Vandamme, P. (2021)  \r\n   https://www.biorxiv.org/content/10.1101/2021.12.10.472130v1  \r\n6. Contamination detection in genomic data: more is not enough.   \r\n   Cornet, L \u0026 Baurain, D (2022)   \r\n   Genome Biology. 2022;23:60.  \r\n   https://genomebiology.biomedcentral.com/articles/10.1186/s13059-022-02619-9  \r\n7. The GEN-ERA toolbox: unified and reproducible workflows for research in microbial genomics  \r\n   Cornet, L., Durieu, B., Baert, F., D’hooge, E., Colignon, D., Meunier, L., Lupo, V., Cleenwerck I.,\r\n   Daniel, HM., Rigouts, L., Sirjacobs, D., Declerck, D., Vandamme, P., Wilmotte, A., Baurain, D., Becker P (2022).  \r\n   https://www.biorxiv.org/content/10.1101/2022.10.20.513017v1  \r\n8. CRitical Assessment of genomic COntamination detection at several Taxonomic ranks (CRACOT)    \r\n   Cornet, L., Lupo, V., Declerck, S., Baurain, D. (2022).   
\r\n   https://www.biorxiv.org/content/10.1101/2022.11.14.516442v1  \r\n\r\n# Copyright and License\r\n\r\nThis software is copyright (c) 2017-2021 by University of Liege / Sciensano / BCCM collection by Luc CORNET\r\nThis is free software; you can redistribute it and/or modify it.\r\n\r\n![BCCM](https://github.com/Lcornet/GENERA/blob/main/images/GENERA-logo.png)  \r\n","organization":"BCCM_ULC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/416?version=1","name":"main @ fbfc07c","author":[],"descriptor_type":["NFL"]}]},{"id":"417","url":"https://workflowhub.eu/workflows/417","name":"CoVigator pipeline: variant detection pipeline for Sars-CoV-2 (and other viruses...)","description":"![CoVigator logo](images/CoVigator_logo_txt_nobg.png \"CoVigator logo\")\r\n\r\n# CoVigator pipeline: variant detection pipeline for Sars-CoV-2\r\n\r\n[![DOI](https://zenodo.org/badge/374669617.svg)](https://zenodo.org/badge/latestdoi/374669617)\r\n[![Run tests](https://github.com/TRON-Bioinformatics/covigator-ngs-pipeline/actions/workflows/automated_tests.yml/badge.svg?branch=master)](https://github.com/TRON-Bioinformatics/covigator-ngs-pipeline/actions/workflows/automated_tests.yml)\r\n[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-Nextflow-orange.svg?style=flat\u0026colorA=E1523D\u0026colorB=007D8A)](https://www.nextflow.io/)\r\n[![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT)\r\n\r\n\r\n\r\nThe Covigator pipeline processes SARS-CoV-2 FASTQ or FASTA files into annotated and normalized analysis ready VCF files.\r\nIt also classifies samples into lineages using pangolin.\r\nThe pipeline is implemented in the Nextflow framework (Di Tommaso, 2017), it is a stand-alone pipeline that can be\r\nused independently of the CoVigator dashboard and knowledge base.\r\n\r\nAlthough it is configured by default for SARS-CoV-2 it can be 
employed for the analysis of other microbial organisms \r\nif the required references are provided.\r\n\r\nThe result of the pipeline is one or more annotated VCFs with the list of SNVs and indels ready for analysis.\r\n\r\nThe results from the CoVigator pipeline populate our CoVigator dashboard [https://covigator.tron-mainz.de](https://covigator.tron-mainz.de) \r\n\r\n**Table of Contents**\r\n\r\n1. [Two pipelines in one](#id1)\r\n2. [Implementation](#id2)\r\n3. [How to run](#id3)\r\n4. [Understanding the output](#id4)\r\n6. [Annotation resources](#id5)\r\n7. [Future work](#id6)\r\n8. [Bibliography](#id7)\r\n\r\n\r\n## Two pipelines in one\r\n\r\nIn CoVigator we analyse samples from two different formats, FASTQ files (e.g.: as provided by the European Nucleotide \r\nArchive) and FASTA files containing a consensus assembly. While from the first we get the raw reads, \r\nfrom the second we obtain already assembled genomes. Each of these formats has to be \r\nanalysed differently. Also, the output data that we can obtain from each of these is different.\r\n\r\n![CoVigator pipeline](images/pipeline.drawio.png)\r\n\r\n### Pipeline for FASTQ files\r\n\r\nWhen FASTQ files are provided the pipeline includes the following steps:\r\n- **Trimming**. `fastp` is used to trim reads with default values. This step also includes QC filtering.\r\n- **Alignment**. `BWA mem 2` is used for the alignment of single or paired end samples.\r\n- **BAM preprocessing**. BAM files are prepared and duplicate reads are marked using GATK and Sambamba tools.\r\n- **Primer trimming**. When a BED with primers is provided, these are trimmed from the reads using iVar. This is applicable to the results from all variant callers.\r\n- **Coverage analysis**. `samtools coverage` and `samtools depth` are used to compute the horizontal and vertical \r\n  coverage respectively.\r\n- **Variant calling**. Four different variant callers are employed: BCFtools, LoFreq, iVar and GATK. 
\r\n  Subsequent processing of resulting VCF files is independent for each caller.\r\n- **Variant normalization**. `bcftools norm` is employed to left align indels, trim variant calls and remove variant duplicates.\r\n- **Technical annotation**. `VAFator` is employed to add VAF and coverage annotations from the reads pileup.\r\n- **Phasing**. Clonal mutations (ie: VAF \u003e= 0.8) occurring in the same amino acid are merged for its correct functional annotation.\r\n- **Biological annotation**. `SnpEff` is employed to annotate the variant consequences of variants and\r\n  `bcftools annotate` is employed to add additional SARS-CoV-2 annotations.\r\n- **Lineage determination**. `pangolin` is used for this purpose, this runs over the results from each of the variant callers separately.\r\n\r\nBoth single end and paired end FASTQ files are supported.\r\n\r\n### Pipeline for FASTA files\r\n\r\nWhen a FASTA file is provided with a single assembly sequence the pipeline includes the following steps:\r\n- **Variant calling**. A Smith-Waterman global alignment is performed against the reference sequence to call SNVs and \r\n  indels. Indels longer than 50 bp and at the beginning or end of the assembly sequence are excluded. Any mutation where\r\n  either reference or assembly contain an N is excluded.\r\n- **Variant normalization**. Same as described above.\r\n- **Phasing**. mutations occurring in the same amino acid are merged for its correct annotation.\r\n- **Biological annotation**. Same as described above.\r\n- **Lineage determination**. `pangolin` is used for this purpose.\r\n\r\nThe FASTA file is expected to contain a single assembly sequence. \r\nBear in mind that only clonal variants can be called on the assembly.\r\n\r\n### Pipeline for VCF files\r\n\r\nWhen a VCF file is provided the pipeline includes the following steps:\r\n- **Variant normalization**. Same as described above.\r\n- **Technical annotation**. 
Same as described above (optional if BAM is provided)\r\n- **Phasing**. mutations occurring in the same amino acid are merged for its correct annotation.\r\n- **Biological annotation**. Same as described above\r\n- **Lineage determination**. `pangolin` is used for this purpose.\r\n\r\n## Implementation\r\n\r\nThe pipeline is implemented as a Nextflow workflow with its DSL2 syntax.\r\nThe dependencies are managed through a conda environment to ensure version traceability and reproducibility.\r\nThe references for SARS-CoV-2 are embedded in the pipeline.\r\nThe pipeline is based on a number of third-party tools, plus a custom implementation based on biopython (Cock, 2009) \r\nfor the alignment and subsequent variant calling over a FASTA file.\r\n\r\nAll code is open sourced in GitHub [https://github.com/TRON-Bioinformatics/covigator-ngs-pipeline](https://github.com/TRON-Bioinformatics/covigator-ngs-pipeline)\r\nand made available under the MIT license. We welcome any contribution. \r\nIf you have troubles using the CoVigator pipeline or you find an issue, we will be thankful if you would report a ticket \r\nin GitHub.\r\n\r\nThe alignment, BAM preprocessing and variant normalization pipelines are based on the implementations in additional \r\nNextflow pipelines within the TronFlow initiative [https://tronflow-docs.readthedocs.io/](https://tronflow-docs.readthedocs.io/). \r\n\r\n\r\n### Variant annotations\r\n\r\nThe variants derived from a FASTQ file are annotated on the `FILTER` column using the VAFator \r\n(https://github.com/TRON-Bioinformatics/vafator) variant allele frequency \r\n(VAF) into `LOW_FREQUENCY`, `SUBCLONAL`, `LOW_QUALITY_CLONAL` and finally `PASS` variants correspond to clonal variants. \r\nBy default, variants with a VAF \u003c 2 % are considered `LOW_FREQUENCY`, variants with a VAF \u003e= 2 % and \u003c 50 % are \r\nconsidered `SUBCLONAL` and variants with a VAF \u003e= 50 % and \u003c 80 % are considered `LOW_QUALITY_CLONAL`. 
\r\nThis thresholds can be changed with the parameters `--low_frequency_variant_threshold`,\r\n`--subclonal_variant_threshold` and `--low_quality_clonal_variant_threshold` respectively.\r\n\r\nVAFator technical annotations:\r\n\r\n- `INFO/vafator_af`: variant allele frequency of the mutation \r\n- `INFO/vafator_ac`: number of reads supporting the mutation \r\n- `INFO/vafator_dp`: total number of reads at the position, in the case of indels this represents the number of reads in the previous position\r\n\r\nSnpEff provides the functional annotations. And all mutations are additionally annotated with the following SARS-CoV-2 specific annotations:\r\n- ConsHMM conservation scores as reported in (Kwon, 2021)\r\n- Pfam domains as reported in Ensemble annotations.\r\n\r\nBiological annotations: \r\n\r\n- `INFO/ANN` are the SnpEff consequence annotations (eg: overlapping gene, effect of the mutation). \r\nThis are described in detail here [http://pcingola.github.io/SnpEff/se_inputoutput/](http://pcingola.github.io/SnpEff/se_inputoutput/) \r\n- `INFO/CONS_HMM_SARS_COV_2` is the ConsHMM conservation score in SARS-CoV-2\r\n- `INFO/CONS_HMM_SARBECOVIRUS` is the ConsHMM conservation score among Sarbecovirus\r\n- `INFO/CONS_HMM_VERTEBRATE_COV` is the ConsHMM conservation score among vertebrate Corona virus\r\n- `INFO/PFAM_NAME` is the Interpro name for the overlapping Pfam domains\r\n- `INFO/PFAM_DESCRIPTION` is the Interpro description for the overlapping Pfam domains\r\n- `INFO/problematic` contains the filter provided in DeMaio et al. (2020) for problematic mutations\r\n\r\nAccording to DeMaio et al. 
(2020), mutations at the beginning (ie: POS \u003c= 50) and end (ie: POS \u003e= 29,804) of the \r\ngenome are filtered out\r\n\r\nThis is an example of biological annotations of a missense mutation in the spike protein on the N-terminal subunit 1 domain.\r\n```\r\nANN=A|missense_variant|MODERATE|S|gene-GU280_gp02|transcript|TRANSCRIPT_gene-GU280_gp02|protein_coding|1/1|c.118G\u003eA|\r\np.D40N|118/3822|118/3822|40/1273||;CONS_HMM_SARS_COV_2=0.57215;CONS_HMM_SARBECOVIRUS=0.57215;CONS_HMM_VERTEBRATE_COV=0;\r\nPFAM_NAME=bCoV_S1_N;PFAM_DESCRIPTION=Betacoronavirus-like spike glycoprotein S1, N-terminal\r\n```\r\n\r\n\r\n### Phasing limitations\r\n\r\nThe phasing implementation is applicable only to clonal mutations. It assumes all clonal mutations are in phase and \r\nhence it merges those occurring in the same amino acid.\r\nIn order to phase intrahost mutations we would need to implement a read-backed phasing approach such as in WhatsHap \r\nor GATK's ReadBackedPhasing. Unfortunately these tools do not support the scenario of a haploid organism with an\r\nundefined number of subclones.\r\nFor this reason, phasing is implemented with custom Python code at `bin/phasing.py`.\r\n\r\n### Primers trimming\r\n\r\nWith some library preparation protocols such as ARTIC it is recommended to trim the primers from the reads.\r\nWe have observed that if primers are not trimmed spurious mutations are being called specially SNVs with lower frequencies and long deletions.\r\nAlso the variant allele frequencies of clonal mutations are underestimated.\r\n\r\nThe BED files containing the primers for each ARTIC version can be found at https://github.com/artic-network/artic-ncov2019/tree/master/primer_schemes/nCoV-2019.\r\n\r\nIf the adequate BED file is provided to the CoVigator pipeline with `--primers` the primers will be trimmed with iVar. 
\r\nThis affects the output of every variant caller, not only iVar.\r\n\r\n### Reference data\r\n\r\nThe default SARS-CoV-2 reference files correspond to Sars_cov_2.ASM985889v3 and were downloaded from Ensembl servers.\r\nNo additional parameter needs to be provided to use the default SARS-CoV-2 reference genome.\r\n\r\n#### Using a custom reference genome\r\n\r\nThese references can be customised to use a different SARS-CoV-2 reference or to analyse a different virus.\r\nTwo files need to be provided:\r\n- Use a custom reference genome by providing the parameter `--reference your.fasta`.\r\n- Gene annotation file in GFFv3 format `--gff your.gff`. This is only required to run iVar\r\n\r\nAdditionally, the FASTA needs bwa indexes, .fai index and a .dict index.\r\nThese indexes can be generated with the following two commands:\r\n```\r\nbwa index reference.fasta\r\nsamtools faidx reference.fasta\r\ngatk CreateSequenceDictionary --REFERENCE your.fasta\r\n```\r\n\r\n**NOTE**: beware that for Nextflow to find these indices the reference needs to be passed as an absolute path.\r\n\r\nThe SARS-CoV-2 specific annotations will be skipped when using a custom genome.\r\n\r\nIn order to have SnpEff functional annotations available you will also need to provide three parameters:\r\n- `--snpeff_organism`: organism to annotate with SnpEff (ie: as registered in SnpEff)\r\n- `--snpeff_data`: path to the SnpEff data folder\r\n- `--snpeff_config`: path to the SnpEff config file\r\n\r\n### Intrahost mutations\r\n\r\nSome mutations may be observed in a subset of the virus sample, this may arise through intrahost virus evolution or\r\nco-infection. Intrahost mutations can only be detected when analysing the raw reads (ie: the FASTQs) \r\nas in the assembly (ie: the FASTA file) a single virus consensus sequence is represented. \r\nBCFtools and GATK do not normally capture intrahost mutations; on the other hand LoFreq and iVar both capture\r\nmutations that deviate from a clonal-like VAF. 
\r\nNevertheless, mutations with lower variant allele frequency (VAF) are challenging to distinguish from sequencing and\r\nanalytical errors.  \r\n\r\nMutations are annotated on the `FILTER` column using the VAF into three categories: \r\n- `LOW_FREQUENCY`: subset of intrahost mutations with lowest frequencies, potentially enriched with false positive calls (VAF \u003c 2 %).\r\n- `SUBCLONAL`: subset of intrahost mutations with higher frequencies (2 % \u003c= VAF \u003c 50 %).\r\n- `LOW_QUALITY_CLONAL`: subset of clonal mutations with lower frequencies (50 % \u003c= VAF \u003c 80 %).\r\n- `PASS` clonal mutations (VAF \u003e= 80 %)\r\n\r\nOther low quality mutations are removed from the output.\r\n\r\nThe VAF thresholds can be changed with the parameters `--low_frequency_variant_threshold`,\r\n`--subclonal_variant_threshold` and `--low_quality_clonal_variant_threshold`.\r\n\r\n## How to run\r\n\r\n### Requirements\r\n\r\n- Nextflow \u003e= 19.10.0\r\n- Java \u003e= 8\r\n- Conda \u003e=4.9\r\n\r\n### Testing\r\n\r\nTo run the workflow on a test assembly dataset run:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda,test_fasta\r\n```\r\n\r\nFind the output in the folder `covigator_test_fasta`.\r\n\r\nTo run the workflow on a test raw reads dataset run:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda,test_fastq\r\n```\r\n\r\nFind the output in the folder `covigator_test_fastq`.\r\n\r\nThe above commands are useful to create the conda environments beforehand.\r\n\r\n**NOTE**: pangolin is the most time-consuming step of the whole pipeline. 
To make it faster, locate the conda \r\nenvironment that Nextflow created with pangolin (eg: `find $YOUR_NEXTFOW_CONDA_ENVS_FOLDER -name pangolin`) and run\r\n`pangolin --decompress-model`.\r\n\r\n### Running\r\n\r\nFor paired end reads:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline \\\r\n[-r v0.10.0] \\\r\n[-profile conda] \\\r\n--fastq1 \u003cFASTQ_FILE\u003e \\\r\n--fastq2 \u003cFASTQ_FILE\u003e \\\r\n--name example_run \\\r\n--output \u003cOUTPUT_FOLDER\u003e \\\r\n[--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] \\\r\n[--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\n\r\nFor single end reads:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline \\\r\n[-r v0.10.0] \\\r\n[-profile conda] \\\r\n--fastq1 \u003cFASTQ_FILE\u003e \\\r\n--name example_run \\\r\n--output \u003cOUTPUT_FOLDER\u003e \\\r\n[--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] \\\r\n[--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\n\r\nFor assembly:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline \\\r\n[-r v0.10.0] \\\r\n[-profile conda] \\\r\n--fasta \u003cFASTA_FILE\u003e \\\r\n--name example_run \\\r\n--output \u003cOUTPUT_FOLDER\u003e \\\r\n[--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] \\\r\n[--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\n\r\nFor VCF:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline \\\r\n[-r v0.10.0] \\\r\n[-profile conda] \\\r\n--vcf \u003cVCF_FILE\u003e \\\r\n--name example_run \\\r\n--output \u003cOUTPUT_FOLDER\u003e \\\r\n[--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] \\\r\n[--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\n\r\nAs an optional input when processing directly VCF files you can provide BAM files to annotate VAFs:\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline \\\r\n[-r v0.10.0] 
\\\r\n[-profile conda] \\\r\n--vcf \u003cVCF_FILE\u003e \\\r\n--bam \u003cBAM_FILE\u003e \\\r\n--bai \u003cBAI_FILE\u003e \\\r\n--name example_run \\\r\n--output \u003cOUTPUT_FOLDER\u003e \\\r\n[--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] \\\r\n[--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\n\r\nFor batch processing of reads use `--input_fastqs_list` and `--name`.\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline [-profile conda] --input_fastqs_list \u003cTSV_FILE\u003e --library \u003cpaired|single\u003e --output \u003cOUTPUT_FOLDER\u003e [--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] [--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\nwhere the TSV file contains two or three columns tab-separated columns **without header**. Columns: sample name, path to FASTQ 1 and optionally path to FASTQ 2. \r\n\r\n| Sample    | FASTQ 1                       | FASTQ 2 (optional column)     |\r\n|-----------|-------------------------------|-------------------------------|\r\n| sample1   | /path/to/sample1_fastq1.fastq | /path/to/sample1_fastq2.fastq |\r\n| sample2   | /path/to/sample2_fastq1.fastq | /path/to/sample2_fastq2.fastq |\r\n| ...       | ...                           | ...                           |\r\n\r\n\r\nFor batch processing of assemblies use `--input_fastas_list`.\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline [-profile conda] --input_fastas_list \u003cTSV_FILE\u003e --library \u003cpaired|single\u003e --output \u003cOUTPUT_FOLDER\u003e [--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] [--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\nwhere the TSV file contains two columns tab-separated columns **without header**. 
Columns: sample name and path to FASTA.\r\n\r\n| Sample    | FASTA                  | \r\n|-----------|------------------------|\r\n| sample1   | /path/to/sample1.fasta |\r\n| sample2   | /path/to/sample2.fasta |\r\n| ...       | ...                    |\r\n\r\nFor batch processing of VCFs use `--input_vcfs_list`.\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline [-profile conda] --input_vcfs_list \u003cTSV_FILE\u003e --output \u003cOUTPUT_FOLDER\u003e [--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] [--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\nwhere the TSV file contains two columns tab-separated columns **without header**. Columns: sample name and path to VCF.\r\n\r\n| Sample    | FASTA                  |\r\n|-----------|------------------------|\r\n| sample1   | /path/to/sample1.vcf |\r\n| sample2   | /path/to/sample2.vcf |\r\n| ...       | ...                    |\r\n\r\nOptionally, provide BAM files for batch processing of VCFs using `--input_bams_list`.\r\n```\r\nnextflow run tron-bioinformatics/covigator-ngs-pipeline [-profile conda] \\\r\n  --input_vcfs_list \u003cTSV_FILE\u003e \\\r\n  --input_bams_list \u003cTSV_FILE\u003e \\\r\n  --output \u003cOUTPUT_FOLDER\u003e \\\r\n  [--reference \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.fa] \\\r\n  [--gff \u003cpath_to_reference\u003e/Sars_cov_2.ASM985889v3.gff3]\r\n```\r\nwhere the BAMs TSV file contains three columns tab-separated columns **without header**. Columns: sample name, \r\npath to BAM and path to BAI.\r\n\r\n| Sample    | BAM                  | BAI                  |\r\n|-----------|----------------------|----------------------|\r\n| sample1   | /path/to/sample1.bam | /path/to/sample1.bai |\r\n| sample2   | /path/to/sample2.bam | /path/to/sample2.bai |\r\n| ...       | ...                  | ...                  
|\r\n\r\n\r\n\r\n### Getting help\r\n\r\nYou can always contact us directly or create a GitHub issue, otherwise see all available options using `--help`:\r\n```\r\n$ nextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda --help\r\n\r\nUsage:\r\n    nextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda --help\r\n\r\nInput:\r\n    * --fastq1: the first input FASTQ file (not compatible with --fasta, nor --vcf)\r\n    * --fasta: the FASTA file containing the assembly sequence (not compatible with --fastq1, nor --vcf)\r\n    * --vcf: the VCF file containing mutations to analyze (not compatible with --fastq1, nor --fasta)\r\n    * --bam: the BAM file containing reads to annotate VAFs on a VCF (not compatible with --fastq1, nor --fasta)\r\n    * --bai: the BAI index for a BAM file (not compatible with --fastq1, nor --fasta)\r\n    * --name: the sample name, output files will be named after this name\r\n    * --output: the folder where to publish output\r\n    * --input_fastqs_list: alternative to --name and --fastq1 for batch processing\r\n    * --library: required only when using --input_fastqs\r\n    * --input_fastas_list: alternative to --name and --fasta for batch processing\r\n    * --input_vcfs_list: alternative to --name and --vcf for batch processing\r\n    * --input_bams_list: alternative to --name, --vcf, --bam and --bai for batch processing\r\n\r\nOptional input only required to use a custom reference:\r\n    * --reference: the reference genome FASTA file, *.fai, *.dict and bwa indexes are required.\r\n    * --gff: the GFFv3 gene annotations file (required to run iVar and to phase mutations from all variant callers)    \r\n    * --snpeff_data: path to the SnpEff data folder, it will be useful to use the pipeline on other virus than SARS-CoV-2\r\n    * --snpeff_config: path to the SnpEff config file, it will be useful to use the pipeline on other virus than SARS-CoV-2\r\n    * --snpeff_organism: organism to annotate with SnpEff, 
it will be useful to use the pipeline on other virus than SARS-CoV-2\r\n\r\nOptional input:\r\n    * --fastq2: the second input FASTQ file\r\n    * --primers: a BED file containing the primers used during library preparation. If provided primers are trimmed from the reads.\r\n    * --min_base_quality: minimum base call quality to take a base into account for variant calling (default: 20)\r\n    * --min_mapping_quality: minimum mapping quality to take a read into account for variant calling (default: 20)\r\n    * --vafator_min_base_quality: minimum base call quality to take a base into account for VAF annotation (default: 0)\r\n    * --vafator_min_mapping_quality: minimum mapping quality to take a read into account for VAF annotation (default: 0)\r\n    * --low_frequency_variant_threshold: VAF threshold to mark a variant as low frequency (default: 0.02)\r\n    * --subclonal_variant_threshold: VAF superior threshold to mark a variant as subclonal  (default: 0.5)\r\n    * --lq_clonal_variant_threshold: VAF superior threshold to mark a variant as low quality clonal (default: 0.8)\r\n    * --memory: the amount of memory used by each job (default: 3g)\r\n    * --cpus: the number of CPUs used by each job (default: 1)\r\n    * --skip_lofreq: skips calling variants with LoFreq\r\n    * --skip_gatk: skips calling variants with GATK\r\n    * --skip_bcftools: skips calling variants with BCFTools\r\n    * --skip_ivar: skips calling variants with iVar\r\n    * --skip_pangolin: skips lineage determination with pangolin\r\n    * --match_score: global alignment match score, only applicable for assemblies (default: 2)\r\n    * --mismatch_score: global alignment mismatch score, only applicable for assemblies (default: -1)\r\n    * --open_gap_score: global alignment open gap score, only applicable for assemblies (default: -3)\r\n    * --extend_gap_score: global alignment extend gap score, only applicable for assemblies (default: -0.1)\r\n    * --skip_sarscov2_annotations: skip some 
of the SARS-CoV-2 specific annotations (default: false)\r\n    * --keep_intermediate: keep intermediate files (ie: BAM files and intermediate VCF files)\r\n    * --args_bcftools_mpileup: additional arguments for bcftools mpileup command (eg: --args_bcftools_mpileup='--ignore-overlaps')\r\n    * --args_bcftools_call: additional arguments for bcftools call command (eg: --args_bcftools_call='--something')\r\n    * --args_lofreq: additional arguments for lofreq command (eg: --args_lofreq='--something')\r\n    * --args_gatk: additional arguments for gatk command (eg: --args_gatk='--something')\r\n    * --args_ivar_samtools: additional arguments for ivar samtools mpileup command (eg: --args_ivar_samtools='--ignore-overlaps')\r\n    * --args_ivar: additional arguments for ivar command (eg: --args_ivar='--something')\r\n\r\nOutput:\r\n    * Output a VCF file for each of BCFtools, GATK, LoFreq and iVar when FASTQ files are\r\n    provided or a single VCF obtained from a global alignment when a FASTA file is provided.\r\n    * A pangolin results file for each of the VCF files.\r\n    * Only when FASTQs are provided:\r\n      * FASTP statistics\r\n      * Depth and breadth of coverage analysis results\r\n      \r\n```\r\n\r\n## Understanding the output\r\n\r\nAlthough the VCFs are normalized for both pipelines, the FASTQ pipeline runs four variant callers, while the FASTA\r\npipeline runs a single variant caller. Also, there are several metrics in the FASTQ pipeline that are not present\r\nin the output of the FASTA pipeline. 
Here we will describe these outputs.\r\n\r\n### FASTQ pipeline output\r\n\r\nFind in the table below a description of each of the expected files and a link to a sample file for the FASTQ pipeline.\r\nThe VCF files will be described in more detail later.\r\n\r\n| Name                            | Description                                                    | Sample file                                                                                                                                       |\r\n|---------------------------------|----------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|\r\n| $NAME.fastp_stats.json          | Output metrics of the fastp trimming process in JSON format    | [ERR4145453.fastp_stats.json](_static/covigator_pipeline_sample_output_reads/ERR4145453.fastp_stats.json)                                         |\r\n| $NAME.fastp_stats.html          | Output metrics of the fastp trimming process in HTML format    | [ERR4145453.fastp_stats.html](_static/covigator_pipeline_sample_output_reads/ERR4145453.fastp_stats.html)                                         |\r\n| $NAME.deduplication_metrics.txt | Deduplication metrics                                          | [ERR4145453.deduplication_metrics.txt](_static/covigator_pipeline_sample_output_reads/ERR4145453.deduplication_metrics.txt)                       |\r\n| $NAME.coverage.tsv              | Coverage metrics (eg: mean depth, % horizontal coverage)       | [ERR4145453.coverage.tsv](_static/covigator_pipeline_sample_output_reads/ERR4145453.coverage.tsv)                                                 |\r\n| $NAME.depth.tsv                 | Depth of coverage per position                                 | [ERR4145453.depth.tsv](_static/covigator_pipeline_sample_output_reads/ERR4145453.depth.tsv)                                   
                    |\r\n| $NAME.bcftools.vcf.gz           | Bgzipped, tabix-indexed and annotated output VCF from BCFtools | [ERR4145453.bcftools.normalized.annotated.vcf.gz](_static/covigator_pipeline_sample_output_reads/ERR4145453.bcftools.normalized.annotated.vcf.gz) |\r\n| $NAME.gatk.vcf.gz               | Bgzipped, tabix-indexed and annotated output VCF from GATK     | [ERR4145453.gatk.normalized.annotated.vcf.gz](_static/covigator_pipeline_sample_output_reads/ERR4145453.gatk.normalized.annotated.vcf.gz)         |\r\n| $NAME.lofreq.vcf.gz             | Bgzipped, tabix-indexed and annotated output VCF from LoFreq   | [ERR4145453.lofreq.normalized.annotated.vcf.gz](_static/covigator_pipeline_sample_output_reads/ERR4145453.lofreq.normalized.annotated.vcf.gz)     |\r\n| $NAME.ivar.vcf.gz               | Bgzipped, tabix-indexed and annotated output VCF from LoFreq   | [ERR4145453.ivar.tsv](_static/covigator_pipeline_sample_output_reads/ERR4145453.ivar.tsv)                                                         |\r\n| $NAME.lofreq.pangolin.csv       | Pangolin CSV output file derived from LoFreq mutations         | [ERR4145453.lofreq.pangolin.csv](_static/covigator_pipeline_sample_output_reads/ERR4145453.lofreq.pangolin.csv)                                              |\r\n\r\n\r\n### FASTA pipeline output\r\n\r\nThe FASTA pipeline returns a single VCF file. 
The VCF files will be described in more detail later.\r\n\r\n| Name                        | Description                                                  | Sample file                                                                                          |\r\n|-----------------------------|--------------------------------------------------------------|------------------------------------------------------------------------------------------------------|\r\n| $NAME.assembly.vcf.gz | Bgzipped, tabix-indexed and annotated output VCF | [ERR4145453.assembly.normalized.annotated.vcf.gz](_static/covigator_pipeline_sample_output_assembly/hCoV-19_NTXX.assembly.normalized.annotated.vcf.gz) |\r\n\r\n\r\n## Annotations resources\r\n\r\nSARS-CoV-2 ASM985889v3 references were downloaded from Ensembl on 6th of October 2020:\r\n- ftp://ftp.ensemblgenomes.org/pub/viruses/fasta/sars_cov_2/dna/Sars_cov_2.ASM985889v3.dna.toplevel.fa.gz\r\n- ftp://ftp.ensemblgenomes.org/pub/viruses/gff3/sars_cov_2/Sars_cov_2.ASM985889v3.101.gff3.gz\r\n\r\nConsHMM mutation depletion scores downloaded on 1st of July 2021:\r\n- https://github.com/ernstlab/ConsHMM_CoV/blob/master/wuhCor1.mutDepletionConsHMM.bed\r\n- https://github.com/ernstlab/ConsHMM_CoV/blob/master/wuhCor1.mutDepletionSarbecovirusConsHMM.bed\r\n- https://github.com/ernstlab/ConsHMM_CoV/blob/master/wuhCor1.mutDepletionVertebrateCoVConsHMM.bed\r\n\r\nGene annotations including Pfam domains downloaded from Ensembl on 25th of February 2021 from:\r\n- ftp://ftp.ensemblgenomes.org/pub/viruses/json/sars_cov_2/sars_cov_2.json\r\n\r\n\r\n## Future work\r\n\r\n- Primer trimming on an arbitrary sequencing library.\r\n- Pipeline for Oxford Nanopore technology.\r\n- Variant calls from assemblies contain an abnormally high number of deletions of size greater than 3 bp. This\r\nis a technical artifact that would need to be avoided.\r\n\r\n## Bibliography\r\n\r\n- Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. 
P., Palumbo, E., \u0026 Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316–319. https://doi.org/10.1038/nbt.3820\r\n- Vasimuddin Md, Sanchit Misra, Heng Li, Srinivas Aluru. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019.\r\n- Adrian Tan, Gonçalo R. Abecasis and Hyun Min Kang. Unified Representation of Genetic Variants. Bioinformatics (2015) 31(13): 2202-2204](http://bioinformatics.oxfordjournals.org/content/31/13/2202) and uses bcftools [Li, H. (2011). A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics (Oxford, England), 27(21), 2987–2993. 10.1093/bioinformatics/btr509\r\n- Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819.\r\n- Van der Auwera GA, Carneiro M, Hartl C, Poplin R, del Angel G, Levy-Moonshine A, Jordan T, Shakir K, Roazen D, Thibault J, Banks E, Garimella K, Altshuler D, Gabriel S, DePristo M. (2013). From FastQ Data to High-Confidence Variant Calls: The Genome Analysis Toolkit Best Practices Pipeline. Curr Protoc Bioinformatics, 43:11.10.1-11.10.33. DOI: 10.1002/0471250953.bi1110s43.\r\n- Martin, M., Patterson, M., Garg, S., O Fischer, S., Pisanti, N., Klau, G., Schöenhuth, A., \u0026 Marschall, T. (2016). WhatsHap: fast and accurate read-based phasing. BioRxiv, 085050. https://doi.org/10.1101/085050\r\n- Danecek, P., \u0026 McCarthy, S. A. (2017). BCFtools/csq: haplotype-aware variant consequences. Bioinformatics, 33(13), 2037–2039. https://doi.org/10.1093/bioinformatics/btx100\r\n- Wilm, A., Aw, P. P. K., Bertrand, D., Yeo, G. H. T., Ong, S. H., Wong, C. H., Khor, C. 
C., Petric, R., Hibberd, M. L., \u0026 Nagarajan, N. (2012). LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Research, 40(22), 11189–11201. https://doi.org/10.1093/nar/gks918\r\n- Grubaugh, N. D., Gangavarapu, K., Quick, J., Matteson, N. L., De Jesus, J. G., Main, B. J., Tan, A. L., Paul, L. M., Brackney, D. E., Grewal, S., Gurfield, N., Van Rompay, K. K. A., Isern, S., Michael, S. F., Coffey, L. L., Loman, N. J., \u0026 Andersen, K. G. (2019). An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biology, 20(1), 8. https://doi.org/10.1186/s13059-018-1618-7\r\n- Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu; fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 1 September 2018, Pages i884–i890, https://doi.org/10.1093/bioinformatics/bty560\r\n- Kwon, S. Bin, \u0026 Ernst, J. (2021). Single-nucleotide conservation state annotation of the SARS-CoV-2 genome. Communications Biology, 4(1), 1–11. https://doi.org/10.1038/s42003-021-02231-w\r\n- Cock, P. J., Antao, T., Chang, J. T., Chapman, B. A., Cox, C. J., Dalke, A., et al. (2009). Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics, 25(11), 1422–1423.\r\n- Artem Tarasov, Albert J. Vilella, Edwin Cuppen, Isaac J. 
Nijman, Pjotr Prins, Sambamba: fast processing of NGS alignment formats, Bioinformatics, Volume 31, Issue 12, 15 June 2015, Pages 2032–2034, https://doi.org/10.1093/bioinformatics/btv098\r\n","organization":"TRON gGmbH","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/417?version=1","name":"master @ 3c15c95","author":["Pablo Riesgo Ferreiro"],"descriptor_type":["NFL"]}]},{"id":"418","url":"https://workflowhub.eu/workflows/418","name":"TronFlow alignment pipeline","description":"# TronFlow alignment pipeline\r\n\r\n![GitHub tag (latest SemVer)](https://img.shields.io/github/v/release/tron-bioinformatics/tronflow-bwa?sort=semver)\r\n[![Run tests](https://github.com/TRON-Bioinformatics/tronflow-bwa/actions/workflows/automated_tests.yml/badge.svg?branch=master)](https://github.com/TRON-Bioinformatics/tronflow-bwa/actions/workflows/automated_tests.yml)\r\n[![DOI](https://zenodo.org/badge/327943420.svg)](https://zenodo.org/badge/latestdoi/327943420)\r\n[![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT)\r\n[![Powered by Nextflow](https://img.shields.io/badge/powered%20by-Nextflow-orange.svg?style=flat\u0026colorA=E1523D\u0026colorB=007D8A)](https://www.nextflow.io/)\r\n\r\nThe TronFlow alignment pipeline is part of a collection of computational workflows for tumor-normal pair \r\nsomatic variant calling.\r\n\r\nFind the documentation here [![Documentation Status](https://readthedocs.org/projects/tronflow-docs/badge/?version=latest)](https://tronflow-docs.readthedocs.io/en/latest/?badge=latest)\r\n\r\nThis pipeline aligns paired and single end FASTQ files with BWA aln and mem algorithms and with BWA mem 2.\r\nFor RNA-seq STAR is also supported. 
To increase sensitivity of novel junctions use `--star_two_pass_mode` (recommended for RNAseq variant calling).\r\nIt also includes an initial step of read trimming using FASTP.\r\n\r\n\r\n## How to run it\r\n\r\nRun it from GitHub as follows:\r\n```\r\nnextflow run tron-bioinformatics/tronflow-alignment -profile conda --input_files $input --output $output --algorithm aln --library paired\r\n```\r\n\r\nOtherwise download the project and run as follows:\r\n```\r\nnextflow main.nf -profile conda --input_files $input --output $output --algorithm aln --library paired\r\n```\r\n\r\nFind the help as follows:\r\n```\r\n$ nextflow run tron-bioinformatics/tronflow-alignment  --help\r\nN E X T F L O W  ~  version 19.07.0\r\nLaunching `main.nf` [intergalactic_shannon] - revision: e707c77d7b\r\n\r\nUsage:\r\n    nextflow main.nf --input_files input_files [--reference reference.fasta]\r\n\r\nInput:\r\n    * input_fastq1: the path to a FASTQ file (incompatible with --input_files)\r\n    * input_files: the path to a tab-separated values file containing in each row the sample name and two paired FASTQs (incompatible with --fastq1 and --fastq2)\r\n    when `--library paired`, or a single FASTQ file when `--library single`\r\n    Example input file:\r\n    name1\tfastq1.1\tfastq1.2\r\n    name2\tfastq2.1\tfastq2.2\r\n    * reference: path to the indexed FASTA genome reference or the star reference folder in case of using star\r\n\r\nOptional input:\r\n    * input_fastq2: the path to a second FASTQ file (incompatible with --input_files, incompatible with --library paired)\r\n    * output: the folder where to publish output (default: output)\r\n    * algorithm: determines the BWA algorithm, either `aln`, `mem`, `mem2` or `star` (default `aln`)\r\n    * library: determines whether the sequencing library is paired or single end, either `paired` or `single` (default `paired`)\r\n    * cpus: determines the number of CPUs for each job, with the exception of bwa sampe and samse steps which 
are not parallelized (default: 8)\r\n    * memory: determines the memory required by each job (default: 32g)\r\n    * inception: if enabled it uses an inception, only valid for BWA aln, it requires a fast file system such as flash (default: false)\r\n    * skip_trimming: skips the read trimming step\r\n    * star_two_pass_mode: activates STAR two-pass mode, increasing sensitivity of novel junction discovery, recommended for RNA variant calling (default: false)\r\n    * additional_args: additional alignment arguments, only effective in BWA mem, BWA mem 2 and STAR (default: none) \r\n\r\nOutput:\r\n    * A BAM file \\${name}.bam and its index\r\n    * FASTP read trimming stats report in HTML format \\${name.fastp_stats.html}\r\n    * FASTP read trimming stats report in JSON format \\${name.fastp_stats.json}\r\n```\r\n\r\n### Input tables\r\n\r\nThe table with FASTQ files expects two tab-separated columns without a header\r\n\r\n| Sample name          | FASTQ 1                      | FASTQ 2                  |\r\n|----------------------|---------------------------------|------------------------------|\r\n| sample_1             | /path/to/sample_1.1.fastq      |    /path/to/sample_1.2.fastq   |\r\n| sample_2             | /path/to/sample_2.1.fastq      |    /path/to/sample_2.2.fastq   |\r\n\r\n\r\n### Reference genome\r\n\r\nThe reference genome has to be provided in FASTA format and it requires two set of indexes:\r\n* FAI index. Create with `samtools faidx your.fasta`\r\n* BWA indexes. Create with `bwa index your.fasta`\r\n\r\nFor bwa-mem2 a specific index is needed:\r\n```\r\nbwa-mem2 index your.fasta\r\n```\r\n\r\nFor star a reference folder prepared with star has to be provided. In order to prepare it will need the reference\r\ngenome in FASTA format and the gene annotations in GTF format. 
Run a command as follows:\r\n```\r\nSTAR --runMode genomeGenerate --genomeDir $YOUR_FOLDER --genomeFastaFiles $YOUR_FASTA --sjdbGTFfile $YOUR_GTF\r\n```\r\n\r\n## References\r\n\r\n* Li H. and Durbin R. (2010) Fast and accurate long-read alignment with Burrows-Wheeler Transform. Bioinformatics, Epub. https://doi.org/10.1093/bioinformatics/btp698 \r\n* Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu; fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 1 September 2018, Pages i884–i890, https://doi.org/10.1093/bioinformatics/bty560\r\n* Vasimuddin Md, Sanchit Misra, Heng Li, Srinivas Aluru. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019.\r\n* Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. 
PMID: 23104886; PMCID: PMC3530905.\r\n","organization":"TRON gGmbH","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/418?version=1","name":"master @ 3811646","author":[],"descriptor_type":["NFL"]}]},{"id":"419","url":"https://workflowhub.eu/workflows/419","name":"TronFlow BAM preprocessing pipeline","description":"# TronFlow BAM preprocessing pipeline\r\n\r\n![GitHub tag (latest SemVer)](https://img.shields.io/github/v/release/tron-bioinformatics/tronflow-bam-preprocessing?sort=semver)\r\n[![Automated tests](https://github.com/TRON-Bioinformatics/tronflow-bam-preprocessing/actions/workflows/automated_tests.yml/badge.svg)](https://github.com/TRON-Bioinformatics/tronflow-bam-preprocessing/actions/workflows/automated_tests.yml)\r\n[![DOI](https://zenodo.org/badge/358400957.svg)](https://zenodo.org/badge/latestdoi/358400957)\r\n[![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT)\r\n[![Powered by Nextflow](https://img.shields.io/badge/powered%20by-Nextflow-orange.svg?style=flat\u0026colorA=E1523D\u0026colorB=007D8A)](https://www.nextflow.io/)\r\n\r\nThe TronFlow BAM preprocessing pipeline is part of a collection of computational workflows for tumor-normal pair \r\nsomatic variant calling. These workflows are implemented in the Nextflow (Di Tommaso, 2017) framework.\r\n\r\nFind the documentation here [![Documentation Status](https://readthedocs.org/projects/tronflow-docs/badge/?version=latest)](https://tronflow-docs.readthedocs.io/en/latest/?badge=latest)\r\n\r\n\r\nThe aim of this workflow is to preprocess BAM files based on Picard and GATK (DePristo, 2011) best practices.\r\n\r\n\r\n## Background\r\n\r\nIn order to have a variant calling ready BAM file there are a number of operations that need to be applied on the BAM. 
\r\nThis pipeline depends on the particular variant caller, but there are some common operations.\r\n\r\nGATK has been providing a well known best practices document on BAM preprocessing, the latest best practices for \r\nGATK4 (https://software.broadinstitute.org/gatk/best-practices/workflow?id=11165) does not perform anymore realignment around indels as opposed to best practices for GATK3 (https://software.broadinstitute.org/gatk/documentation/article?id=3238). This pipeline is based on both Picard and GATK. These best practices have been implemented a number of times, see for instance this implementation in Workflow Definition Language https://github.com/gatk-workflows/gatk4-data-processing/blob/master/processing-for-variant-discovery-gatk4.wdl.\r\n\r\n\r\n## Objectives\r\n\r\nWe aim at providing a single implementation of the BAM preprocessing pipeline that can be used across different \r\nuse cases. \r\nFor this purpose there are some required steps and some optional steps.  \r\n\r\nThe input can be either a tab-separated values file (`--input_files`) where each line corresponds to one input BAM or a single BAM (`--input_bam` and `--input_name`).\r\n\r\n## Implementation\r\n\r\nSteps:\r\n\r\n* **Clean BAM**. Sets the mapping quality to 0 for all unmapped reads and avoids soft clipping going beyond the reference genome boundaries. Implemented in Picard\r\n* **Reorder chromosomes**. Makes the chromosomes in the BAM follow the same order as the reference genome. Implemented in Picard\r\n* **Add read groups**. GATK requires that some headers are added to the BAM, also we want to flag somehow the normal and tumor BAMs in the header as some callers, such as Mutect2 require it. Implemented in Picard.\r\n* **Mark duplicates** (optional). Identify the PCR and the optical duplications and marks those reads. This uses the parallelized version on Spark, it is reported to scale linearly up to 16 CPUs.\r\n* **Realignment around indels** (optional). 
This procedure is important for locus based variant callers, but for any variant caller doing haplotype assembly it is not needed. This is computing intensive as it first finds regions for realignment where there are indication of indels  and then it performs a local realignment over those regions. Implemented in GATK3, deprecated in GATK4\r\n* **Base Quality Score Recalibration (BQSR)** (optional). It aims at correcting systematic errors in the sequencer when assigning the base call quality errors, as these scores are used by variant callers it improves variant calling in some situations. Implemented in GATK4\r\n* **Metrics** (optional). A number of metrics are obtained from the BAM file with Picard's CollectMetrics, CollectHsMetrics and samtools' coverage and depth.\r\n\r\n![Pipeline](figures/bam_preprocessing2.png)\r\n\r\n\r\n## How to run it\r\n\r\n```\r\n$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing --help\r\n\r\nN E X T F L O W  ~  version 19.07.0\r\nLaunching `main.nf` [intergalactic_shannon] - revision: e707c77d7b\r\n\r\nUsage:\r\n    main.nf --input_files input_files\r\n\r\nInput:\r\n    * --input_bam: the path to a single BAM (this option is not compatible with --input_files)\r\n    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)\r\n    Sample type will be added to the BAM header @SN sample name\r\n    The input file does not have header!\r\n    Example input file:\r\n    name1       tumor   tumor.1.bam\r\n    name1       normal  normal.1.bam\r\n    name2       tumor   tumor.2.bam\r\n    * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)\r\n\r\nOptional input:\r\n    * --input_name: the name of the sample. 
Only used when --input_bam is provided (default: normal)\r\n    * --dbsnp: path to the dbSNP VCF (required to perform BQSR)\r\n    * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)\r\n    * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)\r\n    * --intervals: path to a BED file to collect coverage and HS metrics from (default: None)\r\n    * --collect_hs_minimum_base_quality: minimum base quality for a base to contribute coverage (default: 20).\r\n    * --collect_hs_minimum_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).\r\n    * --skip_bqsr: optionally skip BQSR (default: false)\r\n    * --skip_realignment: optionally skip realignment (default: false)\r\n    * --skip_deduplication: optionally skip deduplication (default: false)\r\n    * --remove_duplicates: removes duplicate reads from output BAM instead of flagging them (default: true)\r\n    * --skip_metrics: optionally skip metrics (default: false)\r\n    * --output: the folder where to publish output (default: ./output)\r\n    * --platform: the platform to be added to the BAM header. 
Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)\r\n\r\nComputational resources:\r\n    * --prepare_bam_cpus: (default: 3)\r\n    * --prepare_bam_memory: (default: 8g)\r\n    * --mark_duplicates_cpus: (default: 16)\r\n    * --mark_duplicates_memory: (default: 64g)\r\n    * --realignment_around_indels_cpus: (default: 2)\r\n    * --realignment_around_indels_memory: (default: 31g)\r\n    * --bqsr_cpus: (default: 3)\r\n    * --bqsr_memory: (default: 4g)\r\n    * --metrics_cpus: (default: 1)\r\n    * --metrics_memory: (default: 8g)\r\n\r\n Output:\r\n    * Preprocessed and indexed BAMs\r\n    * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt\r\n\r\nOptional output:\r\n    * Recalibration report\r\n    * Deduplication metrics\r\n    * Realignment intervals\r\n    * GATK multiple metrics\r\n    * HS metrics\r\n    * Horizontal and vertical coverage metrics\r\n```\r\n\r\n### Input table\r\n\r\nThe table with FASTQ files expects two tab-separated columns **without a header**\r\n\r\n| Sample name          | Sample type                      | BAM                  |\r\n|----------------------|---------------------------------|------------------------------|\r\n| sample_1             | normal      |    /path/to/sample_1.normal.bam   |\r\n| sample_1             | tumor      |    /path/to/sample_1.tumor.bam   |\r\n| sample_2             | normal      |    /path/to/sample_2.normal.bam   |\r\n| sample_2             | tumor      |    /path/to/sample_2.tumor.bam   |\r\n\r\nThe values used in `sample type` are arbitrary. These will be set in the BAM header tag @RG:SM for sample. 
There may be some downstream constraints, eg: Mutect2 pipeline requires that the sample type between normal and tumor samples of the same pair are not the same.\r\n\r\n### References\r\n\r\nThe BAM preprocessing workflow requires the human reference genome (`--reference`)\r\nBase Quality Score Recalibration (BQSR) requires dbSNP to avoid extracting error metrics from polymorphic sites (`--dbsnp`)\r\nRealignment around indels requires a set of known indels (`--known_indels1` and `--known_indels2`).\r\nThese resources can be fetched from the GATK bundle https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle.\r\n\r\nOptionally, in order to run Picard's CollectHsMetrics a BED file will need to be provided (`--intervals`).\r\nThis BED file will also be used for `samtools coverage`.\r\n\r\n## Troubleshooting\r\n\r\n### Too new Java version for MarkDuplicatesSpark\r\n\r\nWhen using Java 11 the cryptic error message `java.lang.IllegalArgumentException: Unsupported class file major version 55` has been observed.\r\nThis issue is described here and the solution is to use Java 8 https://gatk.broadinstitute.org/hc/en-us/community/posts/360056174592-MarkDuplicatesSpark-crash.\r\n\r\n\r\n\r\n## Bibliography\r\n\r\n* DePristo M, Banks E, Poplin R, Garimella K, Maguire J, Hartl C, Philippakis A, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell T, Kernytsky A, Sivachenko A, Cibulskis K, Gabriel S, Altshuler D, Daly M. (2011). A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet, 43:491-498. DOI: 10.1038/ng.806.\r\n* Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., \u0026 Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316–319. 
10.1038/nbt.3820\r\n","organization":"TRON gGmbH","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/419?version=1","name":"master @ 266c18b","author":[],"descriptor_type":["NFL"]}]},{"id":"420","url":"https://workflowhub.eu/workflows/420","name":"hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler","description":"This workflow take as input a collection of paired fastq. It uses HiCUP to go from fastq to validPair file. The pairs are filtered for MAPQ and sorted by cooler to generate a tabix dataset. Cooler is used to generate a balanced cool file to the desired resolution.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/420?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/420?version=2","name":"v0.2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/420?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"422","url":"https://workflowhub.eu/workflows/422","name":"Apis-wings-EU: A workflow for morphometric identification of honey bees from Europe","description":"We present an R script that describes the workflow for analysing honey bee (Apis mellifera) wing shape. It is based on a large dataset of wing images and landmark coordinates available at Zenodo: https://doi.org/10.5281/zenodo.7244070. The dataset can be used as a reference for the identification of unknown samples. As unknown samples, we used data from Nawrocka et al. (2018), available at Zenodo: https://doi.org/10.5281/zenodo.7567336. 
Among others, the script can be used to identify the geographic origin of unknown samples and therefore assist in the monitoring and conservation of honey bee biodiversity in Europe.","organization":"Apis-wings","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/422?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"431","url":"https://workflowhub.eu/workflows/431","name":"GermlineStructuralV-nf","description":"\r\n\r\n\r\nGermlineStructuralV-nf is a pipeline for identifying structural variant events in human Illumina short read whole genome sequence data. GermlineStructuralV-nf identifies structural variant and copy number events from BAM files using [Manta](https://github.com/Illumina/manta/blob/master/docs/userGuide/README.md#de-novo-calling), [Smoove](https://github.com/brentp/smoove), and [TIDDIT](https://github.com/SciLifeLab/TIDDIT). Variants are then merged using [SURVIVOR](https://github.com/fritzsedlazeck/SURVIVOR), and annotated by [AnnotSV](https://pubmed.ncbi.nlm.nih.gov/29669011/). 
The pipeline is written in Nextflow and uses Singularity/Docker to run containerised tools.","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/431?version=1","name":"main @ 45954a4","author":["Georgina Samaha","Tracy Chew","Sarah Beecroft"],"descriptor_type":["NFL"]}]},{"id":"432","url":"https://workflowhub.eu/workflows/432","name":"Reconstructing raw tomography data","description":"# Snakemake workflow: Reconstructing raw tomography data\r\n\r\nA Snakemake worfklow for tomographically reconstructing raw data using [tomopy](https://tomopy.readthedocs.io/en/stable/).\r\n\r\n## Installation\r\n\r\nFirst download this repo and navigate to it\r\n```bash\r\ngit clone https://codebase.helmholtz.cloud/gernha62/reconstructing-raw-tomography-data.git\r\n```\r\n```bash\r\ncd /path/to/repo\r\n```\r\n(Optional) Download the example folder with:\r\n```bash\r\nwget -m -np https://doi2.psi.ch/datasets/das/work/p15/p15869/compression/MI04_02/tif\r\n```\r\nCreate a virtual environment and install all necessary packages (requires conda): \r\n```bash\r\nconda env create --name reconstr_env --file workflow/envs/reconstr.yml\r\n```\r\nActivate the new virtual environment: \r\n```bash\r\nconda activate reconstr_env\r\n```\r\n\r\n## Configuration\r\n\r\nTo configure the workflow, adapt the config file found at `config/config.yaml` . The config looks as follows:\r\n```yaml\r\nnumber_of_darks: 50\r\nnumber_of_flats: 100\r\nnumber_of_projections: 501\r\nrotation_center: 508.77\r\nraw_data:\r\n  MI04_02: doi2.psi.ch/datasets/das/work/p15/p15869/compression/MI04_02/tif\r\n```\r\n In the config, adjust `number_of_darks`, `number_of_flats`, `number_of_projections` and `rotation_center` to the number of darks, flats, projections and the rotation center of your dataset. 
The necessary information can usually be found in the .log file of the folder that contains the raw data. \r\n\r\n`MI04_02: doi2.psi.ch/datasets/das/work/p15/p15869/compression/MI04_02/tif` denotes the path to the example folder used for reconstruction and the keyword `MI04_02` will be used to name the output (e.g. in this case the output folder will be named `recon_dir_MI04_02`). Replace the examle path with the path to the dataset you want to reconstruct. Additionally, if you want the name of the output folder to have a different suffix, replace the keyword `MI04_02` with a name you prefer.\r\n\r\n## Run the workflow\r\n\r\nIf the .tif files contain a numerical prefix that is not separated from the actual image index, it is best to first rename the files. The files will be renamed to `00001.tif`, `00002.tif` and so on. If the renaming is needed, run:\r\n\r\n```bash\r\nsnakemake --cores 1 'logs/renamefile_MI04_02.log'\r\n```\r\nIf you replaced the keyword `MI04_02` in the config file then adjust the command accordingly (e.g. 
if you replaced the keyword with `Tomo_dataset` then the command should be `snakemake --cores 1 'logs/renamefile_Tomo_dataset.log'`).\r\n\r\nBefore trying to compute the reconstructions, make sure you have enough memory available (ideally more than 60 GB).\r\nTo compute the reconstructions using one core, use the command:\r\n```bash\r\nsnakemake --cores 1\r\n```\r\nIf you want to use all available cores instead, use:\r\n```bash\r\nsnakemake --cores all\r\n```\r\nThis creates a folder in `results` with the reconstructed data.\r\n\r\n## Credit\r\nThe example dataset used in this project (MI04_02 evolving magma, Mattia Pistone, University of Georgia) was taken from: https://doi.psi.ch/detail/10.16907/05a50450-767f-421d-9832-342b57c201af\r\n\r\nThe script used for reconstruction (`scripts/reconstructs_tomo_datasets.py`) was provided by Alain Studer, PSI.","organization":"Computational Science at HZDR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/432?version=1","name":"main @ e2c4eb6","author":["Felicita Gernhardt"],"descriptor_type":["SMK"]}]},{"id":"433","url":"https://workflowhub.eu/workflows/433","name":"TF-Prioritizer","description":"# 1. About TF-Prioritizer\r\n\r\nThis pipeline gives you a full analysis of nfcore chromatine accessibility peak data (ChIP-Seq, ATAC-Seq or DNAse-Seq)\r\nand nfcore RNA-seq count data. It performs\r\nDESeq2, TEPIC and DYNAMITE including all preprocessing and postprocessing steps necessary to transform the data. It also\r\ngives you plots for deep analysis of the data. The general workflow is sketched in the images below:\r\n\r\n## Graphical abstract:\r\n\r\n![Graphical abstrat](https://raw.githubusercontent.com/biomedbigdata/TF-Prioritizer/master/media/graphicalAbstract.png)\r\n\r\n## Technical workflow:\r\n\r\n![Technical workflow](https://github.com/biomedbigdata/TF-Prioritizer/raw/master/media/technicalWorkflow.png)\r\n\r\n# 2. 
License and Citing\r\n\r\nTF-Prioritizer is distributed under the [GNU General Public License](https://www.gnu.org/licenses/gpl-3.0.en.html). The\r\nGraphical Abstract and the Technical Workflow\r\nwas created using [biorender.com](https://biorender.com/).\r\n\r\n# 3. Usage\r\n\r\nThe software can be executed using docker. For the following command, only [python3](https://www.python.org/downloads/),\r\n[curl](https://curl.se/download.html) and [docker](https://docs.docker.com/get-docker/) are required.\r\nExplanations about the configs can be found in\r\nthe [config readme](https://github.com/biomedbigdata/TF-Prioritizer/blob/master/configTemplates/README.md).\r\n\r\n```bash\r\ncurl -s https://raw.githubusercontent.com/biomedbigdata/TF-Prioritizer/master/docker.py | python3 - -c [config_file] -o [output_dir] -t [threads]\r\n```\r\n\r\nNote, that for this approach an internet connection is required. The docker image will be downloaded\r\nfrom [DockerHub](https://hub.docker.com/r/nicotru/tf-prioritizer) on the first execution as well as with every update we\r\nrelease. Furthermore, the wrapper script\r\nwill be fetched from GitHub with every execution.\r\n\r\nIf curl is not available (for example if you are using windows), or you want to be able to execute the software without\r\nan internet connection, you can download the wrapper script\r\nfrom [here](https://raw.githubusercontent.com/biomedbigdata/TF-Prioritizer/pipeJar/docker.py).\r\n\r\nYou can then execute the script using\r\n\r\n```bash\r\npython3 [script_path] -c [config_file] -o [output_dir] -t [threads]\r\n```\r\n\r\n## If you want to use the pipeline without docker\r\n\r\nWe do not recommend using the pipeline without docker, because the dependencies are very complex, and it is very hard to\r\ninstall them correctly. However, if you want to use the pipeline without docker, you can do so by installing the\r\ndependencies manually. 
The dependencies and their correct installation process can be derived from\r\nthe [Dockerfile](https://github.com/biomedbigdata/TF-Prioritizer/blob/master/Dockerfile) and the environment scripts\r\nwhich can be found in\r\nthe [environment directory](https://github.com/biomedbigdata/TF-Prioritizer/tree/master/environment).","organization":"Big data in biomedicine","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/433?version=1","name":"master @ a532453","author":[],"descriptor_type":[]}]},{"id":"434","url":"https://workflowhub.eu/workflows/434","name":"extract SRA + viralRNAspades (PE)","description":"extract 1 Id from SRA and assume it is PE as input to viralRNASpades.","organization":"Integrated and Urban Plant Pathology Laboratory","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/434?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"435","url":"https://workflowhub.eu/workflows/435","name":"sqtlseeker2-nf","description":"# sqtlseeker2-nf\r\n\r\n[![nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.27.0-blue.svg)](http://nextflow.io)\r\n[![CI-checks](https://github.com/guigolab/sqtlseeker2-nf/actions/workflows/ci.yaml/badge.svg)](https://github.com/guigolab/sqtlseeker2-nf/actions/workflows/ci.yaml)\r\n\r\nA pipeline for splicing quantitative trait loci (sQTL) mapping.\r\n\r\nThe pipeline performs the following analysis steps:\r\n\r\n* Index the genotype file\r\n* Preprocess the transcript expression data\r\n* Test for association between splicing ratios and genetic variants in *cis* (nominal pass)\r\n* Obtain an empirical P-value for each phenotype (permutation pass, optional)\r\n* Control for multiple testing \r\n\r\nFor details on each step, please read [sQTLseekeR2](https://github.com/guigolab/sQTLseekeR2) documentation.\r\n\r\nThe 
pipeline uses [Nextflow](http://www.nextflow.io) as the execution backend. Please check [Nextflow documentation](http://www.nextflow.io/docs/latest/index.html) for more information.\r\n\r\n## Requirements\r\n\r\n- Unix-like operating system (Linux, MacOS, etc.)\r\n- Java 8 or later \r\n- [Docker](https://www.docker.com/) (v1.10.0 or later) or [Singularity](http://singularity.lbl.gov) (v2.5.0 or later)\r\n\r\n## Quickstart (~2 min)\r\n\r\n1. Install Nextflow:\r\n    ```\r\n    curl -fsSL get.nextflow.io | bash\r\n    ```\r\n\r\n2. Make a test run:\r\n    ```\r\n    ./nextflow run guigolab/sqtlseeker2-nf -with-docker\r\n    ```\r\n\r\n    **Note**: set `-with-singularity` to use Singularity instead of Docker. \r\n\r\n## Pipeline usage\r\n\r\nLaunching the pipeline with the `--help` parameter shows the help message:\r\n\r\n```\r\nnextflow run sqtlseeker2-nf --help\r\n```\r\n\r\n```\r\nN E X T F L O W  ~  version 0.27.2\r\nLaunching `sqtlseeker2.nf` [admiring_lichterman] - revision: 28c86caf1c\r\n\r\nsqtlseeker2-nf ~ A pipeline for splicing QTL mapping\r\n----------------------------------------------------\r\nRun sQTLseekeR2 on a set of data.\r\n\r\nUsage: \r\n    sqtlseeker2-nf [options]\r\n\r\nOptions:\r\n--genotype GENOTYPE_FILE    the genotype file\r\n--trexp EXPRESSION_FILE     the transcript expression file\r\n--metadata METADATA_FILE    the metadata file\r\n--genes GENES_FILE          the gene location file\r\n--dir DIRECTORY             the output directory\r\n--mode MODE                 the run mode: nominal or permuted (default: nominal)\r\n--win WINDOW                the cis window in bp (default: 5000)\r\n--covariates COVARIATES     include covariates in the model (default: false)\r\n--fdr FDR                   false discovery rate level (default: 0.05)\r\n--min_md MIN_MD             minimum effect size reported (default: 0.05)\r\n--svqtl SVQTLS              report svQTLs (default: false)\r\n\r\nAdditional parameters for mode = nominal:\r\n--ld LD          
           threshold for LD-based variant clustering (default: 0, no clustering)\r\n--kn KN                     number of genes per batch in nominal pass (default: 10)\r\n\r\nAdditional parameters for mode = permuted:\r\n--kp KP                     number of genes per batch in permuted pass (default: 10)\r\n--max_perm MAX_PERM         maximum number of permutations (default: 1000)\r\n```\r\n\r\n## Input files and format\r\n\r\n`sqtlseeker2-nf` takes as input files the following:\r\n\r\n* **Genotype file.**\r\nContains the genotype of each sample, coded as follows: 0 for REF/REF, 1 for REF/ALT, 2 for ALT/ALT, -1 for missing value.\r\nThe first four columns should be: `chr`, `start`, `end` and `snpId`. This file needs to be sorted by coordinate.\r\n\r\n* **Transcript expression file.**\r\nContains the expression of each transcript in each sample (e.g. read counts, RPKM, TPM).\r\nIt is not recommended to use transformed (log, quantile, or any non-linear transformation) expression.\r\nColumns `trId` and `geneId`, corresponding to the transcript and gene IDs, are required. \r\n\r\n* **Metadata file.** Contains the covariate information for each sample. \r\nIn addition, it defines the groups or conditions for which sQTL mapping will be performed.\r\nThe first columns should be: `indId`, `sampleId`, `group`, followed by the covariates.\r\nThis file defines which samples will be tested.\r\n\r\n* **Gene location file.**\r\nContains the location of each gene. Columns `chr`, `start`, `end` and `geneId` are required. \r\nThis file defines which genes will be tested.\r\n\r\nExample [data](data) is available for the test run.\r\n\r\n## Pipeline results\r\n\r\nsQTL mapping results are saved into the folder specified with the `--dir` parameter. 
By default it is the `result` folder within the current working directory.\r\n\r\nOutput files are organinzed into subfolders corresponding to the different `groups` specified in the metadata file: \r\n\r\n```\r\nresult\r\n└── groups\r\n    ├── group1                            \r\n    │   ├── all-tests.nominal.tsv          \r\n    │   ├── all-tests.permuted.tsv         \r\n    │   ├── sqtls-${level}fdr.nominal.tsv      \r\n    │   └── sqtls-${level}fdr.permuted.tsv     \r\n    ├── group2\r\n   ...\r\n```\r\n\r\nNote: if only a nominal pass was run, files `*.permuted.tsv` will not be present.\r\n\r\nOutput files contain the following information:\r\n\r\n`all-tests.nominal.tsv`\r\n\r\n* geneId: gene name\t\r\n* snpId: variant name\r\n* F: test statistic\r\n* nb.groups: number of genotype groups\r\n* md: maximum difference in relative expression between genotype groups (sQTL effect size)\r\n* tr.first/tr.second: the transcript IDs of the two transcripts that change the most, in opposite directions\r\n* info: number of individuals in each genotype group, including missing values (-1,0,1,2)\r\n* pv: nominal P-value\r\n\r\nif `--svqtl true`\r\n* F.svQTL: svQTL test statistic\r\n* nb.perms.svQTL: number of permutations for svQTL test\r\n* pv.svQTL: svQTL nominal P-value \r\n\r\nif `--ld ${r2}`\r\n* LD: other variants in linkage disequilibrium with snpId above a given r\u003csup\u003e2\u003c/sup\u003e threshold \u003e 0\r\n\r\n`sqtls-${level}fdr.nominal.tsv` (in addition to the previous)\r\n\r\n* fdr: false discovery rate (computed across all nominal tests)\r\n* fdr.svQTL: svQTL FDR\r\n\r\n`all-tests.permuted.tsv`\r\n\r\n* geneId: gene name\r\n* variants.cis: number of variants tested in *cis*\r\n* LD: median linkage disequilibrium in the region (r\u003csup\u003e2\u003c/sup\u003e)\r\n* best.snp: ID of the top variant\r\n* best.nominal.pv: P-value of the top variant\r\n* shape1: first parameter value of the fitted beta distribution\r\n* shape2: second parameter value of 
the fitted beta distribution (effective number of independent tests in the region)\r\n* nb.perm: number of permutations\r\n* pv.emp.perm: empirical P-value, computed based on permutations\r\n* pv.emp.beta: empirical P-value, computed based on the fitted beta distribution\r\n* runtime: run time in minutes\r\n\r\n`sqtls-${level}fdr.nominal.tsv` (in addition to the previous)\r\n\r\n* fdr: false discovery rate (computed across empirical P-values)\r\n* p_tn: gene-level threshold for nominal P-values\r\n\r\n## Cite sqtlseeker2-nf\r\n\r\nIf you find `sqtlseeker2-nf` useful in your research please cite the related publication:\r\n\r\nGarrido-Martín, D., Borsari, B., Calvo, M., Reverter, F., Guigó, R. Identification and analysis of splicing quantitative trait loci across multiple tissues in the human genome. *Nat Commun* 12, 727 (2021). [https://doi.org/10.1038/s41467-020-20578-2](https://doi.org/10.1038/s41467-020-20578-2)\r\n\r\n","organization":"Guigó lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/435?version=1","name":"master @ 8929a79","author":["Diego Garrido-Martín","Roderic Guigó"],"descriptor_type":["NFL"]}]},{"id":"436","url":"https://workflowhub.eu/workflows/436","name":"mvgwas-nf","description":"# mvgwas-nf\r\n\r\n[![nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.1-blue.svg)](http://nextflow.io)\r\n[![CI-checks](https://github.com/guigolab/sqtlseeker2-nf/actions/workflows/ci.yaml/badge.svg)](https://github.com/guigolab/sqtlseeker2-nf/actions/workflows/ci.yaml)\r\n\r\nA pipeline for multi-trait genome-wide association studies (GWAS) using [MANTA](https://github.com/dgarrimar/manta).\r\n\r\nThe pipeline performs the following analysis steps:\r\n\r\n* Split genotype file \r\n* Preprocess phenotype and covariate data\r\n* Test for association between phenotypes and genetic variants\r\n* Collect summary statistics\r\n\r\nThe pipeline uses 
[Nextflow](http://www.nextflow.io) as the execution backend. Please check [Nextflow documentation](http://www.nextflow.io/docs/latest/index.html) for more information.\r\n\r\n## Requirements\r\n\r\n- Unix-like operating system (Linux, MacOS, etc.)\r\n- Java 8 or later \r\n- [Docker](https://www.docker.com/) (v1.10.0 or later) or [Singularity](http://singularity.lbl.gov) (v2.5.0 or later)\r\n\r\n## Quickstart (~2 min)\r\n\r\n1. Install Nextflow:\r\n    ```\r\n    curl -fsSL get.nextflow.io | bash\r\n    ```\r\n\r\n2. Make a test run:\r\n    ```\r\n    nextflow run dgarrimar/mvgwas-nf -with-docker\r\n    ```\r\n\r\n**Notes**: move the `nextflow` executable to a directory in your `$PATH`. Set `-with-singularity` to use Singularity instead of Docker. \r\n\r\n(*) Alternatively you can clone this repository:\r\n```\r\ngit clone https://github.com/dgarrimar/mvgwas-nf\r\ncd mvgwas-nf\r\nnextflow run mvgwas.nf -with-docker\r\n```\r\n\r\n## Pipeline usage\r\n\r\nLaunching the pipeline with the `--help` parameter shows the help message:\r\n\r\n```\r\nnextflow run mvgwas.nf --help\r\n```\r\n\r\n```\r\nN E X T F L O W  ~  version 20.04.1\r\nLaunching `mvgwas.nf` [amazing_roentgen] - revision: 56125073b7\r\n\r\nmvgwas-nf: A pipeline for multivariate Genome-Wide Association Studies\r\n==============================================================================================\r\nPerforms multi-trait GWAS using using MANTA (https://github.com/dgarrimar/manta)\r\n\r\nUsage:\r\nnextflow run mvgwas.nf [options]\r\n\r\nParameters:\r\n--pheno PHENOTYPES          phenotype file\r\n--geno GENOTYPES            indexed genotype VCF file\r\n--cov COVARIATES            covariate file\r\n--l VARIANTS/CHUNK          variants tested per chunk (default: 10000)\r\n--t TRANSFOMATION           phenotype transformation: none, sqrt, log (default: none)\r\n--i INTERACTION             test for interaction with a covariate: none, \u003ccovariate\u003e (default: none)\r\n--ng INDIVIDUALS/GENOTYPE   
minimum number of individuals per genotype group (default: 10)\r\n--dir DIRECTORY             output directory (default: result)\r\n--out OUTPUT                output file (default: mvgwas.tsv)\r\n```\r\n\r\n## Input files and format\r\n\r\n`mvgwas-nf` requires the following input files:\r\n\r\n* **Genotypes.** \r\n[bgzip](http://www.htslib.org/doc/bgzip.html)-compressed and indexed [VCF](https://samtools.github.io/hts-specs/VCFv4.3.pdf) genotype file.\r\n\r\n* **Phenotypes.**\r\nTab-separated file with phenotype measurements (quantitative) for each sample (i.e. *n* samples x *q* phenotypes).\r\nThe first column should contain sample IDs. Columns should be named.\r\n\r\n* **Covariates.**\r\nTab-separated file with covariate measurements (quantitative or categorical) for each sample (i.e. *n* samples x *k* covariates). \r\nThe first column should contain sample IDs. Columns should be named. \r\n\r\nExample [data](data) is available for the test run.\r\n\r\n## Pipeline results\r\n\r\nAn output text file containing the multi-trait GWAS summary statistics (default: `./result/mvgwas.tsv`), with the following information:\r\n\r\n* `CHR`: chromosome\r\n* `POS`: position\r\n* `ID`: variant ID\r\n* `REF`: reference allele\r\n* `ALT`: alternative allele\r\n* `F`: pseudo-F statistic\r\n* `R2`: fraction of variance explained by the variant\r\n* `P`: P-value\r\n\r\nThe output folder and file names can be modified with the `--dir` and `--out` parameters, respectively.\r\n\r\n## Cite mvgwas-nf\r\n\r\nIf you find `mvgwas-nf` useful in your research please cite the related publication:\r\n\r\nGarrido-Martín, D., Calvo, M., Reverter, F., Guigó, R. A fast non-parametric test of association for multiple traits. *bioRxiv* (2022). 
[https://doi.org/10.1101/2022.06.06.493041](https://doi.org/10.1101/2022.06.06.493041)\r\n","organization":"Statistical genetics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/436?version=1","name":"master @ aaa979d","author":["Diego Garrido-Martín","Roderic Guigó"],"descriptor_type":["NFL"]}]},{"id":"437","url":"https://workflowhub.eu/workflows/437","name":"Delineating Regions-of-interest for Mass Spectrometry Imaging by Multimodally Corroborated Spatial Segmentation","description":"# ROIforMSI\r\nSource codes for manuscript \"Delineating Regions-of-interest for Mass Spectrometry Imaging by Multimodally Corroborated Spatial Segmentation\"\r\n\r\n\r\n\"ExampleWorkflow.ipynb\" is a methods document to demonstrate the workflow of our multimodal fusion-based spatial segmentation.\r\n\r\n\r\n\"Utilities.py\" contains all the tools to implement our method.\r\n\r\n\r\n\"gui.py\" and \"registration_gui.py\" are files to implement linear and nonlinear registration.\r\n\r\n(Licence: GPL-3)","organization":"Delineating Regions-of-interest for Mass Spectrometry Imaging by Multimodally Corroborated Spatial Segmentation","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/437?version=1","name":"main @ bd51f80","author":[],"descriptor_type":[]}]},{"id":"438","url":"https://workflowhub.eu/workflows/438","name":"Master of Pores 2","description":"# MoP2- DSL2 version of Master of Pores\r\n[![Docker Build Status](https://img.shields.io/docker/automated/biocorecrg/nanopore.svg)](https://cloud.docker.com/u/biocorecrg/repository/docker/biocorecrg/nanopore/builds)\r\n[![mop2-CI](https://github.com/biocorecrg/MoP2/actions/workflows/build.yml/badge.svg)](https://github.com/biocorecrg/MoP2/actions/workflows/build.yml)\r\n[![License: 
MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)\r\n[![Nextflow version](https://img.shields.io/badge/Nextflow-21.04.1-brightgreen)](https://www.nextflow.io/)\r\n[![Nextflow DSL2](https://img.shields.io/badge/Nextflow-DSL2-brightgreen)](https://www.nextflow.io/)\r\n[![Singularity version](https://img.shields.io/badge/Singularity-v3.2.1-green.svg)](https://www.sylabs.io/)\r\n[![Docker version](https://img.shields.io/badge/Docker-v20.10.8-blue)](https://www.docker.com/)\r\n\r\n\u003cbr/\u003e\r\n\r\n![MOP2](https://github.com/biocorecrg/MoP2/blob/main/img/master_red.jpg?raw=true)\r\n\r\n\r\nInspired by Metallica's [Master Of Puppets](https://www.youtube.com/watch?v=S7blkui3nQc)\r\n\r\n## Install\r\nPlease install nextflow and singularity or docker before.\r\n\r\nThen download the repo:\r\n\r\n```\r\ngit clone --depth 1 --recurse-submodules git@github.com:biocorecrg/MOP2.git\r\n```\r\n\r\nYou can use INSTALL.sh to download the version 3.4.5 of guppy or you can replace it with the version you prefer. Please consider that the support of VBZ compression of fast5 started with version 3.4.X. \r\n\r\n```\r\ncd MoP2; sh INSTALL.sh\r\n```\r\n\r\n## Testing\r\nYou can replace ```-with-singularity``` with ```-with-docker``` if you want to use the docker engine.\r\n\r\n```\r\ncd mop_preprocess\r\nnextflow run mop_preprocess.nf -with-singularity -bg -profile local \u003e log\r\n\r\n```\r\n\r\n## Reference\r\nIf you use this tool, please cite our papers:\r\n\r\n[\"Nanopore Direct RNA Sequencing Data Processing and Analysis Using MasterOfPores\"\r\nCozzuto L, Delgado-Tejedor A, Hermoso Pulido T, Novoa EM, Ponomarenko J. *N. Methods Mol Biol. 2023*;2624:185-205. doi: 10.1007/978-1-0716-2962-8_13.](https://link.springer.com/protocol/10.1007/978-1-0716-2962-8_13)\r\n\r\n[\"MasterOfPores: A Workflow for the Analysis of Oxford Nanopore Direct RNA Sequencing Datasets\"\r\nLuca Cozzuto, Huanle Liu, Leszek P. 
Pryszcz, Toni Hermoso Pulido, Anna Delgado-Tejedor, Julia Ponomarenko, Eva Maria Novoa.\r\n*Front. Genet., 17 March 2020.* https://doi.org/10.3389/fgene.2020.00211](https://www.frontiersin.org/articles/10.3389/fgene.2020.00211/full)\r\n\r\n\r\n## Documentation\r\nThe documentation is available at [https://biocorecrg.github.io/MOP2/docs/](https://biocorecrg.github.io/MOP2/docs/about.html)\r\n","organization":"Bioinformatics Unit @ CRG","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/438?version=1","name":"main @ e6eabb3","author":[],"descriptor_type":["NFL"]}]},{"id":"439","url":"https://workflowhub.eu/workflows/439","name":"pox-virus-amplicon/main","description":"A workflow for the analysis of pox virus genomes sequenced as half-genomes (for ITR resolution) in a tiled-amplicon approach","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/439?version=1","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/439?version=2","name":"v0.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/439?version=3","name":"v0.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/439?version=4","name":"v0.4","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"440","url":"https://workflowhub.eu/workflows/440","name":"IGVreport-nf","description":"# IGVreport-nf \r\n\r\n- [Description](#description)\r\n  - [Diagram](#diagram)\r\n  - [User guide](#user-guide)\r\n  - [Workflow summaries](#workflow-summaries)\r\n      - [Metadata](#metadata)\r\n      - [Component tools](#component-tools)\r\n      - [Required (minimum)\r\n        
inputs/parameters](#required-minimum-inputsparameters)\r\n  - [Additional notes](#additional-notes)\r\n  - [Help/FAQ/Troubleshooting](#helpfaqtroubleshooting)\r\n  - [Acknowledgements/citations/credits](#acknowledgementscitationscredits)\r\n\r\n## Description \r\n\r\nQuickly generate [IGV `.html` reports](https://github.com/igvteam/igv-reports) for a genomic region of interest in the human genome (hg38). Bcftools is used to subset a VCF to a region of interest, the subset VCF is then passed to IGV-reports, which generates a report consisting of a table of genomic sites or regions and associated IGV views for each site. The reports can be opened by any web browser as a static page.  \r\n\r\n### Diagram \r\n\r\n```mermaid\r\ngraph LR;\r\n    VCF--\u003e|bcftools view|SubsetVCF;\r\n    SubsetVCF--\u003e|IGVtools|HTMLreport;\r\n    AlignmentBAM--\u003e|IGVtools|HTMLreport;\r\n```\r\n\r\n### User guide\r\n\r\nThis workflow uses containers for all steps and can run using Singularity or Docker. It requires Nextflow and either Singularity or Docker be installed. For instructions on installing Nextflow, see their [documentation](https://www.nextflow.io/docs/latest/getstarted.html).\r\n\r\n**This workflow currently only generates reports for the human reference genome assembly, Hg38.** \r\n\r\nThe workflow runs three processes: \r\n1. The provided VCF file is subset to a region of interest using Bcftools view \r\n2. The Subset VCF file is then indexed using Bcftools index \r\n3. The subset VCF and provided Bam file are used to generate the html report for the region of interest. 
\r\n\r\nTo start clone this repository: \r\n```\r\ngit clone https://github.com/Sydney-Informatics-Hub/IGVreport-nf.git\r\n```\r\n\r\nFrom the IGVreport-nf directory, run the pipeline: \r\n```\r\nnextflow run main.nf --sample \u003csampleID\u003e \\\r\n    --bam \u003cpath/to/bam\u003e \\\r\n    --vcf \u003cpath/to/vcf\u003e \\\r\n    --chr \u003cchrID\u003e --start \u003cbegin bp\u003e --stop \u003cend bp\u003e     \r\n```\r\n\r\nThis will create a report in a directory titled `./Report`. You can rename this directory at runtime using the flag `--outDir`. All runtime summary reports will be available in the `./runInfo` directory.  \r\n\r\n### Workflow summaries\r\n\r\n#### Metadata \r\n\r\n|metadata field     | workflow_name / workflow_version  |\r\n|-------------------|:---------------------------------:|\r\n|Version            | 1.0                               |\r\n|Maturity           | under development                 |\r\n|Creators           | Georgie Samaha                    |\r\n|Source             | NA                                |\r\n|License            | GPL-3.0 license                   |\r\n|Workflow manager   | NextFlow                          |\r\n|Container          | None                              |\r\n|Install method     | NA                                |\r\n|GitHub             | github.com/Sydney-Informatics-Hub/IGVreport-nf    |\r\n|bio.tools \t        | NA                                |\r\n|BioContainers      | NA                                | \r\n|bioconda           | NA                                |\r\n\r\n#### Component tools\r\n\r\n* nextflow\u003e=20.07.1\r\n* singularity or docker\r\n* bcftools/1.16\r\n* igv-reports/1.6.1\r\n\r\n#### Required (minimum) inputs/parameters\r\n\r\n* An indexed alignment file in Bam format \r\n* A gzipped and indexed vcf file\r\n\r\n## Additional notes\r\n\r\n## Help/FAQ/troubleshooting \r\n\r\n## Acknowledgements/citations/credits\r\n\r\nThis workflow was developed by the Sydney 
Informatics Hub, a Core Research Facility of the University of Sydney and the Australian BioCommons which is enabled by NCRIS via Bioplatforms Australia. \r\n","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/440?version=1","name":"main @ 47b1b65","author":["Georgina Samaha","Tracy Chew"],"descriptor_type":["NFL"]}]},{"id":"442","url":"https://workflowhub.eu/workflows/442","name":"SNP-Calling Workflow","description":"# SNP-Calling\r\nGATK Variant calling pipeline for genomic data using Nextflow\r\n\r\n[![nextflow](https://img.shields.io/badge/nextflow-%E2%89%A522.04.5-brightgreen.svg)](http://nextflow.io)\r\n\r\n## Quickstart\r\n\r\nInstall Nextflow using the following command: \r\n\r\n    curl -s https://get.nextflow.io | bash\r\n  \r\nIndex reference genome:\r\n\r\n  `$ bwa index /path/to/reference/genome.fa`\r\n \r\n  `$ samtools faidx /path/to/reference/genome.fa`\r\n  \r\n  `$ gatk CreateSequenceDictionary -R /path/to/genome.fa -O genome.dict`\r\n\r\nLaunch the pipeline execution with the following command:\r\n\r\n    nextflow run jdetras/snp-calling -r main -profile docker\r\n  \r\n## Pipeline Description\r\n\r\nThe variant calling pipeline follows the recommended practices from GATK. The input genomic data are aligned to a reference genome using BWA. The alignemnt files are processed using Picard Tools. Variant calling is done using samtools and GATK. 
\r\n\r\n## Input files\r\n\r\nThe input files required to run the pipeline:\r\n* Genomic sequence paired reads, `*_{1,2}.fq.gz`\r\n* Reference genome, `*.fa`\r\n\r\n## Pipeline parameters\r\n\r\n### Usage\r\nUsage: `nextflow run jdetras/snp-calling -profile docker [options]`\r\n\r\nOptions:\r\n\r\n* `--reads` \r\n* `--genome`\r\n* `--output`\r\n\r\nExample: \r\n  `$ nextflow run jdetras/snp-calling -profile docker --reads '/path/to/reads/*_{1,2}.fq.gz' --genome '/path/to/reference/genome.fa' --output '/path/to/output'`\r\n\r\n#### `--reads`\r\n\r\n* The path to the FASTQ read files.\r\n* Wildcards (*, ?) can be used to declare multiple reads. Use single quotes when wildcards are used. \r\n* Default parameter: `$projectDir/data/reads/*_{1,2}.fq.gz`\r\n\r\nExample: \r\n  `$ nextflow run jdetras/snp-calling -profile docker --reads '/path/to/reads/*_{1,2}.fq.gz'`\r\n  \r\n#### `--genome`\r\n\r\n* The path to the genome file in fasta format.\r\n* The extension is `.fa`.\r\n* Default parameter: `$projectDir/data/reference/genome.fa`\r\n\r\nExample:\r\n  `$ nextflow run jdetras/snp-calling -profile docker --genome /path/to/reference/genome.fa`\r\n    \r\n#### `--output`\r\n\r\n* The path to the directory for the output files.\r\n* Default parameter: `$projectDir/output`\r\n\r\n## Software\r\n\r\n* [BWA 0.7.17](http://bio-bwa.sourceforge.net/)\r\n* [Samtools 1.3.1](http://www.htslib.org/)\r\n* [GATK 4.2.6.1](https://gatk.broadinstitute.org/) \r\n","organization":"IRRI Bioinformatics Group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/442?version=1","name":"main @ fde6f63","author":[],"descriptor_type":["NFL"]}]},{"id":"443","url":"https://workflowhub.eu/workflows/443","name":"GRAVI: Gene Regulatory Analysis using Variable Inputs","description":"# GRAVI: Gene Regulatory Analysis using Variable Inputs\r\n\r\nThis is a `snakemake` workflow for:\r\n\r\n1. 
Performing sample QC\r\n2. Calling ChIP peaks\r\n3. Performing Differential Binding Analysis\r\n4. Comparing results across ChIP targets\r\n\r\nThe minimum required input is one ChIP target with two conditions.\r\n\r\nFull documentation can be found [here](https://steveped.github.io/GRAVI/)\r\n\r\n\r\n## Snakemake Implementation\r\n\r\nThe basic workflow is written `snakemake`, requiring at least v7.7, and can be called using the following steps.\r\n\r\nFirstly, setup the required conda environments\r\n\r\n```\r\nsnakemake \\\r\n\t--use-conda \\\r\n\t--conda-prefix '/home/steveped/mambaforge/envs/' \\\r\n\t--conda-create-envs-only \\\r\n\t--cores 1\r\n```\r\n\r\nSecondly, create and inspect the rulegraph\r\n\r\n```\r\nsnakemake --rulegraph \u003e workflow/rules/rulegraph.dot\r\ndot -Tpdf workflow/rules/rulegraph.dot \u003e workflow/rules/rulegraph.pdf\r\n```\r\n\r\nFinally, the workflow itself can be run using:\r\n\r\n```\r\nsnakemake \\\r\n\t-p \\\r\n\t--use-conda \\\r\n\t--conda-prefix '/home/steveped/mambaforge/envs/' \\\r\n\t--notemp \\\r\n\t--rerun-triggers mtime \\\r\n\t--keep-going \\\r\n\t--cores 16\r\n```\r\n\r\nNote that this creates common environments able to be called by other workflows and is dependent on the user.\r\nFor me, my global conda environments are stored in `/home/steveped/mambaforge/envs/`.\r\nFor other users, this path will need to be modified.\r\n\r\nIf wishing to tidy the directory after a successful run, you can check which non-essential files can be deleted using `snakemake -n --delete-temp-output --cores 1`.\r\nIf the files earmarked for deletion are considered to be non-essential, they can be deleted by removing the `-n` flag from the above code: `snakemake --delete-temp-output --cores 1`.\r\nAs the bedgraph files produced by `macs2 callpeak` are typically very large, hence their conversion to bigwig files during the workflow, this step can free a considerable amount of disk space.\r\n","organization":"Black Ochre Data 
Labs","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/443?version=1","name":"v0.1.5d","author":["Stevie Pederson"],"descriptor_type":["SMK"]}]},{"id":"444","url":"https://workflowhub.eu/workflows/444","name":"WOMBAT-Pipelines","description":"## Introduction\r\n\r\n**wombat-p pipelines** is a bioinformatics analysis pipeline that bundles different workflows for the analysis of label-free proteomics data with the purpose of comparison and benchmarking. It allows using files from the [proteomics metadata standard SDRF](https://github.com/bigbio/proteomics-metadata-standard).\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. We used one of the [nf-core](https://nf-co.re/) templates. \r\n\r\n## Pipeline summary\r\n\r\nThis work contains four major different workflows for the analysis of label-free proteomics data, originating from LC-MS experiments.\r\n1. [MaxQuant](https://www.maxquant.org/) + [NormalyzerDE](https://normalyzerde.immunoprot.lth.se/)\r\n2. [SearchGui](http://compomics.github.io/projects/searchgui) + [Proline](https://www.profiproteomics.fr/proline/) + [PolySTest](https://bitbucket.org/veitveit/polystest)\r\n3. [Compomics tools](http://compomics.github.io/) + [FlashLFQ](https://github.com/smith-chem-wisc/FlashLFQ) + [MSqRob](https://github.com/statOmics/MSqRob)\r\n4. 
Tools from the [Trans-Proteomic Pipeline](http://tools.proteomecenter.org/TPP.php) + [ROTS](https://bioconductor.org/packages/release/bioc/html/ROTS.html)\r\n\r\nInitialization and parameterization of the workflows is based on tools from the [SDRF pipelines](https://github.com/bigbio/sdrf-pipelines), the [ThermoRawFileParser](http://compomics.github.io/projects/ThermoRawFileParser) with our own contributions and additional programs from the wombat-p organization [https://github.com/wombat-p/Utilities] as well as our [fork](https://github.com/elixir-proteomics-community/sdrf-pipelines). This includes setting a generalized set of data analysis parameters and the calculation of multiple benchmarks.\r\n\r\n## Credits\r\n\r\nnf-core/wombat was originally written by the members of the ELIXIR Implementation study  [Comparison, benchmarking and dissemination of proteomics data analysis pipelines](https://elixir-europe.org/internal-projects/commissioned-services/proteomics-pipelines) under the lead of Veit Schwämmle and major participation of David Bouyssié and Fredrik Levander.\r\n\r\n## Citations\r\n\r\nManuscript in preparation\r\n\r\n\r\nAs the workflows are using an nf-core template, we refer to the publication:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).","organization":"ELIXIR Proteomics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/444?version=1","name":"Version 1","author":["Veit Schwämmle"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/444?version=2","name":"Version 2","author":["Veit Schwämmle"],"descriptor_type":["NFL"]}]},{"id":"447","url":"https://workflowhub.eu/workflows/447","name":"ZARP: An automated workflow for processing of RNA-seq data","description":"[![ci](https://github.com/zavolanlab/zarp/workflows/CI/badge.svg?branch=dev)](https://github.com/zavolanlab/zarp/actions?query=workflow%3Aci)\r\n[![GitHub license](https://img.shields.io/github/license/zavolanlab/zarp?color=orange)](https://github.com/zavolanlab/zarp/blob/dev/LICENSE)\r\n[![DOI:10.1101/2021.11.18.469017](http://img.shields.io/badge/DOI-10.1101/2021.11.18.469017-B31B1B.svg)](https://doi.org/10.1101/2021.11.18.469017)\r\n\r\n\r\n\u003cdiv align=\"left\"\u003e\r\n    \u003cimg width=\"20%\" align=\"left\" src=https://raw.githubusercontent.com/zavolanlab/zarp/2bdf65deae5d4ffacc4b1a600d7d9ed425614255/images/zarp_logo.svg\u003e\r\n\u003c/div\u003e \r\n\r\n\r\n# **ZARP** ([Zavolan-Lab](https://www.biozentrum.unibas.ch/research/researchgroups/overview/unit/zavolan/research-group-mihaela-zavolan/) Automated RNA-Seq Pipeline) \r\n...is a generic RNA-Seq analysis workflow that allows \r\nusers to process and analyze Illumina short-read sequencing libraries with minimum effort. 
The workflow relies on \r\npublicly available bioinformatics tools and currently handles single or paired-end stranded bulk RNA-seq data.\r\nThe workflow is developed in [Snakemake](https://snakemake.readthedocs.io/en/stable/), a widely used workflow management system in the bioinformatics\r\ncommunity.\r\n\r\nAccording to the current ZARP implementation, reads are analyzed (pre-processed, aligned, quantified) with state-of-the-art\r\ntools to give meaningful initial insights into the quality and composition of an RNA-Seq library, reducing hands-on time for bioinformaticians and giving experimentalists the possibility to rapidly assess their data. Additional reports summarise the results of the individual steps and provide useful visualisations.\r\n\r\n\r\n\u003e **Note:** For a more detailed description of each step, please refer to the [workflow\r\n\u003e documentation](https://github.com/zavolanlab/zarp/blob/main/pipeline_documentation.md).\r\n\r\n\r\n## Requirements\r\n\r\nThe workflow has been tested on:\r\n- CentOS 7.5\r\n- Debian 10\r\n- Ubuntu 16.04, 18.04\r\n\r\n\u003e **NOTE:**\r\n\u003e Currently, we only support **Linux** execution. \r\n\r\n\r\n# Installation\r\n\r\n## 1. Clone the repository\r\n\r\nGo to the desired directory/folder on your file system, then clone/get the \r\nrepository and move into the respective directory with:\r\n\r\n```bash\r\ngit clone https://github.com/zavolanlab/zarp.git\r\ncd zarp\r\n```\r\n\r\n## 2. Conda and Mamba installation\r\n\r\nWorkflow dependencies can be conveniently installed with the [Conda](http://docs.conda.io/projects/conda/en/latest/index.html)\r\npackage manager. We recommend that you install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) \r\nfor your system (Linux). Be sure to select Python 3 option. 
\r\nThe workflow was built and tested with `miniconda 4.7.12`.\r\nOther versions are not guaranteed to work as expected.\r\n\r\nGiven that Miniconda has been installed and is available in the current shell the first\r\ndependency for ZARP is the [Mamba](https://github.com/mamba-org/mamba) package manager, which needs to be installed in\r\nthe `base` conda environment with:\r\n\r\n```bash\r\nconda install mamba -n base -c conda-forge\r\n```\r\n\r\n## 3. Dependencies installation\r\n\r\nFor improved reproducibility and reusability of the workflow,\r\neach individual step of the workflow runs either in its own [Singularity](https://sylabs.io/singularity/)\r\ncontainer or in its own [Conda](http://docs.conda.io/projects/conda/en/latest/index.html) virtual environment. \r\nAs a consequence, running this workflow has very few individual dependencies. \r\nThe **container execution** requires Singularity to be installed on the system where the workflow is executed. \r\nAs the functional installation of Singularity requires root privileges, and Conda currently only provides Singularity\r\nfor Linux architectures, the installation instructions are slightly different depending on your system/setup:\r\n\r\n### For most users\r\n\r\nIf you do *not* have root privileges on the machine you want\r\nto run the workflow on *or* if you do not have a Linux machine, please [install\r\nSingularity](https://sylabs.io/guides/3.5/admin-guide/installation.html) separately and in privileged mode, depending\r\non your system. You may have to ask an authorized person (e.g., a systems\r\nadministrator) to do that. This will almost certainly be required if you want\r\nto run the workflow on a high-performance computing (HPC) cluster. 
\r\n\r\n\u003e **NOTE:**\r\n\u003e The workflow has been tested with the following Singularity versions:  \r\n\u003e  * `v2.6.2`\r\n\u003e  * `v3.5.2`\r\n\r\nAfter installing Singularity, install the remaining dependencies with:\r\n```bash\r\nmamba env create -f install/environment.yml\r\n```\r\n\r\n\r\n### As root user on Linux\r\n\r\nIf you have a Linux machine, as well as root privileges, (e.g., if you plan to\r\nrun the workflow on your own computer), you can execute the following command\r\nto include Singularity in the Conda environment:\r\n\r\n```bash\r\nmamba env update -f install/environment.root.yml\r\n```\r\n\r\n## 4. Activate environment\r\n\r\nActivate the Conda environment with:\r\n\r\n```bash\r\nconda activate zarp\r\n```\r\n\r\n# Extra installation steps (optional)\r\n\r\n## 5. Non-essential dependencies installation\r\n\r\nMost tests have additional dependencies. If you are planning to run tests, you\r\nwill need to install these by executing the following command _in your active\r\nConda environment_:\r\n\r\n```bash\r\nmamba env update -f install/environment.dev.yml\r\n```\r\n\r\n## 6. Successful installation tests\r\n\r\nWe have prepared several tests to check the integrity of the workflow and its\r\ncomponents. These can be found in subdirectories of the `tests/` directory. \r\nThe most critical of these tests enable you to execute the entire workflow on a \r\nset of small example input files. Note that for this and other tests to complete\r\nsuccessfully, [additional dependencies](#installing-non-essential-dependencies) \r\nneed to be installed. 
\r\nExecute one of the following commands to run the test workflow \r\non your local machine:\r\n* Test workflow on local machine with **Singularity**:\r\n```bash\r\nbash tests/test_integration_workflow/test.local.sh\r\n```\r\n* Test workflow on local machine with **Conda**:\r\n```bash\r\nbash tests/test_integration_workflow_with_conda/test.local.sh\r\n```\r\nExecute one of the following commands to run the test workflow \r\non a [Slurm](https://slurm.schedmd.com/documentation.html)-managed high-performance computing (HPC) cluster:\r\n\r\n* Test workflow with **Singularity**:\r\n\r\n```bash\r\nbash tests/test_integration_workflow/test.slurm.sh\r\n```\r\n* Test workflow with **Conda**:\r\n\r\n```bash\r\nbash tests/test_integration_workflow_with_conda/test.slurm.sh\r\n```\r\n\r\n\u003e **NOTE:** Depending on the configuration of your Slurm installation you may\r\n\u003e need to adapt file `slurm-config.json` (located directly under `profiles`\r\n\u003e directory) and the arguments to options `--cores` and `--jobs`\r\n\u003e in the file `config.yaml` of a respective profile.\r\n\u003e Consult the manual of your workload manager as well as the section of the\r\n\u003e Snakemake manual dealing with [profiles].\r\n\r\n# Running the workflow on your own samples\r\n\r\n1. Assuming that your current directory is the repository's root directory,\r\ncreate a directory for your workflow run and move into it with:\r\n\r\n    ```bash\r\n    mkdir config/my_run\r\n    cd config/my_run\r\n    ```\r\n\r\n2. Create an empty sample table and a workflow configuration file:\r\n\r\n    ```bash\r\n    touch samples.tsv\r\n    touch config.yaml\r\n    ```\r\n\r\n3. Use your editor of choice to populate these files with appropriate\r\nvalues. 
Have a look at the examples in the `tests/` directory to see what the\r\nfiles should look like, specifically:\r\n\r\n    - [samples.tsv](https://github.com/zavolanlab/zarp/blob/main/tests/input_files/samples.tsv)\r\n    - [config.yaml](https://github.com/zavolanlab/zarp/blob/main/tests/input_files/config.yaml)\r\n\r\n    - For more details and explanations, refer to the [pipeline-documentation](https://github.com/zavolanlab/zarp/blob/main/pipeline_documentation.md)\r\n\r\n\r\n4. Create a runner script. Pick one of the following choices for either local\r\nor cluster execution. Before execution of the respective command, you need to\r\nremember to update the argument of the `--singularity-args` option of a\r\nrespective profile (file: `profiles/{profile}/config.yaml`) so that\r\nit contains a comma-separated list of _all_ directories\r\ncontaining input data files (samples and any annotation files etc) required for\r\nyour run.\r\n\r\n    Runner script for _local execution_:\r\n\r\n    ```bash\r\n    cat \u003c\u003c \"EOF\" \u003e run.sh\r\n    #!/bin/bash\r\n\r\n    snakemake \\\r\n        --profile=\"../../profiles/local-singularity\" \\\r\n        --configfile=\"config.yaml\"\r\n\r\n    EOF\r\n    ```\r\n\r\n    **OR**\r\n\r\n    Runner script for _Slurm cluster execution_ (note that you may need\r\n    to modify the arguments to `--jobs` and `--cores` in the file:\r\n    `profiles/slurm-singularity/config.yaml` depending on your HPC\r\n    and workload manager configuration):\r\n\r\n    ```bash\r\n    cat \u003c\u003c \"EOF\" \u003e run.sh\r\n    #!/bin/bash\r\n    mkdir -p logs/cluster_log\r\n    snakemake \\\r\n        --profile=\"../profiles/slurm-singularity\" \\\r\n        --configfile=\"config.yaml\"\r\n    EOF\r\n    ```\r\n\r\n    When running the pipeline with *conda* you should use `local-conda` and\r\n    `slurm-conda` profiles instead.\r\n\r\n5. 
Start your workflow run:\r\n\r\n    ```bash\r\n    bash run.sh\r\n    ```\r\n\r\n# Sample downloads from SRA\r\n\r\nAn independent Snakemake workflow `workflow/rules/sra_download.smk` is included\r\nfor the download of SRA samples with [sra-tools].\r\n\r\n\u003e Note: as of Snakemake 7.3.1, only profile conda is supported. \r\n\u003e Singularity fails because the *sra-tools* Docker container only has `sh` \r\nbut `bash` is required.\r\n\r\n\u003e Note: The workflow uses the implicit temporary directory \r\nfrom snakemake, which is called with [resources.tmpdir].\r\n\r\nThe workflow expects the following config:\r\n* `samples`, a sample table (tsv) with column *sample* containing *SRR* identifiers,\r\nsee example [here](https://github.com/zavolanlab/zarp/blob/main/tests/input_files/sra_samples.tsv).\r\n* `outdir`, an output directory\r\n* `samples_out`, a pointer to a modified sample table with location of fastq files\r\n* `cluster_log_dir`, the cluster log directory.\r\n\r\nFor executing the example one can use the following\r\n(with activated *zarp* environment):\r\n\r\n```bash\r\nsnakemake --snakefile=\"workflow/rules/sra_download.smk\" \\\r\n          --profile=\"profiles/local-conda\" \\\r\n          --config samples=\"tests/input_files/sra_samples.tsv\" \\\r\n                   outdir=\"results/sra_downloads\" \\\r\n                   samples_out=\"results/sra_downloads/sra_samples.out.tsv\" \\\r\n                   log_dir=\"logs\" \\\r\n                   cluster_log_dir=\"logs/cluster_log\"\r\n```\r\nAfter successful execution, `results/sra_downloads/sra_samples.out.tsv` should contain:\r\n```tsv\r\nsample\tfq1\tfq2\r\nSRR18552868\tresults/sra_downloads/SRR18552868/SRR18552868.fastq.gz\t\r\nSRR18549672\tresults/sra_downloads/SRR18549672/SRR18549672_1.fastq.gz\tresults/sra_downloads/SRR18549672/SRR18549672_2.fastq.gz\r\n```\r\n\r\n\r\n# Metadata completion with HTSinfer\r\nAn independent Snakemake workflow `workflow/rules/htsinfer.smk` that populates the 
`samples.tsv` required by ZARP with the sample specific parameters `seqmode`, `f1_3p`, `f2_3p`, `organism`, `libtype` and `index_size`. Those parameters are inferred from the provided `fastq.gz` files by [HTSinfer](https://github.com/zavolanlab/htsinfer).\r\n\r\n\u003e Note: The workflow uses the implicit temporary directory \r\nfrom snakemake, which is called with [resources.tmpdir].\r\n\r\n\r\nThe workflow expects the following config:\r\n* `samples`, a sample table (tsv) with column *sample* containing sample identifiers, as well as columns *fq1* and *fq2* containing the paths to the input fastq files\r\nsee example [here](https://github.com/zavolanlab/zarp/blob/main/tests/input_files/sra_samples.tsv). If the table contains further ZARP compatible columns (see [pipeline documentation](https://github.com/zavolanlab/zarp/blob/main/pipeline_documentation.md#read-sample-table)), the values specified there by the user are given priority over htsinfer's results. \r\n* `outdir`, an output directory\r\n* `samples_out`, path to a modified sample table with inferred parameters\r\n* `records`, set to 100000 per default\r\n  \r\nFor executing the example one can use the following\r\n(with activated *zarp* environment):\r\n```bash\r\ncd tests/test_htsinfer_workflow\r\nsnakemake \\\r\n    --snakefile=\"../../workflow/rules/htsinfer.smk\" \\\r\n    --restart-times=0 \\\r\n    --profile=\"../../profiles/local-singularity\" \\\r\n    --config outdir=\"results\" \\\r\n             samples=\"../input_files/htsinfer_samples.tsv\" \\\r\n             samples_out=\"samples_htsinfer.tsv\" \\\r\n    --notemp \\\r\n    --keep-incomplete\r\n```\r\n\r\nHowever, this call will exit with an error, as not all parameters can be inferred from the example files. The argument `--keep-incomplete` makes sure the `samples_htsinfer.tsv` file can nevertheless be inspected. 
\r\n\r\nAfter successful execution - if all parameters could be either inferred or were specified by the user - `[OUTDIR]/[SAMPLES_OUT]` should contain a populated table with parameters `seqmode`, `f1_3p`, `f2_3p`, `organism`, `libtype` and `index_size` for all input samples as described in the [pipeline documentation](https://github.com/zavolanlab/zarp/blob/main/pipeline_documentation.md#read-sample-table).\r\n\r\n","organization":"Zavolan Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/447?version=1","name":"main @ 930a818","author":["Zavolan Lab"],"descriptor_type":["SMK"]}]},{"id":"450","url":"https://workflowhub.eu/workflows/450","name":"MGnify raw reads taxonomic profiling pipeline","description":"The containerised pipeline for profiling shotgun metagenomic data is derived from the [MGnify](https://www.ebi.ac.uk/metagenomics/) pipeline raw-reads analyses, a well-established resource used for analyzing microbiome data.\r\nKey components:\r\n- Quality control and decontamination\r\n- rRNA and ncRNA detection using Rfam database\r\n- Taxonomic classification of SSU and LSU regions \r\n- Abundance analysis with mOTUs","organization":"MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/450?version=1","name":"main @ 5cd3f68","author":["Ekaterina Sakharova","Martin Beracochea"],"descriptor_type":["NFL"]}]},{"id":"451","url":"https://workflowhub.eu/workflows/451","name":"RASflow: RNA-Seq Analysis Snakemake Workflow","description":"# RASflow: RNA-Seq Analysis Snakemake Workflow\r\nRASflow is a modular, flexible and user-friendly RNA-Seq analysis workflow. \r\n\r\nRASflow can be applied to both model and non-model organisms. 
It supports mapping RNA-Seq raw reads to both genome and transcriptome (can be downloaded from public database or can be homemade by users) and it can do both transcript- and gene-level Differential Expression Analysis (DEA) when transcriptome is used as mapping reference. It requires little programming skill for basic use. If you're good at programming, you can do more magic with RASflow!\r\n\r\nYou can help support RASflow by citing our publication:\r\n\r\n**Zhang, X., Jonassen, I. RASflow: an RNA-Seq analysis workflow with Snakemake. BMC Bioinformatics 21, 110 (2020). https://doi.org/10.1186/s12859-020-3433-x**\r\n","organization":"Bioinformatics workflows for life science","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/451?version=1","name":"master @ 66c9f37","author":["Xiaokang Zhang"],"descriptor_type":["SMK"]}]},{"id":"452","url":"https://workflowhub.eu/workflows/452","name":"Mobilome Annotation Pipeline","description":"# Mobilome Annotation Pipeline (former MoMofy)\r\n\r\nBacteria can acquire genetic material through horizontal gene transfer, allowing them to rapidly adapt to changing environmental conditions. These mobile genetic elements can be classified into three main categories: plasmids, phages, and integrative elements. Plasmids are mostly extrachromosomal; phages can be found extrachromosomal or as temperate phages (prophages); whereas integrons are stably inserted in the chromosome. Autonomous elements are those integrative elements capable of excising themselves from the chromosome and reintegrating elsewhere. 
They can use a transposase (like insertion sequences and transposons) or an integrase/excisionase (like ICEs and IMEs).\r\n\r\nThe Mobilome Annotation Pipeline is a wrapper that integrates the output of different tools designed for the prediction of plasmids, phages, insertion sequences, and other autonomous integrative mobile genetic elements such as ICEs, IMEs and integrons in prokaryotic genomes and metagenomes. The output is a PROKKA gff file with extra entries for the mobilome.\r\n\r\n## Contents\r\n\r\n- [ Workflow ](#wf)\r\n- [ Setup ](#sp)\r\n- [ Install and dependencies ](#install)\r\n- [ Usage ](#usage)\r\n- [ Inputs ](#in)\r\n- [ Outputs ](#out)\r\n- [ Tests ](#test)\r\n- [ Citation ](#cite)\r\n\r\n\u003ca name=\"wf\"\u003e\u003c/a\u003e\r\n\r\n## Workflow\r\n\r\nThis workflow has the following main subworkflows:\r\n\r\n- Preprocessing: Rename and filter contigs, and run PROKKA annotation\r\n- Prediction: Run geNomad, ICEfinder, IntegronFinder, and ISEScan\r\n- Annotation: Generate extra-annotation for antimicrobial resistance genes (AMRFinderPlus) and other mobilome-related proteins (MobileOG).\r\n- Integration: Parse and integrate the outputs generated on `Prediction` and `Annotation` subworkflows. In this step optional results of VIRify v3.0.0 can be incorporated. MGEs \u003c500 bp lengh and predictions with no genes are discarded.\r\n- Postprocessing: Write the mobilome fasta file, write a report of the location of AMR genes (either mobilome or chromosome), and generate three new GFF files:\r\n\r\n1. `mobilome_clean.gff`: mobilome + associated CDSs\r\n2. `mobilome_extra.gff`: mobilome + ViPhOGs/mobileOG annotated genes (note that ViPhOG annotation is generated by VIRify)\r\n3. `mobilome_nogenes.gff`: mobilome only\r\n   The output `mobilome_nogenes.gff` is validated in this subworkflow.\r\n\r\n\u003ca name=\"sp\"\u003e\u003c/a\u003e\r\n\r\n## Setup\r\n\r\nThis workflow is built using [Nextflow](https://www.nextflow.io/). 
It uses Singularity containers making installation trivial and results highly reproducible.\r\nExplained in this section, there is one manual step required to build the singularity image for [ICEfinder](https://bioinfo-mml.sjtu.edu.cn/ICEfinder/index.php), as we can't distribute that software due to license issues.\r\n\r\n- Install [Nextflow version \u003e=21.10](https://www.nextflow.io/docs/latest/getstarted.html#installation)\r\n- Install [Singularity](https://github.com/apptainer/singularity/blob/master/INSTALL.md)\r\n\r\n\u003ca name=\"install\"\u003e\u003c/a\u003e\r\n\r\n## Install and dependencies\r\n\r\nTo get a copy of the Mobilome Annotation Pipeline, clone this repo by:\r\n\r\n```bash\r\n$ git clone https://github.com/EBI-Metagenomics/mobilome-annotation-pipeline.git\r\n```\r\n\r\nThe mobileOG-database is required to run an extra step of annotation on the mobilome coding sequences. The first time you run the Mobilome Annotation Pipeline, you will need to download the [Beatrix 1.6 v1](https://mobileogdb.flsi.cloud.vt.edu/entries/database_download) database, move the tarball to `mobilome-annotation-pipeline/databases`, decompress it, and run the script to format the db for diamond:\r\n\r\n```bash\r\n$ mv beatrix-1-6_v1_all.zip /PATH/mobilome-annotation-pipeline/databases\r\n$ cd /PATH/mobilome-annotation-pipeline/databases\r\n$ unzip beatrix-1-6_v1_all.zip\r\n$ nextflow run /PATH/mobilome-annotation-pipeline/format_mobileOG.nf\r\n```\r\n\r\nTwo additional databases need to be manually downloaded and extracted: [AMRFinder plus db](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest) and the [geNomad database](https://zenodo.org/records/8339387) databases. Then you can provide the paths to your databases using the `mobileog_db`, the `amrfinder_plus_db` and the `genomad_db` respectively when you run the pipeline.\r\n\r\nMost of the tools are available on [quay.io](https://quay.io) and no install is needed. 
However, in the case of ICEfinder, you will need to contact the author to get a copy of the software, visit the [ICEfinder website](https://bioinfo-mml.sjtu.edu.cn/ICEfinder/download.html) for more information. Once you have the `ICEfinder_linux.tar.gz` tarball, move it to `mobilome-annotation-pipeline/templates` and build the singularity image using the following command:\r\n\r\n```bash\r\n$ mv ICEfinder_linux.tar.gz /PATH/mobilome-annotation-pipeline/templates/\r\n$ cd /PATH/mobilome-annotation-pipeline/templates/\r\n$ sudo singularity build ../../singularity/icefinder-v1.0-local.sif icefinder-v1.0-local.def\r\n```\r\n\r\nThe path to the ICEfinder image needs to be provided when running the pipeline, unless a custom config file is created.\r\n\r\n\r\n\u003ca name=\"usage\"\u003e\u003c/a\u003e\r\n\r\n\r\n## Inputs\r\n\r\nTo run the Mobilome Annotation Pipeline on multiple samples, prepare a samplesheet with your input data that looks as in the following example. Note that `virify_gff` is an optional input for this pipeline generated with [VIRify](https://github.com/EBI-Metagenomics/emg-viral-pipeline) v3.0.0 tool. \r\n\r\n`samplesheet.csv`:\r\n\r\n```csv\r\nsample,assembly,user_proteins_gff,virify_gff\r\nminimal,/PATH/assembly.fasta,,\r\nassembly_proteins,/PATH/assembly.fasta,/PATH/proteins.gff,\r\nassembly_proteins_virify,/PATH/assembly.fasta,/PATH/proteins.gff,/PATH/virify_out.gff\r\n```\r\n\r\nEach row represents a sample. The minimal input is the (meta)genome assembly in fasta format.\r\n\r\nBasic run:\r\n\r\n```bash\r\n$ nextflow run /PATH/mobilome-annotation-pipeline/main.nf --input samplesheet.csv [--icefinder_sif icefinder-v1.0-local.sif]\r\n```\r\n\r\nNote that the final output in gff format is created by adding information to PROKKA output. If you have your own protein prediction files, provide the path to the uncompressed gff file in the samplesheet.csv. 
This file will be used to generate a `user_mobilome_extra.gff` file containing the mobilome plus any extra annotation generated on the annotation subworkflow.\r\n\r\nIf you want to integrate VIRify results to the final output provide the path to the GFF file generated by VIRify v3.0.0 in your samplesheet.csv.\r\n\r\n\r\n\u003ca name=\"out\"\u003e\u003c/a\u003e\r\n\r\n## Outputs\r\n\r\nResults will be written by default in the `mobilome_results` directory unless the `--outdir` option is used. There, you will find the following outputs:\r\n\r\n```bash\r\nmobilome_results/\r\n├── mobilome.fasta\r\n├── mobilome_prokka.gff\r\n├── overlapping_integrons.txt\r\n├── discarded_mge.txt\r\n├── func_annot/\r\n├── gff_output_files/\r\n├── prediction/\r\n└── preprocessing\r\n```\r\n\r\nThe AMRFinderPlus results are generated by default. The `func_annot/amr_location.txt` file contains a summary of the AMR genes annotated and their location (either mobilome or chromosome).\r\n\r\nThe file `discarded_mge.txt` contains a list of predictions that were discarded, along with the reason for their exclusion. Possible reasons include:\r\n\r\n1. 'mge \u003c 500bp' Discarded by length.\r\n2. 'no_cds' If there are no genes encoded in the prediction.\r\n\r\nThe file `overlapping_integrons.txt` is a report of long-MGEs with overlapping coordinates. No predictions are discarded in this case.\r\n\r\nThe main output files containing the mobilome predictions are `mobilome.fasta` containing the nucleotide sequences of every prediction, and `mobilome_prokka.gff` containing the mobilome annotation plus any other feature annotated by PROKKA, mobileOG, or ViPhOG (only when VIRify results are provided).\r\n\r\nThe mobilome prediction IDs are built as follows:\r\n\r\n1. Contig ID\r\n2. MGE type:\r\n   flanking_site\r\n   recombination_site\r\n   prophage\r\n   viral_sequence\r\n   plasmid\r\n   phage_plasmid\r\n   integron\r\n   conjugative_integron\r\n   insertion_sequence\r\n3. 
Start and end coordinates separated by ':'\r\n\r\nExample:\r\n\r\n```bash\r\n\u003econtig_id|mge_type-start:end\r\n```\r\n\r\nAny CDS with a coverage \u003e= 0.9 in the boundaries of a predicted MGE is considered as part of the mobilome and labelled acordingly in the attributes field under the key `location`.\r\n\r\nThe labels used in the Type column of the gff file corresponds to the following nomenclature according to the [Sequence Ontology resource](http://www.sequenceontology.org/browser/current_svn/term/SO:0000001) when possible:\r\n\r\n| Type in gff file                 | Sequence ontology ID                                                              | Element description                                         | Reporting tool            |\r\n| -------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------- | ------------------------- |\r\n| insertion_sequence               | [SO:0000973](http://www.sequenceontology.org/browser/current_svn/term/SO:0000973) | Insertion sequence                                          | ISEScan, PaliDIS          |\r\n| terminal_inverted_repeat_element | [SO:0000481](http://www.sequenceontology.org/browser/current_svn/term/SO:0000481) | Terminal Inverted Repeat (TIR) flanking insertion sequences | ISEScan, PaliDIS          |\r\n| integron                         | [SO:0000365](http://www.sequenceontology.org/browser/current_svn/term/SO:0000365) | Integrative mobilizable element                             | IntegronFinder, ICEfinder |\r\n| attC_site                        | [SO:0000950](http://www.sequenceontology.org/browser/current_svn/term/SO:0000950) | Integration site of DNA integron                            | IntegronFinder            |\r\n| conjugative_integron             | [SO:0000371](http://www.sequenceontology.org/browser/current_svn/term/SO:0000371) | Integrative Conjugative Element                
             | ICEfinder                 |\r\n| direct_repeat                    | [SO:0000314](http://www.sequenceontology.org/browser/current_svn/term/SO:0000314) | Flanking regions on mobilizable elements                    | ICEfinder                 |\r\n| prophage                         | [SO:0001006](http://www.sequenceontology.org/browser/current_svn/term/SO:0001006) | Temperate phage                                             | geNomad, VIRify           |\r\n| viral_sequence                   | [SO:0001041](http://www.sequenceontology.org/browser/current_svn/term/SO:0001041) | Viral genome fragment                                       | geNomad, VIRify           |\r\n| plasmid                          | [SO:0000155](http://www.sequenceontology.org/browser/current_svn/term/SO:0000155) | Plasmid                                                     | geNomad                   |\r\n\r\n\u003ca name=\"test\"\u003e\u003c/a\u003e\r\n\r\n## Tests\r\n\r\nNextflow tests are executed with [nf-test](https://github.com/askimed/nf-test). 
It takes around 3 minutes to execute.
data into either of the standard file formats OME-TIFF or OME-Zarr. \r\n\r\nThe tool wraps the dedicated file converters bfconvert and bioformats2raw to convert into OME-TIFF or OME-Zarr,\r\nrespectively. The workflow management system NextFlow is used to perform conversion in parallel for batches of images. \r\n\r\nThe tool also wraps s3 and Aspera clients (go-mc and aspera-cli, respectively). Therefore, input and output locations can \r\nbe specified as local or remote storage and file transfer will be performed automatically. The conversion can be run on \r\nHPC with Slurm.  \r\n\r\n![](figures/diagram.png)\r\n\r\n## Installation \u0026 Dependencies\r\n\r\n**Important** note: The package has been so far only tested on Ubuntu 20.04.\r\n\r\nThe minimal dependency to run the tool is NextFlow, which should be installed and made accessible from the command line.\r\n\r\nIf conda exists on your system, you can install BatchConvert together with NextFlow using the following script:\r\n```\r\ngit clone https://github.com/Euro-BioImaging/BatchConvert.git \u0026\u0026 \\ \r\nsource BatchConvert/installation/install_with_nextflow.sh\r\n```\r\n\r\n\r\nIf you already have NextFlow installed and accessible from the command line (or if you prefer to install it manually \r\ne.g., as shown [here](https://www.nextflow.io/docs/latest/getstarted.html)), you can also install BatchConvert alone, using the following script:\r\n```\r\ngit clone https://github.com/Euro-BioImaging/BatchConvert.git \u0026\u0026 \\ \r\nsource BatchConvert/installation/install.sh\r\n```\r\n\r\n\r\nOther dependencies (which will be **automatically** installed):\r\n- bioformats2raw (entrypoint bioformats2raw)\r\n- bftools (entrypoint bfconvert)\r\n- go-mc (entrypoint mc)\r\n- aspera-cli (entrypoint ascp)\r\n\r\nThese dependencies will be pulled and cached automatically at the first execution of the conversion command. 
\r\nThe mode of dependency management can  be specified by using the command line option ``--profile`` or `-pf`. Depending \r\non how this option is specified, the dependencies will be acquired / run either via conda or via docker/singularity containers. \r\n\r\nSpecifying ``--profile conda`` (default) will install the dependencies to an \r\nenvironment at ``./.condaCache`` and use this environment to run the workflow. This option \r\nrequires that miniconda/anaconda is installed on your system.    \r\n\r\nAlternatively, specifying ``--profile docker`` or ``--profile singularity`` will pull a docker or \r\nsingularity image with the dependencies, respectively, and use this image to run the workflow.\r\nThese options assume that the respective container runtime (docker or singularity) is available on \r\nyour system. If singularity is being used, a cache directory will be created at the path \r\n``./.singularityCache`` where the singularity image is stored. \r\n\r\nFinally, you can still choose to install the dependencies manually and use your own installations to run\r\nthe workflow. In this case, you should specify ``--profile standard`` and make sure the entrypoints\r\nspecified above are recognised by your shell.  \r\n\r\n\r\n## Configuration\r\n\r\nBatchConvert can be configured to have default options for file conversion and transfer. Probably, the most important sets of parameters\r\nto be configured include credentials for the remote ends. The easiest way to configure remote stores is by running the interactive \r\nconfiguration command as indicated below.\r\n\r\n### Configuration of the s3 object store\r\n\r\nRun the interactive configuration command: \r\n\r\n`batchconvert configure_s3_remote`\r\n\r\nThis will start a sequence of requests for s3 credentials such as name, url, access, etc. Provide each requested credential and click\r\nenter. Continue this cycle until the process is finished. 
Upon completing the configuration, the sequence of commands should roughly look like this:\r\n\r\n```\r\noezdemir@pc-ellenberg108:~$ batchconvert configure_s3_remote\r\nenter remote name (for example s3)\r\ns3\r\nenter url:\r\nhttps://s3.embl.de\r\nenter access key:\r\n\"your-access-key\"\r\nenter secret key:\r\n\"your-secret-key\"\r\nenter bucket name:\r\n\"your-bucket\"\r\nConfiguration of the default s3 credentials is complete\r\n```\r\n\r\n\r\n### Configuration of the BioStudies user space\r\n\r\nRun the interactive configuration command: \r\n\r\n`batchconvert configure_bia_remote`\r\n\r\nThis will prompt a request for the secret directory to connect to. Enter the secret directory for your user space and click enter. \r\nUpon completing the configuration, the sequence of commands should roughly look like this:\r\n\r\n```\r\noezdemir@pc-ellenberg108:~$ batchconvert configure_bia_remote\r\nenter the secret directory for BioImage Archive user space:\r\n\"your-secret-directory\"\r\nconfiguration of the default bia credentials is complete\r\n```\r\n\r\n### Configuration of the slurm options\r\n\r\nBatchConvert can also run on slurm clusters. In order to configure the slurm parameters, run the interactive configuration command: \r\n\r\n`batchconvert configure_slurm`\r\n\r\nThis will start a sequence of requests for slurm options. Provide each requested option and click enter. \r\nContinue this cycle until the process is finished. 
Upon completing the configuration, the sequence of commands should \r\nroughly look like this:\r\n\r\n```\r\noezdemir@pc-ellenberg108:~$ batchconvert configure_slurm\r\nPlease enter value for queue_size\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the current value ´50´\r\ns\r\nPlease enter value for submit_rate_limit\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the current value ´10/2min´\r\ns\r\nPlease enter value for cluster_options\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the current value ´--mem-per-cpu=3140 --cpus-per-task=16´\r\ns\r\nPlease enter value for time\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the current value ´6h´\r\ns\r\nconfiguration of the default slurm parameters is complete\r\n```\r\n\r\n### Configuration of the default conversion parameters\r\n\r\nWhile all conversion parameters can be specified as command line arguments, it can\r\nbe useful for the users to set their own default parameters to avoid re-entering those\r\nparameters for subsequent executions. BatchConvert allows for interactive configuration of \r\nconversion in the same way as configuration of the remote stores described above.\r\n\r\nTo configure the conversion into OME-TIFF, run the following command:\r\n\r\n`batchconvert configure_ometiff`\r\n\r\nThis will prompt the user to enter a series of parameters, which will then be saved as the \r\ndefault parameters to be passed to the `batchconvert ometiff` command. 
Upon completing the \r\nconfiguration, the sequence of commands should look similar to:\r\n\r\n```\r\noezdemir@pc-ellenberg108:~$ batchconvert configure_ometiff\r\nPlease enter value for noflat\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the parameter´s current value, which is \"bfconvert defaults\"\r\ns\r\nPlease enter value for series\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the parameter´s current value, which is \"bfconvert defaults\"\r\ns\r\nPlease enter value for timepoint\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the parameter´s current value, which is \"bfconvert defaults\"\r\ns\r\n...\r\n...\r\n...\r\n...\r\n...\r\n...\r\nConfiguration of the default parameters for 'bfconvert' is complete\r\n```\r\n\r\n\r\nTo configure the conversion into OME-Zarr, run the following command:\r\n\r\n`batchconvert configure_omezarr`\r\n\r\nSimilarly, this will prompt the user to enter a series of parameters, which will then be saved as the \r\ndefault parameters to be passed to the `batchconvert omezarr` command. 
Upon completing the configuration, \r\nthe sequence of commands should look similar to:\r\n\r\n```\r\noezdemir@pc-ellenberg108:~$ batchconvert configure_omezarr\r\nPlease enter value for resolutions_zarr\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the parameter´s current value, which is \"bioformats2raw defaults\"\r\ns\r\nPlease enter value for chunk_h\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the parameter´s current value, which is \"bioformats2raw defaults\"\r\ns\r\nPlease enter value for chunk_w\r\nClick enter if this parameter is not applicable\r\nEnter \"skip\" or \"s\" if you would like to keep the parameter´s current value, which is \"bioformats2raw defaults\"\r\n...\r\n...\r\n...\r\n...\r\n...\r\n...\r\nConfiguration of the default parameters for 'bioformats2raw' is complete\r\n```\r\n\r\nIt is important to note that the initial defaults for the conversion parameters are the same as the defaults\r\nof the backend tools bfconvert and bioformats2raw, as noted in the prompt excerpt above. Through interactive configuration, \r\nthe user is overriding these initial defaults and setting their own defaults. It is possible to reset the initial\r\ndefaults by running the following command.\r\n\r\n`batchconvert reset_defaults`\r\n\r\nAnother important point is that any of these configured parameters can be overridden by passing a value to that\r\nparameter in the commandline. For instance, in the following command, the value of 20 will be assigned to `chunk_h` parameter \r\neven if the value for the same parameter might be different in the configuration file. \r\n\r\n`batchconvert omezarr --chunk_h 20 \"path/to/input\" \"path/to/output\"`\r\n\r\n\r\n## Examples\r\n\r\n### Local conversion\r\n\r\n#### Parallel conversion of files to separate OME-TIFFs / OME-Zarrs:\r\nConvert a batch of images on your local storage into OME-TIFF format. 
\r\nNote that the `input_path` in the command given below is typically a \r\ndirectory with multiple image files but a single image file can also be passed:\\\r\n`batchconvert ometiff -pf conda \"input_path\" \"output_path\"` \r\n\r\nNote that if this is your first conversion with the profile `conda`, \r\nit will take a while for a conda environment with the dependencies to be\r\ncreated. All the subsequent conversion commands with the profile `conda`,\r\nhowever, will use this environment, and thus show no such delay.\r\n\r\nSince conda is the default profile, it does not have to be \r\nexplicitly included in the command line. Thus, the command can be shortened to:\\\r\n`batchconvert ometiff \"input_path\" \"output_path\"`\r\n\r\nConvert only the first channel of the images:\\\r\n`batchconvert ometiff -chn 0 \"input_path\" \"output_path\"`\r\n\r\nCrop the images being converted along x and y axis by 150 pixels:\\\r\n`batchconvert ometiff -cr 0,0,150,150 \"input_path\" \"output_path\"`\r\n\r\nConvert into OME-Zarr instead:\\\r\n`batchconvert omezarr \"input_path\" \"output_path\"`\r\n\r\nConvert into OME-Zarr with 3 resolution levels:\\\r\n`batchconvert omezarr -rz 3 \"input_path\" \"output_path\"`\r\n\r\nSelect a subset of images with a matching string such as \"mutation\":\\\r\n`batchconvert omezarr -p mutation \"input_path\" \"output_path\"`\r\n\r\nSelect a subset of images using wildcards. 
Note that the use of \"\" around \r\nthe input path is necessary when using wildcards:\\\r\n`batchconvert omezarr \"input_path/*D3*.oir\" \"output_path\"`\r\n\r\nConvert by using a singularity container instead of conda environment (requires\r\nsingularity to be installed on your system):\\\r\n`batchconvert omezarr -pf singularity \"input_path/*D3*.oir\" \"output_path\"`\r\n\r\nConvert by using a docker container instead of conda environment (requires docker\r\nto be installed on your system):\\\r\n`batchconvert omezarr -pf docker \"input_path/*D3*.oir\" \"output_path\"`\r\n\r\nNote that similarly to the case with the profile `conda`, the first execution of\r\na conversion with the profile `singularity` or `docker` will take a while for the\r\ncontainer image to be pulled. All the subsequent conversion commands using a \r\ncontainer option will use this image, and thus show no such delay. \r\n\r\nConvert local data and upload the output to an s3 bucket. Note that the output \r\npath is created relative to the bucket specified in your s3 configuration:\\\r\n`batchconvert omezarr -dt s3 \"input_path\" \"output_path\"`\r\n\r\nReceive input files from an s3 bucket, convert locally and upload the output to \r\nthe same bucket. Note that wildcards cannot be used when the input is from s3. \r\nUse pattern matching option `-p` for selecting a subset of input files:\\\r\n`batchconvert omezarr -p mutation -st s3 -dt s3 \"input_path\" \"output_path\"`\r\n\r\nReceive input files from your private BioStudies user space and convert them locally.\r\nUse pattern matching option `-p` for selecting a subset of input files:\\\r\n`batchconvert omezarr -p mutation -st bia \"input_path\" \"output_path\"`\r\n\r\nReceive an input from an s3 bucket, convert locally and upload the output to your \r\nprivate BioStudies user space. 
Use pattern matching option `-p` for selecting a subset \r\nof input files:\\\r\n`batchconvert omezarr -p mutation -st s3 -dt bia \"input_path\" \"output_path\"`\r\n\r\nNote that in all the examples shown above, BatchConvert treats each input file as separate,\r\nstandalone data point, disregarding the possibility that some of the input files might belong to \r\nthe same multidimensional array. Thus, each input file is converted to an independent \r\nOME-TIFF / OME-Zarr and the number of outputs will thus equal the number of selected input files.\r\nAn alternative scenario is discussed below.\r\n\r\n#### Parallel conversion of file groups by stacking multiple files into single OME-TIFFs / OME-Zarrs:\r\n\r\nWhen the flag `--merge_files` is specified, BatchConvert tries to detect which input files might \r\nbelong to the same multidimensional array based on the patterns in the filenames. Then a \"grouped conversion\" \r\nis performed, meaning that the files belonging to the same dataset will be incorporated into \r\na single OME-TIFF / OME-Zarr series, in that files will be concatenated along specific dimension(s) \r\nduring the conversion. Multiple file groups in the input directory can be detected and converted \r\nin parallel. \r\n\r\nThis feature uses Bio-Formats's pattern files as described [here](https://docs.openmicroscopy.org/bio-formats/6.6.0/formats/pattern-file.html).\r\nHowever, BatchConvert generates pattern files automatically, allowing the user to directly use the \r\ninput directory in the conversion command. BatchConvert also has the option of specifying the \r\nconcatenation axes in the command line, which is especially useful in cases where the filenames \r\nmay not contain dimension information.  \r\n\r\nTo be able to use the `--merge files` flag, the input file names must obey certain rules:\r\n1. File names in the same group must be uniform, except for one or more **numeric field(s)**, which\r\nshould show incremental change across the files. 
These so-called **variable fields** \r\nwill be detected and used as the dimension(s) of concatenation.\r\n2. The length of variable fields must be uniform within the group. For instance, if the\r\nvariable field has values reaching multi-digit numbers, leading \"0\"s should be included where needed \r\nin the file names to make the variable field length uniform within the group.\r\n3. Typically, each variable field should follow a dimension specifier. What patterns can be used as \r\ndimension specifiers are explained [here](https://docs.openmicroscopy.org/bio-formats/6.6.0/formats/pattern-file.html).\r\nHowever, BatchConvert also has the option `--concatenation_order`, which allows the user to\r\nspecify from the command line, the dimension(s), along which the files must be concatenated.\r\n4. File names that are unique and cannot be associated with any group will be assumed as\r\nstandalone images and converted accordingly. \r\n\r\nBelow are some examples of grouped conversion commands in the context of different possible use-case scenarios:\r\n\r\n**Example 1:**\r\n\r\nThis is an example of a folder with non-uniform filename lengths:\r\n```\r\ntime-series/test_img_T2\r\ntime-series/test_img_T4\r\ntime-series/test_img_T6\r\ntime-series/test_img_T8\r\ntime-series/test_img_T10\r\ntime-series/test_img_T12\r\n```\r\nIn this example, leading zeroes are missing in the variable fields of some filenames. \r\nA typical command to convert this folder to a single OME-TIFF would look like: \\\r\n`batchconvert --ometiff --merge_files \"input_dir/time-series\" \"output_path\"`\r\n\r\nHowever, this command would fail to create a single OME-Zarr folder due to the non-uniform \r\nlengths of the filenames. 
Instead, the files would be split into two groups based on the\r\nfilename length, leading to two separate OME-Zarrs with names:\r\n\r\n`test_img_TRange{2-8-2}.ome.zarr` and `test_img_TRange{10-12-2}.ome.zarr`\r\n\r\nHere is the corrected version of the folder for the above example-\r\n```\r\ntime-series/test_img_T02\r\ntime-series/test_img_T04\r\ntime-series/test_img_T06\r\ntime-series/test_img_T08\r\ntime-series/test_img_T10\r\ntime-series/test_img_T12\r\n```\r\n\r\nExecuting the same command on this folder would result in a single OME-Zarr with the name:\r\n`test_img_TRange{02-12-2}.ome.zarr`\r\n\r\n**Example 2**- \r\n\r\nIn this example, the filename lengths are uniform but the incrementation within the variable field is not.\r\n```\r\ntime-series/test_img_T2\r\ntime-series/test_img_T4\r\ntime-series/test_img_T5\r\ntime-series/test_img_T7\r\n```\r\n\r\nA typical command to convert this folder to a single OME-Zarr would look like: \\\r\n`batchconvert --omezarr --merge_files \"input_dir/time-series\" \"output_path\"`\r\n\r\nHowever, the command would fail to assume these files as a single group due to the\r\nnon-uniform incrementation in the variable field of the filenames. 
Instead, the dataset \r\nwould be split into two groups, leading to two separate OME-Zarrs with the following names:\r\n`test_img_TRange{2-4-2}.ome.zarr` and `test_img_TRange{5-7-2}.ome.zarr`  \r\n\r\n\r\n**Example 3**\r\n\r\nThis is an example of a case where the conversion attempts to concatenate files along two\r\ndimensions, channel and time.\r\n```\r\nmultichannel_time-series/test_img_C1-T1\r\nmultichannel_time-series/test_img_C1-T2\r\nmultichannel_time-series/test_img_C1-T3\r\nmultichannel_time-series/test_img_C2-T1\r\nmultichannel_time-series/test_img_C2-T2\r\n```\r\nTo convert this folder to a single OME-Zarr, one could try the following command: \\\r\n`batchconvert --omezarr --merge_files \"input_dir/multichannel_time-series\" \"output_path\"`\r\n\r\nHowever, since the channel-2 does not have the same number of timeframes as the channel-1, \r\nBatchConvert will fail to assume these two channels as part of the same series and\r\nwill instead split the two channels into two separate OME-Zarrs. 
\r\n\r\nThe output would look like: \\\r\n`test_img_C1-TRange{1-3-1}.ome.zarr` \\\r\n`test_img_C2-TRange{1-2-1}.ome.zarr`\r\n\r\nTo be able to really incorporate all files into a single OME-Zarr, the folder should have equal\r\nnumber of images corresponding to both channels, as shown below:\r\n```\r\nmultichannel_time-series/test_img_C1-T1\r\nmultichannel_time-series/test_img_C1-T2\r\nmultichannel_time-series/test_img_C1-T3\r\nmultichannel_time-series/test_img_C2-T1\r\nmultichannel_time-series/test_img_C2-T2\r\nmultichannel_time-series/test_img_C2-T3\r\n```\r\nThe same conversion command on this version of the input folder would result in a single \r\nOME-Zarr with the name: \\\r\n`test_img_CRange{1-2-1}-TRange{1-3-1}.ome.zarr`\r\n\r\n\r\n**Example 4**\r\n\r\nThis is another example of a case, where there are multiple filename patterns in the input folder.\r\n\r\n```\r\nfolder_with_multiple_groups/test_img_C1-T1\r\nfolder_with_multiple_groups/test_img_C1-T2\r\nfolder_with_multiple_groups/test_img_C2-T1\r\nfolder_with_multiple_groups/test_img_C2-T2\r\nfolder_with_multiple_groups/test_img_T1-Z1\r\nfolder_with_multiple_groups/test_img_T1-Z2\r\nfolder_with_multiple_groups/test_img_T1-Z3\r\nfolder_with_multiple_groups/test_img_T2-Z1\r\nfolder_with_multiple_groups/test_img_T2-Z2\r\nfolder_with_multiple_groups/test_img_T2-Z3\r\n```\r\n\r\nOne can convert this folder with- \\\r\n`batchconvert --omezarr --merge_files \"input_dir/folder_with_multiple_groups\" \"output_path\"`\r\n \r\nBatchConvert will detect the two patterns in this folder and perform two grouped conversions. \r\nThe output folders will be named as `test_img_CRange{1-2-1}-TRange{1-2-1}.ome.zarr` and \r\n`test_img_TRange{1-2-1}-ZRange{1-3-1}.ome.zarr`. 
\r\n\r\n\r\n**Example 5**\r\n\r\nNow imagine that we have the same files as in the example 4 but the filenames of the\r\nfirst group lack any dimension specifier, so we have the following folder:\r\n\r\n```\r\nfolder_with_multiple_groups/test_img_1-1\r\nfolder_with_multiple_groups/test_img_1-2\r\nfolder_with_multiple_groups/test_img_2-1\r\nfolder_with_multiple_groups/test_img_2-2\r\nfolder_with_multiple_groups/test_img_T1-Z1\r\nfolder_with_multiple_groups/test_img_T1-Z2\r\nfolder_with_multiple_groups/test_img_T1-Z3\r\nfolder_with_multiple_groups/test_img_T2-Z1\r\nfolder_with_multiple_groups/test_img_T2-Z2\r\nfolder_with_multiple_groups/test_img_T2-Z3\r\n```\r\n\r\nIn such a scenario, BatchConvert allows the user to specify the concatenation axes \r\nvia `--concatenation_order` option. This option expects comma-separated strings of dimensions \r\nfor each group. In this example, the user must provide a string of 2 characters, such as `ct` for \r\nchannel and time, for group 1, since there are two variable fields for this group. Since group 2 \r\nalready has dimension specifiers (T and Z as specified in the filenames preceding the variable fields),\r\nthe user does not need to specify anything for this group, and can enter `auto` or `aa` for automatic\r\ndetection of the specifiers. \r\n\r\nSo the following line can be used to convert this folder: \\\r\n`batchconvert --omezarr --merge_files --concatenation_order ct,aa \"input_dir/folder_with_multiple_groups\" \"output_path\"`\r\n\r\nThe resulting OME-Zarrs will have the names:\r\n`test_img_CRange{1-2-1}-TRange{1-2-1}.ome.zarr` and\r\n`test_img_TRange{1-2-1}-ZRange{1-3-1}.ome.zarr`\r\n\r\nNote that `--concatenation_order` will override any dimension specifiers already\r\nexisting in the filenames.\r\n\r\n\r\n**Example 6**\r\n\r\nThere can be scenarios where the user may want to have further control over the axes along \r\nwhich to concatenate the images. 
For example, the filenames might contain the data acquisition\r\ndate, which can be recognised by BatchConvert as a concatenation axis in the automatic \r\ndetection mode. An example of such a fileset might look like:\r\n\r\n```\r\nfilenames_with_dates/test_data_date03.03.2023_imageZ1-T1\r\nfilenames_with_dates/test_data_date03.03.2023_imageZ1-T2\r\nfilenames_with_dates/test_data_date03.03.2023_imageZ1-T3\r\nfilenames_with_dates/test_data_date03.03.2023_imageZ2-T1\r\nfilenames_with_dates/test_data_date03.03.2023_imageZ2-T2\r\nfilenames_with_dates/test_data_date03.03.2023_imageZ2-T3\r\nfilenames_with_dates/test_data_date04.03.2023_imageZ1-T1\r\nfilenames_with_dates/test_data_date04.03.2023_imageZ1-T2\r\nfilenames_with_dates/test_data_date04.03.2023_imageZ1-T3\r\nfilenames_with_dates/test_data_date04.03.2023_imageZ2-T1\r\nfilenames_with_dates/test_data_date04.03.2023_imageZ2-T2\r\nfilenames_with_dates/test_data_date04.03.2023_imageZ2-T3\r\n```\r\n\r\nOne may try the following command to convert this folder:\r\n\r\n`batchconvert --omezarr --merge_files \"input_dir/filenames_with_dates\" \"output_path\"`\r\n\r\nSince the concatenation axes are not specified, this command would try to create\r\na single OME-Zarr with name: `test_data_dateRange{03-04-1}.03.2023_imageZRange{1-2-1}-TRange{1-3-1}`.\r\n\r\nIn order to force BatchConvert to ignore the date field, the user can restrict the concatenation \r\naxes to the last two numeric fields. This can be done by using a command such as: \\\r\n`batchconvert --omezarr --merge_files --concatenation_order aa \"input_dir/filenames_with_dates\" \"output_path\"` \\\r\nThis command will avoid concatenation along the date field, and therefore, there will be two\r\nOME-Zarrs corresponding to the two dates. 
The number of characters being passed to the \r\n`--concatenation_order` option specifies the number of numeric fields (starting from the right \r\nend of the filename) that are recognised by the BatchConvert as valid concatenation axes. \r\nPassing `aa`, therefore, means that the last two numeric fields must be recognised as \r\nconcatenation axes and the dimension type should be automatically detected (`a` for automatic). \r\nIn the same logic, one could, for example, convert each Z section into a separate OME-Zarr by \r\nspecifying `--concatenation_order a`.\r\n\r\n\r\n\r\n### Conversion on slurm\r\n\r\nAll the examples given above can also be run on slurm by specifying `-pf cluster` option. \r\nNote that this option automatically uses the singularity profile:\\\r\n`batchconvert omezarr -pf cluster -p .oir \"input_path\" \"output_path\"`\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n","organization":"Euro-BioImaging, NGFF Tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/453?version=1","name":"main @ 4ad22ae","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/453?version=2","name":"main @ 03e32fe","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/453?version=3","name":"main @ a2891c0","author":[],"descriptor_type":["NFL"]}]},{"id":"454","url":"https://workflowhub.eu/workflows/454","name":"MetaGT: A pipeline for de novo assembly of metatranscriptomes with the aid of metagenomic data","description":"**Assembly and quantification metatranscriptome using metagenome data**.\r\n\r\nVersion: see VERSION\r\n\r\n## Introduction\r\n\r\n**MetaGT** is a bioinformatics analysis pipeline used for improving and quantification \r\nmetatranscriptome assembly using metagenome data. The pipeline supports Illumina sequencing \r\ndata and complete metagenome and metatranscriptome assemblies. 
The pipeline involves the \r\nalignment of metatranscriprome assembly to the metagenome assembly with further extracting CDSs,\r\nwhich are covered by transcripts.\r\n\r\nThe pipeline is built using Nextflow, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. The Nextflow DSL2 implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies.\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-brightgreen.svg)](https://www.nextflow.io/)\r\n\r\n[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/)\r\n\r\n## Quick Start\r\n\r\n1. Install [`nextflow`](https://nf-co.re/usage/installation)\r\n\r\n2. Install any of [`Conda`](https://conda.io/miniconda.html) for full pipeline reproducibility \r\n\r\n3. Download the pipeline, e.g. by cloning metaGT GitHub repository:\r\n\r\n    ```bash\r\n    git clone git@github.com:ablab/metaGT.git\r\n    ```\r\n   \r\n4. Test it on a minimal dataset by running:\r\n\r\n    ```bash\r\n    nextflow run metaGT -profile test,conda\r\n    ```\r\n   \r\n5. 
Start running your own analysis!\r\n    \u003e Typical command for analysis using reads:\r\n\r\n    ```bash\r\n    nextflow run metaGT -profile \u003cconda\u003e --dna_reads '*_R{1,2}.fastq.gz' --rna_reads '*_R{1,2}.fastq.gz'\r\n    ```\r\n    \u003e Typical command for analysis using multiple files with reads:\r\n\r\n    ```bash\r\n    nextflow run metaGT -profile \u003cconda\u003e --dna_reads '*.yaml' --rna_reads '*.yaml' --yaml\r\n    ```\r\n    \u003e Typical command for analysis using assemblies:\r\n\r\n    ```bash\r\n    nextflow run metaGT -profile \u003cconda\u003e --genome '*.fasta' --transcriptome '*.fasta'\r\n    ```\r\n## Pipeline Summary\r\nOptionally, if raw reades are used:\r\n\r\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of default steps of pipeline --\u003e\r\n\r\n* Sequencing quality control (`FastQC`)\r\n* Assembly metagenome or metatranscriptome (`metaSPAdes, rnaSPAdes `)\r\n\r\nBy default, the pipeline currently performs the following:\r\n\r\n* Annotation metagenome (`Prokka`)\r\n* Aligning metatranscriptome on metagenome (`minimap2`)\r\n* Annotation unaligned transcripts (`TransDecoder`)\r\n* Clustering covered CDS and CDS from unaligned transcripts (`MMseqs2`)\r\n* Quantifying abundances of transcripts (`kallisto`)\r\n\r\n## Citation\r\n\r\nMetaGT was developed by Daria Shafranskaya and Andrey Prjibelski.\r\nIf you use it in your research please cite:\r\n\r\n[MetaGT: A pipeline for de novo assembly of metatranscriptomes with the aid of metagenomic data](https://doi.org/10.3389/fmicb.2022.981458)\r\n\r\n## Feedback and bug report\r\n\r\nIf you have any questions, please leave an issue at out [GitHub page](https://github.com/ablab/metaGT/issues).\r\n","organization":"HoloFood at MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/454?version=1","name":"main @ 70395bb","author":["Andrey Prjibelski","Varsha Kale","Anton 
Korobeynikov"],"descriptor_type":["NFL"]}]},{"id":"455","url":"https://workflowhub.eu/workflows/455","name":"WRF / EMEP Linear Workflow","description":"# WRF/EMEP Linear Workflow\r\n\r\nExample Common Workflow Language (CWL) workflow and tool descriptors for running the \r\nWeather Research and Forecase (WRF) and EMEP models.\r\n\r\nThis workflow is designed for a single model domain. Example datasets for testing this \r\nworkflow can be downloaded from Zenodo.\r\n\r\n\r\n## Requirements:\r\n\r\n* docker or singularity\r\n* conda\r\n* cwltool\r\n* Toil - optional, useful for running on HPC or distributed computing systems\r\n\r\n### CWL / Toil Installation:\r\n\r\nThe workflow runner (either cwltool, or Toil) can be installed using either conda or pip.\r\nEnvironment files for conda are included, and can be used as shown below:\r\n* cwltool only:\r\n  * `conda env create --file install/env_cwlrunner.yml --name cwl`\r\n* Toil \u0026 cwltool:\r\n  * `conda env create --file install/env_toil.yml --name toil`\r\n\r\n### Setup for Example Workflow\r\n\r\n* Download the example dataset from Zenodo: https://doi.org/10.5281/zenodo.7817216\r\n* Extract into the `input_files` directory:\r\n  * `tar -zxvf wrf_emep_UK_example_inputs.tar.gz -C input_files --strip-components=1`\r\n\r\n## Running the Workflow\r\n\r\nThe full workflow is broken into several logical steps:\r\n1. ERA5 download\r\n2. WPS 1st step: Geogrid geography file creation\r\n3. WPS process: ungribbing of ERA5 data, and running of metgrid to produce meteorology files.\r\n4. WRF process: generation of WRF input files by REAL, and running of WRF model\r\n5. 
EMEP model: running of EMEP chemistry and transport model\r\n\r\nSteps 1 and 3 require you to register with the CDS service, in order to download ERA5 data\r\nbefore using in the WPS process.\r\nSteps 2 and 5 require you to download extra input data - the instructions on how to do this\r\nare included in the README.txt files in the relevant input data directories.\r\n\r\nA full workflow for all steps is provided here. But each separate step can by run on it's \r\nown too, following the instructions given below. We recommend running step 4 first, to \r\nexplore how the REAL \u0026 WRF workflow works, before trying the other steps.\r\n\r\n### 1. ERA5 download.\r\n\r\nBefore running the ERA5 download tool, ensure that you have reqistered for the CDS service, \r\nsigned the ERA5 licensing agreement, and saved the CDS API key (`.cdsapirc`) in your \r\nworking directory.\r\n\r\nTo run the ERA5 download tool use the following command:\r\n```\r\ncwltool [--cachdir CACHE] [--singularity] workflows/era5_workflow.cwl example_workflow_configurations/era5_download_settings.yaml\r\n```\r\nNote that the `--cachedir CACHE` option sets the working directory cache, which enables the\r\nreuse of any steps previously run (and the restarting of the workflow from this point).\r\nThe `--singularity` option is needed if you are using singularity instead of docker.\r\n\r\n### 2. WPS: Geogrid geography file creation\r\n\r\nBefore running the geogrid tool you will need to download the geography data from the\r\n[UCAR website](https://www2.mmm.ucar.edu/wrf/users/download/get_sources_wps_geog.html).\r\nThese should be extracted into the `input_files/geogrid_geog_input` directory.\r\n\r\nTo run the geogrid program use the following command:\r\n```\r\ncwltool [--cachdir CACHE] [--singularity] workflows/geogrid_workflow.cwl example_workflow_configurations/wps_geogrid_cwl_settings.yaml\r\n```\r\n\r\n### 3. 
WPS: Creation of meteorology input files\r\n\r\nBefore running the WPS process you will have to download the ERA5 datafiles (which will be\r\ncalled `preslev_[YYYYMMDD].grib` and `surface_[YYYYMMDD].grib`) and copy these to the directory\r\n`input_files/wps_era5_input`. If you have also run geogrid in step 2 you can replace the \r\n`geo_em.d01.nc` file in the `input_files/wps_geogrid_input` directory with the file that \r\ngeogrid created.\r\n\r\nTo run the wps metgrid process use the following command:\r\n```\r\ncwltool [--cachdir CACHE] [--singularity] workflows/wps_workflow.cwl example_workflow_configurations/wps_metgrid_cwl_settings.yaml\r\n```\r\n\r\n### 4. WRF: Creation of WRF input files, and running WRF model\r\n\r\nThe WRF model can be run without any prepreparation, except for the downloading of the \r\ninput data from Zenodo. However, if you have created new meteorology files (`met_em*`) using\r\nWPS you can replace the files in the `input_files/wrf_met_input` directory with these.\r\n\r\nTo run the WRF process (including REAL) use the following command:\r\n```\r\ncwltool [--cachdir CACHE] [--singularity] workflows/wrf_workflow.cwl example_workflow_configurations/wrf_real_cwl_settings.yaml\r\n``` \r\n\r\n### 5. EMEP: Running EMEP chemistry and transport model\r\n\r\nBefore running the EMEP model you will need to download the EMEP input dataset. This can be\r\ndone using the `catalog.py` tool, following the instructions in the `input_files/emep_input/README.txt`\r\nfile. 
If you have run WRF you can also replace the `wrfout*` data files in the \r\n`input_Files/emep_wrf_input` directory with those you have created.\r\n\r\nTo run the EMEP model use the following command:\r\n```\r\ncwltool [--cachdir CACHE] [--singularity] workflows/emep_workflow.cwl example_workflow_configurations/emep_cwl_settings.yaml\r\n```\r\n\r\n### Full Workflow\r\n\r\nBefore running the full workflow make sure you have carried out the setup tasks described\r\nabove.\r\n\r\nTo run the full workflow use the following command:\r\n```\r\ncwltool [--cachdir CACHE] [--singularity] wrf_emep_full_workflow.cwl example_workflow_configurations/wrf_emep_full_workflow_cwl_settings.yaml\r\n```\r\n\r\n## Notes\r\n\r\n### WRF filenames\r\n\r\nIn order to work with singularity, all filenames need to exclude special characters.\r\nTo ensure that all WRF filenames comply with this requirement, you will need to add the \r\n`nocolons = .true.` option to your WPS, REAL and WRF namelists to ensure this.\r\n\r\n### MPI parallel processing\r\n\r\nThe WPS processes all run in single thread mode. REAL, WRF and EMEP have been compiled with\r\nMPI support. The default cores for each of these is 2, 9 and 9, respectively. 
The \r\nsettings file can be edited to modify these requirements.\r\n\r\n### Caching intermediate workflow steps\r\n\r\nTo cache the data from individual steps you can use the `--cachedir \u003ccache-dir\u003e` optional flag.\r\n\r\n\r\n## License and Copyright \r\n\r\nThese workflow scripts have been developed by the [Research IT](https://research-it.manchester.ac.uk/) \r\nat the [University of Manchester](https://www.manchester.ac.uk/).\r\n\r\nCopyright 2023 [University of Manchester, UK](https://www.manchester.ac.uk/).\r\n\r\nLicensed under the MIT license, see the LICENSE file for details.","organization":"Air Quality Prediction","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/455?version=1","name":"main @ 70c6a60","author":["Douglas Lowe"],"descriptor_type":["CWL"]}]},{"id":"456","url":"https://workflowhub.eu/workflows/456","name":"Formula","description":"cccccc","organization":"Workflows for geographic science","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/456?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"457","url":"https://workflowhub.eu/workflows/457","name":"PyUtils","description":"","organization":"Workflows for geographic science","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/457?version=1","name":"main @ 88fb6a4","author":[],"descriptor_type":[]}]},{"id":"460","url":"https://workflowhub.eu/workflows/460","name":"Gene similariy anaylsis across physiological systems in IMPC phenotype data","description":"# Gene similariy anaylsis across physiological systems in IMPC phenotype data\r\n\r\nA Jupyter Notebook tool for analysing user specified genes across the different physiological systems in IMPC data.\r\n\r\n**_Input_**\r\n\r\nThe tool takes 
as input a list of gene ids (MGI ids or Gene Symbol ids). The elemnts in the list could be separated by a comma, semicolumn, tab or newline.\r\n\r\n**_Operation_**\r\n\r\nThe program will create an heatmap representing the number of phenotypes and the mp term list for each gene contained in an [IMPC physiological system](https://www.mousephenotype.org/help/data-visualization/gene-pages/phenogrid/).\r\nUsing the slider, adjust the treshold to set the minimum count to be displayed in the heatmap. \r\n\r\nNB: Genes without phenotypes in any physiological system will not be displayed. Also, the labels of the heatmap will use Gene Symbols independently from the type of id used in the input.\r\n\r\n**_Tool access:_** [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/AndreaFurlani/Jupyter_interactive_plots/main?urlpath=voila%2Frender%2FInteractive_plots.ipynb)","organization":"INFRAFRONTIER workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/460?version=1","name":"Version 1","author":["Andrea Furlani","Philipp Gormanns"],"descriptor_type":[]}]},{"id":"461","url":"https://workflowhub.eu/workflows/461","name":"Mouse phenotype similarity analyis","description":"# **Phenotype similarity analysis**\r\n\r\nA Jupyter Notebook for analyzing phenotyping similarities across user specified genes. Phenotypes are retrieved from the MGI resource\r\n\r\n**_Input_**\r\n\r\nThe tool takes as input a list of gene ids (MGI ids or Gene Symbol ids). The elemnts in the list could be separated by a comma, semicolumn, tab or newline.\r\n\r\n**_Operation_**\r\n\r\nThe Notebook will create a table where row and columns names are the Gene Symbols of the input elements and each cell will contain the name of the common phenotypes shared by those genes. Then an interactive heatmap will be displayed, showing also the count of those phenotypes. 
Using the slider, adjust the treshold to set the minimum count to be displayed in the heatmap.\r\n\r\nNB: Genes with only counts below the treshold will be not displayed in the heatmap. Also, the labels of the heatmap will use Gene Symbols independently from the type of id used in the input.\r\n\r\n**_Tool access:_** [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/AndreaFurlani/Jupyter_alliance/main?urlpath=voila%2Frender%2FAlliance_API_query.ipynb)\r\n","organization":"INFRAFRONTIER workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/461?version=1","name":"Version 1","author":["Andrea Furlani","Philipp Gormanns"],"descriptor_type":[]}]},{"id":"462","url":"https://workflowhub.eu/workflows/462","name":"MGnify genomes catalogue pipeline","description":"# MGnify genomes catalogue pipeline\r\n\r\n[MGnify](https://www.ebi.ac.uk/metagenomics/) A pipeline to perform taxonomic and functional annotation and to generate a catalogue from a set of isolate and/or metagenome-assembled genomes (MAGs) using the workflow described in the following publication:\r\n\r\nGurbich TA, Almeida A, Beracochea M, Burdett T, Burgin J, Cochrane G, Raj S, Richardson L, Rogers AB, Sakharova E, Salazar GA and Finn RD. (2023) [MGnify Genomes: A Resource for Biome-specific Microbial Genome Catalogues.](https://www.sciencedirect.com/science/article/pii/S0022283623000724) \u003ci\u003eJ Mol Biol\u003c/i\u003e. 
doi: https://doi.org/10.1016/j.jmb.2023.168016\r\n\r\nDetailed information about existing MGnify catalogues: https://docs.mgnify.org/src/docs/genome-viewer.html\r\n\r\n### Tools used in the pipeline\r\n| Tool/Database                                                                                    | Version           | Purpose                                                                                                                |\r\n|--------------------------------------------------------------------------------------------------|-------------------|------------------------------------------------------------------------------------------------------------------------|\r\n| CheckM2                                                                                          | 1.0.1             | Determining genome quality                                                                                             |\r\n| dRep                                                                                             | 3.2.2             | Genome clustering                                                                                                      |\r\n| Mash                                                                                             | 2.3               | Sketch for the catalogue; placement of genomes into clusters (update only); strain tree                                |\r\n| GUNC                                                                                             | 1.0.3             | Quality control                                                                                                        |\r\n| GUNC DB                                                                                          | 2.0.4             | Database for GUNC                                                                                                      |\r\n| GTDB-Tk                                                                          
                | 2.4.0             | Assigning taxonomy; generating alignments                                                                              |\r\n| GTDB                                                                                             | r220              | Database for GTDB-Tk                                                                                                   |\r\n| Prokka                                                                                           | 1.14.6            | Protein annotation                                                                                                     |\r\n| IQ-TREE 2                                                                                        | 2.2.0.3           | Generating a phylogenetic tree                                                                                         |\r\n| Kraken 2                                                                                         | 2.1.2             | Generating a kraken database                                                                                           |\r\n| Bracken                                                                                          | 2.6.2             | Generating a bracken database                                                                                          |\r\n| MMseqs2                                                                                          | 13.45111          | Generating a protein catalogue                                                                                         |\r\n| eggNOG-mapper                                                                                    | 2.1.11            | Protein annotation (eggNOG, KEGG, COG,  CAZy)                                                                          |\r\n| eggNOG DB                                                                                        | 5.0.2             | 
Database for eggNOG-mapper                                                                                             |\r\n| Diamond                                                                                          | 2.0.11            | Protein annotation (eggNOG)                                                                                            |\r\n| InterProScan                                                                                     | 5.62-94.0         | Protein annotation (InterPro, Pfam)                                                                                    |\r\n| kegg-pathways-completeness tool                                                                  | 1.0.5             | Computes KEGG pathway completeness                                                                                     |\r\n| CRISPRCasFinder                                                                                  | 4.3.2             | Annotation of CRISPR arrays                                                                                            |\r\n| AMRFinderPlus                                                                                    | 3.11.4            | Antimicrobial resistance gene annotation; virulence factors, biocide, heat, acid, and metal resistance gene annotation |\r\n| AMRFinderPlus DB                                                                                 | 3.11 2023-02-23.1 | Database for AMRFinderPlus                                                                                             |\r\n| antiSMASH                                                                                        | 7.1.0             | Biosynthetic gene cluster annotation                                                                                   |\r\n| GECCO                                                                                            | 0.9.8             | Biosynthetic gene cluster annotation    
                                                                               |\r\n| SanntiS                                                                                          | 0.9.3.2           | Biosynthetic gene cluster annotation                                                                                   |\r\n| DefenseFinder                                                                                    | 1.2.0             | Annotation of anti-phage systems                                                                                       |\r\n| DefenseFinder models                                                                             | 1.2.3             | Database for DefenseFinder                                                                                             |\r\n| run_dbCAN                                                                                        | 4.1.2             | Polysaccharide utilization loci prediction                                                                             |\r\n| dbCAN DB                                                                                         | V12               | Database for run_dbCAN                                                                                                 |\r\n| Infernal                                                                                         | 1.1.4             | RNA predictions                                                                                                        |\r\n| tRNAscan-SE                                                                                      | 2.0.9             | tRNA predictions                                                                                                       |\r\n| Rfam                                                                                             | 14.9              | Identification of SSU/LSU rRNA and other ncRNAs                                 
                                       |\r\n| Panaroo                                                                                          | 1.3.2             | Pan-genome computation                                                                                                 |\r\n| Seqtk                                                                                            | 1.3               | Generating a gene catalogue                                                                                            |\r\n| VIRify                                                                                           | 2.0.1             | Viral sequence annotation                                                                                              |\r\n| [Mobilome annotation pipeline](https://github.com/EBI-Metagenomics/mobilome-annotation-pipeline) | 2.0.2             | Mobilome annotation                                                                                                    |\r\n| samtools                                                                                         | 1.15              | FASTA indexing                                                                                                         |\r\n\r\n## Setup\r\n\r\n### Environment\r\n\r\nThe pipeline is implemented in [Nextflow](https://www.nextflow.io/).\r\n\r\nRequirements:\r\n- [singulairty](https://sylabs.io/docs/) or [docker](https://www.docker.com/)\r\n\r\n#### Reference databases\r\n\r\nThe pipeline needs the following reference databases and configuration files (roughtly ~150G):\r\n\r\n- ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/genomes-pipeline/gunc_db_2.0.4.dmnd.gz\r\n- ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/genomes-pipeline/eggnog_db_5.0.2.tgz\r\n- ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/genomes-pipeline/rfam_14.9/\r\n- ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/genomes-pipeline/kegg_classes.tsv\r\n- 
ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/genomes-pipeline/continent_countries.csv\r\n- https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.0/auxillary_files/gtdbtk_r214_data.tar.gz\r\n- ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-02-23.1\r\n- https://zenodo.org/records/4626519/files/uniref100.KO.v1.dmnd.gz\r\n\r\n### Containers\r\n\r\nThis pipeline requires [singularity](https://sylabs.io/docs/) or [docker](https://www.docker.com/) as the container engine to run pipeline.\r\n\r\nThe containers are hosted in [biocontainers](https://biocontainers.pro/) and [quay.io/microbiome-informatics](https://quay.io/organization/microbiome-informatics) repository.\r\n\r\nIt's possible to build the containers from scratch using the following script:\r\n\r\n```bash\r\ncd containers \u0026\u0026 bash build.sh\r\n```\r\n\r\n## Running the pipeline\r\n\r\n## Data preparation\r\n\r\n1. You need to pre-download your data to directories and make sure that genomes are uncompressed. Scripts to fetch genomes from ENA ([fetch_ena.py](https://github.com/EBI-Metagenomics/genomes-pipeline/blob/master/bin/fetch_ena.py)) and NCBI ([fetch_ncbi.py](https://github.com/EBI-Metagenomics/genomes-pipeline/blob/master/bin/fetch_ncbi.py)) are provided and need to be executed separately from the pipeline. If you have downloaded genomes from both ENA and NCBI, put them into separate folders.\r\n\r\n2. When genomes are fetched from ENA using the `fetch_ena.py` script, a CSV file with contamination and completeness statistics is also created in the same directory where genomes are saved to. If you are downloading genomes using a different approach, a CSV file needs to be created manually (each line should be genome accession, % completeness, % contamination). The ENA fetching script also pre-filters genomes to satisfy the QS50 cut-off (QS = % completeness - 5 * % contamination).\r\n\r\n3. 
You will need the following information to run the pipeline:\r\n - catalogue name (for example, zebrafish-faecal)\r\n - catalogue version (for example, 1.0)\r\n - catalogue biome (for example, root:Host-associated:Human:Digestive system:Large intestine:Fecal)\r\n - min and max accession number to be assigned to the genomes (only MGnify specific). Max - Min = #total number of genomes (NCBI+ENA)\r\n\r\n### Execution\r\n\r\nThe pipeline is built in [Nextflow](https://www.nextflow.io), and utilized containers to run the software (we don't support conda ATM).\r\nIn order to run the pipeline it's required that the user creates a profile that suits their needs, there is an `ebi` profile in `nexflow.config` that can be used as template.\r\n\r\nAfter downloading the databases and adjusting the config file:\r\n\r\n```bash\r\nnextflow run EBI-Metagenomics/genomes-pipeline -c \u003ccustom.config\u003e -profile \u003cprofile\u003e \\\r\n--genome-prefix=MGYG \\\r\n--biome=\"root:Host-associated:Fish:Digestive system\" \\\r\n--ena_genomes=\u003cpath to genomes\u003e \\\r\n--ena_genomes_checkm=\u003cpath to genomes quality data\u003e \\\r\n--mgyg_start=0 \\\r\n--mgyg_end=10 \\\r\n--preassigned_accessions=\u003cpath to file with preassigned accessions if using\u003e\r\n--catalogue_name=zebrafish-faecal \\\r\n--catalogue_version=\"1.0\" \\\r\n--ftp_name=\"zebrafish-faecal\" \\\r\n--ftp_version=\"v1.0\" \\\r\n--outdir=\"\u003cpath-to-results\u003e\"\r\n```\r\n\r\n### Development\r\n\r\nInstall development tools (including pre-commit hooks to run Black code formatting).\r\n\r\n```bash\r\npip install -r requirements-dev.txt\r\npre-commit install\r\n```\r\n\r\n#### Code style\r\n\r\nUse Black, this tool is configured if you install the pre-commit tools as above.\r\n\r\nTo manually run them: black .\r\n\r\n### Testing\r\n\r\nThis repo has 2 set of tests, python unit tests for some of the most critical python scripts and [nf-test](https://github.com/askimed/nf-test) scripts for the 
nextflow code.\r\n\r\nTo run the python tests\r\n\r\n```bash\r\npip install -r requirements-test.txt\r\npytest\r\n```\r\n\r\nTo run the nextflow ones the databases have to be downloaded manually, we are working to improve this.\r\n\r\n```bash\r\nnf-test test tests/*\r\n```\r\n","organization":"MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/462?version=1","name":"v2.0.0","author":["Ekaterina Sakharova","Tatiana Gurbich","Martin Beracochea"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/462?version=2","name":"v2.3.0","author":["Ekaterina Sakharova","Tatiana Gurbich","Martin Beracochea"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/462?version=3","name":"v2.4.0","author":["Ekaterina Sakharova","Tatiana Gurbich","Martin Beracochea"],"descriptor_type":["NFL"]}]},{"id":"463","url":"https://workflowhub.eu/workflows/463","name":"Katdetectr","description":"# Introduction\r\n\r\n`katdetectr` is an *R* package for the detection, characterization and visualization of localized hypermutated regions, often referred to as *kataegis*.\r\n\r\nPlease see the [Application Note](https://www.biorxiv.org/content/10.1101/2022.07.11.499364v1) (under submission) for additional background, details and performance evaluations of `katdetectr`.\r\n\r\nThe general workflow of `katdetectr` can be summarized as follows:\r\n\r\n1. Import of genomic variants; VCF, MAF or VRanges objects.\r\n2. Detection of kataegis foci.\r\n3. 
Visualization of segmentation and kataegis foci.\r\n\r\nPlease see the [vignette](https://bioconductor.org/packages/release/bioc/vignettes/katdetectr/inst/doc/General_overview.html) for an overview of the workflow in a step-by-step manner on publicly-available datasets which are included within this package.\r\n\r\n\r\n## Installation\r\n\r\nDownload katdetectr from BioConductor:\r\n```R\r\nif (!requireNamespace(\"BiocManager\", quietly = TRUE))\r\n    install.packages(\"BiocManager\")\r\n\r\nBiocManager::install(\"katdetectr\")\r\n\r\n```\r\n","organization":"Katdetectr","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/463?version=1","name":"devel @ 5a6e5d0","author":[],"descriptor_type":[]}]},{"id":"465","url":"https://workflowhub.eu/workflows/465","name":"scRNAseq Single Sample Processing STARSolo","description":"From the R1 and R2 fastq files of a single samples, make a scRNAseq counts matrix, and perform basic QC with scanpy. Then, do further processing by making a UMAP and clustering. 
Produces a processed AnnData \r\n\r\nDeprecated: use individual workflows instead for multiple samples","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/465?version=1","name":"master @ 76f52ed","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/465?version=2","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/465?version=3","name":"main @ 03682c0","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/465?version=4","name":"main @ fe052c0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"467","url":"https://workflowhub.eu/workflows/467","name":"scRNAseq_CellQC","description":"Take an anndata file, and perform basic QC with scanpy. Produces a filtered AnnData object.","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/467?version=1","name":"master @ 76f52ed","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/467?version=2","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/467?version=3","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/467?version=4","name":"main @ 7581788","author":[],"descriptor_type":["GALAXY"]}]},{"id":"468","url":"https://workflowhub.eu/workflows/468","name":"scRNAseq_QCtoBasicProcessing","description":"Basic processing of a QC-filtered Anndata Object. 
UMAP, clustering etc. ","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/468?version=1","name":"master @ 76f52ed","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/468?version=2","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/468?version=3","name":"main @ 7581788","author":[],"descriptor_type":["GALAXY"]}]},{"id":"469","url":"https://workflowhub.eu/workflows/469","name":"PerMedCoE Covid19 Pilot workflow (Rbbt)","description":"Rbbt implementation of the Covid-19 pilot workflow from the Personalized Medicine Center of Excellence.\r\n\r\nThis workflow processes single cell data to personalize boolean models that are then used in a multi-scale cellular simulation using PhysiBoSS.","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/469?version=1","name":"main @ 252aa39","author":[],"descriptor_type":[]}]},{"id":"470","url":"https://workflowhub.eu/workflows/470","name":"Workflow 4: Staramr","description":"Correlation between Phenotypic and In Silico Detection of Antimicrobial Resistance in Salmonella enterica in Canada Using Staramr. 
\r\n\r\nDoi: [10.3390/microorganisms10020292](https://doi.org/10.3390/microorganisms10020292)\r\n\r\n| tool | version | license |\r\n| -- | -- | -- |\r\n| staramr | 0.8.0 | [Apache-2.0 license](https://github.com/phac-nml/staramr/blob/development/LICENSE) |\r\n","organization":"Seq4AMR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/470?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"471","url":"https://workflowhub.eu/workflows/471","name":"rquest-omop-worker-workflow","description":"# rquest-omop-worker-workflows\r\n\r\nSource for workflow definitions for the open source RQuest OMOP Worker tool developed for Hutch/TRE-FX\r\n\r\nNote: ARM workflows are currently broken. x86 ones work.\r\n\r\n## Inputs\r\n\r\n### Body\r\nSample input payload:\r\n\r\n```json\r\n{\r\n  \"task_id\": \"job-2023-01-13-14: 20: 38-\u003cproject\u003e\",\r\n  \"project\": \"\u003cproject\u003e\",\r\n  \"owner\": \"\u003cowner\u003e\",\r\n  \"cohort\": {\r\n    \"groups\": [\r\n      {\r\n        \"rules\": [\r\n          {\r\n            \"varname\": \"OMOP\",\r\n            \"varcat\": \"Person\",\r\n            \"type\": \"TEXT\",\r\n            \"oper\": \"=\",\r\n            \"value\": \"8507\"\r\n          }\r\n        ],\r\n        \"rules_oper\": \"AND\"\r\n      }\r\n    ],\r\n    \"groups_oper\": \"OR\"\r\n  },\r\n  \"collection\": \"\u003ccollection\u003e\",\r\n  \"protocol_version\": \"\u003cversion\u003e\",\r\n  \"char_salt\": \"\u003cchar_salt\u003e\",\r\n  \"uuid\": \"\u003cuuid\u003e\"\r\n}\r\n```\r\n\r\n### Database access\r\n\r\nCurrently this workflow requires inputs for connecting to the database it will run queries against.\r\n\r\nIn future this may be moved to environment variables.","organization":"TRE-FX","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/471?version=1","name":"main @ b916ecf","author":["Vasiliki Panagi"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/471?version=2","name":"main @ 8a4c8c1","author":["Vasiliki Panagi"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/471?version=3","name":"main @ 724775a","author":["Vasiliki Panagi"],"descriptor_type":["CWL"]}]},{"id":"472","url":"https://workflowhub.eu/workflows/472","name":"Genome-wide alternative splicing analysis","description":"This workflow correspond to the Genome-wide alternative splicing analysis training. It allows to analyze isoform switching by making use of IsoformSwitchAnalyzeR.","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/472?version=1","name":"Version 1","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/472?version=2","name":"Version 2","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/472?version=3","name":"Version 3","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/472?version=4","name":"Version 4","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]}]},{"id":"473","url":"https://workflowhub.eu/workflows/473","name":"Sorting and registration of Terahertz ELBE raw data","description":"The radiation source ELBE (Electron Linac for beams with high Brilliance and low Emittance) at the Helmholtz Centre Dresden Rossendorf (HZDR) can produce several kinds of secondary radiations. THz radiation is one of them and can be used with a typical pulse frequency of 100 kHz as a stimulation source for elementary low-energy degrees of freedom in matter. 
To sample the whole THz wave the laser path length is modified by moving specific mirrors. The raw data contains for each mirror position a binary file storing the signal spectra and a folder with gray scaled tiff files storing the jitter timing. This Workflow is equivalent to the first part of the standalone jupyter notebook https://github.com/hzdr/TELBE-raw-data-evaluation/blob/main/sorting_binning.ipynb\r\n\r\nIn the job file the folder \u003c FOLDER_BASE\u003e and \u003c FOLDER_SUB\u003e needs to be specified and the parameters as a json string like \u003c PARAMS\u003e = { \"rep\": 100000, \"t_exp\": 1, \"N_sample\": 96, \"offset\": 0, \"pixel_to_ps\": 0.0115, \"Stage_zero\": 0 }\r\n\r\nThe python file which is used is originally published in gitlab https://codebase.helmholtz.cloud/science2workflow/telbe-sorting-binning/-/blob/master/src/ The workflow can automatically be monitored in Heliport if the project number \u003c HELIPORT_PROJECT\u003e is provided.\r\n","organization":"Helmholtz Scientific Project Workflow Platform","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/473?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"474","url":"https://workflowhub.eu/workflows/474","name":"Metagenome and metatranscriptome assembly in CWL","description":"# CWL-assembly\r\n[![Codacy Badge](https://api.codacy.com/project/badge/Grade/684724bbc0134960ab41748f4a4b732f)](https://www.codacy.com/app/mb1069/CWL-assembly?utm_source=github.com\u0026amp;utm_medium=referral\u0026amp;utm_content=EBI-Metagenomics/CWL-assembly\u0026amp;utm_campaign=Badge_Grade)\r\n[![Build Status](https://travis-ci.org/EBI-Metagenomics/CWL-assembly.svg?branch=develop)](https://travis-ci.org/EBI-Metagenomics/CWL-assembly)\r\n\r\n## Description\r\n\r\nThis repository contains two workflows for metagenome and metatranscriptome assembly of short read data. 
MetaSPAdes is used as default for paired-end data, and MEGAHIT for single-end data and co-assemblies. MEGAHIT can be specified as the default assembler in the yaml file if preferred. Steps include:\r\n\r\n  * _QC_: removal of short reads, low quality regions, adapters and host decontamination\r\n  * _Assembly_: with metaSPADES or MEGAHIT\r\n  * _Post-assembly_: Host and PhiX decontamination, contig length filter (500bp), stats generation\r\n\r\n## Requirements - How to install\r\n\r\nThis pipeline requires a conda environment with cwltool, blastn, and metaspades. If created with `requirements.yml`, the environment will be called `cwl_assembly`. \r\n\r\n```\r\nconda env create -f requirements.yml\r\nconda activate cwl_assembly\r\npip install cwltool==3.1.20230601100705\r\n```\r\n\r\n## Databases\r\n\r\nYou will need to pre-download fasta files for host decontamination and generate the following databases accordingly:\r\n  * bwa index\r\n  * blast index\r\n    \r\nSpecify the locations in the yaml file when running the pipeline.\r\n\r\n## Main pipeline executables\r\n\r\n  * `src/workflows/metagenome_pipeline.cwl`\r\n  * `src/workflows/metatranscriptome_pipeline.cwl`\r\n\r\n## Example command\r\n\r\n```cwltool --singularity --outdir ${OUTDIR} ${CWL} ${YML}```\r\n\r\n`$CWL` is going to be one of the executables mentioned above\r\n`$YML` should be a config yaml file including entries among what follows. 
\r\nYou can find a yml template in the `examples` folder.\r\n\r\n## Example output directory structure\r\n```\r\nRoot directory\r\n    ├── megahit\r\n    │   └── 001 -------------------------------- Assembly root directory\r\n    │       ├── assembly_stats.json ------------ Human-readable assembly stats file\r\n    │       ├── coverage.tab ------------------- Coverage file\r\n    │       ├── log ---------------------------- CwlToil+megahit output log\r\n    |       ├── options.json ------------------- Megahit input options\r\n    │       ├── SRR6257420.fasta.gz ------------ Archived and trimmed assembly\r\n    │       └── SRR6257420.fasta.gz.md5 -------- MD5 hash of above archive\r\n    ├── metaspades\r\n    │   └── 001 -------------------------------- Assembly root directory\r\n    │       ├── assembly_graph.fastg ----------- Assembly graph\r\n    │       ├── assembly_stats.json ------------ Human-readable assembly stats file\r\n    │       ├── coverage.tab ------------------- Coverage file\r\n    |       ├── params.txt --------------------- Metaspades input options\r\n    │       ├── spades.log --------------------- Metaspades output log\r\n    │       ├── SRR6257420.fasta.gz ------------ Archived and trimmed assembly\r\n    │       └── SRR6257420.fasta.gz.md5 -------- MD5 hash of above archive\r\n    │ \r\n    └── raw ------------------------------------ Raw data directory\r\n        ├── SRR6257420.fastq.qc_stats.tsv ------ Stats for cleaned fastq\r\n        ├── SRR6257420_fastp_clean_1.fastq.gz -- Cleaned paired-end file_1\r\n        └── SRR6257420_fastp_clean_2.fastq.gz -- Cleaned paired-end file_2\r\n```\r\n","organization":"HoloFood at MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/474?version=1","name":"master @ b269a55","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/474?version=2","name":"master @ 
39efebc","author":[],"descriptor_type":["CWL"]}]},{"id":"475","url":"https://workflowhub.eu/workflows/475","name":"EukRecover","description":"# EukRecover\r\nPipeline to recover eukaryotic MAGs using CONCOCT, metaBAT2 and EukCC's merging algorythm.\r\n\r\nNeeds paired end shotgun metagenomic reads.\r\n\r\n## Environment\r\n\r\nEukrecover requires an environment with snakemake and metaWRAP.\r\n\r\n## Quickstart\r\n\r\nDefine your samples in the file `samples.csv`.\r\nThis file needs to have the columns project and run to identify each metagenome. \r\n\r\nThis pipeline does not support co-binning, but feel free to change it. \r\n\r\nClone this repro wherever you want to run the pipeline:\r\n```\r\ngit clone https://github.com/openpaul/eukrecover/\r\n```\r\n\r\n\r\nYou can then run the snakemake like so\r\n\r\n```\r\nsnakemake --use-singularity\r\n```\r\n\r\nThe pipeline used dockerhub to fetch all tools, so make sure you have singularity installed.\r\n\r\n\r\n\r\n## Prepare databases\r\nThe pipeline will setup databases for you, but if you already have a EukCC or a BUSCO 5 database you can use them \r\nby specifying the location in the file `config/config.yaml`\r\n\r\n\r\n## Output:\r\nIn the folder results you will find a folder `MAGs` which will contain a folder\r\n`fa` containing the actual MAG fastas.\r\nIn addition you will find stats for each MAG in the table `QC.csv`.\r\n\r\nThis table contains the following columns:\r\n\r\nname,eukcc_compl,eukcc_cont,BUSCO_C,BUSCO_M,BUSCO_D,BUSCO_F,BUSCO_tax,N50,bp\r\n\r\n\r\n\r\n## Citation:\r\n\r\nIf you use this pipeline please make sure to cite all used software. 
\r\n\r\nFor this please reffer to the used rules.\r\n","organization":"HoloFood at MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/475?version=1","name":"main @ f46419d","author":[],"descriptor_type":["SMK"]}]},{"id":"476","url":"https://workflowhub.eu/workflows/476","name":"PerMedCoE Covid19 Pilot workflow (PyCOMPSs)","description":"# COVID-19 Multiscale Modelling of the Virus and Patients’ Tissue Workflow\r\n\r\n## Table of Contents\r\n\r\n- [COVID-19 Multiscale Modelling of the Virus and Patients’ Tissue Workflow](#covid-19-multiscale-modelling-of-the-virus-and-patients-tissue-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - [Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n      - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n    - [Mahti or Puhti](#mahti-or-puhti)\r\n      - [Requirements](#requirements)\r\n      - [Steps](#steps)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nUses multiscale simulations to predict patient-specific SARS‑CoV‑2 severity subtypes\r\n(moderate, severe or control), using single-cell RNA-Seq data, MaBoSS and PhysiBoSS.\r\nBoolean models are used to determine the behaviour of individual agents as a function\r\nof extracellular conditions and the concentration of different  substrates, including\r\nthe number of virions. 
Predictions of severity subtypes are based on a meta-analysis of\r\npersonalised model outputs simulating cellular apoptosis regulation in epithelial cells\r\ninfected by SARS‑CoV‑2.\r\n\r\nThe workflow uses the following building blocks, described in order of execution:\r\n\r\n1. High-throughput mutant analysis\r\n2. Single-cell processing\r\n3. Personalise patient\r\n4. PhysiBoSS\r\n5. Analysis of all simulations\r\n\r\nFor details on individual workflow steps, see the user documentation for each building block.\r\n\r\n[`GitHub repository`](\u003chttps://github.com/PerMedCoE/covid-19-workflow\u003e)\r\n\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the COVID-19 Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs and Snakemake (in progress).\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains dataset files.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for the given small dataset.\r\nThey can be executed individually for testing purposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the COVID19 Workflow in a laptop or desktop computer.\r\n\r\n#### Requirements\r\n\r\n- [`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html) / [Snakemake](https://snakemake.readthedocs.io/en/stable/)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/covid-19-workflow.git\r\n  ```\r\n\r\n2. 
Install the Building Blocks required for the COVID19 Workflow:\r\n\r\n  ```bash\r\n  covid-19-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - MaBoSS.singularity\r\n      - meta_analysis.singularity\r\n      - PhysiCell-COVID19.singularity\r\n      - single_cell.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     sudo singularity build MaBoSS.sif MaBoSS.singularity\r\n     sudo singularity build meta_analysis.sif meta_analysis.singularity\r\n     sudo singularity build PhysiCell-COVID19.sif PhysiCell-COVID19.singularity\r\n     sudo singularity build single_cell.sif single_cell.singularity\r\n     cd ../../..\r\n     ```\r\n\r\n**If using PyCOMPSs in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. Execute `./run.sh`\r\n\r\n**If using Snakemake in local PC** (make sure that SnakeMake is installed):\r\n\r\n4. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflows/SnakeMake\r\n   ```\r\n\r\n5. 
Execute `./run.sh`\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the COVID19 Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the COVID19 Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`COVID19WORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_covid19workflow\r\n   ```\r\n\r\n3. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). 
It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with with COVID19 Workflow results.\r\n\r\n### Mahti or Puhti\r\n\r\nThis section explains how to run the COVID19 workflow on CSC supercomputers using SnakeMake.\r\n\r\n#### Requirements\r\n\r\n- Install snakemake (or check if there is a version installed using `module spider snakemake`)\r\n- Install workflow, using the same steps as for the local machine. With the exception that containers have to be built elsewhere.\r\n\r\n#### Steps\r\n\r\n\r\n1. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflow/SnakeMake\r\n   ```\r\n\r\n2. Edit `launch.sh` with the correct partition, account, and resource specifications.  \r\n\r\n3. Execute `./launch.sh`\r\n\r\n  \u003e :warning: Snakemake provides a `--cluster` flag, but this functionality should be avoided as it's really not suited for HPC systems.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/476?version=1","name":"main @ 7ef4b06","author":[],"descriptor_type":[]}]},{"id":"477","url":"https://workflowhub.eu/workflows/477","name":"PerMedCoE Cancer Invasion","description":"# Cancer Invasion Workflow\r\n\r\n## Table of Contents\r\n\r\n- [Cancer Invasion 
Workflow](#cancer-invasion-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - [Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n      - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n    - [Mahti or Puhti](#mahti-or-puhti)\r\n      - [Requirements](#requirements)\r\n      - [Steps](#steps)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nUses multiscale simulations to describe cancer progression into invasion.\r\n\r\nThe workflow uses the following building blocks, described in order of execution:\r\n\r\n1. PhysiBoSS-Invasion\r\n\r\nFor details on individual workflow steps, see the user documentation for each building block.\r\n\r\n[`GitHub repository`](\u003chttps://github.com/PerMedCoE/cancer-invasion-workflow\u003e)\r\n\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the Cancer Invasion Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs and Snakemake (in progress).\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains dataset files.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for the given small dataset.\r\nThey can be executed individually for testing purposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the Cancer Invasion Workflow in a laptop or desktop computer.\r\n\r\n#### 
Requirements\r\n\r\n- [`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html) / [Snakemake](https://snakemake.readthedocs.io/en/stable/)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/cancer-invasion-workflow\r\n  ```\r\n\r\n2. Install the Building Blocks required for the Cancer Invasion Workflow:\r\n\r\n  ```bash\r\n  cancer-invasion-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - PhysiCell-Invasion.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     sudo singularity build PhysiCell-Invasion.sif PhysiCell-Invasion.singularity\r\n     cd ../../..\r\n     ```\r\n\r\n**If using PyCOMPSs in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. Execute `./run.sh`\r\n\r\n**If using Snakemake in local PC** (make sure that SnakeMake is installed):\r\n\r\n4. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflows/SnakeMake\r\n   ```\r\n\r\n5. 
Execute `./run.sh`\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the Cancer Invasion Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the Cancer Invasion Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`CANCERINVASIONWORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_cancerinvasionworkflow\r\n   ```\r\n\r\n3. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). 
It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with with Cancer Invasion Workflow results.\r\n\r\n### Mahti or Puhti\r\n\r\nThis section explains how to run the Cancer Invasion workflow on CSC supercomputers using SnakeMake.\r\n\r\n#### Requirements\r\n\r\n- Install snakemake (or check if there is a version installed using `module spider snakemake`)\r\n- Install workflow, using the same steps as for the local machine. With the exception that containers have to be built elsewhere.\r\n\r\n#### Steps\r\n\r\n\r\n1. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflow/SnakeMake\r\n   ```\r\n\r\n2. Edit `launch.sh` with the correct partition, account, and resource specifications.  \r\n\r\n3. Execute `./launch.sh`\r\n\r\n  \u003e :warning: Snakemake provides a `--cluster` flag, but this functionality should be avoided as it's really not suited for HPC systems.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/477?version=1","name":"master @ 1e752a4","author":[],"descriptor_type":[]}]},{"id":"478","url":"https://workflowhub.eu/workflows/478","name":"PerMedCoE Single Drug Prediction","description":"# Single drug prediction Workflow\r\n## Table of Contents\r\n\r\n- 
[Single drug prediction Workflow](#single-drug-prediction-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - [Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n      - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nComplementarily, the workflow supports single drug response predictions to provide a baseline prediction in cases where drug response information for a given drug and cell line is not available. As an input, the workflow needs basal gene expression data for a cell, the drug targets (they need to be known for untested drugs) and optionally CARNIVAL features (sub-network activity predicted with CARNIVAL building block) and predicts log(IC50) values. This workflow uses a custom matrix factorization approach built with Google JAX and trained with gradient descent. The workflow can be used both for training a model, and for predicting new drug responses.\r\n\r\nThe workflow uses the following building blocks in order of execution (for training a model):\r\n\r\n1. Carnival_gex_preprocess\r\n    - Preprocessed the basal gene expression data from GDSC. The input is a matrix of Gene x Sample expression data.\r\n2. Progeny\r\n    - Using the preprocessed data, it estimates pathway activities for each column in the data (for each sample). It returns a matrix of Pathways x Samples with activity values for 11 pathways.\r\n3. Omnipath\r\n    - It downloads latest Prior Knowledge Network of signalling. 
This building block can be ommited if there exists already a csv file with the network.\r\n4. TF Enrichment\r\n    - For each sample, transcription factor activities are estimated using Dorothea.\r\n5. CarnivalPy\r\n    - Using the TF activities estimated before, it runs Carnival to obtain a sub-network consistent with the TF activities (for each sample).\r\n6. Carnival_feature_merger\r\n    - Preselect a set of genes by the user (if specified) and merge the features with the basal gene expression data.\r\n7. ML Jax Drug Prediction\r\n    - Trains a model using the combined features to predict IC50 values from GDSC.\r\n\r\nFor details on individual workflow steps, please check the scripts that use each individual building block in the workflow [`GitHub repository`](\u003chttps://github.com/PerMedCoE/single_drug_prediction\u003e)\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the Single Drug Prediction Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs.\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains a small dataset for testing purposes.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for a small dataset.\r\nThey can be executed individually *without PyCOMPSs installed* for testing\r\npurposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the Single Drug Prediction Workflow in a laptop or desktop computer.\r\n\r\n#### Requirements\r\n\r\n- [`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. 
Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/single-drug-prediction-workflow.git\r\n  ```\r\n\r\n2. Install the Building Blocks required for the COVID19 Workflow:\r\n\r\n  ```bash\r\n  single-drug-prediction-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - toolset.singularity\r\n      - carnivalpy.singularity\r\n      - ml-jax.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     ## Download new BB singularity files\r\n     wget https://github.com/saezlab/permedcoe/archive/refs/heads/master.zip\r\n     unzip master.zip\r\n     cd permedcoe-master/containers\r\n     ## Build containers\r\n     cd toolset\r\n     sudo /usr/local/bin/singularity build toolset.sif toolset.singularity\r\n     mv toolset.sif ../../../\r\n     cd ..\r\n     cd carnivalpy\r\n     sudo /usr/local/bin/singularity build carnivalpy.sif carnivalpy.singularity\r\n     mv carnivalpy.sif ../../../\r\n     cd ..\r\n     cd ml-jax\r\n     sudo /usr/local/bin/singularity build ml-jax.sif ml-jax.singularity\r\n     mv ml-jax.sif ../../../tf-jax.sif\r\n     cd ..\r\n     cd ../..\r\n     ## Cleanup\r\n     rm -rf permedcoe-master\r\n     rm master.zip\r\n     cd ../../..\r\n     ```\r\n\r\n     \u003e :warning: **TIP**: The singularity containers **can to be downloaded** from: https://cloud.sylabs.io/library/pablormier\r\n\r\n\r\n**If using PyCOMPSs 
in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. Execute `./run.sh`\r\n\r\n  The execution is prepared to use the singularity images that **MUST** be placed into `BuildingBlocks/Resources/images` folder. If they are located in any other folder, please update the `run.sh` script setting the `PERMEDCOE_IMAGES` to the images folder.\r\n\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the Single Drug Prediction Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the Single Drug Prediction Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`SINGLE_DRUG_PREDICTION_WORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_single_drug_prediction_workflow\r\n   ```\r\n\r\n3. 
Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with with Single Drug Prediction Workflow results.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/478?version=1","name":"main @ 2177ee0","author":[],"descriptor_type":[]}]},{"id":"479","url":"https://workflowhub.eu/workflows/479","name":"PerMedCoE Drug Synergy","description":"# Drug Synergies Screening Workflow\r\n\r\n## Table of Contents\r\n\r\n- [Drug Synergies Screening Workflow](#drug-synergies-screening-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - 
[Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n      - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nThis pipeline simulates a drug screening on personalised cell line models. It automatically builds Boolean models of interest, then uses cell lines data (expression, mutations, copy number variations) to personalise them as MaBoSS models. Finally, this pipeline simulates multiple drug intervention on these MaBoSS models, and lists drug synergies of interest.\r\n\r\nThe workflow uses the following building blocks, described in order of execution:\r\n\r\n1. Build model from species\r\n2. Personalise patient\r\n3. MaBoSS\r\n4. Print drug results\r\n\r\nFor details on individual workflow steps, see the user documentation for each building block.\r\n\r\n[`GitHub repository`](https://github.com/PerMedCoE/drug-synergies-workflow\u003e)\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the Drug Synergies Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs.\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains a small dataset for testing purposes.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for a small dataset.\r\nThey can be executed individually *without PyCOMPSs installed* for testing\r\npurposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the Drug Synergies Workflow in a laptop or desktop computer.\r\n\r\n#### Requirements\r\n\r\n- 
[`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/drug-synergies-workflow.git\r\n  ```\r\n\r\n2. Install the Building Blocks required for the COVID19 Workflow:\r\n\r\n  ```bash\r\n  drug-synergies-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - PhysiCell-COVID19.singularity\r\n      - printResults.singularity\r\n      - MaBoSS_sensitivity.singularity\r\n      - FromSpeciesToMaBoSSModel.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     sudo singularity build PhysiCell-COVID19.sif PhysiCell-COVID19.singularity\r\n     sudo singularity build printResults.sif printResults.singularity\r\n     sudo singularity build MaBoSS_sensitivity.sif MaBoSS_sensitivity.singularity\r\n     sudo singularity build FromSpeciesToMaBoSSModel.sif FromSpeciesToMaBoSSModel.singularity\r\n     cd ../../..\r\n     ```\r\n\r\n**If using PyCOMPSs in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. 
Execute `./run.sh`\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the Drug Synergies Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the Drug Synergies Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`DRUG_SYNERGIES_WORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_drug_synergies_workflow\r\n   ```\r\n\r\n3. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). 
It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with with Drug Synergies Workflow results.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/479?version=1","name":"main @ 77403c9","author":[],"descriptor_type":[]}]},{"id":"480","url":"https://workflowhub.eu/workflows/480","name":"PerMedCoE Covid19 Pilot workflow (Nextflow)","description":"# COVID-19 Multiscale Modelling of the Virus and Patients’ Tissue Workflow\r\n\r\n## Table of Contents\r\n\r\n- [COVID-19 Multiscale Modelling of the Virus and Patients’ Tissue Workflow](#covid-19-multiscale-modelling-of-the-virus-and-patients-tissue-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - [Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n      - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n    - [Mahti or 
Puhti](#mahti-or-puhti)\r\n      - [Requirements](#requirements)\r\n      - [Steps](#steps)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nUses multiscale simulations to predict patient-specific SARS‑CoV‑2 severity subtypes\r\n(moderate, severe or control), using single-cell RNA-Seq data, MaBoSS and PhysiBoSS.\r\nBoolean models are used to determine the behaviour of individual agents as a function\r\nof extracellular conditions and the concentration of different  substrates, including\r\nthe number of virions. Predictions of severity subtypes are based on a meta-analysis of\r\npersonalised model outputs simulating cellular apoptosis regulation in epithelial cells\r\ninfected by SARS‑CoV‑2.\r\n\r\nThe workflow uses the following building blocks, described in order of execution:\r\n\r\n1. High-throughput mutant analysis\r\n2. Single-cell processing\r\n3. Personalise patient\r\n4. PhysiBoSS\r\n5. Analysis of all simulations\r\n\r\nFor details on individual workflow steps, see the user documentation for each building block.\r\n\r\n[`GitHub repository`](\u003chttps://github.com/PerMedCoE/covid-19-workflow\u003e)\r\n\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the COVID-19 Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs and Snakemake (in progress).\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains dataset files.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for the given small dataset.\r\nThey can be executed individually for testing purposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the COVID19 Workflow in a laptop or desktop computer.\r\n\r\n#### Requirements\r\n\r\n- 
[`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html) / [Snakemake](https://snakemake.readthedocs.io/en/stable/)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/covid-19-workflow.git\r\n  ```\r\n\r\n2. Install the Building Blocks required for the COVID19 Workflow:\r\n\r\n  ```bash\r\n  covid-19-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - MaBoSS.singularity\r\n      - meta_analysis.singularity\r\n      - PhysiCell-COVID19.singularity\r\n      - single_cell.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     sudo singularity build MaBoSS.sif MaBoSS.singularity\r\n     sudo singularity build meta_analysis.sif meta_analysis.singularity\r\n     sudo singularity build PhysiCell-COVID19.sif PhysiCell-COVID19.singularity\r\n     sudo singularity build single_cell.sif single_cell.singularity\r\n     cd ../../..\r\n     ```\r\n\r\n**If using PyCOMPSs in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. 
Execute `./run.sh`\r\n\r\n**If using Snakemake in local PC** (make sure that SnakeMake is installed):\r\n\r\n4. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflows/SnakeMake\r\n   ```\r\n\r\n5. Execute `./run.sh`\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the COVID19 Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the COVID19 Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`COVID19WORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_covid19workflow\r\n   ```\r\n\r\n3. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. 
Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with with COVID19 Workflow results.\r\n\r\n### Mahti or Puhti\r\n\r\nThis section explains how to run the COVID19 workflow on CSC supercomputers using SnakeMake.\r\n\r\n#### Requirements\r\n\r\n- Install snakemake (or check if there is a version installed using `module spider snakemake`)\r\n- Install workflow, using the same steps as for the local machine. With the exception that containers have to be built elsewhere.\r\n\r\n#### Steps\r\n\r\n\r\n1. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflow/SnakeMake\r\n   ```\r\n\r\n2. Edit `launch.sh` with the correct partition, account, and resource specifications.  \r\n\r\n3. 
Execute `./launch.sh`\r\n\r\n  \u003e :warning: Snakemake provides a `--cluster` flag, but this functionality should be avoided as it's really not suited for HPC systems.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/480?version=1","name":"main @ 7ef4b06","author":[],"descriptor_type":["NFL"]}]},{"id":"481","url":"https://workflowhub.eu/workflows/481","name":"PerMedCoE Covid19 Pilot workflow (Snakemake)","description":"# COVID-19 Multiscale Modelling of the Virus and Patients’ Tissue Workflow\r\n\r\n## Table of Contents\r\n\r\n- [COVID-19 Multiscale Modelling of the Virus and Patients’ Tissue Workflow](#covid-19-multiscale-modelling-of-the-virus-and-patients-tissue-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - [Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n      - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n    - [Mahti or Puhti](#mahti-or-puhti)\r\n      - [Requirements](#requirements)\r\n      - [Steps](#steps)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nUses 
multiscale simulations to predict patient-specific SARS‑CoV‑2 severity subtypes\r\n(moderate, severe or control), using single-cell RNA-Seq data, MaBoSS and PhysiBoSS.\r\nBoolean models are used to determine the behaviour of individual agents as a function\r\nof extracellular conditions and the concentration of different  substrates, including\r\nthe number of virions. Predictions of severity subtypes are based on a meta-analysis of\r\npersonalised model outputs simulating cellular apoptosis regulation in epithelial cells\r\ninfected by SARS‑CoV‑2.\r\n\r\nThe workflow uses the following building blocks, described in order of execution:\r\n\r\n1. High-throughput mutant analysis\r\n2. Single-cell processing\r\n3. Personalise patient\r\n4. PhysiBoSS\r\n5. Analysis of all simulations\r\n\r\nFor details on individual workflow steps, see the user documentation for each building block.\r\n\r\n[`GitHub repository`](\u003chttps://github.com/PerMedCoE/covid-19-workflow\u003e)\r\n\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the COVID-19 Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs and Snakemake (in progress).\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains dataset files.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for the given small dataset.\r\nThey can be executed individually for testing purposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the COVID19 Workflow in a laptop or desktop computer.\r\n\r\n#### Requirements\r\n\r\n- [`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html) / 
[Snakemake](https://snakemake.readthedocs.io/en/stable/)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/covid-19-workflow.git\r\n  ```\r\n\r\n2. Install the Building Blocks required for the COVID19 Workflow:\r\n\r\n  ```bash\r\n  covid-19-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - MaBoSS.singularity\r\n      - meta_analysis.singularity\r\n      - PhysiCell-COVID19.singularity\r\n      - single_cell.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     sudo singularity build MaBoSS.sif MaBoSS.singularity\r\n     sudo singularity build meta_analysis.sif meta_analysis.singularity\r\n     sudo singularity build PhysiCell-COVID19.sif PhysiCell-COVID19.singularity\r\n     sudo singularity build single_cell.sif single_cell.singularity\r\n     cd ../../..\r\n     ```\r\n\r\n**If using PyCOMPSs in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. Execute `./run.sh`\r\n\r\n**If using Snakemake in local PC** (make sure that SnakeMake is installed):\r\n\r\n4. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflows/SnakeMake\r\n   ```\r\n\r\n5. 
Execute `./run.sh`\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the COVID19 Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the COVID19 Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`COVID19WORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_covid19workflow\r\n   ```\r\n\r\n3. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). 
It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with with COVID19 Workflow results.\r\n\r\n### Mahti or Puhti\r\n\r\nThis section explains how to run the COVID19 workflow on CSC supercomputers using SnakeMake.\r\n\r\n#### Requirements\r\n\r\n- Install snakemake (or check if there is a version installed using `module spider snakemake`)\r\n- Install workflow, using the same steps as for the local machine. With the exception that containers have to be built elsewhere.\r\n\r\n#### Steps\r\n\r\n\r\n1. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflow/SnakeMake\r\n   ```\r\n\r\n2. Edit `launch.sh` with the correct partition, account, and resource specifications.  \r\n\r\n3. Execute `./launch.sh`\r\n\r\n  \u003e :warning: Snakemake provides a `--cluster` flag, but this functionality should be avoided as it's really not suited for HPC systems.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/481?version=1","name":"main @ 7ef4b06","author":[],"descriptor_type":["SMK"]}]},{"id":"482","url":"https://workflowhub.eu/workflows/482","name":"Genome-wide alternative splicing analysis v.2","description":"Genome-wide alternative splicing analysis v.2","organization":"Galaxy 
Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/482?version=1","name":"Version 1","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/482?version=2","name":"Version 2","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/482?version=3","name":"Version 3","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/482?version=4","name":"Version 4","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/482?version=5","name":"Version 5","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/482?version=6","name":"Version 6","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/482?version=7","name":"Version 7","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]}]},{"id":"483","url":"https://workflowhub.eu/workflows/483","name":"StringTie workflow","description":"Abstract CWL Automatically generated from the Galaxy workflow file: Copy of Genome-wide alternative splicing analysis","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/483?version=1","name":"Version 1","author":["Cristóbal Gallardo"],"descriptor_type":["GALAXY"]}]},{"id":"484","url":"https://workflowhub.eu/workflows/484","name":"Java COMPSs Matrix Multiplication, out-of-core, using files","description":"**Name:** Matrix Multiplication  \r\n**Contact Person:** support-compss@bsc.es  \r\n**Access Level:** public  \r\n**License Agreement:** Apache2  \r\n**Platform:** COMPSs  \r\n\r\n# Description\r\nMatrix multiplication 
is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles. The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\n# Versions\r\nThere are three versions of Matrix Multiplication, depending on the data types used to store the blocks.\r\n## Version 1\r\n''files'', where the matrix blocks are stored in files.\r\n## Version 2\r\n''objects'', where the matrix blocks are represented by objects.\r\n## Version 3\r\n''arrays'', where the matrix blocks are stored in arrays.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss matmul.files.Matmul numberOfBlocks blockSize\r\nruncompss matmul.objects.Matmul numberOfBlocks blockSize\r\nruncompss matmul.arrays.Matmul numberOfBlocks blockSize\r\n``` \r\n\r\nwhere:\r\n  * numberOfBlocks: Number of blocks inside each matrix\r\n  * blockSize: Size of each block\r\n\r\n\r\n# Execution Example\r\n```\r\nruncompss matmul.objects.Matmul 16 4\r\nruncompss matmul.files.Matmul 16 4\r\nruncompss matmul.arrays.Matmul 16 4  \r\n```\r\n\r\n# Build\r\n## Option 1: Native java\r\n```\r\ncd ~/tutorial_apps/java/matmul/; javac src/main/java/matmul/*/*.java\r\ncd src/main/java/; jar cf matmul.jar matmul/\r\ncd ../../../; mv src/main/java/matmul.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\ncd ~/tutorial_apps/java/matmul/\r\nmvn clean 
package\r\n```\r\n","organization":"Cluster Emergent del Cervell Humà, Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/484?version=1","name":"Version 1","author":["Jorge Ejarque"],"descriptor_type":[]}]},{"id":"485","url":"https://workflowhub.eu/workflows/485","name":"PyCOMPSs Matrix Multiplication, out-of-core, using files","description":"**Name:** Matrix multiplication with Files  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nMatrix multiplication is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles. The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. 
When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/matmul_files.py numberOfBlocks blockSize\r\n```\r\n\r\nwhere:\r\n* numberOfBlocks: Number of blocks inside each matrix\r\n* blockSize: Size of each block\r\n\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/matmul_files.py 4 4\r\nruncompss src/matmul_files.py 4 4\r\npython -m pycompss src/matmul_files.py 4 4\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Cluster Emergent del Cervell Humà, Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/485?version=1","name":"Version 1","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"486","url":"https://workflowhub.eu/workflows/486","name":"Jupyter Notebook Protein conformational ensembles generation","description":"# Protein Conformational ensembles generation\r\n\r\n## Workflow included in the [ELIXIR 3D-Bioinfo](https://elixir-europe.org/communities/3d-bioinfo) Implementation Study:\r\n\r\n### Building on PDBe-KB to chart and characterize the conformation landscape of native proteins\r\n\r\nThis tutorial aims to illustrate the process of generating **protein conformational ensembles** from** 3D structures **and analysing its **molecular flexibility**, step by step, using the **BioExcel Building Blocks library (biobb)**.\r\n\r\n## Conformational landscape of native proteins\r\n**Proteins** are **dynamic** systems that adopt multiple **conformational states**, a property essential for many **biological processes** (e.g. binding other proteins, nucleic acids, small molecule ligands, or switching between functionaly active and inactive states). 
Characterizing the different **conformational states** of proteins and the transitions between them is therefore critical for gaining insight into their **biological function** and can help explain the effects of genetic variants in **health** and **disease** and the action of drugs.\r\n\r\n**Structural biology** has become increasingly efficient in sampling the different **conformational states** of proteins. The **PDB** has currently archived more than **170,000 individual structures**, but over two thirds of these structures represent **multiple conformations** of the same or related protein, observed in different crystal forms, when interacting with other proteins or other macromolecules, or upon binding small molecule ligands. Charting this conformational diversity across the PDB can therefore be employed to build a useful approximation of the **conformational landscape** of native proteins.\r\n\r\nA number of resources and **tools** describing and characterizing various often complementary aspects of protein **conformational diversity** in known structures have been developed, notably by groups in Europe. These tools include algorithms with varying degree of sophistication, for aligning the 3D structures of individual protein chains or domains, of protein assemblies, and evaluating their degree of **structural similarity**. Using such tools one can **align structures pairwise**, compute the corresponding **similarity matrix**, and identify ensembles of **structures/conformations** with a defined **similarity level** that tend to recur in different PDB entries, an operation typically performed using **clustering** methods. Such workflows are at the basis of resources such as **CATH, Contemplate, or PDBflex** that offer access to **conformational ensembles** comprised of similar **conformations** clustered according to various criteria. 
Other types of tools focus on differences between **protein conformations**, identifying regions of proteins that undergo large **collective displacements** in different PDB entries, those that act as **hinges or linkers**, or regions that are inherently **flexible**.\r\n\r\nTo build a meaningful approximation of the **conformational landscape** of native proteins, the **conformational ensembles** (and the differences between them), identified on the basis of **structural similarity/dissimilarity** measures alone, need to be **biophysically characterized**. This may be approached at **two different levels**. \r\n- At the **biological level**, it is important to link observed **conformational ensembles**, to their **functional roles** by evaluating the correspondence with **protein family classifications** based on sequence information and **functional annotations** in public databases e.g. Uniprot, PDKe-Knowledge Base (KB). These links should provide valuable mechanistic insights into how the **conformational and dynamic properties** of proteins are exploited by evolution to regulate their **biological function**. \u003cbr\u003e\u003cbr\u003e\r\n\r\n- At the **physical level** one needs to introduce **energetic consideration** to evaluate the likelihood that the identified **conformational ensembles** represent **conformational states** that the protein (or domain under study) samples in isolation. Such evaluation is notoriously **challenging** and can only be roughly approximated by using **computational methods** to evaluate the extent to which the observed **conformational ensembles** can be reproduced by algorithms that simulate the **dynamic behavior** of protein systems. 
These algorithms include the computationally expensive **classical molecular dynamics (MD) simulations** to sample local thermal fluctuations but also faster more approximate methods such as **Elastic Network Models** and **Normal Node Analysis** (NMA) to model low energy **collective motions**. Alternatively, **enhanced sampling molecular dynamics** can be used to model complex types of **conformational changes** but at a very high computational cost. \r\n\r\nThe **ELIXIR 3D-Bioinfo Implementation Study** *Building on PDBe-KB to chart and characterize the conformation landscape of native proteins* focuses on:\r\n\r\n1. Mapping the **conformational diversity** of proteins and their homologs across the PDB. \r\n2. Characterize the different **flexibility properties** of protein regions, and link this information to sequence and functional annotation.\r\n3. Benchmark **computational methods** that can predict a biophysical description of protein motions.\r\n\r\nThis notebook is part of the third objective, where a list of **computational resources** that are able to predict **protein flexibility** and **conformational ensembles** have been collected, evaluated, and integrated in reproducible and interoperable workflows using the **BioExcel Building Blocks library**. 
Note that the list is not meant to be exhaustive, it is built following the expertise of the implementation study partners.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/486?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/486?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/486?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/486?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/486?version=5","name":"Version 5","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"487","url":"https://workflowhub.eu/workflows/487","name":"Python Protein conformational ensembles generation","description":"# Protein Conformational ensembles 
generation\r\n\r\n## Workflow included in the [ELIXIR 3D-Bioinfo](https://elixir-europe.org/communities/3d-bioinfo) Implementation Study:\r\n\r\n### Building on PDBe-KB to chart and characterize the conformation landscape of native proteins\r\n\r\nThis tutorial aims to illustrate the process of generating **protein conformational ensembles** from** 3D structures **and analysing its **molecular flexibility**, step by step, using the **BioExcel Building Blocks library (biobb)**.\r\n\r\n## Conformational landscape of native proteins\r\n**Proteins** are **dynamic** systems that adopt multiple **conformational states**, a property essential for many **biological processes** (e.g. binding other proteins, nucleic acids, small molecule ligands, or switching between functionaly active and inactive states). Characterizing the different **conformational states** of proteins and the transitions between them is therefore critical for gaining insight into their **biological function** and can help explain the effects of genetic variants in **health** and **disease** and the action of drugs.\r\n\r\n**Structural biology** has become increasingly efficient in sampling the different **conformational states** of proteins. The **PDB** has currently archived more than **170,000 individual structures**, but over two thirds of these structures represent **multiple conformations** of the same or related protein, observed in different crystal forms, when interacting with other proteins or other macromolecules, or upon binding small molecule ligands. Charting this conformational diversity across the PDB can therefore be employed to build a useful approximation of the **conformational landscape** of native proteins.\r\n\r\nA number of resources and **tools** describing and characterizing various often complementary aspects of protein **conformational diversity** in known structures have been developed, notably by groups in Europe. 
These tools include algorithms with varying degree of sophistication, for aligning the 3D structures of individual protein chains or domains, of protein assemblies, and evaluating their degree of **structural similarity**. Using such tools one can **align structures pairwise**, compute the corresponding **similarity matrix**, and identify ensembles of **structures/conformations** with a defined **similarity level** that tend to recur in different PDB entries, an operation typically performed using **clustering** methods. Such workflows are at the basis of resources such as **CATH, Contemplate, or PDBflex** that offer access to **conformational ensembles** comprised of similar **conformations** clustered according to various criteria. Other types of tools focus on differences between **protein conformations**, identifying regions of proteins that undergo large **collective displacements** in different PDB entries, those that act as **hinges or linkers**, or regions that are inherently **flexible**.\r\n\r\nTo build a meaningful approximation of the **conformational landscape** of native proteins, the **conformational ensembles** (and the differences between them), identified on the basis of **structural similarity/dissimilarity** measures alone, need to be **biophysically characterized**. This may be approached at **two different levels**. \r\n- At the **biological level**, it is important to link observed **conformational ensembles**, to their **functional roles** by evaluating the correspondence with **protein family classifications** based on sequence information and **functional annotations** in public databases e.g. Uniprot, PDKe-Knowledge Base (KB). These links should provide valuable mechanistic insights into how the **conformational and dynamic properties** of proteins are exploited by evolution to regulate their **biological function**. 
\u003cbr\u003e\u003cbr\u003e\r\n\r\n- At the **physical level** one needs to introduce **energetic consideration** to evaluate the likelihood that the identified **conformational ensembles** represent **conformational states** that the protein (or domain under study) samples in isolation. Such evaluation is notoriously **challenging** and can only be roughly approximated by using **computational methods** to evaluate the extent to which the observed **conformational ensembles** can be reproduced by algorithms that simulate the **dynamic behavior** of protein systems. These algorithms include the computationally expensive **classical molecular dynamics (MD) simulations** to sample local thermal fluctuations but also faster more approximate methods such as **Elastic Network Models** and **Normal Node Analysis** (NMA) to model low energy **collective motions**. Alternatively, **enhanced sampling molecular dynamics** can be used to model complex types of **conformational changes** but at a very high computational cost. \r\n\r\nThe **ELIXIR 3D-Bioinfo Implementation Study** *Building on PDBe-KB to chart and characterize the conformation landscape of native proteins* focuses on:\r\n\r\n1. Mapping the **conformational diversity** of proteins and their homologs across the PDB. \r\n2. Characterize the different **flexibility properties** of protein regions, and link this information to sequence and functional annotation.\r\n3. Benchmark **computational methods** that can predict a biophysical description of protein motions.\r\n\r\nThis notebook is part of the third objective, where a list of **computational resources** that are able to predict **protein flexibility** and **conformational ensembles** have been collected, evaluated, and integrated in reproducible and interoperable workflows using the **BioExcel Building Blocks library**. 
Note that the list is not meant to be exhaustive, it is built following the expertise of the implementation study partners.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/487?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/487?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/487?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"488","url":"https://workflowhub.eu/workflows/488","name":"CWL Protein conformational ensembles generation","description":"# Protein Conformational ensembles generation\r\n\r\n## Workflow included in the [ELIXIR 3D-Bioinfo](https://elixir-europe.org/communities/3d-bioinfo) Implementation Study:\r\n\r\n### Building on PDBe-KB to chart and characterize the conformation landscape of native proteins\r\n\r\nThis tutorial aims to illustrate the process of generating 
**protein conformational ensembles** from** 3D structures **and analysing its **molecular flexibility**, step by step, using the **BioExcel Building Blocks library (biobb)**.\r\n\r\n## Conformational landscape of native proteins\r\n**Proteins** are **dynamic** systems that adopt multiple **conformational states**, a property essential for many **biological processes** (e.g. binding other proteins, nucleic acids, small molecule ligands, or switching between functionaly active and inactive states). Characterizing the different **conformational states** of proteins and the transitions between them is therefore critical for gaining insight into their **biological function** and can help explain the effects of genetic variants in **health** and **disease** and the action of drugs.\r\n\r\n**Structural biology** has become increasingly efficient in sampling the different **conformational states** of proteins. The **PDB** has currently archived more than **170,000 individual structures**, but over two thirds of these structures represent **multiple conformations** of the same or related protein, observed in different crystal forms, when interacting with other proteins or other macromolecules, or upon binding small molecule ligands. Charting this conformational diversity across the PDB can therefore be employed to build a useful approximation of the **conformational landscape** of native proteins.\r\n\r\nA number of resources and **tools** describing and characterizing various often complementary aspects of protein **conformational diversity** in known structures have been developed, notably by groups in Europe. These tools include algorithms with varying degree of sophistication, for aligning the 3D structures of individual protein chains or domains, of protein assemblies, and evaluating their degree of **structural similarity**. 
Using such tools one can **align structures pairwise**, compute the corresponding **similarity matrix**, and identify ensembles of **structures/conformations** with a defined **similarity level** that tend to recur in different PDB entries, an operation typically performed using **clustering** methods. Such workflows are at the basis of resources such as **CATH, Contemplate, or PDBflex** that offer access to **conformational ensembles** comprised of similar **conformations** clustered according to various criteria. Other types of tools focus on differences between **protein conformations**, identifying regions of proteins that undergo large **collective displacements** in different PDB entries, those that act as **hinges or linkers**, or regions that are inherently **flexible**.\r\n\r\nTo build a meaningful approximation of the **conformational landscape** of native proteins, the **conformational ensembles** (and the differences between them), identified on the basis of **structural similarity/dissimilarity** measures alone, need to be **biophysically characterized**. This may be approached at **two different levels**. \r\n- At the **biological level**, it is important to link observed **conformational ensembles**, to their **functional roles** by evaluating the correspondence with **protein family classifications** based on sequence information and **functional annotations** in public databases e.g. Uniprot, PDKe-Knowledge Base (KB). These links should provide valuable mechanistic insights into how the **conformational and dynamic properties** of proteins are exploited by evolution to regulate their **biological function**. \u003cbr\u003e\u003cbr\u003e\r\n\r\n- At the **physical level** one needs to introduce **energetic consideration** to evaluate the likelihood that the identified **conformational ensembles** represent **conformational states** that the protein (or domain under study) samples in isolation. 
Such evaluation is notoriously **challenging** and can only be roughly approximated by using **computational methods** to evaluate the extent to which the observed **conformational ensembles** can be reproduced by algorithms that simulate the **dynamic behavior** of protein systems. These algorithms include the computationally expensive **classical molecular dynamics (MD) simulations** to sample local thermal fluctuations but also faster more approximate methods such as **Elastic Network Models** and **Normal Node Analysis** (NMA) to model low energy **collective motions**. Alternatively, **enhanced sampling molecular dynamics** can be used to model complex types of **conformational changes** but at a very high computational cost. \r\n\r\nThe **ELIXIR 3D-Bioinfo Implementation Study** *Building on PDBe-KB to chart and characterize the conformation landscape of native proteins* focuses on:\r\n\r\n1. Mapping the **conformational diversity** of proteins and their homologs across the PDB. \r\n2. Characterize the different **flexibility properties** of protein regions, and link this information to sequence and functional annotation.\r\n3. Benchmark **computational methods** that can predict a biophysical description of protein motions.\r\n\r\nThis notebook is part of the third objective, where a list of **computational resources** that are able to predict **protein flexibility** and **conformational ensembles** have been collected, evaluated, and integrated in reproducible and interoperable workflows using the **BioExcel Building Blocks library**. 
Note that the list is not meant to be exhaustive, it is built following the expertise of the implementation study partners.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/488?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/488?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/488?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"489","url":"https://workflowhub.eu/workflows/489","name":"Validate a tool against IDR data: Load Image with labels from IDR, re-analyze using Cellpose","description":"IDR is based on OMERO and thus all what we show in this notebook can be easily adjusted for use against another OMERO server, e.g. 
your institutional OMERO server instance.\r\n\r\nThe main objective of this notebook is to demonstrate how public resources such as the IDR can be used to train your neural network or validate software tools.\r\n\r\nThe authors of the PLOS Biology paper, \"Nessys: A new set of tools for the automated detection of nuclei within intact tissues and dense 3D cultures\" published in August 2019: https://doi.org/10.1371/journal.pbio.3000388, considered several image segmenation packages, but they did not use the approach described in this notebook.\r\n\r\nWe will analyse the data using Cellpose and compare the output with the original segmentation produced by the authors. StarDist was not considered by the authors. Our workflow shows how public repository can be accessed and data inside it used to validate software tools or new algorithms.\r\n\r\nWe will use an image (id=6001247) referenced in the paper. The image can be viewed online in the Image Data Resource (IDR).\r\n\r\nWe will use a predefined model from Cellpose as a starting point. 
Steps to access data from IDR could be re-used if you wish to create a new model (outside the scope of this notebook).\r\n\r\n## Launch\r\nThis notebook uses the [environment_cellpose.yml](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_4/environment_cellpose.yml) file.\r\n\r\nSee [Setup](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_4/setup.md).","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/489?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"490","url":"https://workflowhub.eu/workflows/490","name":"Galaxy Protein conformational ensembles generation","description":"# Protein Conformational ensembles generation\r\n\r\n## Workflow included in the [ELIXIR 3D-Bioinfo](https://elixir-europe.org/communities/3d-bioinfo) Implementation Study:\r\n\r\n### Building on PDBe-KB to chart and characterize the conformation landscape of native proteins\r\n\r\nThis tutorial aims to illustrate the process of generating **protein conformational ensembles** from** 3D structures **and analysing its **molecular flexibility**, step by step, using the **BioExcel Building Blocks library (biobb)**.\r\n\r\n## Conformational landscape of native proteins\r\n**Proteins** are **dynamic** systems that adopt multiple **conformational states**, a property essential for many **biological processes** (e.g. binding other proteins, nucleic acids, small molecule ligands, or switching between functionaly active and inactive states). 
Characterizing the different **conformational states** of proteins and the transitions between them is therefore critical for gaining insight into their **biological function** and can help explain the effects of genetic variants in **health** and **disease** and the action of drugs.\r\n\r\n**Structural biology** has become increasingly efficient in sampling the different **conformational states** of proteins. The **PDB** has currently archived more than **170,000 individual structures**, but over two thirds of these structures represent **multiple conformations** of the same or related protein, observed in different crystal forms, when interacting with other proteins or other macromolecules, or upon binding small molecule ligands. Charting this conformational diversity across the PDB can therefore be employed to build a useful approximation of the **conformational landscape** of native proteins.\r\n\r\nA number of resources and **tools** describing and characterizing various often complementary aspects of protein **conformational diversity** in known structures have been developed, notably by groups in Europe. These tools include algorithms with varying degree of sophistication, for aligning the 3D structures of individual protein chains or domains, of protein assemblies, and evaluating their degree of **structural similarity**. Using such tools one can **align structures pairwise**, compute the corresponding **similarity matrix**, and identify ensembles of **structures/conformations** with a defined **similarity level** that tend to recur in different PDB entries, an operation typically performed using **clustering** methods. Such workflows are at the basis of resources such as **CATH, Contemplate, or PDBflex** that offer access to **conformational ensembles** comprised of similar **conformations** clustered according to various criteria. 
Other types of tools focus on differences between **protein conformations**, identifying regions of proteins that undergo large **collective displacements** in different PDB entries, those that act as **hinges or linkers**, or regions that are inherently **flexible**.\r\n\r\nTo build a meaningful approximation of the **conformational landscape** of native proteins, the **conformational ensembles** (and the differences between them), identified on the basis of **structural similarity/dissimilarity** measures alone, need to be **biophysically characterized**. This may be approached at **two different levels**. \r\n- At the **biological level**, it is important to link observed **conformational ensembles**, to their **functional roles** by evaluating the correspondence with **protein family classifications** based on sequence information and **functional annotations** in public databases e.g. Uniprot, PDKe-Knowledge Base (KB). These links should provide valuable mechanistic insights into how the **conformational and dynamic properties** of proteins are exploited by evolution to regulate their **biological function**. \u003cbr\u003e\u003cbr\u003e\r\n\r\n- At the **physical level** one needs to introduce **energetic consideration** to evaluate the likelihood that the identified **conformational ensembles** represent **conformational states** that the protein (or domain under study) samples in isolation. Such evaluation is notoriously **challenging** and can only be roughly approximated by using **computational methods** to evaluate the extent to which the observed **conformational ensembles** can be reproduced by algorithms that simulate the **dynamic behavior** of protein systems. 
These algorithms include the computationally expensive **classical molecular dynamics (MD) simulations** to sample local thermal fluctuations but also faster more approximate methods such as **Elastic Network Models** and **Normal Node Analysis** (NMA) to model low energy **collective motions**. Alternatively, **enhanced sampling molecular dynamics** can be used to model complex types of **conformational changes** but at a very high computational cost. \r\n\r\nThe **ELIXIR 3D-Bioinfo Implementation Study** *Building on PDBe-KB to chart and characterize the conformation landscape of native proteins* focuses on:\r\n\r\n1. Mapping the **conformational diversity** of proteins and their homologs across the PDB. \r\n2. Characterize the different **flexibility properties** of protein regions, and link this information to sequence and functional annotation.\r\n3. Benchmark **computational methods** that can predict a biophysical description of protein motions.\r\n\r\nThis notebook is part of the third objective, where a list of **computational resources** that are able to predict **protein flexibility** and **conformational ensembles** have been collected, evaluated, and integrated in reproducible and interoperable workflows using the **BioExcel Building Blocks library**. 
Note that the list is not meant to be exhaustive, it is built following the expertise of the implementation study partners.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/490?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"493","url":"https://workflowhub.eu/workflows/493","name":"Validate a tool against IDR data: Load Image with labels from IDR, re-analyze using StarDist","description":"The notebook shows how to load an IDR image with labels.\r\n\r\nThe image is referenced in the paper \"NesSys: a novel method for accurate nuclear segmentation in 3D\" published August 2019 in PLOS Biology: https://doi.org/10.1371/journal.pbio.3000388 and can be viewed online in the Image Data Resource.\r\n\r\nIn this notebook, the image is loaded together with the labels and analyzed using StarDist. 
The StarDist analysis produces a segmentation, which is then viewed side-by-side with the original segmentations produced by the authors of the paper obtained via the loaded labels.\r\n\r\n## Launch\r\nThis notebook uses the [environment_stardist.yml](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_4/environment_stardist.yml) file.\r\n\r\nSee [Setup](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_4/setup.md).","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/493?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"494","url":"https://workflowhub.eu/workflows/494","name":"Use Public Resources to answer a biological question","description":"## Learning Objectives\r\n- How to access genomic resource via its Python API\r\n- How to access image resource via its Python API\r\n- Relate image data to genomic data\r\n\r\n## Diabetes related genes expressed in pancreas\r\n\r\nThis notebook looks at the question **Which diabetes related genes are expressed in the pancreas?** Tissue and disease can be modified.\r\n\r\nSteps:\r\n\r\n- Query [humanmine.org](https://www.humanmine.org/humanmine), an integrated database of Homo sapiens genomic data using the intermine API to find the genes.\r\n- Using the list of found genes, search in the [Image Data Resource (IDR)](https://idr.openmicroscopy.org/) for images linked to the genes, tissue and disease.\r\n- Analyse the images found.\r\n\r\n## Launch\r\nThis notebook uses the [environment.yml](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_4/environment.yml) file.\r\n\r\nSee [Setup](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_4/setup.md).\r\n","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/494?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"495","url":"https://workflowhub.eu/workflows/495","name":"Load ome.zarr Image with labels from public S3 repositories, analyze in parallel using Cellpose and compare results","description":"## Learning objectives\r\n- Read data to analyse from an object store.\r\n- Analyse data in parallel using Dask.\r\n- Show how to use public resources to train neural network.\r\n- Load labels associated to the original data\r\n- Compare results with ground truth.\r\n\r\nThe authors of the PLOS Biology paper, \"Nessys: A new set of tools for the automated detection of nuclei within intact tissues and dense 3D cultures\" published in August 2019: https://doi.org/10.1371/journal.pbio.3000388, considered several image segmentation packages, but they did not use the approach described in this notebook.\r\n\r\nWe will analyse the data using [Cellpose](https://www.cellpose.org/) and compare the output with the original segmentation produced by the authors. Cellpose was not considered by the authors. 
Our workflow shows how public repository can be accessed and data inside it used to validate software tools or new algorithms.\r\n\r\nWe will use a predefined model from Cellpose as a starting point.\r\n\r\n## Launch\r\nThis notebook uses the [environment.yml](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_5/environment.yml) file.\r\n\r\nSee [Setup](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_5/setup.md).","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/495?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"496","url":"https://workflowhub.eu/workflows/496","name":"Load ome.zarr Image with labels from a public S3 repository, analyze using StarDist and compare results","description":"The image is referenced in the paper \"NesSys: a novel method for accurate nuclear segmentation in 3D\" published August 2019 in PLOS Biology: https://doi.org/10.1371/journal.pbio.3000388 and can be viewed online in the [Image Data Resource](https://idr.openmicroscopy.org/webclient/?show=image-6001247).\r\n\r\nThis original image was converted into the Zarr format. The analysis results produced by the authors of the paper were converted into labels and linked to the Zarr file which was placed into a public S3 repository.\r\n\r\nIn this notebook, the Zarr file is then loaded together with the labels from the S3 storage and analyzed using [StarDist](https://github.com/stardist/stardist). 
The StarDist analysis produces a segmentation, which is then viewed side-by-side with the original segmentations produced by the authors of the paper obtained via the loaded labels.\r\n\r\n## Launch\r\nThis notebook uses the [environment_stardist.yml](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_5/environment_stardist.yml) file.\r\n\r\nSee [Setup](https://github.com/ome/EMBL-EBI-imaging-course-05-2023/blob/main/Day_5/setup.md).","organization":"OME","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/496?version=1","name":"Version 1","author":["Jean-Marie Burel"],"descriptor_type":[]}]},{"id":"500","url":"https://workflowhub.eu/workflows/500","name":"Performance evaluation of Katdetectr and other kataegis detection packages","description":"# Introduction\r\n\r\nThis repository contains all the custom scripts used in the evaluation and comparison of [Katdetectr](https://github.com/ErasmusMC-CCBC/evaluation_katdetectr/tree/main) as described in the corresponding Technical Note (under submission).\r\n\r\n# Usage\r\n\r\nAll required files were deposited on [Zenodo](https://zenodo.org/record/6623289#.YqBxHi8Rr0o%5D).\r\nThese can directly be downloaded using `zen4R` and be used as input.\r\n\r\n```R\r\n# Increase the timeout (due to some large files).\r\noptions(timeout=5000)\r\n\r\n# Download the required files into the data/ folder (~1GB).\r\nzen4R::download_zenodo(doi = \"10.5281/zenodo.6810477\", path = 'data/')\r\n```","organization":"Katdetectr","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/500?version=1","name":"main @ fefbbdc","author":[],"descriptor_type":[]}]},{"id":"502","url":"https://workflowhub.eu/workflows/502","name":"BY-COVID WP5 T5.2 Baseline Use Case","description":"This publication corresponds to the Research Objects (RO) of the Baseline 
Use Case proposed in T.5.2 (WP5) in the BY-COVID project on “COVID-19 Vaccine(s) effectiveness in preventing SARS-CoV-2 infection”.","organization":"BY-COVID Baseline Use Case: SARS-CoV-2 Vaccine(s) effectiveness in preventing SARS-CoV-2 infection","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/502?version=1","name":"main @ 93d83af","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/502?version=2","name":"main @ ccc5926","author":[],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/502?version=3","name":"main @ ccc5926","author":[],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/502?version=4","name":"main @ 2f714d8","author":[],"descriptor_type":[]}]},{"id":"506","url":"https://workflowhub.eu/workflows/506","name":"Purge retained haplotypes using Purge-Dups","description":"## Purge dups\r\n\r\nThis snakemake pipeline is designed to be run using as input a contig-level genome and pacbio reads. This pipeline has been tested with `snakemake v7.32.4`. Raw long-read sequencing files and the input contig genome assembly must be given in the `config.yaml` file. 
To execute the workflow run:\r\n\r\n`snakemake --use-conda --cores N`\r\n\r\nOr configure the cluster.json and run using the `./run_cluster` command","organization":"Biodiversity Genomics Europe (general), ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/506?version=1","name":"Version 1","author":["Tom Brown"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/506?version=2","name":"Version 2","author":["Tom Brown"],"descriptor_type":["SMK"]}]},{"id":"510","url":"https://workflowhub.eu/workflows/510","name":"Metabolome Annotation Workflow (MAW)","description":"\r\nThis repository hosts Metabolome Annotation Workflow (MAW). The workflow takes MS2 .mzML format data files as an input in R. It performs spectral database dereplication using R Package Spectra and compound database dereplication using SIRIUS OR MetFrag . Final candidate selection is done in Python using RDKit and PubChemPy.","organization":"Metabolomics-Reproducibility","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/510?version=1","name":"main @ 1be2bd3","author":["Mahnoor Zulfiqar","Michael R. Crusoe","Luiz Gadelha"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/510?version=2","name":"main @ 2da7fba","author":["Mahnoor Zulfiqar","Michael R. 
Crusoe","Luiz Gadelha"],"descriptor_type":["CWL"]}]},{"id":"511","url":"https://workflowhub.eu/workflows/511","name":"Timing of spring events changes under modelled future climate scenarios in a mesotrophic lake","description":"Simulations and figures supporting the manuscript \"Timing of spring events changes under modelled future climate scenarios in a mesotrophic lake\"","organization":"Lake Erken modelling setup","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/511?version=1","name":"Version 1","author":["Jorrit Mesman"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/511?version=2","name":"Version 2","author":["Jorrit Mesman"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/511?version=3","name":"Version 3","author":["Jorrit Mesman"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/511?version=4","name":"Version 4","author":["Jorrit Mesman"],"descriptor_type":[]},{"id":"5","url":"https://workflowhub.eu/workflows/511?version=5","name":"Version 5","author":["Jorrit Mesman"],"descriptor_type":[]}]},{"id":"512","url":"https://workflowhub.eu/workflows/512","name":"scRNAseq: Load counts matrix","description":"Loads a single cell counts matrix into an annData format - adding a column called sample with the sample name.  
(Input format - matrix.mtx, features.tsv and barcodes.tsv)","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/512?version=1","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/512?version=2","name":"main @ 7581788","author":[],"descriptor_type":["GALAXY"]}]},{"id":"513","url":"https://workflowhub.eu/workflows/513","name":"scRNAseq: Count and Load with starSOLO","description":"Takes fastqs and reference data, to produce a single cell counts matrix into and save in annData format - adding a column called sample with the sample name.  ","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/513?version=1","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/513?version=2","name":"main @ fe052c0","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/513?version=3","name":"main @ 7581788","author":[],"descriptor_type":["GALAXY"]}]},{"id":"514","url":"https://workflowhub.eu/workflows/514","name":"scRNAseq Single Sample Processing Counts Matrix","description":"Take a scRNAseq counts matrix from a single sample, and perform basic QC with scanpy. Then, do further processing by making a UMAP and clustering. 
Produces a processed AnnData \r\nobject.\r\n\r\nDeprecated: use individual workflows instead for multiple samples","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/514?version=1","name":"main @ a95a4ee","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/514?version=2","name":"main @ 03682c0","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/514?version=3","name":"main @ 7581788","author":[],"descriptor_type":["GALAXY"]}]},{"id":"515","url":"https://workflowhub.eu/workflows/515","name":"Java COMPSs LU Factorization for Sparse Matrices","description":"**Name:** SparseLU  \r\n**Contact Person:** support-compss@bsc.es  \r\n**Access Level:** public  \r\n**License Agreement:** Apache2  \r\n**Platform:** COMPSs  \r\n\r\n# Description\r\nThe Sparse LU application computes an LU matrix factorization on a sparse blocked matrix. The matrix size (number of blocks) and the block size are parameters of the application. \r\n\r\nAs the algorithm progresses, the area of the matrix that is accessed is smaller; concretely, at each iteration, the 0th row and column of the current matrix are discarded. 
On the other hand, due to the sparseness of the matrix, some of its blocks might not be allocated and, therefore, no work is generated for them.\r\n\r\nWhen executed with COMPSs, Sparse LU produces several types of task with different granularity and numerous dependencies between them.\r\n\r\n# Versions\r\nThere are three versions of Sparse LU, depending on the data types used to store the blocks.\r\n## Version 1\r\n''files'', where the matrix blocks are stored in files.\r\n## Version 2\r\n''objects'', where the matrix blocks are represented by objects.\r\n## Version 3\r\n''arrays'', where the matrix blocks are stored in arrays.\r\n\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss sparseLU.files.SparseLU numberOfBlocks blockSize\r\nruncompss sparseLU.objects.SparseLU numberOfBlocks blockSize\r\nruncompss sparseLU.arrays.SparseLU numberOfBlocks blockSize\r\n```\r\n\r\nwhere:\r\n  * numberOfBlocks: Number of blocks inside each matrix\r\n  * blockSize: Size of each block\r\n\r\n\r\n# Execution Example\r\n```\r\nruncompss sparseLU.objects.SparseLU 16 4 \r\nruncompss sparseLU.files.SparseLU 16 4\r\nruncompss sparseLU.arrays.SparseLU 16 4 \r\n```\r\n\r\n\r\n# Build\r\n## Option 1: Native java\r\n```\r\ncd application_sources/; javac src/main/java/sparseLU/*/*.java\r\ncd src/main/java/; jar cf sparseLU.jar sparseLU/\r\ncd ../../../; mv src/main/java/sparseLU.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\ncd application_sources/\r\nmvn clean package\r\n```\r\n","organization":"Cluster Emergent del Cervell Humà, Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/515?version=1","name":"Version 1","author":["Jorge Ejarque"],"descriptor_type":[]}]},{"id":"516","url":"https://workflowhub.eu/workflows/516","name":"Life Science cross-RI (Research Infrastructure) project","description":"The project allowed us to manage and build 
structured code scripts on the Jupyter Notebook, a simple web application which is user-friendly, flexible to use in the research community. The script is developed to address the specific needs of research between different platforms of dataset.\r\nThese stakeholders have developed their own platforms for the annotation and standardisation of both data and metadata produced within their respective field.\r\n-The INFRAFRONTIER - European Mutant Mouse Archive (EMMA) comprises over 7200 mutant mouse lines that are extensively integrated and enriched with other public dataset.\r\n-The EU-OpenScreen offers compound screening protocols containing several metadata and will contribute to the development of tools for linking to the chemical entity database.\r\n-The IDR Image Data Resource is a public repository of reference image datasets from published scientific studies, where the community can submit, search and access high-quality bio-image data. \r\n-The CIM-XNAT is an XNAT deployment of the Molecular Imaging Center at UniTo that offers a suite of tools for uploading preclinical images.\r\nTo address the challenges of integrating several EU-RI datasets with focus on preclinical and discovery research bioimaging, our aim is to develop cross researching queries through a web based interface  to combine the resources of the RIs for integrating the information associated with data belonging to the involved RIs. Furthermore, the open-source tool provides users with free, open access to collections of datasets distributed over multiple sources that result from searches by specific keywords. \r\nThe script allows the cross research in different fields of research as: Species, Strain, Gene, Cell line, Disease model, Chemical Compound.\r\nThe novel aspects of this tool are mainly:\r\na) user friendly, e.g. the user has the flexibility to research among the dataset easily with a simple API, intuitive for researchers and biomedical users.  
\r\nb) the possibility of making a research between different platforms and repositories, from a unique simple way. \r\nc) the workflow project follows the FAIR principles in the treatment of data and datasets. \r\nThe access to Notebook Jupyter needs the installation of Anaconda, which consents to open the web application. \r\nInside the Jupyter, the script was built using Python. The query code is also easy to download and share in a .ipynb file.\r\nA visual representation of the detailed results (dataset, metadata, information, query results) of the workflow can be printed immediately after the query run. \r\n","organization":"EOSC-Life WP3 OC Team, cross RI project","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/516?version=1","name":"Version 1","author":["Elisabetta Spinazzola"],"descriptor_type":[]}]},{"id":"517","url":"https://workflowhub.eu/workflows/517","name":"VVV2_align_SE","description":"SINGLE-END workflow. \r\nAlign reads on fasta reference/assembly using bwa mem, get a consensus, variants, mutation explanations. \r\n\r\nIMPORTANT: \r\n* For \"bcftools call\" consensus step, the --ploidy file is in \"Données partagées\" (Shared Data) and must be imported in your history to use the workflow by providing this file (tells bcftools to consider haploid variant calling). \r\n* SELECT THE MOST ADAPTED VADR MODEL for annotation (see vadr parameters).","organization":"ANSES-Ploufragan","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/517?version=1","name":"Version 2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"518","url":"https://workflowhub.eu/workflows/518","name":"VVV2_align_PE","description":"PAIRED-END workflow. 
Align reads on fasta reference/assembly using bwa mem, get a consensus, variants, mutation explanations.\r\n\r\nIMPORTANT: \r\n* For \"bcftools call\" consensus step, the --ploidy file is in \"Données partagées\" (Shared Data) and must be imported in your history to use the workflow by providing this file (tells bcftools to consider haploid variant calling). \r\n* SELECT THE MOST ADAPTED VADR MODEL for annotation (see vadr parameters).\r\n","organization":"ANSES-Ploufragan","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/518?version=1","name":"Version 2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"519","url":"https://workflowhub.eu/workflows/519","name":"SARS-CoV-2 Illumina Amplicon pipeline - SANBI - v1.2","description":"SARS-CoV-2 variant prediction using Read It And Keep, fastp, bbmap and iVar","organization":"SANBI Pathogen Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/519?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"520","url":"https://workflowhub.eu/workflows/520","name":"SARS-CoV-2 PostProcessing","description":"","organization":"SANBI Pathogen Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/520?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"521","url":"https://workflowhub.eu/workflows/521","name":"SARS-CoV-2 ONT Amplicon Sequencing SANBI 1.0","description":"","organization":"SANBI Pathogen Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/521?version=1","name":"Version 
1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"524","url":"https://workflowhub.eu/workflows/524","name":"CWL-based RNA-Seq workflow","description":"A CWL-based pipeline for processing RNA-Seq data (FASTQ format) and performing differential gene/transcript expression analysis. \r\n\r\nOn the respective GitHub folder are available:\r\n\r\n- The CWL wrappers for the workflow\r\n- A pre-configured YAML template, based on validation analysis of publicly available HTS data\r\n- A table of metadata (``mrna_cll_subsets_phenotypes.csv``), based on the same validation analysis, to serve as an input example for the design of comparisons during differential expression analysis\r\n\r\nBriefly, the workflow performs the following steps:\r\n\r\n1. Quality control of Illumina reads (FastQC)\r\n2. Trimming of the reads (e.g., removal of adapter and/or low quality sequences) (Trim galore)\r\n3. (Optional)  custom processing of the reads using FASTA/Q Trimmer (part of the FASTX-toolkit) \r\n4. Mapping to reference genome (HISAT2)\r\n5. Convertion of mapped reads from SAM (Sequence Alignment Map) to BAM (Binary Alignment Map) format (samtools)\r\n6. Sorting mapped reads based on chromosomal coordinates (samtools)\r\n\r\nSubsequently, two independent workflows are implemented for differential expression analysis at the transcript and gene level. 
\r\n\r\n**First**, following the [reference protocol](https://doi.org/10.1038/nprot.2016.095) for HISAT, StringTie and Ballgown transcript expression analysis, StringTie along with a reference transcript annotation GTF (Gene Transfer Format) file (if one is available) is used to:\r\n\r\n- Assemble transcripts for each RNA-Seq sample using the previous read alignments (BAM files)\r\n- Generate a global, non-redundant set of transcripts observed in any of the RNA-Seq samples\r\n- Estimate transcript abundances and generate read coverage tables for each RNA-Seq sample, based on the global, merged set of transcripts (rather than the reference) which is observed across all samples\r\n\r\nBallgown program is then used to load the coverage tables generated in the previous step and perform statistical analyses for differential expression at the transcript level. Notably, the StringTie - Ballgown protocol applied here was selected to include potentially novel transcripts in the analysis. \r\n\r\n**Second**, featureCounts is used to count reads that are mapped to selected genomic features, in this case genes by default, and generate a table of read counts per gene and sample. This table is passed as input to DESeq2 to perform differential expression analysis at the gene level. 
Both Ballgown and DESeq2 R scripts, along with their respective CWL wrappers, were designed to receive as input various parameters, such as experimental design, contrasts of interest, numeric thresholds, and hidden batch effects.\r\n","organization":"Biodata Analysis Group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/524?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"525","url":"https://workflowhub.eu/workflows/525","name":"CWL-based ChIP-Seq workflow","description":"A CWL-based pipeline for processing ChIP-Seq data (FASTQ format) and performing: \r\n\r\n- Peak calling\r\n- Consensus peak count table generation\r\n- Detection of super-enhancer regions\r\n- Differential binding analysis\r\n\r\nOn the respective GitHub folder are available:\r\n\r\n- The CWL wrappers for the workflow\r\n- A pre-configured YAML template, based on validation analysis of publicly available HTS data\r\n- Tables of metadata (``EZH2_metadata_CLL.csv`` and ``H3K27me3_metadata_CLL.csv``), based on the same validation analysis, to serve as input examples for the design of comparisons during differential binding analysis\r\n- A list of ChIP-Seq blacklisted regions (human genome version 38; hg38) from the ENCODE project, which is can be used as input for the workflow, is provided in BED format (``hg38-blacklist.v2.bed``)\r\n\r\nBriefly, the workflow performs the following steps:\r\n\r\n1. Quality control of short reads (FastQC)\r\n2. Trimming of the reads (e.g., removal of adapter and/or low quality sequences) (Trimmomatic)\r\n3. Mapping to reference genome (HISAT2)\r\n5. Convertion of mapped reads from SAM (Sequence Alignment Map) to BAM (Binary Alignment Map) format (samtools)\r\n6. Sorting mapped reads based on chromosomal coordinates (samtools)\r\n7. Adding information regarding paired end reads (e.g., CIGAR field information) (samtools)\r\n8. 
Re-sorting based on chromosomal coordinates (samtools)\r\n9. Removal of duplicate reads (samtools)\r\n10. Index creation for coordinate-sorted BAM files to enable fast random access (samtools)\r\n11. Production of quality metrics and files for the inspection of the mapped ChIP-Seq reads, taking into consideration the experimental design (deeptools2):\r\n - Read coverages for genomic regions of two or more BAM files are computed (multiBamSummary). The results are produced in compressed numpy array (NPZ) format and are used to calculate and visualize pairwise correlation values between the read coverages (plotCorrelation). \r\n - Estimation of sequencing depth, through genomic position (base pair) sampling, and visualization is performed for multiple BAM files (plotCoverage).\r\n - Cumulative read coverages for each indexed BAM file are plotted by counting and sorting all reads overlapping a “window” of specified length (plotFingerprint).\r\n - Production of coverage track files (bigWig), with the coverage calculated as the number of reads per consecutive windows of predefined size (bamCoverage), and normalized through various available methods (e.g., Reads Per Kilobase per Million mapped reads; RPKM). The coverage track files are used to calculate scores per selected genomic regions (computeMatrix), typically genes, and a heatmap, based on the scores associated with these genomic regions, is produced (plotHeatmap).\r\n12. Calling potential binding positions (peaks) to the genome (peak calling) (MACS2)\r\n13. Generation of consensus peak count table for the application of custom analyses on MACS2 peak calling results (bedtools)\r\n14. Detection of super-enhancer regions (Rank Ordering of Super-Enhancers; ROSE)\r\n15. 
Differential binding analyses (DiffBind) for:\r\n - MACS2 peak calling results\r\n - ROSE-detected super-enhancer regions \r\n ","organization":"Biodata Analysis Group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/525?version=1","name":"Version 1","author":["Konstantinos Kyritsis"],"descriptor_type":["CWL"]}]},{"id":"526","url":"https://workflowhub.eu/workflows/526","name":"CWL-based (multi-sample) workflow for germline variant calling","description":"A CWL-based pipeline for calling small germline variants, namely SNPs and small INDELs, by processing data from Whole-genome Sequencing (WGS) or Targeted Sequencing (e.g., Whole-exome sequencing; WES) experiments. \r\n\r\nOn the respective GitHub folder are available:\r\n\r\n- The CWL wrappers and subworkflows for the workflow\r\n- A pre-configured YAML template, based on validation analysis of publicly available HTS data\r\n\r\nBriefly, the workflow performs the following steps:\r\n\r\n1. Quality control of Illumina reads (FastQC)\r\n2. Trimming of the reads (e.g., removal of adapter and/or low quality sequences) (Trim galore)\r\n3. Mapping to reference genome (BWA-MEM)\r\n4. Convertion of mapped reads from SAM (Sequence Alignment Map) to BAM (Binary Alignment Map) format (samtools)\r\n5. Sorting mapped reads based on read names (samtools)\r\n6. Adding information regarding paired end reads (e.g., CIGAR field information) (samtools)\r\n7. Re-sorting mapped reads based on chromosomal coordinates (samtools)\r\n8. Adding basic Read-Group information regarding sample name, platform unit, platform (e.g., ILLUMINA), library and identifier (picard AddOrReplaceReadGroups)\r\n9. Marking PCR and/or optical duplicate reads (picard MarkDuplicates)\r\n10. Collection of summary statistics (samtools) \r\n11. Creation of indexes for coordinate-sorted BAM files to enable fast random access (samtools)\r\n12. 
Splitting the reference genome into a predefined number of intervals for parallel processing (GATK SplitIntervals)\r\n\r\nAt this point the application of multi-sample workflow follows, during which multiple samples are concatenated into a single, unified VCF (Variant Calling Format) file, which contains the variant information for all samples:\r\n\r\n13. Application of Base Quality Score Recalibration (BQSR) (GATK BaseRecalibrator and ApplyBQSR tools)\r\n14. Variant calling in gVCF (genomic VCF) mode (-ERC GVCF) (GATK HaplotypeCaller)  \r\n15. Merging of all genomic interval-split gVCF files for each sample (GATK MergeVCFs)\r\n16. Generation of the unified VCF file (GATK CombineGVCFs and GenotypeGVCFs tools)\r\n17. Separate annotation for SNP and INDEL variants, using the Variant Quality Score Recalibration (VQSR) method (GATK VariantRecalibrator and ApplyVQSR tools)\r\n18. Variant filtering based on the information added during VQSR and/or custom filters (bcftools)\r\n19. Normalization of INDELs (split multiallelic sites) (bcftools)\r\n20. 
Annotation of the final dataset of filtered variants with genomic, population-related and/or clinical information (ANNOVAR)\r\n","organization":"Biodata Analysis Group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/526?version=1","name":"Version 1","author":["Konstantinos Kyritsis"],"descriptor_type":["CWL"]}]},{"id":"527","url":"https://workflowhub.eu/workflows/527","name":"CWL-based (single-sample) workflow for germline variant calling","description":"A CWL-based pipeline for calling small germline variants, namely SNPs and small INDELs, by processing data from Whole-genome Sequencing (WGS) or Targeted Sequencing (e.g., Whole-exome sequencing; WES) experiments.\r\n\r\nOn the respective GitHub folder are available:\r\n\r\n- The CWL wrappers and subworkflows for the workflow\r\n- A pre-configured YAML template, based on validation analysis of publicly available HTS data\r\n\r\nBriefly, the workflow performs the following steps:\r\n\r\n1. Quality control of Illumina reads (FastQC)\r\n2. Trimming of the reads (e.g., removal of adapter and/or low quality sequences) (Trim galore)\r\n3. Mapping to reference genome (BWA-MEM)\r\n4. Convertion of mapped reads from SAM (Sequence Alignment Map) to BAM (Binary Alignment Map) format (samtools)\r\n5. Sorting mapped reads based on read names (samtools)\r\n6. Adding information regarding paired end reads (e.g., CIGAR field information) (samtools)\r\n7. Re-sorting mapped reads based on chromosomal coordinates (samtools)\r\n8. Adding basic Read-Group information regarding sample name, platform unit, platform (e.g., ILLUMINA), library and identifier (picard AddOrReplaceReadGroups)\r\n9. Marking PCR and/or optical duplicate reads (picard MarkDuplicates)\r\n10. Collection of summary statistics (samtools) \r\n11. Creation of indexes for coordinate-sorted BAM files to enable fast random access (samtools)\r\n12. 
Splitting the reference genome into a predefined number of intervals for parallel processing (GATK SplitIntervals)\r\n\r\nAt this point the application of single-sample workflow follows, during which multiple samples are accepted as input and they are not merged into a unified VCF file but are rather processed separately in each step of the workflow, leading to the production of a VCF file for each sample:\r\n\r\n13. Application of Base Quality Score Recalibration (BQSR) (GATK BaseRecalibrator, GatherBQSRReports and ApplyBQSR tools)\r\n14. Variant calling (GATK HaplotypeCaller)  \r\n15. Merging of all genomic interval-split gVCF files for each sample (GATK MergeVCFs)\r\n16. Separate annotation of SNPs and INDELs based on pretrained Convolutional Neural Network (CNN) models (GATK SelectVariants, CNNScoreVariants and FilterVariantTranches tools)\r\n17. (Optional) Independent step of hard-filtering (GATK VariantFiltration)\r\n18. Variant filtering based on the information added during VQSR and/or custom filters (bcftools)\r\n19. Normalization of INDELs (split multiallelic sites) (bcftools)\r\n20. Annotation of the final dataset of filtered variants with genomic, population-related and/or clinical information (ANNOVAR)\r\n","organization":"Biodata Analysis Group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/527?version=1","name":"Version 1","author":["Konstantinos Kyritsis"],"descriptor_type":["CWL"]}]},{"id":"528","url":"https://workflowhub.eu/workflows/528","name":"prepareChIPs:","description":"# prepareChIPs\r\n\r\nThis is a simple `snakemake` workflow template for preparing **single-end** ChIP-Seq data.\r\nThe steps implemented are:\r\n\r\n1. Download raw fastq files from SRA\r\n2. Trim and Filter raw fastq files using `AdapterRemoval`\r\n3. Align to the supplied genome using `bowtie2`\r\n4. Deduplicate Alignments using `Picard MarkDuplicates`\r\n5. 
Call Macs2 Peaks using `macs2`\r\n\r\nA pdf of the rulegraph is available [here](workflow/rules/rulegraph.pdf)\r\n\r\nFull details for each step are given below.\r\nAny additional parameters for tools can be specified using `config/config.yml`, along with many of the requisite paths\r\n\r\nTo run the workflow with default settings, simply run as follows (after editing `config/samples.tsv`)\r\n\r\n```bash\r\nsnakemake --use-conda --cores 16\r\n```\r\n\r\nIf running on an HPC cluster, a snakemake profile will be required for submission to the queueing system and appropriate resource allocation.\r\nPlease discuss this with your HPC support team.\r\nNodes may also have restricted internet access and rules which download files may not work on many HPCs.\r\nPlease see below or discuss this with your support team\r\n\r\nWhilst no snakemake wrappers are explicitly used in this workflow, the underlying scripts are utilised where possible to minimise any issues with HPC clusters with restrictions on internet access.\r\nThese scripts are based on `v1.31.1` of the snakemake wrappers\r\n\r\n### Important Note Regarding OSX Systems\r\n\r\nIt should be noted that this workflow is **currently incompatible with OSX-based systems**. \r\nThere are two unsolved issues\r\n\r\n1. `fasterq-dump` has a bug which is specific to conda environments. This has been updated in v3.0.3 but this patch has not yet been made available to conda environments for OSX. Please check [here](https://anaconda.org/bioconda/sra-tools) to see if this has been updated.\r\n2. 
The following  error appears in some OSX-based R sessions, in a system-dependent manner:\r\n```\r\nError in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y,  : \r\n  polygon edge not found\r\n```\r\n\r\nThe fix for this bug is currently unknown\r\n\r\n## Download Raw Data\r\n\r\n### Outline\r\n\r\nThe file `samples.tsv` is used to specify all steps for this workflow.\r\nThis file must contain the columns: `accession`, `target`, `treatment` and `input`\r\n\r\n1. `accession` must be an SRA accession. Only single-end data is currently supported by this workflow\r\n2. `target` defines the ChIP target. All files common to a target and treatment will be used to generate summarised coverage in bigWig Files\r\n3. `treatment` defines the treatment group each file belongs to. If only one treatment exists, simply use the value 'control' or similar for every file\r\n4. `input` should contain the accession for the relevant input sample. These will only be downloaded once. Valid input samples are *required* for this workflow\r\n\r\nAs some HPCs restrict internet access for submitted jobs, *it may be prudent to run the initial rules in an interactive session* if at all possible.\r\nThis can be performed using the following (with 2 cores provided as an example)\r\n\r\n```bash\r\nsnakemake --use-conda --until get_fastq --cores 2\r\n```\r\n\r\n### Outputs\r\n\r\n- Downloaded files will be gzipped and written to `data/fastq/raw`.\r\n- `FastQC` and `MultiQC` will also be run, with output in `docs/qc/raw`\r\n\r\nBoth of these directories are able to be specified as relative paths in `config.yml`\r\n\r\n## Read Filtering\r\n\r\n### Outline\r\n\r\nRead trimming is performed using [AdapterRemoval](https://adapterremoval.readthedocs.io/en/stable/).\r\nDefault settings are customisable using config.yml, with the defaults set to discard reads shorter than 50nt, and to trim using quality scores with a threshold of Q30.\r\n\r\n### Outputs\r\n\r\n- Trimmed fastq.gz files will be 
written to `data/fastq/trimmed`\r\n- `FastQC` and `MultiQC` will also be run, with output in `docs/qc/trimmed`\r\n- AdapterRemoval 'settings' files will be written to `output/adapterremoval`\r\n\r\n## Alignments\r\n\r\n### Outline\r\n\r\nAlignment is performed using [`bowtie2`](https://bowtie-bio.sourceforge.net/bowtie2/manual.shtml) and it is assumed that this index is available before running this workflow.\r\nThe path and prefix must be provided using config.yml\r\n\r\nThis index will also be used to produce the file `chrom.sizes` which is essential for conversion of bedGraph files to the more efficient bigWig files.\r\n\r\n### Outputs\r\n\r\n- Alignments will be written to `data/aligned`\r\n- `bowtie2` log files will be written to `output/bowtie2` (not the conventional log directory)\r\n- The file `chrom.sizes` will be written to `output/annotations`\r\n\r\nBoth sorted and the original unsorted alignments will be returned.\r\nHowever, the unsorted alignments are marked with `temp()` and can be deleted using \r\n\r\n```bash\r\nsnakemake --delete-temp-output --cores 1\r\n```\r\n\r\n## Deduplication\r\n\r\n### Outline\r\n\r\nDeduplication is performed using [MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) from the Picard set of tools.\r\nBy default, deduplication will remove the duplicates from the set of alignments.\r\nAll resultant bam files will be sorted and indexed.\r\n\r\n### Outputs\r\n\r\n- Deduplicated alignments are written to `data/deduplicated` and are indexed\r\n- DuplicationMetrics files are written to `output/markDuplicates`\r\n\r\n## Peak Calling\r\n\r\n### Outline\r\n\r\nThis is performed using [`macs2 callpeak`](https://pypi.org/project/MACS2/).\r\n\r\n- Peak calling will be performed on:\r\n    a. each sample individually, and \r\n    b. 
merged samples for those sharing a common ChIP target and treatment group.\r\n- Coverage bigWig files for each individual sample are produced using CPM values (i.e. Signal Per Million Reads, SPMR)\r\n- For all combinations of target and treatment coverage bigWig files are also produced, along with fold-enrichment bigWig files\r\n\r\n### Outputs\r\n\r\n- Individual outputs are written to `output/macs2/{accession}`\r\n\t+ Peaks are written in `narrowPeak` format along with `summits.bed`\r\n\t+ bedGraph files are automatically converted to bigWig files, and the originals are marked with `temp()` for subsequent deletion\r\n\t+ callpeak log files are also added to this directory\r\n- Merged outputs are written to `output/macs2/{target}/`\r\n\t+ bedGraph Files are also converted to bigWig and marked with `temp()`\r\n\t+ Fold-Enrichment bigWig files are also created with the original bedGraph files marked with `temp()`\r\n","organization":"Black Ochre Data Labs","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/528?version=1","name":"v0.1.0","author":["Stevie Pederson"],"descriptor_type":["SMK"]}]},{"id":"541","url":"https://workflowhub.eu/workflows/541","name":"Sample workflow that combines simulations with data analytics.","description":"Sample workflow template that combines simulations with data analytics. It is not a real workflow, but it mimics this type of workflows. It illustrates how COMPSs invokes binaries. It can be extended to invoke MPI applications. 
","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/541?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"546","url":"https://workflowhub.eu/workflows/546","name":"ARA (Automated Record Analysis)","description":"## ARA (Automated Record Analysis) : An automatic pipeline for exploration of SRA datasets with sequences as a query\r\n\r\n### Requirements\r\n\r\n- **Docker**\r\n\r\n  - Please checkout the [Docker installation](https://docs.docker.com/get-docker/) guide.\r\n\r\n    _or_\r\n\r\n- **Mamba package manager**\r\n\r\n  - Please checkout the [mamba or micromamba](https://mamba.readthedocs.io/en/latest/installation.html) official installation guide.\r\n\r\n  - We prefer `mamba` over [`conda`](https://docs.conda.io/en/latest/) since it is faster and uses `libsolv` to effectively resolve the dependencies.\r\n\r\n  - `conda` can still be used to install the pipeline using the same commands as described in the installation section.\r\n\r\n    \u003e Note: **It is important to include the 'bioconda' channel in addition to the other channels as indicated in the [official manual](https://bioconda.github.io/#usage \"Bioconda - Usage\")**. Use the following commands in the given order to configure the channels (one-time setup).\r\n    \u003e\r\n    \u003e ```bash\r\n    \u003e conda config --add channels defaults\r\n    \u003e conda config --add channels bioconda\r\n    \u003e conda config --add channels conda-forge\r\n    \u003e conda config --set channel_priority strict\r\n    \u003e ```\r\n\r\n---\r\n\r\n### Installation\r\n\r\nThe user can install the pipeline by using either Docker or Mamba using the steps mentioned below.\r\n\r\nFirst, click the green \"Code\" button, then select \"Download Zip\" to begin downloading the contents of this repository. 
Once the download is complete, extract the zip file into the desired location before starting the setup. Please use the commands shown below to begin installing the pipeline.\r\n\r\nAlternatively, the github repo can also be cloned through the options shown after clicking the \"Code\" button. Navigate inside the folder afterwards by using the `cd ARA/` command before starting the setup.\r\n\r\n\u003e _Warning: Before starting any analysis with the pipeline, please make sure that the system has enough disk space available for the data you wish to retrieve and process from the SRA repository._\r\n\r\n- **Using Docker**\r\n\r\n  ```bash\r\n  cd ARA-main/\r\n  docker build -t ara_img .\r\n  ```\r\n\r\n_or_\r\n\r\n- **Using Mamba**\r\n\r\n  ```bash\r\n  cd ARA-main/\r\n  mamba env create --file requirements.yaml\r\n  mamba activate ara_env\r\n  perl setup.pl\r\n  ```\r\n\r\n  \u003e _Note: After installation, the virtual environment consumes approximately 1.5 GB of disk space. The installation was tested on \"Ubuntu 20.04.4 LTS\", \"Ubuntu 22.04.1 LTS\" and \"Fedora 37\" using the procedure mentioned above._\r\n\r\nPlease be patient because downloading and configuring the tools/modules may take several minutes. The warning messages that appear during the installation of certain Perl modules can be ignored by users.\r\n\r\nOptional: The user can also add the current directory to PATH for ease of use. Use the `chmod +x ara.pl` followed by `export PATH=\"$(pwd):$PATH\"` command. 
Alternatively, the user is free to create symbolic, copy the executable to `/bin/`, or use any other method depending on their operating system.\r\n\r\nRefer the 'Troubleshooting' section in case of any installation related issues.\r\n\r\n---\r\n\r\n### Example usage\r\n\r\n- **Docker**\r\n\r\n  `docker run -it ara_img /home/ARA-main/ara.pl --input /home/ARA-main/example/SraRunInfo.csv --sequences /home/ARA-main/example/Arabidopsis_thaliana.TAIR10.ncrna.fa`\r\n\r\n- **Mamba environment**\r\n\r\n  `perl ara.pl --input example/SraRunInfo.csv --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa`\r\n\r\nTo get full usage info: `perl ara.pl --help`\r\n\r\n\u003e _Note_: The user can delete the contents of `results/` directory after testing the tool using the example mentioned above.\r\n\r\n### Configuration file\r\n\r\nThe configuration file `conf.txt` is automatically generated during the installation by setup script. It contains certain default parameters as well as the location to the executable binaries of the tools incorporated in the pipeline.\r\n\r\nThe user can modify the default parameters in `conf.txt` and pass it to the pipeline as an input. For example, the `data_perc` option in the configuration refers to the default value of 5% of the dataset selected for analysis. However, the user has the flexibility to provide any integer value between 1 and 100 to specify the desired percentage of the dataset to be used.\r\n\r\nSimilarly, the user can choose between _blastn_ or _bowtie2_ by changing the 'execute flag' to either 0 or 1 in the configuration file while leaving the rest of the parameters to default values. By default, both the tools are enabled _ie_. 
`execute = 1`.\r\n\r\nThe `read_drop_perc_cutoff` in `conf.txt` config file denotes the cutoff to discard a sample if the total reads left after executing the trimmomatic are higher than the threshold (by default, if the more than 70% of reads are dropped as per the trimmomatic log, then the sample will fail the quality criteria and will not be processed downstream). Please refer the documentation of [Trimmomatic ](https://github.com/usadellab/Trimmomatic) for more details about the parameters present in the config file.\r\n\r\nSimilarly, the criteria to check the minimal alignment rate are indicated by the `alignment perc cutoff` parameter under blastn and bowtie2 in the `conf.txt` configuration file (if the total alignment percentage is less than the threshold then the pipeline will report that the sample failed the quality criteria). More details about the parameters used in the `conf.txt` file can be found in the respective documentations of [Blastn](https://www.ncbi.nlm.nih.gov/books/NBK279690/) and [Bowtie2](https://bowtie-bio.sourceforge.net/bowtie2/manual.shtml).\r\n\r\nBy default, the pipeline uses a pre-built Kraken2 viral genomic database ([release: 9/8/2022](https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20220908.tar.gz)) from \u003chttps://benlangmead.github.io/aws-indexes/k2\u003e. Users can provide their own database by changing the `kraken2_db_path` parameter in the `conf.txt` file.\r\n\r\n\u003e _Note:_ If the user wishes to use a different installation than Bioconda, the user can manually install the required tools and specify the absolute path of the executable binaries in the configuration.\r\n\r\n---\r\n\r\n### Pipeline parameters\r\n\r\n- **`--input`** (mandatory) The user can provide input in either of the following ways:\r\n\r\n  - A single SRA run accession. eg: **`perl ara.pl --input SRR12548227 --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa`**\r\n\r\n  - A list of run accessions in a text file (1 run accession per line). 
eg: **`perl ara.pl --input example/list.txt --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa`**\r\n\r\n  - The SRA runInfo exported directly from the NCBI-SRA web portal. Goto the [SRA homepage](https://www.ncbi.nlm.nih.gov/sra \"Home - NCBI - SRA\") and search for the desired keyword. Export the `SraRunInfo.csv` by clicking 'Send to' =\\\u003e File =\\\u003e RunInfo). eg: **`perl ara.pl --input example/SraRunInfo.csv --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa`**\r\n\r\n- **`--sequences`** (mandatory) The user should provide a fasta file containing the query sequences.\r\n\r\n- **`--output`** (optional) The output directory to store the results. By default, the output will be stored into the **`results/`** directory of the package. eg: **`perl ara.pl --input example/SraRunInfo.csv --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa --output /src/main/test/`**\r\n\r\n- **`--mode`** (optional) Choose one of the three modes to run the pipeline.\r\n\r\n  - The **`screen`** is the default mode which will only download a fraction of the data-set per SRA-run accession and analyse the file as per the given configuration.\r\n\r\n  - The **`full`** mode will execute the pipeline by downloading the complete fastq file per SRA-run accession.\r\n\r\n  - The **`both`** option searches for samples using a fraction of the data that meet the minimum alignment cutoff from either 'bowtie2' or 'blastn', and then automatically performs alignment by downloading the entire fastq file. eg: **`perl ara.pl --input example/SraRunInfo.csv --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa --output /src/main/test/ --mode screen`**\r\n\r\n    \u003e _Note:_ There is a supporting **`summary`** mode, that will generate a unified alignment summary by examining the output files created by either screen-mode or full-mode. The summary mode should only be used when the user needs to recreate the summary stats from the pre-existing results. 
The user must enter **`–mode summary`** along with the previously used command parameters to re-generate the summary.\r\n\r\n  - **`--config`** (optional) Pipeline configuration. By default it will use the **`conf.txt`** generated by the setup script. eg: **`perl ara.pl --input example/SraRunInfo.csv --sequences example/Arabidopsis_thaliana.TAIR10.ncrna.fa --output /src/main/test/ --mode screen --config conf.txt`**\r\n\r\n---\r\n\r\n### Output structure\r\n\r\nThe pipeline will create folders per SRA run accession and generate results using the run accession as the prefix. The analysis related to the screening a fraction of data will be stored in `screening_results` directory whereas the analysis conducted on the whole dataset will be stored in `full_analyis_results` directory.\r\n\r\nAn outline of directory structure containing the results is shown below-\r\n\r\n    results/\r\n    `-- test/ (name derived from the input fasta sequence file)\r\n        |-- test.screening.analysis.stats.sorted.by.alignment.txt (combined metadata and analysis report generated after processing all the SRA run accessions, sorted in decreasing order of total alignment percentage)\r\n        |-- metadata/\r\n        |   |-- test.metadata.txt (Combined metadata downloaded from SRA)\r\n        |   |-- test.metadata.screened.txt (List of SRA accessions which qualify the filter criteria specified in the config.)\r\n        |   |-- SRA_RUN.run.metadata.txt (unprocessed metadata on a single SRA accession as retrieved from NCBI)\r\n        |-- reference/\r\n        |   |-- blastn_db/ (folder containing the blast database created from the input fasta sequence)\r\n        |   |-- bowtie2_index/ (folder containing the bowtie index created from the input fasta sequence)\r\n        |   |-- bowtie2_index.stdout.txt (stdout captured from bowtie2 index creation)\r\n        |   `-- makeblastdb.stdout.txt (stdout captured from blastn database creation)\r\n        `-- screening_results/ (similar structure 
for screening or full mode)\r\n            |-- SRA_RUN/ (each SRA run accession will be processed into a separate folder)\r\n            |   |-- blastn/\r\n            |   |   |-- SRA_RUN.blast.results.txt (output from NCBI Blastn)\r\n            |   |   `-- blast.stats.txt (blastn overall alignment stats)\r\n            |   |-- bowtie2/\r\n            |   |   |-- SRA_RUN.bam (output from bowtie2)\r\n            |   |   |-- alignment.stats.txt (bowtie2 stdout)\r\n            |   |   `-- alignment.txt (bowtie2 overall alignment summary)\r\n            |   |-- fastQC/\r\n            |   |   |-- \u003cRaw data FastQC report\u003e\r\n            |   |   |-- \u003cAdapter trimmed FastQC report\u003e\r\n            |   |-- kraken2/\r\n            |   |   |-- SRA_RUN.kraken (kraken2 standard classification table)\r\n            |   |   |-- SRA_RUN.report (kraken2 classification report)\r\n            |   |   `-- SRA_RUN.stdout.txt (kraken2 stdout)\r\n            |   |-- raw_fastq/\r\n            |   |   |-- \u003cDownloaded single end or paired end fastq file(s)\u003e\r\n            |   |   |-- fastq_dump.stdout.txt\r\n            |   |   |-- sra/\r\n            |   |   `-- wget.full.sra.stdout.txt\r\n            |   `-- trimmed_data/\r\n            |       |-- \u003cAdapter trimmed single end or paired end fastq file(s)\u003e\r\n            |       `-- SRA_RUN_trim_stdout_log.txt (trimmomatic stdout)\r\n            `-- runlog.SRA_RUN.txt (Complete run log of the pipeline per SRA run accession)\r\n\r\nFor a thorough understanding of the results of the third-party tools, take a look at the following documentations:\r\n\r\n- [Blastn](https://www.ncbi.nlm.nih.gov/books/NBK279690/)\r\n- [Bowtie2](https://bowtie-bio.sourceforge.net/bowtie2/manual.shtml)\r\n- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\r\n- [Kraken2](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown)\r\n- 
[Trimmomatic](https://github.com/usadellab/Trimmomatic)\r\n\r\n---\r\n\r\n### Disk usage using the input from the example\r\n\r\nThe table below provides a summary of the disk usage for different analyses conducted on varying dataset sizes. It demonstrates how disk usage can increase depending on the choice of the fraction of the dataset the user wishes to analyze.\r\n\r\n| RUN ACCESSION | 100% of dataset | 5% of dataset | 10% of dataset |\r\n| ------------- | --------------- | ------------- | -------------- |\r\n| SRR8392720    | 1.3G            | 85M           | 156M           |\r\n| SRR7289585    | 1.4G            | 150M          | 288M           |\r\n| SRR12548227   | 15M             | 9.0M          | 9.1M           |\r\n\r\nThis summary highlights how the disk usage (in megabytes or gigabytes) can vary depending on the chosen fraction of the dataset for analysis.\r\n\r\n---\r\n\r\n### Troubleshooting\r\n\r\n- Errors related to mamba/conda environment:\r\n\r\n  Since `mamba` is a drop-in replacement and uses the same commands and configuration options as **conda**, it's possible to swap almost all commands between **conda** \u0026 **mamba**.\r\n\r\n  Use **`conda list`** command to verify whether the packages mentioned in the `requirements.yaml` are successfully installed into your environment.\r\n\r\n  \u003e _Note:_ The `requirements.yaml` provided in this package was exported from `mamba 0.25.0` installation running on `Ubuntu 20.04.4 LTS`.\r\n\r\n  In case of any missing tool/ conflicting dependencies in the environment, the user can try using **`conda search \u003ctool name\u003e`** or `mamba repoquery search \u003ctool name\u003e` command to find the supported version of the tool and then manually install it by typing **`conda install \u003ctool name\u003e`** or `mamba install \u003ctool name\u003e` inside the environment. 
Please refer the official [troubleshooting guide](https://conda.io/projects/conda/en/latest/user-guide/troubleshooting.html \"User guide » Troubleshooting\") for further help.\r\n\r\n  \u003e _Note:_ On macOS and Linux, the supported tools and their dependencies aren't always the same. Even when all of the requirements are completely aligned, the set of available versions isn't necessarily the same. User may try setting up the environment using any of the supplementary `requirements-*.txt` provided in the `src/main/resources/` directory.\r\n\r\n- Error installing Perl modules:\r\n\r\n  Users must ensure that they have write permission to the `/Users/\\*/.cpan/` or similar directory, and the CPAN is properly configured.\r\n\r\n  You might need to define the PERLLIB/PERL5LIB environment variable if you see an error similar to the following:\r\n\r\n  ```bash\r\n      Cant locate My/Module.pm in @INC (@INC contains:\r\n      ...\r\n      ...\r\n      .).\r\n      BEGIN failed--compilation aborted.\r\n  ```\r\n\r\n  \u003e _Note about MAKE_: 'make' is an essential tool for building Perl modules. Please make sure that you have 'make' installed in your system. The setup script provided in this package utilizes 'cpan' to build the required Perl modules automatically.\r\n\r\n  If the automatic setup provided in the package fails to install the required dependencies, you may need to install them manually by using the command `cpan install \u003cmodule name\u003e` or searching the package on [Metacpan](https://metacpan.org/).\r\n\r\n  Additionally, some Perl modules can also be installed through `mamba` (eg. 
the compatible version of Perl module `Config::Simple` can be searched on mamba by `mamba repoquery search perl-config-simple`)\r\n\r\n---\r\n\r\n### List of Perl modules and tools incorporated in the pipeline\r\n\r\n- Perl modules:\r\n\r\n  - Config::Simple\r\n  - Parallel::ForkManager\r\n  - Log::Log4perl\r\n  - Getopt::Long\r\n  - Text::CSV\r\n  - Text::Unidecode\r\n\r\n- Tools:\r\n\r\n  - [NCBI EDirect utilities \\\u003e=16.2](https://www.ncbi.nlm.nih.gov/books/NBK179288/)\r\n  - [NCBI SRA Toolkit \\\u003e=2.10.7](https://www.ncbi.nlm.nih.gov/home/tools/)\r\n  - [FastQC \\\u003e=0.11.9](https://www.bioinformatics.babraham.ac.uk/projects/download.html#fastqc)\r\n  - [Trimmomatic \\\u003e=0.39](http://www.usadellab.org/cms/?page=trimmomatic)\r\n  - [FASTX-Toolkit \\\u003e=0.0.14](http://hannonlab.cshl.edu/fastx_toolkit/)\r\n  - [NCBI Blast \\\u003e=2.10.1](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs\u0026DOC_TYPE=Download)\r\n  - [Bowtie2 \\\u003e=2.4.5](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)\r\n  - [Samtools \\\u003e=1.15.1](http://www.htslib.org/download/)\r\n  - [Kraken2 \\\u003e=2.1.2](https://ccb.jhu.edu/software/kraken2/)\r\n\r\n---\r\n","organization":"ARA-dev","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/546?version=1","name":"main @ ea7e984","author":[],"descriptor_type":[]}]},{"id":"547","url":"https://workflowhub.eu/workflows/547","name":"GERONIMO","description":"# GERONIMO\r\n\r\n## Introduction\r\nGERONIMO is a bioinformatics pipeline designed to conduct high-throughput homology searches of structural genes using covariance models. These models are based on the alignment of sequences and the consensus of secondary structures. The pipeline is built using Snakemake, a workflow management tool that allows for the reproducible execution of analyses on various computational platforms.  
\r\n\r\nThe idea for developing GERONIMO emerged from a comprehensive search for [telomerase RNA in lower plants] and was subsequently refined through an [expanded search of telomerase RNA across Insecta]. GERONIMO can test hundreds of genomes and ensures the stability and reproducibility of the analyses performed.\r\n\r\n\r\n[telomerase RNA in lower plants]: https://doi.org/10.1093/nar/gkab545\r\n[expanded search of telomerase RNA across Insecta]: https://doi.org/10.1093/nar/gkac1202\r\n\r\n## Scope\r\nThe GERONIMO tool utilises covariance models (CMs) to conduct homology searches of RNA sequences across a wide range of gene families in a broad evolutionary context. Specifically, it can be utilised to:\r\n\r\n* Detect RNA sequences that share a common evolutionary ancestor\r\n* Identify and align orthologous RNA sequences among closely related species, as well as paralogous sequences within a single species\r\n* Identify conserved non-coding RNAs in a genome, and extract upstream genomic regions to characterise potential promoter regions.  \r\nIt is important to note that GERONIMO is a computational tool, and as such, it is intended to be run on a computer with a small amount of data. Appropriate computational infrastructure is necessary for analysing hundreds of genomes.\r\n\r\nAlthough GERONIMO was primarily designed for Telomerase RNA identification, its functionality extends to include the detection and alignment of other RNA gene families, including **rRNA**, **tRNA**, **snRNA**, **miRNA**, and **lncRNA**. This can aid in identifying paralogs and orthologs across different species that may carry specific functions, making it useful for phylogenetic analyses.  \r\n\r\nIt is crucial to remember that some gene families may exhibit similar characteristics but different functions. 
Therefore, analysing the data and functional annotation after conducting the search is essential to characterise the sequences properly.\r\n\r\n## Pipeline overview\r\n\r\n\r\nBy default, the GERONIMO pipeline conducts high-throughput searches of homology sequences in downloaded genomes utilizing covariance models. If a significant similarity is detected between the model and genome sequence, the pipeline extracts the upstream region, making it convenient to identify the promoter of the discovered gene. In brief, the pipeline:\r\n- Compiles a list of genomes using the NCBI's [Entrez] database based on a specified query, *e.g. \"Rhodophyta\"[Organism]*\r\n- Downloads and decompresses the requested genomes using *rsync* and *gunzip*, respectively\r\n- *Optionally*, generates a covariance model based on a provided alignment using [Infernal]\r\n- Conducts searches among the genomes using the covariance model [Infernal]\r\n- Supplements genome information with taxonomy data using [rentrez]\r\n- Expands the significant hits sequence by extracting upstream genomic regions using [*blastcmd*]\r\n- Compiles the results, organizes them into a tabular format, and generates a visual summary of the performed analysis.\r\n\r\n[Entrez]: https://www.ncbi.nlm.nih.gov/books/NBK179288/\r\n[Infernal]: http://eddylab.org/infernal/\r\n[rentrez]: https://github.com/ropensci/rentrez\r\n[*blastcmd*]: https://www.ncbi.nlm.nih.gov/books/NBK569853/\r\n\r\n## Quick start\r\nThe GERONIMO is available as a `snakemake pipeline` running on Linux and Windows operating systems.\r\n\r\n### Windows 10\r\nInstall Linux on Windows 10 (WSL) according to [instructions], which boils down to opening PowerShell or Windows Command Prompt in *administrator mode* and pasting the following:\r\n```shell\r\nwsl --install\r\nwsl.exe --install UBUNTU\r\n```\r\nThen restart the machine and follow the instructions for setting up the Linux environment.\r\n\r\n[instructions]: 
https://learn.microsoft.com/en-us/windows/wsl/install\r\n\r\n### Linux:\r\n#### Check whether the conda is installed:\r\n```shell\r\nconda -V\r\n```\r\n\u003e GERONIMO was tested on conda 23.3.1\r\n#### 1) If you do not have installed `conda`, please install `miniconda`\r\nPlease follow the instructions for installing [miniconda]\r\n\r\n[miniconda]: https://conda.io/projects/conda/en/stable/user-guide/install/linux.html\r\n\r\n#### 2) Continue with installing `mamba` (recommended but optional)\r\n```shell\r\nconda install -n base -c conda-forge mamba\r\n```\r\n#### 3) Install `snakemake`\r\n```shell\r\nconda activate base\r\nmamba create -p env_snakemake -c conda-forge -c bioconda snakemake\r\nmamba activate env_snakemake\r\nsnakemake --help\r\n```\r\nIn case of complications, please check the section `Questions \u0026 Answers` below or follow the [official documentation] for troubleshooting.\r\n\r\n[official documentation]: https://snakemake.readthedocs.io/en/stable/getting_started/installation.html\r\n\r\n### Clone the GERONIMO repository\r\nGo to the path in which you want to run the analysis and clone the repository:\r\n```shell\r\ncd \u003cPATH\u003e\r\ngit clone https://github.com/amkilar/GERONIMO.git\r\n```\r\n\r\n### Run sample analysis to ensure GERONIMO installation was successful\r\nAll files are prepared for the sample analysis as a default. 
Please execute the line below:\r\n```shell\r\nsnakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx\r\n```\r\n\r\nThis will prompt GERONIMO to quickly scan all modules, verifying the correct setup of the pipeline without executing any analysis.\r\nYou should see the message `Building DAG of jobs...`, followed by `Nothing to be done (all requested files are present and up to date).`, when successfully completed.\r\n\r\nIf you want to run the sample analysis fully, please remove the folder `results` from the GERONIMO directory and execute GERONIMO again with:\r\n\r\n`snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx`\r\n\r\n\u003e You might consider allowing more cores to speed up the analysis, which might take up to several hours.\r\n\r\n#### You might want to clean `GERONIMO/` directory from the files produced by the example analysis. You can safely remove the following:\r\n- `GERONIMO/results`\r\n- `GERONIMO/database`\r\n- `GERONIMO/taxonomy`\r\n- `GERONIMO/temp`\r\n- `.create_genome_list.touch`\r\n- `list_of_genomes.txt`\r\n\r\n## Setup the inputs\r\n\r\n### 1) Prepare the `covariance models`:\r\n\r\n#### Browse the collection of available `covariance models` at [Rfam] (*You can find the covariance model in the tab `Curation`.*)  \r\nPaste the covariance model to the folder `GERONIMO/models` and ensure its name follows the convention: `cov_model_\u003cNAME\u003e`\r\n\r\n[Rfam]: https://rfam.org/\r\n\r\n#### **OR**\r\n\r\n#### Prepare your own `covariance model` using [LocARNA]\r\n1. Paste or upload your sequences to the web server and download the `.stk` file with the alignment result.  
\r\n  \r\n    \u003e *Please note that the `.stk` file format is crucial for the analysis, containing sequence alignment and secondary structure consensus.*\r\n    \r\n    \u003e The LocARNA web service allows you to align 30 sequences at once - if you need to align more sequences, please use the standalone version available [here]  \r\n    \u003e After installation run: \r\n    ```shell\r\n    mlocarna my_fasta_sequences.fasta\r\n    ```\r\n  \r\n2. Paste the `.stk` alignment file to the folder `GERONIMO/model_to_build` and ensure its name follows the convention: `\u003cNAME\u003e.stk`\r\n\r\n   \u003e Please check the example `heterotrichea.stk` format in `GERONIMO/models_to_built` for reference\r\n   \r\n\r\n[LocARNA]: http://rna.informatik.uni-freiburg.de/LocARNA/Input.jsp\r\n[here]: http://www.bioinf.uni-freiburg.de/Software/LocARNA/\r\n\r\n\r\n### 2) Adjust the `config.yaml` file\r\nPlease adjust the analysis specifications, as in the following example:\r\n\r\n\u003e - database: '\u003cDATABASE_QUERY\u003e [Organism]' (in case of difficulties with defining the database query, please follow the instructions below)\r\n\u003e - extract_genomic_region-length:  \u003cnumber\u003e (here you can determine how long the upstream genomic region should be extracted; tested for 200)\r\n\u003e - models: [\"\u003cNAME\u003e\", \"\u003cNAME\u003e\"] (here specify the names of models that should be used to perform analysis)\r\n\u003e   \r\n\u003e   *Here you can also insert the name of the covariance model you want to build with GERONIMO - just be sure you placed `\u003cNAME\u003e.stk` file in `GERONIMO/models_to_build` before starting analysis*\r\n\u003e - CPU_for_model_building: \u003cnumber\u003e (specify the number of available CPUs devoted to the process of building model (cannot exceed the CPU number allowed to snakemake with `--cores`)\r\n\u003e\r\n\u003e   *You might ignore this parameter when you do not need to create a new covariance model*\r\n\r\n\r\nKeep in mind 
that the covariance models and alignments must be present in the respective GERONIMO folders.\r\n \r\n### 3) Remove folder `results`, which contains example analysis output\r\n### 4) **Please ensure you have enough storage capacity to download all the requested genomes (in the `GERONIMO/` directory)**\r\n\r\n## Run GERONIMO\r\n```shell\r\nmamba activate env_snakemake\r\ncd ~/GERONIMO\r\nsnakemake -s GERONIMO.sm --cores \u003cdeclare number of CPUs\u003e --use-conda results/summary_table.xlsx\r\n```\r\n  \r\n## Example results\r\n\r\n### Outputs characterisation\r\n\r\n#### A) Summary table\r\nThe Excel table contains the results arranged by taxonomy information and hit significance. The specific columns include:\r\n* family, organism_name, class, order, phylum (taxonomy context)\r\n* GCA_id - corresponds to the genome assembly in the *NCBI database*\r\n* model - describes which covariance model identified the result\r\n* label - follows the *Infernal* convention of categorizing hits\r\n* number - the counter of the result\r\n* e_value - indicates the significance level of the hit\r\n* HIT_sequence - the exact HIT sequence found by *Infernal*, which corresponds to the covariance model\r\n* HIT_ID - describes in which part of the genome assembly the hit was found, which may help publish novel sequences\r\n* extended_genomic_region - upstream sequence, which may contain a possible promoter sequence\r\n* secondary_structure - the secondary structure consensus of the covariance model\r\n\r\n\r\n#### B) Significant Hits Distribution Across Taxonomy Families\r\nThe plot provides an overview of the number of genomes in which at least one significant hit was identified, grouped by family. 
The bold black line corresponds to the number of genomes present in each family, helping to minimize bias regarding unequal data representation across the taxonomy.\r\n\r\n\r\n#### C) Hits Distribution in Genomes Across Families\r\nThe heatmap provides information about the most significant hits from the genome, identified by a specific covariance model. Genomes are grouped by families (on the right). Hits are classified into three categories based on their e-values. Generally, these categories correspond to hit classifications (\"HIT,\" \"MAYBE,\" \"NO HIT\"). The \"HIT\" category is further divided to distinguish between highly significant hits and moderately significant ones.\r\n\r\n\r\n\r\n### GERONIMO directory structure\r\n\r\nThe GERONIMO directory structure is designed to produce files in a highly structured manner, ensuring clear insight and facilitating the analysis of results. During a successful run, GERONIMO produces the following folders:\r\n* `/database` - which contains genome assemblies that were downloaded from the *NCBI database* and grouped in subfolders\r\n* `/taxonomy` - where taxonomy information is gathered and stored in the form of tables\r\n* `/results` - the main folder containing all produced results:\r\n  * `/infernal_raw` - contains the raw results produced by *Infernal*\r\n  * `/infernal` - contains restructured results of *Infernal* in table format\r\n  * `/cmdBLAST` - contains results of *cmdblast*, which extracts the extended genomic region\r\n  * `/summary` - contains summary files that join results from *Infernal*, *cmdblast*, and attach taxonomy context\r\n  * `/plots` - contains two types of summary plots\r\n* `/temp` - folder contains the information necessary to download genome assemblies from *NCBI database*\r\n\r\n* `/env` - stores instructions for dependency installation\r\n* `/models` - where calibrated covariance models can be pasted, *for example, from the Rfam database*\r\n* `/modes_to_built` - where multiple 
alignments in *.stk* format can be pasted\r\n* `/scripts` - contains developed scripts that perform results structurization\r\n\r\n#### The example GERONIMO directory structure:\r\n\r\n```shell\r\nGERONIMO\r\n├── database\r\n│   ├── GCA_000091205.1_ASM9120v1_genomic\r\n│   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   └── ...\r\n├── env\r\n├── models\r\n├── model_to_build\r\n├── results\r\n│   ├── cmdBLAST\r\n│   │   ├── MRP\r\n│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic\r\n│   │   │   │   ├── extended\r\n│   │   │   │   └── filtered\r\n│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   │   │   │   ├── extended\r\n│   │   │   │   └── filtered\r\n│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   │   │   │   ├── extended\r\n│   │   │   │   └── filtered\r\n│   │   │   └── ...\r\n│   │   ├── SRP\r\n│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic\r\n│   │   │   │   ├── extended\r\n│   │   │   │   └── filtered\r\n│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   │   │   │   ├── extended\r\n│   │   │   │   └── filtered\r\n│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   │   │   │   ├── extended\r\n│   │   │   │   └── filtered\r\n│   │   │   └── ...\r\n│   │   ├── ...\r\n│   ├── infernal\r\n│   │   ├── MRP\r\n│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic\r\n│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   │   │   ├── ...\r\n│   │   ├── SRP\r\n│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic\r\n│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   │   │   ├── ...\r\n│   ├── plots\r\n│   ├── raw_infernal\r\n│   │   ├── MRP\r\n│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic\r\n│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   │   │   ├── ...\r\n│   │   ├── SRP\r\n│   │   │   ├── 
GCA_000091205.1_ASM9120v1_genomic\r\n│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic\r\n│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic\r\n│   │   │   ├── ...\r\n│   └── summary\r\n│       ├── GCA_000091205.1_ASM9120v1_genomic\r\n│       ├── GCA_000341285.1_ASM34128v1_genomic\r\n│       ├── GCA_000350225.2_ASM35022v2_genomic\r\n│       ├── ...\r\n├── scripts\r\n├── taxonomy\r\n└── temp\r\n```\r\n\r\n## GERONIMO applicability\r\n\r\n### Expanding the evolutionary context\r\nTo add new genomes or database queries to an existing analysis, please follow the instructions:\r\n1) Rename the `list_of_genomes.txt` file to `previous_list_of_genomes.txt` or any other preferred name.\r\n2) Modify the `config.yaml` file by replacing the previous database query with the new one.\r\n3) Delete:\r\n   - `summary_table.xlsx`, `part_summary_table.csv`, `summary_table_models.xlsx` files located in the `GERONIMO\\results` directory\r\n   - `.create_genome_list.touch` file\r\n5) Run GERONIMO to calculate new results using the command:\r\n     ```shell\r\n     snakemake -s GERONIMO.sm --cores \u003cdeclare number of CPUs\u003e --use-conda results/summary_table.xlsx\r\n     ```\r\n7) Once the new results are generated, reviewing them before merging them with the original results is recommended.\r\n8) Copy the contents of the `previous_list_of_genomes.txt` file and paste them into the current `list_of_genomes.txt`.\r\n9) Delete:\r\n   - `summary_table.xlsx` located in the `GERONIMO\\results` directory\r\n   - `.create_genome_list.touch` file\r\n10) Run GERONIMO to merge the results from both analyses using the command:\r\n    ```shell\r\n      snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx\r\n    ```\r\n\r\n### Incorporating new covariance models into existing analysis\r\n1) Copy the new covariance model to `GERONIMO/models`\r\n2) Modify the `config.yaml` file by adding the name of the new model to the line `models: [...]`\r\n3) Run GERONIMO to see the 
updated analysis outcome\r\n\r\n### Building a new covariance model\r\nWith GERONIMO, building a new covariance model from multiple sequence alignment in the `.stk` format is possible. \r\n\r\nTo do so, simply paste `\u003cNAME\u003e.stk` file to `GERONIMO/models_to_build` and paste the name of the new covariance  model to `config.yaml` file to the line `models: [\"\u003cNAME\u003e\"]`\r\n\r\nand run GERONIMO.\r\n\r\n\r\n## Questions \u0026 Answers\r\n\r\n### How to specify the database query?\r\n- Visit the [NCBI Assemblies] website.  \r\n- Follow the instruction on the graphic below:\r\n\r\n[NCBI Assemblies]: https://www.ncbi.nlm.nih.gov/assembly/?term=\r\n\r\n### WSL: problem with creating `snakemake_env`\r\nIn the case of an error similar to the one below:\r\n\u003e CondaError: Unable to create prefix directory '/mnt/c/Windows/system32/env_snakemake'.\r\n\u003e Check that you have sufficient permissions.  \r\n  \r\nYou might try to delete the cache with: `rm -r ~/.cache/` and try again.\r\n\r\n### When `snakemake` does not seem to be installed properly\r\nIn the case of the following error:\r\n\u003e Command 'snakemake' not found ...\r\n\r\nCheck whether the `env_snakemake` is activated.\r\n\u003e It should result in a change from (base) to (env_snakemake) before your login name in the command line window.\r\n\r\nIf you still see `(base)` before your login name, please try to activate the environment with conda:\r\n`conda activate env_snakemake`\r\n\r\n\r\nPlease note that you might need to specify the full path to the `env_snakemake`, like /home/your user name/env_snakemake\r\n\r\n### How to browse GERONIMO results obtained in WSL?\r\nYou can easily access the results obtained on WSL from your Windows environment by opening `File Explorer` and pasting the following line into the search bar: `\\\\wsl.localhost\\Ubuntu\\home\\`. This will reveal a folder with your username, as specified during the configuration of your Ubuntu system. 
To locate the GERONIMO results, simply navigate to the folder with your username and then to the `home` folder. (`\\\\wsl.localhost\\Ubuntu\\home\\\u003cuser\u003e\\home\\GERONIMO`)\r\n\r\n### GERONIMO occupies a lot of storage space\r\nThrough genome downloads, GERONIMO can potentially consume storage space, rapidly leading to a shortage. Currently, downloading genomes is an essential step for optimal GERONIMO performance.\r\n\r\nRegrettably, if the analysis is rerun without the `/database` folder, it will result in the need to redownload genomes, which is a highly time-consuming process.\r\n\r\nNevertheless, if you do not intend to repeat the analysis and have no requirement for additional genomes or models, you are welcome to retain your results tables and plots while removing the remaining files.\r\n\r\nIt is strongly advised against using local machines for extensive analyses. If you lack access to external storage space, it is recommended to divide the analysis into smaller segments, which can be later merged, as explained in the section titled `Expanding the evolutionary context`.\r\n\r\nConsidering this limitation, I am currently working on implementing a solution that will help circumvent the need for redundant genome downloads without compromising GERONIMO performance in the future.\r\n\r\nYou might consider deleting the `.snakemake` folder to free up storage space. However, please note that deleting this folder will require the reinstallation of GERONIMO dependencies when the analysis is rerun.\r\n\r\n## License\r\nCopyright (c) 2023 Agata M. 
Kilar\r\n\r\nPermission is hereby granted, free of charge, to any person obtaining a copy\r\nof this software and associated documentation files (the \"Software\"), to deal\r\nin the Software without restriction, including without limitation the rights\r\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\r\ncopies of the Software, and to permit persons to whom the Software is\r\nfurnished to do so, subject to the following conditions:\r\n\r\nThe above copyright notice and this permission notice shall be included in all\r\ncopies or substantial portions of the Software.\r\n\r\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\r\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\r\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\r\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r\nSOFTWARE.\r\n\r\n## Contact\r\nmgr inż. 
Agata Magdalena Kilar, PhD (agata.kilar@ceitec.muni.cz)\r\n\r\n","organization":"Mendel Centre for Plant Genomics and Proteomics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/547?version=1","name":"main @ 7ad0050","author":["Agata Kilar"],"descriptor_type":["SMK"]}]},{"id":"548","url":"https://workflowhub.eu/workflows/548","name":"Jupyter Notebook Protein Conformational Transitions calculations tutorial","description":"# Protein Conformational Transitions calculations tutorial using BioExcel Building Blocks (biobb) and GOdMD\r\n\r\nThis tutorial aims to illustrate the process of computing a conformational transition between two known structural conformations of a protein, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/548?version=1","name":"Version 1","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/548?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/548?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/548?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"549","url":"https://workflowhub.eu/workflows/549","name":"CWL Protein Conformational Transitions calculations tutorial","description":"# Protein Conformational Transitions calculations tutorial using BioExcel Building Blocks (biobb) and GOdMD\r\n\r\nThis tutorial aims to illustrate the process of computing a conformational transition between two known structural conformations of a protein, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/549?version=1","name":"Version 1","author":["Adam 
Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/549?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"550","url":"https://workflowhub.eu/workflows/550","name":"Python Protein Conformational Transitions calculations tutorial","description":"# Protein Conformational Transitions calculations tutorial using BioExcel Building Blocks (biobb) and GOdMD\r\n\r\nThis tutorial aims to illustrate the process of computing a conformational transition between two known structural conformations of a protein, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/550?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/550?version=2","name":"Version 2","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/550?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"551","url":"https://workflowhub.eu/workflows/551","name":"Jupyter Notebook Macromolecular Coarse-Grained Flexibility tutorial","description":"# Macromolecular Coarse-Grained Flexibility (FlexServ) tutorial using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorial aims to illustrate the process of generating protein conformational ensembles from 3D structures and analysing its molecular flexibility, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/551?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/551?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/551?version=3","name":"Version 
3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/551?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"552","url":"https://workflowhub.eu/workflows/552","name":"CWL Macromolecular Coarse-Grained Flexibility tutorial","description":"# Macromolecular Coarse-Grained Flexibility (FlexServ) tutorial using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorial aims to illustrate the process of generating protein conformational ensembles from 3D structures and analysing its molecular flexibility, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/552?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/552?version=2","name":"Version 2","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":["CWL"]}]},{"id":"553","url":"https://workflowhub.eu/workflows/553","name":"Python Macromolecular Coarse-Grained Flexibility tutorial","description":"# Macromolecular Coarse-Grained Flexibility (FlexServ) tutorial using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorial aims to illustrate the process of generating protein conformational ensembles from 3D structures and analysing its molecular flexibility, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/553?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/553?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/553?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"554","url":"https://workflowhub.eu/workflows/554","name":"SnakeMAGs: a simple, 
efficient, flexible and scalable workflow to reconstruct prokaryotic genomes from metagenomes","description":"[![Snakemake](https://img.shields.io/badge/snakemake-≥7.0.0-brightgreen.svg?style=flat)](https://snakemake.readthedocs.io)\r\n\r\n\r\n# About SnakeMAGs\r\nSnakeMAGs is a workflow to reconstruct prokaryotic genomes from metagenomes. The main purpose of SnakeMAGs is to process Illumina data from raw reads to metagenome-assembled genomes (MAGs).\r\nSnakeMAGs is efficient, easy to handle and flexible to different projects. The workflow is CeCILL licensed, implemented in Snakemake (run on multiple cores) and available for Linux.\r\nSnakeMAGs performed eight main steps:\r\n- Quality filtering of the reads\r\n- Adapter trimming\r\n- Filtering of the host sequences (optional)\r\n- Assembly\r\n- Binning\r\n- Evaluation of the quality of the bins\r\n- Classification of the MAGs\r\n- Estimation of the relative abundance of the MAGs\r\n\r\n\r\n![scheme of workflow](SnakeMAGs_schema.jpg?raw=true)\r\n\r\n# How to use SnakeMAGs\r\n## Install conda\r\nThe easiest way to install and run SnakeMAGs is to use [conda](https://www.anaconda.com/products/distribution). 
This package manager will help you to easily install
Because this database is voluminous, we let you decide where you want to store it.\r\nSnakeMAGs do not download automatically GTDB, you have to do it:\r\n\r\n```\r\n#Download the latest release (tested with release207)\r\n#Note: SnakeMAGs uses GTDBtk v2.1.0 and therefore require release 207 as minimum version. See https://ecogenomics.github.io/GTDBTk/installing/index.html#installing for details.\r\nwget https://data.gtdb.ecogenomic.org/releases/latest/auxillary_files/gtdbtk_v2_data.tar.gz\r\n#Decompress\r\ntar -xzvf *tar.gz\r\n#This will create a folder called release207_v2\r\n```\r\nAll you have to do now is to indicate the path to the database folder (in our example, the folder is called release207_v2) in the config file, Classification section.\r\n\r\n## Download the GUNC database (required if gunc: \"yes\")\r\nGUNC accepts either a progenomes or GTDB based reference database. Both can be downloaded using the ```gunc download_db``` command. For our study we used the default proGenome-derived GUNC database. It requires less resources with similar performance.\r\n\r\n```\r\nconda activate\r\n# Install and activate GUNC environment\r\nconda create --prefix /path/to/gunc_env\r\nconda install -c bioconda metabat2 --prefix /path/to/gunc_env\r\nsource activate /path/to/gunc_env\r\n\r\n#Download the proGenome-derived GUNC database (tested with gunc_db_progenomes2.1)\r\n#Note: SnakeMAGs uses GUNC v1.0.5\r\ngunc download_db -db progenomes /path/to/GUNC_DB\r\n```\r\nAll you have to do now is to indicate the path to the GUNC database file in the config file,  Bins quality section.\r\n\r\n## Edit config file\r\nYou need to edit the config.yaml file. 
In particular, you need to set the correct paths: for the working directory, to specify where are your fastq files, where you want to place the conda environments (that will be created using the provided .yaml files available in [SnakeMAGs_conda_env directory](https://github.com/Nachida08/SnakeMAGs/tree/main/SnakeMAGs_conda_env)), where are the adapters, where is GTDB and optionally where is the GUNC database and where is your host genome reference.\r\n\r\nLastly, you need to allocate the proper computational resources (threads, memory) for each of the main steps. These can be optimized according to your hardware.\r\n\r\n\r\n\r\nHere is an example of a config file:\r\n\r\n```\r\n#####################################################################################################\r\n#####  _____    ___    _              _   _    ______   __    __              _______   _____   #####\r\n##### /  ___|  |   \\  | |     /\\     | | / /  |  ____| |  \\  /  |     /\\     /  _____| /  ___|  #####\r\n##### | (___   | |\\ \\ | |    /  \\    | |/ /   | |____  |   \\/   |    /  \\    | |   __  | (___   #####\r\n#####  \\___ \\  | | \\ \\| |   / /\\ \\   | |\\ \\   |  ____| | |\\  /| |   / /\\ \\   | |  |_ |  \\___ \\  #####\r\n#####  ____) | | |  \\   |  / /__\\ \\  | | \\ \\  | |____  | | \\/ | |  / /__\\ \\  | |____||  ____) | #####\r\n##### |_____/  |_|   \\__| /_/    \\_\\ |_|  \\_\\ |______| |_|    |_| /_/    \\_\\  \\______/ |_____/  #####\r\n#####                                                                                           #####\r\n#####################################################################################################\r\n\r\n############################\r\n### Execution parameters ###\r\n############################\r\n\r\nworking_dir: /path/to/working/directory/                                 #The main directory for the project\r\nraw_fastq: /path/to/raw_fastq/                                           #The directory that contains all the 
fastq files of all the samples (eg. sample1_R1.fastq \u0026 sample1_R2.fastq, sample2_R1.fastq \u0026 sample2_R2.fastq...)\r\nsuffix_1: \"_R1.fastq\"                                                    #Main type of suffix for forward reads file (eg. _1.fastq or _R1.fastq or _r1.fastq or _1.fq or _R1.fq or _r1.fq )\r\nsuffix_2: \"_R2.fastq\"                                                    #Main type of suffix for reverse reads file (eg. _2.fastq or _R2.fastq or _r2.fastq or _2.fq or _R2.fq or _r2.fq )\r\n\r\n###########################\r\n### Conda environments  ###\r\n###########################\r\n\r\nconda_env: \"/path/to/SnakeMAGs_conda_env/\"                               #Path to the provided SnakeMAGs_conda_env directory which contains the yaml file for each conda environment\r\n\r\n#########################\r\n### Quality filtering ###\r\n#########################\r\nemail: name.surname@your-univ.com                                        #Your e-mail address\r\nthreads_filter: 10                                                       #The number of threads to run this process. To be adjusted according to your hardware\r\nresources_filter: 150                                                    #Memory according to tools need (in GB)\r\n\r\n########################\r\n### Adapter trimming ###\r\n########################\r\nadapters: /path/to/working/directory/adapters.fa                         #A fasta file containing a set of various Illumina adaptors (this file is provided and is also available on github)\r\ntrim_params: \"2:40:15\"                                                   #For further details, see the Trimmomatic documentation\r\nthreads_trim: 10                                                         #The number of threads to run this process. 
To be adjusted according to your hardware\r\nresources_trim: 150                                                      #Memory according to tools need (in GB)\r\n\r\n######################\r\n### Host filtering ###\r\n######################\r\nhost_genome: \"yes\"                                                      #yes or no. An optional step for host-associated samples (eg. termite, human, plant...)\r\nthreads_bowtie2: 50                                                     #The number of threads to run this process. To be adjusted according to your hardware\r\nhost_genomes_directory: /path/to/working/host_genomes/                  #the directory where the host genome is stored\r\nhost_genomes: /path/to/working/host_genomes/host_genomes.fa             #A fasta file containing the DNA sequences of the host genome(s)\r\nthreads_samtools: 50                                                    #The number of threads to run this process. To be adjusted according to your hardware\r\nresources_host_filtering: 150                                           #Memory according to tools need (in GB)\r\n\r\n################\r\n### Assembly ###\r\n################\r\nthreads_megahit: 50                                                    #The number of threads to run this process. To be adjusted according to your hardware\r\nmin_contig_len: 1000                                                   #Minimum length (in bp) of the assembled contigs\r\nk_list: \"21,31,41,51,61,71,81,91,99,109,119\"                           #Kmer size (for further details, see the megahit documentation)\r\nresources_megahit: 250                                                 #Memory according to tools need (in GB)\r\n\r\n###############\r\n### Binning ###\r\n###############\r\nthreads_bwa: 50                                                        #The number of threads to run this process. 
To be adjusted according to your hardware\r\nresources_bwa: 150                                                     #Memory according to tools need (in GB)\r\nthreads_samtools: 50                                                   #The number of threads to run this process. To be adjusted according to your hardware\r\nresources_samtools: 150                                                #Memory according to tools need (in GB)\r\nseed: 19860615                                                         #Seed number for reproducible results\r\nthreads_metabat: 50                                                    #The number of threads to run this process. To be adjusted according to your hardware\r\nminContig: 2500                                                        #Minimum length (in bp) of the contigs\r\nresources_binning: 250                                                 #Memory according to tools need (in GB)\r\n\r\n####################\r\n### Bins quality ###\r\n####################\r\n#checkM\r\nthreads_checkm: 50                                                    #The number of threads to run this process. To be adjusted according to your hardware\r\nresources_checkm: 250                                                 #Memory according to tools need (in GB)\r\n#bins_quality_filtering\r\ncompletion: 50                                                        #The minimum completion rate of bins\r\ncontamination: 10                                                     #The maximum contamination rate of bins\r\nparks_quality_score: \"yes\"                                            #yes or no. If yes bins are filtered according to the Parks quality score (completion-5*contamination \u003e= 50)\r\n#GUNC\r\ngunc: \"yes\"                                                           #yes or no. 
An optional step to detect and discard chimeric and contaminated genomes using the GUNC tool\r\nthreads_gunc: 50                                                      #The number of threads to run this process. To be adjusted according to your hardware\r\nresources_gunc: 250                                                   #Memory according to tools need (in GB)\r\nGUNC_db: /path/to/GUNC_DB/gunc_db_progenomes2.1.dmnd                  #Path to the downloaded GUNC database (see the readme file)\r\n\r\n######################\r\n### Classification ###\r\n######################\r\nGTDB_data_ref: /path/to/downloaded/GTDB                                #Path to uncompressed GTDB-Tk reference data (GTDB)\r\nthreads_gtdb: 10                                                       #The number of threads to run this process. To be adjusted according to your hardware\r\nresources_gtdb: 250                                                    #Memory according to tools need (in GB)\r\n\r\n##################\r\n### Abundances ###\r\n##################\r\nthreads_coverM: 10                                                     #The number of threads to run this process. 
To be adjusted according to your hardware\r\nresources_coverM: 150                                                  #Memory according to tools need (in GB)\r\n```\r\n# Run SnakeMAGs\r\nIf you are using a workstation with Ubuntu (tested on Ubuntu 22.04):\r\n```{bash}\r\nsnakemake --cores 30 --snakefile SnakeMAGs.smk --use-conda --conda-prefix /path/to/SnakeMAGs_conda_env/ --configfile /path/to/config.yaml --keep-going --latency-wait 180\r\n```\r\n\r\nIf you are working on a cluster with Slurm (tested with version 18.08.7):\r\n```{bash}\r\nsnakemake --snakefile SnakeMAGs.smk --cluster 'sbatch -p \u003ccluster_partition\u003e --mem \u003cmemory\u003e -c \u003ccores\u003e -o \"cluster_logs/{wildcards}.{rule}.{jobid}.out\" -e \"cluster_logs/{wildcards}.{rule}.{jobid}.err\" ' --jobs \u003cnbr_of_parallel_jobs\u003e --use-conda --conda-frontend conda --conda-prefix /path/to/SnakeMAGs_conda_env/ --jobname \"{rule}.{wildcards}.{jobid}\" --latency-wait 180 --configfile /path/to/config.yaml --keep-going\r\n```\r\n\r\nIf you are working on a cluster with SGE (tested with version 8.1.9):\r\n```{bash}\r\nsnakemake --snakefile SnakeMAGs.smk --cluster \"qsub -cwd -V -q \u003cshort.q/long.q\u003e -pe thread {threads} -e cluster_logs/{rule}.e{jobid} -o cluster_logs/{rule}.o{jobid}\" --jobs \u003cnbr_of_parallel_jobs\u003e --use-conda --conda-frontend conda --conda-prefix /path/to/SnakeMAGs_conda_env/ --jobname \"{rule}.{wildcards}.{jobid}\" --latency-wait 180 --configfile /path/to/config.yaml --keep-going\r\n```\r\n\r\n\r\n# Test\r\nWe provide you a small data set in the [test](https://github.com/Nachida08/SnakeMAGs/tree/main/test) directory which will allow you to validate your installation and take your first steps with SnakeMAGs. 
This data set is a subset from [ZymoBiomics Mock Community](https://www.zymoresearch.com/blogs/blog/zymobiomics-microbial-standards-optimize-your-microbiomics-workflow) (250K reads) used in this tutorial [metagenomics_tutorial](https://github.com/pjtorres/metagenomics_tutorial).\r\n\r\n1. Before getting started make sure you have cloned the SnakeMAGs repository or you have downloaded all the necessary files (SnakeMAGs.smk, config.yaml, chr19.fa.gz, insub732_2_R1.fastq.gz, insub732_2_R2.fastq.gz). See the [SnakeMAGs executable](#snakemags-executable) section.\r\n2. Unzip the fastq files and the host sequences file.\r\n```\r\ngunzip fastqs/insub732_2_R1.fastq.gz fastqs/insub732_2_R2.fastq.gz host_genomes/chr19.fa.gz\r\n```\r\n3. For better organisation put all the read files in the same directory (eg. fastqs) and the host sequences file in a separate directory (eg. host_genomes)\r\n4. Edit the config file (see [Edit config file](#edit-config-file) section)\r\n5. Run the test (see [Run SnakeMAGs](#run-snakemags) section)\r\n\r\nNote: the analysis of these files took 1159.32 seconds to complete on an Ubuntu 22.04 LTS with an Intel(R) Xeon(R) Silver 4210 CPU @ 2.20GHz x 40 processor, 96GB of RAM.\r\n\r\n# Genome reference for host reads filtering\r\nFor host-associated samples, one can remove host sequences from the metagenomic reads by mapping these reads against a reference genome. In the case of termite gut metagenomes, we are providing [here](https://zenodo.org/record/6908287#.YuAdFXZBx8M) the relevant files (fasta and index files) from termite genomes.\r\n\r\nUpon request, we can help you to generate these files for your own reference genome and make them available to the community.\r\n\r\nNB. These steps of mapping generate voluminous files such as .bam and .sam. 
Depending on your disk space, you might want to delete these files after use.\r\n\r\n\r\n# Use case\r\nDuring the test phase of the development of SnakeMAGs, we used this workflow to process 10 publicly available termite gut metagenomes generated by Illumina sequencing, to ultimately reconstruct prokaryotic MAGs. These metagenomes were retrieved from the NCBI database using the following accession numbers: SRR10402454; SRR14739927; SRR8296321; SRR8296327; SRR8296329; SRR8296337; SRR8296343; DRR097505; SRR7466794; SRR7466795. They come from five different studies: Waidele et al, 2019; Tokuda et al, 2018; Romero Victorica et al, 2020; Moreira et al, 2021; and Calusinska et al, 2020.\r\n\r\n## Download the Illumina pair-end reads\r\nWe use fasterq-dump tool to extract data in FASTQ-format from SRA-accessions. It is a commandline-tool which offers a faster solution for downloading those large files.\r\n\r\n```\r\n# Install and activate sra-tools environment\r\n## Note: For this study we used sra-tools 2.11.0\r\n\r\nconda activate\r\nconda install -c bioconda sra-tools\r\nconda activate sra-tools\r\n\r\n# Download fastqs in a single directory\r\nmkdir raw_fastq\r\ncd raw_fastq\r\nfasterq-dump \u003cSRA-accession\u003e --threads \u003cthreads_nbr\u003e --skip-technical --split-3\r\n```\r\n\r\n## Download Genome reference for host reads filtering\r\n```\r\nmkdir host_genomes\r\ncd host_genomes\r\nwget https://zenodo.org/record/6908287/files/termite_genomes.fasta.gz\r\ngunzip termite_genomes.fasta.gz\r\n```\r\n\r\n## Edit the config file\r\nSee [Edit config file](#edit-config-file) section.\r\n\r\n## Run SnakeMAGs\r\n```\r\nconda activate snakemake_7.0.0\r\nmkdir cluster_logs\r\nsnakemake --snakefile SnakeMAGs.smk --cluster 'sbatch -p \u003ccluster_partition\u003e --mem \u003cmemory\u003e -c \u003ccores\u003e -o \"cluster_logs/{wildcards}.{rule}.{jobid}.out\" -e \"cluster_logs/{wildcards}.{rule}.{jobid}.err\" ' --jobs \u003cnbr_of_parallel_jobs\u003e --use-conda 
--conda-frontend conda --conda-prefix /path/to/SnakeMAGs_conda_env/ --jobname \"{rule}.{wildcards}.{jobid}\" --latency-wait 180 --configfile /path/to/config.yaml --keep-going\r\n```\r\n\r\n## Study results\r\nThe MAGs reconstructed from each metagenome and their taxonomic classification are available in this [repository](https://doi.org/10.5281/zenodo.7661004).\r\n\r\n# Citations\r\n\r\nIf you use SnakeMAGs, please cite:\r\n\u003e Tadrent N, Dedeine F and Hervé V. SnakeMAGs: a simple, efficient, flexible and scalable workflow to reconstruct prokaryotic genomes from metagenomes [version 2; peer review: 2 approved]. F1000Research 2023, 11:1522 (https://doi.org/10.12688/f1000research.128091.2)\r\n\r\n\r\nPlease also cite the dependencies:\r\n- [Snakemake](https://doi.org/10.12688/f1000research.29032.2) : Mölder, F., Jablonski, K. P., Letcher, B., Hall, M. B., Tomkins-tinch, C. H., Sochat, V., Forster, J., Lee, S., Twardziok, S. O., Kanitz, A., Wilm, A., Holtgrewe, M., Rahmann, S., Nahnsen, S., \u0026 Köster, J. (2021) Sustainable data analysis with Snakemake [version 2; peer review: 2 approved]. *F1000Research* 2021, 10:33.\r\n- [illumina-utils](https://doi.org/10.1371/journal.pone.0066643) : Murat Eren, A., Vineis, J. H., Morrison, H. G., \u0026 Sogin, M. L. (2013). A Filtering Method to Generate High Quality Short Reads Using Illumina Paired-End Technology. *PloS ONE*, 8(6), e66643.\r\n- [Trimmomatic](https://doi.org/10.1093/bioinformatics/btu170) : Bolger, A. M., Lohse, M., \u0026 Usadel, B. (2014). Genome analysis Trimmomatic: a flexible trimmer for Illumina sequence data. *Bioinformatics*, 30(15), 2114-2120.\r\n- [Bowtie2](https://doi.org/10.1038/nmeth.1923) : Langmead, B., \u0026 Salzberg, S. L. (2012). Fast gapped-read alignment with Bowtie 2. *Nature Methods*, 9(4), 357–359.\r\n- [SAMtools](https://doi.org/10.1093/bioinformatics/btp352) : Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., \u0026 Durbin, R. (2009). 
The Sequence Alignment/Map format and SAMtools. *Bioinformatics*, 25(16), 2078–2079.\r\n- [BEDtools](https://doi.org/10.1093/bioinformatics/btq033) : Quinlan, A. R., \u0026 Hall, I. M. (2010). BEDTools: A flexible suite of utilities for comparing genomic features. *Bioinformatics*, 26(6), 841–842.\r\n- [MEGAHIT](https://doi.org/10.1093/bioinformatics/btv033) : Li, D., Liu, C. M., Luo, R., Sadakane, K., \u0026 Lam, T. W. (2015). MEGAHIT: An ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph. *Bioinformatics*, 31(10), 1674–1676.\r\n- [bwa](https://doi.org/10.1093/bioinformatics/btp324) : Li, H., \u0026 Durbin, R. (2009). Fast and accurate short read alignment with Burrows-Wheeler transform. *Bioinformatics*, 25(14), 1754–1760.\r\n- [MetaBAT2](https://doi.org/10.7717/peerj.7359) : Kang, D. D., Li, F., Kirton, E., Thomas, A., Egan, R., An, H., \u0026 Wang, Z. (2019). MetaBAT 2: An adaptive binning algorithm for robust and efficient genome reconstruction from metagenome assemblies. *PeerJ*, 2019(7), 1–13.\r\n- [CheckM](https://doi.org/10.1101/gr.186072.114) : Parks, D. H., Imelfort, M., Skennerton, C. T., Hugenholtz, P., \u0026 Tyson, G. W. (2015). CheckM: Assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes. *Genome Research*, 25(7), 1043–1055.\r\n- [GTDB-Tk](https://doi.org/10.1093/BIOINFORMATICS/BTAC672) : Chaumeil, P.-A., Mussig, A. J., Hugenholtz, P., Parks, D. H. (2022). GTDB-Tk v2: memory friendly classification with the genome taxonomy database. *Bioinformatics*.\r\n- [CoverM](https://github.com/wwood/CoverM)\r\n- [Waidele et al, 2019](https://doi.org/10.1101/526038) : Waidele, L., Korb, J., Voolstra, C. R., Dedeine, F., \u0026 Staubach, F. (2019). Ecological specificity of the metagenome in a set of lower termite species supports contribution of the microbiome to adaptation of the host. 
*Animal Microbiome*, 1(1), 1–13.\r\n- [Tokuda et al, 2018](https://doi.org/10.1073/pnas.1810550115) : Tokuda, G., Mikaelyan, A., Fukui, C., Matsuura, Y., Watanabe, H., Fujishima, M., \u0026 Brune, A. (2018). Fiber-associated spirochetes are major agents of hemicellulose degradation in the hindgut of wood-feeding higher termites. *Proceedings of the National Academy of Sciences of the United States of America*, 115(51), E11996–E12004.\r\n- [Romero Victorica et al, 2020](https://doi.org/10.1038/s41598-020-60850-5) : Romero Victorica, M., Soria, M. A., Batista-García, R. A., Ceja-Navarro, J. A., Vikram, S., Ortiz, M., Ontañon, O., Ghio, S., Martínez-Ávila, L., Quintero García, O. J., Etcheverry, C., Campos, E., Cowan, D., Arneodo, J., \u0026 Talia, P. M. (2020). Neotropical termite microbiomes as sources of novel plant cell wall degrading enzymes. *Scientific Reports*, 10(1), 1–14.\r\n- [Moreira et al, 2021](https://doi.org/10.3389/fevo.2021.632590) : Moreira, E. A., Persinoti, G. F., Menezes, L. R., Paixão, D. A. A., Alvarez, T. M., Cairo, J. P. L. F., Squina, F. M., Costa-Leonardo, A. M., Rodrigues, A., Sillam-Dussès, D., \u0026 Arab, A. (2021). Complementary contribution of Fungi and Bacteria to lignocellulose digestion in the food stored by a neotropical higher termite. *Frontiers in Ecology and Evolution*, 9(April), 1–12.\r\n- [Calusinska et al, 2020](https://doi.org/10.1038/s42003-020-1004-3) : Calusinska, M., Marynowska, M., Bertucci, M., Untereiner, B., Klimek, D., Goux, X., Sillam-Dussès, D., Gawron, P., Halder, R., Wilmes, P., Ferrer, P., Gerin, P., Roisin, Y., \u0026 Delfosse, P. (2020). Integrative omics analysis of the termite gut system adaptation to Miscanthus diet identifies lignocellulose degradation enzymes. *Communications Biology*, 3(1), 1–12.\r\n- [Orakov et al, 2021](https://doi.org/10.1186/s13059-021-02393-0) : Orakov, A., Fullam, A., Coelho, L. P., Khedkar, S., Szklarczyk, D., Mende, D. R., Schmidt, T. S. B., \u0026 Bork, P. (2021). 
GUNC: detection of chimerism and contamination in prokaryotic genomes. *Genome Biology*, 22(1).\r\n- [Parks et al, 2015](https://doi.org/10.1101/gr.186072.114) : Parks, D. H., Imelfort, M., Skennerton, C. T., Hugenholtz, P., \u0026 Tyson, G. W. (2015). CheckM: Assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes. *Genome Research*, 25(7), 1043–1055.\r\n# License\r\nThis project is licensed under the CeCILL License - see the [LICENSE](https://github.com/Nachida08/SnakeMAGs/blob/main/LICENCE) file for details.\r\n\r\nDeveloped by Nachida Tadrent at the Insect Biology Research Institute ([IRBI](https://irbi.univ-tours.fr/)), under the supervision of Franck Dedeine and Vincent Hervé.\r\n","organization":"Metagenomic tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/554?version=1","name":"main @ a6cfd03","author":[],"descriptor_type":["SMK"]}]},{"id":"556","url":"https://workflowhub.eu/workflows/556","name":"The Polygenic Score Catalog Calculator","description":"# The Polygenic Score Catalog Calculator (`pgsc_calc`)\r\n\r\n[![Documentation Status](https://readthedocs.org/projects/pgsc-calc/badge/?version=latest)](https://pgsc-calc.readthedocs.io/en/latest/?badge=latest)\r\n[![pgscatalog/pgsc_calc CI](https://github.com/PGScatalog/pgsc_calc/actions/workflows/ci.yml/badge.svg)](https://github.com/PGScatalog/pgsc_calc/actions/workflows/ci.yml)\r\n[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5970794.svg)](https://doi.org/10.5281/zenodo.5970794)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-≥23.10.0-23aa62.svg?labelColor=000000)](https://www.nextflow.io/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n\r\n## Introduction\r\n\r\n`pgsc_calc` is a bioinformatics best-practice analysis pipeline for calculating\r\npolygenic [risk] scores on samples with imputed genotypes using existing scoring\r\nfiles from the [Polygenic Score (PGS) Catalog](https://www.pgscatalog.org/)\r\nand/or user-defined PGS/PRS.\r\n\r\n## Pipeline summary\r\n\r\n\u003e [!IMPORTANT]  \r\n\u003e * Whole genome sequencing (WGS) data [are not currently supported by the calculator](https://pgsc-calc.readthedocs.io/en/latest/explanation/match.html#are-your-target-genomes-imputed-are-they-wgs)\r\n\u003e * It’s possible to [create compatible gVCFs from WGS data](https://github.com/PGScatalog/pgsc_calc/discussions/123#discussioncomment-6469422). We plan to improve support for WGS data in the near future.\r\n\r\n\u003cp align=\"center\"\u003e\r\n  \u003cimg width=\"80%\" src=\"https://github.com/PGScatalog/pgsc_calc/assets/11425618/f766b28c-0f75-4344-abf3-3463946e36cc\"\u003e\r\n\u003c/p\u003e\r\n\r\nThe workflow performs the following steps:\r\n\r\n* Downloading scoring files using the PGS Catalog API in a specified genome build (GRCh37 and GRCh38).\r\n* Reading custom scoring files (and performing a liftover if genotyping data is in a different build).\r\n* Automatically combines and creates scoring files for efficient parallel computation of multiple PGS\r\n    - Matching variants in the scoring files against variants in the target dataset (in plink bfile/pfile or VCF format)\r\n* Calculates PGS for all samples (linear sum of weights and dosages)\r\n* Creates a summary report to visualize score distributions and pipeline metadata (variant matching QC)\r\n\r\nAnd optionally:\r\n\r\n- Genetic Ancestry: calculate similarity of target 
samples to populations in a\r\n  reference dataset ([1000 Genomes (1000G)](http://www.nature.com/nature/journal/v526/n7571/full/nature15393.html)), using principal components analysis (PCA)\r\n- PGS Normalization: Using reference population data and/or PCA projections to report\r\n  individual-level PGS predictions (e.g. percentiles, z-scores) that account for genetic ancestry\r\n\r\nSee documentation for a list of planned [features under development](https://pgsc-calc.readthedocs.io/en/latest/index.html#Features-under-development).\r\n\r\n### PGS applications and libraries\r\n\r\n`pgsc_calc` uses applications and libraries internally developed at the PGS Catalog, which can do helpful things like:\r\n\r\n* Query the PGS Catalog to bulk download scoring files in a specific genome build\r\n* Match variants from scoring files to target variants\r\n* Adjust calculated PGS in the context of genetic ancestry\r\n\r\nIf you want to write Python code to work with PGS, [check out the `pygscatalog` repository to learn more](https://github.com/PGScatalog/pygscatalog).\r\n\r\nIf you want a simpler way of working with PGS, ignore this section and continue below to learn more about `pgsc_calc`.\r\n\r\n## Quick start\r\n\r\n1. Install\r\n[`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation)\r\n(`\u003e=23.10.0`)\r\n\r\n2. Install [`Docker`](https://docs.docker.com/engine/installation/) or\r\n[`Singularity (v3.8.3 minimum)`](https://www.sylabs.io/guides/3.0/user-guide/)\r\n(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort)\r\n\r\n3. Download the pipeline and test it on a minimal dataset with a single command:\r\n\r\n    ```console\r\n    nextflow run pgscatalog/pgsc_calc -profile test,\u003cdocker/singularity/conda\u003e\r\n    ```\r\n\r\n4. 
Start running your own analysis!\r\n\r\n    ```console\r\n    nextflow run pgscatalog/pgsc_calc -profile \u003cdocker/singularity/conda\u003e --input samplesheet.csv --pgs_id PGS001229\r\n    ```\r\n\r\nSee [getting\r\nstarted](https://pgsc-calc.readthedocs.io/en/latest/getting-started.html) for more\r\ndetails.\r\n\r\n## Documentation\r\n\r\n[Full documentation is available on Read the Docs](https://pgsc-calc.readthedocs.io/)\r\n\r\n## Credits\r\n\r\npgscatalog/pgsc_calc is developed as part of the PGS Catalog project, a\r\ncollaboration between the University of Cambridge’s Department of Public Health\r\nand Primary Care (Michael Inouye, Samuel Lambert) and the European\r\nBioinformatics Institute (Helen Parkinson, Laura Harris).\r\n\r\nThe pipeline seeks to provide a standardized workflow for PGS calculation and\r\nancestry inference implemented in nextflow derived from an existing set of\r\ntools/scripts developed by Inouye lab (Rodrigo Canovas, Scott Ritchie, Jingqin\r\nWu) and PGS Catalog teams (Samuel Lambert, Laurent Gil).\r\n\r\nThe adaptation of the codebase, nextflow implementation, and PGS Catalog features\r\nare written by Benjamin Wingfield, Samuel Lambert, Laurent Gil with additional input\r\nfrom Aoife McMahon (EBI). Development of new features, testing, and code review\r\nis ongoing including Inouye lab members (Rodrigo Canovas, Scott Ritchie) and others. If \r\nyou use the tool we ask you to cite our paper describing software and updated PGS Catalog resource:\r\n\r\n- \u003eLambert, Wingfield _et al._ (2024) Enhancing the Polygenic Score Catalog with tools for score \r\n  calculation and ancestry normalization. Nature Genetics.\r\n  doi:[10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x).\r\n\r\nThis pipeline is distributed under an [Apache License](LICENSE) and uses code and \r\ninfrastructure developed and maintained by the [nf-core](https://nf-co.re) community \r\n(Ewels *et al. 
Nature Biotech* (2020) doi:[10.1038/s41587-020-0439-x](https://doi.org/10.1038/s41587-020-0439-x)), \r\nreused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\nAdditional references of open-source tools and data used in this pipeline are described in\r\n[`CITATIONS.md`](CITATIONS.md).\r\n\r\nThis work has received funding from EMBL-EBI core funds, the Baker Institute,\r\nthe University of Cambridge, Health Data Research UK (HDRUK), and the European\r\nUnion’s Horizon 2020 research and innovation programme under grant agreement No\r\n101016775 INTERVENE.\r\n","organization":"Polygenic Score Catalog","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/556?version=1","name":"main @ d14e43e","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/556?version=2","name":"v2.0.0-alpha","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/556?version=3","name":"v1.3.2","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/556?version=4","name":"v2.0.0-alpha.1","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/556?version=5","name":"v2.0.0-beta.2","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/556?version=6","name":"v2.0.0-beta.3","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/556?version=7","name":"v2.0.0","author":["Samuel Lambert","Benjamin Wingfield"],"descriptor_type":["NFL"]}]},{"id":"557","url":"https://workflowhub.eu/workflows/557","name":"Galaxy Macromolecular Coarse-Grained Flexibility 
tutorial","description":"# Macromolecular Coarse-Grained Flexibility (FlexServ) tutorial using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorial aims to illustrate the process of generating protein conformational ensembles from 3D structures and analysing its molecular flexibility, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/557?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"558","url":"https://workflowhub.eu/workflows/558","name":"Galaxy Protein Conformational Transitions calculations tutorial","description":"# Protein Conformational Transitions calculations tutorial using BioExcel Building Blocks (biobb) and GOdMD\r\n\r\nThis tutorial aims to illustrate the process of computing a conformational transition between two known structural conformations of a protein, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software 
has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/558?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"559","url":"https://workflowhub.eu/workflows/559","name":"Apis-mellifera-wings-KZ: A workflow for morphometric identification of honey bees from Kazakhstan","description":"We present an R script that describes the workflow for analysing honey bee (_Apis mellifera_) wing shape. It is based on a dataset of wing images and landmark coordinates available at Zenodo: https://doi.org/10.5281/zenodo.8128010. \r\nThe dataset can be used as a reference for the identification of local bees from southern Kazakhstan, which most probably belong to the subspecies _Apis mellifera pomonella_. It was compared with data from Nawrocka et al. (2018), available at Zenodo: https://doi.org/10.5281/zenodo.7567336. 
","organization":"Apis-wings","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/559?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"560","url":"https://workflowhub.eu/workflows/560","name":"HiFi de novo genome assembly workflow","description":"# HiFi *de novo* genome assembly workflow\r\n\r\nHiFi-assembly-workflow is a bioinformatics pipeline that can be used to analyse Pacbio CCS reads for *de novo* genome assembly using PacBio Circular Consensus Sequencing (CCS) reads. This workflow is implemented in Nextflow and has 3 major sections. \r\n \r\nPlease refer to the following documentation for detailed description of each workflow section:\r\n \r\n- [Adapter filtration and pre-assembly quality control (QC)](https://australianbiocommons.github.io/hifi-assembly-workflow/recommendations#stage-1-adapter-filtration-and-pre-assembly-quality-control)\r\n- [Assembly](https://australianbiocommons.github.io/hifi-assembly-workflow/recommendations#stage-2-assembly)\r\n- [Post-assembly QC](https://australianbiocommons.github.io/hifi-assembly-workflow/recommendations#stage-3-post-assembly-quality-control)\r\n\r\n\r\n## General recommendations \r\n\r\nA more detailed module and workflow description as well as execution examples on Gadi and Setonix are [available here](https://australianbiocommons.github.io/hifi-assembly-workflow/workflows).\r\n\r\n\r\n## Attributions\r\n\r\nThis work was developed at AGRF and supported by the Australian BioCommons via Bioplatforms Australia funding, the Australian Research Data Commons (https://doi.org/10.47486/PL105) and the Queensland Government RICF programme. Bioplatforms Australia and the Australian Research Data Commons are enabled by the National Collaborative Research Infrastructure Strategy (NCRIS).\r\n\r\nThe documentation in this repository is based on Australian BioCommons guidelines. 
\r\n","organization":"Australian BioCommons","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/560?version=1","name":"master @ f8a0e23","author":["Ziad Al-Bkhetan","Johan Gustafsson"],"descriptor_type":["NFL"]}]},{"id":"561","url":"https://workflowhub.eu/workflows/561","name":"consensus-peaks/consensus-peaks-chip-sr","description":"This workflow takes as input SR BAM from ChIP-seq. It calls peaks on each replicate and intersect them. In parallel, each BAM is subsetted to smallest number of reads. Peaks are called using both subsets combined. Only peaks called using a combination of both subsets which have summits intersecting the intersection of both replicates will be kept.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/561?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/561?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/561?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/561?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/561?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/561?version=6","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/561?version=7","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/561?version=8","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/561?version=9","name":"v1.1","author":[],"descriptor
_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/561?version=10","name":"v1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/561?version=11","name":"v1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/561?version=12","name":"v1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/561?version=13","name":"v1.5","author":[],"descriptor_type":["GALAXY"]}]},{"id":"563","url":"https://workflowhub.eu/workflows/563","name":"polish-with-long-reads/main","description":"Racon polish with long reads, x4","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/563?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"567","url":"https://workflowhub.eu/workflows/567","name":"CLAWS (CNAG's long-read assembly workflow in Snakemake)","description":"# CLAWS (CNAG's Long-read Assembly Workflow in Snakemake)\r\n Snakemake Pipeline used for de novo genome assembly @CNAG. It has been developed for Snakemake v6.0.5.\r\n\r\nIt accepts Oxford Nanopore Technologies (ONT) reads, PacBio HFi reads, illumina paired-end data, illumina 10X data and Hi-C reads. It does the preprocessing of the reads, assembly, polishing, purge_dups, scaffodling and different evaluation steps. By default it will preprocess the reads, run Flye + Hypo + purge_dups + yahs and evaluate the resulting assemblies with BUSCO, MERQURY, Nseries and assembly_stats. It needs a config file and a spec file (json file with instructions on which resources should slurm use for each of the jobs). Both files are created by the script \"create_config_assembly.py\" that is located in the bin directory. 
To check all the options accepted by the script, do:\r\n\r\n```\r\nbin/create_config_assembly.py -h\r\n```\r\n\r\nOnce the 2 config files are produced, the pipeline can be launched using snakemake like this:\r\n\r\n``snakemake --notemp -j 999 --snakefile assembly_pipeline.smk --configfile assembly.config --is --cluster-conf assembly.spec --use-conda --use-envmodules``\r\n\r\nIf you are using an HPC cluster, please check how should you run snakemake to launch the jobs to the cluster. \r\n\r\nMost of the tools used will be installed via conda using the environments of the \"envs\" directory after providing the \"--use-conda\" option to snakemake. However, a few tools cannot be installed via conda and will have to be available in your PATH, or as a module in the cluster. Those tools are:\r\n\r\n- NextDenovo/2.5.0\r\n- NextPolish/1.4.1\r\n\r\n# How to provide input data:\r\n\r\nThere are several ways of providing the reads.\r\n\r\n### 1- ONT reads\r\n\r\n1.1 Using the option ``--ont-dir {DIR}`` in create_config_assembly.py.\r\n\r\nIf you do so, it will look for all the files in the directory that end in '.fastq.gz' and will add the basenames to \"ONT_wildcards\". These wildcards will be processed by the pipeline that will:\r\n\r\n- Concatenate all the files into a single file\r\n\r\n- Run filtlong with the default or specified parameters. \r\n\r\n- Use the resulting file for assembly, polishing and/or purging.\r\n\r\nYou can also specify the basenames of the files that you want to use with the ``--ont-list `` option. 
In this case, the pipeline will use the wildcards that you're providing instead of merging all the files in the directory.\r\n\r\n1.2 Using the option ```--ont-reads {FILE}``` in create_config_assembly.py.\r\n\r\nIf you do so, it will consider that you already have all the reads in one file and will:  \r\n\r\n- Run filtlong with the default or specified parameters.\r\n\r\n- Use the resulting file for assembly, polishing and/or purging.\r\n\r\n1.3 Using the option ```--ont-filt {FILE}```. It will use this file as the output from filtlong. Hence, it will skip the preprocessing steps and directly use it for assembly, polishing and/or purging. \r\n\r\n\r\n\r\n### 2-Illumina 10X-linked data\r\n\r\n2.1 Using the  ```--raw-10X {DIR:list}``` option. \r\n\r\nDictionary with 10X raw read directories, it has to be the mkfastq dir. You must specify as well the sampleIDs from this run. Example: '{\"mkfastq-                        dir\":\"sample1,sample2,sample3\"}'...\r\n\r\nIt will take each basename in the list to get the fastqs from the corresponding directory and run longranger on each sample. Afterwards, it will build meryldbs for each \"barcoded\" file. Finally, it will concatenate all the meryldbs and \"barcoded\" files. Resulting \"barcoded\" file will be used for polishing. \r\n\r\n2.2 Using the ``--processed-10X {DIR}`` parameter. \r\n\r\nThis directory can already be there or be produced by the pipeline as described in step 2.1. Once all the \"barcoded\" fastq files are there, meryldbs will be built for each \"barcoded\" file.  Finally, it will concatenate all the meryldbs and \"barcoded\" files. Resulting \"barcoded\" file will be used for polishing. \r\n\r\n2.3 Using the ``--10X`` option. \r\n\r\nThe argument to this is the path to the concatenated \".barcoded\" file that needs to be used for polishing. If the pre-concatenated files are not given, meryldbs will be directly generated with this file, but it may run out of memory. 
\r\n\r\n### 3- Illumina short-read data\r\n\r\n3.1 Using the ``--illumina-dir {DIR}`` option, that will look for all the files in the directory that end in '.1.fastq.gz' and will add the basenames to \"illumina_wildcards\". These wildcards will be processed by the pipeline that will: \r\n\r\n- Trim adaptors with Trimgalore\r\n\r\n- Concatenate all the trimmed *.1.fastq.gz and the *2.fastq.gz in one file per pair. \r\n\r\n- The resulting reads will be used for building meryldbs and polishing. \r\n\r\n3.2 Using the ``--processed-illumina`` option. If the directory exists and contains files, the pipeline will look for all the files in the directory that end in '.1.fastq.gz' and will add the basenames to \"illumina_wildcards\". These wildcards will be processed by the pipeline that will:\r\n\r\n- Concatenate all the trimmed *.1.fastq.gz and the *2.fastq.gz in one file per pair. \r\n\r\n- The resulting reads will be used for building meryldbs and polishing. \r\n\r\n3.3 Using the ``--pe1 {FILE} and --pe2 {FILE}`` options. That will consider that these are the paired files containing all the illumina reads ready to be used and will build meryldbs and polish with them.\r\n\r\n### 4- Input assemblies\r\n\r\nIf you want to polish an already assembled assembly, you can give it to the pipeline by using the option ``--assembly-in ASSEMBLY_IN [ASSEMBLY_IN ...]\r\n                        Dictionary with assemblies that need to be polished but not assembled and directory where they should\r\n                        be polished. 
Example: '{\"assembly1\":\"polishing_dir1\"}' '{\"assembly2\"=\"polishing_dir2\"}' ...``\r\n\t\t\t\r\nIf you want to start the pipeline after polishing on an already existing assembly, you can give it to the pipeline by using the option ``--postpolish-assemblies POSTPOLISH_ASSEMBLIES [POSTPOLISH_ASSEMBLIES ...]\r\n                        Dictionary with assemblies for which postpolishing steps need to be run but that are not assembled and\r\n                        base step for the directory where the first postpolishing step should be run. Example:\r\n                        '{\"assembly1\":\"s04.1_p03.1\"}' '{\"assembly2\"=\"s04.2_p03.2\"}' ...``\r\n\r\nTo evaluate and produce the final pretext file on a curated assembly, use ``--curated-assemblies CURATED_ASSEMBLIES [CURATED_ASSEMBLIES ...]\r\n                        Dictionary with assemblies that have already been curated. Evaluations and read alignment will be perforder. Example:\r\n                        '{\"assembly1\":\"s04.1_p03.1\"}' '{\"assembly2\":\"s04.2_p03.2\"}' ...``\r\n\r\n\r\n\r\n# Description of implemented rules\r\n\r\n1- Preprocessing:\r\n\t\r\n- **Read concatenation:**\r\n\r\n``zcat {input.fastqs} | pigz -p {threads} -c  \u003e {output.final_fastq}``\r\n\t\r\n- **Longranger for 10X reads**: it uses the Longranger version installed in the path specified in the configfile\r\n\r\n``longranger basic --id={params.sample} --sample={params.sample} --fastqs={input.mkfastq_dir} --localcores={threads}``\r\n\r\n- **Trimgalore:** By default it gives the ``--max_n 0 --gzip -q 20 --paired --retain_unpaired`` options, but it can be changed with the ``--trim-galore-opts `` argument. \r\n\r\n``trim_galore -j {threads} {params.opts} {input.read1} {input.read2}``\r\n\r\n- **Filtlong:** it uses the Filtlong version installed in the path specified in the configfile. 
By default it gives the min_length and min_mean_q parameters, but extra parameters can be added with the ``--filtlong-opts`` option.\r\n\r\n``filtlong --min_length {params.minlen} --min_mean_q {params.min_mean_q} {params.opts} {input.reads} | pigz -p {threads} -c \u003e {output.outreads}``\r\n\t\r\n- **Build meryldb**: it uses the merqury conda environment specified in the configfile. It takes as argument the `--mery-k` value that needs to be estimated first for the genome size. It can run either on the illumina reads, the ont reads or both, default behaviour is both. \r\n\r\n``meryl k={params.kmer} count output {output.out_dir} {input.fastq}``\r\n\t\r\n- Concat meryldbs: with the merqury conda environment specified in the configfile\r\n\r\n``meryl union-sum output {output.meryl_all} {input.input_run}``\r\n\t\r\n- **Align ONT (Minimap2):** it aligns the reads using minimap2 and outputs the alignment either in bam or in paf.gz formats. It uses the minimap2 conda environment specified in the configfile\r\n\r\n``minimap2 -{params.align_opts} -t {threads} {input.genome} {input.reads} ``\r\n\r\n- **Align Illumina (BWA-MEM):** it aligns the reads with BWA-mem and outputs a bam file\r\n\r\n``bwa mem -Y {params.options} -t {threads} {input.genome} {input.reads} | samtools view -Sb - | samtools sort -@ {threads} -o {output.mapping} -``\r\n\r\n2- Assembly\r\n\r\n- **Flye (default)**. It is run by default, if you don't want the pipeline to run it, you can give `--no-flye` option when creating the config. It uses the conda environment specified in the config. By default it is set to 2 polishing iterations and gives the genome-size estimate that has been given when creating the config. Extra options can be provided with the `--flye-opts`.\r\n\r\n``flye --{params.readtype} {input.reads} -o {params.outdir}out -t {threads} -i {params.pol_iterations} {params.other_flye_opts} ``\r\n\t\r\n- **Nextdenovo (if ``run-nextdenovo``):** It uses the cluster module specified in the config. 
If nextdenovo option is turned on, the create_config script will also create the nextdenovo config file. Check the create_config help to see which options can be modified on it. \r\n\r\n``nextDenovo {input.config}``\r\n\r\n3- Polishing\r\n\r\n- **Hypo (default):** It is the polisher that the pipeline uses by default, it can be turned off specifying ``--no-hypo`` when creating the config. If selected, the reads will be aligned in previous rules and then hypo will be run, it requires illumina data. It uses the conda environment specified in the config. \r\n\r\n``hypo -r @short_reads.list.txt -d {input.genome} -b {input.sr_bam} -c {coverage} -s {params.genome_size} -B {input.lr_bam} -t {threads} -o {output.polished} -p {params.proc} {params.opts} ``\r\n\t\r\n- **Nextpolish ont (if turned on):** to run nextpolish with ONT reads, specify ``--nextpolish-ont-rounds`` and the number of rounds you want to run of it. \r\n\r\n``\"python /apps/NEXTPOLISH/1.3.1/lib/nextpolish2.py -g {input.genome} -p {threads} -l lgs.fofn -r {params.lrtype} \u003e {output.polished}``\r\n\t\r\n- **Nextpolish illumina (if turned on):** to run nextpolish with ONT reads, specify ``--nextpolish-ill-rounds`` and the number of rounds you want to run of it. \r\n\r\n``\"python /apps/NEXTPOLISH/1.3.1/lib/nextpolish1.py -g {input.genome}  -p {threads} -s {input.bam} -t {params.task} \u003e {output.polished}``\r\n\r\n4- Post-assembly\r\n\r\n- **Purge_dups (by default):** select ``--no-purgedups`` if you don't want to run it. If no manual cutoffs are given, it'll run purgedups with automatic cutoffs and then will rerun it selecting the mean cutoff as 0.75\\*cov. It uses the version installed in the cluster module specified in the config. \r\n\r\n5- Evaluations\r\n\t\r\n- **Merqury:** It runs on each 'terminal' assembly. This is, the base assembly and the resulting assembly from each branch of the pipeline. 
\r\n\t\r\n- **Busco:** It can be run only in the terminal assemblies or on all the assemblies produced by the pipeline. It uses the conda environment specified in the config as well as the parameters specified. \r\n\t\r\n- **Nseries:** This is run during the *finalize* on all the assemblies that are evaluated. After it, that rule combines the statistics produced by all the evaluation rules. \r\n\r\n# Description of all options\r\n```\r\n bin/create_config_assembly.py -h\r\nusage: create_configuration_file [-h] [--configFile configFile] [--specFile specFile] [--ndconfFile ndconfFile] [--concat-cores concat_cores]\r\n                                 [--genome-size genome_size] [--lr-type lr_type] [--basename base_name] [--species species] [--keep-intermediate]\r\n                                 [--preprocess-lr-step PREPROCESS_ONT_STEP] [--preprocess-10X-step PREPROCESS_10X_STEP]\r\n                                 [--preprocess-illumina-step PREPROCESS_ILLUMINA_STEP] [--preprocess-hic-step PREPROCESS_HIC_STEP]\r\n                                 [--flye-step FLYE_STEP] [--no-flye] [--nextdenovo-step NEXTDENOVO_STEP] [--run-nextdenovo]\r\n                                 [--nextpolish-cores nextpolish_cores] [--minimap2-cores minimap2_cores] [--bwa-cores bwa_cores]\r\n                                 [--hypo-cores hypo_cores] [--pairtools-cores pairtools_cores] [--busco-cores busco_cores]\r\n                                 [--nextpolish-ont-rounds nextpolish_ont_rounds] [--nextpolish-ill-rounds nextpolish_ill_rounds]\r\n                                 [--hypo-rounds hypo_rounds] [--longranger-cores longranger_cores] [--longranger-path longranger_path]\r\n                                 [--genomescope-opts genomescope_additional] [--no-purgedups] [--ploidy ploidy] [--run-tigmint] [--run-kraken2]\r\n                                 [--no-yahs] [--scripts-dir SCRIPTS_DIR] [--ont-reads ONT_READS] [--ont-dir ONT_DIR] [--ont-filt ONT_FILTERED]\r\n                       
          [--pe1 PE1] [--pe2 PE2] [--processed-illumina PROCESSED_ILLUMINA] [--raw-10X RAW_10X [RAW_10X ...]]\r\n                                 [--processed-10X PROCESSED_10X] [--10X R10X] [--illumina-dir ILLUMINA_DIR]\r\n                                 [--assembly-in ASSEMBLY_IN [ASSEMBLY_IN ...]]\r\n                                 [--postpolish-assemblies POSTPOLISH_ASSEMBLIES [POSTPOLISH_ASSEMBLIES ...]]\r\n                                 [--curated-assemblies CURATED_ASSEMBLIES [CURATED_ASSEMBLIES ...]] [--hic-dir HIC_DIR]\r\n                                 [--pipeline-workdir PIPELINE_WORKDIR] [--filtlong-dir FILTLONG_DIR] [--concat-hic-dir CONCAT_HIC_DIR]\r\n                                 [--flye-dir FLYE_DIR] [--nextdenovo-dir NEXTDENOVO_DIR] [--flye-polishing-dir POLISH_FLYE_DIR]\r\n                                 [--nextdenovo-polishing-dir POLISH_NEXTDENOVO_DIR] [--eval-dir eval_dir] [--stats-out stats_out]\r\n                                 [--hic-qc-dir hic_qc_dir] [--filtlong-minlen filtlong_minlen] [--filtlong-min-mean-q filtlong_min_mean_q]\r\n                                 [--filtlong-opts filtlong_opts] [--kraken2-db kraken2_db] [--kraken2-kmer kraken2_kmers]\r\n                                 [--kraken2-opts additional_kraken2_opts] [--kraken2-cores kraken2_threads] [--trim-galore-opts trim_galore_opts]\r\n                                 [--trim-Illumina-cores Trim_Illumina_cores] [--flye-cores flye_cores] [--flye-polishing-iterations flye_pol_it]\r\n                                 [--other-flye-opts other_flye_opts] [--nextdenovo-cores nextdenovo_cores] [--nextdenovo-jobtype nextdenovo_type]\r\n                                 [--nextdenovo-task nextdenovo_task] [--nextdenovo-rewrite nextdenovo_rewrite]\r\n                                 [--nextdenovo-parallel_jobs nextdenovo_parallel_jobs] [--nextdenovo-minreadlen nextdenovo_minreadlen]\r\n                                 [--nextdenovo-seeddepth nextdenovo_seeddepth] 
[--nextdenovo-seedcutoff nextdenovo_seedcutoff]\r\n                                 [--nextdenovo-blocksize nextdenovo_blocksize] [--nextdenovo-pa-correction  nextdenovo_pa_correction]\r\n                                 [--nextdenovo-minimap_raw nextdenovo_minimap_raw] [--nextdenovo-minimap_cns nextdenovo_minimap_cns]\r\n                                 [--nextdenovo-minimap_map nextdenovo_minimap_map] [--nextdenovo-sort nextdenovo_sort]\r\n                                 [--nextdenovo-correction_opts nextdenovo_correction_opts] [--nextdenovo-nextgraph_opt nextdenovo_nextgraph_opt]\r\n                                 [--sr-cov ill_cov] [--hypo-proc hypo_processes] [--hypo-no-lr] [--hypo-opts hypo_opts]\r\n                                 [--purgedups-cores purgedups_cores] [--purgedups-calcuts-opts calcuts_opts] [--tigmint-cores tigmint_cores]\r\n                                 [--tigmint-opts tigmint_opts] [--hic-qc] [--no-pretext] [--assembly-qc assembly_qc] [--yahs-cores yahs_cores]\r\n                                 [--yahs-mq yahs_mq] [--yahs-opts yahs_opts] [--hic-map-opts hic_map_opts] [--mq mq [mq ...]]\r\n                                 [--hic-qc-assemblylen hic_qc_assemblylen] [--blast-cores blast_cores] [--hic-blastdb blastdb]\r\n                                 [--hic-readsblast hic_readsblast] [--no-final-evals] [--busco-lin busco_lineage] [--merqury-db merqury_db]\r\n                                 [--merqury-plot-opts merqury_plot_opts] [--meryl-k meryl_k] [--meryl-threads meryl_threads]\r\n                                 [--meryl-reads meryl_reads [meryl_reads ...]] [--ont-list ONT_wildcards] [--illumina-list illumina_wildcards]\r\n                                 [--r10X-list r10X_wildcards] [--hic-list hic_wildcards]\r\n\r\nCreate a configuration json file for the assembly pipeline.\r\n\r\noptions:\r\n  -h, --help            show this help message and exit\r\n\r\nGeneral Parameters:\r\n  --configFile configFile\r\n                        
Configuration JSON to be generated. Default assembly.config\r\n  --specFile specFile   Cluster specifications JSON file to be generated. Default assembly.spec\r\n  --ndconfFile ndconfFile\r\n                        Name of the nextdenovo config file. Default nextdenovo.config\r\n  --concat-cores concat_cores\r\n                        Number of threads to concatenate reads and to run filtlong. Default 4\r\n  --genome-size genome_size\r\n                        Approximate genome size. Example: 615m or 2.6g. Default None\r\n  --lr-type lr_type     Type of long reads (options are flye read-type options). Default nano-hq\r\n  --basename base_name  Base name for the project. Default None\r\n  --species species     Name of the species to be assembled. Default None\r\n  --keep-intermediate   Set this to True if you do not want intermediate files to be removed. Default False\r\n  --preprocess-lr-step PREPROCESS_ONT_STEP\r\n                        Step for preprocessing long-reads. Default 02.1\r\n  --preprocess-10X-step PREPROCESS_10X_STEP\r\n                        Step for preprocessing 10X reads. Default 02.2\r\n  --preprocess-illumina-step PREPROCESS_ILLUMINA_STEP\r\n                        Step for preprocessing illumina reads. Default 02.2\r\n  --preprocess-hic-step PREPROCESS_HIC_STEP\r\n                        Step for preprocessing hic reads. Default 02.3\r\n  --flye-step FLYE_STEP\r\n                        Step for running flye. Default 03.1\r\n  --no-flye             Give this option if you do not want to run Flye.\r\n  --nextdenovo-step NEXTDENOVO_STEP\r\n                        Step for running nextdenovo. Default 03.2\r\n  --run-nextdenovo      Give this option if you do want to run Nextdenovo.\r\n  --nextpolish-cores nextpolish_cores\r\n                        Number of threads to run the nextpolish step. Default 24\r\n  --minimap2-cores minimap2_cores\r\n                        Number of threads to run the alignment with minimap2. 
Default 32\r\n  --bwa-cores bwa_cores\r\n                        Number of threads to run the alignments with BWA-Mem2. Default 16\r\n  --hypo-cores hypo_cores\r\n                        Number of threads to run the hypo step. Default 24\r\n  --pairtools-cores pairtools_cores\r\n                        Number of threads to run the pairtools step. Default 100\r\n  --busco-cores busco_cores\r\n                        Number of threads to run BUSCO. Default 32\r\n  --nextpolish-ont-rounds nextpolish_ont_rounds\r\n                        Number of rounds to run the Nextpolish with ONT step. Default 0\r\n  --nextpolish-ill-rounds nextpolish_ill_rounds\r\n                        Number of rounds to run the Nextpolish with illumina step. Default 0\r\n  --hypo-rounds hypo_rounds\r\n                        Number of rounds to run the Hypostep. Default 1\r\n  --longranger-cores longranger_cores\r\n                        Number of threads to run longranger. Default 16\r\n  --longranger-path longranger_path\r\n                        Path to longranger executable. Default /scratch/project/devel/aateam/src/10X/longranger-2.2.2\r\n  --genomescope-opts genomescope_additional\r\n                        Additional options to run Genomescope2 with. Default -m 10000\r\n  --no-purgedups        Give this option if you do not want to run Purgedups.\r\n  --ploidy ploidy       Expected ploidy. Default 2\r\n  --run-tigmint         Give this option if you want to run the scaffolding with 10X reads step.\r\n  --run-kraken2         Give this option if you want to run Kraken2 on the input reads.\r\n  --no-yahs             Give this option if you do not want to run yahs.\r\n\r\nInputs:\r\n  --scripts-dir SCRIPTS_DIR\r\n                        Directory with the different scripts for the pipeline. Default\r\n                        /software/assembly/pipelines/Assembly_pipeline/CLAWSv2.2/bin/../scripts/\r\n  --ont-reads ONT_READS\r\n                        File with all the ONT reads. 
Default None\r\n  --ont-dir ONT_DIR     Directory where the ONT fastqs are stored. Default None\r\n  --ont-filt ONT_FILTERED\r\n                        File with the ONT reads after running filtlong on them. Default None\r\n  --pe1 PE1             File with the illumina paired-end fastqs, already trimmed, pair 1.\r\n  --pe2 PE2             File with the illumina paired-end fastqs, already trimmed, pair 2.\r\n  --processed-illumina PROCESSED_ILLUMINA\r\n                        Directory to Processed illumina reads. Already there or to be produced by the pipeline.\r\n  --raw-10X RAW_10X [RAW_10X ...]\r\n                        Dictionary with 10X raw read directories, it has to be the mkfastq dir. You must specify as well the sampleIDs from this run.\r\n                        Example: '{\"mkfastq-dir\":\"sample1,sample2,sample3\"}'...\r\n  --processed-10X PROCESSED_10X\r\n                        Directory to Processed 10X reads. Already there or to be produced by the pipeline.\r\n  --10X R10X            File with barcoded 10X reads in fastq.gz format, concatenated.\r\n  --illumina-dir ILLUMINA_DIR\r\n                        Directory where the raw illumina fastqs are stored. Default None\r\n  --assembly-in ASSEMBLY_IN [ASSEMBLY_IN ...]\r\n                        Dictionary with assemblies that need to be polished but not assembled and directory where they should be polished. Example:\r\n                        '{\"assembly1\":\"polishing_dir1\"}' '{\"assembly2\"=\"polishing_dir2\"}' ...\r\n  --postpolish-assemblies POSTPOLISH_ASSEMBLIES [POSTPOLISH_ASSEMBLIES ...]\r\n                        Dictionary with assemblies for whic postpolishing steps need to be run but that are not assembled and base step for the\r\n                        directory where the first postpolishing step should be run. 
Example: '{\"assembly1\":\"s04.1_p03.1\"}'\r\n                        '{\"assembly2\":\"s04.2_p03.2\"}' ...\r\n  --curated-assemblies CURATED_ASSEMBLIES [CURATED_ASSEMBLIES ...]\r\n                        Dictionary with assemblies that have already been curated. Evaluations and read alignment will be performed. Example:\r\n                        '{\"assembly1\":\"s04.1_p03.1\"}' '{\"assembly2\":\"s04.2_p03.2\"}' ...\r\n  --hic-dir HIC_DIR     Directory where the HiC fastqs are stored. Default None\r\n\r\nOutputs:\r\n  --pipeline-workdir PIPELINE_WORKDIR\r\n                        Base directory for the pipeline run. Default /scratch_isilon/groups/assembly/jgomez/test_CLAWSv2/ilErePala/assembly/\r\n  --filtlong-dir FILTLONG_DIR\r\n                        Directory to process the ONT reads with filtlong. Default s02.1_p01.1_Filtlong\r\n  --concat-hic-dir CONCAT_HIC_DIR\r\n                        Directory to concatenate the HiC reads. Default s02.3_p01.1_Concat_HiC\r\n  --flye-dir FLYE_DIR   Directory to run flye. Default s03.1_p02.1_flye/\r\n  --nextdenovo-dir NEXTDENOVO_DIR\r\n                        Directory to run nextdenovo. Default s03.2_p02.1_nextdenovo/\r\n  --flye-polishing-dir POLISH_FLYE_DIR\r\n                        Directory to polish the flye assembly. Default s04.1_p03.1_polishing/\r\n  --nextdenovo-polishing-dir POLISH_NEXTDENOVO_DIR\r\n                        Directory to run nextdenovo. Default s04.2_p03.2_polishing/\r\n  --eval-dir eval_dir   Base directory for the evaluations. Default evaluations/\r\n  --stats-out stats_out\r\n                        Path to the file with the final statistics.\r\n  --hic-qc-dir hic_qc_dir\r\n                        Directory to run the hic_qc. Default hic_qc/\r\n\r\nFiltlong:\r\n  --filtlong-minlen filtlong_minlen\r\n                        Minimum read length to use with Filtlong. Default 1000\r\n  --filtlong-min-mean-q filtlong_min_mean_q\r\n                        Minimum mean quality to use with Filtlong. 
Default 80\r\n  --filtlong-opts filtlong_opts\r\n                        Extra options to run Filtlong (eg. -t 4000000000)\r\n\r\nKraken2:\r\n  --kraken2-db kraken2_db\r\n                        Database to be used for running Kraken2. Default None\r\n  --kraken2-kmer kraken2_kmers\r\n                        Database to be used for running Kraken2. Default None\r\n  --kraken2-opts additional_kraken2_opts\r\n                        Optional parameters for the rule Kraken2. Default\r\n  --kraken2-cores kraken2_threads\r\n                        Number of threads to run the Kraken2 step. Default 16\r\n\r\nTrim_Galore:\r\n  --trim-galore-opts trim_galore_opts\r\n                        Optional parameters for the rule trim_galore. Default --max_n 0 --gzip -q 20 --paired --retain_unpaired\r\n  --trim-Illumina-cores Trim_Illumina_cores\r\n                        Number of threads to run the Illumina trimming step. Default 8\r\n\r\nFlye:\r\n  --flye-cores flye_cores\r\n                        Number of threads to run FLYE. Default 128\r\n  --flye-polishing-iterations flye_pol_it\r\n                        Number of polishing iterations to use with FLYE. Default 2\r\n  --other-flye-opts other_flye_opts\r\n                        Additional options to run Flye. Default --scaffold\r\n\r\nNextdenovo:\r\n  --nextdenovo-cores nextdenovo_cores\r\n                        Number of threads to run nextdenovo. Default 2\r\n  --nextdenovo-jobtype nextdenovo_type\r\n                        Job_type for nextdenovo. Default slurm\r\n  --nextdenovo-task nextdenovo_task\r\n                        Task need to run. Default all\r\n  --nextdenovo-rewrite nextdenovo_rewrite\r\n                        Overwrite existing directory. Default yes\r\n  --nextdenovo-parallel_jobs nextdenovo_parallel_jobs\r\n                        Number of tasks used to run in parallel. 
Default 50\r\n  --nextdenovo-minreadlen nextdenovo_minreadlen\r\n                        Filter reads with length \u003c minreadlen. Default 1k\r\n  --nextdenovo-seeddepth nextdenovo_seeddepth\r\n                        Expected seed depth, used to calculate seed_cutoff, co-use with genome_size, you can try to set it 30-45 to get a better\r\n                        assembly result. Default 45\r\n  --nextdenovo-seedcutoff nextdenovo_seedcutoff\r\n                        Minimum seed length, \u003c=0 means calculate it automatically using bin/seq_stat. Default 0\r\n  --nextdenovo-blocksize nextdenovo_blocksize\r\n                        Block size for parallel running, split non-seed reads into small files, the maximum size of each file is blocksize. Default 1g\r\n  --nextdenovo-pa-correction  nextdenovo_pa_correction\r\n                        number of corrected tasks used to run in parallel, each corrected task requires ~TOTAL_INPUT_BASES/4 bytes of memory usage,\r\n                        overwrite parallel_jobs only for this step. Default 100\r\n  --nextdenovo-minimap_raw nextdenovo_minimap_raw\r\n                        minimap2 options, used to find overlaps between raw reads, see minimap2-nd for details. Default -t 30\r\n  --nextdenovo-minimap_cns nextdenovo_minimap_cns\r\n                        minimap2 options, used to find overlaps between corrected reads. Default -t 30\r\n  --nextdenovo-minimap_map nextdenovo_minimap_map\r\n                        minimap2 options, used to map reads back to the assembly. Default -t 30 --no-kalloc\r\n  --nextdenovo-sort nextdenovo_sort\r\n                        sort options, see ovl_sort for details. Default -m 400g -t 20\r\n  --nextdenovo-correction_opts nextdenovo_correction_opts\r\n                        Correction options. Default -p 30 -dbuf\r\n  --nextdenovo-nextgraph_opt nextdenovo_nextgraph_opt\r\n                        nextgraph options, see nextgraph for details. 
Default -a 1\r\n\r\nHypo:\r\n  --sr-cov ill_cov      Approximate short read coverage for hypo. Default 0\r\n  --hypo-proc hypo_processes\r\n                        Number of contigs to be processed in parallel by HyPo. Default 6\r\n  --hypo-no-lr          Set this to false if you don't want to run hypo with long reads. Default True\r\n  --hypo-opts hypo_opts\r\n                        Additional options to run Hypo. Default None\r\n\r\nPurge_dups:\r\n  --purgedups-cores purgedups_cores\r\n                        Number of threads to run purgedups. Default 8\r\n  --purgedups-calcuts-opts calcuts_opts\r\n                        Adjusted values to run calcuts for purgedups. Default None\r\n\r\nScaffold_with_10X:\r\n  --tigmint-cores tigmint_cores\r\n                        Number of threads to run the 10X scaffolding step. Default 12\r\n  --tigmint-opts tigmint_opts\r\n                        Adjusted values to run the scaffolding with 10X reads. Default None\r\n\r\nHiC:\r\n  --hic-qc              Give this option if only QC of the HiC data needs to be done.\r\n  --no-pretext          Give this option if you do not want to generate the pretext file\r\n  --assembly-qc assembly_qc\r\n                        Path to the assembly to be used to perform the QC of the HiC reads.\r\n  --yahs-cores yahs_cores\r\n                        Number of threads to run YAHS. Default 48\r\n  --yahs-mq yahs_mq     Mapping quality to use when running yahs. Default 40\r\n  --yahs-opts yahs_opts\r\n                        Additional options to give to YAHS. Default\r\n  --hic-map-opts hic_map_opts\r\n                        Options to use with bwa mem when aligning the HiC reads. Default -5SP -T0\r\n  --mq mq [mq ...]      Mapping qualities to use for processing the hic mappings. 
Default [0, 40]\r\n  --hic-qc-assemblylen hic_qc_assemblylen\r\n                        Length of the assembly to be used for HiC QC\r\n  --blast-cores blast_cores\r\n                        Number of threads to run blast with the HiC unmapped reads. Default 8\r\n  --hic-blastdb blastdb\r\n                        BLAST Database to use to classify the hic unmapped reads. Default /scratch_isilon/groups/assembly/data/blastdbs\r\n  --hic-readsblast hic_readsblast\r\n                        Number of unmapped hic reads to classify with blast. Default 100\r\n\r\nFinalize:\r\n  --no-final-evals      If specified, do not run evaluations on final assemblies. Default True\r\n  --busco-lin busco_lineage\r\n                        Path to the lineage directory to run Busco with. Default None\r\n  --merqury-db merqury_db\r\n                        Meryl database. Default None\r\n  --merqury-plot-opts merqury_plot_opts\r\n                        Meryl database. Default None\r\n  --meryl-k meryl_k     Merqury plot additional options, for example \" -m 200 -n 6000|\". Default None\r\n  --meryl-threads meryl_threads\r\n                        Number of threads to run meryl and merqury. Default 4\r\n  --meryl-reads meryl_reads [meryl_reads ...]\r\n                        Type of reads to be used to build the meryldb. Default ont illumina\r\n\r\nWildcards:\r\n  --ont-list ONT_wildcards\r\n                        List with basename of the ONT fastqs that will be used. Default None\r\n  --illumina-list illumina_wildcards\r\n                        List with basename of the illumina fastqs. Default None\r\n  --r10X-list r10X_wildcards\r\n                        List with basename of the raw 10X fastqs. Default None\r\n  --hic-list hic_wildcards\r\n                        List with basename of the raw hic fastqs. Default None\r\n```\r\n# Changes made to v2.2: \r\n\r\n1. General: \r\n\r\n\tNow default read_type is nano-hq \r\n\r\n2. 
Rule trim_galore: \r\n\r\n\t\"--max_n 0\" has been added to the default behaviour of \"--trim-galore-opts\" \r\n\r\n3. Meryl: \r\n\r\n\tNew option \"--meryl-reads\" has been added to the config. Default is \"Illumina ont\" to build the meryl database using both type of reads, it can be changed to one or the other \r\n\r\n4. Merqury: \r\n\r\n\tOption \"--merqury-plot-opts\" has been added to config file. It can be used to modify the x and y axis maximum values (eg. --merqury-plot-opts \" -m 200 -n 6000\") \r\n\r\n5. Genomescope: \r\n\r\n\t\"-m 10000\" is now part of the default behavior of \"--genomescope-opts\" \r\n\r\n6. Hic_statistics: \r\n\r\n\tThis is now running for each assembly and mq for which a pretext file is generated \r\n\r\n7. Assembly inputs for different steps: \r\n\r\n\ta. \"--assembly-in\" to start after assembly step (eg. Evaluation, polishing, purging and scaffolding) \r\n\r\n\tb. \"--postpolish-assemblies\" to start after polishing step (eg. Evaluation, purging and scaffolding) \r\n\r\n\tc. \"--curated-assemblies\" to start after scaffolding step (eg. Evaluation and pretext generation) \r\n","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/567?version=1","name":"v2.1.0 @ c5cf1d5","author":["Jessica Gomez-Garrido"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/567?version=2","name":"v2.2.0 @ e4333e0","author":["Jessica Gomez-Garrido"],"descriptor_type":["SMK"]}]},{"id":"569","url":"https://workflowhub.eu/workflows/569","name":"ERGA Protein-coding gene annotation workflow","description":"# ERGA Protein-coding gene annotation workflow.\r\nAdapted from the work of Sagane Joye:\r\n\r\nhttps://github.com/sdind/genome_annotation_workflow\r\n\r\n## Prerequisites\r\n\r\nThe following programs are required to run the workflow and the listed version were tested. 
It should be noted that older versions of snakemake are not compatible with newer versions of singularity as is noted here: [https://github.com/nextflow-io/nextflow/issues/1659](https://github.com/nextflow-io/nextflow/issues/1659).\r\n\r\n`conda v 23.7.3`\r\n\r\n`singularity v 3.7.3`\r\n\r\n`snakemake v 7.32.3` \r\n\r\nYou will also need to acquire a licence key for Genemark and place this in your home directory with name `~/.gm_key` The key file can be obtained from the following location, where the licence should be read and agreed to: http://topaz.gatech.edu/GeneMark/license_download.cgi\r\n\r\n## Workflow\r\n\r\nThe pipeline is based on braker3 and was tested on the following dataset from Drosophila melanogaster: [https://doi.org/10.5281/zenodo.8013373](https://doi.org/10.5281/zenodo.8013373)\r\n\r\n### Input data\r\n\r\n- Reference genome in fasta format\r\n\r\n- RNAseq data in paired-end zipped fastq format\r\n\r\n- uniprot fasta sequences in zipped fasta format\r\n\r\n### Pipeline steps\r\n\r\n- **Repeat Model and Mask** Run RepeatModeler using the genome as input, filter any repeats also annotated as protein sequences in the uniprot database and use this filtered libray to mask the genome with RepeatMasker\r\n\r\n- **Map RNAseq data** Trim any remaining adapter sequences and map the trimmed reads to the input genome\r\n\r\n- **Run gene prediction software** Use the mapped RNAseq reads and the uniprot sequences to create hints for gene prediction using Braker3 on the masked genome\r\n\r\n- **Evaluate annotation** Run BUSCO to evaluate the completeness of the annotation produced\r\n\r\n### Output data\r\n\r\n- FastQC reports for input RNAseq data before and after adapter trimming\r\n\r\n- RepeatMasker report containing quantity of masked sequence and distribution among TE families\r\n\r\n- Protein-coding gene annotation file in gff3 format\r\n\r\n- BUSCO summary of annotated sequences\r\n\r\n## Setup\r\n\r\nYour data should be placed in the `data` folder, 
with the reference genome in the folder `data/ref` and the transcript data in the folder `data/rnaseq`.\r\n\r\nThe config file requires the following to be given:\r\n\r\n```\r\nasm: 'absolute path to reference fasta'\r\nsnakemake_dir_path: 'path to snakemake working directory'\r\nname: 'name for project, e.g. mHomSap1'\r\nRNA_dir: 'absolute path to rnaseq directory'\r\nbusco_phylum: 'busco database to use for evaluation e.g. mammalia_odb10'\r\n```\r\n","organization":"ERGA Annotation","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/569?version=1","name":"Version 1","author":["Sagane Joye-Dind"],"descriptor_type":["SMK"]}]},{"id":"571","url":"https://workflowhub.eu/workflows/571","name":"MLme: Machine Learning Made Easy","description":"This workflow represents the Default ML Pipeline for AutoML feature from MLme. Machine Learning Made Easy (MLme) is a novel tool that simplifies machine learning (ML) for researchers. By integrating four essential functionalities, namely data exploration, AutoML, CustomML, and visualization, MLme fulfills the diverse requirements of researchers while eliminating the need for extensive coding efforts. MLme serves as a valuable resource that empowers researchers of all technical levels to leverage ML for insightful data analysis and enhance research outcomes. 
By simplifying and automating various stages of the ML workflow, it enables researchers to allocate more time to their core research tasks, thereby enhancing efficiency and productivity.\r\n\r\n","organization":"MLme: Machine Learning Made Easy","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/571?version=1","name":"Version 1","author":["Akshay Akshay"],"descriptor_type":[]}]},{"id":"575","url":"https://workflowhub.eu/workflows/575","name":"repeatmasking/main","description":"# RepeatMasking Workflow\n\nThis workflow uses RepeatModeler and RepeatMasker for genome analysis.\n\n- RepeatModeler is a software package for identifying and modeling de novo families of transposable elements (TEs). At the heart of RepeatModeler are three de novo repeat search programs (RECON, RepeatScout and LtrHarvest/Ltr_retriever) which use complementary computational methods to identify repeat element boundaries and family relationships from sequence data.\n\n- RepeatMasker is a program that analyzes DNA sequences for *interleaved repeats* and *low-complexity* DNA sequences. 
The result of the program is a detailed annotation of the repeats present in the query sequence, as well as a modified version of the query sequence in which all annotated repeats have been masked.\n\n## Input dataset for RepeatModeler\n- RepeatModeler requires a single input file, a genome in fasta format.\n\n\n## Outputs dataset for RepeatModeler\n- Two output files are generated:\n    - summary file (.tbl)\n    - fasta file containing alignments in order of appearance in the query sequence\n\n\n## Input dataset for RepeatMasker\n- RepeatMasker requires the fasta file generated by RepeatModeler\n\n## Outputs datasets for RepeatMasker\n- Five output files are generated:\n    - a fasta file\n    - .gff3 file\n    - a table summarizing the repeated content of the sequence analyzed\n    - a file with statistics related to the repeated content of the sequence analyzed\n    - a summary of the mutation sites found and the order of grouping\n    \n","organization":"EuroScienceGateway, Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/575?version=1","name":"v0.1","author":["Romane Libouban"],"descriptor_type":["GALAXY"]}]},{"id":"576","url":"https://workflowhub.eu/workflows/576","name":"ECP experiments","description":"This repository contains the python code to reproduce the experiments in Dłotko, Gurnari \"Euler Characteristic Curves and Profiles: a stable shape invariant for big data problems\"","organization":"Dioscuri TDA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/576?version=1","name":"main @ e60566d","author":["Davide Gurnari"],"descriptor_type":[]}]},{"id":"579","url":"https://workflowhub.eu/workflows/579","name":"average-bigwig-between-replicates/main","description":"We assume the identifiers of the input list are 
like:\nsample_name_replicateID.\nThe identifiers of the output list will be:\nsample_name","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/579?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/579?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"598","url":"https://workflowhub.eu/workflows/598","name":"SCIPION: acquire -\u003e motionCorr -\u003e ctf -\u003e report","description":"The simplest workflow among a collection of workflows intended to solve tasks up to CTF estimation.","organization":"Scipion CNB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/598?version=1","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"599","url":"https://workflowhub.eu/workflows/599","name":"CEITEC layer 1 workflow","description":"The second-level complexity workflow is one among a collection of workflows designed to address tasks up to CTF estimation. In addition to the functionalities provided by the layer 0 workflow, this workflow aims to enhance the quality of acquisition images using quality protocols.\r\n\r\n**Quality control protocols**\r\n\r\n* **Movie max shift**: automatic reject those movies whose frames move more than a given threshold. 
\r\n\r\n* **Tilt analysis**: quality score based in the Power Spectrum Density (astigmatism and tilt) \r\n\r\n* **CTF consensus**: acts as a filter discarding micrographs based on their CTF (limit resolution, defocus, astigmatism, etc.).\r\n\r\n**Advantages:** \r\n\r\n* More control of the acquisition quality\r\n\r\n* Reduce unnecessary processing time and storage","organization":"Scipion CNB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/599?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/599?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"600","url":"https://workflowhub.eu/workflows/600","name":"CEITEC layer 2 workflow","description":"The ultimate-level complexity workflow is one among a collection of workflows designed to address tasks up to CTF estimation. In addition to the functionalities provided by layer 0 and 1 workflows, this workflow aims to enhance the quality of both **acquisition images** and **processing**.\r\n\r\n**Quality control protocols**\r\n\r\n…\r\n\r\n**Combination of methods**\r\n* **CTF consensus**\r\n\t* New methods to compare ctf estimations\r\n\t* CTF xmipp criteria (richer parameters i.e. ice detection)\r\n\r\n**Advantages**: \r\n* Control of the acquisition quality\r\n* Robust estimations to continue with the processing","organization":"Scipion CNB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/600?version=1","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"601","url":"https://workflowhub.eu/workflows/601","name":"ERGA DataQC Illumina v2601 (WF0)","description":"The workflow takes a paired-reads collection (like illumina WGS or HiC), runs FastQC and SeqKit, trims with Fastp, and creates a MultiQC report. 
The main outputs are a paired collection of trimmed reads, a report with raw and trimmed reads stats, and a table with raw reads stats.","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/601?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/601?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"602","url":"https://workflowhub.eu/workflows/602","name":"ERGA DataQC HiFi v2601 (WF0)","description":"The workflow takes a HiFi reads collection, runs FastQC and SeqKit, filters with Cutadapt, and creates a MultiQC report. The main outputs are a collection of filtered reads, a report with raw and filtered reads stats, and a table with raw reads stats.","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/602?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/602?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"603","url":"https://workflowhub.eu/workflows/603","name":"ERGA Profiling Long Reads v2602 (WF1)","description":"The workflow takes a (trimmed) Long reads collection, runs Meryl to create a K-mer database, Genomescope2 to estimate genome properties and Smudgeplot to estimate ploidy (optional). The main results are K-mer database and genome profiling plots, tables, and values useful for downstream analysis. Default K-mer length and ploidy for Genomescope are 31 and 2, respectively. 
","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/603?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/603?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"605","url":"https://workflowhub.eu/workflows/605","name":"ERGA Long reads+HiC Assembly+QC Hifiasm v2602 (WF2)","description":"The workflow takes a trimmed long reads collection, and Forward/Reverse HiC reads to run Hifiasm in HiC phasing mode. It produces both Pri/Alt and Hap1/Hap2 assemblies, and runs all the QC analysis (gfastats, BUSCO, and Merqury). The default Hifiasm purge level is aggressive (l3).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/605?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/605?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"606","url":"https://workflowhub.eu/workflows/606","name":"ERGA HiFi Hap1Hap2 Purge+QC v2309 (WF3)","description":"The workflow takes a trimmed HiFi reads collection, Hap1/Hap2 contigs, and the values for transition parameter and max coverage depth (calculated from WF1) to run Purge_Dups. 
It produces purged Hap1 and Hap2 contigs assemblies, and runs all the QC analysis (gfastats, BUSCO, and Merqury).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/606?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/606?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"607","url":"https://workflowhub.eu/workflows/607","name":"Cancer variant annotation (hg38 VEP-based)","description":"This Galaxy workflow takes a list of tumor/normal sample pair variants in VCF format and\r\n1. annotates them using the ENSEMBL Variant Effect Predictor and custom annotation data\r\n2. turns the annotated VCF into a MAF file for import into cBioPortal\r\n3. generates human-readable variant- and gene-centric reports\r\n\r\nThe input VCF is expected to encode somatic status, somatic p-value and germline p-value of each variant in varscan somatic format, i.e., via SS, SPV and GPV INFO keys, respectively.","organization":"EOSC4Cancer, usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/607?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"608","url":"https://workflowhub.eu/workflows/608","name":"ERGA HiC Hap1Hap2 Scaffolding+QC YaHS v2309 (WF4)","description":"The workflow takes trimmed HiC forward and reverse reads, and Hap1/Hap2 assemblies to produce Hap1 and Hap2 scaffolded assemblies using YaHS. 
It also runs all the QC analyses (gfastats, BUSCO, Merqury and Pretext).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/608?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"611","url":"https://workflowhub.eu/workflows/611","name":"HPPIDiscovery - Scientific workflow to augment, predict and evaluate host-pathogen protein-protein interactions","description":"## Summary\r\nHPPIDiscovery is a scientific workflow to augment, predict and perform an insilico curation of host-pathogen Protein-Protein Interactions (PPIs) using graph theory to build new candidate ppis and machine learning to predict and evaluate them by combining multiple PPI detection methods of proteins according to three categories: structural,  based on primary aminoacid sequence and functional annotations.\u003cbr\u003e\r\n\r\nHPPIDiscovery contains three main steps: (i) acquirement of pathogen and host proteins information from seed ppis provided by HPIDB search methods, (ii) Model training and generation of new candidate ppis from HPIDB seed proteins' partners, and (iii) Evaluation of new candidate ppis and results exportation.\r\n\r\n(i) The first step acquires the identification of the taxonomy ids of the host and pathogen organisms in the result files. Then it proceeds parsing and cleaning the HPIDB results and downloading the protein interactions of the found organisms from the STRING database. The string protein identifiers are also mapped using the id mapping tool of uniprot API and we retrieve the uniprot entry ids along with the functional annotations, sequence, domain and kegg enzymes.\r\n\r\n(ii) The second step builds the training dataset using the non redundant hpidb validated interactions of each genome as positive set and random string low confidence ppis from each genome as negative set. 
Then, PredPrin tool is executed in the training mode to obtain the model that will evaluate the new candidate PPIs. The new ppis are then generated by performing a pairwise combination of string partners of host and pathogen hpidb proteins. \r\n\r\nFinally, (iii) in the third step, the predprin tool is used in the test mode to evaluate the new ppis and generate the reports and list of positively predicted ppis.\r\n\r\nThe figure below illustrates the steps of this workflow.\r\n\r\n## Requirements:\r\n* Edit the configuration file (config.yaml) according to your own data, filling out the following fields:\r\n\t- base_data: location of the organism folders directory, example: /home/user/data/genomes \r\n\t- parameters_file: Since this workflow may perform parallel processing of multiple organisms at the same time, you must prepate a tabulated file containng the genome folder names located in base data, where the hpidb files are located. Example: /home/user/data/params.tsv. It must have the following columns: genome (folder name), hpidb_seed_network (the result exported by one of the search methods available in hpidb database), hpidb_search_method (the type of search used to generate the results) and target_taxon (the target taxon id). The column hpidb_source may have two values: keyword or homology. In the keyword mode, you provide a taxonomy, protein name, publication id or detection method and you save all results (mitab.zip) in the genome folder. Finally, in the homology mode allows the user to search for host pathogen ppis giving as input fasta sequences of a set of proteins of the target pathgen for enrichment (so you have to select the search for a pathogen set) and you save the zip folder results (interaction data) in the genome folder. This option is extremely useful when you are not sure that your organism has validated protein interactions, then it finds validated interactions from the closest proteins in the database. 
In case of using the homology mode, the identifiers of the pathogens' query fasta sequences must be a Uniprot ID. All the query protein IDs must belong to the same target organism (taxon id).\r\n\t- model_file: path of a previously trained model in joblib format (if you want to train from the known validated PPIs given as seeds, just put a 'None' value)\r\n\r\n## Usage Instructions\r\nThe steps below consider the creation of a sqlite database file with all he tasks events which can be used after to retrieve the execution time taken by the tasks. It is possible run locally too (see luigi's documentation to change the running command). \u003cbr \u003e\u003cbr\u003e\r\n* Preparation:\r\n\t1. ````git clone https://github.com/YasCoMa/hppidiscovery.git````\r\n\t2. ````cd hppidiscovery````\r\n\t3. ````mkdir luigi_log```` \r\n\t4. ````luigid --background --logdir luigi_log```` (start luigi server)\r\n\t5. conda env create -f hp_ppi_augmentation.yml\r\n\t6. conda activate hp_ppi_augmentation\r\n\t6.1. (execute ````pip3 install wget```` (it is not installed in the environment))\r\n\t7. run ````pwd```` command and get the full path\r\n\t8. Substitute \u003cpath\u003e in config_example.yaml with the full path obtained in the previous step\r\n\t9. Download SPRINT pre-computed similarities in https://www.csd.uwo.ca/~ilie/SPRINT/precomputed_similarities.zip and unzip it inside workflow_hpAugmentation/predprin/core/sprint/HSP/\r\n\t10. ````cd workflow_hpAugmentation/predprin/````\r\n\t11. Uncompress annotation_data.zip\r\n\t12. Uncompress sequence_data.zip\r\n\t13. ````cd ../../````\r\n\t14. ````cd workflow_hpAugmentation````\r\n\t15. snake -n (check the plan of jobs, it should return no errors and exceptions)\r\n\t16. 
snakemake -j 4 (change this number according the number of genomes to analyse and the amount of cores available in your machine)","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/611?version=1","name":"master @ f76d418","author":[],"descriptor_type":["SMK"]}]},{"id":"612","url":"https://workflowhub.eu/workflows/612","name":"Assembly-Hifi-only-VGP3/main","description":"Generate a genome assembly based on PacBio HiFi reads. Part of the VGP suite, it needs to be run after the VGP1 k-mer profiling workflow. The assembly contigs are built using HiFiasm, and the workflow generates assembly statistics, BUSCO reports, Merqury plots, and the contigs in fasta and GFA formats.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/612?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/612?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/612?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/612?version=4","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/612?version=5","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/612?version=6","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/612?version=7","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/612?version=8","name":"v0.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/612?
version=9","name":"v0.1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/612?version=10","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/612?version=11","name":"v0.2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/612?version=12","name":"v0.2.2","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/612?version=13","name":"v0.2.3","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/612?version=14","name":"v0.2.4","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/612?version=15","name":"v0.2.5","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/612?version=16","name":"v0.2.6","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/612?version=17","name":"v0.2.7","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/612?version=18","name":"v0.2.8","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/612?version=19","name":"v0.2.9","author":[],"descriptor_type":["GALAXY"]},{"id":"20","url":"https://workflowhub.eu/workflows/612?version=20","name":"v0.2.10","author":[],"descriptor_type":["GALAXY"]},{"id":"21","url":"https://workflowhub.eu/workflows/612?version=21","name":"v0.3.0","author":[],"descriptor_type":["GALAXY"]},{"id":"22","url":"https://workflowhub.eu/workflows/612?version=22","name":"v0.3.1","author":[],"descriptor_type":["GALAXY"]},{"id":"23","url":"https://workflowhub.eu/workflows/612?version=23","name":"v0.3.2","author":[],"descriptor_type":["GALAXY"]},{"id":"24","url":"https://workflowhub.eu/workflows/612?version=24","name":"v0.3.3","author":[],"descriptor_type":["GALAXY"]},{"id":"25","url":"https://workflowhub.eu/workflows/612?ve
rsion=25","name":"v0.3.4","author":[],"descriptor_type":["GALAXY"]},{"id":"26","url":"https://workflowhub.eu/workflows/612?version=26","name":"v0.3.5","author":[],"descriptor_type":["GALAXY"]}]},{"id":"613","url":"https://workflowhub.eu/workflows/613","name":"openms-metaprosip/main","description":"Automated inference of stable isotope incorporation rates in proteins for functional metaproteomics ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/613?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/613?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/613?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"614","url":"https://workflowhub.eu/workflows/614","name":"eFlows4HPC Demo ROM Workflow","description":"A demonstration workflow for Reduced Order Modeling (ROM) within the eFlows4HPC project, implemented using Kratos Multiphysics, EZyRB, COMPSs, and dislib.","organization":"Pillar I: Manufacturing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/614?version=1","name":"PyCOMPSs 3.3.pr","author":[],"descriptor_type":[]}]},{"id":"615","url":"https://workflowhub.eu/workflows/615","name":"baredsc/baredSC-1d-logNorm","description":"Run baredSC in 1 dimension in logNorm for 1 to N gaussians and combine models.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/615?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/615?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/615?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/615?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/615?version=5","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/615?version=6","name":"v0.6","author":[],"descriptor_type":["GALAXY"]}]},{"id":"616","url":"https://workflowhub.eu/workflows/616","name":"PredPrIn - Scientific workflow to predict protein-protein interactions based in a combined analysis of multiple protein characteristics.","description":"## Summary\r\nPredPrIn is a scientific workflow to predict Protein-Protein Interactions (PPIs) using machine learning to combine multiple PPI detection methods of proteins according to three categories: structural,  based on primary aminoacid sequence and functional annotations.\u003cbr\u003e\r\n\r\nPredPrIn contains three main steps: (i) acquirement and treatment of protein information, (ii) feature generation, and (iii) classification and analysis.\r\n\r\n(i) The first step builds a knowledge base with the available annotations of proteins and reuses this base for other prediction experiments, saving time and becoming more efficient. \r\n\r\n(ii) The feature generation step involves several evidence from different classes, such as: Gene Ontology (GO) information, domain interaction, metabolic pathway participation and sequence-based interaction. For the GO branches, we made a study to evaluate the best method to calculate semantic similarity to enhance the workflow performance. 
This step can be easily modified by adding new metrics, making PredPrIn flexible for future improvements. \r\n\r\nFinally, (iii) in the third step, the adaboost classifier is responsible for predicting the final scores from the numerical features dataset, exporting results of performance evaluation metrics.\r\n\r\n## Requirements:\r\n* Python packages needed:\r\n    - pip3 install luigi\r\n\t- pip3 install sqlalchemy\r\n\t- pip3 install rdflib\r\n\t- pip3 install sklearn\r\n\t- pip3 install matplotlib\r\n\t- pip3 install numpy\r\n\r\n* Other instalation:\r\n\t- sqlite (to be able to see the documentation generated by luigi about the tasks after execution)\r\n\r\n## Usage Instructions\r\nThe steps below consider the creation of a sqlite database file with all he tasks events which can be used after to retrieve the execution time taken by the tasks. It is possible run locally too (see luigi's documentation to change the running command). \u003cbr \u003e\u003cbr\u003e\r\n* Preparation:\r\n\t1. ````git clone https://github.com/YasCoMa/predprin.git````\r\n\t2. ````cd PredPrIn````\r\n\t3. `pip3 install -r requirements.txt`\r\n\t4. Download annotation_data.zip (https://drive.google.com/file/d/1bWPSyULaooj7GTrDf6QBY3ZyeyH5MRpm/view?usp=share_link)\r\n\t5. Download rdf_data.zip (https://drive.google.com/file/d/1Cp511ioXiw2PiOHdkxa4XsZnxOeM3Pan/view?usp=share_link)\r\n\t6. Download sequence_data.zip (https://drive.google.com/file/d/1uEKh5EF9X_6fgZ9cTTp0jW3XaL48stxA/view?usp=share_link)\r\n\t7. Unzip annotation_data.zip\r\n\t8. Unzip rdf_data.zip\r\n\t9. Unzip sequence_data.zip\r\n\t10. Download SPRINT pre-computed similarities in https://www.csd.uwo.ca/~ilie/SPRINT/precomputed_similarities.zip and unzip it inside core/sprint/HSP/\r\n\t11. Certify that there is a file named client.cfg (to configure the history log and feed the sqlite database). 
It must have the following data:\r\n\t````\r\n\t[core]\r\n\tdefault-scheduler-host=localhost\r\n\tdefault-scheduler-port=8082\r\n\trpc-connect-timeout=60.0 \r\n\trpc-retry-attempts=10    \r\n\trpc-retry-wait=60        \r\n\r\n\t[scheduler]\r\n\trecord_task_history = True\r\n\r\n\t[task_history]\r\n\tdb_connection = sqlite:///luigi-task-hist.db\r\n\t````\r\n* Parameters:\r\n\t1. parameters-file -\u003e json file with all the information to process the prediction experiment (example: params.json)\r\n\t2. mode -\u003e it can have two values: train (executes cross validation and save the model as a .joblib file) or test (uses a model obtained in train mode to test in some dataset listed in the parameters file)\r\n\t3. model -\u003e it is the model file full path saved in train mode as .joblib\r\n\t\r\n* Running:\r\n\t1. ````mkdir luigi_log```` (or other name for the log folder of your choice)\r\n\t2. ````luigid --background --logdir luigi_log```` (start luigi server)\r\n\t3. ````nohup python3.5 -m luigi --module main RunPPIExperiment --parameters-file params.json --mode 'train' --model none.joblib --workers 3 \u0026```` \u003cbr \u003e\r\n\t   ````nohup python3.5 -m luigi --module main RunPPIExperiment --parameters-file params.json --mode 'test' --model model.jolib --workers 3 \u0026```` \u003cbr \u003e\r\n\t\t- Replace python3.5 by the command python of your environment \u003cbr\u003e\r\n\t\t- Replace the data given as example in params.json using your own data \u003cbr \u003e \r\n\t\t- Adapt the number of workers to use as you need and the capacity of your computational resource available\r\n\r\n\tYou can monitor the prediction experiment execution in localhost:8082\r\n\r\n## Reference\r\nMartins YC, Ziviani A, Nicolás MF, de Vasconcelos AT. Large-Scale Protein Interactions Prediction by Multiple Evidence Analysis Associated With an In-Silico Curation Strategy. Frontiers in Bioinformatics. 
2021:38.\r\nhttps://www.frontiersin.org/articles/10.3389/fbinf.2021.731345/full\r\n\r\n## Bug Report\r\nPlease, use the [Issues](https://github.com/YasCoMa/PredPrIn/issues) tab to report any bug.","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/616?version=1","name":"master @ 10eb8a4","author":[],"descriptor_type":[]}]},{"id":"617","url":"https://workflowhub.eu/workflows/617","name":"PPIVPro - PPI Validation Process","description":"## Summary\r\n\r\nThe validation process proposed has two pipelines for filtering PPIs predicted by some _IN SILICO_  detection method, both pipelines can be executed separately. The first pipeline (i) filter according to association rules of cellular locations extracted from HINT database. The second pipeline (ii) filter according to scientific papers where both proteins in the PPIs appear in interaction context in the sentences.\r\n\r\nThe pipeline (i) starts extracting cellular component annotations from HINT PPIs building a dataset and then the Apriori algorithm is applied in this dataset in an iterative process that repeat the application of this algorithm till the rules cover 15 main locations in the cell. This process generate a database with association rules with two main columns: antecedent and consequent, meaning that a location that occurs in antecedent also occurs with the location in consequent. The filtering task evaluate the PPI checking if some location annotated for the first protein is in the antecedent column and if some location of the second protein is also in the same rule but in the consequent column. If so, the PPI passes according to the criteria.\r\n\r\nThe pipeline (ii) starts getting all papers that mention both proteins in the PPIs and extrating their content using the NCBI [API](https://www.ncbi.nlm.nih.gov/home/develop/api/). 
These XML files are cleaned removing hypertext markup and references to figures, tables and supplementary materials. The paragraphs of the remaining articles content are processed by Natural language processing steps to extract sentences, tokens, stopwords removal to remove words extremely common in english language and do not help to identify the context of interest, prioritizing tokens using part-of-speech tagging to keep just nouns and verbs. Then the sentences filtered goes to the task that identifies the proteins of the PPI in evaluation among the tokens and also tries to identify tokens or set of tokens that mention experimental methods. The sentences that have the proteins of interest are filtered if the nouns and verbs have some of the items of the list of words indicating interaction relation (recruit, bind, interact, signaling, etc). Finally, a report is made by pair with the article identifiers, the sentences, the proteins and interacting words found.\r\n\r\nThe figure below illustrates all the tasks of these pipelines.\r\n\r\n\u003cdiv style=\"text-align: center\"\u003e\r\n\t\u003cimg src=\"pipeline.png\" alt=\"pipeline\"\r\n\ttitle=\"PPI validation process\" width=\"600px\" /\u003e\r\n\u003c/div\u003e\r\n\r\n## Requirements:\r\n* Python packages needed:\r\n\t- pip3 install pandas\r\n\t- pip3 install rdflib\r\n\t- pip3 install mlxtend\r\n\t- pip3 install inflect\r\n\t- pip3 install nltk\r\n\t- pip3 install biopython\r\n\t- pip3 install lxml\r\n\t- pip3 install bs4 (beautiful soup)\r\n\r\n## Usage Instructions\r\n### Preparation:\r\n1. ````git clone https://github.com/YasCoMa/ppi_validation_process.git````\r\n2. `pip3 install -r requirements.txt`\r\n3. ````cd ppi_validation_process/pipe_location_assocRules/````\r\n4. ````unzip pygosemsim.zip````\r\n5. 
````cd ../````\r\n\r\n### Filtering by association rules of cellular locations (first filtering part) - File ````pipe_location_assocRules/find_pattern.py```` :\r\n* Pipeline parameters:\r\n\t- __-fo__ or __--folder__ \u003cbr\u003e\r\n\t\tFolder to store the files (use the folder where the other required file can be found)\r\n\t- __-if__ or __--interactome_file__ \u003cbr\u003e\r\n\t\tFile with the pairs (two columns with uniprot identifiers in tsv format)\u003cbr\u003e\r\n\r\n\t\tExample of this file: pipe_location_assocRules/running_example/all_pairs.tsv\r\n\r\n\r\n* Running modes examples:\r\n\t1. Go to the first filtering part folder: \u003cbr\u003e\r\n\t````cd pipe_location_assocRules/````\r\n\r\n\t2. Uncompress annotation_data.zip\r\n\t\r\n\t3. Run: \u003cbr\u003e\r\n\t````python3 find_pattern.py -fo running_example/ -if all_pairs.tsv````\r\n\r\n\r\n### Filtering by text mining on scientific papers (second filtering part) - File ````ppi_pubminer/pubmed_pmc_literature_pipeline.py````:\r\n\r\n* Pipeline parameters:\r\n\t- __-em__ or __--execution_mode__ \u003cbr\u003e\r\n\t\tUse to indicate the execution mode desired: \u003cbr\u003e\r\n\t\t1 - Mode using a list of protein pairs as bait \u003cbr\u003e\r\n\t\t2 - Mode that tries to find sentences of PPI context for any protein pairs given a list of articles\r\n\t\r\n\t- __-fo__ or __--folder__ \u003cbr\u003e\r\n\t\tFolder to store the files (use the folder where the other required file can be found)\r\n\r\n\t- __-rtm1__ or __--running_type_mode_1__ \u003cbr\u003e\r\n\t\tUse to indicate which execution step you want to run for mode 1 (it is desirable following the order showed): \u003cbr\u003e\r\n\t\t0 (default) - Run all steps \u003cbr\u003e\r\n\t\t1 - Run step 1 (Get mentions of both proteins in PMC articles) \u003cbr\u003e\r\n\t\t2 - Run step 2 (Get the PMC or Pubmed files, clean and store them) \u003cbr\u003e\r\n\t\t3 - Run step 3 (Get the exact sentences where the proteins were found on interacting 
context)\r\n\r\n\t- __-rtm2__ or __--running_type_mode_2__ \u003cbr\u003e\r\n\t\tUse to indicate which execution step you want to run for mode 2 (it is desirable following the order showed): \u003cbr\u003e\r\n\t\t0 (default) - Run all steps \u003cbr\u003e\r\n\t\t1 - Run step 1 (Get the PMC or Pubmed files from the given list, clean and store them) \u003cbr\u003e\r\n\t\t2 - Run step 2 (Get the exact sentences where the proteins were found on an interacting context)\r\n\r\n\t- __-fp__ or __--file_pairs__ \u003cbr\u003e\r\n\t\t(For mode 1) File with the pairs (two columns with uniprot identifiers in tsv format)\u003cbr\u003e\r\n\t\t\r\n\t\tExample of this file: ppipubminer/running_example/mode_1/all_pairs.tsv\r\n\r\n\t- __-fe__ or __--file_evaluation__ \u003cbr\u003e\r\n\t\t(For mode 1) File exported after step 1 execution in tsv format\u003cbr\u003e\r\n\r\n\t- __-fa__ or __--file_articles__ \u003cbr\u003e\r\n\t\t(For mode 2) File with the articles (First column indicating if it is from pmc or pubmed and the second one is the article id) in tsv format)\u003cbr\u003e\r\n\t\t\r\n\t\tExample of this file: ppipubminer/running_example/mode_2/articles_info.tsv\r\n\r\n* Running modes examples:\r\n\t- Go to the second filtering part folder: \u003cbr\u003e\r\n\t````cd ppipubminer/````\r\n\r\n\t- Mode 1 - From protein pairs (PPIs) to sentences in articles\r\n\t\t1. Running all three steps of mode 1: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 1 -rtm1 0 -fo running_example/mode_1/ -fp all_pairs.tsv````\r\n\r\n\t\t2. Running only step 1 of mode 1: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 1 -rtm1 1 -fo running_example/mode_1/ -fp all_pairs.tsv````\r\n\r\n\t\t3. Running only step 2 of mode 1: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 1 -rtm1 2 -fo running_example/mode_1/ -fp all_pairs.tsv -fe literature_evaluation_pairs.tsv````\r\n\r\n\t\t4. 
Running only step 3 of mode 1: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 1 -rtm1 3 -fo running_example/mode_1/ -fp all_pairs.tsv -fe literature_evaluation_pairs.tsv````\r\n\r\n\t- Mode 2 - From articles to report of sentences with any protein pairs (PPIs)\r\n\t\t1. Running all three steps of mode 2: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 2 -rtm1 0 -fo running_example/mode_2/ -fa articles_info.tsv````\r\n\r\n\t\t2. Running only step 1 of mode 2: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 2 -rtm1 1 -fo running_example/mode_2/ -fa articles_info.tsv````\r\n\r\n\t\t3. Running only step 2 of mode 2: \u003cbr\u003e\r\n\t\t````python3 pubmed_pmc_literature_pipeline.py -em 2 -rtm1 2 -fo running_example/mode_2/ -fa articles_info.tsv ````\r\n\r\n## Reference\r\nMartins YC, Ziviani A, Nicolás MF, de Vasconcelos AT. Large-Scale Protein Interactions Prediction by Multiple Evidence Analysis Associated With an In-Silico Curation Strategy. Frontiers in Bioinformatics. 2021:38.\r\nhttps://www.frontiersin.org/articles/10.3389/fbinf.2021.731345/full\r\n\r\n## Bug Report\r\nPlease, use the [Issues](https://github.com/YasCoMa/ppi_validation_process/issues) tab to report any bug.","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/617?version=1","name":"master @ 34c158b","author":[],"descriptor_type":[]}]},{"id":"618","url":"https://workflowhub.eu/workflows/618","name":"PPIntegrator - PPI Triplification Process","description":"## Summary\r\n\r\nThis pipeline has as major goal provide a tool for protein interactions (PPI) prediction data formalization and standardization using the [OntoPPI](https://link.springer.com/chapter/10.1007/978-3-030-36599-8_23) ontology. 
This pipeline is splitted in two parts: (i) a part to prepare data from three main sources of PPI data ([HINT](http://hint.yulab.org/), [STRING](https://string-db.org/) and [PredPrin](https://github.com/YasCoMa/PredPrin.git)) and create the standard files to be processed by the next part; (ii) the second part uses the data prepared before to semantically describe using ontologies related to the concepts of this domain. It describes the provenance information of PPI prediction experiments, datasets characteristics, functional annotations of proteins involved in the PPIs, description of the PPI detection methods (also named as evidence) used in the experiment,  and the prediction score obtained by each PPI detection method for the PPIs. This pipeline also execute data fusion to map the same protein pairs from different data sources and, finally, it creates a database of all these information in the [alegro](https://allegrograph.com/) graph triplestore.\r\n\r\n## Requirements:\r\n* Python packages needed:\r\n\t- pip3 install numpy\r\n\t- pip3 install rdflib\r\n\t- pip3 install uuid\r\n\t- pip3 install SPARQLWrapper\r\n\t- alegro graph tools (pip3 install agraph-python) \u003cbr \u003e \r\n\t\tGo to this [site](https://franz.com/agraph/support/documentation/current/python/install.html) for the installation tutorial\r\n\r\n## Usage Instructions\r\n### Preparation:\r\n1. ````git clone https://github.com/YasCoMa/ppintegrator.git````\r\n2. ````cd ppintegrator````\r\n3. `pip3 install -r requirements.txt`\r\n**Allegrograph is a triple store, which is a database to maintain semantic descriptions. This database's server provides a web application with a user interface to run, edit and manage queries, visualize results and manipulate the data without writing codes other than SPARQL query language. The use of the Allegregraph option is not mandatory, but if you want to export and use it, you have to install the server and the client.**\r\n4. 
if you want to use the Allegrograph server option (this triple store has free license up to 5,000,000 triples), install allegrograph server in your machine (configure a user and password): Server - https://franz.com/agraph/support/documentation/current/server-installation.html; Client - https://franz.com/agraph/support/documentation/current/python/install.html\r\n5. Export the following environment variables to configure Allegrograph server\r\n\r\n````\r\nexport AGRAPH_HOST=127.0.0.1\r\nexport AGRAPH_PORT=10035\r\nexport AGRAPH_USER=chosen_user\r\nexport AGRAPH_PASSWORD=chosen_password\r\n````\r\n5. Start allegrograph: ````path/to/allegrograph/bin/agraph-control --config path/to/allegrograph/lib/agraph.cfg start````\r\n6. Read the file data_requirements.txt to understand which files are needed for the process\r\n\r\n### Data preparation (first part) - File ````prepare_data_triplification.py```` :\r\n* Pipeline parameters:\r\n\t- __-rt__ or __--running_type__ \u003cbr\u003e\r\n\t\tUse to indicate from which source you want to prepare PPI data, as follows: \u003cbr\u003e\r\n\t\t1 - Prepare data for PredPrin \u003cbr\u003e\r\n\t\t2 - Prepare data for String \u003cbr\u003e\r\n\t\t3 - Prepare data for HINT\r\n\t- __-fec__ or __--file_experiment_config__ \u003cbr\u003e\r\n\t\tFile with the experiment configuration in json format\u003cbr\u003e\r\n\t\t\r\n\t\tExamples are in these files (all the metadata are required): params_hint.json, params_predrep_5k.json e params_string.json\r\n\r\n\t- __-org__ or __--organism__ \u003cbr\u003e\r\n\t\tPrepare data only for one organism of interest (example: homo_sapiens) \u003cbr \u003e\r\n\r\n\t\tThis parameter is optional. If you do not specify, it will automatically use the organisms described in the experiment configuration file above\r\n\r\n\r\n* Running modes examples:\r\n\t1. 
Running for PPI data generated by PredPrin: \u003cbr\u003e\r\n\t````python3 prepare_data_triplification.py -rt 1 -fec params_predrep_5k.json````\r\n\r\n\t2. Running for HINT database: \u003cbr\u003e\r\n\t````python3 prepare_data_triplification.py -rt 3 -fec params_hint.json````\r\n\r\n\t3. Running for STRING database: \u003cbr\u003e\r\n\t````python3 prepare_data_triplification.py -rt 2 -fec params_string.json````\r\n\r\n\tIn the file ````auxiliar_data_preparation.py```` you can run it for all the examples provided automatically, as follows: \u003cbr\u003e\r\n\t````python3 auxiliar_data_preparation.py````\r\n\r\n\r\n### PPI data triplification (second part) - File ````triplification_ppi_data.py````:\r\n\r\n* Pipeline parameters:\r\n\t- __-rt__ or __--running_type__ \u003cbr\u003e\r\n\t\tUse to indicate which execution step you want to run (it is desirable following the order showed): \u003cbr\u003e\r\n\t\t0 - Generate the descriptions for all the protein interaction steps of an experiment  (run steps 1, 2 and 3) \u003cbr \u003e\r\n\t\t1 - Generate triples just about data provenance \u003cbr \u003e\r\n\t\t2 - Generate triples just for protein functional annotations\u003cbr \u003e\r\n\t\t3 - Generate triples just for the score results of each evidence\u003cbr \u003e\r\n\t\t4 - Execute data fusion\u003cbr \u003e\r\n\t\t5 - Generate descriptions and execute data fusion (run steps 1, 2, 3 and 4)\u003cbr \u003e\r\n\t\t6 - Export to allegrograph server\r\n\r\n\t- __-fec__ or __--file_experiment_config__ \u003cbr\u003e\r\n\t\tFile with the experiment configuration in json format\u003cbr\u003e\r\n\t\t\r\n\t\tExamples are in these files (all the metadata are required): params_hint.json, params_predrep_5k.json e params_string.json\r\n\r\n\t- __-fev__ or __--file_evidence_info__ \u003cbr\u003e\r\n\t\tFile with the PPI detection methods information in json format\u003cbr\u003e\r\n\t\t\r\n\t\tExamples are in these files (all the metadata are required): evidences_information.json, 
evidences_information_hint.json e evidences_information_string.json\r\n\r\n\t- __-fcv__ or __--file_config_evidence__ \u003cbr\u003e\r\n\t\tFile with the experiment and evidence methods files addresses in tsv format\u003cbr\u003e\r\n\t\t\r\n\t\tExample of this file: config_evidence_file.tsv\r\n\r\n* Running modes examples:\r\n\t1. Running to generate all semantic descriptions for PredPrin: \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 0 -fec params_predrep_5k.json -fev evidences_information.json````\r\n\r\n\t2. Running to generate only triples of data provenance: \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 1 -fec params_hint.json -fev evidences_information_hint.json````\r\n\r\n\t3. Running to generate only triples of PPI scores for each evidence: \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 3 -fec params_hint.json -fev evidences_information_hint.json````\r\n\r\n\t4. Running to generate only triples of protein functional annotations (only PredPrin exports these annotations): \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 2 -fec params_predrep_5k.json -fev evidences_information.json````\r\n\r\n\t5. Running to generate all semantic descrptions for STRING: \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 0 -fec params_string.json -fev evidences_information_string.json````\r\n    \r\n    **For the next options (4, 5 and 6), it is mandatory running at least mode 1 and 3 for HINT, STRING and PredPrin**\r\n    \r\n\t6. Running to execute data fusion of different sources: \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 4 -fcv config_evidence_file.tsv````\r\n\r\n\t7. Running to generate all semantic descriptions and execute data fusion of different sources (combines mode 0 and 4): \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 5 -fcv config_evidence_file.tsv````\r\n\r\n\t8.  
Export semantic data to allegrograph server: \u003cbr\u003e\r\n\t````python3 triplification_ppi_data.py -rt 6 -fcv config_evidence_file.tsv````\r\n\r\n## Query Scenarios for analysis\r\nSupposing you ran all the steps showed in the section above, you can run the following options to analyse the data stored alegro graph triple store. \u003cbr\u003e\r\nFile to use for this section: ````query_analysis_ppitriplificator.py```` \u003cbr\u003e\r\n\r\n* Parameter:\r\n\t- __-q__ or __--query_option__ \u003cbr\u003e\r\n\t\tUse to indicate which query you want to perform: \u003cbr\u003e\r\n\t\t1 - Get all the different organisms whose interactions are stored in the database\u003cbr \u003e\r\n\t\t2 - Get the interactions that have scientific papers associated and the list of these papers\u003cbr \u003e\r\n\t\t3 - Get a list of the most frequent biological processes annotated for the interactions of Escherichia coli bacteria\u003cbr \u003e\r\n\t\t4 - Get only the interactions belonging to a specific biological process (regulation of transcription, DNA-templated) in Escherichia coli bacteria\u003cbr \u003e\r\n\t\t5 - Get the scores of interactions belonging to a specific biological process (regulation of transcription, DNA-templated) in Escherichia coli bacteria\u003cbr \u003e\r\n\t\t6 - Get a list of the most frequent biological processes annotated for the interactions of human organism\u003cbr \u003e\r\n\t\t7 - Get only the interactions belonging to a specific biological process (positive regulation of transcription by RNA polymerase II) in human organism\u003cbr \u003e\r\n\t\t8 - Get the scores of interactions belonging to a specific biological process (positive regulation of transcription by RNA polymerase II) in human organism\r\n\r\n* Running modes examples:\r\n\t1. 
Running queries: \u003cbr\u003e\r\n\t````python3 query_analysis_ppitriplificator.py -q 1 ```` \u003cbr\u003e\r\n\t\tChange number 1 to the respective number of the query you want to perform\r\n\r\n## Reference\r\nMartins, Y. C., Ziviani, A., Cerqueira e Costa, M. D. O., Cavalcanti, M. C. R., Nicolás, M. F., \u0026 de Vasconcelos, A. T. R. (2023). PPIntegrator: semantic integrative system for protein–protein interaction and application for host–pathogen datasets. Bioinformatics Advances, 3(1), vbad067.\r\n\r\n## Bug Report\r\nPlease, use the [Issues](https://github.com/YasCoMa/ppintegrator/issues) tab to report any bug.","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/618?version=1","name":"master @ 6d3008c","author":[],"descriptor_type":[]}]},{"id":"619","url":"https://workflowhub.eu/workflows/619","name":"PipePatExp - Pipeline to aggregate gene expression correlation information for PPI","description":"## Summary\r\n\r\nThe PPI information aggregation pipeline starts getting all the datasets in [GEO](https://www.ncbi.nlm.nih.gov/geo/) database whose material was generated using expression profiling by high throughput sequencing. From each database identifiers, it extracts the supplementary files that had the counts table. Once finishing the download step, it identifies those that were normalized or had the raw counts to normalize.  It also identify and map the gene ids to uniprot (the ids found usually were from HGNC and Ensembl). For each normalized counts table belonging to some experiment, il filters those which have the proteins (already mapped from HGNC to Uniprot identifiers) in the pairs in evaluation. Then, it calculates the correlation matrix based on Pearson method in the tables and saves the respective pairs correlation value for each table. 
Finally, a repor is made for each pair in descending order of correlation value with the experiment identifiers.\r\n\r\n## Requirements:\r\n* Python packages needed:\r\n\t- os\r\n\t- scipy\r\n\t- pandas\r\n\t- sklearn\r\n\t- Bio python\r\n\t- numpy\r\n\r\n## Usage Instructions\r\n* Preparation:\r\n\t1. ````git clone https://github.com/YasCoMa/PipeAggregationInfo.git````\r\n\t2. ````cd PipeAggregationInfo````\r\n\t3. ````pip3 install -r requirements.txt````\r\n\r\n### Preprocessing pipeline\r\n* Go to the ncbi [GDS database webpage](https://www.ncbi.nlm.nih.gov/gds), use the key words to filter your gds datasets of interest and save the results as file (\"Send to\" option), and choose \"Summary (text)\"\r\n* Alternatively, we already saved the results concerning protein interactions, you may use them to run preprocessing in order to obtain the necessary files for the main pipeline\r\n* Running preprocessing:\r\n    - ````cd preprocessing````\r\n    - ````python3 data_preprocessing.py ./workdir_preprocessing filter_files````\r\n    - ````cd ../````\r\n    - Copy the generated output folder \"data_matrices_count\" into the workflow folder: ````cp -R preprocessing/workdir_preprocessing/data_matrices_count .````\r\n\r\n### Main pipeline\r\n\r\n* Pipeline parameters:\r\n\t- __-rt__ or __--running_type__ \u003cbr\u003e\r\n\t\tUse to indicate the step you want to execute (it is desirable following the order): \u003cbr\u003e\r\n\t\t1 - Make the process of finding the experiments and ranking them by correlation \u003cbr\u003e\r\n\t\t2 - Select pairs that were already processed and ranked making a separated folder of interest\r\n\r\n\t- __-fo__ or __--folder__ \u003cbr\u003e\r\n\t\tFolder to store the files (use the folder where the other required file can be found)\r\n\t\r\n\t- __-if__ or __--interactome_file__ \u003cbr\u003e\r\n\t\tFile with the pairs (two columns with uniprot identifiers in tsv format)\u003cbr\u003e\r\n\t\t\r\n\t\tExample of this file: 
running_example/all_pairs.tsv\r\n\r\n\t- __-spf__ or __--selected_pairs_file__ \u003cbr\u003e\r\n\t\tFile with PPIs of interest (two columns with uniprot identifiers in tsv format)\u003cbr\u003e\r\n\t\t\r\n\t\tExample of this file: running_example/selected_pairs.tsv\r\n\r\n* Running modes examples:\r\n\t1. Run step 1: \u003cbr\u003e\r\n\t````python3 pipeline_expression_pattern.py -rt 1 -fo running_example/ -if all_pairs.tsv ````\r\n\r\n\t2. Run step 2: \u003cbr\u003e\r\n\t````python3 pipeline_expression_pattern.py -rt 2 -fo running_example/ -spf selected_pairs.tsv ````\r\n\r\n## Bug Report\r\nPlease, use the [Issue](https://github.com/YasCoMa/PipeAggregationInfo/issues) tab to report any bug.","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/619?version=1","name":"main @ beb490b","author":[],"descriptor_type":[]}]},{"id":"620","url":"https://workflowhub.eu/workflows/620","name":"DReCaS - Pipeline for drug ranking based on computed pathway scores of disease and healthy samples","description":"## Summary\r\n\r\nThis pipeline contains the following functions: \r\n(1) Data processing to handle the tansformations needed to obtain the original pathway scores of the samples according to single sample analysis GSEA\r\n(2) Model training based on the disease and healthy sample pathway scores, to classify them\r\n(3) Scoring matrix weights optimization according to a gold standard list of drugs (those that went on clinical trials or are approved for the disease).It tests the weights in a range of 0 to 30 (you may change as you want). 
The evaluation function tests and try to maximize the number of approved drugs whose modified pathway scores for disease samples is changed from disease to healthy sample classification, according to the trained model.\r\n(4) Computation of the calibrated disease samples pathwa scores according to the interaction among drug and targets found in the sample pathways \u0026 Drug ranking based on the disease samples whose calibrated matrix were responsible to change the trained model decision from disease to healthy state.\r\n(5) Drug combination ranking evaluated the same way as in option (4) but adding the effects of multiple drugs in each sample while calculating the calibrated scoring matrix\r\n            \r\n## Input configuration file:\r\n* The pipeline only needs a configuration file and the step number you want to run.\r\n- Configuration file keys (see also the example in config.json):\r\n    - **identifier**: project identifier to be used in the result files\r\n    - **type_normalization**: normalization type (possible values: tpm, fpkm, tmm, cpm or fpkm_uq)\r\n    - **genome_assembly**: the supported assemblies are the 37 and 38 (values may be: g37 or g38)\r\n    - **pathway_geneset**: pathway-based gene sets, choose one identifier from the list in [genesets_available.txt](https://github.com/YasCoMa/caliscoma_pipeline/blob/master/genesets_available.txt)\r\n    - **folder**: working directory\r\n    - **expression_file**: compressed gene expression file for the desired icgc project, it must be separated by tabulation. The following columns are mandatory: submitted_file_id (sample names), raw_read_count (the read counts without normalization) and gene_id (genes in ensembl or hgnc symbol). 
File expected to be in {folder}.\r\n    - **labels_file** (optional for function 1): file with two columns, one named 'sample' corresponding to the unique values of submitted_sample_id; the second named 'label' corresponding to a disease (or confirmed tumour) (1) or a healthy (0) case. File expected to be in {folder}.\r\n    - **trained_model** (optional for function 1): file with the trained model to separate healthy and disease cases. Full path is expected.\r\n    - **means_table_file** (optional for function 1): file with the means table calculated when the model is trained by the function 3. Full path is expected.\r\n    - **samples_pathway_scores** (optional for function 1): file with the original model calculated pathway scores by function 1, in order to check the number of features expected by the original model. Full path is expected.\r\n    - **optimized_weights_file**: tab separated table file with two columns representing the weights (w1, w2, w3) and their respective values.\r\n    - **drug_list_file** (only mandatory for function 3): file with the gold standard drug list (one drugbank id per line), this file is expected to be in the experiment item folder results ({folder}/{identifier})\r\n    - **drug_combination_file** (only mandatory for function 5): file with the drug combination candidates list (drugbank ids concatenated with comma in each line). Full path is expected.\r\n\r\n- Observation:    \r\n    * The \"labels_file\" parameter is mandatory for the weights optimization, scoring matrix calculation, model training and drug (or drug combination) ranking \r\n    * In case of transfer learning, \"labels_file\" may be ignored only if both \"trained_model\", \"means_table_file\" and \"samples_pathway_scores\" are present. This is only possible for the functions 2, 4 and 5. 
For weights optimization, only labels file is accepted.\r\n    * If type_normalization and/or genome_assembly are missing or empty, it will switch to the default fpkm_uq\r\n    * If pathway_geneset is missing or empty, it will switch to the default KEGG_2021_HUMAN\r\n    * If optimized_weights_file is missing or empty, it will switch to the default values (w1: 20, w2: 5, w3: 10)\r\n    \r\n## Usage Instructions\r\n### Preparation:\r\n1. ````git clone https://github.com/YasCoMa/caliscoma_pipeline.git````\r\n2. ````cd caliscoma_pipeline````\r\n3. Create conda environment to handle dependencies: ````conda env create -f drugresponse_env.yml````\r\n4. ````conda activate drugresponse_env````\r\n5. Setup an environment variable named \"path_workflow\" with the full path to this workflow folder\r\n\r\n### Getting data for the running example in the LICA-FR and LIRI-JP projects from ICGC\r\n1. Download the [expression file for LICA-FR](https://dcc.icgc.org/api/v1/download?fn=/current/Projects/LICA-FR/exp_seq.LICA-FR.tsv.gz) and put it in data_icgc folder\r\n2. Download the [expression file for LIRI-JP](https://dcc.icgc.org/api/v1/download?fn=/current/Projects/LIRI-JP/exp_seq.LIRI-JP.tsv.gz) and put it in data_icgc folder\r\n3. 
For the liri-jp project, the labels file is already processed, to give an example of a project that runs all steps proposed by this workflow\r\n\r\n### Run analysis\r\n- Run all steps: ````python3 main.py -rt 0 -cf config.json````\r\n- Run all steps: ````python3 main.py -rt 0 -cf config_transfer_options.json````\r\n\r\n- Run only data processing: ````python3 main.py -rt 1 -cf config.json````\r\n- Run only data processing: ````python3 main.py -rt 1 -cf config_transfer_options.json````\r\n\r\n- Run only model training \u0026 modified pathway score matrix: ````python3 main.py -rt 2 -cf config.json````\r\n- Run only model training \u0026 modified pathway score matrix: ````python3 main.py -rt 2 -cf config_transfer_options.json````\r\n\r\n- Run only weights optimization: ````python3 main.py -rt 3 -cf config.json````\r\n\r\n- Run only drug ranking: ````python3 main.py -rt 4 -cf config.json````\r\n- Run only drug ranking: ````python3 main.py -rt 4 -cf config_transfer_options.json````\r\n\r\n- Run only drug combination evaluation: ````python3 main.py -rt 5 -cf config.json````\r\n- Run only drug combination evaluation: ````python3 main.py -rt 5 -cf config_transfer_options.json````\r\n\r\n## Reference\r\nMartins, Y. C. (2023). Multi-task analysis of gene expression data on cancer public datasets. 
medRxiv, 2023-09.\r\n\r\n## Bug Report\r\nPlease, use the [Issues](https://github.com/YasCoMa/caliscoma_pipeline/issues) tab to report any bug.","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/620?version=1","name":"master @ 775adb8","author":[],"descriptor_type":[]}]},{"id":"621","url":"https://workflowhub.eu/workflows/621","name":"ScreenDOP - Screening of strategies for disease outcome prediction","description":"## Summary\r\n\r\nThe data preparation pipeline contains tasks for two distinct scenarios: [leukaemia](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE425) that contains microarray data for 119 patients and [ovarian](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE140082) cancer that contains next generation sequencing data for 380 patients.\r\n\r\nThe disease outcome prediction pipeline offers two strategies for this task:\r\n\r\n**Graph kernel method**: It starts generating personalized networks for each patient using the interactome file provided and generate the patient network checking if each PPI of the interactome has both proteins up regulated or down regulated according to the gene expression table provided. The first step generate a set of graphs for the patients that are evaluated with 4 distinct kernels for graph classification, which are: Linear kernel between edge histograms, Linear kernel between vertex histograms and the Weisfeiler lehman. These kernels functions calculate a similarity matrix for the graphs and then this matrix is used by the support vector machine classifier. Then the predictions are delivered to the last task that exports a report with the accuracy reached by each kernel. 
It allows some customizations about the network parameters to be used, such as the DEG cutoff to determine up and down regulated based on the log2 fold change, which will determine the topology and the labels distribution in the specific sample graphs. It is also possible customize the type of node/edge attributes passed to the kernel function, which may be only label, only weight or both.\r\n\r\n**GSEA based pathway scores method**: This method is faster and do not rely on tensor inputs such as the previous method. It uses geneset enrichment analysis on the pathways from KEGG 2021 of Human, and uses the scores of the pathways found enriched for the samples to build the numerical features matrix, that is then delivered to the AdaBoost classifier. The user may choose balance the dataset using oversampling strategy provided by SMOTE.\r\n\r\n## Usage Instructions\r\n### Preparation:\r\n1. ````git clone https://github.com/YasCoMa/screendop.git````\r\n2. ````cd screendop````\r\n3. Decompress screening_ovarian/raw_expression_table.tsv.tar.xz\r\n4. Create conda environment to handle dependencies: ````conda env create -f drugresponse_env.yml````\r\n5. ````conda activate drugresponse_env````\r\n6. Setup an environment variable named \"path_workflow_screendop\" with the full path to this workflow folder\r\n\r\n### Data preparation - File ````data_preparation_for_pipeline.py```` :\r\n\r\n#### Files decompression\r\n\r\n- Decompress data_preparation/lekaemia.tar.xz\r\n- Decompress data_preparation/ovarian/GSE140082_data.tar.xz\r\n    - Put the decompressed file GSE140082_series_matrix.txt in data_preparation/ovarian/\r\n    \r\n#### Pipeline parameters\r\n\r\n- __-rt__ or __--running_type__ \u003cbr\u003e\r\n\tUse to prepare data for the desired scenario: \u003cbr\u003e\r\n\t1 - Run with Leukaemia data \u003cbr\u003e\r\n\t2 - Run with Ovarian cancer data\r\n\r\n#### Running modes examples\r\n\r\n1. 
Run for Leukaemia data: \u003cbr\u003e\r\n````python3 data_preparation_for_pipeline.py -rt 1 ```` \r\n\r\nIn this case, you must have [R](https://www.r-project.org/) installed and also the library [limma](https://bioconductor.org/packages/release/bioc/html/limma.html), it is used to determine DEGs from microarray data. For this dataset, the files are already prepared in the folder.\r\n\r\n2. Run for Ovarian cancer data: \u003cbr\u003e\r\n````python3 data_preparation_for_pipeline.py -rt 2 ```` \r\n\r\nIn this case, you must have [R](https://www.r-project.org/) installed and also the library [DESeq](https://bioconductor.org/packages/release/bioc/html/DESeq.html), because this scenario treats next generation sequencing data\r\n\r\n### Disease outcome prediction execution - File ````main.py````:\r\n\r\n#### Pipeline parameters\r\n\r\n- __-rt__ or __--running_step__ \u003cbr\u003e\r\n\tUse to prepare data for the desired scenario: \u003cbr\u003e\r\n\t1 - Run graph kernel method \u003cbr\u003e\r\n\t2 - Run gsea based pathway scores method\r\n\r\n- __-cf__ or __--configuration_file__ \u003cbr\u003e\r\n\tFile with the expression values for the genes by sample/patient in tsv format\u003cbr\u003e\r\n\t\r\n\tExample of this file: config.json\r\n\t\t\r\n#### Input configuration file\r\n\r\n- Configuration file keys (see also the example in config.json):\r\n    - **folder** (mandatory for both methods): working directory\r\n    - **identifier**: project identifier to be used in the result files\r\n    - **mask_expression_table** (mandatory for both methods): Gene expression values file with the result of the fold change normalized value of a certain gene for each sample, already pruned by the significance (p-value). 
\r\n    - **raw_expression_table** (mandatory for both methods): Raw gene expression values already normalized following the method of preference of the user.\r\n    - **labels_file** (mandatory for both methods): File with the prognosis label for each sample\r\n    - **deg_cutoff_up**: Cutoff value to determine up regulated gene. Default value is 1.\r\n    - **deg_cutoff_down**: Cutoff value to determine down regulated gene. Default value is -1.\r\n    - **nodes_enrichment**: Node attributes to be used in the screening evaluation. It may be a list combining the options \"label\", \"weight\" or \"all\". Examples: [\"all\", \"weight\"], [\"label\"], [\"label\", \"weight\"]. Default value is [\"all\"].\r\n    - **edges_enrichment**: Edge attributes to be used in the screening evaluation. It may be a list combining the options \"label\", \"weight\" or \"all\". Examples: [\"all\", \"weight\"], [\"label\"], [\"label\", \"weight\"]. Default value is [\"all\"].\r\n    - **flag_balance**: Flag to indicate whether the user wants to balance the samples in each outcome class, by SMOTE oversampling. Values may be false or true. Default value is false.\r\n\r\n#### Running modes examples\r\n1. Running disease outcome prediction by graph kernel method: \u003cbr\u003e\r\n\t````python3 main.py -rt 1 -cf config.json````\r\n\r\n2. Running disease outcome prediction by gsea enriched network method: \u003cbr\u003e\r\n\t````python3 main.py -rt 2 -cf config.json````\r\n\r\n## Reference\r\nMartins, Y. C. (2023). Multi-task analysis of gene expression data on cancer public datasets. 
medRxiv, 2023-09.\r\n\r\n## Bug Report\r\nPlease, use the [Issue](https://github.com/YasCoMa/screendop/issues) tab to report any bug.","organization":"yPublish - Bioinfo tools","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/621?version=1","name":"master @ b8cc280","author":[],"descriptor_type":[]}]},{"id":"622","url":"https://workflowhub.eu/workflows/622","name":"sra-manifest-to-concatenated-fastqs/main","description":"This workflow takes as input a SRA_manifest from SRA Run Selector and will generate one fastq file or fastq pair of file for each experiment (concatenated multiple runs if necessary). Output will be relabelled to match the column specified by the user.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/622?version=1","name":"v0.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/622?version=2","name":"v0.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/622?version=3","name":"v0.2.1","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/622?version=4","name":"v0.2.2","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/622?version=5","name":"v0.2.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/622?version=6","name":"v0.2.4","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/622?version=7","name":"v0.3","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/622?version=8","name":"v0.4","author":["Wolfgang 
Maier"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/622?version=9","name":"v0.5","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/622?version=10","name":"v0.6","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/622?version=11","name":"v0.7","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/622?version=12","name":"v0.9","author":["Wolfgang Maier"],"descriptor_type":["GALAXY"]}]},{"id":"624","url":"https://workflowhub.eu/workflows/624","name":"Analyses of shotgun metagenomics data with MetaPhlAn2","description":"The aim of this workflow is to handle the routine part of shotgun metagenomics data processing on Galaxy Australia. \r\n\r\nThe workflow is using the tools MetaPhlAn2 for taxonomy classification and HUMAnN2 for functional profiling of the metagenomes. The workflow is based on the Galaxy Training tutorial 'Analyses of metagenomics data - The global picture' (Saskia Hiltemann, Bérénice Batut) https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/general-tutorial/tutorial.html#shotgun-metagenomics-data. \r\n\r\nThe how-to guide is available here: https://vmurigneu.github.io/shotgun_howto_ga_workflows/\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/624?version=1","name":"Version 1","author":["Valentine Murigneux","Mike Thang","Saskia Hiltemann"],"descriptor_type":["GALAXY"]}]},{"id":"625","url":"https://workflowhub.eu/workflows/625","name":"Scaffolding-HiC-VGP8/main","description":"This workflow performs the scaffolding of a genome assembly using HiC data with YAHS. 
Can be used on any assembly with Hi-C data, and the assembly in the gfa format.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/625?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/625?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/625?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/625?version=4","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/625?version=5","name":"v0.2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/625?version=6","name":"v0.2.2","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/625?version=7","name":"v0.2.3","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/625?version=8","name":"v0.2.4","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/625?version=9","name":"v0.2.5","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/625?version=10","name":"v0.2.6","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/625?version=11","name":"v0.2.7","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/625?version=12","name":"v0.2.8","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/625?version=13","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/625?version=14","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/wor
kflows/625?version=15","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/625?version=16","name":"v1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/625?version=17","name":"v1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/625?version=18","name":"v1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/625?version=19","name":"v1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"20","url":"https://workflowhub.eu/workflows/625?version=20","name":"v1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"21","url":"https://workflowhub.eu/workflows/625?version=21","name":"v1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"22","url":"https://workflowhub.eu/workflows/625?version=22","name":"v1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"23","url":"https://workflowhub.eu/workflows/625?version=23","name":"v2.0","author":[],"descriptor_type":["GALAXY"]},{"id":"24","url":"https://workflowhub.eu/workflows/625?version=24","name":"v2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"25","url":"https://workflowhub.eu/workflows/625?version=25","name":"v2.2","author":[],"descriptor_type":["GALAXY"]},{"id":"26","url":"https://workflowhub.eu/workflows/625?version=26","name":"v3.0","author":[],"descriptor_type":["GALAXY"]},{"id":"27","url":"https://workflowhub.eu/workflows/625?version=27","name":"v3.1","author":[],"descriptor_type":["GALAXY"]},{"id":"28","url":"https://workflowhub.eu/workflows/625?version=28","name":"v3.2","author":[],"descriptor_type":["GALAXY"]},{"id":"29","url":"https://workflowhub.eu/workflows/625?version=29","name":"v3.3","author":[],"descriptor_type":["GALAXY"]},{"id":"30","url":"https://workflowhub.eu/workflows/625?version=30","name":"v3.4","author":[],"descriptor_type":["GALAXY"]},{"id":"31","url":"https://workflowhub.eu/workflows/625?version=31","name":"v
3.5","author":[],"descriptor_type":["GALAXY"]}]},{"id":"626","url":"https://workflowhub.eu/workflows/626","name":"MMV_Im2Im","description":"# MMV Im2Im Transformation\r\n\r\n[![Build Status](https://github.com/MMV-Lab/mmv_im2im/workflows/Build%20Main/badge.svg)](https://github.com/MMV-Lab/mmv_im2im/actions)\r\n\r\nA generic python package for deep learning based image-to-image transformation in biomedical applications\r\n\r\nThe main branch will be further developed in order to be able to use the latest state of the art techniques and methods in the future. To reproduce the results of our manuscript, we refer to the branch [paper_version](https://github.com/MMV-Lab/mmv_im2im/tree/paper_version).\r\n(We are actively working on the documentation and tutorials. Submit a feature request if there is anything you need.)\r\n\r\n---\r\n\r\n## Overview\r\n\r\nThe overall package is designed with a generic image-to-image transformation framework, which could be directly used for semantic segmentation, instance segmentation, image restoration, image generation, labelfree prediction, staining transformation, etc.. The implementation takes advantage of the state-of-the-art ML engineering techniques for users to focus on researches without worrying about the engineering details. In our pre-print [arxiv link](https://arxiv.org/abs/2209.02498), we demonstrated the effectiveness of *MMV_Im2Im* in more than ten different biomedical problems/datasets. 
\r\n\r\n* For computational biomedical researchers (e.g., AI algorithm development or bioimage analysis workflow development), we hope this package could serve as the starting point for their specific problems, since the image-to-image \"boilerplates\" can be easily extended for further development or adapted for users' specific problems.\r\n* For experimental biomedical researchers, we hope this work provides a comprehensive view of the image-to-image transformation concept through diversified examples and use cases, so that deep learning based image-to-image transformation could be integrated into the assay development process and permit new biomedical studies that can hardly be done only with traditional experimental methods\r\n\r\n\r\n## Installation\r\n\r\nBefore starting, we recommend to [create a new conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) or [a virtual environment](https://docs.python.org/3/library/venv.html) with Python 3.9+.\r\n\r\nPlease note that the proper setup of hardware is beyond the scope of this package. This package was tested with GPU/CPU on Linux/Windows and CPU on MacOS. [Special note for MacOS users: Directly pip install in MacOS may need [additional setup of xcode](https://developer.apple.com/forums/thread/673827).]\r\n\r\n### Install MONAI\r\n\r\nTo reproduce our results, we need to install MONAI's code version of a specific commit. 
To do this:\r\n```\r\ngit clone https://github.com/Project-MONAI/MONAI.git\r\ncd ./MONAI\r\ngit checkout 37b58fcec48f3ec1f84d7cabe9c7ad08a93882c0\r\npip install .\r\n```\r\n\r\nWe will remove this step for the main branch in the future to ensure a simplified installation of our tool.\r\n\r\n### Install MMV_Im2Im for basic usage:\r\n\r\n(For users only using this package, not planning to change any code or make any extension):\r\n\r\n**Option 1: core functionality only** `pip install mmv_im2im`\u003cbr\u003e\r\n**Option 2: advanced functionality (core + logger)** `pip install mmv_im2im[advance]`\u003cbr\u003e\r\n**Option 3: to reproduce paper:** `pip install mmv_im2im[paper]`\u003cbr\u003e\r\n**Option 4: install everything:** `pip install mmv_im2im[all]`\u003cbr\u003e\r\n\r\nFor MacOS users, additional ' ' marks are need when using installation tags in zsh. For example, `pip install mmv_im2im[paper]` should be `pip install mmv_im2im'[paper]'` in MacOS.\r\n\r\n### Install MMV_Im2Im for customization or extension:\r\n\r\n\r\n```\r\ngit clone https://github.com/MMV-Lab/mmv_im2im.git\r\ncd mmv_im2im\r\npip install -e .[all]\r\n```\r\n\r\nNote: The `-e` option is the so-called \"editable\" mode. This will allow code changes taking effect immediately. The installation tags, `advance`, `paper`, `all`, are be selected based on your needs.\r\n\r\n### (Optional) Install using Docker\r\n\r\nIt is also possible to use our package through [docker](https://www.docker.com/). The installation tutorial is [here](docker/tutorial.md).\r\n\r\n### (Optional) Use MMV_Im2Im with Google Colab\r\n\r\nWe provide a web-based demo, if cloud computing is preferred. you can [![Open a 2D labelfree DEMO in Google Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MMV-Lab/mmv_im2im/blob/main/tutorials/colab/labelfree_2d.ipynb). 
The same demo can be adapted for different applications.\r\n\r\n## Quick start\r\n\r\nYou can try out on a simple example following [the quick start guide](tutorials/quick_start.md)\r\n\r\nBasically, you can specify your training configuration in a yaml file and run training with `run_im2im --config /path/to/train_config.yaml`. Then, you can specify the inference configuration in another yaml file and run inference with `run_im2im --config /path/to/inference_config.yaml`. You can also run the inference as a function with the provided API. This will be useful if you want to run the inference within another python script or workflow.  Here is an example:\r\n\r\n```\r\nfrom pathlib import Path\r\nfrom aicsimageio import AICSImage\r\nfrom aicsimageio.writers import OmeTiffWriter\r\nfrom mmv_im2im.configs.config_base import ProgramConfig, parse_adaptor, configuration_validation\r\nfrom mmv_im2im import ProjectTester\r\n\r\n# load the inference configuration\r\ncfg = parse_adaptor(config_class=ProgramConfig, config=\"./paper_configs/semantic_seg_2d_inference.yaml\")\r\ncfg = configuration_validation(cfg)\r\n\r\n# define the executor for inference\r\nexecutor = ProjectTester(cfg)\r\nexecutor.setup_model()\r\nexecutor.setup_data_processing()\r\n\r\n# get the data, run inference, and save the result\r\nfn = Path(\"./data/img_00_IM.tiff\")\r\nimg = AICSImage(fn).get_image_data(\"YX\", Z=0, C=0, T=0)\r\n# or using delayed loading if the data is large\r\n# img = AICSImage(fn).get_image_dask_data(\"YX\", Z=0, C=0, T=0)\r\nseg = executor.process_one_image(img)\r\nOmeTiffWriter.save(seg, \"output.tiff\", dim_orders=\"YX\")\r\n```\r\n\r\n\r\n## Tutorials, examples, demonstrations and documentations\r\n\r\nThe overall package aims to achieve both simplicity and flexibility with the modularized image-to-image boilerplates. 
To help different users to best use this package, we provide documentations from four different aspects:\r\n\r\n* [Examples (i.e., scripts and config files)](tutorials/example_by_use_case.md) for reproducing all the experiments in our [pre-print](https://arxiv.org/abs/2209.02498)\r\n* A bottom-up tutorials on [how to understand the modularized image-to-image boilerplates](tutorials/how_to_understand_boilerplates.md) (for extending or adapting the package) and [how to understand the configuration system in details](tutorials/how_to_understand_config.md) (for advance usage to make specific customization).\r\n* A top-down tutorials as [FAQ](tutorials/FAQ.md), which will continuously grow as we receive more questions.\r\n* All the models used in the manuscript and sample data can be found here: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10034416.svg)](https://doi.org/10.5281/zenodo.10034416)\r\n\r\n\r\n### Contribute models to [BioImage Model Zoo](https://bioimage.io/#/)\r\n\r\nWe highly appreciate the BioImage Model Zoo's initiative to provide a comprehensive collection of pre-trained models for a wide range of applications. 
To make MMV_Im2Im trained models available as well, the first step involves extracting the state_dict from the PyTorch Lightning checkpoint.\r\nThis can be done via:\r\n\r\n```python\r\nimport torch\r\n\r\nckpt_path = \"./lightning_logs/version_0/checkpoints/last.ckpt\"\r\ncheckpoint = torch.load(ckpt_path, map_location=torch.device('cpu'))\r\nstate_dict = checkpoint['state_dict']\r\ntorch.save(state_dict, \"./state_dict.pt\")\r\n```\r\n\r\nAll further steps to provide models can be found in the [official documentation](https://bioimage.io/docs/#/contribute_models/README).\r\n\r\n## Development\r\n\r\nSee [CONTRIBUTING.md](CONTRIBUTING.md) for information related to developing the code.\r\n\r\n\r\n**MIT license**\r\n","organization":"MMV-Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/626?version=1","name":"main @ ba85ae2","author":["Justin Sonneck"],"descriptor_type":[]}]},{"id":"627","url":"https://workflowhub.eu/workflows/627","name":"PyCOMPSs Matrix Multiplication with Objects (inputs generated by the code)","description":"**Name:** Matrix multiplication with Objects  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nMatrix multiplication is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles. 
The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/matmul_objects.py numberOfBlocks blockSize\r\n```\r\n\r\nwhere:\r\n* numberOfBlocks: Number of blocks inside each matrix\r\n* blockSize: Size of each block\r\n\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/matmul_objects.py 16 4\r\nruncompss src/matmul_objects.py 16 4\r\npython -m pycompss src/matmul_objects.py 16 4\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/627?version=1","name":"COMPSs 3.3.pr","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"628","url":"https://workflowhub.eu/workflows/628","name":"Variant calling from matched tumor/normal sample pair (hg38 version)","description":"Call somatic, germline and LoH event variants from PE Illumina sequencing data obtained from matched pairs of tumor and normal tissue samples.\r\n\r\nThis workflow can be used with whole-genome and whole-exome sequencing data as input. 
For WES data, parts of the analysis can be restricted to the exome capture kits target regions by providing the optional \"Regions of Interest\" bed dataset.\r\n\r\nThe current version uses bwa-mem for read mapping and varscan somatic for variant calling and somatic status classification.","organization":"EOSC4Cancer, usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/628?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"629","url":"https://workflowhub.eu/workflows/629","name":"Cancer variant annotation (hg38 VEP-based) with MAF export","description":"A variation of the Cancer variant annotation (hg38 VEP-based) workflow at https://doi.org/10.48546/workflowhub.workflow.607.1.\r\n\r\nLike that other workflow it takes a list of tumor/normal sample pair variants in VCF format (see the other workflow for details about the expected format) and\r\n\r\n1. annotates them using the ENSEMBL Variant Effect Predictor and custom annotation data\r\n2. turns the annotated VCF into a MAF file for import into cBioPortal\r\n3. generates human-readable variant- and gene-centric reports\r\n\r\nIn addition, this workflow exports the resulting MAF dataset to a WebDAV-enabled remote folder for subsequent import into cBioPortal.\r\nWebDAV access details can be configured in the Galaxy user preferences.","organization":"EOSC4Cancer, usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/629?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"631","url":"https://workflowhub.eu/workflows/631","name":"kmer-profiling-hifi-trio-VGP2/main","description":"Create Meryl Database used for the estimation of assembly parameters and quality control with Merqury. 
Part of the VGP pipeline.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/631?version=1","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/631?version=2","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/631?version=3","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/631?version=4","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/631?version=5","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/631?version=6","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]}]},{"id":"632","url":"https://workflowhub.eu/workflows/632","name":"kmer-profiling-hifi-VGP1/main","description":"Evaluation of Pacbio Hifi Reads and genome profiling. Create Meryl Database used for the estimation of assembly parameters and quality control with Merqury. 
Part of the VGP pipeline.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/632?version=1","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/632?version=2","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/632?version=3","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/632?version=4","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/632?version=5","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/632?version=6","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/632?version=7","name":"v0.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/632?version=8","name":"v0.1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/632?version=9","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/632?version=10","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/632?version=11","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/632?version=12","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/632?version=13","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/632?version=14","name":"v0.8","author":[],"descriptor_type":["GALAXY"]}]},{"id":"633","url":"https://workflowhub.eu/workflows/633","name":"Purge-duplicate-contigs-VGP6/main",
"description":"Purge contigs marked as duplicates by purge_dups. ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/633?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/633?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/633?version=3","name":"v0.2.0","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/633?version=4","name":"v0.3.0","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/633?version=5","name":"v0.3.1","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/633?version=6","name":"v0.3.2","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/633?version=7","name":"v0.3.3","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/633?version=8","name":"v0.3.4","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/633?version=9","name":"v0.3.5","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/633?version=10","name":"v0.3.6","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/633?version=11","name":"v0.3.7","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/633?version=12","name":"v0.3.8","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/633?version=13","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/633?version=14","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/633?v
ersion=15","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/633?version=16","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/633?version=17","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/633?version=18","name":"v0.9","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/633?version=19","name":"v0.10","author":[],"descriptor_type":["GALAXY"]},{"id":"20","url":"https://workflowhub.eu/workflows/633?version=20","name":"v0.10.1","author":[],"descriptor_type":["GALAXY"]},{"id":"21","url":"https://workflowhub.eu/workflows/633?version=21","name":"v0.10.2","author":[],"descriptor_type":["GALAXY"]},{"id":"22","url":"https://workflowhub.eu/workflows/633?version=22","name":"v0.10.3","author":[],"descriptor_type":["GALAXY"]},{"id":"23","url":"https://workflowhub.eu/workflows/633?version=23","name":"v0.10.4","author":[],"descriptor_type":["GALAXY"]},{"id":"24","url":"https://workflowhub.eu/workflows/633?version=24","name":"v0.10.5","author":[],"descriptor_type":["GALAXY"]}]},{"id":"634","url":"https://workflowhub.eu/workflows/634","name":"Workflow 1: AbritAMR","description":"| tool | version | license |\r\n| -- | -- | -- |\r\n| abritAMR | 1.0.14 | [CC-BY-4.0](https://zenodo.org/records/12514579) | ","organization":"Seq4AMR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/634?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"635","url":"https://workflowhub.eu/workflows/635","name":"PyCOMPSs Wordcount test, using files as task inputs, and dictionaries as task outputs (executed at Marenostrum IV supercomputer)","description":"**Name:** Word Count  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License 
Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nWordcount is an application that counts the number of words for a given set of files.\r\n\r\nTo allow parallelism every file is treated separately and merged afterwards.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/wordcount.py datasetPath\r\n```\r\n\r\nwhere:\r\n* datasetPath: Absolute path of the file to parse (e.g. /home/compss/tutorial_apps/python/wordcount/data/)\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/wordcount.py $(pwd)/data/\r\nruncompss src/wordcount.py $(pwd)/data/\r\npython -m pycompss src/wordcount.py $(pwd)/data/\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/635?version=1","name":"COMPSs 3.3.pr","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"638","url":"https://workflowhub.eu/workflows/638","name":"sanger-tol/insdcdownload v1.1.0 - Deciduous ent","description":"# ![sanger-tol/insdcdownload](docs/images/sanger-tol-insdcdownload_logo.png)\r\n\r\n[![GitHub Actions CI Status](https://github.com/sanger-tol/insdcdownload/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/insdcdownload/actions?query=workflow%3A%22nf-core+CI%22)\r\n\r\n\u003c!-- [![GitHub Actions Linting Status](https://github.com/sanger-tol/insdcdownload/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/insdcdownload/actions?query=workflow%3A%22nf-core+linting%22) --\u003e\r\n\r\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7155119-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7155119)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n\r\n[![Get help on Slack](http://img.shields.io/badge/slack-SangerTreeofLife%20%23pipelines-4A154B?labelColor=000000\u0026logo=slack)](https://SangerTreeofLife.slack.com/channels/pipelines)\r\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40sangertol-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/sangertol)\r\n[![Watch on YouTube](http://img.shields.io/badge/youtube-tree--of--life-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/channel/UCFeDpvjU58SA9V0ycRXejhA)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/insdcdownload** is a pipeline that downloads assemblies from INSDC into a Tree of Life directory structure.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the GitHub CI infrastructure. 
This ensures that the pipeline runs in a third-party environment, and has sensible resource allocation defaults set to run on real-world datasets.\r\n\r\n## Pipeline summary\r\n\r\n## Overview\r\n\r\nThe pipeline takes an assembly accession number, as well as the assembly name, and downloads it. It also builds a set of common indices (such as `samtools faidx`), and extracts the repeat-masking performed by the NCBI.\r\n\r\nSteps involved:\r\n\r\n- Download from the NCBI the genomic sequence (Fasta) and the assembly\r\n  stats and reports files.\r\n- Turn the masked Fasta file into an unmasked one.\r\n- Compress and index all Fasta files with `bgzip`, `samtools faidx`, and\r\n  `samtools dict`.\r\n- Generate the `.sizes` file usually required for conversion of data\r\n  files to UCSC's \"big\" formats, e.g. bigBed.\r\n- Extract the coordinates of the masked regions into a BED file.\r\n- Compress and index the BED file with `bgzip` and `tabix`.\r\n\r\n## Quick Start\r\n\r\n1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`\u003e=22.04.0`)\r\n\r\n2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.\r\n\r\n3. 
Download the pipeline and test it on a minimal dataset with a single command:\r\n\r\n   ```bash\r\n   nextflow run sanger-tol/insdcdownload -profile test,YOURPROFILE --outdir \u003cOUTDIR\u003e\r\n   ```\r\n\r\n   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.\r\n\r\n   \u003e - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.\r\n   \u003e - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile \u003cinstitute\u003e` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.\r\n   \u003e - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.\r\n   \u003e - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.\r\n\r\n4. 
Start running your own analysis!\r\n\r\n   ```console\r\n   nextflow run sanger-tol/insdcdownload --assembly_accession GCA_927399515.1 --assembly_name gfLaeSulp1.1 --outdir results\r\n   ```\r\n\r\n## Documentation\r\n\r\nThe sanger-tol/insdcdownload pipeline comes with documentation about the pipeline [usage](docs/usage.md) and [output](docs/output.md).\r\n\r\n## Credits\r\n\r\nsanger-tol/insdcdownload was mainly written by @muffato, with major borrowings from @priyanka-surana's [read-mapping](https://github.com/sanger-tol/readmapping) pipeline, e.g. the script to remove the repeat-masking, and the overall structure and layout of the sub-workflows.\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#pipelines` channel](https://sangertreeoflife.slack.com/channels/pipelines). Please [create an issue](https://github.com/sanger-tol/insdcdownload/issues/new/choose) on GitHub if you are not on the Sanger slack channel.\r\n\r\n## Citations\r\n\r\nIf you use sanger-tol/insdcdownload for your analysis, please cite it using the following doi: [10.5281/zenodo.7155119](https://doi.org/10.5281/zenodo.7155119)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Analysis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/638?version=1","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"639","url":"https://workflowhub.eu/workflows/639","name":"PyCOMPSs Randomized SVD","description":"**Name:** TruncatedSVD (Randomized SVD)  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum4  \r\n\r\nTruncatedSVD (Randomized SVD) for computing just 456 singular values out of a (3.6M x 1200) size matrix.  \r\nThe input matrix represents a CFD transient simulation of aire moving past a cylinder.  \r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/639?version=1","name":"Version 1","author":["Cristian Tatu"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/639?version=2","name":"Version 2","author":["Cristian Tatu"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/639?version=3","name":"Version 3","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"640","url":"https://workflowhub.eu/workflows/640","name":"Autosubmit mHM test domains","description":"Autosubmit mHM test domains","organization":"BSC-CES","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/640?version=1","name":"Version 
1","author":[],"descriptor_type":[]}]},{"id":"641","url":"https://workflowhub.eu/workflows/641","name":"Assembly-Hifi-HiC-phasing-VGP4/main","description":"# Contiging Solo w/HiC:\n\nGenerate phased assembly based on PacBio Hifi Reads using HiC data from the same individual for phasing.\n\n## Inputs\n\n1. Hifi long reads [fastq]\n2. HiC forward reads (if multiple input files, concatenated in same order as reverse reads) [fastq]\n3. HiC reverse reads (if multiple input files, concatenated in same order as forward reads) [fastq]\n4. K-mer database [meryldb]\n5. Genome profile summary generated by Genomescope [txt]\n6. Name of first assembly\n7. Name of second assembly\n\n## Outputs\n\n1. Haplotype 1 assembly ([fasta] and [gfa])\n2. Haplotype 2 assembly ([fasta] and [gfa])\n3. QC: BUSCO report for both assemblies\n4. QC: Merqury report for both assemblies\n5. QC: Assembly statistics for both assemblies\n6. QC: Nx plot for both assemblies\n7. QC: Size plot for both assemblies","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/641?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/641?version=2","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/641?version=3","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/641?version=4","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/641?version=5","name":"v0.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/641?version=6","name":"v0.1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/641?version=7","name":"v0.1.9","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/641?version=8","name":"v0.1.10","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/641?version=9","name":"v0.1.11","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/641?version=10","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/641?version=11","name":"v0.2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/641?version=12","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/641?version=13","name":"v0.2.2","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/641?version=14","name":"v0.2.3","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/641?version=15","name":"v0.2.4","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/641?version=16","name":"v0.2.5","author":[],"descriptor_type":["GALAXY"
]},{"id":"17","url":"https://workflowhub.eu/workflows/641?version=17","name":"v0.3.0","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/641?version=18","name":"v0.3.1","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/641?version=19","name":"v0.3.2","author":[],"descriptor_type":["GALAXY"]},{"id":"20","url":"https://workflowhub.eu/workflows/641?version=20","name":"v0.3.3","author":[],"descriptor_type":["GALAXY"]},{"id":"21","url":"https://workflowhub.eu/workflows/641?version=21","name":"v0.3.4","author":[],"descriptor_type":["GALAXY"]},{"id":"22","url":"https://workflowhub.eu/workflows/641?version=22","name":"v0.3.5","author":[],"descriptor_type":["GALAXY"]},{"id":"23","url":"https://workflowhub.eu/workflows/641?version=23","name":"v0.3.6","author":[],"descriptor_type":["GALAXY"]},{"id":"24","url":"https://workflowhub.eu/workflows/641?version=24","name":"v0.3.7","author":[],"descriptor_type":["GALAXY"]},{"id":"25","url":"https://workflowhub.eu/workflows/641?version=25","name":"v0.3.8","author":[],"descriptor_type":["GALAXY"]},{"id":"26","url":"https://workflowhub.eu/workflows/641?version=26","name":"v0.3.9","author":[],"descriptor_type":["GALAXY"]},{"id":"27","url":"https://workflowhub.eu/workflows/641?version=27","name":"v0.3.10","author":[],"descriptor_type":["GALAXY"]},{"id":"28","url":"https://workflowhub.eu/workflows/641?version=28","name":"v0.3.11","author":[],"descriptor_type":["GALAXY"]},{"id":"29","url":"https://workflowhub.eu/workflows/641?version=29","name":"v0.3.12","author":[],"descriptor_type":["GALAXY"]},{"id":"30","url":"https://workflowhub.eu/workflows/641?version=30","name":"v0.3.13","author":[],"descriptor_type":["GALAXY"]},{"id":"31","url":"https://workflowhub.eu/workflows/641?version=31","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"32","url":"https://workflowhub.eu/workflows/641?version=32","name":"v0.5","author":[],"descriptor_type":["GALAXY"
]},{"id":"33","url":"https://workflowhub.eu/workflows/641?version=33","name":"v0.6","author":[],"descriptor_type":["GALAXY"]}]},{"id":"642","url":"https://workflowhub.eu/workflows/642","name":"Assembly-Hifi-Trio-phasing-VGP5/main","description":"Generate phased assembly based on PacBio HiFi reads and parental Illumina data for phasing. Part of the VGP workflow suite, it needs to be run after the Trio k-mer Profiling workflow VGP2. This workflow uses HiFiasm for contigging, and generates assembly statistics, BUSCO reports, Merqury plots, and the genome assembly contigs in fasta and GFA format. ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/642?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/642?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/642?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/642?version=4","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/642?version=5","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/642?version=6","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/642?version=7","name":"v0.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/642?version=8","name":"v0.1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/642?version=9","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/642?version=10","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"htt
ps://workflowhub.eu/workflows/642?version=11","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/642?version=12","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/642?version=13","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/642?version=14","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/642?version=15","name":"v0.8","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/642?version=16","name":"v0.9.0","author":[],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/642?version=17","name":"v0.9.1","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/642?version=18","name":"v0.9.2","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/642?version=19","name":"v0.9.3","author":[],"descriptor_type":["GALAXY"]},{"id":"20","url":"https://workflowhub.eu/workflows/642?version=20","name":"v0.9.4","author":[],"descriptor_type":["GALAXY"]},{"id":"21","url":"https://workflowhub.eu/workflows/642?version=21","name":"v0.9.5","author":[],"descriptor_type":["GALAXY"]},{"id":"22","url":"https://workflowhub.eu/workflows/642?version=22","name":"v0.9.6","author":[],"descriptor_type":["GALAXY"]},{"id":"23","url":"https://workflowhub.eu/workflows/642?version=23","name":"v0.9.7","author":[],"descriptor_type":["GALAXY"]},{"id":"24","url":"https://workflowhub.eu/workflows/642?version=24","name":"v0.9.8","author":[],"descriptor_type":["GALAXY"]},{"id":"25","url":"https://workflowhub.eu/workflows/642?version=25","name":"v0.9.9","author":[],"descriptor_type":["GALAXY"]}]},{"id":"643","url":"https://workflowhub.eu/workflows/643","name":"Scaffolding-Bionano-VGP7/main","description":"# Scaffolding with Bionano\n\nScaffolding 
using Bionano optical map data\n\n## Inputs\n\n1. Bionano data [cmap]\n2. Estimated genome size [txt]\n3. Phased assembly generated by Hifiasm [gfa1]\n\n## Outputs\n\n1. Scaffolds\n2. Non-scaffolded contigs\n3. QC: Assembly statistics\n4. QC: Nx plot\n5. QC: Size plot","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/643?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/643?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/643?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/643?version=4","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/643?version=5","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]}]},{"id":"644","url":"https://workflowhub.eu/workflows/644","name":"Workflow 2: Sciensano","description":"| database | database version |\r\n| -- | -- |\r\n| ResFinder | 2022-07-19 |\r\n| CARD | 2023-12-03 |","organization":"Seq4AMR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/644?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"645","url":"https://workflowhub.eu/workflows/645","name":"Assembly-decontamination-VGP9/main","description":"Decontamination (foreign contaminants and mitochondrial sequences) of a genome assembly after the final scaffolding step. Uses NCBI FCS GX  to identify foreign contaminants and Blast to identify mitochondrial sequences. 
Part of the VGP Suite.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/645?version=1","name":"v0.1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/645?version=2","name":"v0.1.1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/645?version=3","name":"v0.1.2","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/645?version=4","name":"v0.1.3","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/645?version=5","name":"v0.1.4","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/645?version=6","name":"v0.1.6","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/645?version=7","name":"v0.2","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/645?version=8","name":"v0.3","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/645?version=9","name":"v0.4","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/645?version=10","name":"v0.5","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/645?version=11","name":"v0.6","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/645?version=12","name":"v0.7","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/645?version=13","name":"v0.8","author":["Delphine 
Lariviere"],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/645?version=14","name":"v1.0","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/645?version=15","name":"v1.1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/645?version=16","name":"v1.2","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"17","url":"https://workflowhub.eu/workflows/645?version=17","name":"v1.3","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"646","url":"https://workflowhub.eu/workflows/646","name":"scRNAseq: Count and Load with Cell Ranger","description":"Takes fastqs and reference data, to produce a single cell counts matrix into and save in annData format - adding a column called sample with the sample name.  \r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/646?version=1","name":"main @ abc9384","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/646?version=2","name":"main @ 7581788","author":[],"descriptor_type":["GALAXY"]}]},{"id":"647","url":"https://workflowhub.eu/workflows/647","name":"scRNAseq Single Sample Processing Cell Ranger","description":"From the R1 and R2 fastq files of a single samples, make a scRNAseq counts matrix, and perform basic QC with scanpy. Then, do further processing by making a UMAP and clustering. 
Produces a processed AnnData \r\nDeprecated: use individual workflows instead for multiple samples","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/647?version=1","name":"main @ abc9384","author":[],"descriptor_type":["GALAXY"]}]},{"id":"648","url":"https://workflowhub.eu/workflows/648","name":"Workflow 1: Further Quality Control [16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\r\n\r\nThe workflows in this collection are from the '16S Microbial Analysis with mothur' tutorial for analysis of 16S data (Saskia Hiltemann, Bérénice Batut, Dave Clements), adapted for pipeline use on galaxy australia (Ahmed Mehdi). The workflows developed in galaxy use mothur software package developed by Schloss et al https://pubmed.ncbi.nlm.nih.gov/19801464/. \r\n\r\nPlease also refer to the 16S tutorials available at Galaxy https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop-short/tutorial.html and [https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop/tutorial.html\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/648?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"649","url":"https://workflowhub.eu/workflows/649","name":"Workflow 2: Data Cleaning And Chimera Removal [16S Microbial Analysis With Mothur]","description":"The workflows in this collection are from the '16S Microbial Analysis with mothur' tutorial for analysis of 16S data (Saskia Hiltemann, Bérénice Batut, Dave Clements), adapted for pipeline use on galaxy australia (Ahmed Mehdi). 
The workflows developed in galaxy use mothur software package developed by Schloss et al https://pubmed.ncbi.nlm.nih.gov/19801464/. \r\n\r\nPlease also refer to the 16S tutorials available at Galaxy https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop-short/tutorial.html and [https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop/tutorial.html\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/649?version=1","name":"Version 1","author":["Saskia Hiltemann"],"descriptor_type":["GALAXY"]}]},{"id":"650","url":"https://workflowhub.eu/workflows/650","name":"Workflow 3: Classification [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"The workflows in this collection are from the '16S Microbial Analysis with mothur' tutorial for analysis of 16S data (Saskia Hiltemann, Bérénice Batut, Dave Clements), adapted for pipeline use on galaxy australia (Ahmed Mehdi). The workflows developed in galaxy use mothur software package developed by Schloss et al https://pubmed.ncbi.nlm.nih.gov/19801464/. 
\r\n\r\nPlease also refer to the 16S tutorials available at Galaxy https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop-short/tutorial.html and [https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop/tutorial.html\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/650?version=1","name":"Version 1","author":["Saskia Hiltemann"],"descriptor_type":["GALAXY"]}]},{"id":"651","url":"https://workflowhub.eu/workflows/651","name":"Workflow 5: OTU Clustering [16S Microbial Analysis With Mothur]","description":"The workflows in this collection are from the '16S Microbial Analysis with mothur' tutorial for analysis of 16S data (Saskia Hiltemann, Bérénice Batut, Dave Clements), adapted for pipeline use on galaxy australia (Ahmed Mehdi). The workflows developed in galaxy use mothur software package developed by Schloss et al https://pubmed.ncbi.nlm.nih.gov/19801464/. 
\r\n\r\nPlease also refer to the 16S tutorials available at Galaxy https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop-short/tutorial.html and [https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop/tutorial.html\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/651?version=1","name":"Version 1","author":["Saskia Hiltemann"],"descriptor_type":["GALAXY"]}]},{"id":"652","url":"https://workflowhub.eu/workflows/652","name":"Workflow 6: Alpha Diversity [16S Microbial Analysis With Mothur]","description":"The workflows in this collection are from the '16S Microbial Analysis with mothur' tutorial for analysis of 16S data (Saskia Hiltemann, Bérénice Batut, Dave Clements), adapted for pipeline use on galaxy australia (Ahmed Mehdi). The workflows developed in galaxy use mothur software package developed by Schloss et al https://pubmed.ncbi.nlm.nih.gov/19801464/. 
\r\n\r\nPlease also refer to the 16S tutorials available at Galaxy https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop-short/tutorial.html and [https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop/tutorial.html\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/652?version=1","name":"Version 1","author":["Saskia Hiltemann"],"descriptor_type":["GALAXY"]}]},{"id":"653","url":"https://workflowhub.eu/workflows/653","name":"Workflow 7 : Beta Diversity [16S Microbial Analysis With Mothur]","description":"The workflows in this collection are from the '16S Microbial Analysis with mothur' tutorial for analysis of 16S data (Saskia Hiltemann, Bérénice Batut, Dave Clements), adapted for pipeline use on galaxy australia (Ahmed Mehdi). The workflows developed in galaxy use mothur software package developed by Schloss et al https://pubmed.ncbi.nlm.nih.gov/19801464/. 
\r\n\r\nPlease also refer to the 16S tutorials available at Galaxy https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop-short/tutorial.html and [https://training.galaxyproject.org/training-material/topics/metagenomics/tutorials/mothur-miseq-sop/tutorial.html\r\n\r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/653?version=1","name":"Version 1","author":["Saskia Hiltemann"],"descriptor_type":["GALAXY"]}]},{"id":"654","url":"https://workflowhub.eu/workflows/654","name":"ANNOTATO - ERGA Genome Annotation Workflow in Nextflow","description":"# ANNOTATO - Annotation workflow To Annotate Them Oll\r\n\r\n- [ANNOTATO - Annotation workflow To Annotate Them Oll](#annotato---annotation-workflow-to-annotate-them-oll)\r\n  - [Overview of the workflow](#overview-of-the-workflow)\r\n    - [Input data](#input-data)\r\n    - [Pipeline steps](#pipeline-steps)\r\n    - [Output data](#output-data)\r\n  - [Prerequisites](#prerequisites)\r\n  - [Installation](#installation)\r\n  - [Running ANNOTATO](#running-annotato)\r\n    - [Before running the pipeline (IMPORTANT)](#before-running-the-pipeline-important)\r\n    - [Without RNASeq and protein data](#without-rnaseq-and-protein-data)\r\n    - [Running ANNOTATO with RNASeq data](#running-annotato-with-rnaseq-data)\r\n    - [Running ANNOTATO with protein data](#running-annotato-with-protein-data)\r\n    - [Running ANNOTATO with both protein and RNASeq data](#running-annotato-with-both-protein-and-rnaseq-data)\r\n    - [Running ANNOTATO with params.json](#running-annotato-with-paramsjson)\r\n    - [Other parameters for running the analysis](#other-parameters-for-running-the-analysis)\r\n  - [Evaluating output GFF to the exon level](#evaluating-output-gff-to-the-exon-level)\r\n  - [Performance of the workflow on annotating difference eukaryote 
genomes](#performance-of-the-workflow-on-annotating-difference-eukaryote-genomes)\r\n  - [Future work](#future-work)\r\n\r\n## Overview of the workflow\r\n\r\nThe pipeline is based on `Funannotate` or `BRAKER` and was initially developed and tested on the two datasets:\r\n- Drosophila melanogaster: [https://doi.org/10.5281/zenodo.8013373](https://doi.org/10.5281/zenodo.8013373)\r\n- *Pocillopora* cf. *effusa*: [https://www.ncbi.nlm.nih.gov/biosample/26809107](https://www.ncbi.nlm.nih.gov/biosample/26809107)\r\n\r\nThen, it was further tested on these species during the [BioHackathon 2023 - project 20](https://github.com/elixir-europe/biohackathon-projects-2023/tree/main/20)\r\n\r\n- Helleia helle\r\n- Homo sapiens chrom 19\r\n- Melampus jaumei\r\n- Phakellia ventilabrum\r\n- Trifolium dubium\r\n\r\n### Input data\r\n\r\n- Reference genome `genome.[.fna, .fa, .fasta][.gz]`\r\n- RNAseq data listed in a metadata csv file. Input type can be mixed between long and short reads, with the option of single-end read. The input file should follow the format below:\r\n\r\n```\r\nsample_id,R1_path,R2_path,read_type\r\nSAM1,/path/to/R1,,long             # For long reads\r\nSAM2,/path/to/R1,/path/to/R2,short # For PE reads\r\nSAM3,/path/to/R1,,short            # For SE reads\r\n```\r\n\r\n- Protein sequence data in fasta format, could be gzip or not\r\n\r\n### Pipeline steps\r\n\r\n![Pipeline](./assets/images/annotato-workflow.drawio.svg)\r\n\r\nThe main pipeline is divided into five different subworkflows.\r\n- `Preprocess RNA` is where the input RNASeq data are QC and trimmed.\r\n- `Process RNA Minimap` is triggered when long reads FastQ are in the input CSV file.\r\n- `Process RNA STAR` will run when short reads FastQ are in the input CSV.\r\n- `Genome Masking` runs by default if not skipped. 
It assumes the input genome fasta is not masked and will run Denovo repeat masking with RepeatModeler and RepeatMasker.\r\n- `Filter Repeat` whenever there is a Denovo masking step, this sub-workflow will be triggered to remove the repeat sequences that appeared in the Uniprot Swissprot protein data. \r\n\r\n### Output data\r\n\r\n- MultiQC report for the RNASeq data, before and after trimming, mapping rate of short reads, and the BUSCO results of predicted genes.\r\n- RepeatMasker report containing quantity of masked sequence and distribution among TE families\r\n- Protein-coding gene annotation file in gff3 format\r\n- BUSCO summary of annotated sequences\r\n\r\n## Prerequisites\r\n\r\nThe following programs are required to run the workflow and the listed versions were tested. \r\n\r\n`nextflow v23.04.0 or higher`\r\n\r\n`singularity`\r\n\r\n`conda` and `mamba` (currently, having problems with Funannotate and BRAKER installation)\r\n\r\n`docker` (have not been tested but in theory should work fine)\r\n\r\n## Installation\r\n\r\nSimply get the code from github or workflowhub and directly use it for the analysis with `nextflow`.\r\n\r\n```\r\ngit clone https://github.com/ERGA-consortium/pipelines/tree/main/annotation/nextflow\r\n```\r\n\r\n## Running ANNOTATO\r\n\r\n### Before running the pipeline (IMPORTANT)\r\n\r\nOne thing with Nextflow is that it is running off a Java Virtual Machine (JVM), and it will try to use all available memory for Nextflow even though it is unnecessary (for workflow management and job control). This will cause much trouble if you run a job on an HPC cluster. 
Thus, to minimize the effect of it, we need to limit the maximum memory the JVM can use.\r\n\r\n```\r\nexport NFX_OPTS=\"-Xms=512m -Xmx=3g\"\r\n```\r\n\r\n`-Xms` is the lower limit, which is set as 512 MB.\r\n`-Xmx` is the upper limit, which in this case is set as 3 GB.\r\nPlease modify this according to your situation.\r\n\r\n### Without RNASeq and protein data\r\n\r\nPerform the analysis with only the draft genome and busco database.\r\n\r\n```\r\nnextflow run main.nf --genome /path/to/genome.fasta --species \"Abc def\" --buscodb 'metazoa' \r\n```\r\n\r\nThe workflow will run Denovo repeat masking on the draft genome, then softmask the repeat region and use the genome to run `funannotate`. Add `--run_braker` to run the genome prediction using `BRAKER` instead.\r\n\r\n### Running ANNOTATO with RNASeq data\r\n\r\nWhen you want to let the workflow run the mapping by itself, uses `input.csv` as input with the link to all `FASTQ` file.\r\n\r\n```\r\nnextflow run main.nf --genome /path/to/genome.fasta[.gz] --rnaseq /path/to/input.csv --species \"Abc def\" --buscodb 'metazoa' \r\n```\r\n\r\nBased on the content of the `input.csv` file to trigger different RNASeq processing workflows. The output `bam` file will then be used for genome prediction.\r\n\r\nWhen reads are mapped to the reference genome, the aligned `bam` file can be used as input to the pipeline instead of the raw `FASTQ`\r\n\r\n```\r\nnextflow run main.nf --genome /path/to/genome.fasta[.gz] --short_rna_bam /path/to/shortreads.bam [--long_rna_bam /path/to/longreads.bam] --species \"Abc def\" --buscodb 'metazoa' \r\n```\r\n\r\n**ATTENTION**: One major drawback of the current workflow is that the input genome will be sorted and renamed by the `funannotate sort` function. This is because `AUGUSTUS` and `Funannotate` won't work normally when the header of the input genome is too long and contains weird characters. 
Therefore, if you want to provide a `bam` file as input instead of the raw `FASTQ`, please run `funannotate sort` on the genome fasta first and then use it as the reference for running alignment. Or in case your genome headers are already shorter than 16 character, please add `--skip_rename` when running the pipeline.\r\n\r\n### Running ANNOTATO with protein data\r\n\r\n```\r\nnextflow run main.nf --genome /path/to/genome.fasta[.gz] --protein /path/to/protein.fasta[.gz] --species \"Abc def\" --buscodb 'metazoa' \r\n```\r\n\r\nWhen only protein data is provided, the workflow will run denovo masking then repeat filter with the additional protein data. The masked genome and protein fasta will then be used for gene prediction.\r\n\r\n### Running ANNOTATO with both protein and RNASeq data\r\n\r\nThe full pipeline is triggered when both RNASeq data and protein fasta is provided.\r\n\r\n```\r\nnextflow run main.nf --genome /path/to/genome.fasta[.gz] --protein /path/to/protein.fasta[.gz] --rnaseq /path/to/input.csv --species \"Abc def\" --buscodb 'metazoa' \r\n```\r\n\r\n### Running ANNOTATO with params.json\r\n\r\nOne plus side with Nextflow is that it can use a parameter JSON file called `params.json` to start the analysis pipeline with all required parameters. Please modify the content of the `params.json` according to your need then run the following command.\r\n\r\n```\r\nnextflow run main.nf -params-file params.json\r\n```\r\n\r\n### Other parameters for running the analysis\r\n\r\n```\r\nCompulsory input:\r\n--genome                       Draft genome fasta file contain the assembled contigs/scaffolds\r\n--species                      Species name for the annotation pipeline, e.g. 
\"Drosophila melanogaster\"\r\n\r\nOptional input:\r\n--protein                      Fasta file containing known protein sequences used as an additional information for gene prediction pipeline.\r\n                               Ideally this should come from the same species and/or closely related species. [default: null]\r\n--rnaseq                       A CSV file following the pattern: sample_id,R1_path,R2_path,read_type.\r\n                               This could be generated using gen_input.py. Run `python gen_input.py --help` for more information. \r\n                               [default: null]\r\n--long_rna_bam                 A BAM file for the alignment of long reads (if any) to the draft genome. Noted that the header of the draft\r\n                               genome need to be renamed first before alignment otherwise it will causes trouble for AUGUSTUS and funannotate. \r\n                               [default: null]\r\n--short_rna_bam                A BAM file for the alignment of short reads (if any) to the draft genome. Noted that the header of the draft \r\n                               genome need to be renamed first before alignment otherwise it will causes trouble for AUGUSTUS and funannotate. \r\n                               [default: null]\r\n--knownrepeat                  Fasta file containing known repeat sequences of the species, this will be used directly for masking \r\n                               (if --skip_denovo_masking) or in combination with the denovo masking. [default: null]\r\n\r\nOutput option:\r\n--outdir                       Output directory. \r\n--tracedir                     Pipeline information. \r\n--publish_dir_mode             Option for nextflow to move data to the output directory. [default: copy]\r\n--tmpdir                       Database directory. \r\n\r\nFunannotate params:\r\n--run_funannotate              Whether to use funannotate for gene prediction. 
[default: true]\r\n--organism                     Fungal-specific option. Should be change to \"fungus\" if the annotated organism is fungal. [default: other]\r\n--ploidy                       Set the ploidy for gene prediction, in case of haploid, a cleaning step will be performed by funannotate to remove\r\n                               duplicated contigs/scaffold. [default: 2]\r\n--buscodb                      BUSCO database used for AUGUSTUS training and evaluation. [default: eukaryota]\r\n--buscoseed                    AUGUSTUS pre-trained species to start BUSCO. Will be override if rnaseq data is provided. [default: null]\r\n\r\nBraker params:\r\n--run_braker                   Whether to use BRAKER for gene prediction. [default: false]\r\n\r\nSkipping options:\r\n--skip_rename                  Skip renaming genome fasta file by funannotate sort. \r\n--skip_all_masking             Skip all masking processes, please be sure that your --genome input is soft-masked before triggering this \r\n                               parameter. [default: false]\r\n--skip_denovo_masking          Skip denovo masking using RepeatModeler, this option can only be run when --knownrepeat fasta is provided. \r\n                               [default: false]\r\n--skip_functional_annotation   Skip functional annotation step. [default: false]\r\n--skip_read_preprocessing      Skip RNASeq preprocessing step. [default: false]\r\n\r\nExecution/Engine profiles:\r\nThe pipeline supports profiles to run via different Executers and Engines e.g.: -profile local,conda\r\n\r\nExecuter (choose one):\r\n  local\r\n  slurm\r\n\r\nEngines (choose one):\r\n  conda\r\n  mamba\r\n  docker\r\n  singularity\r\n\r\nPer default: -profile slurm,singularity is executed.\r\n```\r\n\r\n## Evaluating output GFF to the exon level\r\n\r\nWe provided a script to analyze the output GFF of ANNOTATO (which also could be applied to the GFF file output of other pipelines) to report the number of exons per mRNA/tRNA. 
To run this, simply use:\r\n\r\n```\r\npython bin/analyze_exons.py -f ${GFF}\r\n```\r\n\r\nBelow is the sample output of this script\r\n\r\n```\r\nINFORMATION REGARDING mRNA\r\nNumber of transcripts: 33086\r\nLargest number of exons in all transcripts: 128\r\nMonoexonic transcripts: 4085\r\nMultiexonic transcripts: 29001\r\nMono:Mult Ratio: 0.14\r\nBoxplot of number of exons per transcript:\r\nMin: 1\r\n25%: 2\r\n50%: 4\r\n75%: 8\r\nMax: 128\r\nMean: 6.978812790908542\r\n==================================================\r\nINFORMATION REGARDING tRNA\r\nNumber of transcripts: 2017\r\nLargest number of exons in all transcripts: 1\r\nMonoexonic transcripts: 2017\r\nMultiexonic transcripts: 0\r\nNo multiexonic transcripts, unable to calculate Mono:Mult Ratio\r\nBoxplot of number of exons per transcript:\r\nMin: 1\r\n25%: 1\r\n50%: 1\r\n75%: 1\r\nMax: 1\r\nMean: 1.0\r\n==================================================\r\n```\r\n\r\nThis script was originally written by [Katharina Hoff](https://github.com/Gaius-Augustus/GALBA/blob/main/scripts/analyze_exons.py) and was modified accordingly to suit the analysis of GFF file.\r\n\r\n## Performance of the workflow on annotating difference eukaryote genomes\r\n\r\nThe following table is the result predicted by ANNOTATO on difference species during the [Europe BioHackathon 2023](https://github.com/elixir-europe/biohackathon-projects-2023/tree/main/20).\r\n\r\n| Species                    | Genome size | N.Genes | N.Exons | N.mRNA | BUSCO lineage | BUSCO score                             | OMArk Completeness                                                 | OMArk Consistency                                                                       |\r\n| :---:                      | :---:       | :---:   | :---:   | :---:  | :---:         | :---:                                   | :---:                                                              | :---:                                                                             
      |\r\n| Drosophila melanogaster    | 143M        | 14,753  | 57,343  | 14,499 | diptera       | C:96.1%[S:95.6%,D:0.5%],F:1.2%,M:2.7%   | melanogaster subgroup, C:90.38%[S:84.32%,D:6.06%],M:9.62%,,n:12442 | A:94.21%[P:4.05%,F:7.28%],I:1.61%[P:0.5%,F:0.42%],C:0.00%[P:0.00%,F:0.00%],U:4.19%      |\r\n| Helleia helle              | 547M        | 37,367  | 139,302 | 28,445 | lepidoptera   | C:74.6%[S:73.4%,D:1.2%],F:5.4%,M:20.0%  | Papilionidea, C:82.04%[S:66.12%,D:15.92%],M:17.96%, n:7939         | A:44.78%[P:14.41%,F:6.02%],I:3.53%[P:2.1%,F:0.7%],C:0.00%[P:0.00%,F:0.00%],U:51.69%     |\r\n| Homo sapiens chrom 19      | 58M         | 1,872   | 11,937  | 1,862  | primates      | C:5.0%[S:4.8%,D:0.2%],F:0.5%,M:94.5%    | Hominidae, C:8.57%[S:7.74%,D:0.83%],M:91.43%, n=17843              | A:87.54%[P:12.73%,F:13.1%],I:4.78%[P:1.5%,F:2.04%],C:0.00%[P:0.00%,F:0.00%],U:7.68%     |\r\n| Melampus jaumei            | 958M        | 61,128  | 335,483 | 60,720 | mollusca      | C:80.4%[S:67.2%,D:13.2%],F:3.8%,M:15.8% | Lophotrochozoa, C: 92.5%[S: 66.29%, D: 26.21%], M:7.5%, n:2373     | A:41.45%[P:15.72%,F:9.97%],I:15.97%[P:10.68%,F:3.07%],C:0.00%[P:0.00%,F:0.00%],U:42.57% |\r\n| Phakellia ventilabrum      | 186M        | 19,073  | 157,441 | 18,855 | metazoa       | C:80.9%[S:79.2%,D:1.7%],F:6.5%,M:12.6%  | Metazoa, C:86.79%[S:76.9%,D:9.9%],M:13.21% , n:3021                | A:53.81%[P:18.92%,F:5.06%],I:5.0%[P:2.7%,F:0.68%],C:0.00%[P:0.00%,F:0.00%],U:41.19%     |\r\n| *Pocillopora* cf. 
*effusa* | 347M        | 35,103  | 230,901 | 33,086 | metazoa       | C:95.1%[S:92.2%,D:2.9%],F:1.7%,M:3.2%   | Eumetazoa, C:94.16%[S:84.3%,D:9.86%],M:5.84%,n:3255                | A:52.94%[P:22.30%,F:3.69%],I:3.44%[P:2.08%,F:0.28%],C:0.00%[P:0.00%,F:0.00%],U:43.62%   |\r\n| Trifolium dubium           | 679M        | 78,810  | 354,662 | 77,763 | fabales       | C:95.1%[S:19.5%,D:75.6%],F:1.5%,M:3.4%  | NPAAA clade, C:94.58%[S:19.21%,D:75.38%],M:5.42%,n:15412           | A:71.99%[P:11.03%,F:6.63%],I:2.77%[P:1.66%,F:0.52%],C:0.00%[P:0.00%,F:0.00%],U:25.23%   |\r\n\r\n## Future work\r\n- Python wrapper function to remove intermediate files\r\n- Adding functional annotation with `Interproscan` and `eggnog`\r\n- Adding PASA results to further improve the accuracy of the training\r\n- Adding custom parameter for both `BRAKER` and `funannotate`","organization":"Biodiversity Genomics Europe (general), Bioinformatics Laboratory for Genomics and Biodiversity (LBGB), ERGA Annotation","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/654?version=1","name":"Version 1","author":["Phuong Doan"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/654?version=2","name":"Version 2","author":["Phuong Doan"],"descriptor_type":["NFL"]}]},{"id":"655","url":"https://workflowhub.eu/workflows/655","name":"Obitools eDNA metabarcoding","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"[Metabarcoding/eDNA through Obitools](https://training.galaxyproject.org/training-material/topics/ecology/tutorials/Obitools-metabarcoding/tutorial.html)\" .\r\n\r\nThis workflow allows to analyze DNA metabarcoding / eDNA data produced on Illumina sequencers using the OBITools.","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/655?version=1","name":"Version 1","author":["Yvan Le Bras","Coline Royaux"],"descriptor_type":["GALAXY"]}]},{"id":"656","url":"https://workflowhub.eu/workflows/656","name":"Biodiversity data exploration tutorial","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"[Biodiversity data exploration](https://training.galaxyproject.org/training-material/topics/ecology/tutorials/biodiversity-data-exploration/tutorial.html)\"\r\n\r\nThis workflow allows to explore biodiversity data looking at homoscedasticity, normality or collinearity of presences-absence or abundance data and at comparing beta diversity taking into account space, time and species components","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/656?version=1","name":"Version 1","author":["Yvan Le Bras","Coline Royaux"],"descriptor_type":["GALAXY"]}]},{"id":"657","url":"https://workflowhub.eu/workflows/657","name":"Remote sensing Sentinel 2 data analysis to produce biodiversity metrics","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"[Sentinel 2 biodiversity](https://training.galaxyproject.org/training-material/topics/ecology/tutorials/species-distribution-modeling/tutorial.html)\" .\r\n\r\nThis workflow allows to analyze remote sensing sentinel 2 satellites data to compute spectral indices such as the NDVI and visualizing biodiversity indicators\r\n","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/657?version=1","name":"Version 1","author":["Yvan Le Bras","Coline 
Royaux"],"descriptor_type":["GALAXY"]}]},{"id":"658","url":"https://workflowhub.eu/workflows/658","name":"Ecoregionalization on Antarctic sea","description":"\r\n\r\nGalaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"Antarctic sea ecoregionalization\" .\r\n\r\nThis workflow allows to analyze marine benthic biodiversity data to compute ecoregions regarding environmental data.\r\n","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/658?version=1","name":"Version 1","author":["Yvan Le Bras","Coline Royaux"],"descriptor_type":["GALAXY"]}]},{"id":"659","url":"https://workflowhub.eu/workflows/659","name":"Animal dive prediction using deep learning","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"Deep learning to predict animal behavior\" .\r\n\r\nThis workflow allows to analyze animal behavior data through deep learning.\r\n","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/659?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"660","url":"https://workflowhub.eu/workflows/660","name":"SPIPOLL MMOS GAPARS crowdsourcing results","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, to analyze crowdsourcing results of the SPIPOLL hoverflies GAPARS European project activity on MMOS server.\r\n","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/660?version=1","name":"Version 1","author":["Yvan Le Bras"],"descriptor_type":["GALAXY"]}]},{"id":"661","url":"https://workflowhub.eu/workflows/661","name":"Boulder fields 
indicators","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"[Champs blocs](https://training.galaxyproject.org/training-material/topics/ecology/tutorials/champs-blocs/tutorial.html)\" .\r\n\r\nThis workflow allows to produce Visual Rollover Indicator and dissimilarity as diversity indices on boulder fields.\r\n","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/661?version=1","name":"Version 1","author":["Yvan Le Bras"],"descriptor_type":["GALAXY"]}]},{"id":"662","url":"https://workflowhub.eu/workflows/662","name":"Obis biodiversity indicator on Asian pacific","description":"Galaxy Workflow created on Galaxy-E european instance, ecology.usegalaxy.eu, related to the Galaxy training tutorial \"[OBIS marine indicators](https://training.galaxyproject.org/training-material/topics/ecology/tutorials/obisindicators/tutorial.html)\" .\r\n\r\nThis workflow allows to compute and visualize marine biodiversity indicators from OBIS data.\r\n","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/662?version=1","name":"Version 1","author":["Yvan Le Bras"],"descriptor_type":["GALAXY"]}]},{"id":"663","url":"https://workflowhub.eu/workflows/663","name":"Java COMPSs K-means clustering example (executed at Marenostrum IV supercomputer, inputs generated by the code)","description":"**Name:** K-means  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: Public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nK-means clustering is a method of cluster analysis that aims to partition ''n'' points into ''k'' clusters in which each point belongs to the cluster with the nearest mean. 
It follows an iterative refinement strategy to find the centers of natural clusters in the data.\r\n\r\nWhen executed with COMPSs, K-means first generates the input points by means of initialization tasks. For parallelism purposes, the points are split in a number of fragments received as parameter, each fragment being created by an initialization task and filled with random points.\r\n\r\nAfter the initialization, the algorithm goes through a set of iterations. In every iteration, a computation task is created for each fragment; then, there is a reduction phase where the results of each computation are accumulated two at a time by merge tasks; finally, at the end of the iteration the main program post-processes the merged result, generating the current clusters that will be used in the next iteration. Consequently, if ''F'' is the total number of fragments, K-means generates ''F'' computation tasks and ''F-1'' merge tasks per iteration.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --classpath=application_sources/jar/kmeans.jar kmeans.KMeans \u003c...\u003e\r\n```\r\n\r\nwhere ''\u003c...\u003e'':\r\n* -c Number of clusters\r\n* -i Number of iterations\r\n* -n Number of points\r\n* -d Number of dimensions\r\n* -f Number of fragments\r\n\r\n# Execution Examples\r\n```\r\nruncompss --classpath=application_sources/jar/kmeans.jar kmeans.KMeans\r\nruncompss --classpath=application_sources/jar/kmeans.jar kmeans.KMeans -c 4 -i 10 -n 2000 -d 2 -f 2\r\n```\r\n\r\n# Build\r\n## Option 1: Native java\r\n```\r\ncd application_sources/; javac src/main/java/kmeans/*.java\r\ncd src/main/java/; jar cf kmeans.jar kmeans/\r\ncd ../../../; mv src/main/java/kmeans.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\ncd application_sources/\r\nmvn clean package\r\n```\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/663?version=1","name":"Ran with COMPSs 3.3","author":["Jorge Ejarque"],"descriptor_type":[]}]},{"id":"664","url":"https://workflowhub.eu/workflows/664","name":"covid-sequence-analysis-workflow","description":"# covid-sequence-analysis-workflow\r\n\r\nThis is the official repository of the SARS-CoV-2 variant surveillance pipeline developed by Danish Technical University (DTU), Eotvos Lorand University (ELTE), EMBL-EBI, Erasmus Medical Center (EMC) under the [Versatile Emerging infectious disease Observatory (VEO)](https://www.globalsurveillance.eu/projects/veo-versatile-emerging-infectious-disease-observatory) project. The project consists of 20 European partners. It is funded by the European Commission.\r\n\r\nThe pipeline has been integrated on EMBL-EBI infrastructure to automatically process raw SARS-CoV-2 read data, presenting in the COVID-19 Data Portal: https://www.covid19dataportal.org/sequences?db=sra-analysis-covid19\u0026size=15\u0026crossReferencesOption=all#search-content.\r\n\r\n## Architecture\r\n\r\nThe pipeline supports sequence reads from both Illumina and Nanopore platforms. It is designed to be highly portable for both Google Cloud Platform and High Performance Computing cluster with IBM Spectrum LSF. We have performed secondary and tertiary analysis on millions of public samples. The pipeline shows good performance for large scale production. \r\n\r\n![Component diagram](doc/img/pipeline.components.png)\r\n\r\nThe pipeline takes SRA from the public FTP from ENA. It submits analysis objects back to ENA on the fly. The intermediate results and logs are stored in the cloud storage buckets or high performance local POSIX file system. The metadata is stored in Google BigQuery for metadata and status tracking and analysis. The runtime is created with Docker / Singularity containers and NextFlow. 
\r\n\r\n## Process to run the pipelines\r\n\r\nThe pipeline requires the Nextflow Tower for the application level monitoring. A free test account can be created for evaluation purposes at https://tower.nf/.\r\n\r\n### Preparation\r\n\r\n1. Store `export TOWER_ACCESS_TOKEN='...'` in `$HOME/.bash_profile`. Restart the current session or source the updated `$HOME/.bash_profile`.\r\n2. Run `git clone https://github.com/enasequence/covid-sequence-analysis-workflow`.\r\n3. Create `./covid-sequence-analysis-workflow/data/projects_accounts.csv` with submission_account_id and submission_password, for example:\r\n\u003e  project_id,center_name,meta_key,submission_account_id,submission_password,ftp_password\r\n\u003e  PRJEB45555,\"European Bioinformatics Institute\",public,,,\r\n\r\n### Running pipelines\r\n\r\n1. Run `./covid-sequence-analysis-workflow/init.sra_index.sh` to initialize or reinitialize the metadata in BigQuery.\r\n2. Run `./covid-sequence-analysis-workflow/./start.lsf.jobs.sh` with proper parameters to start the batch jobs on LSF or `./covid-sequence-analysis-workflow/./start.gls.jobs.sh` with proper parameters to start the batch jobs on GCP.\r\n\r\n### Error handling\r\n\r\nIf a job is killed or died, run the following to update the metadata to avoid reprocessing samples completed successfully.\r\n\r\n1. Run `./covid-sequence-analysis-workflow/update.receipt.sh \u003cbatch_id\u003e` to collect the submission receipts and to update submission metadata. The script can be run at anytime. It needs to be run if a batch job is killed instead of completed for any reason.\r\n2. Run `./covid-sequence-analysis-workflow/set.archived.sh` to update stats for analyses submitted. The script can be run at anytime. 
It needs to be run at least once before ending a snapshot to make sure that the stats are up-to-date.\r\n\r\nTo reprocess the samples failed, delete the record in `sra_processing`.\r\n","organization":"SARS-CoV-2 Data Hubs","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/664?version=1","name":"master @ 6f2c224","author":["David Yuan"],"descriptor_type":["NFL"]}]},{"id":"665","url":"https://workflowhub.eu/workflows/665","name":"sanger-tol/readmapping v1.1.0 - Hebridean Black","description":"# ![sanger-tol/readmapping](docs/images/sanger-tol-readmapping_logo.png)\r\n\r\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6563577-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.6563577)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/readmapping)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/readmapping** is a bioinformatics best-practice analysis pipeline for mapping reads generated using Illumina, HiC, PacBio and Nanopore technologies against a genome assembly.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. 
It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nOn merge to `dev` and `main` branch, automated continuous integration tests run the pipeline on a full-sized dataset on the Wellcome Sanger Institute HPC farm using the Nextflow Tower infrastructure. This ensures that the pipeline runs on full sized datasets, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.\r\n\r\n## Pipeline summary\r\n\r\n\u003cimg src=\"https://raw.githubusercontent.com/sanger-tol/readmapping/976525ad7b5327607a049aa85bbca36a48c6ba48/docs/images/sanger-tol-readmapping_workflow.png\" height=\"700\"\u003e\r\n\r\n## Quick Start\r\n\r\n1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`\u003e=22.10.1`)\r\n\r\n2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. 
Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.\r\n\r\n3. Download the pipeline and test it on a minimal dataset with a single command:\r\n\r\n   ```bash\r\n   nextflow run sanger-tol/readmapping -profile test,YOURPROFILE --outdir \u003cOUTDIR\u003e\r\n   ```\r\n\r\n   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.\r\n\r\n   \u003e - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.\r\n   \u003e - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile \u003cinstitute\u003e` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.\r\n   \u003e - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. 
Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.\r\n   \u003e - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.\r\n\r\n4. Start running your own analysis!\r\n\r\n   ```bash\r\n   nextflow run sanger-tol/readmapping --input samplesheet.csv --fasta genome.fa.gz --outdir \u003cOUTDIR\u003e -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e\r\n   ```\r\n\r\n## Credits\r\n\r\nsanger-tol/readmapping was originally written by [Priyanka Surana](https://github.com/priyanka-surana).\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n- [Matthieu Muffato](https://github.com/muffato) for the text logo\r\n- [Guoying Qi](https://github.com/gq1) for being able to run tests using Nf-Tower and the Sanger HPC farm\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#pipelines` channel](https://sangertreeoflife.slack.com/channels/pipelines). 
Please [create an issue](https://github.com/sanger-tol/readmapping/issues/new/choose) on GitHub if you are not on the Sanger slack channel.\r\n\r\n## Citations\r\n\r\nIf you use sanger-tol/readmapping for your analysis, please cite it using the following doi: [10.5281/zenodo.6563577](https://doi.org/10.5281/zenodo.6563577)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Analysis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/665?version=1","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"666","url":"https://workflowhub.eu/workflows/666","name":"sanger-tol/insdcdownload v1.0.1 - Hefty mûmakil","description":"# ![sanger-tol/ensemblgenedownload](docs/images/sanger-tol-ensemblgenedownload_logo.png)\r\n\r\n[![GitHub Actions CI Status](https://github.com/sanger-tol/ensemblgenedownload/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/ensemblgenedownload/actions?query=workflow%3A%22nf-core+CI%22)\r\n\r\n\u003c!-- [![GitHub Actions Linting 
Status](https://github.com/sanger-tol/ensemblgenedownload/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/ensemblgenedownload/actions?query=workflow%3A%22nf-core+linting%22) --\u003e\r\n\r\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7183206-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7183206)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n\r\n[![Get help on Slack](http://img.shields.io/badge/slack-SangerTreeofLife%20%23pipelines-4A154B?labelColor=000000\u0026logo=slack)](https://SangerTreeofLife.slack.com/channels/pipelines)\r\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40sangertol-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/sangertol)\r\n[![Watch on YouTube](http://img.shields.io/badge/youtube-tree--of--life-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/channel/UCFeDpvjU58SA9V0ycRXejhA)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/ensemblgenedownload** is a pipeline that downloads gene annotations from Ensembl into the Tree of Life directory structure.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. 
The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the GitHub CI infrastructure. This ensures that the pipeline runs in a third-party environment, and has sensible resource allocation defaults set to run on real-world datasets.\r\n\r\n## Pipeline summary\r\n\r\n## Overview\r\n\r\nThe pipeline takes a CSV file that contains assembly accession number, Ensembl species names (as they may differ from Tree of Life ones !), output directories, and geneset versions.\r\nAssembly accession numbers are optional. If missing, the pipeline assumes it can be retrieved from files named `ACCESSION` in the standard location on disk.\r\nThe pipeline downloads the Fasta files of the genes (cdna, cds, and protein sequences) as well as the GFF3 file.\r\nAll files are compressed with `bgzip`, and indexed with `samtools faidx` or `tabix`.\r\n\r\nSteps involved:\r\n\r\n- Download from Ensembl the GFF3 file, and the sequences of the genes in\r\n  Fasta format.\r\n- Compress and index all Fasta files with `bgzip`, `samtools faidx`, and\r\n  `samtools dict`.\r\n- Compress and index the GFF3 file with `bgzip` and `tabix`.\r\n\r\n## Quick Start\r\n\r\n1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`\u003e=22.04.0`)\r\n\r\n2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.\r\n\r\n3. Download the pipeline and test it on a minimal dataset with a single command:\r\n\r\n   ```bash\r\n   nextflow run sanger-tol/ensemblgenedownload -profile test,YOURPROFILE --outdir \u003cOUTDIR\u003e\r\n   ```\r\n\r\n   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.\r\n\r\n   \u003e - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.\r\n   \u003e - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile \u003cinstitute\u003e` in your command. 
This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.\r\n   \u003e - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.\r\n   \u003e - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.\r\n\r\n4. Start running your own analysis!\r\n\r\n   ```console\r\n   nextflow run sanger-tol/ensemblgenedownload --input $PWD/assets/samplesheet.csv --outdir \u003cOUTDIR\u003e -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e\r\n   ```\r\n\r\n## Documentation\r\n\r\nThe sanger-tol/ensemblgenedownload pipeline comes with documentation about the pipeline [usage](docs/usage.md) and [output](docs/output.md).\r\n\r\n## Credits\r\n\r\nsanger-tol/ensemblgenedownload was originally written by @muffato.\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#pipelines` channel](https://sangertreeoflife.slack.com/channels/pipelines). 
Please [create an issue](https://github.com/sanger-tol/ensemblgenedownload/issues/new/choose) on GitHub if you are not on the Sanger slack channel.\r\n\r\n## Citations\r\n\r\nIf you use sanger-tol/ensemblgenedownload for your analysis, please cite it using the following doi: [10.5281/zenodo.7183206](https://doi.org/10.5281/zenodo.7183206)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Analysis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/666?version=1","name":"1.0.1","author":[],"descriptor_type":["NFL"]}]},{"id":"667","url":"https://workflowhub.eu/workflows/667","name":"sanger-tol/ensemblrepeatdownload v1.0.0 - Gwaihir the Windlord","description":"# ![sanger-tol/ensemblrepeatdownload](docs/images/sanger-tol-ensemblrepeatdownload_logo.png)\r\n\r\n[![GitHub Actions CI Status](https://github.com/sanger-tol/ensemblrepeatdownload/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/ensemblrepeatdownload/actions?query=workflow%3A%22nf-core+CI%22)\r\n\r\n\u003c!-- [![GitHub Actions Linting 
Status](https://github.com/sanger-tol/ensemblrepeatdownload/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/ensemblrepeatdownload/actions?query=workflow%3A%22nf-core+linting%22) --\u003e\r\n\r\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7183380-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7183380)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n\r\n[![Get help on Slack](http://img.shields.io/badge/slack-SangerTreeofLife%20%23pipelines-4A154B?labelColor=000000\u0026logo=slack)](https://SangerTreeofLife.slack.com/channels/pipelines)\r\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40sangertol-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/sangertol)\r\n[![Watch on YouTube](http://img.shields.io/badge/youtube-tree--of--life-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/channel/UCFeDpvjU58SA9V0ycRXejhA)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/ensemblrepeatdownload** is a pipeline that downloads repeat annotations from Ensembl into a Tree of Life directory structure.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. 
The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the GitHub CI infrastructure. This ensures that the pipeline runs in a third-party environment, and has sensible resource allocation defaults set to run on real-world datasets.\r\n\r\n## Pipeline summary\r\n\r\n## Overview\r\n\r\nThe pipeline takes a CSV file that contains assembly accession number, Ensembl species names (as they may differ from Tree of Life ones !), output directories.\r\nAssembly accession numbers are optional too. If missing, the pipeline assumes it can be retrieved from files named `ACCESSION` in the standard location on disk.\r\nThe pipeline downloads the repeat annotation as the masked Fasta file and a BED file.\r\nAll files are compressed with `bgzip`, and indexed with `samtools faidx` or `tabix`.\r\n\r\nSteps involved:\r\n\r\n- Download the masked fasta file from Ensembl.\r\n- Extract the coordinates of the masked regions into a BED file.\r\n- Compress and index the BED file with `bgzip` and `tabix`.\r\n\r\n## Quick Start\r\n\r\n1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`\u003e=22.04.0`)\r\n\r\n2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.\r\n\r\n3. Download the pipeline and test it on a minimal dataset with a single command:\r\n\r\n   ```bash\r\n   nextflow run sanger-tol/ensemblrepeatdownload -profile test,YOURPROFILE --outdir \u003cOUTDIR\u003e\r\n   ```\r\n\r\n   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.\r\n\r\n   \u003e - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.\r\n   \u003e - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile \u003cinstitute\u003e` in your command. 
This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.\r\n   \u003e - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.\r\n   \u003e - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.\r\n\r\n4. Start running your own analysis!\r\n\r\n   ```console\r\n   nextflow run sanger-tol/ensemblrepeatdownload --input $PWD/assets/samplesheet.csv --outdir \u003cOUTDIR\u003e -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e\r\n   ```\r\n\r\n## Documentation\r\n\r\nThe sanger-tol/ensemblrepeatdownload pipeline comes with documentation about the pipeline [usage](docs/usage.md) and [output](docs/output.md).\r\n\r\n## Credits\r\n\r\nsanger-tol/ensemblrepeatdownload was originally written by @muffato.\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#pipelines` channel](https://sangertreeoflife.slack.com/channels/pipelines). 
Please [create an issue](https://github.com/sanger-tol/ensemblrepeatdownload/issues/new/choose) on GitHub if you are not on the Sanger slack channel.\r\n\r\n## Citations\r\n\r\nIf you use sanger-tol/ensemblrepeatdownload for your analysis, please cite it using the following doi: [10.5281/zenodo.7183380](https://doi.org/10.5281/zenodo.7183380)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Analysis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/667?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"668","url":"https://workflowhub.eu/workflows/668","name":"sanger-tol/treeval v1.0 - Ancient Atlantis","description":"[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/treeval)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/treeval** is a bioinformatics best-practice analysis pipeline for the generation of data supplemental to the curation of reference quality genomes. This pipeline has been written to generate flat files compatible with [JBrowse2](https://jbrowse.org/jb2/).\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. 
The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nThe treeval pipeline has a sister pipeline currently named [curationpretext](https://github.com/sanger-tol/curationpretext) which acts to regenerate the pretext maps and accessory files during genomic curation in order to confirm interventions. This pipeline is sufficiently different to the treeval implementation that it is written as it's own pipeline.\r\n\r\n1. Parse input yaml ( YAML_INPUT )\r\n2. Generate my.genome file ( GENERATE_GENOME )\r\n3. Generate insilico digests of the input assembly ( INSILICO_DIGEST )\r\n4. Generate gene alignments with high quality data against the input assembly ( GENE_ALIGNMENT )\r\n5. Generate a repeat density graph ( REPEAT_DENSITY )\r\n6. Generate a gap track ( GAP_FINDER )\r\n7. Generate a map of self complementary sequence ( SELFCOMP )\r\n8. Generate syntenic alignments with a closely related high quality assembly ( SYNTENY )\r\n9. Generate a coverage track using PacBio data ( LONGREAD_COVERAGE )\r\n10. Generate HiC maps, pretext and higlass using HiC cram files ( HIC_MAPPING )\r\n11. Generate a telomere track based on input motif ( TELO_FINDER )\r\n12. Run Busco and convert results into bed format ( BUSCO_ANNOTATION )\r\n13. Ancestral Busco linkage if available for clade ( BUSCO_ANNOTATION:ANCESTRAL_GENE )\r\n\r\n## Usage\r\n\r\n\u003e **Note**\r\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how\r\n\u003e to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)\r\n\u003e with `-profile test` before running the workflow on actual data.\r\n\r\nCurrently, it is advised to run the pipeline with docker or singularity as a small number of major modules do not currently have a conda env associated with them.\r\n\r\nNow, you can run the pipeline using:\r\n\r\n```bash\r\n# For the FULL pipeline\r\nnextflow run main.nf -profile singularity --input treeval.yaml --outdir {OUTDIR}\r\n\r\n# For the RAPID subset\r\nnextflow run main.nf -profile singularity --input treeval.yaml -entry RAPID --outdir {OUTDIR}\r\n```\r\n\r\nAn example treeval.yaml can be found [here](assets/local_testing/nxOscDF5033.yaml).\r\n\r\nFurther documentation about the pipeline can be found in the following files: [usage](https://pipelines.tol.sanger.ac.uk/treeval/dev/usage), [parameters](https://pipelines.tol.sanger.ac.uk/treeval/dev/parameters) and [output](https://pipelines.tol.sanger.ac.uk/treeval/dev/output).\r\n\r\n\u003e **Warning:**\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\r\n\u003e provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\u003e see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).\r\n\r\n## Credits\r\n\r\nsanger-tol/treeval has been written by Damon-Lee Pointon (@DLBPointon), Yumi Sims (@yumisims) and William Eagles (@weaglesBio).\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n\u003cul\u003e\r\n  \u003cli\u003e@gq1 - For building the infrastructure around TreeVal and helping with code review\u003c/li\u003e\r\n  \u003cli\u003e@ksenia-krasheninnikova - For help with C code implementation and YAML parsing\u003c/li\u003e\r\n  \u003cli\u003e@mcshane - For guidance on algorithms \u003c/li\u003e\r\n  \u003cli\u003e@muffato - For code reviews and code support\u003c/li\u003e\r\n  \u003cli\u003e@priyanka-surana - For help with the majority of code reviews and code support\u003c/li\u003e\r\n\u003c/ul\u003e\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\n## Citations\r\n\r\n\u003c!--TODO: Citation--\u003e\r\n\r\nIf you use sanger-tol/treeval for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX).\r\n\r\n### Tools\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nYou can cite the `nf-core` publication as follows:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/668?version=1","name":"v1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"673","url":"https://workflowhub.eu/workflows/673","name":"PyCOMPSs simple example (ran on macOS laptop, input generated by the code, INOUT file example)","description":"**Contact Person:** support-compss@bsc.es  \r\n**Access Level:** public  \r\n**License Agreement:** Apache2  \r\n**Platform:** COMPSs  \r\n\r\n# Description\r\n\r\nSimple is an application that takes one value and increases it by five units. The purpose of this application is to show how tasks are managed by COMPSs.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/simple.py initValue\r\n```\r\n\r\nwhere:\r\n* initValue: Initial value for counter\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/simple.py 1\r\nruncompss src/simple.py 1\r\npython -m pycompss src/simple.py 1\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/673?version=1","name":"COMPSs 3.3","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"675","url":"https://workflowhub.eu/workflows/675","name":"Evaluation of Swin Transformer and knowledge transfer for denoising of super-resolution structured illumination microscopy data","description":"# Evaluation of Swin Transformer and knowledge transfer for denoising of super-resolution structured illumination microscopy data\r\n\r\nIn recent years, convolutional neural network (CNN)-based methods have shown remarkable performance in the denoising and reconstruction of super-resolved structured 
illumination microscopy (SR-SIM) data. Therefore, CNN-based architectures have been the main focus of existing studies. Recently, however, an alternative and highly\r\ncompetitive deep learning architecture, Swin Transformer, has been proposed for image restoration tasks. In this work, we present SwinT-fairSIM, a novel method for restoring SR-SIM images with low signal-to-noise ratio (SNR) based on Swin Transformer. The experimental results show that SwinT-fairSIM outperforms previous CNN-based denoising methods. Furthermore, the generalization capabilities of deep learning methods for image restoration tasks on real fluorescence microscopy data have not been fully explored yet, i.e., the extent to which trained artificial neural networks are limited to specific types of cell structures and noise. Therefore, as a second contribution, we benchmark two types of transfer learning, i.e., direct transfer and fine-tuning, in combination with SwinT-fairSIM and two CNN-based methods for denoising SR-SIM data. Direct transfer does not prove to be a viable strategy, but fine-tuning achieves results comparable to conventional training from scratch while saving computational time and potentially reducing the amount of required training data. As a third contribution, we published four datasets of raw SIM images and already reconstructed SR-SIM images. These datasets cover two types of cell structures, tubulin filaments and vesicle structures. Different noise levels are available for the tubulin filaments. These datasets are structured in such a way that they can be easily used by the research community for research on denoising, super-resolution, and transfer learning strategies.\r\n\r\nThe SIM microscopy datasets that were used during this work can be downloaded through this link: http://dx.doi.org/10.5524/102461\r\n\r\n\r\n## Installation:\r\n\r\nThis implementation requires the Tensorflow-GPU2.5 version. 
To avoid package conflicts, we recommend you create a new environment by using our provided environment.yml file. To create a new environment please run the following script:\r\n\r\n\u003e  conda env create -f environment.yml\r\n\r\n## How to use this code:\r\n\r\nThis code can be used to train a denoising model from scratch or to fine-tune a pretrained model. After the installation of the Python environment from the yml file, the next step is to set the input parameters in the JSON parameter file (i.e., ParameterFile.json). Most of the input parameters are self-explanatory but below we will discuss some of the important input parameters from the JSON file:\r\n\r\n- TrainNetworkfromScratch: This input parameter will train the model from scratch If set to True, otherwise, for fine-tuning, It should be False.\r\n- ActivateTrainandTestModel: This parameter will be set to False If you want to use this code for evaluation of the trained model or the reproducibility of the results by using pretrained models.\r\n- PretrainedmodelPath: This parameter is mandatory in case of fine-tuning or evaluation of a pretrained model.\r\n- FineTuneStartingpoint and FineTuneEndingpoint: These two input parameters are essential in the fine-tuning of a pretrained model. All the layers between the starting and ending points will be frozen during the fine-tuning of the pretrained model.\r\n\r\nAfter the assignment of the input parameters. You can run the following script from the command line to start training the model:\r\n\r\n\u003e python MainModule.py 'ParameterFile.json'\r\n\r\n## Reproducibility and evaluation:\r\n\r\nTo reproduce the results of the paper all the trained models used in this work are available in the 'Models' directory at [zenodo](https://doi.org/10.5281/zenodo.7626173). This code is capable of performing all the necessary steps for the training and test phases. It will automatically evaluate the model and generate a result directory to write all the results. 
Similarly, during the training process, It will also create a model directory and save the trained model along with the best checkpoints in the model directory.   \r\n\r\n## Important Note:\r\n\r\nThis code will work with at least one GPU.\r\n\r\n## Reference:\r\n\r\nPlease cite our paper in case you use this code for any scientific publication. We will soon upload the citation index!\r\n\r\n\r\n\r\n\r\n","organization":"Evaluation of Swin Transformer and knowledge transfer for denoising of super-resolution structured illumination microscopy data","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/675?version=1","name":"main @ 2474694","author":["Zafran Hussain Shah"],"descriptor_type":[]}]},{"id":"676","url":"https://workflowhub.eu/workflows/676","name":"Somatic-Variant-Discovery-from-WES-Data-Using-Control-FREEC","description":"This workflow is created as part of a tutorial listed on GTN. The workflow shows the steps in human copy number variance detection using the Contrl_FREEC tool. ","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/676?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"677","url":"https://workflowhub.eu/workflows/677","name":"lcms-preprocessing/main","description":"This workflow is composed with the XCMS tool R package (Smith, C.A. 
2006) able to extract, filter, align and fill gapand the possibility to annotate isotopes, adducts and fragments using the CAMERA R package (Kuhl, C 2012).\n\n\nhttps://training.galaxyproject.org/training-material/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.html ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/677?version=1","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/677?version=2","name":"v1.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"678","url":"https://workflowhub.eu/workflows/678","name":"PyCOMPSs Increment example, ran at Marenostrum IV supercomputer, example of INOUT file and compss_open usage","description":"**Name:** Increment  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nIncrement is an application that takes three different values and increases them a number of given times.\r\n\r\nThe purpose of this application is to show parallelism between the different increments.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/increment.py N initValue1 initValue2 initValue3\r\n```\r\n\r\nwhere:\r\n* N: Number of times to increase the counters\r\n* initValue1: Initial value for counter 1\r\n* initValue2: Initial value for counter 2\r\n* initValue3: Initial value for counter 3\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/increment.py 10 1 2 3\r\nruncompss src/wordcount.py src/increment.py 10 1 2 3\r\npython -m pycompss src/wordcount.py src/increment.py 10 1 2 3\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/678?version=1","name":"COMPSs 3.3","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"680","url":"https://workflowhub.eu/workflows/680","name":"gcms-metams/main","description":"This workflow is composed with the XCMS tool R package (Smith, C.A. 2006) able to extract and the metaMS R package (Wehrens, R 2014) for the field of untargeted metabolomics. \n\nhttps://training.galaxyproject.org/training-material/topics/metabolomics/tutorials/gcms/tutorial.html","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/680?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/680?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"681","url":"https://workflowhub.eu/workflows/681","name":"Inclusion Body Myositis Active Subnetwork Identification Workflow","description":"Workflow for Creating a large disease network from various datasets and databases for IBM, and applying the active subnetwork identification method MOGAMUN.","organization":"EJPRD WP13 case-studies workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/681?version=1","name":"master @ 5d0df39","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/681?version=2","name":"master @ 7fd2fb2","author":[],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/681?version=3","name":"master @ 7fd2fb2","author":[],"descriptor_type":["CWL"]},{"id":"4","url":"https://workflowhub.eu/workflows/681?version=4","name":"master @ 7fd2fb2","author":[],"descriptor_type":["CWL"]},{"id":"5","url":"https://workflowhub.eu/workflows/681?version=5","name":"master @ 
7fd2fb2","author":[],"descriptor_type":["CWL"]},{"id":"6","url":"https://workflowhub.eu/workflows/681?version=6","name":"master @ fbe3ed5","author":[],"descriptor_type":["CWL"]},{"id":"7","url":"https://workflowhub.eu/workflows/681?version=7","name":"master @ 7e4122f","author":[],"descriptor_type":["CWL"]}]},{"id":"683","url":"https://workflowhub.eu/workflows/683","name":"ONTViSc (ONT-based Viral Screening for Biosecurity)","description":"# ONTViSc (ONT-based Viral Screening for Biosecurity)\r\n\r\n## Introduction\r\neresearchqut/ontvisc is a Nextflow-based bioinformatics pipeline designed to help diagnostics of viruses and viroid pathogens for biosecurity. It takes fastq files generated from either amplicon or whole-genome sequencing using Oxford Nanopore Technologies as input.\r\n\r\nThe pipeline can either: 1) perform a direct search on the sequenced reads, 2) generate clusters, 3) assemble the reads to generate longer contigs or 4) directly map reads to a known reference. \r\n\r\nThe reads can optionally be filtered from a plant host before performing downstream analysis.\r\n\r\n## Pipeline overview\r\n- Data quality check (QC) and preprocessing\r\n  - Merge fastq files (Fascat, optional)\r\n  - Raw fastq file QC (Nanoplot)\r\n  - Trim adaptors (PoreChop ABI - optional)\r\n  - Filter reads based on length and/or quality (Chopper - optional)\r\n  - Reformat fastq files so read names are trimmed after the first whitespace (bbmap)\r\n  - Processed fastq file QC (if PoreChop and/or Chopper is run) (Nanoplot)\r\n- Host read filtering\r\n  - Align reads to host reference provided (Minimap2)\r\n  - Extract reads that do not align for downstream analysis (seqtk)\r\n- QC report\r\n  - Derive read counts recovered pre and post data processing and post host filtering\r\n- Read classification analysis mode\r\n- Clustering mode\r\n  - Read clustering (Rattle)\r\n  - Convert fastq to fasta format (seqtk)\r\n  - Cluster scaffolding (Cap3)\r\n  - Megablast homology search 
against ncbi or custom database (blast)\r\n  - Derive top candidate viral hits\r\n  - Align reads back to top reference and derive coverage statistics (mosdepth and coverM)\r\n- De novo assembly mode\r\n  - De novo assembly (Canu or Flye)\r\n  - Megablast homology search against ncbi or custom database or reference (blast)\r\n  - Derive top candidate viral hits\r\n  - Align reads back to top reference and derive coverage statistics (mosdepth and coverM)\r\n- Read classification mode\r\n  - Option 1 Nucleotide-based taxonomic classification of reads (Kraken2, Braken)\r\n  - Option 2 Protein-based taxonomic classification of reads (Kaiju, Krona)\r\n  - Option 3 Convert fastq to fasta format (seqtk) and perform direct homology search using megablast (blast)\r\n- Map to reference mode\r\n  - Align reads to reference fasta file (Minimap2) and derive bam file and alignment statistics (Samtools)\r\n\r\nCode and detailed instructions can be found [here](https://github.com/eresearchqut/ontvisc). 
A comprehensive, step-by-step guide on setting up and executing the ONTViSc pipeline across three high-performance computing systems hosted by Australian research and computing facilities - Lyra (Queensland University of Technology), Gadi (National Computational Infrastructure), and Setonix (Pawsey) - utilising the Australian Nextflow Seqera Service, can be found [here](https://mantczakaus.github.io/ontvisc_hpc_seqera_service_guide/).\r\n\r\n## Authors\r\nMarie-Emilie Gauthier \u003cgauthiem@qut.edu.au\u003e  \r\nCraig Windell \u003cc.windell@qut.edu.au\u003e  \r\nMagdalena Antczak \u003cmagdalena.antczak@qcif.edu.au\u003e  \r\nRoberto Barrero \u003croberto.barrero@qut.edu.au\u003e  \r\n","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/683?version=1","name":"main @ d333445","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/683?version=2","name":"v1.3","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/683?version=3","name":"main @ 2274c83","author":[],"descriptor_type":["NFL"]}]},{"id":"684","url":"https://workflowhub.eu/workflows/684","name":"Java COMPSs wordcount example (laptop run, files used as inputs)","description":"**Name:** Java Wordcount  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nWordcount application. 
There are two versions of Wordcount, depending on how the input data is given.\r\n\r\n## Version 1\r\n''Single input file'', where all the text is given in the same file and the chunks are calculated with a BLOCK_SIZE parameter.\r\n\r\n## Version 2\r\n''Multiple input files'', where the text fragments are already in different files under the same directory\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --classpath=application_sources/jar/wordcount.jar wordcount.multipleFiles.Wordcount DATA_FOLDER\r\nruncompss --classpath=application_sources/jar/wordcount.jar wordcount.uniqueFile.Wordcount DATA_FILE BLOCK_SIZE\r\n```\r\n\r\nwhere:\r\n* DATA_FOLDER: Absolute path to the base folder of the dataset files\r\n* DATA_FILE: Absolute path to the dabase file\r\n* BLOCK_SIZE: Number of bytes of each block\r\n\r\n# Execution Examples\r\n```\r\nruncompss --classpath=application_sources/jar/wordcount.jar wordcount.multipleFiles.Wordcount dataset/data-set/\r\nruncompss --classpath=application_sources/jar/wordcount.jar wordcount.uniqueFile.Wordcount dataset/data-set/file_small.txt 650\r\nruncompss --classpath=application_sources/jar/wordcount.jar wordcount.uniqueFile.Wordcount dataset/data-set/file_long.txt 250000\r\n\r\n```\r\n\r\n# Build\r\n\r\n## Option 1: Native java\r\n```\r\ncd application_sources/; javac src/main/java/wordcount/*.java\r\ncd src/main/java/; jar cf wordcount.jar wordcount/\r\ncd ../../../; mv src/main/java/wordcount.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\ncd application_sources/\r\nmvn clean package\r\n```\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/684?version=1","name":"COMPSs 3.3","author":["Jorge Ejarque"],"descriptor_type":[]}]},{"id":"685","url":"https://workflowhub.eu/workflows/685","name":"mRNA-Seq BY-COVID Pipeline","description":"Analyse Bulk RNA-Seq data in 
preparation for downstream Pathways analysis with MINERVA","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/685?version=1","name":"Version 1","author":["Helena Rasche"],"descriptor_type":["GALAXY"]}]},{"id":"686","url":"https://workflowhub.eu/workflows/686","name":"dna-seq-varlociraptor","description":"# Snakemake workflow: dna-seq-varlociraptor\r\n\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥6.3.0-brightgreen.svg)](https://snakemake.github.io)\r\n[![GitHub actions status](https://github.com/snakemake-workflows/dna-seq-varlociraptor/workflows/Tests/badge.svg?branch=master)](https://github.com/snakemake-workflows/dna-seq-varlociraptor/actions?query=branch%3Amaster+workflow%3ATests)\r\n[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4675661.svg)](https://doi.org/10.5281/zenodo.4675661)\r\n\r\n\r\nA Snakemake workflow for calling small and structural variants under any kind of scenario (tumor/normal, tumor/normal/relapse, germline, pedigree, populations) via the unified statistical model of [Varlociraptor](https://varlociraptor.github.io).\r\n\r\n\r\n## Usage\r\n\r\nThe usage of this workflow is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?usage=snakemake-workflows%2Fdna-seq-varlociraptor).\r\n\r\nIf you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this (original) repository and its DOI (see above).\r\n","organization":"Snakemake-Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/686?version=1","name":"master @ 
ee973c6","author":[],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/686?version=2","name":"v5.2.0","author":[],"descriptor_type":["SMK"]}]},{"id":"687","url":"https://workflowhub.eu/workflows/687","name":"PyCOMPSs Wordcount test, dividing input file in blocks, only Python dictionaries used as task parameters (run at MareNostrum IV)","description":"**Name:** Word Count  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nWordcount is an application that counts the number of words for a given set of files.\r\n\r\nTo allow parallelism the file is divided in blocks that are treated separately and merged afterwards.\r\n\r\nResults are printed to a Pickle binary file, so they can be checked using: python -mpickle result.txt\r\n\r\nThis example also shows how to manually add input or output datasets to the workflow provenance recording (using the 'input' and 'output' terms in the ro-crate-info.yaml file).\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python $(pwd)/application_sources/src/wordcount_blocks.py filePath resultPath blockSize\r\n```\r\n\r\nwhere:\r\n* filePath: Absolute path of the file to parse\r\n* resultPath: Absolute path to the result file\r\n* blockSize: Size of each block. 
The lower the number, the more tasks will be generated in the workflow\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python $(pwd)/application_sources/src/wordcount_blocks.py $(pwd)/dataset/data/compss.txt result.txt 300\r\nruncompss $(pwd)/application_sources/src/wordcount_blocks.py $(pwd)/dataset/data/compss.txt result.txt 300\r\npython -m pycompss $(pwd)/application_sources/src/wordcount.py $(pwd)/dataset/data/compss.txt result.txt 300\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/687?version=1","name":"COMPSs 3.3","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"688","url":"https://workflowhub.eu/workflows/688","name":"mRNA-Seq BY-COVID Pipeline: Counts","description":"This portion of the workflow produces sets of feature Counts ready for analysis by limma/etc.","organization":"BY-COVID (general), Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/688?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"689","url":"https://workflowhub.eu/workflows/689","name":"mRNA-Seq BY-COVID Pipeline: Analysis","description":"Analyse Bulk RNA-Seq data in preparation for downstream Pathways analysis with MINERVA","organization":"BY-COVID (general), Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/689?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"690","url":"https://workflowhub.eu/workflows/690","name":"Lanczos SVD","description":"**Name:** Lanczos SVD  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  
\r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum4  \r\n\r\nLanczos SVD for computing singular values needed to reach an epsilon of 1e-3 on a matrix of (150000, 150).  \r\nThe input matrix is generated synthetically.  \r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/690?version=1","name":"Lanczos SVD","author":["Fernando Vázquez-Novoa"],"descriptor_type":[]}]},{"id":"691","url":"https://workflowhub.eu/workflows/691","name":"Somatic-ShortV-nf","description":"This is a Nextflow implementaion of the GATK Somatic Short Variant Calling workflow. This workflow can be used to discover somatic short variants (SNVs and indels) from tumour and matched normal BAM files following GATK's Best Practices Workflow. The workflowis currently optimised to run efficiently and at scale on the National Compute Infrastructure, Gadi.","organization":"Australian BioCommons, Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/691?version=1","name":"main @ 495dafb","author":["Nandan Deshpande","Tracy Chew","Cali Willet","Georgina Samaha"],"descriptor_type":["NFL"]}]},{"id":"692","url":"https://workflowhub.eu/workflows/692","name":"PerMedCoE Cancer Diagnosis","description":"# Cancer Invasion Workflow\r\n\r\n## Table of Contents\r\n\r\n- [Cancer Invasion Workflow](#cancer-invasion-workflow)\r\n  - [Table of Contents](#table-of-contents)\r\n  - [Description](#description)\r\n  - [Contents](#contents)\r\n    - [Building Blocks](#building-blocks)\r\n    - [Workflows](#workflows)\r\n    - [Resources](#resources)\r\n    - [Tests](#tests)\r\n  - [Instructions](#instructions)\r\n    - [Local machine](#local-machine)\r\n  
    - [Requirements](#requirements)\r\n      - [Usage steps](#usage-steps)\r\n    - [MareNostrum 4](#marenostrum-4)\r\n      - [Requirements in MN4](#requirements-in-mn4)\r\n      - [Usage steps in MN4](#usage-steps-in-mn4)\r\n    - [Mahti or Puhti](#mahti-or-puhti)\r\n      - [Requirements](#requirements)\r\n      - [Steps](#steps)\r\n  - [License](#license)\r\n  - [Contact](#contact)\r\n\r\n## Description\r\n\r\nUses multiscale simulations to describe cancer progression into invasion.\r\n\r\nThe workflow uses the following building blocks, described in order of execution:\r\n\r\n1. PhysiBoSS-Invasion\r\n\r\nFor details on individual workflow steps, see the user documentation for each building block.\r\n\r\n[`GitHub repository`](\u003chttps://github.com/PerMedCoE/cancer-invasion-workflow\u003e)\r\n\r\n\r\n## Contents\r\n\r\n### Building Blocks\r\n\r\nThe ``BuildingBlocks`` folder contains the script to install the\r\nBuilding Blocks used in the Cancer Invasion Workflow.\r\n\r\n### Workflows\r\n\r\nThe ``Workflow`` folder contains the workflows implementations.\r\n\r\nCurrently contains the implementation using PyCOMPSs and Snakemake (in progress).\r\n\r\n### Resources\r\n\r\nThe ``Resources`` folder contains dataset files.\r\n\r\n### Tests\r\n\r\nThe ``Tests`` folder contains the scripts that run each Building Block\r\nused in the workflow for the given small dataset.\r\nThey can be executed individually for testing purposes.\r\n\r\n## Instructions\r\n\r\n### Local machine\r\n\r\nThis section explains the requirements and usage for the Cancer Invasion Workflow in a laptop or desktop computer.\r\n\r\n#### Requirements\r\n\r\n- [`permedcoe`](https://github.com/PerMedCoE/permedcoe) package\r\n- [PyCOMPSs](https://pycompss.readthedocs.io/en/stable/Sections/00_Quickstart.html) / [Snakemake](https://snakemake.readthedocs.io/en/stable/)\r\n- [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)\r\n\r\n#### Usage steps\r\n\r\n1. 
Clone this repository:\r\n\r\n  ```bash\r\n  git clone https://github.com/PerMedCoE/cancer-invasion-workflow\r\n  ```\r\n\r\n2. Install the Building Blocks required for the Cancer Invasion Workflow:\r\n\r\n  ```bash\r\n  cancer-invasion-workflow/BuildingBlocks/./install_BBs.sh\r\n  ```\r\n\r\n3. Get the required Building Block images from the project [B2DROP](https://b2drop.bsc.es/index.php/f/444350):\r\n\r\n  - Required images:\r\n      - PhysiCell-Invasion.singularity\r\n\r\n  The path where these files are stored **MUST be exported in the `PERMEDCOE_IMAGES`** environment variable.\r\n\r\n  \u003e :warning: **TIP**: These containers can be built manually as follows (be patient since some of them may take some time):\r\n  1. Clone the `BuildingBlocks` repository\r\n     ```bash\r\n     git clone https://github.com/PerMedCoE/BuildingBlocks.git\r\n     ```\r\n  2. Build the required Building Block images\r\n     ```bash\r\n     cd BuildingBlocks/Resources/images\r\n     sudo singularity build PhysiCell-Invasion.sif PhysiCell-Invasion.singularity\r\n     cd ../../..\r\n     ```\r\n\r\n**If using PyCOMPSs in local PC** (make sure that PyCOMPSs in installed):\r\n\r\n4. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflows/PyCOMPSs\r\n   ```\r\n\r\n5. Execute `./run.sh`\r\n\r\n**If using Snakemake in local PC** (make sure that SnakeMake is installed):\r\n\r\n4. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflows/SnakeMake\r\n   ```\r\n\r\n5. 
Execute `./run.sh`\r\n  \u003e **TIP**: If you want to run the workflow with a different dataset, please update the `run.sh` script setting the `dataset` variable to the new dataset folder and their file names.\r\n\r\n\r\n### MareNostrum 4\r\n\r\nThis section explains the requirements and usage for the Cancer Invasion Workflow in the MareNostrum 4 supercomputer.\r\n\r\n#### Requirements in MN4\r\n\r\n- Access to MN4\r\n\r\nAll Building Blocks are already installed in MN4, and the Cancer Invasion Workflow available.\r\n\r\n#### Usage steps in MN4\r\n\r\n1. Load the `COMPSs`, `Singularity` and `permedcoe` modules\r\n\r\n   ```bash\r\n   export COMPSS_PYTHON_VERSION=3\r\n   module load COMPSs/3.1\r\n   module load singularity/3.5.2\r\n   module use /apps/modules/modulefiles/tools/COMPSs/libraries\r\n   module load permedcoe\r\n   ```\r\n\r\n   \u003e **TIP**: Include the loading into your `${HOME}/.bashrc` file to load it automatically on the session start.\r\n\r\n   This commands will load COMPSs and the permedcoe package which provides all necessary dependencies, as well as the path to the singularity container images (`PERMEDCOE_IMAGES` environment variable) and testing dataset (`CANCERINVASIONWORKFLOW_DATASET` environment variable).\r\n\r\n2. Get a copy of the pilot workflow into your desired folder\r\n\r\n   ```bash\r\n   mkdir desired_folder\r\n   cd desired_folder\r\n   get_cancerinvasionworkflow\r\n   ```\r\n\r\n3. Go to `Workflow/PyCOMPSs` folder\r\n\r\n   ```bash\r\n   cd Workflow/PyCOMPSs\r\n   ```\r\n\r\n4. Execute `./launch.sh`\r\n\r\n  This command will launch a job into the job queuing system (SLURM) requesting 2 nodes (one node acting half master and half worker, and other full worker node) for 20 minutes, and is prepared to use the singularity images that are already deployed in MN4 (located into the `PERMEDCOE_IMAGES` environment variable). 
It uses the dataset located into `../../Resources/data` folder.\r\n\r\n  \u003e :warning: **TIP**: If you want to run the workflow with a different dataset, please edit the `launch.sh` script and define the appropriate dataset path.\r\n\r\n  After the execution, a `results` folder will be available with the Cancer Invasion Workflow results.\r\n\r\n### Mahti or Puhti\r\n\r\nThis section explains how to run the Cancer Invasion workflow on CSC supercomputers using SnakeMake.\r\n\r\n#### Requirements\r\n\r\n- Install snakemake (or check if there is a version installed using `module spider snakemake`)\r\n- Install workflow, using the same steps as for the local machine. With the exception that containers have to be built elsewhere.\r\n\r\n#### Steps\r\n\r\n\r\n1. Go to `Workflow/SnakeMake` folder\r\n\r\n   ```bash\r\n   cd Workflow/SnakeMake\r\n   ```\r\n\r\n2. Edit `launch.sh` with the correct partition, account, and resource specifications.  \r\n\r\n3. Execute `./launch.sh`\r\n\r\n  \u003e :warning: Snakemake provides a `--cluster` flag, but this functionality should be avoided as it's really not suited for HPC systems.\r\n\r\n## License\r\n\r\n[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\r\n\r\n## Contact\r\n\r\n\u003chttps://permedcoe.eu/contact/\u003e\r\n\r\nThis software has been developed for the [PerMedCoE project](https://permedcoe.eu/), funded by the European Commission (EU H2020 [951773](https://cordis.europa.eu/project/id/951773)).\r\n\r\n![](https://permedcoe.eu/wp-content/uploads/2020/11/logo_1.png \"PerMedCoE\")\r\n","organization":"PerMedCoE","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/692?version=1","name":"main @ 8be6bd2","author":[],"descriptor_type":[]}]},{"id":"693","url":"https://workflowhub.eu/workflows/693","name":"Reference-based assembly with 
bacpage","description":"![bacpage](https://raw.githubusercontent.com/CholGen/bacpage/split_into_command/.github/logo_dark.png){width=500}\r\n\r\nThis repository contains an easy-to-use pipeline for the assembly and analysis of bacterial genomes using ONT long-read or Illumina short-read technology.\r\n\r\n# Introduction\r\nAdvances in sequencing technology during the COVID-19 pandemic has led to massive increases in the generation of sequencing data. Many bioinformatics tools have been developed to analyze this data, but very few tools can be utilized by individuals without prior bioinformatics training.\r\n\r\nThis pipeline was designed to encapsulate pre-existing tools to automate analysis of whole genome sequencing of bacteria. Installation is fast and straightfoward. The pipeline is easy to setup and contains rationale defaults, but is highly modular and configurable by more advance users.\r\nA successful run generates consensus sequences, typing information, phylogenetic tree, and quality control report.\r\n\r\n# Features\r\nWe anticipate the pipeline will be able to perform the following functions:\r\n- [x] Reference-based assembly of Illumina paired-end reads\r\n- [x] *De novo* assembly of Illumina paired-end reads\r\n- [ ] *De novo* assembly of ONT long reads\r\n- [x] Run quality control checks\r\n- [x] Variant calling using [bcftools](https://github.com/samtools/bcftools)\r\n- [x] Maximum-likelihood phylogenetic inference of processed samples and background dataset using [iqtree](https://github.com/iqtree/iqtree2) \r\n- [x] MLST profiling and virulence factor detection\r\n- [x] Antimicrobial resistance genes detection\r\n- [ ] Plasmid detection\r\n\r\n# Installation\r\n1. Install `miniconda` by running the following two command:\r\n```commandline\r\ncurl -L -O \"https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh\"\r\nbash Mambaforge-$(uname)-$(uname -m).sh\r\n```\r\n\r\n2. 
Clone the repository:\r\n```commandline\r\ngit clone https://github.com/CholGen/bacpage.git\r\n```\r\n\r\n3. Install and activate the pipeline's conda environment:\r\n```commandline\r\ncd bacpage/\r\nmamba env create -f environment.yaml\r\nmamba activate bacpage\r\n```\r\n\r\n4. Install the `bacpage` command:\r\n```commandline\r\npip install .\r\n```\r\n\r\n5. Test the installation:\r\n```commandline\r\nbacpage -h\r\nbacpage version\r\n```\r\nThese command should print the help and version of the program. Please create an issue if this is not the case.\r\n\r\n# Usage\r\n0. Navigate to the pipeline's directory.\r\n1. Copy the `example/` directory to create a directory specifically for each batch of samples.\r\n```commandline\r\ncp example/ \u003cyour-project-directory-name\u003e\r\n```\r\n2. Place raw sequencing reads in the `input/` directory of your project directory.\r\n3. Record the name and absolute path of raw sequencing reads in the `sample_data.csv` found within your project directory.\r\n4. Replace the values `\u003cyour-project-directory-name\u003e` and `\u003csequencing-directory\u003e` in `config.yaml` found within your project directory, with the absolute path of your project directory and pipeline directory, respectively.\r\n5. Determine how many cores are available on your computer:\r\n```commandline\r\ncat /proc/cpuinfo | grep processor\r\n```\r\n6. From the pipeline's directory, run the entire pipeline on your samples using the following command:\r\n```commandline\r\nsnakemake --configfile \u003cyour-project-directory-name\u003e/config.yaml --cores \u003ccores\u003e\r\n```\r\nThis will generate a consensus sequence in FASTA format for each of your samples and place them in `\u003cyour-project-directory-name\u003e/results/consensus_sequences/\u003csample\u003e.masked.fasta`. An HTML report containing alignment and quality metrics for your samples can be found at `\u003cyour-project-directory-name\u003e/results/reports/qc_report.html`. 
A phylogeny comparing your sequences to the background dataset can be found at `\u003cyour-project-directory-name\u003e/results/phylogeny/phylogeny.tree`\r\n","organization":"CholGen","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/693?version=1","name":"split_into_command @ ea128c8","author":[],"descriptor_type":[]}]},{"id":"694","url":"https://workflowhub.eu/workflows/694","name":"fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex","description":"This workflow processes the CMO fastqs with CITE-seq-Count and include the translation step required for cellPlex processing. In parallel it processes the Gene Expresion fastqs with STARsolo, filter cells with DropletUtils and reformat all outputs to be easily used by the function 'Read10X' from Seurat.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/694?version=1","name":"v0.1","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/694?version=2","name":"v0.2","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/694?version=3","name":"v0.3","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/694?version=4","name":"v0.4","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/694?version=5","name":"v0.5","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/694?version=6","name":"v0.6","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/694?version=7","name":"v0.6.2","author":["Wendi 
Bacon"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/694?version=8","name":"v0.6.3","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/694?version=9","name":"v0.6.4","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/694?version=10","name":"v0.6.5","author":["Wendi Bacon"],"descriptor_type":["GALAXY"]}]},{"id":"695","url":"https://workflowhub.eu/workflows/695","name":"Phylogeny reconstruction using bacpage","description":"# BACPAGE\r\n\r\nThis repository contains an easy-to-use pipeline for the assembly and analysis of bacterial genomes using ONT long-read or Illumina short-read technology. \r\nRead the complete documentation and instructions for bacpage and each of its functions [here](https://cholgen.github.io/sequencing-resources/bacpage-command.html)\r\n\r\n# Introduction\r\nAdvances in sequencing technology during the COVID-19 pandemic has led to massive increases in the generation of sequencing data. Many bioinformatics tools have been developed to analyze this data, but very few tools can be utilized by individuals without prior bioinformatics training.\r\n\r\nThis pipeline was designed to encapsulate pre-existing tools to automate analysis of whole genome sequencing of bacteria. \r\nInstallation is fast and straightfoward. 
\r\nThe pipeline is easy to setup and contains rationale defaults, but is highly modular and configurable by more advance users.\r\nBacpage has individual commands to generate consensus sequences, perform *de novo* assembly, construct phylogenetic tree, and generate quality control reports.\r\n\r\n# Features\r\nWe anticipate the pipeline will be able to perform the following functions:\r\n- [x] Reference-based assembly of Illumina paired-end reads\r\n- [x] *De novo* assembly of Illumina paired-end reads\r\n- [ ] *De novo* assembly of ONT long reads\r\n- [x] Run quality control checks\r\n- [x] Variant calling using [bcftools](https://github.com/samtools/bcftools)\r\n- [x] Maximum-likelihood phylogenetic inference of processed samples and background dataset using [iqtree](https://github.com/iqtree/iqtree2) \r\n- [x] MLST profiling and virulence factor detection\r\n- [x] Antimicrobial resistance genes detection\r\n- [ ] Plasmid detection\r\n\r\n# Installation\r\n1. Install `mamba` by running the following two command:\r\n```commandline\r\ncurl -L -O \"https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh\"\r\nbash Mambaforge-$(uname)-$(uname -m).sh\r\n```\r\n\r\n2. Clone the bacpage repository:\r\n```commandline\r\ngit clone https://github.com/CholGen/bacpage.git\r\n```\r\n\r\n3. Switch to the development branch of the pipeline:\r\n```commandline\r\ncd bacpage/\r\ngit checkout -b split_into_command\r\n```\r\n\r\n3. Install and activate the pipeline's conda environment:\r\n```commandline\r\nmamba env create -f environment.yaml\r\nmamba activate bacpage\r\n```\r\n\r\n4. Install the `bacpage` command:\r\n```commandline\r\npip install .\r\n```\r\n\r\n5. Test the installation:\r\n```commandline\r\nbacpage -h\r\nbacpage version\r\n```\r\nThese command should print the help and version of the program. Please create an issue if this is not the case.\r\n\r\n# Updating\r\n\r\n1. 
Navigate to the directory where you cloned the bacpage repository on the command line:\r\n```commandline\r\ncd bacpage/\r\n```\r\n2. Activate the bacpage conda environment:\r\n```commandline\r\nmamba activate bacpage\r\n```\r\n3. Pull the latest changes from GitHub:\r\n```commandline\r\ngit pull\r\n```\r\n4. Update the bacpage conda environment:\r\n```commandline\r\nmamba env update -f environment.yaml\r\n```\r\n5. Reinstall the `bacpage` command:\r\n```commandline\r\npip install .\r\n```\r\n\r\n# Usage\r\n0. Activate the bacpage conda environment:\r\n```commandline\r\nmamba activate bacpage\r\n```\r\n1. Create a directory specifically for the batch of samples you would like to analyze (called a project directory).\r\n```commandline\r\nbacpage setup [your-project-directory-name]\r\n```\r\n2. Place paired sequencing reads in the `input/` directory of your project directory.\r\n3. From the pipeline's directory, run the reference-based assembly pipeline on your samples using the following command:\r\n```commandline\r\nbacpage assemble [your-project-directory-name]\r\n```\r\nThis will generate a consensus sequence in FASTA format for each of your samples and place them in \r\n`\u003cyour-project-directory-name\u003e/results/consensus_sequences/\u003csample\u003e.masked.fasta`. 
An HTML report containing alignment \r\nand quality metrics for your samples can be found at `\u003cyour-project-directory-name\u003e/results/reports/qc_report.html`.\r\n","organization":"CholGen","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/695?version=1","name":"split_into_command @ 1d7909a","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/695?version=2","name":"split_into_command @ ad59e4b","author":[],"descriptor_type":[]}]},{"id":"697","url":"https://workflowhub.eu/workflows/697","name":"ERGA DataQC ONT v2505 (WF0)","description":"The workflow takes ONT reads collection, runs SeqKit and Nanoplot. The main outputs are a table and plots of raw reads stats.","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/697?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"698","url":"https://workflowhub.eu/workflows/698","name":"ERGA Profiling Illumina v2311 (WF1)","description":"The workflow takes a trimmed Illumina paired-end reads collection, runs Meryl to create a K-mer database, Genomescope2 to estimate genome properties and Smudgeplot to estimate ploidy. The main results are K-mer database and genome profiling plots, tables, and values useful for downstream analysis. Default K-mer length and ploidy for Genomescope are 21 and 2, respectively. 
","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/698?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"700","url":"https://workflowhub.eu/workflows/700","name":"Pangenome databases provide superior host removal and mycobacteria classification from clinical metagenomic data","description":"# Pangenome databases provide superior host removal and mycobacteria classification from clinical metagenomic data\r\n\r\n\u003e Hall, M, Coin, L., Pangenome databases provide superior host removal and mycobacteria classification from clinical metagenomic data. bioRxiv 2023. doi: [10.1101/2023.09.18.558339][doi]\r\n\r\nBenchmarking different ways of doing read (taxonomic) classification, with a focus on\r\nremoval of contamination and classification of _M. tuberculosis_ reads.\r\n\r\nThis repository contains the code and snakemake pipeline to build/download the\r\ndatabases, obtain all results from [the paper][doi], along with accompanying configuration\r\nfiles.\r\n\r\nCustom databases have all been uploaded to Zenodo, along with the simulated reads:\r\n\r\n- Nanopore simulated metagenomic reads - \u003chttps://doi.org/10.5281/zenodo.8339788\u003e\r\n- Illumina simulated metagenomic reads - \u003chttps://doi.org/10.5281/zenodo.8339790\u003e\r\n- Nanopore and Illumina artificial real reads - \u003chttps://doi.org/10.5281/zenodo.10472796\u003e\r\n- Kraken2 database built from the Human Pangenome Reference Consortium\r\n  genomes - \u003chttps://doi.org/10.5281/zenodo.8339731\u003e\r\n- Kraken2 database built from the kraken2 Human\r\n  library - \u003chttps://doi.org/10.5281/zenodo.8339699\u003e\r\n- Kraken2 database built from a *Mycobacterium* representative set of\r\n  genomes - \u003chttps://doi.org/10.5281/zenodo.8339821\u003e\r\n- A (fasta) database of representative genomes from the *Mycobacterium*\r\n  
genus - \u003chttps://doi.org/10.5281/zenodo.8339940\u003e\r\n- A (fasta) database of *M. tuberculosis* genomes from a variety of\r\n  lineages - \u003chttps://doi.org/10.5281/zenodo.8339947\u003e\r\n- The fasta file built from the [Clockwork](https://github.com/iqbal-lab-org/clockwork)\r\n  decontamination pipeline - \u003chttps://doi.org/10.5281/zenodo.8339802\u003e\r\n\r\n## Example usage\r\n\r\nWe provide some usage examples showing how to download the databases and then use them\r\non your reads.\r\n\r\n### Human read removal\r\n\r\nThe method we found to give the best balance of runtime, memory usage, and precision and\r\nrecall was kraken2 with a database built from the Human Pangenome Reference Consortium\r\ngenomes.\r\n\r\nThis example has been wrapped into a standalone tool called [`nohuman`](https://github.com/mbhall88/nohuman/) which takes a fastq as input and returns a fastq with human reads removed.\r\n\r\n#### Download human database\r\n\r\n```\r\nmkdir HPRC_db/\r\ncd HPRC_db\r\nURL=\"https://zenodo.org/record/8339732/files/k2_HPRC_20230810.tar.gz\"\r\nwget \"$URL\"\r\ntar -xzf k2_HPRC_20230810.tar.gz\r\nrm k2_HPRC_20230810.tar.gz\r\n```\r\n\r\n#### Run kraken2 with HPRC database\r\n\r\nYou'll need [kraken2](https://github.com/DerrickWood/kraken2) installed for this step.\r\n\r\n```\r\nkraken2 --threads 4 --db HPRC_db/ --output classifications.tsv reads.fq\r\n```\r\n\r\nIf you are using Illumina reads, a slight adjustment is needed\r\n\r\n```\r\nkraken2 --paired --threads 4 --db HPRC_db/ --output classifications.tsv reads_1.fq reads_2.fq\r\n```\r\n\r\n#### Extract non-human reads\r\n\r\nYou'll need [seqkit](https://github.com/shenwei356/seqkit) installed for this step\r\n\r\nFor Nanopore data\r\n\r\n```\r\nawk -F'\\t' '$1==\"U\" {print $2}' classifications.tsv | \\\r\n  seqkit grep -f - -o reads.depleted.fq reads.fq\r\n```\r\n\r\nFor Illumina data\r\n\r\n```\r\nawk -F'\\t' '$1==\"U\" {print $2}' classifications.tsv \u003e ids.txt\r\nseqkit grep 
--id-regexp '^(\\S+)/[12]' -f ids.txt -o reads_1.depleted.fq reads_1.fq\r\nseqkit grep --id-regexp '^(\\S+)/[12]' -f ids.txt -o reads_2.depleted.fq reads_2.fq\r\n```\r\n\r\n### *M. tuberculosis* classification/enrichment\r\n\r\nFor this step we recommend either [minimap2](https://github.com/lh3/minimap2) or kraken2\r\nwith a *Mycobacterium* genus database. We leave it to the user to decide which approach\r\nthey prefer based on the results in our manuscript.\r\n\r\n#### Download databases\r\n\r\n```\r\nmkdir Mycobacterium_db\r\ncd Mycobacterium_db\r\n# download database for use with minimap2\r\nURL=\"https://zenodo.org/record/8339941/files/Mycobacterium.rep.fna.gz\"\r\nwget \"$URL\"\r\nIDS_URL=\"https://zenodo.org/record/8343322/files/mtb.ids\"\r\nwget \"$IDS_URL\"\r\n# download kraken database\r\nURL=\"https://zenodo.org/record/8339822/files/k2_Mycobacterium_20230817.tar.gz\"\r\nwget \"$URL\"\r\ntar -xzf k2_Mycobacterium_20230817.tar.gz\r\nrm k2_Mycobacterium_20230817.tar.gz\r\n```\r\n\r\n#### Classify reads\r\n\r\n**minimap2**\r\n\r\n```\r\n# nanopore\r\nminimap2 --secondary=no -c -t 4 -x map-ont -o reads.aln.paf Mycobacterium_db/Mycobacterium.rep.fna.gz reads.depleted.fq\r\n# illumina\r\nminimap2 --secondary=no -c -t 4 -x sr -o reads.aln.paf Mycobacterium_db/Mycobacterium.rep.fna.gz reads_1.depleted.fq reads_2.depleted.fq\r\n```\r\n\r\n**kraken2**\r\n\r\n```\r\n# nanopore\r\nkraken2 --db Mycobacterium_db --threads 4 --report myco.kreport --output classifications.myco.tsv reads.depleted.fq\r\n# illumina\r\nkraken2 --db Mycobacterium_db --paired --threads 4 --report myco.kreport --output classifications.myco.tsv reads_1.depleted.fq reads_2.depleted.fq\r\n```\r\n\r\n#### Extract *M. 
tuberculosis* reads\r\n\r\n**minimap2**\r\n\r\n```\r\n# nanopore\r\ngrep -Ff Mycobacterium_db/mtb.ids reads.aln.paf | cut -f1 | \\\r\n  seqkit grep -f - -o reads.enriched.fq reads.depleted.fq\r\n# illumina\r\ngrep -Ff Mycobacterium_db/mtb.ids reads.aln.paf | cut -f1 \u003e keep.ids\r\nseqkit grep -f keep.ids -o reads_1.enriched.fq reads_1.depleted.fq\r\nseqkit grep -f keep.ids -o reads_2.enriched.fq reads_2.depleted.fq\r\n```\r\n\r\n**kraken2**\r\n\r\nWe'll use\r\nthe [`extract_kraken_reads.py` script](https://github.com/jenniferlu717/KrakenTools#extract_kraken_readspy)\r\nfor this\r\n\r\n```\r\n# nanopore\r\npython extract_kraken_reads.py -k classifications.myco.tsv -1 reads.depleted.fq -o reads.enriched.fq -t 1773 -r myco.kreport --include-children\r\n# illumina\r\npython extract_kraken_reads.py -k classifications.myco.tsv -1 reads_1.depleted.fq -2 reads_2.depleted.fq -o reads_1.enriched.fq -o2 reads_2.enriched.fq -t 1773 -r myco.kreport --include-children\r\n```\r\n\r\n[doi]: https://doi.org/10.1101/2023.09.18.558339 \r\n","organization":"Pangenome database project","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/700?version=1","name":"main @ 5b1d96c","author":["Michael Hall"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/700?version=2","name":"main @ f926f32","author":["Michael Hall"],"descriptor_type":["SMK"]}]},{"id":"701","url":"https://workflowhub.eu/workflows/701","name":"ERGA ONT+Illumina Collapsed Purge+QC v2311 (WF3)","description":"The workflow takes a trimmed Illumina WGS paired-end reads collection, Collapsed contigs, and the values for transition parameter and max coverage depth (calculated from WF1) to run Purge_Dups. It produces purged Collapsed contigs assemblies, and runs all the QC analysis (gfastats, BUSCO, and Merqury). 
","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/701?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"702","url":"https://workflowhub.eu/workflows/702","name":"ERGA HiC Collapsed Scaffolding+QC YaHS v2311 (WF4)","description":"The workflow takes trimmed HiC forward and reverse reads, and one assembly (e.g.: Hap1 or Pri or Collapsed) to produce a scaffolded assembly using YaHS. It also runs all the QC analyses (gfastats, BUSCO, and Merqury). ","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/702?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"703","url":"https://workflowhub.eu/workflows/703","name":"HP2NET - Framework for Construction of Phylogenetic Networks on High Performance Computing (HPC) Environment","description":"# Framework for construction of phylogenetic networks on High Performance Computing (HPC) environment\r\n\r\n## Introduction\r\n\r\nPhylogeny refers to the evolutionary history and relationship between biological lineages related by common descent. Reticulate evolution refers to the origination of lineages through the complete or partial merging of ancestor lineages. Networks may be used to represent lineage independence events in non-treelike phylogenetic processes.\r\n\r\nThe methodology for reconstructing networks is still in development. Here we explore two methods for reconstructing rooted explicit phylogenetic networks, PhyloNetworks and Phylonet, which employ computationally expensive and time consuming algorithms. 
The construction of phylogenetic networks follows a coordinated processing flow of data sets analyzed and processed by the coordinated execution of a set of different programs, packages, libraries or pipelines, called workflow activities. \r\n\r\nIn view of the complexity in modeling network experiments, the present work introduces a workflow for phylogenetic network analyses coupled to be executed in High-Performance Computing (HPC) environments. The workflow aims to integrate well-established software, pipelines and scripts, implementing a challenging task since these tools do not consistently profit from the HPC environment, leading to an increase in the expected makespan and idle computing resources.\r\n\r\n## Requirements\r\n\r\n1. Python \u003e= 3.8\r\n   1. Biopython \u003e= 1.75\r\n   2. Pandas \u003e= 1.3.2\r\n   3. Parsl \u003e= 1.0\r\n3. Raxml \u003e= 8.2.12\r\n4. Astral  \u003e= 5.7.1\r\n5. SnaQ (PhyloNetworks) \u003e= 0.13.0\r\n6. MrBayes \u003e= 3.2.7a\r\n7. BUCKy \u003e=  1.4.4\r\n8. Quartet MaxCut \u003e= 2.10\r\n9. PhyloNet \u003e= 3.8.2\r\n10. Julia \u003e= 1.4.1\r\n11. IQTREE \u003e= 2.0\r\n\r\n\r\n## How to use\r\n\r\n### Setting up the framework\r\n\r\nThe framework uses a file to get all the needed parameters. For default it loads the file *default.ini* in the config folder, but you can explicitly load other files using the argument ``-s name_of_the_file``, *e.g.* ``-s config/test.ini``.\r\n\r\n* Edit *parsl.env* with the environment variables you may need, such as modules loaded in SLURM\r\n* Edit *work.config* with the directories of your phylogeny studies (the framework receives as input a set of homologous gene alignments of species in the nexus format).\r\n* Edit *default.ini* with the path for each of the needed softwares and the parameters of the execution provider.\r\n\r\nFor default, the execution logs are created in the ``runinfo`` folder. 
To change it you can use the `-r folder_path` parameter.\r\n\r\n#### Contents of the configuration file\r\n\r\n* General settings\r\n\r\n```ini\r\n[GENERAL]\r\nExecutionProvider = SLURM\r\nScriptDir \t\t= ./scripts\r\nEnviron\t\t\t= config/parsl.env\r\nWorkload\t\t= config/work.config\r\nNetworkMethod   = MP\r\nTreeMethod      = RAXML\r\nBootStrap       = 1000\r\n```\r\n\r\n1. The framework can be executed in a HPC environment using the Slurm resource manager using the parameter ``ExecutionProvider`` equals to ``SLURM`` or locally with ``LOCAL``. \r\n2. The path of the scripts folder is assigned  in ``ScriptDir``. It's recommended to use the absolute path to avoid errors.\r\n3. The ``Environ`` parameter contains the path of the file used to set environment variables. More details can be seen below.\r\n4. In ``Workload`` is the path of the experiments that will be performed.\r\n5. ``NetworkMethod`` and ``TreeMethod`` are the default network and tree methods that will be used to perform the workloads' studies.\r\n6. ``Bootstrap`` is the parameter used in all the software that use bootstrap (RAxML, IQTREE and ASTRAL)\r\n\r\n* Workflow execution settings\r\n \r\n  When using SLURM, these are the needed parameters:\r\n  ```ini\r\n  [WORKFLOW]\r\n  Monitor\t\t\t= False\r\n  PartCore\t= 24\r\n  PartNode\t= 1\r\n  Walltime\t= 00:20:00\r\n  ```\r\n\r\n  1. ``Monitor`` is a parameter to use parsl's monitor module in HPC environment. It can be *true* or *false*. If you want to use it, it's necessary to set it as *true* and manually change the address in ``infra_manager.py``\r\n  2. If you are using it in a HPC environment (using SLURM), the framework is going to submit in a job. 
``PartCore`` is the number of cores of the node; ``PartNode`` is the number of nodes of the partition; and the ``Walltime`` parameter is the maximum amount of time the job will be able to run.\r\n\r\n  However, if the desired execution method is the LocalProvider, _i.e._ the execution is being performed in your own machine, only these parameters are necessary:\r\n\r\n  ```ini\r\n  [WORKFLOW]\r\n  Monitor\t\t\t= False\r\n  MaxCore\t= 6\r\n  CoresPerWorker\t= 1\r\n\r\n  ```\r\n\r\n* RAxML settings\r\n\r\n  ```ini\r\n  [RAXML]\r\n  RaxmlExecutable = raxmlHPC-PTHREADS\r\n  RaxmlThreads \t= 6\r\n  RaxmlEvolutionaryModel = GTRGAMMA --HKY85\r\n  ```\r\n\r\n* IQTREE settings\r\n\r\n  ```ini\r\n  [IQTREE]\r\n  IqTreeExecutable = iqtree2\r\n  IqTreeEvolutionaryModel = TIM2+I+G \r\n  IqTreeThreads = 6\r\n  ```\r\n\r\n* ASTRAL settings\r\n\r\n  ```ini\r\n  [ASTRAL]\r\n  AstralExecDir \t= /opt/astral/5.7.1\r\n  AstralJar \t\t= astral.jar\r\n  ```\r\n\r\n* PhyloNet settings\r\n\r\n  ```ini\r\n  [PHYLONET]\r\n  PhyloNetExecDir \t= /opt/phylonet/3.8.2/\r\n  PhyloNetJar \t\t= PhyloNet.jar\r\n  PhyloNetThreads     = 6\r\n  PhyloNetHMax        = 3\r\n  PhyloNetRuns        = 5\r\n  ```\r\n\r\n* SNAQ settings\r\n\r\n  ```ini\r\n  [SNAQ]\r\n  SnaqThreads\t\t= 6\r\n  SnaqHMax        = 3\r\n  SnaqRuns        = 3\r\n  ```\r\n\r\n* Mr. Bayes settings\r\n\r\n  ```ini\r\n  [MRBAYES]\r\n  MBExecutable\t= mb\r\n  MBParameters\t= set usebeagle=no beagledevice=cpu beagleprecision=double; mcmcp ngen=100000 burninfrac=.25 samplefreq=50 printfreq=10000 diagnfreq=10000 nruns=2 nchains=2 temp=0.40 swapfreq=10\r\n  ```\r\n\r\n* Bucky settings\r\n\r\n  ```ini\r\n  [BUCKY]\r\n  BuckyExecutable = bucky\r\n  MbSumExecutable = mbsum\r\n  ```\r\n\r\n* Quartet MaxCut\r\n\r\n  ```ini\r\n  [QUARTETMAXCUT]\r\n  QmcExecDir       = /opt/quartet/\r\n  QmcExecutable    = find-cut-Linux-64\r\n  ```\r\n\r\n#### Workload file\r\n\r\nBy default the workload file is ``work.config`` in the *config* folder. 
The file contains the absolute paths of the experiment's folders.\r\n\r\n```\r\n/home/rafael.terra/Biocomp/data/Denv_1\r\n```\r\n\r\nYou can comment folders using the # character in the beginning of the path. *e. g.* ``#/home/rafael.terra/Biocomp/data/Denv_1``. That way the framework won't read this path.\r\n\r\nYou can also run a specific flow for a path using ``@TreeMethod|NetworkMethod`` in the end of a path. Where *TreeMethod* can be RAXML, IQTREE or MRBAYES and *NetworkMethod* can be MPL or MP (case sensitive). The supported flows are: ``RAXML|MPL``, ``RAXML|MP``, ``IQTREE|MPL``, ``IQTREE|MP`` and ``MRBAYES|MPL``. For example:\r\n\r\n```\r\n/home/rafael.terra/Biocomp/data/Denv_1@RAXML|MPL\r\n```\r\n\r\n#### Environment file\r\n\r\nThe environment file contains all the environment variables (like module files used in SLURM) used during the framework execution. Example:\r\n\r\n```sh\r\nmodule load python/3.8.2\r\nmodule load raxml/8.2_openmpi-2.0_gnu\r\nmodule load java/jdk-12\r\nmodule load iqtree/2.1.1\r\nmodule load bucky/1.4.4\r\nmodule load mrbayes/3.2.7a-OpenMPI-4.0.4\r\nsource /scratch/app/modulos/julia-1.5.1.sh\r\n```\r\n\r\n#### Experiment folder\r\n\r\nEach experiment folder needs to have a *input folder* containing a *.tar.gz* compressed file and a *.json* with the following content. **The framework considers that there is only one file of each extension in the input folder**.\r\n\r\n```json\r\n{\r\n\t\"Mapping\":\"\",\r\n\t\"Outgroup\":\"\"\r\n}\r\n```\r\n\r\nWhere ``Mapping`` is a direct mapping of the taxon, when there are multiple alleles per species, in the format ``species1:taxon1,taxon2;species2:taxon3,taxon4`` *(white spaces are not supported)* and ``Outgroup`` is the taxon used to root the network. The Mapping parameter is optional (although it has to be in the json file without value), but the outgroup is obligatory. It's important to say that the flow *MRBAYES|MPL* doesn't support multiple alleles per species. 
Example:\r\n\r\n```json\r\n{\r\n  \"Mapping\": \"dengue_virus_type_2:FJ850082,FJ850088,JX669479,JX669482,JX669488,KP188569;dengue_virus_type_3:FJ850079,FJ850094,JN697379,JX669494;dengue_virus_type_1:FJ850073,FJ850084,FJ850093,JX669465,JX669466,JX669475,KP188545,KP188547;dengue_virus_type_4:JN559740,JQ513337,JQ513341,JQ513343,JQ513344,JQ513345,KP188563,KP188564;Zika_virus:MH882543\", \r\n  \"Outgroup\": \"MH882543\"\r\n}\r\n```\r\n\r\n\r\n## Running the framework\r\n\r\n* In a local machine:\r\n\r\n  After setting up the framework, just run ``python3 parsl_workflow.py``.\r\n  \r\n* In a SLURM environment:\r\n\r\n  Create a submission script that contains: ``python3 parsl_workflow.py``.\r\n\r\n  ```sh\r\n  #!/bin/bash\r\n  #SBATCH --time=15:00:00\r\n  #SBATCH -e slurm-%j.err\r\n  #SBATCH -o slurm-%j.out\r\n  module load python/3.9.6\r\n  cd /path/to/biocomp\r\n  python3 parsl_workflow.py\r\n  ```\r\n\r\nThe framework is under heavy development. If you notice any bug, please create an issue here on GitHub.\r\n\r\n### Running in a DOCKER container\r\n\r\nThe framework is also available to be used in Docker. It can be built from source or pushed from DockerHub.\r\n\r\n#### Building it from the source code\r\n\r\nAdapt the default settings file ``config/default.ini`` according to your machine, setting the number of threads and bootstrap. After that, run ``docker build -t hp2net .`` in the project's root folder.\r\n\r\n#### Downloading it from Dockerhub\r\n\r\nThe docker image can also be downloaded from [Docker hub](https://hub.docker.com/repository/docker/rafaelstjf/hp2net/general). To do that, just run the command ``docker pull rafaelstjf/hp2net:main``\r\n\r\n#### Running\r\n\r\nThe first step to run the framework is to setup your dataset. 
To test if the framework is running without problems in your machine, you can use the [example datasets](example_data).\r\n\r\n![Alt text](docs/example_data.png)\r\n\r\nExtracting the ``example_data.zip`` file, a new folder called ``with_outgroup`` is created. This folder contain four datasets of DENV sequences.\r\n\r\nThe next step is the creation of the settings and workload files. For the settings file, download the [default.ini](config/default.ini) from this repository and change it to you liking (the path of all software are already configured to run on docker). The workload file is a text file containing the absolute path of the datasets, followed by the desired pipeline, as shown before in this document. Here for example purposes, the ``input.txt`` file was created.\r\n\r\n![Alt text](docs/example_files.png)\r\n\r\nWith all the files prepared, the framework can be executed from the ``example_data`` folder as following:\r\n\r\n``docker run --rm -v $PWD:$PWD rafaelstjf/hp2net:main -s $PWD/default.ini -w $PWD/input.txt``\r\n\r\n**Important:** the docker doesn't save your logs, for that add the parameter: ``-r $PWD/name_of_your_log_folder``.\r\n\r\n---\r\nIf you are running it on **Santos Dumont Supercomputer**, both downloading and execution of the docker container need to be performed from a submission script and executed using ``sg docker -c \"sbatch script.sh\"``. 
The snippet below shows an example of submission script.\r\n\r\n```sh\r\n#!/bin/bash\r\n#SBATCH --nodes=1\r\n#SBATCH --ntasks-per-node=24\r\n#SBATCH -p cpu_small\r\n#SBATCH -J Hp2NET\r\n#SBATCH --exclusive\r\n#SBATCH --time=02:00:00\r\n#SBATCH -e slurm-%j.err\r\n#SBATCH -o slurm-%j.out\r\n\r\nDIR='/scratch/pcmrnbio2/rafael.terra/WF_parsl/example_data'\r\ndocker  pull rafaelstjf/hp2net:main\r\n\r\ndocker run --rm -v $DIR:$DIR rafaelstjf/hp2net:main -s ${DIR}/sdumont.ini -w ${DIR}/entrada.txt -r ${DIR}/logs\r\n```\r\n\r\n## If you use it, please cite\r\n\r\nTerra, R., Coelho, M., Cruz, L., Garcia-Zapata, M., Gadelha, L., Osthoff, C., ... \u0026 Ocana, K. (2021, July). Gerência e Análises de Workflows aplicados a Redes Filogenéticas de Genomas de Dengue no Brasil. In *Anais do XV Brazilian e-Science Workshop* (pp. 49-56). SBC.\r\n\r\n**Also cite all the coupled software!**\r\n\r\n","organization":"HP2NET - Framework for construction of phylogenetic networks on High Performance Computing (HPC) environment","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/703?version=1","name":"main @ 20ecbe3","author":["Rafael Terra"],"descriptor_type":[]}]},{"id":"706","url":"https://workflowhub.eu/workflows/706","name":"Bactria: BarCode TRee Inference and Analysis","description":"![workflow](https://github.com/naturalis/barcode-constrained-phylogeny/actions/workflows/python-package-conda.yml/badge.svg)\r\n[![License: Apache-2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)\r\n[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10519081.svg)](https://doi.org/10.5281/zenodo.10519081)\r\n\r\n![Logo](https://github.com/naturalis/barcode-constrained-phylogeny/blob/main/doc/logo-small.png?raw=true)\r\n\r\n# Bactria: BarCode TRee Inference and Analysis\r\nThis repository contains code and data for building very large, 
topologically-constrained \r\nbarcode phylogenies through a divide-and-conquer strategy. Such trees are useful as \r\nreference materials for curating barcode data by detecting rogue terminals (indicating\r\nincorrect taxonomic annotation) and in the comparable calculation of alpha and beta \r\nbiodiversity metrics across metabarcoding assays. \r\n\r\nThe input data for the approach we develop here currently comes from BOLD data dumps. \r\nThe international database [BOLD Systems](https://www.boldsystems.org/index.php) \r\ncontains DNA barcodes for hundreds of thousands of species, with multiple barcodes per \r\nspecies. The data dumps we use here are TSV files whose columns conform to the nascent\r\nBCDM (barcode data model) vocabulary. As such, other data sources that conform to this\r\nvocabulary could in the future be used as well, such as [UNITE](https://unite.ut.ee/).\r\n\r\nTheoretically, such data could be filtered and aligned per DNA marker to make \r\nphylogenetic trees. However, there are two limiting factors: building very large \r\nphylogenies is computationally intensive, and barcodes are not considered ideal for \r\nbuilding big trees because they are short (providing insufficient signal to resolve large \r\ntrees) and because they tend to saturate across large patristic distances.\r\n\r\n![concept](https://github.com/naturalis/barcode-constrained-phylogeny/blob/main/doc/concept.png)\r\n\r\nBoth problems can be mitigated by using the \r\n[Open Tree of Life](https://tree.opentreeoflife.org/opentree/argus/opentree13.4@ott93302) \r\nas a further source of phylogenetic signal. The BOLD data can be split into chunks that \r\ncorrespond to Open Tree of Life clades. These chunks can be made into alignments and \r\nsubtrees. The OpenTOL can be used as a constraint in the algorithms to make these. The \r\nchunks are then combined in a large synthesis by grafting them on a backbone made from \r\nexemplar taxa from the subtrees. 
Here too, the OpenTOL is a source of phylogenetic \r\nconstraint.\r\n\r\nIn this repository this concept is developed for both animal species and plant species.\r\n\r\n## Installation\r\n\r\nThe pipeline and its dependencies are managed using conda. On a linux or osx system, you \r\ncan follow these steps to set up the `bactria` Conda environment using an `environment.yml` \r\nfile and a `requirements.txt` file:\r\n\r\n1. **Clone the Repository:**  \r\n   Clone the repository containing the environment files to your local machine:\r\n   ```bash\r\n   git clone https://github.com/naturalis/barcode-constrained-phylogeny.git\r\n   cd barcode-constrained-phylogeny\r\n   ```\r\n2. **Create the Conda Environment:**\r\n   Create the bactria Conda environment using the environment.yml file with the following \r\n   command:\r\n   ```bash\r\n   conda env create -f workflow/envs/environment.yml\r\n   ```\r\n   This command will create a new Conda environment named bactria with the packages \r\n   specified in the environment.yml file. This step is largely a placeholder because\r\n   most of the dependency management is handled at the level of individual pipeline\r\n   steps, which each have their own environment specification.\r\n3. **Activate the Environment:**\r\n   After creating the environment, activate it using the conda activate command:\r\n   ```bash\r\n   conda activate bactria\r\n   ```\r\n4. **Verify the Environment:**\r\n   Verify that the bactria environment was set up correctly and that all packages were \r\n   installed using the conda list command:\r\n   ```bash\r\n   conda list\r\n   ```\r\n   This command will list all packages installed in the active conda environment. You should \r\n   see all the packages specified in the environment.yml file and the requirements.txt file.\r\n\r\n## How to run\r\n\r\nThe pipeline is implemented using snakemake, which is available within the conda \r\nenvironment that results from the installation. 
Important before running the snakemake pipeline \r\nis to change in [config/config.yaml](config/config.yaml) the number of threads available on your \r\ncomputer. Which marker gene is used in the pipeline is also specified in the config.yaml (default \r\nCOI-5P). Prior to execution, the BOLD data package to use (we used the \r\n[release of 30 December 2022](https://www.boldsystems.org/index.php/datapackage?id=BOLD_Public.30-Dec-2022)) \r\nmust be downloaded manually and stored in the [resources/](resources/) directory. If a BOLD release \r\nfrom another date is used the file names in config.yaml need to be updated. \r\n\r\nHow to run the entire pipeline:\r\n\r\n```bash \r\nsnakemake -j {number of threads} --use-conda\r\n```\r\n\r\nSnakemake rules can be performed separately:\r\n```bash \r\nsnakemake -R {Rule} -j {number of threads} --use-conda\r\n```\r\n\r\nEnter the same number at {number of threads} as you filled in previously in src/config.yaml.\r\nIn {Rule} insert the rule to be performed.\r\n\r\nHere is an overview of all the rules in the Snakefile:\r\n\r\n![graphviz (1)](https://github.com/naturalis/barcode-constrained-phylogeny/blob/main/doc/dag.svg)\r\n(zoomed view is available [here](https://raw.githubusercontent.com/naturalis/barcode-constrained-phylogeny/main/doc/dag.svg))\r\n\r\n## Repository layout\r\n\r\nBelow is the top-level layout of the repository. 
This layout is in line with \r\n[community standards](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html) and must be adhered to.\r\nAll of these subfolders contains further explanatory READMEs to explain their contents in more detail.\r\n\r\n- [config](config/) - configuration files\r\n- [doc](doc/) - documentation and background literature\r\n- [logs](logs/) - where log files are written during pipeline runtime\r\n- [resources](resources/) - external data resources (from BOLD and OpenTree) are downloaded here\r\n- [results](results/) - intermediate and final results are generated here\r\n- [workflow](workflow/) - script source code and driver snakefile \r\n\r\n## License\r\n\r\n\u0026copy; 2023 Naturalis Biodiversity Center\r\n\r\nLicensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except \r\nin compliance with the License. You may obtain a copy of the License at\r\n\r\n[http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)\r\n   \r\nUnless required by applicable law or agreed to in writing, software distributed under the License \r\nis distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express \r\nor implied. 
See the License for the specific language governing permissions and limitations under \r\nthe License.","organization":"Biodiversity Genomics Europe (general)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/706?version=1","name":"main @ 9be54a7","author":[],"descriptor_type":["SMK"]}]},{"id":"707","url":"https://workflowhub.eu/workflows/707","name":"Lysozyme in water, using dataset_small, data_persistence True","description":"Lysozyme in water full COMPSs application, using dataset_small","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/707?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"708","url":"https://workflowhub.eu/workflows/708","name":"Lysozyme in water full version, using dataset_small, data_persistence False","description":"Lysozyme in water full COMPSs application, using dataset_small","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/708?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"709","url":"https://workflowhub.eu/workflows/709","name":"Wordcount merge version, data_persistence False","description":"Wordcount merge version COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/709?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"710","url":"https://workflowhub.eu/workflows/710","name":"Wordcount reduce version, data_persistence True","description":"Wordcount reduce version COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A 
computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/710?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"711","url":"https://workflowhub.eu/workflows/711","name":"Cholesky factorisation, SIZE 4, BSIZE 512, data_persistence True","description":"Cholesky factorisation COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/711?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"712","url":"https://workflowhub.eu/workflows/712","name":"K-means data_persistence True","description":"K-means COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/712?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"713","url":"https://workflowhub.eu/workflows/713","name":"Cluster Comparison data_persistence True","description":"Cluster Comparison COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/713?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"714","url":"https://workflowhub.eu/workflows/714","name":"Lysozyme in water sample, dataset_small, data_persistence True, nct00014 username, 4 workers","description":"Lysozyme in water sample COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/714?version=1","name":"COMPSs 3.3","author":[],"descriptor_type":[]}]},{"id":"715","url":"https://workflowhub.eu/workflows/715","name":"velocyto/Velocyto-on10X-from-bundled","description":"Run velocyto to get 
loom with counts of spliced and unspliced. It will extract the 'barcodes' from the bundled outputs.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/715?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/715?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/715?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"737","url":"https://workflowhub.eu/workflows/737","name":"pipesnake","description":"![](https://github.com/AusARG/pipesnake/blob/main/docs/images/pipesnake_Logo.png)\r\n\u0026nbsp;\r\n\r\nWelcome to the *pipesnake*.  \r\nLet's get started. \r\n\r\n---\r\n\r\n# Introduction\r\n\r\n**pipesnake** is a bioinformatics best-practice analysis pipeline for phylogenomic reconstruction starting from short-read 'second-generation' sequencing data.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies.\r\n\r\n---\r\n\r\n# Motivation + Project Background\r\n\r\nWe developed *pipesnake* as part of the [***Aus***tralian ***A***mphibian and ***R***eptile ***G***enomics](https://ausargenomics.com/) (*AusARG*) initiative.  **AusARG** is a national collaborative project aiming to facilitate the development of genomics resources for Australia's unique amphibian and reptile fauna. 
This pipeline was developed specifically as part of the *AusARG Phylogenomics Working Group* with the goal of collecting a consistent set of phylogenomic data for all of Australia's frogs and reptiles, under similar assembly, alignment, and tree estimation procedures. \r\n\r\n*pipesnake* is however, applicable to much broader phylogenomic questions, and is appropriate for processing exon-capture or transcriptomic data, so long as the **input is second-generation (short-read) data**. ","organization":"Australian BioCommons","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/737?version=1","name":"v1.0","author":["Ziad Al-Bkhetan","Ian Brennan"],"descriptor_type":["NFL"]}]},{"id":"738","url":"https://workflowhub.eu/workflows/738","name":"Mitogenome-assembly-VGP0/main","description":"Generate mitochondrial assembly based on PacBio HiFi reads. Part of the VGP suite, it can be run at any time independently of the other workflows. This workflow uses MitoHiFi and a mitochondrial reference to assemble the mitochondrial genome from PacBio reads. 
You do not need to provide the reference yourself, only the Latin name of the species.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/738?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/738?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/738?version=3","name":"v0.2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/738?version=4","name":"v0.2.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"740","url":"https://workflowhub.eu/workflows/740","name":"EBP-Nor Genome Assembly Pipeline","description":"## EBP-Nor Genome Assembly pipeline\r\n\r\nThis repository contains the EBP-Nor genome assembly pipeline. This pipeline is implemented in snakemake.\r\nThis pipeline is developed to create haplotype-resolved genome assemblies from PacBio HiFi reads and HiC reads,\r\nand is primarily designed for diploid eukaryotic organisms. 
The pipeline is designed to work on a linux cluster with slurm as workload manager.\r\n\r\n## Requirements \u0026 Setup\r\n\r\nSome software need to be configured/installed before the pipeline can be run\r\n\r\n### Conda setup\r\n\r\nMost required software, including snakemake itself, can be installed using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html).\r\n\r\nOnce conda is installed, you can create a new environment containing most necessary software from the provided asm_pipeline.yaml file as follows:\r\n\r\n```shell\r\nconda create -n asm_pipeline --file=worfklow/envs/asm_pipeline.yaml\r\n```\r\n\r\n### Other software setup\r\n\r\nThe following software need to be installed manually:\r\n\r\n- KMC v3.1.1 (https://github.com/tbenavi1/KMC)\r\n- HiFiAdapterFilt (https://github.com/sheinasim/HiFiAdapterFilt)\r\n- Oatk (https://github.com/c-zhou/oatk)\r\n- OatkDB (https://github.com/c-zhou/OatkDB)\r\n- NCBI FCS-Adaptor (https://github.com/ncbi/fcs/wiki/FCS-adaptor)\r\n- NCBI FCS-GX (https://github.com/ncbi/fcs/wiki/FCS-GX)\r\n\r\nPlease refer to their respective installation instructions to properly install them. 
You will need to provide the installation paths of these software to the config file (see Parameter section).\r\n\r\n### BUSCO database setup\r\n\r\nAs in general, computing nodes are not connected to the internet, BUSCO lineage datasets need to be downloaded manually before running the pipeline.\r\nThis can easily be done by running\r\n\r\n```shell\r\nbusco --download eukaryota\r\n```\r\n\r\nYou will need to specify the folder where you downloaded the busco lineages in the config file (see Parameter section).\r\n\r\n### Data\r\n\r\nThis pipeline is created for using PacBio HiFi reads together with paired-end Hi-C data.\r\nYou will need to specify the absolute paths to these files in the config file (see Parameters section).\r\n\r\n### Parameters\r\n\r\nThe necessary config files for running the pipeline can be found in the config folder.\r\n\r\nGeneral snakemake and cluster submission parameters are defined in ```config/config.yaml```, \r\ndata- and software-specific parameters are defined in ```config/asm_params.yaml```.\r\n\r\nFirst, define the paths of the input files you want to use:\r\n- pacbio: path to the location of the PacBio HiFi reads (```.fastq.gz```)\r\n- hicF and hicR: path to the forward and reverse HiC reads respectively\r\n\r\nFor software not installed by conda, the installation path needs to be provided to the Snakemake pipeline by editing following parameters in the ```config/asm_params.yaml```:\r\n\r\n- Set the \"adapterfilt_install_dir\" parameter to the installation path of HiFiAdapterFilt\r\n- Set the \"KMC_path\" parameter to the installation path of KMC\r\n- Set the \"oatk_dir\" parameter to the installation path of oatk\r\n- Set the \"oatk_db\" parameter to the directory where you downloaded the oatk_db files\r\n- Set the \"fcs_path\" parameter to the location of the ```run_fcsadaptor.sh``` and ```fcs.py``` scripts\r\n- Set the \"fcs_adaptor_image\" and \"fcs_gx_image\" parameters to the paths to the ```fcs-adaptor.sif``` and 
```fcs-gx.sif``` files respectively\r\n- Set the \"fcs_gx_db\" parameter to the path of the fcs-gx database\r\n\r\nA couple of other parameters need to be verified as well in the config/asm_params.yaml file before running the pipeline:\r\n\r\n- The location of the input data (```input_dir```) should be set to the folder containing the input data.\r\n- The location of the downloaded busco lineages (```busco_db_dir```) should be set to the folder containing the busco lineages files downloaded earlier\r\n- The required BUSCO lineage for running the BUSCO analysis needs to set (```busco_lineage``` parameter). Run ```busco --list-datasets``` to get an overview of all available datasets.\r\n- The required oatk lineage for running organelle genome assembly (```oatk_lineage``` parameter). Check https://github.com/c-zhou/OatkDB for an overview of available lineages.\r\n- A boolean value wether the species is plant (for plastid prediction) or not (```oatk_isPlant```; set to either True or False)\r\n- The NCBI taxid of your species, required for the decontamination step (```taxid``` parameter)\r\n\r\n## Usage and run modes\r\n\r\nBefore running, make sure to activate the conda environment containing the necessary software: ```conda activate asm_assembly```.\r\nTo run the pipeline, run the following command:\r\n\r\n```\r\nsnakemake --profile config/ --configfile config/asm_params.yaml --snakefile workflow/Snakefile {run_mode}\r\n```\r\n\r\nIf you invoke the snakemake command in another directory than the one containing the ```workflow``` and ```config``` folders, \r\nor if the config files (```config.yaml``` and ```asm_params.yaml```) are in another location, you need to specify their correct paths on the command line.\r\n\r\nThe workflow parameters can be modified in 3 ways:\r\n- Directly modifying the ```config/asm_parameters.yaml``` file\r\n- Overriding the default parameters on the command line: ```--config parameter=new_value```\r\n- Overriding the default parameters 
using a different yaml file: ```--configfile path_to_parameters.yaml```\r\n\r\nThe pipeline has different running modes, and the run mode should always be the last argument on the command line:\r\n\r\n- \"all\" (default): will run the full workflow including pre-assembly (genomescope \u0026 smudgeplot), assembly, scaffolding, decontamination, and organelle assembly\r\n- \"pre_assembly\": will run only the pre-assembly steps (genomescope \u0026 smudgeplot)\r\n- \"assembly\": will filter the HiFi reads and assemble them using hifiasm (also using the Hi-C reads), and run busco\r\n- \"scaffolding\": will run all steps necessary for scaffolding (filtering, assembly, HiC filtering, scaffolding, busco), but without pre-assembly\r\n- \"decontamination\": will run assembly, scaffolding, and decontamination, but without pre-assembly and busco analyses\r\n- \"organelles\": will run only organelle genome assembly\r\n\r\n## Output\r\n\r\nAll generated output will be present in the \"results\" directory, which will be created in the folder from where you invoke the snakemake command.\r\nThis results directory contains different subdirectories related to the different steps in the assembly:\r\n- results/pre_assembly: genomescope and smudgeplot output (each in its own subfolder)\r\n- results/assembly: Hifiasm assembly output and corresponding busco results\r\n- results/scaffolding: scaffolding output, separated in two folders:\r\n  - meryl: meryl databases used for filtering HiC reads\r\n  - yahs: scaffolding output, including final scaffolds and their corresponding busco results\r\n- results/decontamination: decontamination output of the final scaffolded assembly\r\n- results/organelles: assembled organellar genomes\r\n\r\nAdditionally, a text file containing all software versions will be created in the specified input directory.\r\nThe log files of the different steps in the workflow can be found in the ```logs``` directory that will be 
created.","organization":"EBP-Nor","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/740?version=1","name":"main @ 6d5595d","author":[],"descriptor_type":["SMK"]}]},{"id":"748","url":"https://workflowhub.eu/workflows/748","name":"Random Forest Classifier executed in 3 nodes, 1 master and 2 workers, with a generated dataset, using 1 Million rows x 100 features","description":"**Name:** Random Forest \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum4  \r\nThis is an example of Random Forest algorithm from dislib. To show the usage, the code generates a synthetical input matrix.\r\nThe results are printed by screen.\r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/748?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"749","url":"https://workflowhub.eu/workflows/749","name":"Refining Genome Annotations with Apollo (prokaryotes)","description":"Refining Genome Annotations with Apollo","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/749?version=1","name":"Version 1","author":["Anthony Bretaudeau"],"descriptor_type":["GALAXY"]}]},{"id":"750","url":"https://workflowhub.eu/workflows/750","name":"assembly-with-flye/main","description":"Assemble long reads with Flye, then view assembly statistics and assembly graph","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/750?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/750?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/750?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/750?version=4","name":"v0.4","author":[],"descriptor_type":["GALAXY"]}]},{"id":"751","url":"https://workflowhub.eu/workflows/751","name":"Purge-duplicates-one-haplotype-VGP6b/main","description":"Purge contigs marked as duplicates by purge_dups in a single haplotype (could be haplotypic duplication or overlap duplication). If you think the purged contigs might belong to the other haplotype, use the workflow VGP6 instead. This workflow is the 6th workflow of the VGP pipeline. It is meant to be run after one of the contigging steps (Workflow 3, 4, or 5).","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/751?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/751?version=2","name":"v0.3","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/751?version=3","name":"v0.4","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/751?version=4","name":"v0.5","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/751?version=5","name":"v0.6","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/751?version=6","name":"v0.7","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/751?version=7","name":"v0.7.1","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/751?version=8","name":"v0.7.2","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/751?version=9","name":"v0.7.3","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/751?version=10","name":"v0.7.4","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/751?version=11","name":"v0.7.5","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/751?version=12","name":"v0.7.6","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/751?version=13","name":"v0.7.7","author":[],"descriptor_type":["GALAXY"]},{"id":"14","url":"https://workflowhub.eu/workflows/751?version=14","name":"v0.8.0","author":[],"descriptor_type":["GALAXY"]},{"id":"15","url":"https://workflowhub.eu/workflows/751?version=15","name":"v0.8.1","author":[],"descriptor_type":["GALAXY"]},{"id":"16","url":"https://workflowhub.eu/workflows/751?version=16","name":"v0.8.2","author":[],"descriptor_type":["GALAXY"]},{"id":"
17","url":"https://workflowhub.eu/workflows/751?version=17","name":"v0.8.3","author":[],"descriptor_type":["GALAXY"]},{"id":"18","url":"https://workflowhub.eu/workflows/751?version=18","name":"v0.8.4","author":[],"descriptor_type":["GALAXY"]},{"id":"19","url":"https://workflowhub.eu/workflows/751?version=19","name":"v0.8.5","author":[],"descriptor_type":["GALAXY"]}]},{"id":"753","url":"https://workflowhub.eu/workflows/753","name":"Masking repeats with RepeatMasker","description":"Masking repeats in a genome using RepeatMasker","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/753?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"754","url":"https://workflowhub.eu/workflows/754","name":"Genome annotation with Funannotate","description":"Structural and functional genome annotation with Funannotate","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/754?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"755","url":"https://workflowhub.eu/workflows/755","name":"Functional protein annotation using EggNOG-mapper and InterProScan","description":"Functional annotation of protein sequences","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/755?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"756","url":"https://workflowhub.eu/workflows/756","name":"Sentinel 5P volcanic data visualization","description":"From Copernicus Sentinel 5P data to panoply visualization of volcanic activity impact to atmosphere","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A 
computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/756?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"757","url":"https://workflowhub.eu/workflows/757","name":"Finding the Muon Stopping Site using PyMuonSuite","description":"Finding potential muon stopping sites in crystalline copper","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/757?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"758","url":"https://workflowhub.eu/workflows/758","name":"Calculating and visualizing OBIS marine biodiversity indicators","description":"Calculating and visualizing marine biodiversity indicators","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/758?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"759","url":"https://workflowhub.eu/workflows/759","name":"Visualizing NDVI time-series data with HoloViz","description":"NDVI data with OpenEO to time series visualisation with HoloViz","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/759?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"761","url":"https://workflowhub.eu/workflows/761","name":"flavivirushelicase_proteindrugcomplex","description":"This is a Galaxy workflow for performing molecular dynamics simulations and analysis with flavivirus helicases bound to a ligand/drug molecule. 
\r\nThe associated input files can be found at:\r\nhttps://zenodo.org/records/7493015\r\nThe associated output files can be found at:\r\nhttps://zenodo.org/records/7850935","organization":"Generalized Open-Source Workflows for Atomistic Molecular Dynamics Simulations of Viral Helicases","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/761?version=1","name":"Version 1","author":["Bryan Raubenolt"],"descriptor_type":["GALAXY"]}]},{"id":"762","url":"https://workflowhub.eu/workflows/762","name":"flavivirushelicase_apo","description":"This is a Galaxy workflow for performing molecular dynamics simulations and analysis with flavivirus helicases in the Apo or unbound state. The associated input files can be found at: https://zenodo.org/records/7493015 The associated output files can be found at: https://zenodo.org/records/7850935","organization":"Generalized Open-Source Workflows for Atomistic Molecular Dynamics Simulations of Viral Helicases","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/762?version=1","name":"Version 1","author":["Bryan Raubenolt"],"descriptor_type":["GALAXY"]}]},{"id":"763","url":"https://workflowhub.eu/workflows/763","name":"coronavirushelicase_proteindrugcomplex","description":"This is a Galaxy workflow for performing molecular dynamics simulations and analysis with coronavirus helicases bound to a ligand/drug molecule. The associated input files can be found at: https://zenodo.org/records/7492987. 
The associated output files can be found at: https://zenodo.org/records/7851000.","organization":"Generalized Open-Source Workflows for Atomistic Molecular Dynamics Simulations of Viral Helicases","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/763?version=1","name":"Version 1","author":["Bryan Raubenolt"],"descriptor_type":["GALAXY"]}]},{"id":"764","url":"https://workflowhub.eu/workflows/764","name":"coronavirushelicase_apo","description":"This is a Galaxy workflow for performing molecular dynamics simulations and analysis with coronavirus helicases in the Apo or unbound state. The associated input files can be found at: https://zenodo.org/records/7492987. The associated output files can be found at: https://zenodo.org/records/7851000.","organization":"Generalized Open-Source Workflows for Atomistic Molecular Dynamics Simulations of Viral Helicases","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/764?version=1","name":"Version 1","author":["Bryan Raubenolt"],"descriptor_type":["GALAXY"]}]},{"id":"766","url":"https://workflowhub.eu/workflows/766","name":"Example Multi-Wavelength Light-Curve Analysis","description":"Protype demonstrator of a workflow reducing HESS and INTEGRAL/SPI-ACS data to common Light Curve format and combining the lightcurves into a multi-wavelength observation.","organization":"EuroScienceGateway, ODA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/766?version=1","name":"Version 1","author":["Volodymyr Savchenko","Denys Savchenko"],"descriptor_type":["GALAXY"]}]},{"id":"771","url":"https://workflowhub.eu/workflows/771","name":"fluorescence-nuclei-segmentation-and-counting/main","description":"This workflow performs segmentation and counting of cell nuclei 
using fluorescence microscopy images. The segmentation step is performed using Otsu thresholding (Otsu, 1979). The workflow is based on the tutorial: https://training.galaxyproject.org/training-material/topics/imaging/tutorials/imaging-introduction/tutorial.html","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/771?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/771?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"772","url":"https://workflowhub.eu/workflows/772","name":"How Usable Are Published Permeability Data?","description":"Current version of this workflow: [https://workflowhub.eu/workflows/1109](https://workflowhub.eu/workflows/1109). Please use only the new version.\r\nKNIME workflow to gather ChEMBL permeability data is available: [https://workflowhub.eu/workflows/1169](https://workflowhub.eu/workflows/1169).","organization":"Chemical Data Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/772?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"773","url":"https://workflowhub.eu/workflows/773","name":"Jupyter Notebook Classical Molecular Interaction Potentials","description":"# CMIP tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of computing **classical molecular interaction potentials** from **protein structures**, step by step, using the **BioExcel Building Blocks library (biobb)**. Examples shown are **Molecular Interaction Potentials (MIPs) grids, protein-protein/ligand interaction potentials, and protein titration**. 
The particular structures used are the **Lysozyme** protein (PDB code [1AKI](https://www.rcsb.org/structure/1aki)), and a MD simulation of the complex formed by the **SARS-CoV-2 Receptor Binding Domain and the human Angiotensin Converting Enzyme 2** (PDB code [6VW1](https://www.rcsb.org/structure/6vw1)).\r\n\r\nThe code wrapped is the ***Classical Molecular Interaction Potentials (CMIP)*** code:\r\n\r\n**Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins.**\r\n*Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001)*\r\n*Proteins, 45: 428-437. [https://doi.org/10.1002/prot.1159](https://doi.org/10.1002/prot.1159)*\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/773?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/773?version=2","name":"Version 2","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/773?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"4","url":"https://workflowhub.eu/workflows/773?version=4","name":"Version 4","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"774","url":"https://workflowhub.eu/workflows/774","name":"Python Classical Molecular Interaction Potentials","description":"# CMIP tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of computing **classical molecular interaction potentials** from **protein structures**, step by step, using the **BioExcel Building Blocks library (biobb)**. Examples shown are **Molecular Interaction Potentials (MIPs) grids, protein-protein/ligand interaction potentials, and protein titration**. The particular structures used are the **Lysozyme** protein (PDB code [1AKI](https://www.rcsb.org/structure/1aki)), and a MD simulation of the complex formed by the **SARS-CoV-2 Receptor Binding Domain and the human Angiotensin Converting Enzyme 2** (PDB code [6VW1](https://www.rcsb.org/structure/6vw1)).\r\n\r\nThe code wrapped is the ***Classical Molecular Interaction Potentials (CMIP)*** code:\r\n\r\n**Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins.**\r\n*Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001)*\r\n*Proteins, 45: 428-437. 
[https://doi.org/10.1002/prot.1159](https://doi.org/10.1002/prot.1159)*\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/774?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/774?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/774?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"775","url":"https://workflowhub.eu/workflows/775","name":"Jupyter Notebook Molecular Structure Checking","description":"# Molecular Structure Checking using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **checking** a **molecular structure** before using it as an input for a **Molecular Dynamics** simulation. The workflow uses the **BioExcel Building Blocks library (biobb)**. 
The particular structure used is the crystal structure of **human Adenylate Kinase 1A (AK1A)**, in complex with the **AP5A inhibitor** (PDB code [1Z83](https://www.rcsb.org/structure/1z83)).  \r\n\r\n**Structure checking** is a key step before setting up a protein system for **simulations**. A number of **common issues** found in structures at **Protein Data Bank** may compromise the success of the **simulation**, or may suggest that longer **equilibration** procedures are necessary.\r\n\r\nThe **workflow** shows how to:\r\n\r\n- Run **basic manipulations on structures** (selection of models, chains, alternative locations\r\n- Detect and fix **amide assignments** and **wrong chiralities**\r\n- Detect and fix **protein backbone** issues (missing fragments, and atoms, capping)\r\n- Detect and fix **missing side-chain atoms**\r\n- **Add hydrogen atoms** according to several criteria\r\n- Detect and classify **atomic clashes**\r\n- Detect possible **disulfide bonds (SS)**\r\n\r\nAn implementation of this workflow in a **web-based Graphical User Interface (GUI)** can be found in the [https://mmb.irbbarcelona.org/biobb-wfs/](https://mmb.irbbarcelona.org/biobb-wfs/) server (see [https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check](https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check)).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 
2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/775?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/775?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/775?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"776","url":"https://workflowhub.eu/workflows/776","name":"CWL Molecular Structure Checking","description":"# Molecular Structure Checking using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **checking** a **molecular structure** before using it as an input for a **Molecular Dynamics** simulation. The workflow uses the **BioExcel Building Blocks library (biobb)**. The particular structure used is the crystal structure of **human Adenylate Kinase 1A (AK1A)**, in complex with the **AP5A inhibitor** (PDB code [1Z83](https://www.rcsb.org/structure/1z83)).  \r\n\r\n**Structure checking** is a key step before setting up a protein system for **simulations**. 
A number of **common issues** found in structures at **Protein Data Bank** may compromise the success of the **simulation**, or may suggest that longer **equilibration** procedures are necessary.\r\n\r\nThe **workflow** shows how to:\r\n\r\n- Run **basic manipulations on structures** (selection of models, chains, alternative locations\r\n- Detect and fix **amide assignments** and **wrong chiralities**\r\n- Detect and fix **protein backbone** issues (missing fragments, and atoms, capping)\r\n- Detect and fix **missing side-chain atoms**\r\n- **Add hydrogen atoms** according to several criteria\r\n- Detect and classify **atomic clashes**\r\n- Detect possible **disulfide bonds (SS)**\r\n\r\nAn implementation of this workflow in a **web-based Graphical User Interface (GUI)** can be found in the [https://mmb.irbbarcelona.org/biobb-wfs/](https://mmb.irbbarcelona.org/biobb-wfs/) server (see [https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check](https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check)).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/776?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/776?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["CWL"]}]},{"id":"777","url":"https://workflowhub.eu/workflows/777","name":"Python Molecular Structure Checking","description":"# Molecular Structure Checking using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **checking** a **molecular structure** before using it as an input for a **Molecular Dynamics** simulation. The workflow uses the **BioExcel Building Blocks library (biobb)**. The particular structure used is the crystal structure of **human Adenylate Kinase 1A (AK1A)**, in complex with the **AP5A inhibitor** (PDB code [1Z83](https://www.rcsb.org/structure/1z83)).  \r\n\r\n**Structure checking** is a key step before setting up a protein system for **simulations**. 
A number of **common issues** found in structures at **Protein Data Bank** may compromise the success of the **simulation**, or may suggest that longer **equilibration** procedures are necessary.\r\n\r\nThe **workflow** shows how to:\r\n\r\n- Run **basic manipulations on structures** (selection of models, chains, alternative locations\r\n- Detect and fix **amide assignments** and **wrong chiralities**\r\n- Detect and fix **protein backbone** issues (missing fragments, and atoms, capping)\r\n- Detect and fix **missing side-chain atoms**\r\n- **Add hydrogen atoms** according to several criteria\r\n- Detect and classify **atomic clashes**\r\n- Detect possible **disulfide bonds (SS)**\r\n\r\nAn implementation of this workflow in a **web-based Graphical User Interface (GUI)** can be found in the [https://mmb.irbbarcelona.org/biobb-wfs/](https://mmb.irbbarcelona.org/biobb-wfs/) server (see [https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check](https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check)).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/777?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/777?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/777?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"778","url":"https://workflowhub.eu/workflows/778","name":"Galaxy Molecular Structure Checking","description":"# Molecular Structure Checking using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **checking** a **molecular structure** before using it as an input for a **Molecular Dynamics** simulation. The workflow uses the **BioExcel Building Blocks library (biobb)**. The particular structure used is the crystal structure of **human Adenylate Kinase 1A (AK1A)**, in complex with the **AP5A inhibitor** (PDB code [1Z83](https://www.rcsb.org/structure/1z83)).  \r\n\r\n**Structure checking** is a key step before setting up a protein system for **simulations**. 
A number of **common issues** found in structures at **Protein Data Bank** may compromise the success of the **simulation**, or may suggest that longer **equilibration** procedures are necessary.\r\n\r\nThe **workflow** shows how to:\r\n\r\n- Run **basic manipulations on structures** (selection of models, chains, alternative locations\r\n- Detect and fix **amide assignments** and **wrong chiralities**\r\n- Detect and fix **protein backbone** issues (missing fragments, and atoms, capping)\r\n- Detect and fix **missing side-chain atoms**\r\n- **Add hydrogen atoms** according to several criteria\r\n- Detect and classify **atomic clashes**\r\n- Detect possible **disulfide bonds (SS)**\r\n\r\nAn implementation of this workflow in a **web-based Graphical User Interface (GUI)** can be found in the [https://mmb.irbbarcelona.org/biobb-wfs/](https://mmb.irbbarcelona.org/biobb-wfs/) server (see [https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check](https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check)).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2023 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2023 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/778?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":["GALAXY"]}]},{"id":"779","url":"https://workflowhub.eu/workflows/779","name":"PyCOMPSs Probabilistic Tsunami Forecast (PTF) - Boumerdes-2003 earthquake and tsunami test-case","description":"PyCOMPSs implementation of Probabilistic Tsunami Forecast (PTF). PTF explicitly treats data- and forecast-uncertainties, enabling alert level definitions according to any predefined level of conservatism, which is connected to the average balance of missed-vs-false-alarms. Run of the Boumerdes-2003 event test-case with 1000 scenarios, 8h tsunami simulation for each and forecast calculations for partial and full ensembles with focal mechanism and tsunami data updates.","organization":"Pillar III: Urgent computing for natural hazards, Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/779?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"780","url":"https://workflowhub.eu/workflows/780","name":"Lysozyme in water full version, using REMOTE dataset, two workers, data_persistence False","description":"Lysozyme in water full COMPSs application run at MareNostrum IV, using full dataset with two workers","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/780?version=1","name":"COMPSs 3.3","author":["Rosa M Badia"],"descriptor_type":[]}]},{"id":"781","url":"https://workflowhub.eu/workflows/781","name":"PyCOMPSs Probabilistic Tsunami Forecast (PTF) - Kos-Bodrum 2017 earthquake and tsunami test-case","description":"PyCOMPSs implementation of Probabilistic Tsunami Forecast (PTF). 
PTF explicitly treats data- and forecast-uncertainties, enabling alert level definitions according to any predefined level of conservatism, which is connected to the average balance of missed-vs-false-alarms. Run of the Kos-Bodrum 2017 event test-case with 1000 scenarios, 8h tsunami simulation for each and forecast calculations for partial and full ensembles with focal mechanism and tsunami data updates.","organization":"Pillar III: Urgent computing for natural hazards, Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/781?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"782","url":"https://workflowhub.eu/workflows/782","name":"Wound Healing Scratch Assay Image Analysis","description":"This project is about the automated quantification of wound healing in high-throughput microscopy scratch assays.","organization":"EMBL-Bioimage Analysis Support","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/782?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"786","url":"https://workflowhub.eu/workflows/786","name":"score-assemblies","description":"# score-assemblies\r\n\r\nA Snakemake-wrapper for evaluating *de novo* bacterial genome assemblies, e.g. 
from Oxford Nanopore (ONT) or Illumina sequencing.\r\n\r\nThe workflow includes the following programs:\r\n* [pomoxis](https://github.com/nanoporetech/pomoxis) assess_assembly and assess_homopolymers\r\n* dnadiff from the [mummer](https://mummer4.github.io/index.html) package\r\n* [NucDiff](https://github.com/uio-cels/NucDiff/)\r\n* [QUAST](http://quast.sourceforge.net/quast)\r\n* [BUSCO](https://busco.ezlab.org/)\r\n* [ideel](https://github.com/mw55309/ideel/), which uses [prodigal](https://github.com/hyattpd/Prodigal) and [diamond](https://github.com/bbuchfink/diamond)\r\n* [bakta](https://github.com/oschwengers/bakta)\r\n\r\n\r\n","organization":"Peter Menzel's Team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/786?version=1","name":"master @ 043710f","author":["Peter Menzel"],"descriptor_type":["SMK"]}]},{"id":"787","url":"https://workflowhub.eu/workflows/787","name":"ont-assembly-snake","description":"# ont-assembly-snake\r\n\r\nA Snakemake wrapper for easily creating *de novo* bacterial genome assemblies from Oxford Nanopore (ONT) sequencing data, and optionally Illumina data,\r\nusing any combination of read filtering, assembly, long and short read polishing, and reference-based polishing.\r\n\r\n## Included programs\r\n\r\n| read filtering | assembly | long read polishing | short read polishing | reference-based polishing |\r\n| --- | --- | --- | --- | --- |\r\n| [Filtlong](https://github.com/rrwick/Filtlong)\u003cbr/\u003e [Rasusa](https://github.com/mbhall88/rasusa) | [Flye](https://github.com/fenderglass/Flye)\u003cbr/\u003e [raven](https://github.com/lbcb-sci/raven)\u003cbr/\u003e [miniasm](https://github.com/lh3/miniasm)\u003cbr/\u003e [Unicycler](https://github.com/rrwick/Unicycler)\u003cbr/\u003e [Canu](https://github.com/marbl/canu)  | [racon](https://github.com/lbcb-sci/racon)\u003cbr/\u003e [medaka](https://github.com/nanoporetech/medaka) | 
[pilon](https://github.com/broadinstitute/pilon/wiki)\u003cbr/\u003e [Polypolish](https://github.com/rrwick/Polypolish)\u003cbr/\u003e [POLCA](https://github.com/alekseyzimin/masurca#polca) | [Homopolish](https://github.com/ythuang0522/homopolish)\u003cbr/\u003e [proovframe](https://github.com/thackl/proovframe) | \r\n\r\n","organization":"Peter Menzel's Team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/787?version=1","name":"master @ f4d6add","author":["Peter Menzel"],"descriptor_type":["SMK"]}]},{"id":"788","url":"https://workflowhub.eu/workflows/788","name":"ERGA ONT+Illumina Assembly+QC Flye+HyPo v2403 (WF2)","description":"The workflow takes raw ONT reads and trimmed Illumina WGS paired reads collections, and the estimated genome size and Max depth (both calculated from WF1) to run Flye and subsequently polish the assembly with HyPo. It produces collapsed assemblies (unpolished and polished) and runs all the QC analyses (gfastats, BUSCO, and Merqury).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/788?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"789","url":"https://workflowhub.eu/workflows/789","name":"ERGA ONT+Illumina Assembly+QC NextDenovo+HyPo v2403 (WF2)","description":"The workflow takes raw ONT reads and trimmed Illumina WGS paired reads collections, the ONT raw stats table (calculated from WF1) and the estimated genome size (calculated from WF1) to run NextDenovo and subsequently polish the assembly with HyPo. 
It produces collapsed assemblies (unpolished and polished) and runs all the QC analyses (gfastats, BUSCO, and Merqury).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/789?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"790","url":"https://workflowhub.eu/workflows/790","name":"dada2/main","description":"dada2 amplicon analysis for paired end data\n\nThe workflow has three main outputs: \n- the sequence table (output of makeSequenceTable)\n- the taxonomy (output of assignTaxonomy)\n- the counts which allow to track the number of sequences in the samples through the steps (output of sequence counts)","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/790?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/790?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/790?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"791","url":"https://workflowhub.eu/workflows/791","name":"skim2mito","description":"# skim2mito\r\n\r\n**skim2mito** is a snakemake pipeline for the batch assembly, annotation, and phylogenetic analysis of mitochondrial genomes from low coverage genome skims. The pipeline was designed to work with sequence data from museum collections. 
However, it should also work with genome skims from recently collected samples.\r\n\r\n## Contents\r\n - [Setup](#setup)\r\n - [Example data](#example-data)\r\n - [Input](#input)\r\n - [Output](#output)\r\n - [Filtering contaminants](#filtering-contaminants)\r\n - [Assembly and annotation only](#assembly-and-annotation-only)\r\n - [Running your own data](#running-your-own-data)\r\n - [Getting help](#getting-help)\r\n - [Citations](#citations)\r\n\r\n## Setup\r\n\r\nThe pipeline is written in Snakemake and uses conda to install the necessary tools.\r\n\r\nIt is *strongly recommended* to install conda using Mambaforge. See details here https://snakemake.readthedocs.io/en/stable/getting_started/installation.html\r\n\r\nOnce conda is installed, you can pull the github repo and set up the base conda environment.\r\n\r\n```\r\n# get github repo\r\ngit clone https://github.com/o-william-white/skim2mito\r\n\r\n# change dir\r\ncd skim2mito\r\n\r\n# setup conda env\r\nconda env create -n skim2mito_env -f workflow/envs/conda_env.yaml\r\nconda config --set channel_priority flexible\r\n```\r\n\r\nIf you need to install the conda environment to a specific location, use the following example, where the prefix argument can be updated to include a specific path:\r\n\r\n```\r\nconda env create -n skim2mito_env --prefix /your_path/skim2mito_env -f workflow/envs/conda_env.yaml\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Example data\r\n\r\nBefore you run your own data, it is recommended to run the example datasets provided. This will confirm there are no user-specific issues with the setup and it also installs all the dependencies. The example data includes simulated mitochondrial data from 25 different butterfly species. \r\n\r\nTo run the example data, use the code below. 
**Note that you need to change the user email to your own address**. The email is required by the Bio Entrez package to fetch reference sequences. The first time you run the pipeline, it will take some time to install each of the conda environments, so it is a good time to take a tea break :).\r\n```\r\nconda activate skim2mito_env\r\n\r\nsnakemake --cores 4 --use-conda --config user_email=user@example_email.com\r\n```\r\n\r\nOnce this has finished, you can generate a snakemake report using the following command. As above, you need to change the user email to your own address.\r\n\r\n```\r\nsnakemake --cores 4 --use-conda --config user_email=user@example_email.com --report skim2mito_report.html\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Input\r\n\r\nSnakemake requires a `config.yaml` and `samples.csv` to define input parameters and sequence data for each sample. 
\r\n\r\nFor the example data provided, the config file is located here `config/config.yaml` and it looks like this:\r\n```\r\n# path to sample sheet csv with columns for ID,forward,reverse,taxid,seed,gene\r\nsamples: config/samples.csv\r\n\r\n# user email\r\nuser_email: user@example_email.com\r\n\r\n# getorganelle reference (go_fetch, custom)\r\ngo_reference: go_fetch\r\n\r\n# forward adapter\r\nforward_adapter: AGATCGGAAGAGCACACGTCTGAACTCCAGTCA\r\n\r\n# reverse adapter\r\nreverse_adapter: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT\r\n\r\n# fastp deduplication (True/False)\r\nfastp_dedup: True\r\n\r\n# mitos refseq database (refseq39, refseq63f, refseq63m, refseq63o, refseq89f, refseq89m, refseq89o)\r\nmitos_refseq: refseq39\r\n\r\n# mito code (2 = Vertebrate, 4 = Mold, 5 = Invertebrate, 9 = Echinoderm, 13 = Ascidian, 14 = Alternative flatworm)\r\nmitos_code: 5\r\n\r\n# alignment trimming method to use (gblocks or clipkit)\r\nalignment_trim: gblocks\r\n\r\n# alignment missing data threshold for alignment (0.0 - 1.0)\r\nmissing_threshold: 0.5\r\n\r\n# name of outgroup sample (optional)\r\n# use \"NA\" if there is no obvious outgroup\r\n# if more than one outgroup use a comma separated list i.e. 
\"sampleA,sampleB\"\r\noutgroup: Eurema_blanda\r\n\r\n# plot dimensions (cm)\r\nplot_height: 20\r\nplot_width: 20\r\n```\r\n\r\nThe example samples.csv file is located here `config/samples.csv` and it looks like this (note that the seed and gene columns are only required if the custom getorganelle database option is specified in the config file):\r\n\r\n\r\n ID | forward | reverse | taxid | seed | gene \r\n----|---------|---------|-------|------|------\r\nAdelpha_iphiclus | .test/reads/Adelpha_iphiclus_1.fq.gz | .test/reads/Adelpha_iphiclus_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nAnartia_jatrophae_saturata | .test/reads/Anartia_jatrophae_saturata_1.fq.gz | .test/reads/Anartia_jatrophae_saturata_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nAraschnia_levana | .test/reads/Araschnia_levana_1.fq.gz | .test/reads/Araschnia_levana_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nAuzakia_danava | .test/reads/Auzakia_danava_1.fq.gz | .test/reads/Auzakia_danava_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nBaeotus_beotus | .test/reads/Baeotus_beotus_1.fq.gz | .test/reads/Baeotus_beotus_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nCatacroptera_cloanthe | .test/reads/Catacroptera_cloanthe_1.fq.gz | .test/reads/Catacroptera_cloanthe_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nChalinga_pratti | .test/reads/Chalinga_pratti_1.fq.gz | .test/reads/Chalinga_pratti_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nDiaethria_gabaza_eupepla | .test/reads/Diaethria_gabaza_eupepla_1.fq.gz | .test/reads/Diaethria_gabaza_eupepla_2.fq.gz | 127268 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nDoleschallia_melana | .test/reads/Doleschallia_melana_1.fq.gz | .test/reads/Doleschallia_melana_2.fq.gz | 
40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nEurema_blanda | .test/reads/Eurema_blanda_1.fq.gz | .test/reads/Eurema_blanda_2.fq.gz | 42450 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nHypolimnas_usambara | .test/reads/Hypolimnas_usambara_1.fq.gz | .test/reads/Hypolimnas_usambara_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nJunonia_villida | .test/reads/Junonia_villida_1.fq.gz | .test/reads/Junonia_villida_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nKallima_paralekta | .test/reads/Kallima_paralekta_1.fq.gz | .test/reads/Kallima_paralekta_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nKallimoides_rumia | .test/reads/Kallimoides_rumia_1.fq.gz | .test/reads/Kallimoides_rumia_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nLitinga_cottini | .test/reads/Litinga_cottini_1.fq.gz | .test/reads/Litinga_cottini_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nMallika_jacksoni | .test/reads/Mallika_jacksoni_1.fq.gz | .test/reads/Mallika_jacksoni_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nModuza_procris | .test/reads/Moduza_procris_1.fq.gz | .test/reads/Moduza_procris_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nParasarpa_zayla | .test/reads/Parasarpa_zayla_1.fq.gz | .test/reads/Parasarpa_zayla_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nPhaedyma_columella | .test/reads/Phaedyma_columella_1.fq.gz | .test/reads/Phaedyma_columella_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nPrecis_pelarga | .test/reads/Precis_pelarga_1.fq.gz | .test/reads/Precis_pelarga_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nProtogoniomorpha_temora | 
.test/reads/Protogoniomorpha_temora_1.fq.gz | .test/reads/Protogoniomorpha_temora_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nSalamis_cacta | .test/reads/Salamis_cacta_1.fq.gz | .test/reads/Salamis_cacta_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nSmyrna_blomfildia | .test/reads/Smyrna_blomfildia_1.fq.gz | .test/reads/Smyrna_blomfildia_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nTacola_larymna | .test/reads/Tacola_larymna_1.fq.gz | .test/reads/Tacola_larymna_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nYoma_algina | .test/reads/Yoma_algina_1.fq.gz | .test/reads/Yoma_algina_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\n\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Output\r\n\r\nAll output files are saved to the `results` direcotry. 
Below is a table summarising all of the output files generated by the pipeline.\r\n\r\n| Directory             | Description               |\r\n|-----------------------|---------------------------|\r\n| fastqc_raw            | Fastqc reports for raw input reads |\r\n| fastp                 | Fastp reports from quality control of raw reads |\r\n| fastqc_qc             | Fastqc reports for quality controlled reads |\r\n| go_fetch              | Optional output containing reference databasesused by GetOrganelle |\r\n| getorganelle          | GetOrganelle output with a directory for each sample |\r\n| assembled_sequence    | Assembled sequences selected from GetOrganelle output and renamed |\r\n| seqkit                | Seqkit summary of each assembly |\r\n| blastn                | Blastn output of each assembly |\r\n| minimap               | Mapping output of quality filtered reads against each assembly |\r\n| blobtools             | Blobtools assembly summary collating blastn and mapping output |\r\n| assess_assembly       | Plots of annotations, mean depth, GC content and proportion mismatches |\r\n| annotations           | Annotation outputs of mitos |\r\n| summary               | Summary per sample (seqkit stats), contig (GC content, length, coverage, taxonomy and annotations) and annotated gene counts |\r\n| annotated_genes  | Unaligned fasta files of annotated genes identified across all samples |\r\n| mafft                 | Mafft aligned fasta files of annotated genes identified across all samples |\r\n| mafft_filtered        | Mafft aligned fasta files after the removal of sequences based on a missing data threshold |\r\n| alignment_trim        | Ambiguous parts of alignment removed using either gblocks or clipkit |\r\n| iqtree                | Iqtree phylogenetic analysis of annotated genes |\r\n| plot_tree             | Plots of phylogenetic trees |\r\n| multiqc               | Multiqc summary report |\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv 
align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Filtering contaminants\r\n\r\nIf you are working with museum collections, it is possible that you may assemble and annotate sequences from contaminant/non-target species. *Contaminant sequences can be identified based on the blast search output or unusual placement in the phylogenetic trees* (see blobtools and plot_tree outputs). \r\n\r\nA supplementary python script `format_alignments.py `is provided to remove putative contaminants from alignments, and format the alignments for downstream phylogenetic analysis.\r\n\r\nFor example, let's say we wanted to remove all sequences from the sample \"Kallima_paralekta\" and atp6 gene sequences, you could run the script as shown below. The script works by identifying and removing sequences that have names with  `Kallima_paralekta` or `atp6` in the sequence names. The filtered alignments are written to a new output directory `filter_alignments_output`.\r\n\r\n```\r\npython workflow/scripts/format_alignments.py  \\\r\n   --input results/mafft_filtered/ \\\r\n   --cont Kallima_paralekta atp6 \\\r\n   --output filter_alignments_output\r\n```\r\n\r\n*Note that the output fasta files have been reformatted so each alignment file is named after the gene and each sequence is named after the sample.* This is useful if you would like to run our related pipeline **gene2phylo** for further phylogenetic analyses.\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Assembly and annotation only\r\n\r\nIf you are only interested in the assembly of mitochondrial sequences and annotation of genes without the phylogenetic analysis, you can stop the pipeline from running the gene alignment and phylogenetic analyses using the 
`--omit-from` parameter.\r\n```\r\nsnakemake --cores 4 --use-conda --config user_email=user@example_email.com --omit-from mafft\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Running your own data\r\n\r\nThe first thing you need to do is generate your own config.yaml and samples.csv files, using the files provided as a template.\r\n\r\nGetOrganelle requires reference data in the format of seed and gene reference fasta files. By default the pipeline uses a basic python script called go_fetch.py https://github.com/o-william-white/go_fetch to download and format reference data formatted for GetOrganelle. \r\n\r\ngo_fetch.py works by searching NCBI based on the NCBI taxonomy specified by the taxid column in the samples.csv file. Note that the seed and gene columns in the samples.csv file are only required if you want to provide your own custom GetOrganelle seed and gene reference databases. \r\n\r\nYou can use the default reference data for GetOrganelle, but I would recommend using custom reference databases where possible. 
See here for details of how to set up your own databases https://github.com/Kinggerm/GetOrganelle/wiki/FAQ#how-to-assemble-a-target-organelle-genome-using-my-own-reference\r\n\r\n## Getting help\r\n\r\nIf you have any questions, please do get in touch in the issues or by email o.william.white@gmail.com\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Citations\r\n\r\nIf you use the pipeline, please cite our bioarxiv preprint: https://doi.org/10.1101/2023.08.11.552985\r\n\r\nSince the pipeline is a wrapper for several other bioinformatic tools we also ask that you cite the tools used by the pipeline:\r\n - Fastqc https://github.com/s-andrews/FastQC\r\n - Fastp https://doi.org/10.1093/bioinformatics/bty560\r\n - GetOrganelle https://doi.org/10.1186/s13059-020-02154-5\r\n - Blastn https://doi.org/10.1186/1471-2105-10-421\r\n - Minimap2 https://doi.org/10.1093/bioinformatics/bty191\r\n - Blobtools https://doi.org/10.12688/f1000research.12232.1\r\n - Seqkit https://doi.org/10.1371/journal.pone.0163962\r\n - MITOS2 https://doi.org/10.1016/j.ympev.2012.08.023\r\n - Gblocks (default) https://doi.org/10.1093/oxfordjournals.molbev.a026334\r\n - Clipkit (optional) https://doi.org/10.1371/journal.pbio.3001007\r\n - Mafft https://doi.org/10.1093/molbev/mst010\r\n - Iqtree https://doi.org/10.1093/molbev/msu300\r\n - ete3 https://doi.org/10.1093/molbev/msw046\r\n - ggtree https://doi.org/10.1111/2041-210X.12628\r\n - Multiqc https://doi.org/10.1093/bioinformatics/btw354\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2mito\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n","organization":"NHM Clark group","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/791?version=1","name":"main @ 34ef5b6","author":[],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/791?version=2","name":"main @ a63288c","author":[],"descriptor_type":["SMK"]}]},{"id":"792","url":"https://workflowhub.eu/workflows/792","name":"skim2rrna","description":"# skim2rrna\r\n\r\n**skim2rrna** is a snakemake pipeline for the batch assembly, annotation, and phylogenetic analysis of ribosomal genes from low coverage genome skims. The pipeline was designed to work with sequence data from museum collections. However, it should also work with genome skims from recently collected samples.\r\n\r\n## Contents\r\n - [Setup](#setup)\r\n - [Example data](#example-data)\r\n - [Input](#input)\r\n - [Output](#output)\r\n - [Filtering contaminants](#filtering-contaminants)\r\n - [Assembly and annotation only](#assembly-and-annotation-only)\r\n - [Running your own data](#running-your-own-data)\r\n - [Getting help](#getting-help)\r\n - [Citations](#citations)\r\n\r\n## Setup\r\n\r\nThe pipeline is written in Snakemake and uses conda and singularity to install the necessary tools.\r\n\r\nIt is *strongly recommended* to install conda using Mambaforge. See details here https://snakemake.readthedocs.io/en/stable/getting_started/installation.html\r\n\r\nOnce conda is installed, you can pull the github repo and set up the base conda environment.\r\n\r\n```\r\n# get github repo\r\ngit clone https://github.com/o-william-white/skim2rrna\r\n\r\n# change dir\r\ncd skim2rrna\r\n\r\n# setup conda env\r\nconda env create -n snakemake -f workflow/envs/conda_env.yaml\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Example data\r\n\r\nBefore you run your own data, it is recommended to run the example datasets provided . 
This will confirm there are no user-specific issues with the setup and it also installs all the dependencies. The example data includes simulated ribosomal data from 25 different butterfly species. \r\n\r\nTo run the example data, use the code below. **Note that you need to change the user email to your own address**. The email is required by the Bio Entrez package to fetch reference sequences. The first time you run the pipeline, it will take some time to install each of the conda environments, so it is a good time to take a tea break :).\r\n```\r\nconda activate snakemake\r\n\r\nsnakemake \\\r\n   --cores 4 \\\r\n   --use-conda \\\r\n   --use-singularity \\ \r\n   --config user_email=user@example_email.com\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Input\r\n\r\nSnakemake requires a `config.yaml` and `samples.csv` to define input parameters and sequence data for each sample. 
\r\n\r\nFor the example data provided, the config file is located here `config/config.yaml` and it looks like this:\r\n```\r\n# path to sample sheet csv with columns for ID,forward,reverse,taxid,seed,gene\r\nsamples: config/samples.csv\r\n\r\n# user email\r\nuser_email: user@example_email.com\r\n\r\n# getorganelle reference (go_fetch, custom)\r\ngo_reference: go_fetch\r\n\r\n# forward adapter\r\nforward_adapter: AGATCGGAAGAGCACACGTCTGAACTCCAGTCA\r\n\r\n# reverse adapter\r\nreverse_adapter: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT\r\n\r\n# fastp deduplication (True/False)\r\nfastp_dedup: True\r\n\r\n# barrnap kindgom (Bacteria:bac, Archaea:arc, Eukaryota:euk, None:NA)\r\nbarrnap_kingdom: euk\r\n\r\n# alignment trimming method to use (gblocks or clipkit)\r\nalignment_trim: gblocks\r\n\r\n# alignment missing data threshold for alignment (0.0 - 1.0)\r\nmissing_threshold: 0.5\r\n\r\n# name of outgroup sample (optional)\r\n# use \"NA\" if there is no obvious outgroup\r\n# if more than one outgroup use a comma separated list i.e. 
\"sampleA,sampleB\"\r\noutgroup: Eurema_blanda\r\n\r\n# plot dimensions (cm)\r\nplot_height: 20\r\nplot_width: 20\r\n```\r\n\r\nThe example samples.csv file is located here `config/samples.csv` and it looks like this (note that the seed and gene columns are only required if the custom getorganelle database option is specified in the config file):\r\n\r\n\r\n ID | forward | reverse | taxid | seed | gene \r\n----|---------|---------|-------|------|------\r\nAdelpha_iphiclus | .test/reads/Adelpha_iphiclus_1.fq.gz | .test/reads/Adelpha_iphiclus_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nAnartia_jatrophae_saturata | .test/reads/Anartia_jatrophae_saturata_1.fq.gz | .test/reads/Anartia_jatrophae_saturata_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nAraschnia_levana | .test/reads/Araschnia_levana_1.fq.gz | .test/reads/Araschnia_levana_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nAuzakia_danava | .test/reads/Auzakia_danava_1.fq.gz | .test/reads/Auzakia_danava_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nBaeotus_beotus | .test/reads/Baeotus_beotus_1.fq.gz | .test/reads/Baeotus_beotus_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nCatacroptera_cloanthe | .test/reads/Catacroptera_cloanthe_1.fq.gz | .test/reads/Catacroptera_cloanthe_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nChalinga_pratti | .test/reads/Chalinga_pratti_1.fq.gz | .test/reads/Chalinga_pratti_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nDiaethria_gabaza_eupepla | .test/reads/Diaethria_gabaza_eupepla_1.fq.gz | .test/reads/Diaethria_gabaza_eupepla_2.fq.gz | 127268 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nDoleschallia_melana | .test/reads/Doleschallia_melana_1.fq.gz | .test/reads/Doleschallia_melana_2.fq.gz | 
40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nEurema_blanda | .test/reads/Eurema_blanda_1.fq.gz | .test/reads/Eurema_blanda_2.fq.gz | 42450 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nHypolimnas_usambara | .test/reads/Hypolimnas_usambara_1.fq.gz | .test/reads/Hypolimnas_usambara_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nJunonia_villida | .test/reads/Junonia_villida_1.fq.gz | .test/reads/Junonia_villida_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nKallima_paralekta | .test/reads/Kallima_paralekta_1.fq.gz | .test/reads/Kallima_paralekta_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nKallimoides_rumia | .test/reads/Kallimoides_rumia_1.fq.gz | .test/reads/Kallimoides_rumia_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nLitinga_cottini | .test/reads/Litinga_cottini_1.fq.gz | .test/reads/Litinga_cottini_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nMallika_jacksoni | .test/reads/Mallika_jacksoni_1.fq.gz | .test/reads/Mallika_jacksoni_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nModuza_procris | .test/reads/Moduza_procris_1.fq.gz | .test/reads/Moduza_procris_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nParasarpa_zayla | .test/reads/Parasarpa_zayla_1.fq.gz | .test/reads/Parasarpa_zayla_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nPhaedyma_columella | .test/reads/Phaedyma_columella_1.fq.gz | .test/reads/Phaedyma_columella_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nPrecis_pelarga | .test/reads/Precis_pelarga_1.fq.gz | .test/reads/Precis_pelarga_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nProtogoniomorpha_temora | 
.test/reads/Protogoniomorpha_temora_1.fq.gz | .test/reads/Protogoniomorpha_temora_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nSalamis_cacta | .test/reads/Salamis_cacta_1.fq.gz | .test/reads/Salamis_cacta_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nSmyrna_blomfildia | .test/reads/Smyrna_blomfildia_1.fq.gz | .test/reads/Smyrna_blomfildia_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nTacola_larymna | .test/reads/Tacola_larymna_1.fq.gz | .test/reads/Tacola_larymna_2.fq.gz | 100750 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\nYoma_algina | .test/reads/Yoma_algina_1.fq.gz | .test/reads/Yoma_algina_2.fq.gz | 40040 | .test/seed_mitochondrion.fasta | .test/gene_mitochondrion.fasta\r\n\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Output\r\n\r\nAll output files are saved to the `results` direcotry. 
Below is a table summarising all of the output files generated by the pipeline.\r\n\r\n| Directory             | Description               |\r\n|-----------------------|---------------------------|\r\n| fastqc_raw            | Fastqc reports for raw input reads |\r\n| fastp                 | Fastp reports from quality control of raw reads |\r\n| fastqc_qc             | Fastqc reports for quality controlled reads |\r\n| go_fetch              | Optional output containing reference databases used by GetOrganelle |\r\n| getorganelle          | GetOrganelle output with a directory for each sample |\r\n| assembled_sequence    | Assembled sequences selected from GetOrganelle output and renamed |\r\n| seqkit                | Seqkit summary of each assembly |\r\n| blastn                | Blastn output of each assembly |\r\n| minimap               | Mapping output of quality filtered reads against each assembly |\r\n| blobtools             | Blobtools assembly summary collating blastn and mapping output |\r\n| annotations           | Annotation outputs of mitos |\r\n| summary               | Summary per sample (seqkit stats), contig (GC content, length, coverage, taxonomy and annotations) and annotated gene counts |\r\n| annotated_genes  | Unaligned fasta files of annotated genes identified across all samples |\r\n| mafft                 | Mafft aligned fasta files of annotated genes identified across all samples |\r\n| mafft_filtered        | Mafft aligned fasta files after the removal of sequences based on a missing data threshold |\r\n| alignment_trim        | Ambiguous parts of alignment removed using either gblocks or clipkit |\r\n| iqtree                | Iqtree phylogenetic analysis of annotated genes |\r\n| plot_tree             | Plots of phylogenetic trees |\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## 
Filtering contaminants\r\n\r\nIf you are working with museum collections, it is possible that you may assemble and annotate sequences from contaminant/non-target species. *Contaminant sequences can be identified based on the blast search output or unusual placement in the phylogenetic trees* (see blobtools and plot_tree outputs). \r\n\r\nA supplementary python script `format_alignments.py `is provided to remove putative contaminants from alignments, and format the alignments for downstream phylogenetic analysis.\r\n\r\nFor example, let's say we wanted to remove all sequences from the sample \"Kallima_paralekta\" and 5.8S ribosomal sequence, you could run the script as shown below. The script works by identifying and removing sequences that have names with  `Kallima_paralekta` or `5_8S` in the sequence names. The filtered alignments are written to a new output directory `filter_alignments_output`.\r\n\r\n```\r\npython workflow/scripts/format_alignments.py  \\\r\n   --input results/mafft_filtered/ \\\r\n   --cont Kallima_paralekta 5_8S \\\r\n   --output filter_alignments_output\r\n```\r\n\r\n*Note that the output fasta files have been reformatted so each alignment file is named after the gene and each sequence is named after the sample.* This is useful if you would like to run our related pipeline **gene2phylo** for further phylogenetic analyses.\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Assembly and annotation only\r\n\r\nIf you are only interested in the assembly of ribosomal sequences and annotation of genes without the phylogenetic analysis, you can stop the pipeline from running the gene alignment and phylogenetic analyses using the `--omit-from` parameter.\r\n```\r\nsnakemake \\\r\n   --cores 4 \\\r\n   --use-conda \\\r\n   --use-singularity \\\r\n   --config user_email=user@example_email.com \\\r\n   
--omit-from mafft \r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Running your own data\r\n\r\nThe first thing you need to do is generate your own config.yaml and samples.csv files, using the files provided as a template.\r\n\r\nGetOrganelle requires reference data in the format of seed and gene reference fasta files. By default the pipeline uses a basic python script called go_fetch.py https://github.com/o-william-white/go_fetch to download and format reference data formatted for GetOrganelle. \r\n\r\ngo_fetch.py works by searching NCBI based on the NCBI taxonomy specified by the taxid column in the samples.csv file. Note that the seed and gene columns in the samples.csv file are only required if you want to provide your own custom GetOrganelle seed and gene reference databases. \r\n\r\nYou can use the default reference data for GetOrganelle, but I would recommend using custom reference databases where possible. 
See here for details of how to set up your own databases https://github.com/Kinggerm/GetOrganelle/wiki/FAQ#how-to-assemble-a-target-organelle-genome-using-my-own-reference\r\n\r\n## Getting help\r\n\r\nIf you have any questions, please do get in touch in the issues or by email o.william.white@gmail.com\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Citations\r\n\r\nIf you use the pipeline, please cite our bioarxiv preprint: https://doi.org/10.1101/2023.08.11.552985\r\n\r\nSince the pipeline is a wrapper for several other bioinformatic tools we also ask that you cite the tools used by the pipeline:\r\n - Fastqc https://github.com/s-andrews/FastQC\r\n - Fastp https://doi.org/10.1093/bioinformatics/bty560\r\n - GetOrganelle https://doi.org/10.1186/s13059-020-02154-5\r\n - Blastn https://doi.org/10.1186/1471-2105-10-421\r\n - Minimap2 https://doi.org/10.1093/bioinformatics/bty191\r\n - Blobtools https://doi.org/10.12688/f1000research.12232.1\r\n - Seqkit https://doi.org/10.1371/journal.pone.0163962\r\n - MITOS2 https://doi.org/10.1016/j.ympev.2012.08.023\r\n - Gblocks (default) https://doi.org/10.1093/oxfordjournals.molbev.a026334\r\n - Clipkit (optional) https://doi.org/10.1371/journal.pbio.3001007\r\n - Mafft https://doi.org/10.1093/molbev/mst010\r\n - Iqtree https://doi.org/10.1093/molbev/msu300\r\n - ete3 https://doi.org/10.1093/molbev/msw046\r\n - ggtree https://doi.org/10.1111/2041-210X.12628\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#skim2rrna\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n","organization":"NHM Clark group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/792?version=1","name":"main @ 
77f800c","author":[],"descriptor_type":["SMK"]}]},{"id":"793","url":"https://workflowhub.eu/workflows/793","name":"gene2phylo","description":"# gene2phylo\r\n\r\n**gene2phylo** is a snakemake pipeline for batch phylogenetic analysis of a given set of input genes. \r\n\r\n## Contents\r\n - [Setup](#setup)\r\n - [Example data](#example-data)\r\n - [Input](#input)\r\n - [Output](#output)\r\n - [Running your own data](#running-your-own-data)\r\n - [Getting help](#getting-help)\r\n - [Citations](#citations)\r\n\r\n## Setup\r\n\r\nThe pipeline is written in Snakemake and uses conda to install the necessary tools.\r\n\r\nIt is *strongly recommended* to install conda using Mambaforge. See details here https://snakemake.readthedocs.io/en/stable/getting_started/installation.html\r\n\r\nOnce conda is installed, you can pull the github repo and set up the base conda environment.\r\n\r\n```\r\n# get github repo\r\ngit clone https://github.com/o-william-white/gene2phylo\r\n\r\n# change dir\r\ncd gene2phylo\r\n\r\n# setup conda env\r\nconda env create -n snakemake -f workflow/envs/conda_env.yaml\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#gene2phylo\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Example data\r\n\r\nBefore you run your own data, it is recommended to run the example datasets provided . This will confirm there are no user-specific issues with the setup and it also installs all the dependencies. The example data includes mitochondrial and ribosomal genes from 25 different butterfly species. \r\n\r\nTo run the example data, use the code below. 
The first time you run the pipeline, it will take some time to install each of the conda environments, so it is a good time to take a tea break :).\r\n```\r\nconda activate snakemake\r\n\r\nsnakemake \\\r\n   --cores 4 \\\r\n   --use-conda\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#gene2phylo\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Input\r\n\r\nSnakemake requires a `config.yaml` to define input parameters. \r\n\r\nFor the example data provided, the config file is located here `config/config.yaml` and it looks like this:\r\n```\r\n# name of input directory containg genes\r\ninput_dir: .test\r\n\r\n# realign (True or False)\r\nrealign: True\r\n\r\n# alignment missing data threshold for alignment (0.0 - 1.0), only required if realign == True\r\nmissing_threshold: 0.5\r\n\r\n# alignment trimming method to use (gblocks or clipkit), only required if realign == True\r\nalignment_trim: gblocks\r\n\r\n# name of outgroup sample (optional)\r\n# use \"NA\" if there is no obvious outgroup\r\n# if more than one outgroup use a comma separated list i.e. \"sampleA,sampleB\"\r\noutgroup: Eurema_blanda\r\n\r\n# plot dimensions (cm)\r\nplot_height: 20\r\nplot_width: 20\r\n```\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#gene2phylo\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Output\r\n\r\nAll output files are saved to the `results` direcotry. 
Below is a table summarising all of the output files generated by the pipeline.\r\n\r\n| Directory                 | Description               |\r\n|---------------------------|---------------------------|\r\n| mafft                     | Optional: Mafft aligned fasta files of all genes |\r\n| mafft_filtered            | Optional: Mafft aligned fasta files after the removal of sequences based on a missing data threshold |\r\n| alignment_trim            | Optional: Ambiguous parts of alignment removed using either gblocks or clipkit |\r\n| iqtree                    | Iqtree phylogenetic analysis for each gene |\r\n| iqtree_plots              | Plots of Iqtree phylogenetic tree for each gene  |\r\n| concatenate_alignments    | Partitioned alignment of all genes  |\r\n| iqtree_partitioned        | Iqtree partitioned phylogenetic analysis |\r\n| iqtree_partitioned_plot   | Plot of Iqtree partitioned tree |\r\n| astral                    | Astral phylogenetic analysis of all gene trees |\r\n| astral_plot               | Plot of Astral tree |\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#gene2phylo\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Running your own data\r\n\r\nFor the pipeline to function properly, the input gene alignments must be: \r\n- in a single directory \r\n- end with \".fasta\"\r\n- named after the aligned gene (e.g. \"cox1.fasta\" or \"28S.fasta\")\r\n- share identical sample names across alignments (e.g. all genes from sample A share the same name)\r\n\r\nPlease see the example data in the `.test/` directory as an example. 
\r\n\r\nThen you need to generate your own config.yaml file, using the example template provided.\r\n\r\n## Getting help\r\n\r\nIf you have any questions, please do get in touch in the issues or by email o.william.white@gmail.com\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#gene2phylo\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n## Citations\r\n\r\nIf you use the pipeline, please cite our bioarxiv preprint: https://doi.org/10.1101/2023.08.11.552985\r\n\r\nSince the pipeline is a wrapper for several other bioinformatic tools we also ask that you cite the tools used by the pipeline:\r\n - Gblocks (default) https://doi.org/10.1093/oxfordjournals.molbev.a026334\r\n - Clipkit (optional) https://doi.org/10.1371/journal.pbio.3001007\r\n - Mafft (optional) https://doi.org/10.1093/molbev/mst010\r\n - Iqtree https://doi.org/10.1093/molbev/msu300\r\n - Ete3 https://doi.org/10.1093/molbev/msw046\r\n - Ggtree https://doi.org/10.1111/2041-210X.12628\r\n - Astral https://doi.org/10.1186/s12859-018-2129-y\r\n\r\n\u003cbr/\u003e\r\n\u003cdiv align=\"right\"\u003e\r\n    \u003cb\u003e\u003ca href=\"#gene2phylo\"\u003e↥ back to top\u003c/a\u003e\u003c/b\u003e\r\n\u003c/div\u003e\r\n\u003cbr/\u003e\r\n\r\n","organization":"NHM Clark group","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/793?version=1","name":"main @ d051134","author":[],"descriptor_type":[]}]},{"id":"794","url":"https://workflowhub.eu/workflows/794","name":"Genome-assessment-post-assembly","description":"Post-genome assembly quality control workflow using Quast, BUSCO, Meryl, Merqury and Fasta Statistics. Updates November 2023.  Inputs: reads as fastqsanger.gz (not fastq.gz), and assembly.fasta. New default settings for BUSCO: lineage = eukaryota; for Quast: lineage = eukaryotes, genome = large. 
Reports assembly stats into a table called metrics.tsv, including selected metrics from Fasta Stats, and read coverage; reports BUSCO versions and dependencies; and displays these tables in the workflow report. Note: a known bug is that sometimes the workflow report text resets to default text. To restore, look for an earlier workflow version with correct workflow report text, and copy and paste report text into current version.","organization":"Australian BioCommons","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/794?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/794?version=2","name":"Version 2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"795","url":"https://workflowhub.eu/workflows/795","name":"HiC contact map generation","description":"# HiC contact map generation\r\n\r\nSnakemake pipeline for the generation of `.pretext` and `.mcool` files for visualisation of HiC contact maps with the softwares PretextView and HiGlass, respectively.\r\n\r\n## Prerequisites\r\n\r\nThis pipeine has been tested using `Snakemake v7.32.4` and requires conda for installation of required tools. To run the pipline use the command:\r\n\r\n`snakemake --use-conda`\r\n\r\nThere are provided a set of configuration and running scripts for exectution on a slurm queueing system. After configuring the `cluster.json` file run:\r\n\r\n`./run_cluster`\r\n\r\n## Before starting\r\n\r\nYou need to create a temporary folder and specify the path in the `config.yaml` file. This should be able to hold the temporary files created when sorting the `.pairsam` file (100s of GB or even many TBs)\r\n\r\nThe path to the genome assemly must be given in the `config.yaml`.\r\n\r\nThe HiC reads should be paired and named as follows: `Library_1.fastq.gz Library_2.fastq.gz`. 
The pipeline can accept any number of paired HiC read files, but the naming must be consistent. The folder containing these files must be provided in the `config.yaml`.\r\n","organization":"Biodiversity Genomics Europe (general), ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/795?version=1","name":"Version 1","author":["Tom Brown"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/795?version=2","name":"Version 2","author":["Tom Brown"],"descriptor_type":["SMK"]}]},{"id":"796","url":"https://workflowhub.eu/workflows/796","name":"HiC scaffolding pipeline","description":"# HiC scaffolding pipeline\r\n\r\nSnakemake pipeline for scaffolding of a genome using HiC reads using yahs.\r\n\r\n## Prerequisites\r\n\r\nThis pipeine has been tested using `Snakemake v7.32.4` and requires conda for installation of required tools. To run the pipline use the command:\r\n\r\n`snakemake --use-conda --cores N`\r\n\r\nwhere N is number of cores to use. There are provided a set of configuration and running scripts for exectution on a slurm queueing system. After configuring the `cluster.json` file run:\r\n\r\n`./run_cluster`\r\n\r\n## Before starting\r\n\r\nYou need to create a temporary folder and specify the path in the `config.yaml` file. This should be able to hold the temporary files created when sorting the `.pairsam` file (100s of GB or even many TBs)\r\n\r\nThe path to the genome assemly must be given in the `config.yaml`.\r\n\r\nThe HiC reads should be paired and named as follows: `Library_1.fastq.gz Library_2.fastq.gz`. The pipeline can accept any number of paired HiC read files, but the naming must be consistent. 
The folder containing these files must be provided in the `config.yaml`.","organization":"Biodiversity Genomics Europe (general), ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/796?version=1","name":"Version 1","author":["Tom Brown"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/796?version=2","name":"Version 2","author":["Tom Brown"],"descriptor_type":["SMK"]},{"id":"3","url":"https://workflowhub.eu/workflows/796?version=3","name":"Version 3","author":["Tom Brown"],"descriptor_type":["SMK"]}]},{"id":"797","url":"https://workflowhub.eu/workflows/797","name":"COMPSs GPU Matrix Multiplication","description":"**Name:** Matmul GPU Case 1 Cache-OFF  \r\n**Contact Person**: cristian.tatu@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs 3.3  \r\n**Machine**: Minotauro-MN4  \r\n\r\nMatmul running on the GPU without Cache.  \r\nLaunched using 32 GPUs (16 nodes).  \r\nPerforms C = A @ B  \r\nWhere A: shape (320, 56_900_000)  block_size (10, 11_380_000)  \r\n\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;B: shape (56_900_000, 10)    \u0026nbsp;\u0026nbsp;block_size (11_380_000, 10)  \r\n\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;C: shape (320, 10)                  \u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;block_size (10, 10)  \r\nTotal dataset size 291 GB.  
\r\nVersion dislib-0.9\r\n\r\nAverage task execution time: 56 seconds\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/797?version=1","name":"Version 1","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"798","url":"https://workflowhub.eu/workflows/798","name":"COMPSs GPU Cache Matrix Multiplication","description":"**Name:** Matmul GPU Case 1 Cache-ON  \r\n**Contact Person**: cristian.tatu@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: Minotauro-MN4  \r\n\r\nMatmul running on the GPU leveraging COMPSs GPU Cache for deserialization speedup.  \r\nLaunched using 32 GPUs (16 nodes).  \r\nPerforms C = A @ B  \r\nWhere A: shape (320, 56_900_000)  block_size (10, 11_380_000)  \r\n\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;B: shape (56_900_000, 10)    \u0026nbsp;\u0026nbsp;block_size (11_380_000, 10)  \r\n\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;C: shape (320, 10)                  \u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;block_size (10, 10)  \r\nTotal dataset size 291 GB.  
\r\nVersion dislib-0.9\r\n\r\nAverage task execution time: 32 seconds\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/798?version=1","name":"Version 1","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"799","url":"https://workflowhub.eu/workflows/799","name":"COMPSs GPU K-Means","description":"**Name:** K-Means GPU Cache OFF  \r\n**Contact Person**: cristian.tatu@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: Minotauro-MN4  \r\n\r\nK-Means running on GPUs.    \r\nLaunched using 32 GPUs (16 nodes). Parameters used: K=40 and 32 blocks of size (1_000_000, 1200).  \r\nIt creates a block for each GPU. Total dataset shape is (32_000_000, 1200).  \r\nVersion dislib-0.9\r\n\r\nAverage task execution time: 194 seconds","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/799?version=1","name":"Final Version","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"800","url":"https://workflowhub.eu/workflows/800","name":"COMPSs GPU Cache K-Means","description":"**Name:** K-Means GPU Cache ON  \r\n**Contact Person**: cristian.tatu@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: Minotauro-MN4  \r\n\r\nK-Means running on the GPU leveraging COMPSs GPU Cache for deserialization speedup.  \r\nLaunched using 32 GPUs (16 nodes). Parameters used: K=40 and 32 blocks of size (1_000_000, 1200).  \r\nIt creates a block for each GPU. Total dataset shape is (32_000_000, 1200).  
\r\nVersion dislib-0.9\r\n\r\nAverage task execution time: 16 seconds","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/800?version=1","name":"Final ","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"801","url":"https://workflowhub.eu/workflows/801","name":"COMPSs GPU DNN Distributed Training","description":"**Name:** Dislib Distributed Training - Cache OFF  \r\n**Contact Person**: cristian.tatu@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: Minotauro-MN4  \r\n\r\nPyTorch distributed training of CNN on GPU.  \r\nLaunched using 32 GPUs (16 nodes).  \r\nDataset: Imagenet  \r\nVersion dislib-0.9  \r\nVersion PyTorch 1.7.1+cu101  \r\n\r\nAverage task execution time: 84 seconds","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/801?version=1","name":"Version 1","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"802","url":"https://workflowhub.eu/workflows/802","name":"COMPSs GPU Cache DNN Distributed Training","description":"**Name:** Dislib Distributed Training - Cache ON  \r\n**Contact Person**: cristian.tatu@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: Minotauro-MN4  \r\n\r\nPyTorch distributed training of CNN on GPU and leveraging COMPSs GPU Cache for deserialization speedup.  \r\nLaunched using 32 GPUs (16 nodes).  
\r\nDataset: Imagenet  \r\nVersion dislib-0.9  \r\nVersion PyTorch 1.7.1+cu101  \r\n\r\nAverage task execution time: 36 seconds","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/802?version=1","name":"Version 1","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"804","url":"https://workflowhub.eu/workflows/804","name":"My COMPSs Fibonacci Series","description":"Calculates the Fibonacci series up to a specified length.","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/804?version=1","name":"COMPSs version 3.3","author":[],"descriptor_type":[]}]},{"id":"807","url":"https://workflowhub.eu/workflows/807","name":"A workflow demonstrating the 'Run interpolation based on IDW' tool","description":"","organization":"Building spatio-temporal workflows in Galaxy","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/807?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"810","url":"https://workflowhub.eu/workflows/810","name":"BioDT Cultural Ecosystem Services prototype Digital Twin - Biodiversity Component","description":"This is the workflow for the biodiversity component of the cultural ecosystems digital twin","organization":"BioDT Use Case 4.1.1.2 Ecosystem services","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/810?version=1","name":"main @ 0961b1c","author":[],"descriptor_type":[]}]},{"id":"811","url":"https://workflowhub.eu/workflows/811","name":"BioDT Cultural Ecosystem Services prototype Digital Twin - Recreation Potential Component","description":"This is the workflow for 
the recreation potential component of the cultural ecosystems digital twin","organization":"BioDT Use Case 4.1.1.2 Ecosystem services","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/811?version=1","name":"main @ 0961b1c","author":[],"descriptor_type":[]}]},{"id":"813","url":"https://workflowhub.eu/workflows/813","name":"sanger-tol/treeval v1.1.0 - Ancient Aurora","description":"[![Cite with Zenodo](https://zenodo.org/badge/509096312.svg)](https://zenodo.org/doi/10.5281/zenodo.10047653)\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/treeval)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/treeval [1.1.0 - Ancient Aurora]** is a bioinformatics best-practice analysis pipeline for the generation of data supplemental to the curation of reference quality genomes. This pipeline has been written to generate flat files compatible with [JBrowse2](https://jbrowse.org/jb2/) as well as HiC maps for use in Juicebox, PretextView and HiGlass.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. 
The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nYou can also set up and attempt to run the pipeline here: https://gitpod.io/#https://github.com/BGAcademy23/treeval-curation\r\nThis is a gitpod set up for BGA23 with a version of TreeVal, although for now gitpod will not run a nextflow pipeline die to issues with using singularity. We will be replacing this with an AWS instance soon.\r\n\r\nThe treeval pipeline has a sister pipeline currently named [curationpretext](https://github.com/sanger-tol/curationpretext) which acts to regenerate the pretext maps and accessory files during genomic curation in order to confirm interventions. This pipeline is sufficiently different to the treeval implementation that it is written as it's own pipeline.\r\n\r\n1. Parse input yaml ( YAML_INPUT )\r\n2. Generate my.genome file ( GENERATE_GENOME )\r\n3. Generate insilico digests of the input assembly ( INSILICO_DIGEST )\r\n4. Generate gene alignments with high quality data against the input assembly ( GENE_ALIGNMENT )\r\n5. Generate a repeat density graph ( REPEAT_DENSITY )\r\n6. Generate a gap track ( GAP_FINDER )\r\n7. Generate a map of self complementary sequence ( SELFCOMP )\r\n8. Generate syntenic alignments with a closely related high quality assembly ( SYNTENY )\r\n9. Generate a coverage track using PacBio data ( LONGREAD_COVERAGE )\r\n10. Generate HiC maps, pretext and higlass using HiC cram files ( HIC_MAPPING )\r\n11. Generate a telomere track based on input motif ( TELO_FINDER )\r\n12. Run Busco and convert results into bed format ( BUSCO_ANNOTATION )\r\n13. 
Ancestral Busco linkage if available for clade ( BUSCO_ANNOTATION:ANCESTRAL_GENE )\r\n14. Count KMERs with FastK and plot the spectra using MerquryFK ( KMER )\r\n15. Generate a coverge track using KMER data ( KMER_READ_COVERAGE )\r\n\r\n## Usage\r\n\r\n\u003e **Note**\r\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how\r\n\u003e to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)\r\n\u003e with `-profile test` before running the workflow on actual data.\r\n\r\nCurrently, it is advised to run the pipeline with docker or singularity as a small number of major modules do not currently have a conda env associated with them.\r\n\r\nNow, you can run the pipeline using:\r\n\r\n```bash\r\n# For the FULL pipeline\r\nnextflow run main.nf -profile singularity --input treeval.yaml --outdir {OUTDIR}\r\n\r\n# For the RAPID subset\r\nnextflow run main.nf -profile singularity --input treeval.yaml -entry RAPID --outdir {OUTDIR}\r\n```\r\n\r\nAn example treeval.yaml can be found [here](assets/local_testing/nxOscDF5033.yaml).\r\n\r\nFurther documentation about the pipeline can be found in the following files: [usage](https://pipelines.tol.sanger.ac.uk/treeval/dev/usage), [parameters](https://pipelines.tol.sanger.ac.uk/treeval/dev/parameters) and [output](https://pipelines.tol.sanger.ac.uk/treeval/dev/output).\r\n\r\n\u003e **Warning:**\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\r\n\u003e provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\u003e see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).\r\n\r\n## Credits\r\n\r\nsanger-tol/treeval has been written by Damon-Lee Pointon (@DLBPointon), Yumi Sims (@yumisims) and William Eagles (@weaglesBio).\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n\u003cul\u003e\r\n  \u003cli\u003e@gq1 - For building the infrastructure around TreeVal and helping with code review\u003c/li\u003e\r\n  \u003cli\u003e@ksenia-krasheninnikova - For help with C code implementation and YAML parsing\u003c/li\u003e\r\n  \u003cli\u003e@mcshane - For guidance on algorithms \u003c/li\u003e\r\n  \u003cli\u003e@muffato - For code reviews and code support\u003c/li\u003e\r\n  \u003cli\u003e@priyanka-surana - For help with the majority of code reviews and code support\u003c/li\u003e\r\n\u003c/ul\u003e\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\n## Citations\r\n\r\n\u003c!--TODO: Citation--\u003e\r\n\r\nIf you use sanger-tol/treeval for your analysis, please cite it using the following doi: [10.5281/zenodo.10047653](https://doi.org/10.5281/zenodo.10047653).\r\n\r\n### Tools\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nYou can cite the `nf-core` publication as follows:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/813?version=1","name":"v1.1.0","author":["Damon-Lee Pointon","William Eagles"],"descriptor_type":["NFL"]}]},{"id":"814","url":"https://workflowhub.eu/workflows/814","name":"Use Case 13: Novel Cell Surface Targets for Individual Cancer Patients Analyzed with Common Fund Datasets","description":"The input to this workflow is a data matrix of gene expression that was collected from a pediatric patient tumor patient from the KidsFirst Common Fund program [1]. The RNA-seq samples are the columns of the matrix, and the rows are the raw expression gene count for all human coding genes (Table 1). This data matrix is fed into TargetRanger [2] to screen for targets which are highly expressed in the tumor but lowly expressed across most healthy human tissues based on gene expression data collected from postmortem patients with RNA-seq by the GTEx Common Fund program [3]. Based on this analysis the gene IMP U3 small nucleolar ribonucleoprotein 3 (IMP3) was selected because it was the top candidate returned from the TargetRanger analysis (Tables 2-3). IMP3 is also commonly called insulin-like growth factor 2 mRNA-binding protein 3 (IGF2BP3). Next, we leverage unique knowledge from various other Common Fund programs to examine various functions and knowledge related to IMP3. First, we queried the LINCS L1000 data [4] from the LINCS program [5] converted into RNA-seq-like LINCS L1000 Signatures [6] using the SigCom LINCS API [7] to identify mimicker or reverser small molecules that maximally impact the expression of IMP3 in human cell lines (Fig. 1, Table 4). In addition, we also queried the LINCS L1000 data to identify single gene CRISPR knockouts that down-regulate the expression of IMP3 (Fig. 1, Table 5). 
These potential drug targets were filtered using the Common Fund IDG program's list of understudied proteins [8] to produce a set of additional targets (Table 6). Next, IMP3 was searched for knowledge provided by the Metabolomics Workbench MetGENE tool [9]. MetGENE aggregates knowledge about pathways, reactions, metabolites, and studies from the Metabolomics Workbench Common Fund supported resource [10]. The Metabolomics Workbench was searched to find associated metabolites linked to IMP3 [10]. Furthermore, we leveraged the Linked Data Hub API [11] to list knowledge about regulatory elements associated with IMP3 (Table 6). Finally, the GlyGen database [12] was queried to identify relevant sets of proteins that are the product of the IMP3 genes, as well as known post-translational modifications discovered on IMP3.\r\n\r\n1. Lonsdale, J. et al. The Genotype-Tissue Expression (GTEx) project. Nature Genetics vol. 45 580–585 (2013). doi:10.1038/ng.2653\r\n2. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n3. IDG Understudied Proteins, https://druggablegenome.net/AboutIDGProteinList\r\n4. MetGENE, https://sc-cfdewebdev.sdsc.edu/MetGENE/metGene.php\r\n5. The Metabolomics Workbench, https://www.metabolomicsworkbench.org/\r\n6. Linked Data Hub, https://ldh.genome.network/cfde/ldh/\r\n7. York, W. S. et al. GlyGen: Computational and Informatics Resources for Glycoscience. Glycobiology vol. 30 72–73 (2019). 
doi:10.1093/glycob/cwz080","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/814?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"815","url":"https://workflowhub.eu/workflows/815","name":"Stochastic Gravitational Wave Background (SGWB) tool","description":"The tool provides a calculation of the power spectrum of Stochastic Gravitational Wave Background (SGWB) from a first-order cosmological phase transition based on the parameterisations of Roper Pol et al. (2023). The power spectrum includes two components: from the sound waves excited by collisions of bubbles of the new phase and from the turbulence that is induced by these collisions.\r\n\r\nThe cosmological epoch of the phase transition is described by the temperature, T_star and by the number(s) of relativistic degrees of freedom, g_star that should be specified as parameters.\r\n\r\nThe phase transition itself is characterised by phenomenological parameters, alpha, beta_H and epsilon_turb, the latent heat, the ratio of the Hubble radius to the bubble size at percolation and the fraction of the energy output of the phase transition that goes into turbulence.\r\n\r\nThe product Model spectrum outputs the power spectrum for fixed values of these parameters. 
The product Phase transition parameters reproduces the constraints on the phase transition parameters from the Pulsar Timing Array gravitational wave detectors, reported by Boyer \u0026 Neronov (2024), including the estimate of the cosmological magnetic field induced by turbulence.\r\n","organization":"EuroScienceGateway, ODA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/815?version=1","name":"Version 1","author":["Denys Savchenko","Volodymyr Savchenko"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/815?version=2","name":"Version 2","author":["Denys Savchenko","Volodymyr Savchenko"],"descriptor_type":["GALAXY"]}]},{"id":"816","url":"https://workflowhub.eu/workflows/816","name":"COMPSs with Incrementation and Fibonacci series example","description":"**Name:** Incrementation and Fibonacci     \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\n**Brief Overview:** Demonstrates COMPSs task parallelism with increment and Fibonacci computations. Helps to understand COMPSs.\r\n\r\n**Detailed Description:**\r\n 1. Performs multiple increments of input values in parallel using COMPSs.\r\n 2. Concurrently calculates Fibonacci numbers using recursive COMPSs tasks.\r\n 3. 
Demonstrates task synchronization via `compss_wait_on`.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss src/increment_fibonacci.py value1 Value2 Value3 \r\n#add more values if you want\r\n\r\n```\r\n\r\n# Execution Examples\r\n```\r\nruncompss src/increment_fibonacci.py 1 4 3 9 6 9 \r\n\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/816?version=1","name":"COMPSs version 3.3","author":["Ashish Bhawel"],"descriptor_type":[]}]},{"id":"817","url":"https://workflowhub.eu/workflows/817","name":"Docker ABC MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building 
Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/817?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/817?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/817?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"818","url":"https://workflowhub.eu/workflows/818","name":"Docker Amber Protein Ligand Complex MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building 
Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/818?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/818?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/818?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"819","url":"https://workflowhub.eu/workflows/819","name":"Docker Amber Protein MD Setup tutorial","description":"# AMBER Protein MD Setup tutorials using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)** wrapping the **Ambertools MD package**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building 
Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/819?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/819?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/819?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"820","url":"https://workflowhub.eu/workflows/820","name":"Docker Classical Molecular Interaction Potentials","description":"# CMIP tutorial using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of computing **classical molecular interaction potentials** from **protein structures**, step by step, using the **BioExcel Building Blocks library (biobb)**. Examples shown are **Molecular Interaction Potentials (MIPs) grids, protein-protein/ligand interaction potentials, and protein titration**. The particular structures used are the **Lysozyme** protein (PDB code [1AKI](https://www.rcsb.org/structure/1aki)), and a MD simulation of the complex formed by the **SARS-CoV-2 Receptor Binding Domain and the human Angiotensin Converting Enzyme 2** (PDB code [6VW1](https://www.rcsb.org/structure/6vw1)).\r\n\r\nThe code wrapped is the ***Classical Molecular Interaction Potentials (CMIP)*** code:\r\n\r\n**Classical molecular interaction potentials: Improved setup procedure in molecular dynamics simulations of proteins.**\r\n*Gelpí, J.L., Kalko, S.G., Barril, X., Cirera, J., de la Cruz, X., Luque, F.J. and Orozco, M. (2001)*\r\n*Proteins, 45: 428-437. 
[https://doi.org/10.1002/prot.1159](https://doi.org/10.1002/prot.1159)*\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/820?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/820?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/820?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"821","url":"https://workflowhub.eu/workflows/821","name":"Docker Structural DNA helical parameters tutorial","description":"# Structural DNA helical parameters from MD trajectory tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the [NAFlex](https://mmb.irbbarcelona.org/NAFlex) server and in particular in its [Nucleic Acids Analysis section](https://mmb.irbbarcelona.org/NAFlex/help.php?id=tutorialAnalysisNA).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **extracting 
structural and dynamical properties** from a **DNA MD trajectory helical parameters**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Drew Dickerson Dodecamer** sequence -CGCGAATTCGCG- (PDB code [1BNA](https://www.rcsb.org/structure/1BNA)). The trajectory used is a  500ns-long MD simulation taken from the [BigNASim](https://mmb.irbbarcelona.org/BIGNASim/) database ([NAFlex_DDD_II](https://mmb.irbbarcelona.org/BIGNASim/getStruc.php?idCode=NAFlex_DDD_II) entry).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/821?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/821?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/821?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"822","url":"https://workflowhub.eu/workflows/822","name":"Docker 
Protein conformational ensembles generation","description":"# Protein Conformational ensembles generation\r\n\r\n## Workflow included in the [ELIXIR 3D-Bioinfo](https://elixir-europe.org/communities/3d-bioinfo) Implementation Study:\r\n\r\n### Building on PDBe-KB to chart and characterize the conformation landscape of native proteins\r\n\r\nThis tutorial aims to illustrate the process of generating **protein conformational ensembles** from **3D structures** and analysing its **molecular flexibility**, step by step, using the **BioExcel Building Blocks library (biobb)**.\r\n\r\n## Conformational landscape of native proteins\r\n**Proteins** are **dynamic** systems that adopt multiple **conformational states**, a property essential for many **biological processes** (e.g. binding other proteins, nucleic acids, small molecule ligands, or switching between functionally active and inactive states). Characterizing the different **conformational states** of proteins and the transitions between them is therefore critical for gaining insight into their **biological function** and can help explain the effects of genetic variants in **health** and **disease** and the action of drugs.\r\n\r\n**Structural biology** has become increasingly efficient in sampling the different **conformational states** of proteins. The **PDB** has currently archived more than **170,000 individual structures**, but over two thirds of these structures represent **multiple conformations** of the same or related protein, observed in different crystal forms, when interacting with other proteins or other macromolecules, or upon binding small molecule ligands. 
Charting this conformational diversity across the PDB can therefore be employed to build a useful approximation of the **conformational landscape** of native proteins.\r\n\r\nA number of resources and **tools** describing and characterizing various often complementary aspects of protein **conformational diversity** in known structures have been developed, notably by groups in Europe. These tools include algorithms with varying degree of sophistication, for aligning the 3D structures of individual protein chains or domains, of protein assemblies, and evaluating their degree of **structural similarity**. Using such tools one can **align structures pairwise**, compute the corresponding **similarity matrix**, and identify ensembles of **structures/conformations** with a defined **similarity level** that tend to recur in different PDB entries, an operation typically performed using **clustering** methods. Such workflows are at the basis of resources such as **CATH, Contemplate, or PDBflex** that offer access to **conformational ensembles** comprised of similar **conformations** clustered according to various criteria. Other types of tools focus on differences between **protein conformations**, identifying regions of proteins that undergo large **collective displacements** in different PDB entries, those that act as **hinges or linkers**, or regions that are inherently **flexible**.\r\n\r\nTo build a meaningful approximation of the **conformational landscape** of native proteins, the **conformational ensembles** (and the differences between them), identified on the basis of **structural similarity/dissimilarity** measures alone, need to be **biophysically characterized**. This may be approached at **two different levels**. 
\r\n- At the **biological level**, it is important to link observed **conformational ensembles**, to their **functional roles** by evaluating the correspondence with **protein family classifications** based on sequence information and **functional annotations** in public databases e.g. Uniprot, PDBe-Knowledge Base (KB). These links should provide valuable mechanistic insights into how the **conformational and dynamic properties** of proteins are exploited by evolution to regulate their **biological function**. \u003cbr\u003e\u003cbr\u003e\r\n\r\n- At the **physical level** one needs to introduce **energetic consideration** to evaluate the likelihood that the identified **conformational ensembles** represent **conformational states** that the protein (or domain under study) samples in isolation. Such evaluation is notoriously **challenging** and can only be roughly approximated by using **computational methods** to evaluate the extent to which the observed **conformational ensembles** can be reproduced by algorithms that simulate the **dynamic behavior** of protein systems. These algorithms include the computationally expensive **classical molecular dynamics (MD) simulations** to sample local thermal fluctuations but also faster more approximate methods such as **Elastic Network Models** and **Normal Mode Analysis** (NMA) to model low energy **collective motions**. Alternatively, **enhanced sampling molecular dynamics** can be used to model complex types of **conformational changes** but at a very high computational cost. \r\n\r\nThe **ELIXIR 3D-Bioinfo Implementation Study** *Building on PDBe-KB to chart and characterize the conformation landscape of native proteins* focuses on:\r\n\r\n1. Mapping the **conformational diversity** of proteins and their homologs across the PDB. \r\n2. Characterize the different **flexibility properties** of protein regions, and link this information to sequence and functional annotation.\r\n3. 
Benchmark **computational methods** that can predict a biophysical description of protein motions.\r\n\r\nThis notebook is part of the third objective, where a list of **computational resources** that are able to predict **protein flexibility** and **conformational ensembles** have been collected, evaluated, and integrated in reproducible and interoperable workflows using the **BioExcel Building Blocks library**. Note that the list is not meant to be exhaustive, it is built following the expertise of the implementation study partners.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/822?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/822?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/822?version=3","name":"Version 3","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]}]},{"id":"823","url":"https://workflowhub.eu/workflows/823","name":"Docker Macromolecular Coarse-Grained Flexibility tutorial","description":"# Macromolecular Coarse-Grained Flexibility (FlexServ) tutorial using BioExcel Building Blocks (biobb)\r\n\r\nThis tutorial aims to illustrate the process of generating protein conformational ensembles from 3D structures and analysing its molecular flexibility, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/823?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/823?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/823?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"824","url":"https://workflowhub.eu/workflows/824","name":"Docker Protein Conformational 
Transitions calculations tutorial","description":"# Protein Conformational Transitions calculations tutorial using BioExcel Building Blocks (biobb) and GOdMD\r\n\r\nThis tutorial aims to illustrate the process of computing a conformational transition between two known structural conformations of a protein, step by step, using the BioExcel Building Blocks library (biobb).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/824?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/824?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/824?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"825","url":"https://workflowhub.eu/workflows/825","name":"Docker GMX Notebook Automatic Ligand Parameterization tutorial","description":"# Automatic Ligand parameterization tutorial using BioExcel Building Blocks 
(biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **ligand parameterization** for a **small molecule**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Sulfasalazine** protein (3-letter code SAS), used to treat rheumatoid arthritis, ulcerative colitis, and Crohn's disease.\r\n\r\n**OpenBabel and ACPype** packages are used to **add hydrogens, energetically minimize the structure**, and **generate parameters** for the **GROMACS** package. With *Generalized Amber Force Field (GAFF) forcefield and AM1-BCC* charges.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/825?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/825?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/825?version=3","name":"Version 3","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]}]},{"id":"826","url":"https://workflowhub.eu/workflows/826","name":"Docker Protein MD Setup tutorial","description":"# Protein MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/lysozyme/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation** system containing a **protein**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **Lysozyme** protein (PDB code 1AKI).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/826?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/826?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/826?version=3","name":"Version 3","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]}]},{"id":"827","url":"https://workflowhub.eu/workflows/827","name":"Docker Mutation Free Energy Calculations","description":"# Mutation Free Energy Calculations using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\n**Based on the official [pmx tutorial](http://pmx.mpibpc.mpg.de/sardinia2018_tutorial1/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate how to compute a **fast-growth mutation free energy** calculation, step by step, using the BioExcel **Building Blocks library (biobb)**. The particular example used is the **Staphylococcal nuclease** protein (PDB code 1STN), a small, minimal protein, appropriate for a short tutorial.\r\n\r\nThe **non-equilibrium free energy calculation** protocol performs a **fast alchemical transition** in the direction **WT-\u003eMut** and back **Mut-\u003eWT**. The two equilibrium trajectories needed for the tutorial, one for **Wild Type (WT)** and another for the **Mutated (Mut)** protein (Isoleucine 10 to Alanine -I10A-), have already been generated and are included in this example. We will name **WT as stateA** and **Mut as stateB**.\r\n\r\n![](https://raw.githubusercontent.com/bioexcel/biobb_wf_pmx_tutorial/master/biobb_wf_pmx_tutorial/notebooks/schema.png)\r\n\r\nThe tutorial calculates the **free energy difference** in the folded state of a protein. Starting from **two 1ns-length independent equilibrium simulations** (WT and mutant), snapshots are selected to start **fast (50ps) transitions** driving the system in the **forward** (WT to mutant) and **reverse** (mutant to WT) directions, and the **work values** required to perform these transitions are collected. 
With these values, **Crooks Gaussian Intersection** (CGI), **Bennett Acceptance Ratio** (BAR) and **Jarzynski estimator** methods are used to calculate the **free energy difference** between the two states.\r\n\r\n*Please note that for the sake of disk space this tutorial is using 1ns-length equilibrium trajectories, whereas in the [original example](http://pmx.mpibpc.mpg.de/sardinia2018_tutorial1/eq.mdp) the equilibrium trajectories used were obtained from 10ns-length simulations.*\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/827?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/827?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/827?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"828","url":"https://workflowhub.eu/workflows/828","name":"Docker Protein Ligand Complex MD Setup 
tutorial","description":"# Protein Ligand Complex MD Setup tutorial using BioExcel Building Blocks (biobb)\r\n\r\n**Based on the official [GROMACS tutorial](http://www.mdtutorials.com/gmx/complex/index.html).**\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **setting up a simulation system** containing a **protein in complex with a ligand**, step by step, using the **BioExcel Building Blocks library (biobb)**. The particular example used is the **T4 lysozyme** L99A/M102Q protein (PDB code 3HTB), in complex with the **2-propylphenol** small molecule (3-letter Code JZ4). \r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/828?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/828?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/828?version=3","name":"Version 3","author":["Adam Hospital","Genís 
Bayarri"],"descriptor_type":[]}]},{"id":"829","url":"https://workflowhub.eu/workflows/829","name":"Docker Molecular Structure Checking","description":"# Molecular Structure Checking using BioExcel Building Blocks (biobb)\r\n\r\n***\r\n\r\nThis tutorial aims to illustrate the process of **checking** a **molecular structure** before using it as an input for a **Molecular Dynamics** simulation. The workflow uses the **BioExcel Building Blocks library (biobb)**. The particular structure used is the crystal structure of **human Adenylate Kinase 1A (AK1A)**, in complex with the **AP5A inhibitor** (PDB code [1Z83](https://www.rcsb.org/structure/1z83)).  \r\n\r\n**Structure checking** is a key step before setting up a protein system for **simulations**. A number of **common issues** found in structures at **Protein Data Bank** may compromise the success of the **simulation**, or may suggest that longer **equilibration** procedures are necessary.\r\n\r\nThe **workflow** shows how to:\r\n\r\n- Run **basic manipulations on structures** (selection of models, chains, alternative locations\r\n- Detect and fix **amide assignments** and **wrong chiralities**\r\n- Detect and fix **protein backbone** issues (missing fragments, and atoms, capping)\r\n- Detect and fix **missing side-chain atoms**\r\n- **Add hydrogen atoms** according to several criteria\r\n- Detect and classify **atomic clashes**\r\n- Detect possible **disulfide bonds (SS)**\r\n\r\nAn implementation of this workflow in a **web-based Graphical User Interface (GUI)** can be found in the [https://mmb.irbbarcelona.org/biobb-wfs/](https://mmb.irbbarcelona.org/biobb-wfs/) server (see [https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check](https://mmb.irbbarcelona.org/biobb-wfs/help/create/structure#check)).\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 
[IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/829?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/829?version=2","name":"Version 2","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/829?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"830","url":"https://workflowhub.eu/workflows/830","name":"Docker Protein-ligand Docking tutorial (Fpocket)","description":"# Protein-ligand Docking tutorials using BioExcel Building Blocks (biobb)\r\n\r\nThese tutorials aim to illustrate the process of **protein-ligand docking**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
The particular examples used are based on the **Mitogen-activated protein kinase 14** (p38-α) protein (PDB code [3HEC](https://www.rcsb.org/structure/3HEC)), a well-known **Protein Kinase enzyme**,\r\n in complex with the FDA-approved **Imatinib** (PDB Ligand code [STI](https://www.rcsb.org/ligand/STI), DrugBank Ligand Code [DB00619](https://go.drugbank.com/drugs/DB00619)) and **Dasatinib** (PDB Ligand code [1N1](https://www.rcsb.org/ligand/1N1), DrugBank Ligand Code [DB01254](https://go.drugbank.com/drugs/DB01254)), small **kinase inhibitors** molecules used to treat certain types of **cancer**.\r\n\r\nThe tutorials will guide you through the process of identifying the **active site cavity** (pocket) without previous knowledge, and the final prediction of the **protein-ligand complex**.\r\n\r\n***\r\n\r\n## Copyright \u0026 Licensing\r\nThis software has been developed in the [MMB group](http://mmb.irbbarcelona.org) at the [BSC](http://www.bsc.es/) \u0026 [IRB](https://www.irbbarcelona.org/) for the [European BioExcel](http://bioexcel.eu/), funded by the European Commission (EU H2020 [823830](http://cordis.europa.eu/projects/823830), EU H2020 [675728](http://cordis.europa.eu/projects/675728)).\r\n\r\n* (c) 2015-2026 [Barcelona Supercomputing Center](https://www.bsc.es/)\r\n* (c) 2015-2026 [Institute for Research in Biomedicine](https://www.irbbarcelona.org/)\r\n\r\nLicensed under the\r\n[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), see the file LICENSE for details.\r\n\r\n![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png \"Bioexcel\")","organization":"BioBB Building Blocks","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/830?version=1","name":"Version 1","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/830?version=2","name":"Version 2","author":["Adam 
Hospital","Genís Bayarri"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/830?version=3","name":"Version 3","author":["Adam Hospital","Genís Bayarri"],"descriptor_type":[]}]},{"id":"831","url":"https://workflowhub.eu/workflows/831","name":"SGWB model spectrum","description":"This workflow provides a calculation of the power spectrum of Stochastic Gravitational Wave Background (SGWB) from a first-order cosmological phase transition based on the parameterisations of Roper Pol et al. (2023). The power spectrum includes two components: from the sound waves excited by collisions of bubbles of the new phase and from the turbulence that is induced by these collisions.\r\n\r\nThe cosmological epoch of the phase transition is described by the temperature, T_star and by the number(s) of relativistic degrees of freedom, g_star that should be specified as parameters.\r\n\r\nThe phase transition itself is characterised by phenomenological parameters, alpha, beta_H and epsilon_turb, the latent heat, the ratio of the Hubble radius to the bubble size at percolation and the fraction of the energy output of the phase transition that goes into turbulence.\r\n\r\n\r\n","organization":"EuroScienceGateway","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/831?version=1","name":"Version 1","author":["Andrii Neronov"],"descriptor_type":["GALAXY"]}]},{"id":"833","url":"https://workflowhub.eu/workflows/833","name":"Library curation BOLD","description":"![Perl CI](https://github.com/FabianDeister/Library_curation_BOLD/actions/workflows/ci.yml/badge.svg)\r\n[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10975576.svg)](https://doi.org/10.5281/zenodo.10975576)\r\n\r\n# Library curation BOLD\r\n\r\n![alt text](https://github.com/FabianDeister/Library_curation_BOLD/blob/main/doc/IBOL_LOGO_TRANSPARENT.png?raw=true)\r\n\r\nThis repository contains scripts and synonymy data for 
pipelining the \r\nautomated curation of [BOLD](https://boldsystems.org) data dumps in \r\nBCDM TSV format. The goal is to implement the classification of barcode \r\nreference sequences as is being developed by the \r\n[BGE](https://biodiversitygenomics.eu) consortium. A living document\r\nin which these criteria are being developed is located\r\n[here](https://docs.google.com/document/d/18m-7UnoJTG49TbvTsq_VncKMYZbYVbau98LE_q4rQvA/edit).\r\n\r\nA further goal of this project is to develop the code in this repository\r\naccording to the standards developed by the community in terms of automation,\r\nreproducibility, and provenance. In practice, this means including the\r\nscripts in a pipeline system such as [snakemake](https://snakemake.readthedocs.io/),\r\nadopting an environment configuration system such as\r\n[conda](https://docs.conda.io/), and organizing the folder structure\r\nin compliance with the requirements of\r\n[WorkFlowHub](https://workflowhub.eu/). The latter will provide it with \r\na DOI and will help generate [RO-crate](https://www.researchobject.org/ro-crate/)\r\ndocuments, which means the entire tool chain is FAIR compliant according\r\nto the current state of the art.\r\n\r\n## Install\r\nClone the repo:\r\n```{shell}\r\ngit clone https://github.com/FabianDeister/Library_curation_BOLD.git\r\n```\r\nChange directory: \r\n```{shell}\r\ncd Library_curation_BOLD\r\n```\r\nThe code in this repo depends on various tools. These are managed using\r\nthe `mamba` program (a drop-in replacement of `conda`). The following\r\nsets up an environment in which all needed tools are installed:\r\n\r\n```{shell}\r\nmamba env create -f environment.yml\r\n```\r\n\r\nOnce set up, this is activated like so:\r\n\r\n```{shell}\r\nmamba activate bold-curation\r\n```\r\n\r\n## How to run\r\n### Bash\r\nAlthough the aim of this project is to integrate all steps of the process\r\nin a simple snakemake pipeline, at present this is not implemented. 
Instead,\r\nthe steps are executed individually on the command line as perl scripts\r\nwithin the conda/mamba environment. Because the current project has its own\r\nperl modules in the `lib` folder, every script needs to be run with the \r\nadditional include flag to add the module folder to the search path. Hence,\r\nthe invocation looks like the following inside the scripts folder:\r\n\r\n```{shell}\r\nperl -I../../lib scriptname.pl -arg1 val1 -arg2 val2\r\n```\r\n### snakemake\r\n\r\nFollow the installation instructions above.\r\n\r\nUpdate config/config.yml to define your input data.\r\n\r\nNavigate to the directory \"workflow\" and type:\r\n```{shell}\r\nsnakemake -p -c {number of cores} target\r\n```\r\n\r\nIf running on an HPC cluster with a SLURM scheduler you could use a bash script like this one:\r\n```{shell}\r\n#!/bin/bash\r\n#SBATCH --partition=hour\r\n#SBATCH --output=job_curate_bold_%j.out\r\n#SBATCH --error=job_curate_bold_%j.err\r\n#SBATCH --mem=24G\r\n#SBATCH --cpus-per-task=2\r\n\r\nsource activate bold-curation\r\n\r\nsnakemake -p -c 2 target\r\n\r\necho Complete!\r\n```\r\n","organization":"Biodiversity Genomics Europe (general)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/833?version=1","name":"main @ 4a78148","author":[],"descriptor_type":["SMK"]}]},{"id":"836","url":"https://workflowhub.eu/workflows/836","name":"Parabricks-Genomics-nf","description":"Parabricks-Genomics-nf is a GPU-enabled pipeline for alignment and germline short variant calling for short read sequencing data. The pipeline utilises [NVIDIA's Clara Parabricks](https://docs.nvidia.com/clara/parabricks/4.2.0/index.html) toolkit to dramatically speed up the execution of best practice bioinformatics tools. Currently, this pipeline is **configured specifically for [NCI's Gadi HPC](https://nci.org.au/our-systems/hpc-systems)**. 
\r\n\r\nNVIDIA's Clara Parabricks can deliver a significant speed improvement over traditional CPU-based methods, and is designed to be used only with NVIDIA GPUs. This pipeline is suitable for population screening projects as it executes Parabrick's implementations of BWA mem for short read alignment and Google's DeepVariant for short variant calling. Additionally, it uses standard CPU implementations of data quality evaluation tools [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) and [MultiQC](https://multiqc.info/) and [DNAnexus' GLnexus](https://academic.oup.com/bioinformatics/article/36/24/5582/6064144) for scalable gVCF merging and joint variant calling. Optionally, [Variant Effect Predictor (VEP)](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0974-4) can be run for variant annotation. \r\n","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/836?version=1","name":"main @ 8eea47c","author":["Georgina Samaha"],"descriptor_type":["NFL"]}]},{"id":"837","url":"https://workflowhub.eu/workflows/837","name":"ProGFASTAGen - Protein-Graph FASTA Generation (and Identification) Workflows","description":"# ProGFASTAGen\r\n\r\nThe ProGFASTAGen (**Pro**tein-**G**raph-**FASTA**-**Gen**erator or **Pro**t**G**raph-**FASTA**-**Gen**erator) repository contains workflows to generate so-called precursor-specific-FASTAs (using the precursors from MGF-files) including feature-peptides, like VARIANTs or CONFLICTs if desired, or global-FASTAs (as described in [ProtGraph](https://github.com/mpc-bioinformatics/ProtGraph)). The single workflow scripts have been implemented with [Nextflow-DSL-2](https://www.nextflow.io/docs/latest/dsl2.html) and are independent to each other. Each of these workflows can be used on their own or can be imported to other workflows for other use-cases. 
Further, we included three main-workflows, to show how the single workflows can be chained together. The `main_workflow_protein_fasta.nf`-workflow converts Thermo-RAW-files into MGF, searches with Comet (and Percolator) and the identification results are then further summarized. The workflows `main_workflow_global_fasta.nf` and `main_workflow_precursor_specific_fasta.nf` generate specific FASTA-files before search-engine-identification. Below are example nextflow-calls, which can be used.\r\n\r\nRegarding the precursor-specific-FASTA-generation: The source-code of the C++ implementation for traversal can be found in `bin`. There, four implementations are present: `Float/Int`-Versions as well as `DryRun/VarLimitter`-Versions of the traversal. The `Float/Int`-Versions can be faster/slower depending on the processor-architecture and can be used via a flag in the `create_precursor_specific_fasta.nf`-workflow. The `DryRun`-Version does not generate a FASTA but tests the used system (depending on a query-timeout) to determine the maximum number of variants which can be used, while not timing out. The actual FASTA-generation happens in the `VarLimitter`-Version using the generated protein-graphs at hand.\r\n\r\nIn **Prerequisites** a small description of dependencies and how to set up the host system is given. **Individual steps** describes the single workflows and how they can be called, while **Main Workflow Scripts** shows example-calls of the main workflows. In **Regenerate Results from Publication**, the calls and parameters are shown, which were used in the publication. Using the same FASTA or UniProt flat file format with a similar server-setting should yield similar results as used in the publication.\r\n\r\n## Prerequisites\r\n\r\n### Executing on Linux\r\n\r\nThis workflow can be only executed on linux (tested on Ubuntu 22.04 and ArchLinux). Before setting up the `bin`-folder, some required binaries need to be present on the OS. 
(Focusing on Ubuntu:) The following packages need to be installed on Ubuntu (via `apt`), if not already:\r\n\r\n```text\r\nbuild-essential\r\nwget\r\ncurl\r\nunzip\r\ncmake\r\nmono-complete\r\npython3-pip (or any environment with Python3, where pip is available)\r\npython-is-python3 (needed for ubuntu, so that python points to python3)\r\n```\r\n\r\nIf all packages are installed (and the python environment is set up), the setup-script needs to be executed, which downloads needed dependencies and compiles the source-code located in the `bin`-folder:\r\n\r\n```shell\r\nchmod +x compile_and_setup_depencies.sh  # In case this file is not executable\r\n./compile_and_setup_depencies.sh  # Downloads dependencies, compiles the C++-implementation and sets all binaries in the bin-folder as executable\r\n```\r\n\r\nIf the script exits without errors, the provided workflows can be executed with the command `nextflow`.\r\n\r\n### Executing in Docker\r\n\r\nAlternatively, docker can be used. For this, please follow the [installation guide](https://docs.docker.com/engine/install/ubuntu/) for docker. After installing docker, a local docker-container can be built with all needed dependencies for the workflows. We provide a `Dockerfile` in the `docker`-folder. To build it, execute (while being with a shell in the root-folder of this repository) the following:\r\n\r\n```shell\r\ndocker build -t progfastagen:local . -f docker/Dockerfile\r\n```\r\n\r\nThis command builds a local docker container, tagging it with `progfastagen:local`, which can be later used by nextflow. To use it with nextflow, make sure that `nextflow` is installed on the host-system. For each of the workflow example calls below, the `-with-docker progfastagen:local` then needs to be appended, to let `nextflow` know to use the local docker-container.\r\n\r\n## Individual Steps\r\n\r\nEach step has been implemented in such a way, that it can be executed on its own. 
Each subsection below, provides a brief overview and an example call of the required parameters to demonstrate how the workflow can be called. If you are interested for all the available parameters within a workflow and want modify or tune them, then please refer to the source of the workflows, where each parameter is described briefly.\r\n\r\n### Converting RAW-files to MGF\r\n\r\nThe workflow `convert_to_mgf.nf` is a wrapper around the ThermoRawFileParser and converts RAW-files to the MGF-format. The `ctm_raws` parameter needs to be set, in order to generate the MGF-files:\r\n\r\n```text\r\nnextflow run convert_to_mgf.nf \\\r\n    --ctm_raws \u003c Folder containing RAW-files \u003e \\\r\n    --ctm_outdir \u003c Output-Folder, where the MGFs should be stored \u003e\r\n```\r\n\r\n### Generating a Precursor-Specific-FASTA\r\n\r\nThe workflow `create_precursor_specific_fasta.nf` generates a precursor-specific-FASTA-file, tailored to a set of MGF-files. Here, Protein-Graphs are generated, using the UniProt flat file format (which can be downloaded from [UniProt](https://www.uniprot.org/) by selecting `Text` as format) and a python script prepares the queries, by extracting the MS2-precursors from the MGF-files (using a tolerance, in ppm). Using the Protein-Graphs and a `DryRun`-Version of the traversal, the maximum-variant-limits are determined for each Protein-Graph (and mass-query-range) using a binary-search. 
These limits are then used for the actual ms2-specific-FASTA-generation in conjunction with the extracted MS2-precursors and a compacted FASTA is returned, which is tailored to the MGF-files.\r\n\r\nDespite the complexity, the workflow only requires the following parameters to generate such a FASTA:\r\n\r\n```text\r\nnextflow run create_precursor_specific_fasta.nf \\\r\n    --cmf_mgf_files \u003c Folder containing MGF-files \u003e \\\r\n    --cmf_sp_embl_file \u003c Path to a SP-EMBL-File (UniProt flat file format) \u003e \\\r\n    --cmf_outdir \u003cThe Output-Folder where the traversal-limits are saved and the ms2-specific-FASTA is stored \u003e\r\n```\r\n\r\nThe optional parameter: `cmf_pg_additional_params` is added to ProtGraph directly, allowing every parameter, ProtGraph provides to be set there (e.g. useful if the digestion should be changed or features/PTMs should be included/excluded, etc...), allowing arbitrary settings to generate Protein-Graphs if desired. It defaults to use all features, ProtGraph can parse.\r\n\r\n**Note regarding PTMs/Tolerance**: The FASTA is tailored to the MS2-precursors, therefore variable and fixed modifications need to be set to the same settings as for the actual identification. This workflow defaults to carbamidomethylation (C, fixed) and oxidation (M, variable). See ProtGraph (and the workflow-parameter `cmf_pg_additional_params`) to set the PTMs accordingly in the Protein-Graphs. The same applies for the MS2-precursor-tolerance which can be set with `cmf_query_ppm` and defaults to `5ppm`.\r\n\r\n**Note regarding Limits**: This workflow defaults to allow up to 5 seconds per query and limits peptides to contain at most 5 variants (with a maximum of 5000 Da per peptide), resulting into FASTA-files which can be 15-200GB large (depending on dataset and species). Changing these settings can drastically increase/decrease the runtime/memory usage/disk usage. 
We advise to change those settings slightly and to pay attention on the runtime/memory usage/disk usage if run with the newly set limits (and dataset + species) the first time.\r\n\r\n**Note regarding identification**: If digestion is enabled (default is `Trypsin`), the resulting FASTA contains already digested entries, thus searching with a search-engine, the digestion should be set to `off/no_cut`.\r\n\r\n### Generating a Global-FASTA\r\n\r\nThis workflow generates a so called global-FASTA, using ProtGraph, the UniProt flat file format and some global limits for writing out peptides/proteins. Global-FASTAs can be generated with the `create_global_fasta.nf`-workflow. To generate a global-FASTA, only a path to a single SP-EMBL-file (UniProt flat file format) is required. Such a file can be downloaded from [UniProt](https://www.uniprot.org/) directly, by selecting `Text` instead of `FASTA` as the download format.\r\n\r\n```text\r\nnextflow run create_global_fasta.nf \\\r\n    --cgf_sp_embl_file \u003c Path to a SP-EMBL-File (UniProt flat file format) \u003e \\\r\n    --cgf_outdir \u003c The output-folder, where the global-FASTA and some Protein-Graph-statistics should be saved \u003e\r\n```\r\n\r\nPer default, this workflow does not export feature-peptides and is set to only export peptides with up to 5000 Da mass and maximum of two miscleavages. It is possible to generate global-FASTA with some specific features (like containing, `SIGNAL`, `PEPTIDE` or others) and other limits. The parameters `cgf_features_in_graphs` and `cgf_peptide_limits` can be set accordingly. These are added to ProtGraph directly, hence every parameter ProtGraph provides, can be set here (including different digestion settings).\r\n\r\n**Note**: A dry run with ProtGraph to generate statistics how many peptides would be theoretically exported is advised prior for testing. Some Protein-Graphs with some features (e.g. 
P53 using variants) can contain too many peptides, which could result in very long runtimes and huge FASTAs.\r\n\r\n**Note regarding identification**: If digestion is enabled (default is `Trypsin`), the resulting FASTA contains already digested entries, thus searching with a search-engine, the digestion should be set to `off/no_cut`.\r\n\r\n### Identification via Comet (and Percolator)\r\n\r\nWe provide an identification workflow to showcase, that the generated FASTAs can be used with search-engines. The workflow `identification_via_comet.nf` identifies MGF-files individually, using custom search-settings for Comet (and if desired rescores the results with Percolator), applies an FDR-cutoff using the q-value (for each file) and exposes the identification results into an output-folder.\r\n\r\nThree parameters are required, to execute the workflow:\r\n\r\n1. The MGFs which should be identified\r\n2. The Comet-Parameter file to set the search-settings\r\n3. The FASTA-file which should be used for identification\r\n\r\nBelow is an example call with all required parameters (Percolator is enabled by default):\r\n\r\n```text\r\nnextflow run identification_via_comet.nf \\\r\n    --idc_mgf_folder \u003c Folder containing MGF-files \u003e \\\r\n    --idc_fasta_file \u003c The FASTA which should be used for identification \u003e \\\r\n    --idc_search_parameter_file \u003c The Comet-Parameters file (Search Configuration) \u003e \\\r\n    --idc_outdir \u003c Output-Folder where the results of the identification files are stored \u003e\r\n```\r\n\r\nHere is another example call with all required parameters (this time, turning Percolator off):\r\n\r\n```text\r\nnextflow run identification_via_comet.nf \\\r\n    --idc_mgf_folder \u003c Folder containing MGF-files \u003e \\\r\n    --idc_fasta_file \u003c The FASTA which should be used for identification \u003e \\\r\n    --idc_search_parameter_file \u003c The Comet-Parameters file (Search Configuration) \u003e \\\r\n    
--idc_outdir \u003c Output-Folder where the results of the identification files are stored \u003e \\\r\n    --idc_use_percolator 0\r\n```\r\n\r\n**Note**: This identification-workflow defaults to an FDR-cutoff (q-value) of `--idc_fdr \"0.01\"`, reporting only 1% filtered PSMs. Arbitrary and multiple FDR-cutoffs can be set and can be changed to the desired value.\r\n\r\n### Summarization of results\r\n\r\nThe `summarize_ident_results.nf`-workflow generates convenient summarization of the identification results. Here, the identification-results are binned into 4 groups:\r\n\r\n1. Unique PSMs (a match, which can only originate from one protein)\r\n2. Shared PSMs (a match, which can originate from multiple proteins)\r\n3. Unique Feature PSMs (as 1., but only containing peptides, which can be explained by a feature)\r\n4. Shared Feature PSMs (as 2., but can only be explained by features from all originating proteins)\r\n\r\nFurthermore, heatmaps are generated to provide an overview of found peptides across all MGFs/RAW-files.\r\n\r\nTo call this method, a `glob` needs to be specified in this workflow:\r\n\r\n```text\r\nnextflow run summarize_ident_results.nf \\\r\n    --sir_identified_files_glob \u003c The glob matching the desired output from the identification results \u003e\r\n    --sir_outdir \u003c The output directory where the summarized results should be saved \u003e\r\n```\r\n\r\nIn case the identification workflow was executed using an FDR of 0.01, you could use the following `glob`:\r\n\r\n```text\r\nnextflow run summarize_ident_results.nf \\\r\n    --sir_identified_files_glob \"\u003cPath_to_folder\u003e/*qvalue_no_decoys_fdr_0.01.tsv\"\r\n    --sir_outdir \u003c The output directory where the summarized results should be saved \u003e\r\n```\r\n\r\n**Note**: This step can be used only if specific columns are present in the tables. Furthermore, it distinguishes between the identification results from a FASTA by UniProt or by ProtGraph. 
The additional parameters control, whether to bin results in group 3 and 4, decide if variable modifications should be considered as unique, as well as if a peptide, which originates multiple times to the same protein should be considered as unique. The main-workflows set these parameters accordingly and can be used as an example.\r\n\r\n## Main Workflow Scripts\r\n\r\nEach individual step described above, is also imported and chained into three main-workflows:\r\n\r\n1. `main_workflow_protein_fasta.nf` (UniProt-FASTA-search)\r\n2. `main_workflow_global_fasta.nf` (Generation of a global-FASTA and search)\r\n3. `main_workflow_precursor_specific_fasta.nf` (Generation of a precursor-specific-FASTA and search)\r\n\r\ngenerating summarized identification results across multiple RAW-files.\r\n\r\nIn each of these workflows, it is possible to modify the parameters of the imported subworkflows, by using the imported subworkflows parameters directly (as shown in the **Individual Steps** above).\r\n\r\nFor protein-FASTA identification, only three parameters are required:\r\n\r\n```text\r\nnextflow run main_workflow_protein_fasta.nf \\\r\n    --main_fasta_file \u003c The FASTA-file, to be used for identification \u003e \\\r\n    --main_raw_files_folder \u003c The folder containing RAW-files \u003e \\\r\n    --main_comet_params \u003c The parameters file for comet (for identification) \u003e \\\r\n    --main_outdir \u003c Output-Folder where all the results from the workflows should be saved \u003e\r\n```\r\n\r\nThis is also true for the other two workflows, where instead of a FASTA-file, the UniProt flat file format needs to be provided. 
Such a file can be downloaded from [UniProt](https://www.uniprot.org/) directly, by selecting the format `Text` instead of the format `FASTA`.\r\n\r\nHere are the corresponding calls for global-FASTA and precursor-specific-FASTA generation and identification:\r\n\r\n```text\r\n# global-FASTA\r\nnextflow run main_workflow_global_fasta.nf \\\r\n    --main_sp_embl_file \u003c The SP-EMBL-file used for Protein-Graph- and FASTA-generation (UniProt flat file format) \u003e \\\r\n    --main_raw_files_folder \u003c The folder containing RAW-files \u003e \\\r\n    --main_comet_params \u003c The parameters file for comet (for identification) \u003e \\\r\n    --main_outdir \u003c Output-Folder where all the results from the workflows should be saved \u003e\r\n\r\n# precursor-specific-FASTA\r\nnextflow run main_workflow_precursor_specific_fasta.nf \\\r\n    --main_sp_embl_file \u003c The SP-EMBL-file used for Protein-Graph- and FASTA-generation (UniProt flat file format) \u003e \\\r\n    --main_raw_files_folder \u003c The folder containing RAW-files \u003e \\\r\n    --main_comet_params \u003c The parameters file for comet (for identification) \u003e \\\r\n    --main_outdir \u003c Output-Folder where all the results from the workflows should be saved \u003e\r\n```\r\n\r\n**Note**: Only defining the required parameters uses the default parameters for every other setting. For all workflows, this would mean that the FDR-cutoff (q-value) is set to `0.01` resulting in both FDRs considered. Furthermore, the global-FASTA and precursor-specific-FASTA workflows assume Trypsin digestion. For the global-FASTA-workflow, no features are exported by default, which may not be desired, if someone wishes to search for peptide-features (like `SIGNAL`, etc..). 
For the precursor-specific-FASTA-workflow, the PTMs carbamidomethylation (C, fixed) and oxidation (M, variable) are assumed, which may need to be modified.\r\n\r\n**Note regarding example calls**: Further below you can find the calls as used in the publication. These set the most minimal parameters for a correct execution on custom datasets and can be used as an example.\r\n\r\n## Regenerate Results from Publication\r\n\r\nIn this subsection you can find the nextflow-calls which were used to execute the 3 workflows. Executing this with the same UniProt flat file/FASTA-file should yield the similar/same results. For generated precursor-specific-FASTAs it may happen, that these are generated with slightly different variant-limits, therefore a slightly different FASTA to search with and slightly different identification results.\r\n\r\nThe FASTA/UniProt flat file used for identification can be found [here](https://cloud.mpc.rub.de/s/LJ2bgGNmsxzSaod). The Comet configuration files are provided in the `example_configuration`-folder. 
The datasets can be retrieved from [PRIDE](https://www.ebi.ac.uk/pride/).\r\n\r\n### PXD002171\r\n\r\n```shell\r\n# PXD002171 Precursor-Specific\r\nnextflow run main_workflow_precursor_specific_fasta.nf \\\r\n    -with-report \"PXD002171_results_precursor_specific/nextflow_report.html\" \\\r\n    -with-timeline \"PXD002171_results_precursor_specific/nextflow_timeline.html\" \\\r\n    --main_sp_embl_file 20230619_homo_sapiens_proteome.txt \\\r\n    --main_raw_files_folder PXD002171 \\\r\n    --main_comet_params example_configurations/PXD002171_no_dig.txt \\\r\n    --main_outdir PXD002171_results_precursor_specific \\\r\n    --cmf_max_precursor_da 5000 \\\r\n    --cmf_query_ppm 5 \\\r\n    --cmf_timeout_for_single_query 5 \\\r\n    --cmf_maximum_variant_limit 5 \\\r\n    --cmf_pg_additional_params \"-ft VARIANT -ft SIGNAL -ft INIT_MET -ft CONFLICT -ft VAR_SEQ -ft PEPTIDE -ft PROPEP -ft CHAIN -vm 'M:15.994915' -vm 'C:71.037114'\" \\\r\n    --idc_fdr \"0.01\"\r\n    \r\n# PXD002171 Global digested FASTA\r\nnextflow run main_workflow_global_fasta.nf \\\r\n    -with-report \"PXD002171_global_fasta/nextflow_report.html\" \\\r\n    -with-timeline \"PXD002171_global_fasta/nextflow_timeline.html\" \\\r\n    --main_sp_embl_file 20230619_homo_sapiens_proteome.txt \\\r\n    --main_raw_files_folder PXD002171 \\\r\n    --main_comet_params example_configurations/PXD002171_no_dig.txt \\\r\n    --main_outdir PXD002171_global_fasta \\\r\n    --cgf_features_in_graphs \"-ft None\" \\\r\n    --cgf_peptide_limits \"--pep_miscleavages 2 --pep_min_pep_length 5\" \\\r\n    --idc_fdr \"0.01\"\r\n\r\n# PXD002171 Protein FASTA\r\nnextflow run main_workflow_protein_fasta.nf \\\r\n    -with-report \"PXD002171_protein_fasta/nextflow_report.html\" \\\r\n    -with-timeline \"PXD002171_protein_fasta/nextflow_timeline.html\" \\\r\n    --main_fasta_file 20230619_homo_sapiens_proteome.fasta \\\r\n    --main_raw_files_folder PXD002171 \\\r\n    --main_comet_params 
example_configurations/PXD002171_trypsin_dig.txt \\\r\n    --main_outdir PXD002171_protein_fasta \\\r\n    --idc_fdr \"0.01\"\r\n```\r\n\r\n### PXD028605\r\n\r\n```shell\r\n# PXD028605 Precursor-Specific\r\nnextflow run main_workflow_precursor_specific_fasta.nf \\\r\n    -with-report \"PXD028605_results_precursor_specific/nextflow_report.html\" \\\r\n    -with-timeline \"PXD028605_results_precursor_specific/nextflow_timeline.html\" \\\r\n    --main_sp_embl_file 20230619_homo_sapiens_proteome.txt \\\r\n    --main_raw_files_folder PXD028605 \\\r\n    --main_comet_params example_configurations/PXD028605_no_dig.txt \\\r\n    --main_outdir PXD028605_results_precursor_specific \\\r\n    --cmf_max_precursor_da 5000 \\\r\n    --cmf_query_ppm 20 \\\r\n    --cmf_timeout_for_single_query 5 \\\r\n    --cmf_maximum_variant_limit 5 \\\r\n    --cmf_pg_additional_params \"-ft VARIANT -ft SIGNAL -ft INIT_MET -ft CONFLICT -ft VAR_SEQ -ft PEPTIDE -ft PROPEP -ft CHAIN -fm 'C:57.021464' -vm 'M:15.9949'\" \\\r\n    --idc_fdr \"0.01\"\r\n\r\n# PXD028605 Global digested FASTA\r\nnextflow run main_workflow_global_fasta.nf \\\r\n    -with-report \"PXD028605_global_fasta/nextflow_report.html\" \\\r\n    -with-timeline \"PXD028605_global_fasta/nextflow_timeline.html\" \\\r\n    --main_sp_embl_file 20230619_homo_sapiens_proteome.txt \\\r\n    --main_raw_files_folder PXD028605 \\\r\n    --main_comet_params example_configurations/PXD028605_no_dig.txt \\\r\n    --main_outdir PXD028605_global_fasta \\\r\n    --cgf_features_in_graphs \"-ft None\" \\\r\n    --cgf_peptide_limits \"--pep_miscleavages 2 --pep_min_pep_length 5\" \\\r\n    --idc_fdr \"0.01\"\r\n\r\n# PXD028605 Protein FASTA\r\nnextflow run main_workflow_protein_fasta.nf \\\r\n    -with-report \"PXD028605_protein_fasta/nextflow_report.html\" \\\r\n    -with-timeline \"PXD028605_protein_fasta/nextflow_timeline.html\" \\\r\n    --main_fasta_file 20230619_homo_sapiens_proteome.fasta \\\r\n    --main_raw_files_folder PXD028605 \\\r\n    
--main_comet_params example_configurations/PXD028605_trypsin_dig.txt \\\r\n    --main_outdir PXD028605_protein_fasta \\\r\n    --idc_fdr \"0.01\"\r\n```\r\n","organization":"Medizinisches Proteom-Center, Medical Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/837?version=1","name":"main @ 2384d36","author":["Dominik Lux","Julian Uszkoreit"],"descriptor_type":["NFL"]}]},{"id":"838","url":"https://workflowhub.eu/workflows/838","name":"PyCOMPSs Matrix Multiplication, out-of-core using files, reproducibility example","description":"**Name:** Matrix multiplication with Files, reproducibility example  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nMatrix multiplication is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles (N hardcoded to 2, and M hardcoded to 8). The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. 
When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\n# Reproducibility\r\nTo reproduce the exact results of this example, follow the instructions at the [Workflow Provenance section at COMPSs User Manual](https://compss-doc.readthedocs.io/en/stable/Sections/05_Tools/04_Workflow_Provenance.html), WITH data persistence, PyCOMPSs application\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/matmul_files.py inputs_folder/ outputs_folder/\r\n```\r\n\r\nwhere:\r\n* inputs_folder/: Folder where A and B matrices are located\r\n* outputs_folder/: Folder with the resulting C matrix\r\n\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/matmul_files.py inputs/ outputs/\r\nruncompss src/matmul_files.py inputs/ outputs/\r\npython -m pycompss src/matmul_files.py inputs/ outputs/\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/838?version=1","name":"COMPSs 3.3.1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"839","url":"https://workflowhub.eu/workflows/839","name":"PyCOMPSs Matrix Multiplication, out-of-core using files, MareNostrum V, reproducibility example, without data persistence","description":"**Name:** Matrix multiplication with Files, reproducibility example, without data persistence\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n\r\n# Description\r\nMatrix multiplication is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. 
When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles (N hardcoded to 2, and M hardcoded to 8). The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\n# Reproducibility\r\nTo reproduce the exact results of this example, follow the instructions at the [Workflow Provenance section at COMPSs User Manual](https://compss-doc.readthedocs.io/en/stable/Sections/05_Tools/04_Workflow_Provenance.html), WITHOUT data persistence, PyCOMPSs application.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --lang=python src/matmul_files.py inputs_folder/ outputs_folder/\r\n```\r\n\r\nwhere:\r\n* inputs_folder/: Folder where A and B matrices are located\r\n* outputs_folder/: Folder with the resulting C matrix\r\n\r\n\r\n# Execution Examples\r\n```\r\nruncompss --lang=python src/matmul_files.py inputs/ outputs/\r\nruncompss src/matmul_files.py inputs/ outputs/\r\npython -m pycompss src/matmul_files.py inputs/ outputs/\r\n```\r\n\r\n# Build\r\nNo build is required\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/839?version=1","name":"COMPSs 3.3.1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"873","url":"https://workflowhub.eu/workflows/873","name":"Tango: Numerical reconciliation of bacterial fermentation in cheese production","description":"Complete workflow for TANGO as reported in Lecomte et al (2024),\r\n\"Revealing the dynamics and mechanisms of bacterial interactions in\r\ncheese production with 
metabolic modelling\", Metabolic Eng. 83:24-38\r\nhttps://doi.org/10.1016/j.ymben.2024.02.014\r\n\r\n1. Parameters for individual models are obtained by optimization\r\n2. Individual dynamics and community dynamics are simulated\r\n3. Figures for the manuscript are assembled from the results.","organization":"MISTIC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/873?version=1","name":"Version 0.9.0","author":[],"descriptor_type":["CWL"]}]},{"id":"874","url":"https://workflowhub.eu/workflows/874","name":"JAX NGS Operations Nextflow DSL2 Pipelines","description":"[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11068736.svg)](https://doi.org/10.5281/zenodo.11068736)\r\n\r\n# JAX NGS Operations Nextflow DSL2 Pipelines\r\n\r\nThis repository contains production bioinformatic analysis pipelines for a variety of bulk 'omics data analysis. Please see the [Wiki documentation](https://github.com/TheJacksonLaboratory/cs-nf-pipelines/wiki) associated with this repository for all documentation and available analysis workflows.\r\n","organization":"Jackson Laboratory NGS-Ops","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/874?version=1","name":"main @ 07d4c11","author":[],"descriptor_type":["NFL"]}]},{"id":"875","url":"https://workflowhub.eu/workflows/875","name":"Repeat masking - TSI","description":"This is part of a series of workflows to annotate a genome, tagged with `TSI-annotation`. \r\nThese workflows are based on command-line code by Luke Silver, converted into Galaxy Australia workflows. 
\r\n\r\nThe workflows can be run in this order: \r\n* Repeat masking\r\n* RNAseq QC and read trimming\r\n* Find transcripts\r\n* Combine transcripts\r\n* Extract transcripts\r\n* Convert formats\r\n* Fgenesh annotation\r\n\r\n****\r\n\r\nWorkflow information:\r\n* Input = genome.fasta.\r\n* Outputs = soft_masked_genome.fasta, hard_masked_genome.fasta, and table of repeats found. \r\n* Runs RepeatModeler with default settings, uses the output of this (repeat library) as input into RepeatMasker. \r\n* Runs RepeatMasker with default settings except for: Skip masking of simple tandem repeats and low complexity regions. (-nolow) : default set to yes.  Perform softmasking instead of hardmasking - set to yes. \r\n* Converts the soft-masked genome to hard-masked for use in other tools if required. \r\n* Workflow report displays an edited table of repeats found. Note: a known bug is that sometimes the workflow report text resets to default text. To restore, look for an earlier workflow version with correct workflow report text, and copy and paste report text into current version.\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/875?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/875?version=2","name":"Version 1.1","author":["Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/875?version=3","name":"Version 3","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"876","url":"https://workflowhub.eu/workflows/876","name":"QC and trimming of RNAseq reads - TSI","description":"This is part of a series of workflows to annotate a genome, tagged with `TSI-annotation`. \r\nThese workflows are based on command-line code by Luke Silver, converted into Galaxy Australia workflows. 
\r\n\r\nThe workflows can be run in this order: \r\n* Repeat masking\r\n* RNAseq QC and read trimming\r\n* Find transcripts\r\n* Combine transcripts\r\n* Extract transcripts\r\n* Convert formats\r\n* Fgenesh annotation\r\n\r\n****\r\n\r\nAbout this workflow:\r\n\r\n* Repeat this workflow separately for datasets from different tissues. \r\n* Inputs = collections of R1 files, and R2 files (all from a single tissue type). \r\n* Runs FastQC with default settings, separately for raw reads R1 and R2 collections; all output to MultiQC. \r\n* Runs Trimmomatic with initial ILLUMINACLIP step (using standard adapter sequence for TruSeq3 paired-ended), uses settings SLIDINGWINDOW:4:5 LEADING:5 TRAILING:5 MINLEN:25, retain paired (not unpaired) outputs. User can modify at runtime. \r\n* Runs FastQC with default settings, separately for trimmed R1 and R2 collections; all output to MultiQC. \r\n* From Trimmomatic output: concatenate all R1 reads; concatenate all R2 reads. \r\n* Outputs = trimmed merged R1 file, trimmed merged R2 file. \r\n* Log files from Trimmomatic to MultiQC, to summarise trimming results. \r\n* Note: a known bug with MultiQC html output is that plot is labelled as \"R1\" reads, when it actually contains information from both R1 and R2 read sets - this is under investigation (and is due to a Trimmomatic output file labelling issue). \r\n* MultiQC results table formatted to show % of reads retained after trimming, table included in workflow report. \r\n* Note: a known bug is that sometimes the workflow report text resets to default text. To restore, look for an earlier workflow version with correct workflow report text, and copy and paste report text into current version. 
","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/876?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"877","url":"https://workflowhub.eu/workflows/877","name":"Find transcripts - TSI","description":"This is part of a series of workflows to annotate a genome, tagged with `TSI-annotation`. \r\nThese workflows are based on command-line code by Luke Silver, converted into Galaxy Australia workflows. \r\n\r\nThe workflows can be run in this order: \r\n* Repeat masking\r\n* RNAseq QC and read trimming\r\n* Find transcripts\r\n* Combine transcripts\r\n* Extract transcripts\r\n* Convert formats\r\n* Fgenesh annotation\r\n\r\n****\r\n\r\nAbout this workflow:\r\n\r\n* Run this workflow per tissue. \r\n* Inputs: masked_genome.fasta and the trimmed RNAseq reads (R1 and R2) from one type of tissue. \r\n* Index genome and align reads to genome with HISAT2, with default settings except for: Advanced options: spliced alignment options: specify options: Transcriptome assembly reporting: selected option: Report alignments tailored for transcript assemblers including StringTie (equivalent to -dta flag). \r\n* Runs samtools sort to sort bam by coordinate. \r\n* Runs StringTie to generate gtf from sorted bam. \r\n* Output: transcripts.gtf from a single tissue.","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/877?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"878","url":"https://workflowhub.eu/workflows/878","name":"Combine transcripts - TSI","description":"This is part of a series of workflows to annotate a genome, tagged with `TSI-annotation`. 
\r\nThese workflows are based on command-line code by Luke Silver, converted into Galaxy Australia workflows. \r\n\r\nThe workflows can be run in this order: \r\n* Repeat masking\r\n* RNAseq QC and read trimming\r\n* Find transcripts\r\n* Combine transcripts\r\n* Extract transcripts\r\n* Convert formats\r\n* Fgenesh annotation\r\n\r\n****\r\n\r\nAbout this workflow:\r\n\r\n* Inputs: multiple transcriptome.gtfs from different tissues, genome.fasta, coding_seqs.fasta, non_coding_seqs.fasta \r\n* Runs StringTie merge to combine transcriptomes, with default settings except for -m = 30 and -F = 0.1, to produce a merged_transcriptomes.gtf. \r\n* Runs Convert GTF to BED12 with default settings, to produce a merged_transcriptomes.bed. \r\n* Runs bedtools getfasta with default settings except for -name = yes, -s = yes, -split - yes, to produce a merged_transcriptomes.fasta\r\n* Runs CPAT to generate seqs with high coding probability. \r\n* Filters out non-coding seqs from the merged_transcriptomes.fasta\r\n* Output: filtered_merged_transcriptomes.fasta","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/878?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/878?version=2","name":"v1.1","author":["Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/878?version=3","name":"v1.2","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"879","url":"https://workflowhub.eu/workflows/879","name":"Extract transcripts - TSI","description":"This is part of a series of workflows to annotate a genome, tagged with `TSI-annotation`. \r\nThese workflows are based on command-line code by Luke Silver, converted into Galaxy Australia workflows. 
\r\n\r\nThe workflows can be run in this order: \r\n* Repeat masking\r\n* RNAseq QC and read trimming\r\n* Find transcripts\r\n* Combine transcripts\r\n* Extract transcripts\r\n* Convert formats\r\n* Fgenesh annotation\r\n\r\n****\r\n\r\nAbout this workflow:\r\n\r\n* Input: merged_transcriptomes.fasta. \r\n* Runs TransDecoder to produce longest_transcripts.fasta\r\n* (Runs both the LongOrfs and Predict parts together. Default settings except Long Orfs options: -m =20)\r\n* Runs Busco on output. ","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/879?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"880","url":"https://workflowhub.eu/workflows/880","name":"Convert formats - TSI","description":"This is part of a series of workflows to annotate a genome, tagged with `TSI-annotation`. \r\nThese workflows are based on command-line code by Luke Silver, converted into Galaxy Australia workflows. 
\r\n\r\nThe workflows can be run in this order: \r\n* Repeat masking\r\n* RNAseq QC and read trimming\r\n* Find transcripts\r\n* Combine transcripts\r\n* Extract transcripts\r\n* Convert formats\r\n* Fgenesh annotation\r\n\r\n****\r\n\r\nAbout this workflow:\r\n\r\n* Inputs: transdecoder-peptides.fasta, transdecoder-nucleotides.fasta\r\n* Runs many steps to convert outputs into the formats required for Fgenesh - .pro, .dat and .cdna","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/880?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/880?version=2","name":"v1.1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"881","url":"https://workflowhub.eu/workflows/881","name":"Fgenesh annotation -TSI","description":"# Fgenesh Annotation - TSI Workflow Description\r\n\r\n## Overview\r\nOne of a series of workflows to annotate a genome, tagged `TSI-annotation`. Based on command-line code by Luke Silver, converted into Galaxy Australia workflows.\r\n\r\n## Workflow Sequence\r\nRun in this order:\r\n- Repeat masking\r\n- RNAseq QC and read trimming\r\n- Find transcripts\r\n- Combine transcripts\r\n- Extract transcripts\r\n- Convert formats\r\n- **Fgenesh annotation** (this workflow)\r\n\r\n## Inputs Required\r\n\r\n**Files uploaded by the user:**\r\n- `assembled_genome.fasta` — the assembled genome\r\n- `hard_masked_genome.fasta` — hard repeat-masked genome\r\n- `mRNA_sequences.fasta` — known mRNAs in Fgenesh header format (typically the output of the upstream \"Convert formats\" workflow). 
Optional — see \"Running without mRNA\" below.\r\n\r\n**Selected at runtime (dropdowns / tick-boxes, not uploads):**\r\n- Closely-related species (Fgenesh species matrix, from those installed on Galaxy Australia)\r\n- Mammal or non-mammal\r\n- NR database (for Fgenesh get proteins)\r\n- BUSCO lineage\r\n- Licence agreement (tick to accept Fgenesh terms)\r\n\r\n## Running without mRNA\r\nIf no known mRNA sequences are available, edit the Fgenesh-annotate step's mRNA option to \"no\" and disconnect the mRNA input.\r\n\r\n## Processing Steps\r\nSplits the input genomes into single sequences (to reduce runtime), annotates each with Fgenesh++, and merges the outputs.\r\n\r\n## Outputs\r\n- Genome annotation (GFF3)\r\n- Annotation stats\r\n- FASTA files of mRNAs, cDNAs and proteins\r\n- BUSCO report of proteins\r\n\r\n## Key Note\r\nThe sequences passed to the mRNA/cDNA extraction tools are the unmasked assembly; there may be situations where the masked version is preferable.\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/881?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/881?version=2","name":"Version 2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/881?version=3","name":"Version 2.1","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/881?version=4","name":"Version 2.2","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/881?version=5","name":"Version 3","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/881?version=6","name":"Version 3.1","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/881?version=7","name":"Version 
3.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"882","url":"https://workflowhub.eu/workflows/882","name":"beacon-workflow","description":"# beacon-omop-worker-workflows","organization":"TRE-FX","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/882?version=1","name":"main @ b08dc69","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/882?version=2","name":"main @ 17afa9b","author":[],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/882?version=3","name":"main @ 8f28313","author":[],"descriptor_type":["CWL"]}]},{"id":"883","url":"https://workflowhub.eu/workflows/883","name":"Training a CNN model for classification of transcriptional subtypes and survival prediction in glioblastoma","description":"# GBMatch_CNN\r\nWork in progress...\r\nPredicting TS \u0026amp; risk from glioblastoma whole slide images\r\n\r\n# Reference\r\nUpcoming paper: stay tuned...\r\n\r\n# Dependencies\r\npython 3.7.7\r\n\r\nrandaugment by Khrystyna Faryna: https://github.com/tovaroe/pathology-he-auto-augment\r\n\r\ntensorflow 2.1.0\r\n\r\nscikit-survival 0.13.1\r\n\r\npandas 1.0.3\r\n\r\nlifelines 0.25.0\r\n\r\n# Description\r\nThe pipeline implemented here predicts transcriptional subtypes and survival of glioblastoma patients based on H\u0026E stained whole slide scans. Sample data is provided in this repository. To test the basic functionality with 5-fold-CV simply run train_model_OS.py (for survival) or train_model_TS.py (for transcriptional subtypes). Please note that this will not reproduce the results from the manuscript, as only a small fraction of the image data can be provided in this repository due to size constraints. In order to reproduce the results from the manuscript, please refer to the step by step guide below. 
The whole dataset can be accessed at https://www.medical-epigenomics.org/papers/GBMatch/.\r\nIf you wish to adopt this pipeline for your own use, please be sure to set the correct parameters in config.py.\r\n\r\nMoreover, we provide a fully trained model in gbm_predictor.py for predicting new samples (supported WSI formats are ndpi and svs). To use GBMPredictor, simply initialize by calling \r\n`gbm_predictor = GBMPredictor()`\r\nand predict your sample by calling\r\n`(predicted_TS, risk_group, median_riskscore) = gbm_predictor.predict(*path_to_slidescan*)`\r\nHeatmaps and detailed results will be automatically saved in a subfolder in your sample path.\r\n\r\n# Reproducing the manuscript results - step by step guide\r\n\r\n## Training the CNN model\r\n1. Clone this repository and install the dependencies in your environment. Make sure that the path for randaugment is correctly set in the config.py (should be correct by default).\r\n2. Download all included image tiles at https://doi.org/10.5281/zenodo.8358673 and replace the data/training/image_tiles folder with the image_tiles folder from zenodo.\r\n3. Run train_model_OS.py and/or train_model_TS.py to reproduce the training with 5-fold cross validation. Models and results will be saved in the data/models folder.\r\n4. Run train_final_model_OS.py and/or train_final_model_TS.py to train the final model on the whole training dataset.\r\n\r\n## Validate the CNN model on TCGA data\r\n1. Download scans and clinical data of the TCGA glioblastoma cohort from https://www.cbioportal.org/ and/or https://portal.gdc.cancer.gov/\r\n2. Copy tumor segmentations from GBMatch_CNN/data/validation/segmentation into the same folder as the TCGA slide scans\r\n3. Predict TCGA samples with gbm_predictor (see above).\r\n(You can also find all prediction results in GBMatch_CNN/data/validation/TCGA_annotation_prediction.csv.)\r\n\r\n## Evaluation of the tumor microenvironment\r\n1. 
Install qupath 0.3.0 (newer versions should also work): https://qupath.github.io/.\r\n2. Download immunohistochemical slides from https://www.medical-epigenomics.org/papers/GBMatch/.\r\n3. Download annotation (IHC_geojsons) from https://doi.org/10.5281/zenodo.8358673.\r\n4. Create a new project and import all immunohistochemical slides \u0026 annotations.\r\n5. Copy the CD34 and HLA-DR thresholder from GBMatch_CNN/qupath into your project.\r\n6. Run GBMatch_CNN/qupath/IHC_eval.groovy for all slides - immunohistochemistry results will be saved to a IHC_results-folder.\r\n7. Create a new project and import all HE image tiles.\r\n8. Run GBMatch_CNN/qupath/cellularity.groovy for all slides - cellularity results will be saved to a HE-results-folder.\r\n","organization":"BRAIN - Biomedical Research on Adult Intracranial Neoplasms","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/883?version=1","name":"main @ 9b4911f","author":["Thomas Roetzer-Pejrimovsky"],"descriptor_type":[]}]},{"id":"885","url":"https://workflowhub.eu/workflows/885","name":"GSC (Genotype Sparse Compression)","description":"# GSC (Genotype Sparse Compression)\r\nGenotype Sparse Compression (GSC) is an advanced tool for lossless compression of VCF files, designed to efficiently store and manage VCF files in a compressed format. It accepts VCF/BCF files as input and utilizes advanced compression techniques to significantly reduce storage requirements while ensuring fast query capabilities. 
In our study, we successfully compressed the VCF files from the 1000 Genomes Project (1000Gpip3), consisting of 2504 samples and 80 million variants, from an uncompressed VCF file of 803.70GB to approximately 1GB.\r\n\r\n## Requirements \r\n### GSC requires:\r\n\r\n- **Compiler Compatibility**: GSC requires a modern C++14-ready compiler, such as:\r\n  - g++ version 10.1.0 or higher\r\n\r\n- **Build System**: Make build system is necessary for compiling GSC.\r\n\r\n- **Operating System**: GSC supports 64-bit operating systems, including:\r\n  - Linux (Ubuntu 18.04)\r\n  \r\n## Installation\r\nTo download, build and install GSC use the following commands.\r\n```bash\r\ngit clone https://github.com/luo-xiaolong/GSC.git\r\ncd GSC\r\nmake\r\n```\r\nTo clean the GSC build use:\r\n```bash\r\nmake clean\r\n```\r\n## Usage\r\n```bash\r\nUsage: gsc [option] [arguments] \r\nAvailable options: \r\n        compress - compress VCF/BCF file\r\n        decompress     - query and decompress to VCF/BCF file\r\n```\r\n- Compress the input VCF/BCF file\r\n```bash\r\nUsage of gsc compress:\r\n\r\n        gsc compress [options] [--in [in_file]] [--out [out_file]]\r\n\r\nWhere:\r\n\r\n        [options]              Optional flags and parameters for compression.\r\n        -i,  --in [in_file]    Specify the input file (default: VCF or VCF.GZ). If omitted, input is taken from standard input (stdin).\r\n        -o,  --out [out_file]  Specify the output file. 
If omitted, output is sent to standard output (stdout).\r\n\r\nOptions:\r\n\r\n        -M,  --mode_lossly     Choose lossy compression mode (lossless by default).\r\n        -b,  --bcf             Input is a BCF file (default: VCF or VCF.GZ).\r\n        -p,  --ploidy [X]      Set ploidy of samples in input VCF to [X] (default: 2).\r\n        -t,  --threads [X]     Set number of threads to [X] (default: 1).\r\n        -d,  --depth [X]       Set maximum replication depth to [X] (default: 100, 0 means no matches).\r\n        -m,  --merge [X]       Specify files to merge, separated by commas (e.g., -m chr1.vcf,chr2.vcf), or '@' followed by a file containing a list of VCF files (e.g., -m @file_with_IDs.txt). By default, all VCF files are compressed.\r\n```\r\n- Decompress / Query\r\n```bash\r\nUsage of gsc decompress and query:\r\n\r\n        gsc decompress [options] --in [in_file] --out [out_file]\r\n\r\nWhere:\r\n        [options]              Optional flags and parameters for decompression.\r\n        -i,  --in [in_file]    Specify the input file. If omitted, input is taken from standard input (stdin).\r\n        -o,  --out [out_file]  Specify the output file (default: VCF). If omitted, output is sent to standard output (stdout).\r\n\r\nOptions:\r\n\r\n    General Options:\r\n\r\n        -M,  --mode_lossly      Choose lossy compression mode (default: lossless).\r\n        -b,  --bcf              Output a BCF file (default: VCF).\r\n\r\n    Filter options (applicable in lossy compression mode only): \r\n\r\n        -r,  --range [X]        Specify range in format [start],[end] (e.g., -r 4999756,4999852).\r\n        -s,  --samples [X]      Samples separated by commas (e.g., -s HG03861,NA18639) OR '@' sign followed by the name of a file with sample name(s) separated by whitespaces (for example: -s @file_with_IDs.txt). By default all samples/individuals are decompressed. 
\r\n        --header-only           Output only the header of the VCF/BCF.\r\n        --no-header             Output without the VCF/BCF header (only genotypes).\r\n        -G,  --no-genotype      Don't output sample genotypes (only #CHROM, POS, ID, REF, ALT, QUAL, FILTER, and INFO columns).\r\n        -C,  --out-ac-an        Write AC/AN to the INFO field.\r\n        -S,  --split            Split output into multiple files (one per chromosome).\r\n        -I, [ID=^]              Include only sites with specified ID (e.g., -I \"ID=rs6040355\").\r\n        --minAC [X]             Include only sites with AC \u003e= X.\r\n        --maxAC [X]             Include only sites with AC \u003c= X.\r\n        --minAF [X]             Include only sites with AF \u003e= X (X: 0 to 1).\r\n        --maxAF [X]             Include only sites with AF \u003c= X (X: 0 to 1).\r\n        --min-qual [X]          Include only sites with QUAL \u003e= X.\r\n        --max-qual [X]          Include only sites with QUAL \u003c= X.\r\n```\r\n## Example\r\nThere is an example VCF/VCF.gz/BCF file, `toy.vcf`/`toy.vcf.gz`/`toy.bcf`, in the toy folder, which can be used to test GSC\r\n### compress\r\n\r\n#### lossless compression:\r\nThe input file format is VCF. You can compress a VCF file in lossless mode using one of the following methods:\r\n1. **Explicit input and output file parameters**:\r\n   \r\n   Use the `--in` option to specify the input VCF file and the `--out` option for the output compressed file.\r\n   ```bash\r\n   ./gsc compress --in toy/toy.vcf --out toy/toy_lossless.gsc\r\n   ```\r\n2. **Input file parameter and output redirection**:\r\n   \r\n   Use the `--out` option for the output compressed file and redirect the input VCF file into the command.\r\n   ```bash\r\n   ./gsc compress --out toy/toy_lossless.gsc \u003c toy/toy.vcf\r\n   ```\r\n3. 
**Output file redirection and input file parameter**:\r\n   \r\n   Specify the input VCF file with the `--in` option and redirect the output to create the compressed file.\r\n   ```bash\r\n   ./gsc compress --in toy/toy.vcf \u003e toy/toy_lossless.gsc\r\n   ```\r\n4. **Input and output redirection**:\r\n   \r\n   Use shell redirection for both input and output. This method does not use the `--in` and `--out` options.\r\n   ```bash\r\n   ./gsc compress \u003c toy/toy.vcf \u003e toy/toy_lossless.gsc\r\n   ```\r\nThis will create a file:\r\n* `toy_lossless.gsc` - The compressed archive of the entire VCF file.\r\n\r\n#### lossy compression:\r\n\r\nThe input file format is VCF. The commands are similar to those used for lossless compression, with the addition of the `-M` parameter to enable lossy compression.\r\n\r\n   For example, to compress a VCF file in lossy mode:\r\n\r\n   ```bash\r\n   ./gsc compress -M --in toy/toy.vcf --out toy/toy_lossy.gsc\r\n   ```\r\n   Or using redirection:\r\n   ```bash\r\n   ./gsc compress -M --out toy/toy_lossy.gsc \u003c toy/toy.vcf\r\n   ``` \r\n   This will create a file:\r\n   * `toy_lossy.gsc` - The compressed archive of the entire VCF file is implemented with lossy compression. It only retains the 'GT' subfield within the INFO and FORMAT fields, and excludes all other subfields..\r\n    \r\n### Decompress   (The commands are similar to those used for compression)\r\nlossless decompression:\r\n\r\nTo decompress the compressed toy_lossless.gsc into a VCF file named toy_lossless.vcf:\r\n```bash\r\n./gsc decompress --in toy/toy_lossless.gsc --out toy/toy_lossless.vcf\r\n```\r\nlossy decompression:\r\n\r\nTo decompress the compressed toy_lossy.gsc into a VCF file named toy_lossy.vcf:\r\n```bash\r\n./gsc decompress -M --in toy/toy_lossy.gsc --out toy/toy_lossy.vcf\r\n```\r\n## Dockerfile\r\nDockerfile can be used to build a Docker image with all necessary dependencies and GSC compressor. The image is based on Ubuntu 18.04. 
To build a Docker image and run a Docker container, you need Docker Desktop (https://www.docker.com). Example commands (run it within a directory with Dockerfile):\r\n```bash\r\ndocker build -t gsc_project .\r\ndocker run -it gsc_project\r\n```\r\n## Citations\r\n- **bio.tools ID**: `gsc_genotype_sparse_compression`\r\n- **Research Resource Identifier (RRID)**: `SCR_025071`\r\n","organization":"Genome Data Compression Team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/885?version=1","name":"master @ f9e0b1e","author":[],"descriptor_type":["CWL"]}]},{"id":"887","url":"https://workflowhub.eu/workflows/887","name":"GSC (Genotype Sparse Compression)","description":"# GSC (Genotype Sparse Compression)\r\nGenotype Sparse Compression (GSC) is an advanced tool for lossless compression of VCF files, designed to efficiently store and manage VCF files in a compressed format. It accepts VCF/BCF files as input and utilizes advanced compression techniques to significantly reduce storage requirements while ensuring fast query capabilities. 
In our study, we successfully compressed the VCF files from the 1000 Genomes Project (1000Gpip3), consisting of 2504 samples and 80 million variants, from an uncompressed VCF file of 803.70GB to approximately 1GB.\r\n\r\n## Requirements \r\n### GSC requires:\r\n\r\n- **Compiler Compatibility**: GSC requires a modern C++14-ready compiler, such as:\r\n  - g++ version 10.1.0 or higher\r\n\r\n- **Build System**: Make build system is necessary for compiling GSC.\r\n\r\n- **Operating System**: GSC supports 64-bit operating systems, including:\r\n  - Linux (Ubuntu 18.04)\r\n  \r\n## Installation\r\nTo download, build and install GSC use the following commands.\r\n```bash\r\ngit clone https://github.com/luo-xiaolong/GSC.git\r\ncd GSC\r\nmake\r\n```\r\nTo clean the GSC build use:\r\n```bash\r\nmake clean\r\n```\r\n## Usage\r\n```bash\r\nUsage: gsc [option] [arguments] \r\nAvailable options: \r\n        compress - compress VCF/BCF file\r\n        decompress     - query and decompress to VCF/BCF file\r\n```\r\n- Compress the input VCF/BCF file\r\n```bash\r\nUsage of gsc compress:\r\n\r\n        gsc compress [options] [--in [in_file]] [--out [out_file]]\r\n\r\nWhere:\r\n\r\n        [options]              Optional flags and parameters for compression.\r\n        -i,  --in [in_file]    Specify the input file (default: VCF or VCF.GZ). If omitted, input is taken from standard input (stdin).\r\n        -o,  --out [out_file]  Specify the output file. 
If omitted, output is sent to standard output (stdout).\r\n\r\nOptions:\r\n\r\n        -M,  --mode_lossly     Choose lossy compression mode (lossless by default).\r\n        -b,  --bcf             Input is a BCF file (default: VCF or VCF.GZ).\r\n        -p,  --ploidy [X]      Set ploidy of samples in input VCF to [X] (default: 2).\r\n        -t,  --threads [X]     Set number of threads to [X] (default: 1).\r\n        -d,  --depth [X]       Set maximum replication depth to [X] (default: 100, 0 means no matches).\r\n        -m,  --merge [X]       Specify files to merge, separated by commas (e.g., -m chr1.vcf,chr2.vcf), or '@' followed by a file containing a list of VCF files (e.g., -m @file_with_IDs.txt). By default, all VCF files are compressed.\r\n```\r\n- Decompress / Query\r\n```bash\r\nUsage of gsc decompress and query:\r\n\r\n        gsc decompress [options] --in [in_file] --out [out_file]\r\n\r\nWhere:\r\n        [options]              Optional flags and parameters for decompression.\r\n        -i,  --in [in_file]    Specify the input file. If omitted, input is taken from standard input (stdin).\r\n        -o,  --out [out_file]  Specify the output file (default: VCF). If omitted, output is sent to standard output (stdout).\r\n\r\nOptions:\r\n\r\n    General Options:\r\n\r\n        -M,  --mode_lossly      Choose lossy compression mode (default: lossless).\r\n        -b,  --bcf              Output a BCF file (default: VCF).\r\n\r\n    Filter options (applicable in lossy compression mode only): \r\n\r\n        -r,  --range [X]        Specify range in format [start],[end] (e.g., -r 4999756,4999852).\r\n        -s,  --samples [X]      Samples separated by commas (e.g., -s HG03861,NA18639) OR '@' sign followed by the name of a file with sample name(s) separated by whitespaces (for example: -s @file_with_IDs.txt). By default all samples/individuals are decompressed. 
\r\n        --header-only           Output only the header of the VCF/BCF.\r\n        --no-header             Output without the VCF/BCF header (only genotypes).\r\n        -G,  --no-genotype      Don't output sample genotypes (only #CHROM, POS, ID, REF, ALT, QUAL, FILTER, and INFO columns).\r\n        -C,  --out-ac-an        Write AC/AN to the INFO field.\r\n        -S,  --split            Split output into multiple files (one per chromosome).\r\n        -I, [ID=^]              Include only sites with specified ID (e.g., -I \"ID=rs6040355\").\r\n        --minAC [X]             Include only sites with AC \u003e= X.\r\n        --maxAC [X]             Include only sites with AC \u003c= X.\r\n        --minAF [X]             Include only sites with AF \u003e= X (X: 0 to 1).\r\n        --maxAF [X]             Include only sites with AF \u003c= X (X: 0 to 1).\r\n        --min-qual [X]          Include only sites with QUAL \u003e= X.\r\n        --max-qual [X]          Include only sites with QUAL \u003c= X.\r\n```\r\n## Example\r\nThere is an example VCF/VCF.gz/BCF file, `toy.vcf`/`toy.vcf.gz`/`toy.bcf`, in the toy folder, which can be used to test GSC\r\n### compress\r\n\r\n#### lossless compression:\r\nThe input file format is VCF. You can compress a VCF file in lossless mode using one of the following methods:\r\n1. **Explicit input and output file parameters**:\r\n   \r\n   Use the `--in` option to specify the input VCF file and the `--out` option for the output compressed file.\r\n   ```bash\r\n   ./gsc compress --in toy/toy.vcf --out toy/toy_lossless.gsc\r\n   ```\r\n2. **Input file parameter and output redirection**:\r\n   \r\n   Use the `--out` option for the output compressed file and redirect the input VCF file into the command.\r\n   ```bash\r\n   ./gsc compress --out toy/toy_lossless.gsc \u003c toy/toy.vcf\r\n   ```\r\n3. 
**Output file redirection and input file parameter**:\r\n   \r\n   Specify the input VCF file with the `--in` option and redirect the output to create the compressed file.\r\n   ```bash\r\n   ./gsc compress --in toy/toy.vcf \u003e toy/toy_lossless.gsc\r\n   ```\r\n4. **Input and output redirection**:\r\n   \r\n   Use shell redirection for both input and output. This method does not use the `--in` and `--out` options.\r\n   ```bash\r\n   ./gsc compress \u003c toy/toy.vcf \u003e toy/toy_lossless.gsc\r\n   ```\r\nThis will create a file:\r\n* `toy_lossless.gsc` - The compressed archive of the entire VCF file.\r\n\r\n#### lossy compression:\r\n\r\nThe input file format is VCF. The commands are similar to those used for lossless compression, with the addition of the `-M` parameter to enable lossy compression.\r\n\r\n   For example, to compress a VCF file in lossy mode:\r\n\r\n   ```bash\r\n   ./gsc compress -M --in toy/toy.vcf --out toy/toy_lossy.gsc\r\n   ```\r\n   Or using redirection:\r\n   ```bash\r\n   ./gsc compress -M --out toy/toy_lossy.gsc \u003c toy/toy.vcf\r\n   ``` \r\n   This will create a file:\r\n   * `toy_lossy.gsc` - The compressed archive of the entire VCF file is implemented with lossy compression. It only retains the 'GT' subfield within the INFO and FORMAT fields, and excludes all other subfields..\r\n    \r\n### Decompress   (The commands are similar to those used for compression)\r\nlossless decompression:\r\n\r\nTo decompress the compressed toy_lossless.gsc into a VCF file named toy_lossless.vcf:\r\n```bash\r\n./gsc decompress --in toy/toy_lossless.gsc --out toy/toy_lossless.vcf\r\n```\r\nlossy decompression:\r\n\r\nTo decompress the compressed toy_lossy.gsc into a VCF file named toy_lossy.vcf:\r\n```bash\r\n./gsc decompress -M --in toy/toy_lossy.gsc --out toy/toy_lossy.vcf\r\n```\r\n## Dockerfile\r\nDockerfile can be used to build a Docker image with all necessary dependencies and GSC compressor. The image is based on Ubuntu 18.04. 
To build a Docker image and run a Docker container, you need Docker Desktop (https://www.docker.com). Example commands (run it within a directory with Dockerfile):\r\n```bash\r\ndocker build -t gsc_project .\r\ndocker run -it gsc_project\r\n```\r\n## Citations\r\n- **bio.tools ID**: `gsc_genotype_sparse_compression`\r\n- **Research Resource Identifier (RRID)**: `SCR_025071`\r\n","organization":"Genome Data Compression Team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/887?version=1","name":"master @ f9e0b1e","author":["Xiaolong Luo"],"descriptor_type":[]}]},{"id":"888","url":"https://workflowhub.eu/workflows/888","name":"Theoretical fragment substructure generation and in silico mass spectral library high-resolution upcycling workflow","description":"# Galaxy Workflow Documentation: MS Finder Pipeline\r\n\r\nThis document outlines a MSFinder Galaxy workflow designed for peak annotation. The workflow consists of several steps aimed at preprocessing MS data, filtering, enhancing, and running MSFinder.\r\n\r\n## Step 1: Data Collection and Preprocessing\r\nCollect if the inchi and smiles are missing from the dataset, and subsequently filter out the spectra which are missing inchi and smiles.\r\n\r\n### 1.1 MSMetaEnhancer: Collect InChi, Isomeric_smiles, and Nominal_mass\r\n- Utilizes MSMetaEnhancer to collect InChi and Isomeric_smiles using PubChem and IDSM databases.\r\n- Utilizes MSMetaEnhancer to collect MW using RDkit (For GOLM).\r\n\r\n### 1.2 replace key\r\n- replace isomeric_smiles key to smiles using replace text tool\r\n- replace MW key to parent_mass using replace text tool (For GOLM)\r\n\r\n### 1.3 Matchms Filtering\r\n- Filters out invalid SMILES and InChi from the dataset using Matchms filtering.\r\n\r\n## Step 2: Complex Removal and Subsetting Dataset\r\nRemoves coordination complexes from the dataset.\r\n\r\n### 2.1 Remove Complexes and Subset Data\r\n- 
Removes complexes from the dataset.\r\n- Exports metadata using Matchms metadata export, cuts the SMILES column, removes complexes using Rem_Complex tool, and updates the dataset using Matchms subsetting.\r\n\r\n## Step 3: Data Key Manipulation\r\nAdd missing metadata required by the MSFinder for annotation.\r\n\r\n### 3.1 Matchms Remove Key\r\n- Removes existing keys such as adduct, charge, and ionmode from the dataset.\r\n\r\n### 3.2 Matchms Add Key\r\n- Adds necessary keys like charge, ionmode, and adduct to the dataset.\r\n\r\n### 3.3 Matchms Filtering\r\n- Derives precursor m/z using parent mass and adduct information using matchms filtering.\r\n\r\n### 3.4 Matchms Convert\r\n- Converts the dataset to Riken format for compatibility with MSFinder using matchms convert.\r\n\r\n## Step 4: Peak Annotation\r\n### 4.1 Recetox-MSFinder\r\n- Executes MSFinder with a 0.5 Da tolerance for both MS1 and MS2, including all element checks and an extended range for peak annotation.\r\n\r\n## Step 5: Error Handling and Refinement\r\nCheck the MSFinder output to see if the output is the results or the log file. If the output is log file remove the smile from the dataset using matchms subsetting tool and rerun MSFinder.\r\n\r\n### 5.1 Error Handling\r\n- Handles errors in peak annotation by removing SMILES that are not accepted by MSFinder.\r\n- Reruns MSFinder after error correction or with different parameter (if applicable).\r\n\r\n## Step 6: High-res Annotation\r\n### 6.1 High-Res Peak Overwriting\r\n- Utilizes the Use_Theoretical_mz_Annotations tool to Overwrite experimentally measured mz values for peaks with theoretical values from peak comments.\r\n","organization":"RECETOX SpecDatRI","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/888?version=1","name":"Version 1","author":["Zargham Ahmad","Helge Hecht","Elliott J. 
Price"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/888?version=2","name":"Version 2","author":["Zargham Ahmad","Helge Hecht","Elliott J. Price"],"descriptor_type":["GALAXY"]}]},{"id":"889","url":"https://workflowhub.eu/workflows/889","name":"POD5 by pore","description":"# pod5_by_pore\r\n\r\nA Snakemake workflow to take the POD5 files produced by an Oxford Nanopore sequencing run and\r\nre-batch them by pore (ie. by channel).\r\n\r\nThis is useful if you want to run duplex basecalling because you can meaningfully run\r\n\"dorado duplex\" on a single (or a subset of) the POD5 files.\r\n\r\n## Known issues\r\n\r\nIt is assumed all POD5 input files are from the same sequencing run, but this is not checked.\r\n","organization":"Edinburgh Genomics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/889?version=1","name":"master @ b2977c5","author":["Tim Booth"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/889?version=2","name":"master @ 6b1da33","author":["Tim Booth"],"descriptor_type":["SMK"]}]},{"id":"892","url":"https://workflowhub.eu/workflows/892","name":"qiime2-II-denoising/IIa-denoising-se","description":"Use DADA2 for sequence quality control. DADA2 is a pipeline for detecting and correcting (where possible) Illumina amplicon sequence data. 
As implemented in the q2-dada2 plugin, this quality control process will additionally filter any phiX reads (commonly present in marker gene Illumina sequence data) that are identified in the sequencing data, and will filter chimeric sequences.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/892?version=1","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/892?version=2","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/892?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"893","url":"https://workflowhub.eu/workflows/893","name":"qiime2-I-import/Ia-import-multiplexed-se","description":"Importing single-end multiplexed data (not demultiplexed yet)","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/893?version=1","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/893?version=2","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/893?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"897","url":"https://workflowhub.eu/workflows/897","name":"End-to-end EI+ mass spectra prediction workflow using QCxMS","description":"High-Performance Computing (HPC) environments are integral to quantum chemistry and computationally intense research, yet their complexity poses challenges for non-HPC experts. Navigating these environments proves challenging for researchers lacking extensive computational knowledge, hindering efficient use of domain specific research software. 
The prediction of mass spectra for in silico annotation is therefore inaccessible for many wet lab scientists. Our main goal is to facilitate non-experts in HPC navigate this complexity and make semi-empirical Quantum Chemistry (QC)-based predictions available without needing advanced computational skills. To address this challenge, a comprehensive approach is proposed. We chose specific file formats for storing molecular structures, ensuring compatibility across diverse tools and platforms. The xTB quantum chemistry package for molecular geometry optimization is leveraged for its capability to balance between accuracy and computational cost, making it well-suited for non-HPC focused applications. Integrating QC-based Mass Spectrometry (QCxMS) into Galaxy enables the prediction of mass spectra and offers insights into molecular composition and properties. Our workflow demonstrates the utility of computing spectra using QCxMS along with complementary tools. We also present details of runtime performance metrics for four distinct molecules. This work highlights how non-HPC users can execute these predictions with ease, without requiring advanced computational skills. Additionally, a Docker image is created to encapsulate necessary tools, accompanied by user-friendly wrappers, simplifying the entire process for non-expert users. Within this context, potential improvements are considered, focusing on improving the Conda package for better performance by incorporating Fortran and Intel compiler optimizations. 
These considerations play a crucial role in refining the proposed methodology, enhancing user experience, and expanding the reach of semi-empirical predictions in quantum chemistry for mass spectra predictions.\r\n","organization":"ELIXIR Metabolomics, RECETOX SpecDatRI","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/897?version=1","name":"Version 1","author":["Zargham Ahmad","Helge Hecht","Wudmir Rojas"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/897?version=2","name":"Galaxy Workflow End-to-end EI mass spectra prediction workflow using QCxMS","author":["Zargham Ahmad","Helge Hecht","Wudmir Rojas"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/897?version=3","name":"Version 3","author":["Zargham Ahmad","Helge Hecht","Wudmir Rojas"],"descriptor_type":["GALAXY"]}]},{"id":"963","url":"https://workflowhub.eu/workflows/963","name":"nf-core/airrflow","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-airrflow_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/airrflow\" src=\"docs/images/nf-core-airrflow_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/airrflow/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/airrflow/actions?query=workflow%3A%22nf-core+CI%22)\n[![GitHub Actions Linting Status](https://github.com/nf-core/airrflow/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/airrflow/actions?query=workflow%3A%22nf-core+linting%22)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/airrflow/results)\n[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2642009-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2642009)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/airrflow)\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23airrflow-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/airrflow)\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n[![AIRR compliant](https://img.shields.io/static/v1?label=AIRR-C%20sw-tools%20v1\u0026message=compliant\u0026color=008AFF\u0026labelColor=000000\u0026style=plastic)](https://docs.airr-community.org/en/stable/swtools/airr_swtools_standard.html)\n\n## Introduction\n\n**nf-core/airrflow** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell repertoire sequencing data. 
The input data can be targeted amplicon bulk sequencing data of the V, D, J and C regions of the B/T-cell receptor with multiplex PCR or 5' RACE protocol, single-cell VDJ sequencing using the 10xGenomics libraries, or assembled reads (bulk or single-cell). It can also extract BCR and TCR sequences from bulk or single-cell untargeted RNAseq data. It makes use of the [Immcantation](https://immcantation.readthedocs.io) toolset as well as other AIRR-seq analysis tools.\n\n![nf-core/airrflow overview](docs/images/airrflow_workflow_overview.png)\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/airrflow/results).\n\n## Pipeline summary\n\nnf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single cell targeted sequencing data. 
Several protocols are supported, please see the [usage documentation](https://nf-co.re/airrflow/usage) for more details on the supported protocols. The pipeline has been certified as [AIRR compliant](https://docs.airr-community.org/en/stable/swtools/airr_swtools_compliant.html) by the AIRR community, which means that it is compatible with downstream analysis tools also supporting this format.\n\n![nf-core/airrflow overview](docs/images/metro-map-airrflow.png)\n\n1. QC and sequence assembly\n\n- Bulk\n  - Raw read quality control, adapter trimming and clipping (`Fastp`).\n  - Filter sequences by base quality (`pRESTO FilterSeq`).\n  - Mask amplicon primers (`pRESTO MaskPrimers`).\n  - Pair read mates (`pRESTO PairSeq`).\n  - For UMI-based sequencing:\n    - Cluster sequences according to similarity (optional for insufficient UMI diversity) (`pRESTO ClusterSets`).\n    - Build consensus of sequences with the same UMI barcode (`pRESTO BuildConsensus`).\n  - Assemble R1 and R2 read mates (`pRESTO AssemblePairs`).\n  - Remove and annotate read duplicates (`pRESTO CollapseSeq`).\n  - Filter out sequences that do not have at least 2 duplicates (`pRESTO SplitSeq`).\n- single cell\n  - cellranger vdj\n    - Assemble contigs\n    - Annotate contigs\n    - Call cells\n    - Generate clonotypes\n\n2. V(D)J annotation and filtering (bulk and single-cell)\n\n- Assign gene segments with `IgBlast` using a germline reference (`Change-O AssignGenes`).\n- Annotate alignments in AIRR format (`Change-O MakeDB`)\n- Filter by alignment quality (locus matching v_call chain, min 200 informative positions, max 10% N nucleotides)\n- Filter productive sequences (`Change-O ParseDB split`)\n- Filter junction length multiple of 3\n- Annotate metadata (`EnchantR`)\n\n3. 
QC filtering (bulk and single-cell)\n\n- Bulk sequencing filtering:\n  - Remove chimeric sequences (optional) (`SHazaM`, `EnchantR`)\n  - Detect cross-contamination (optional) (`EnchantR`)\n  - Collapse duplicates (`Alakazam`, `EnchantR`)\n- Single-cell QC filtering (`EnchantR`)\n  - Remove cells without heavy chains.\n  - Remove cells with multiple heavy chains.\n  - Remove sequences in different samples that share the same `cell_id` and nucleotide sequence.\n  - Modify `cell_id`s to ensure they are unique in the project.\n\n4. Clonal analysis (bulk and single-cell)\n\n- Find threshold for clone definition (`SHazaM`, `EnchantR`).\n- Create germlines and define clones, repertoire analysis (`SCOPer`, `EnchantR`).\n- Build lineage trees (`Dowser`, `IgphyML`, `RAxML`, `EnchantR`).\n\n5. Repertoire analysis and reporting\n\n- Custom repertoire analysis pipeline report (`Alakazam`).\n- Aggregate QC reports (`MultiQC`).\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, ensure that the pipeline tests run on your infrastructure:\n\n```bash\nnextflow run nf-core/airrflow -profile test,\u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e --outdir \u003cOUTDIR\u003e\n```\n\nTo run nf-core/airrflow with your data, prepare a tab-separated samplesheet with your input data. Depending on the input data type (bulk or single-cell, raw reads or assembled reads) the input samplesheet will vary. Please follow the [documentation on samplesheets](https://nf-co.re/airrflow/usage#input-samplesheet) for more details. 
An example samplesheet for running the pipeline on bulk BCR / TCR sequencing data in fastq format looks as follows:\n\n| sample_id | filename_R1                     | filename_R2                     | filename_I1                     | subject_id | species | pcr_target_locus | tissue | sex    | age | biomaterial_provider | single_cell | intervention   | collection_time_point_relative | cell_subset  |\n| --------- | ------------------------------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- | -------------- | ------------------------------ | ------------ |\n| sample01  | sample1_S8_L001_R1_001.fastq.gz | sample1_S8_L001_R2_001.fastq.gz | sample1_S8_L001_I1_001.fastq.gz | Subject02  | human   | IG               | blood  | NA     | 53  | sequencing_facility  | FALSE       | Drug_treatment | Baseline                       | plasmablasts |\n| sample02  | sample2_S8_L001_R1_001.fastq.gz | sample2_S8_L001_R2_001.fastq.gz | sample2_S8_L001_I1_001.fastq.gz | Subject02  | human   | TR               | blood  | female | 78  | sequencing_facility  | FALSE       | Drug_treatment | Baseline                       | plasmablasts |\n\nEach row represents a sample with fastq files (paired-end).\n\nA typical command to run the pipeline from **bulk raw fastq files** is:\n\n```bash\nnextflow run nf-core/airrflow \\\n-r \u003crelease\u003e \\\n-profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e \\\n--mode fastq \\\n--input input_samplesheet.tsv \\\n--library_generation_method specific_pcr_umi \\\n--cprimers CPrimers.fasta \\\n--vprimers VPrimers.fasta \\\n--umi_length 12 \\\n--umi_position R1 \\\n--outdir ./results\n```\n\nFor common **bulk sequencing protocols** we provide pre-set profiles that specify primers, UMI length, etc for common commercially available sequencing protocols. 
Please check the [Supported protocol profiles](#supported-protocol-profiles) for a full list of available profiles. An example command running the NEBNext UMI protocol profile with docker containers is:\n\n```bash\nnextflow run nf-core/airrflow \\\n-profile nebnext_umi,docker \\\n--mode fastq \\\n--input input_samplesheet.tsv \\\n--outdir results\n```\n\nA typical command to run the pipeline from **single cell raw fastq files** (10X genomics) is:\n\n```bash\nnextflow run nf-core/airrflow -r dev \\\n-profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e \\\n--mode fastq \\\n--input input_samplesheet.tsv \\\n--library_generation_method sc_10x_genomics \\\n--reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \\\n--outdir ./results\n```\n\nA typical command to run the pipeline from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is:\n\n```bash\nnextflow run nf-core/airrflow \\\n-r \u003crelease\u003e \\\n-profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e \\\n--input input_samplesheet.tsv \\\n--mode assembled \\\n--outdir results\n```\n\nSee the [usage documentation](https://nf-co.re/airrflow/usage) and the [parameter documentation](https://nf-co.re/airrflow/parameters) for more details on how to use the pipeline and all the available parameters.\n\n:::warning\nPlease provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
To see the results of a test run with a full size dataset, refer to the [results](https://nf-co.re/airrflow/results) tab on the nf-core website pipeline page.
The specific pipeline version can be cited using the following DOI: [10.5281/zenodo.2642009](https://doi.org/10.5281/zenodo.2642009)
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/963?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/963?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/963?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/963?version=4","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/963?version=5","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/963?version=6","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/963?version=7","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/963?version=8","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/963?version=9","name":"3.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/963?version=10","name":"3.1.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/963?version=11","name":"3.2.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/963?version=12","name":"3.3.0","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/963?version=13","name":"4.0","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/963?version=14","name":"4.1.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/963?version=15","name":"4.2.0","author":[],"descriptor_type":["NFL"]},{"id"
:"16","url":"https://workflowhub.eu/workflows/963?version=16","name":"4.3.0","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/963?version=17","name":"4.3.1","author":[],"descriptor_type":["NFL"]}]},{"id":"964","url":"https://workflowhub.eu/workflows/964","name":"nf-core/ampliseq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-ampliseq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/ampliseq\" src=\"docs/images/nf-core-ampliseq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/ampliseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/ampliseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/ampliseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/ampliseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/ampliseq/results)[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1493841-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1493841)[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-important?labelColor=000000)](https://doi.org/10.3389/fmicb.2020.550420)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run 
**nf-core/ampliseq** is a bioinformatics analysis pipeline used for amplicon sequencing, supporting denoising of any amplicon and a variety of taxonomic databases for taxonomic assignment including 16S, ITS, CO1 and 18S.
[bilibili](https://www.bilibili.com/video/BV1B44y1e7MM)
objects ([Phyloseq](https://www.bioconductor.org/packages/release/bioc/html/phyloseq.html) and [TreeSE](https://doi.org/10.12688/f1000research.26669.2))\n- Pipeline QC summaries ([MultiQC](https://multiqc.info/))\n- Pipeline summary report ([R Markdown](https://github.com/rstudio/rmarkdown))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, you need to know whether the sequencing files at hand are expected to contain primer sequences (usually yes) and if yes, what primer sequences. In the example below, the paired end sequencing data was produced with 515f (GTGYCAGCMGCCGCGGTAA) and 806r (GGACTACNVGGGTWTCTAAT) primers of the V4 region of the 16S rRNA gene. Please note, that those sequences should not contain any sequencing adapter sequences, only the sequence that matches the biological amplicon.\n\nNext, the data needs to be organized in a folder, here `data`, or detailed in a samplesheet (see [input documentation](https://nf-co.re/ampliseq/usage#input-specifications)).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/ampliseq \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input \"data\" \\\n   --FW_primer \"GTGYCAGCMGCCGCGGTAA\" \\\n   --RV_primer \"GGACTACNVGGGTWTCTAAT\" \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!NOTE]\n\u003e Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata).\n\n\u003e [!TIP]\n\u003e By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification 
documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). Differential abundance testing with ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) or ([ANCOM-BC](https://www.ncbi.nlm.nih.gov/pubmed/32665548)) when opting in.\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/ampliseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/ampliseq/output).\n\n## Credits\n\nnf-core/ampliseq was originally written by Daniel Straub ([@d4straub](https://github.com/d4straub)) and Alexander Peltzer ([@apeltzer](https://github.com/apeltzer)) for use at the [Quantitative Biology Center (QBiC)](https://www.info.qbic.uni-tuebingen.de/) and [Microbial Ecology, Center for Applied Geosciences](http://www.uni-tuebingen.de/de/104325), part of Eberhard Karls Universität Tübingen (Germany). 
Daniel Lundin [@erikrikarddaniel](https://github.com/erikrikarddaniel) ([Linnaeus University, Sweden](https://lnu.se/)) joined before pipeline release 2.0.0 and helped to improve the pipeline considerably.\n\nWe thank the following people for their extensive assistance in the development of this pipeline (in alphabetical order):\n\n[Adam Bennett](https://github.com/a4000), [Diego Brambilla](https://github.com/DiegoBrambilla), [Emelie Nilsson](https://github.com/emnilsson), [Jeanette Tångrot](https://github.com/jtangrot), [Lokeshwaran Manoharan](https://github.com/lokeshbio), [Marissa Dubbelaar](https://github.com/marissaDubbelaar), [Sabrina Krakau](https://github.com/skrakau), [Sam Minot](https://github.com/sminot), [Till Englert](https://github.com/tillenglert)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#ampliseq` channel](https://nfcore.slack.com/channels/ampliseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use `nf-core/ampliseq` for your analysis, please cite the `ampliseq` article as follows:\n\n\u003e **Interpretations of Environmental Microbial Community Studies Are Biased by the Selected 16S rRNA (Gene) Amplicon Sequencing Pipeline**\n\u003e\n\u003e Daniel Straub, Nia Blackwell, Adrian Langarica-Fuentes, Alexander Peltzer, Sven Nahnsen, Sara Kleindienst\n\u003e\n\u003e _Frontiers in Microbiology_ 2020, 11:2652 [doi: 10.3389/fmicb.2020.550420](https://doi.org/10.3389/fmicb.2020.550420).\n\nYou can cite the `nf-core/ampliseq` zenodo record for a specific version using the following [doi: 10.5281/zenodo.1493841](https://zenodo.org/badge/latestdoi/150448201)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as 
follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/964?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/964?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/964?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/964?version=4","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/964?version=5","name":"1.1.3","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/964?version=6","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/964?version=7","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/964?version=8","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/964?version=9","name":"2.1.1","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/964?version=10","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/964?version=11","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/964?version=12","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/
964?version=13","name":"2.3.2","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/964?version=14","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/964?version=15","name":"2.4.1","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/964?version=16","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/964?version=17","name":"2.6.0","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/964?version=18","name":"2.6.1","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/964?version=19","name":"2.7.0","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/964?version=20","name":"2.7.1","author":[],"descriptor_type":["NFL"]},{"id":"21","url":"https://workflowhub.eu/workflows/964?version=21","name":"2.8.0","author":[],"descriptor_type":["NFL"]},{"id":"22","url":"https://workflowhub.eu/workflows/964?version=22","name":"2.9.0","author":[],"descriptor_type":["NFL"]},{"id":"23","url":"https://workflowhub.eu/workflows/964?version=23","name":"2.10.0","author":[],"descriptor_type":["NFL"]},{"id":"24","url":"https://workflowhub.eu/workflows/964?version=24","name":"2.11.0","author":[],"descriptor_type":["NFL"]},{"id":"25","url":"https://workflowhub.eu/workflows/964?version=25","name":"2.12.0","author":[],"descriptor_type":["NFL"]},{"id":"26","url":"https://workflowhub.eu/workflows/964?version=26","name":"2.13.0","author":[],"descriptor_type":["NFL"]},{"id":"27","url":"https://workflowhub.eu/workflows/964?version=27","name":"2.14.0","author":[],"descriptor_type":["NFL"]},{"id":"28","url":"https://workflowhub.eu/workflows/964?version=28","name":"2.15.0","author":[],"descriptor_type":["NFL"]}]},{"id":"965","url":"https://workflowhub.eu/workflows/965","name":"nf-core/atacseq","description":"ATACSeq 
peak-calling and differential analysis pipeline.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/965?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/965?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/965?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/965?version=4","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/965?version=5","name":"1.2.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/965?version=6","name":"2.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/965?version=7","name":"2.1.1","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/965?version=8","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/965?version=9","name":"2.1.2","author":[],"descriptor_type":["NFL"]}]},{"id":"966","url":"https://workflowhub.eu/workflows/966","name":"nf-core/bacass","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-bacass_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/bacass\" src=\"docs/images/nf-core-bacass_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/bacass/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/bacass/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/bacass/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/bacass/actions/workflows/linting.yml)[![AWS 
CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/bacass/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/bacass)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23bacass-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/bacass)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/bacass** is a bioinformatics best-practice analysis pipeline for simple bacterial assembly and annotation. The pipeline is able to assemble short reads, long reads, or a mixture of short and long reads (hybrid assembly).\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/bacass/results).\n\n## Pipeline summary\n\n### Short Read Assembly\n\nThis pipeline is primarily for bacterial assembly of next-generation sequencing reads. It can be used to quality trim your reads using [FastP](https://github.com/OpenGene/fastp) and performs basic sequencing QC using [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/). 
Afterwards, the pipeline performs read assembly using [Unicycler](https://github.com/rrwick/Unicycler). Contamination of the assembly is checked using [Kraken2](https://ccb.jhu.edu/software/kraken2/) and [Kmerfinder](https://bitbucket.org/genomicepidemiology/kmerfinder/src/master/) to verify sample purity.\n\n### Long Read Assembly\n\nFor users that only have Nanopore data, the pipeline quality trims these using [PoreChop](https://github.com/rrwick/Porechop) or filter long reads by quality using [Filtlong](https://github.com/rrwick/Filtlong) and assesses basic sequencing QC utilizing [NanoPlot](https://github.com/wdecoster/NanoPlot) and [PycoQC](https://github.com/a-slide/pycoQC). Contamination of the assembly is checked using [Kraken2](https://ccb.jhu.edu/software/kraken2/) and [Kmerfinder](https://bitbucket.org/genomicepidemiology/kmerfinder/src/master/) to verify sample purity.\n\nThe pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)(\\*) pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files.\n\n\u003e [!NOTE]\n\u003e Dragonflye is a comprehensive pipeline designed for genome assembly of Oxford Nanopore Reads. It facilitates the utilization of Flye (default), Miniasm, and Raven assemblers, along with Racon (default) and Medaka polishers. 
For more information, visit the [Dragonflye GitHub](https://github.com/rpetit3/dragonflye) repository.\n\n### Hybrid Assembly\n\nFor users specifying both short read and long read (NanoPore) data, the pipeline can perform a hybrid assembly approach utilizing [Unicycler](https://github.com/rrwick/Unicycler) (short read assembly followed by gap closing with long reads) or [Dragonflye](https://github.com/rpetit3/dragonflye) (long read assembly followed by polishing with short reads), taking the full set of information from short reads and long reads into account.\n\n### Assembly QC and annotation\n\nIn all cases, the assembly is assessed using [QUAST](http://bioinf.spbau.ru/quast) and [BUSCO](https://busco.ezlab.org/). The resulting bacterial assembly is furthermore annotated using [Prokka](https://github.com/tseemann/prokka), [Bakta](https://github.com/oschwengers/bakta) or [DFAST](https://github.com/nigyta/dfast_core).\n\nIf Kmerfinder is invoked, the pipeline will group samples according to the [Kmerfinder](https://bitbucket.org/genomicepidemiology/kmerfinder/src/master/)-estimated reference genomes. Afterwards, two QUAST steps will be carried out: an initial ('general') [QUAST](http://bioinf.spbau.ru/quast) of all samples without reference genomes, and subsequently, a 'by reference genome' [QUAST](http://bioinf.spbau.ru/quast) to aggregate samples with their reference genomes.\n\n\u003e [!NOTE]\n\u003e This scenario is supported when [Kmerfinder](https://bitbucket.org/genomicepidemiology/kmerfinder/src/master/) analysis is performed only.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.tsv`:\n\n```tsv\nID      R1                            R2                            LongFastQ                    Fast5    GenomeSize\nshortreads      ./data/S1_R1.fastq.gz       ./data/S1_R2.fastq.gz       NA                            NA      NA\nlongreads       NA                          NA                          ./data/S1_long_fastq.gz      ./data/FAST5  2.8m\nshortNlong      ./data/S1_R1.fastq.gz       ./data/S1_R2.fastq.gz       ./data/S1_long_fastq.gz      ./data/FAST5  2.8m\n\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\nShort read assembly with Unicycler, `--kraken2db` can be any [compressed database (`.tar.gz`/`.tgz`)](https://benlangmead.github.io/aws-indexes/k2):\n\n```console\nnextflow run nf-core/bacass -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e --input samplesheet.tsv --assembly_type 'short' --kraken2db \"https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz\"\n```\n\nLong read assembly with Miniasm:\n\n```console\nnextflow run nf-core/bacass -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e --input samplesheet.tsv --assembly_type 'long' --assembler 'miniasm' --kraken2db \"https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20210517.tar.gz\"\n```\n\n```bash\nnextflow run nf-core/bacass \\\n  -profile \u003cdocker/singularity/.../institute\u003e \\\n  --input samplesheet.tsv \\\n  --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/bacass/usage) and the [parameter documentation](https://nf-co.re/bacass/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/bacass/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/bacass/output).\n\n## Credits\n\nnf-core/bacass was initiated by [Andreas Wilm](https://github.com/andreas-wilm), originally written by [Alex Peltzer](https://github.com/apeltzer) (DSL1), rewritten by [Daniel Straub](https://github.com/d4straub) (DSL2) and maintained by [Daniel Valle-Millares](https://github.com/Daniel-VM).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#bacass` channel](https://nfcore.slack.com/channels/bacass) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/bacass for your analysis, please cite it using the following doi: [10.5281/zenodo.2669428](https://doi.org/10.5281/zenodo.2669428)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di 
Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/966?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/966?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/966?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/966?version=4","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/966?version=5","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/966?version=6","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/966?version=7","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/966?version=8","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/966?version=9","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/966?version=10","name":"2.5.0","author":[],"descriptor_type":["NFL"]}]},{"id":"967","url":"https://workflowhub.eu/workflows/967","name":"nf-core/bactmap","description":"A mapping-based pipeline for creating a phylogeny from bacterial whole genome sequences","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/967?version=1","name":"v0.9","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/967?version=2","name":"0.9.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/967?version=3","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"968","url":"https://workflowhub.eu/workflows/968","name":"nf-core/bamtofastq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-bamtofastq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/bamtofastq\" src=\"docs/images/nf-core-bamtofastq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/bamtofastq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/bamtofastq/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/bamtofastq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/bamtofastq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/bamtofastq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.4710628-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.4710628)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/bamtofastq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23bamtofastq-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/bamtofastq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/bamtofastq** is a bioinformatics best-practice analysis pipeline that converts (un)mapped `.bam` or `.cram` files into `fq.gz` files. Initially, it auto-detects, whether the input file contains single-end or paired-end reads. Following this step, the reads are sorted using `samtools collate` and extracted with `samtools fastq`. Furthermore, for mapped bam/cram files it is possible to only convert reads mapping to a specific region or chromosome. The obtained FastQ files can then be used to further process with other pipelines.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. 
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/bamtofastq/results).\n\n## Pipeline summary\n\nBy default, the pipeline currently performs the following steps:\n\n1. Quality control (QC) of input (bam/cram) files ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)).\n2. Check if input files are single- or paired-end ([`Samtools`](https://www.htslib.org/)).\n3. Compute statistics on input files ([`Samtools`](https://www.htslib.org/)).\n4. Convert to fastq reads ([`Samtools`](https://www.htslib.org/)).\n5. QC of converted fastq reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)).\n6. Checking whether the produced fastq files are valid ([fastq_utils](https://github.com/nunofonseca/fastq_utils)).\n7. Summarize QC and statistics before and after format conversion ([`MultiQC`](http://multiqc.info/)).\n\n\u003cp align=\"center\"\u003e\n    \u003cimg title=\"Bamtofastq Workflow\" src=\"docs/images/nf-core-bamtofastq-subway.png\" width=60%\u003e\n\u003c/p\u003e\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nDownload the pipeline and test it on a minimal dataset with a single command:\n\n```bash\nnextflow run nf-core/bamtofastq -profile test,\u003cdocker/singularity/.../institute\u003e --outdir './results'\n```\n\nTo run your own analysis, start by preparing a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample_id,mapped,index,file_type\ntest,test1.bam,test1.bam.bai,bam\ntest2,test2.bam,test2.bam.bai,bam\n```\n\nEach row represents a bam/cram file with or without indices.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/bamtofastq \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/bamtofastq/usage) and the [parameter documentation](https://nf-co.re/bamtofastq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/bamtofastq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/bamtofastq/output).\n\n## Credits\n\nnf-core/bamtofastq was originally written by Friederike Hanssen. 
It was ported to DSL2 by Susanne Jodoin.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Gisela Gabernet](https://github.com/ggabernet)\n- [Matilda Åslin](https://github.com/matrulda)\n- [Bruno Grande](https://github.com/BrunoGrandePhd)\n\n### Resources\n\nThe individual steps of this pipeline are based of on the following tutorials and resources:\n\n1.  [Extracting paired FASTQ read data from a BAM mapping file](http://darencard.net/blog/2017-09-07-extract-fastq-bam/)\n2.  [Check if BAM is derived from pair-end or single-end reads](https://www.biostars.org/p/178730/)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#bamtofastq` channel](https://nfcore.slack.com/channels/bamtofastq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/bamtofastq for your analysis, please cite it using the following doi: [10.5281/zenodo.4710628](https://doi.org/10.5281/zenodo.4710628)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/968?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/968?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/968?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/968?version=4","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/968?version=5","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/968?version=6","name":"2.1.1","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/968?version=7","name":"2.2.0","author":[],"descriptor_type":["NFL"]}]},{"id":"969","url":"https://workflowhub.eu/workflows/969","name":"nf-core/cageseq","description":"CAGE-seq pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/969?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/969?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/969?version=3","name":"1.0.2","author":[],"descriptor_type":["NFL"]}]},{"id":"970","url":"https://workflowhub.eu/workflows/970","name":"nf-core/callingcards","description":"An automated processing pipeline for mammalian bulk calling cards experiments","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/970?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"971","url":"https://workflowhub.eu/workflows/971","name":"nf-core/chipseq","description":"ChIP-seq peak-calling and differential analysis pipeline.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/971?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/971?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/971?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/971?version=4","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/971?version=5","name":"1.2.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/971?version=6","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/971?version=7","name":"2.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"972","url":"https://workflowhub.eu/workflows/972","name":"nf-core/circdna","description":"Pipeline for the identification of circular DNAs","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/972?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/972?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/972?version=3","name":"1.0.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/972?version=4","name":"1.0.3dev-alpha","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/972?version=5","name":"1.0.3dev","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/972?version=6","name":"1.0.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/972?version=7","name":"1.0.4","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/972?version=8","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"973","url":"https://workflowhub.eu/workflows/973","name":"nf-core/clipseq","description":"CLIP analysis pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/973?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"974","url":"https://workflowhub.eu/workflows/974","name":"nf-core/coproid","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-coproid_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/coproid\" src=\"docs/images/nf-core-coproid_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/coproid/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/coproid/actions/workflows/ci.yml)\n[![GitHub Actions Linting 
Status](https://github.com/nf-core/coproid/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/coproid/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/coproid/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/coproid)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23coproid-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/coproid)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/coproid** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence 
with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. 
For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/coproid \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/coproid/usage) and the [parameter documentation](https://nf-co.re/coproid/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/coproid/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/coproid/output).\n\n## Credits\n\nnf-core/coproid was originally written by Maxime Borry \u0026 Meriam Van Os.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#coproid` channel](https://nfcore.slack.com/channels/coproid) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. --\u003e\n\u003c!-- If you use nf-core/coproid for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/974?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/974?version=2","name":"1.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/974?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/974?version=4","name":"2.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"975","url":"https://workflowhub.eu/workflows/975","name":"nf-core/crisprseq","description":"Pipeline for the analysis of crispr data","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/975?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/975?version=2","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/975?version=3","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/975?version=4","name":"2.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/975?version=5","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/975?version=6","name":"2.2.1","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/975?version=7","name":"2.3.0","author":[],"descriptor_type":["NFL"]}]},{"id":"976","url":"https://workflowhub.eu/workflows/976","name":"nf-core/cutandrun","description":"Analysis pipeline for CUT\u0026RUN and CUT\u0026TAG experiments that includes sequencing QC, spike-in normalisation, IgG control 
normalisation, peak calling and downstream peak analysis.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/976?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/976?version=2","name":"1.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/976?version=3","name":"2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/976?version=4","name":"3.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/976?version=5","name":"3.1","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/976?version=6","name":"3.2","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/976?version=7","name":"3.2.1","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/976?version=8","name":"3.2.2","author":[],"descriptor_type":["NFL"]}]},{"id":"977","url":"https://workflowhub.eu/workflows/977","name":"nf-core/deepvariant","description":"Google DeepVariant variant caller as a Nextflow pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/977?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"978","url":"https://workflowhub.eu/workflows/978","name":"nf-core/demultiplex","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-demultiplex_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/demultiplex\" src=\"docs/images/nf-core-demultiplex_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI 
Status](https://github.com/nf-core/demultiplex/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/demultiplex/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/demultiplex/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/demultiplex/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/demultiplex/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7153103-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7153103)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/demultiplex)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23demultiplex-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/demultiplex)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/demultiplex** is a bioinformatics pipeline used to demultiplex the raw data produced by next generation sequencing machines. The following platforms are supported:\n\n1. Illumina (via `bcl2fastq` or `bclconvert`)\n2. Element Biosciences (via `bases2fastq`)\n3. Singular Genomics (via [`sgdemux`](https://github.com/Singular-Genomics/singular-demux))\n4. FASTQ files with user supplied read structures (via [`fqtk`](https://github.com/fulcrumgenomics/fqtk))\n5. 10x Genomics (via [`mkfastq`](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq))\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/demultiplex/results).\n\n## Pipeline summary\n\n1. 
[samshee](#samshee) - Validates illumina v2 samplesheets.\n2. Demultiplexing\n\n- [bcl-convert](#bcl-convert) - converting bcl files to fastq, and demultiplexing (CONDITIONAL)\n- [bases2fastq](#bases2fastq) - converting bases files to fastq, and demultiplexing (CONDITIONAL)\n- [bcl2fastq](#bcl2fastq) - converting bcl files to fastq, and demultiplexing (CONDITIONAL)\n- [sgdemux](#sgdemux) - demultiplexing bgzipped fastq files produced by Singular Genomics (CONDITIONAL)\n- [fqtk](#fqtk) - a toolkit for working with FASTQ files, written in Rust (CONDITIONAL)\n- [mkfastq](#mkfastq) - converting bcl files to fastq, and demultiplexing for single-cell sequencing data (CONDITIONAL)\n\n3. [checkqc](#checkqc) - (optional) Check quality criteria after demultiplexing (bcl2fastq only)\n4. [fastp](#fastp) - Adapter and quality trimming\n5. [Falco](#falco) - Raw read QC\n6. [md5sum](#md5sum) - Creates an MD5 (128-bit) checksum of every fastq.\n7. [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline\n\n![subway map](docs/demultiplex.png)\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. 
For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\n```console\nnextflow run nf-core/demultiplex --input samplesheet.csv --outdir \u003cOUTDIR\u003e -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e\n```\n\n```bash\nnextflow run nf-core/demultiplex \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/demultiplex/usage) and the [parameter documentation](https://nf-co.re/demultiplex/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/demultiplex/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/demultiplex/output).\n\n## Credits\n\nThe nf-core/demultiplex pipeline was written by Chelsea Sawyer from The Bioinformatics \u0026 Biostatistics Group for use at The Francis Crick Institute, London.\n\nThe pipeline was re-written in Nextflow DSL2 and is primarily maintained by Matthias De Smet([@matthdsm](https://github.com/matthdsm)) from [Center For Medical Genetics Ghent, Ghent 
University](https://github.com/CenterForMedicalGeneticsGhent) and Edmund Miller([@edmundmiller](https://github.com/edmundmiller)) from [Element Biosciences](https://www.elementbiosciences.com/)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [`@ChristopherBarrington`](https://github.com/ChristopherBarrington)\n- [`@drpatelh`](https://github.com/drpatelh)\n- [`@danielecook`](https://github.com/danielecook)\n- [`@escudem`](https://github.com/escudem)\n- [`@crickbabs`](https://github.com/crickbabs)\n- [`@nh13`](https://github.com/nh13)\n- [`@sam-white04`](https://github.com/sam-white04)\n- [`@maxulysse`](https://github.com/maxulysse)\n- [`@atrigila`](https://github.com/atrigila)\n- [`@nschcolnicov`](https://github.com/nschcolnicov)\n- [`@aratz`](https://github.com/aratz)\n- [`@grst`](https://github.com/grst)\n- [`@apeltzer`](https://github.com/apeltzer)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#demultiplex` channel](https://nfcore.slack.com/channels/demultiplex) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/demultiplex for your analysis, please cite it using the following doi: [10.5281/zenodo.7153103](https://doi.org/10.5281/zenodo.7153103)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/978?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/978?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/978?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/978?version=4","name":"1.3.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/978?version=5","name":"1.3.1","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/978?version=6","name":"1.3.2","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/978?version=7","name":"1.4.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/978?version=8","name":"1.4.1","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/978?version=9","name":"1.5.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/978?version=10","name":"1.5.1","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/978?version=11","name":"1.5.2","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/978?version=12","name":"1.5.3","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/978?version=13","name":"1.5.4","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/978?version=14","name":"1.6.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/978?version=15","name":"1.6.1","author":[],"descriptor_type":["NFL"]}]}
,{"id":"979","url":"https://workflowhub.eu/workflows/979","name":"nf-core/detaxizer","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-detaxizer_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/detaxizer\" src=\"docs/images/nf-core-detaxizer_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![Cite Preprint](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-important?labelColor=000000)](https://doi.org/10.1101/2025.03.27.645632)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10877147-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10877147)\n\n[![GitHub Actions CI Status](https://github.com/nf-core/detaxizer/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/detaxizer/actions/workflows/nf-test.yml)[![GitHub Actions Linting Status](https://github.com/nf-core/detaxizer/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/detaxizer/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/detaxizer/results)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/detaxizer)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23detaxizer-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/detaxizer)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/detaxizer** is a bioinformatics pipeline that checks for the presence of a specific taxon in (meta)genomic fastq files and to filter out this taxon or taxonomic subtree. The process begins with quality assessment via FastQC and optional preprocessing (adapter trimming, quality cutting and optional length and quality filtering) using fastp, followed by taxonomic classification with kraken2 and/or bbduk, and optionally employs blastn for validation of the reads associated with the identified taxa. Users must provide a samplesheet to indicate the fastq files and, if utilizing bbduk in the classification and/or the validation step, fasta files for usage of bbduk and creating the blastn database to verify the targeted taxon.\n\n![detaxizer metro workflow](docs/images/Detaxizer_metro_workflow.png)\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. 
Optional pre-processing ([`fastp`](https://github.com/OpenGene/fastp))\n3. Classification of reads ([`Kraken2`](https://ccb.jhu.edu/software/kraken2/), and/or [`bbduk`](https://sourceforge.net/projects/bbmap/))\n4. Optional validation of searched taxon/taxa ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi))\n5. Filtering of the searched taxon/taxa from the reads (either from the raw files or the preprocessed reads, using either the output from the classification (kraken2 and/or bbduk) or blastn)\n6. Summary of the processes (how many were classified and optionally how many were validated)\n7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,short_reads_fastq_1,short_reads_fastq_2,long_reads_fastq_1\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,AEG588A1_S1_L002_R3_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end). A third fastq file can be provided if long reads are present in your project. For more detailed information about the samplesheet, see the [usage documentation](docs/usage.md).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/detaxizer \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --classification_bbduk \\\n   --classification_kraken2 \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/detaxizer/usage) and the [parameter documentation](https://nf-co.re/detaxizer/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/detaxizer/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/detaxizer/output).\n\nGenerated samplesheets from the directory `/downstream_samplesheets/` can be used for the pipelines:\n\n- [nf-core/mag](https://nf-co.re/mag)\n- [nf-core/taxprofiler](https://nf-co.re/taxprofiler)\n\n## Credits\n\nnf-core/detaxizer was originally written by [Jannik Seidel](https://github.com/jannikseidelQBiC) at the [Quantitative Biology Center (QBiC)](http://qbic.life/).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Daniel Straub](https://github.com/d4straub)\n\nThis work was initially funded by the German Center for Infection Research (DZIF).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#detaxizer` channel](https://nfcore.slack.com/channels/detaxizer) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/detaxizer for your analysis, please cite it using the following preprint:\n\n\u003e **nf-core/detaxizer: A Benchmarking Study for Decontamination from Human Sequences**\n\u003e\n\u003e Jannik Seidel, Camill Kaipf, 
Daniel Straub, Sven Nahnsen\n\u003e\n\u003e bioRxiv 2025.03.27.645632 [doi: 10.1101/2025.03.27.645632](https://doi.org/10.1101/2025.03.27.645632).\n\nAdditionally, the following doi can be cited: [10.5281/zenodo.10877147](https://doi.org/10.5281/zenodo.10877147)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/979?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/979?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/979?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]}]},{"id":"980","url":"https://workflowhub.eu/workflows/980","name":"nf-core/diaproteomics","description":"Automated quantitative analysis of DIA proteomics mass spectrometry measurements.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/980?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/980?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/980?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/980?version=4","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/980?version=5","name":"1.2.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/980?version=6","name":"1.2.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/980?version=7","name":"1.2.4","author":[],"descriptor_type":["NFL"]}]},{"id":"981","url":"https://workflowhub.eu/workflows/981","name":"nf-core/differentialabundance","description":"Differential abundance analysis","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/981?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/981?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/981?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/981?version=4","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/981?version=5","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/981?version=6","name":"1.3.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/981?version=7","name":"1.3.1","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/981?version=8","name":"1.4.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/981?version=9","name":"1.5.0","author":[],"descriptor_type":["NFL"]}]},{"id":"982","url":"https://workflowhub.eu/workflows/982","name":"nf-core/dualrnaseq","description":"Dual RNA-seq pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/982?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"983","url":"https://workflowhub.eu/workflows/983","name":"nf-core/eager","description":"A fully reproducible and state-of-the-art ancient DNA analysis pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/983?version=1","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/983?version=2","name":"2.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/983?version=3","name":"2.0.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/983?version=4","name":"2.0.3","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/983?version=5","name":"2.0.4","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/983?version=6","name":"2.0.5","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/983?version=7","name":"2.0.6","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/983?version=8","name":"2.0.7","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/983?version=9","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/983?version=10","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/983?version=11","name":"2.2.1","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/983?version=12","name":"2.2.2","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/983?version=13","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/983?version=14","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/983?version=15","name":"2.3.2","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/983?version=16","name":"2.3.3","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/983?vers
ion=17","name":"2.3.4","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/983?version=18","name":"2.3.5","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/983?version=19","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/983?version=20","name":"2.4.1","author":[],"descriptor_type":["NFL"]},{"id":"21","url":"https://workflowhub.eu/workflows/983?version=21","name":"2.4.2","author":[],"descriptor_type":["NFL"]},{"id":"22","url":"https://workflowhub.eu/workflows/983?version=22","name":"2.4.3","author":[],"descriptor_type":["NFL"]},{"id":"23","url":"https://workflowhub.eu/workflows/983?version=23","name":"2.4.4","author":[],"descriptor_type":["NFL"]},{"id":"24","url":"https://workflowhub.eu/workflows/983?version=24","name":"2.4.5","author":[],"descriptor_type":["NFL"]},{"id":"25","url":"https://workflowhub.eu/workflows/983?version=25","name":"2.4.6","author":[],"descriptor_type":["NFL"]},{"id":"26","url":"https://workflowhub.eu/workflows/983?version=26","name":"2.4.7","author":[],"descriptor_type":["NFL"]},{"id":"27","url":"https://workflowhub.eu/workflows/983?version=27","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"28","url":"https://workflowhub.eu/workflows/983?version=28","name":"2.5.1","author":[],"descriptor_type":["NFL"]},{"id":"29","url":"https://workflowhub.eu/workflows/983?version=29","name":"2.5.2","author":[],"descriptor_type":["NFL"]},{"id":"30","url":"https://workflowhub.eu/workflows/983?version=30","name":"2.5.3","author":[],"descriptor_type":["NFL"]}]},{"id":"984","url":"https://workflowhub.eu/workflows/984","name":"nf-core/epitopeprediction","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-epitopeprediction_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/epitopeprediction\" 
src=\"docs/images/nf-core-epitopeprediction_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/epitopeprediction/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/epitopeprediction/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/epitopeprediction/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/epitopeprediction/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/epitopeprediction)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23epitopeprediction-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/epitopeprediction)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on 
Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/epitopeprediction** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. 
For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/epitopeprediction \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/epitopeprediction/usage) and the [parameter documentation](https://nf-co.re/epitopeprediction/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/epitopeprediction/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/epitopeprediction/output).\n\n## Credits\n\nnf-core/epitopeprediction was originally written by Christopher Mohr, Jonas Scheid.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, 
please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#epitopeprediction` channel](https://nfcore.slack.com/channels/epitopeprediction) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. --\u003e\n\u003c!-- If you use nf-core/epitopeprediction for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/984?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/984?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/984?version=3","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/984?version=4","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/984?version=5","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/984?version=6","name":"2.2.1","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/984?version=7","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/984?version=8","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/984?version=9","name":"3.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"985","url":"https://workflowhub.eu/workflows/985","name":"nf-core/fastquorum","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-fastquorum_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/fastquorum\" src=\"docs/images/nf-core-fastquorum_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/fastquorum/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/fastquorum/actions/workflows/ci.yml)\n[![GitHub Actions Linting 
Status](https://github.com/nf-core/fastquorum/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/fastquorum/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/fastquorum/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.11267672-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.11267672)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/fastquorum)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23fastquorum-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/fastquorum)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/fastquorum** is a bioinformatics pipeline that implements the pipeline implements the [fgbio 
Best Practices FASTQ to Consensus Pipeline][fgbio-best-practices-link] to produce consensus reads using unique molecular indexes/barcodes (UMIs).\n`nf-core/fastquorum` can produce consensus reads from single or multi UMI reads, and even [Duplex Sequencing][duplex-seq-link] reads.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/fastquorum/results).\n\n| Tools                                                                                                              | Description                                                                                                                   |\n| ------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------- |\n| \u003cp align=\"center\"\u003e\u003cimg title=\"Fastquorum Workflow (Tools)\" src=\"docs/images/fastquorum_subway.png\" width=100%\u003e\u003c/p\u003e | \u003cp align=\"center\"\u003e\u003cimg title=\"Fastquorum Workflow (Description)\" src=\"docs/images/fastquorum_subway.desc.png\" width=100%\u003e\u003c/p\u003e |\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Fastq to BAM, extracting UMIs ([`fgbio FastqToBam`](http://fulcrumgenomics.github.io/fgbio/tools/latest/FastqToBam.html))\n3. Align ([`bwa mem`](https://github.com/lh3/bwa)), reformat ([`fgbio ZipperBam`](http://fulcrumgenomics.github.io/fgbio/tools/latest/ZipperBam.html)), and template-coordinate sort ([`samtools sort`](http://www.htslib.org/doc/samtools.html))\n4. Group reads by UMI ([`fgbio GroupReadsByUmi`](http://fulcrumgenomics.github.io/fgbio/tools/latest/GroupReadsByUmi.html))\n5. Call consensus reads\n   1. For [Duplex-Sequencing][duplex-seq-link] data\n      1. Call duplex consensus reads ([`fgbio CallDuplexConsensusReads`](http://fulcrumgenomics.github.io/fgbio/tools/latest/CallDuplexConsensusReads.html))\n      2. Collect duplex sequencing specific metrics ([`fgbio CollectDuplexSeqMetrics`](http://fulcrumgenomics.github.io/fgbio/tools/latest/CollectDuplexSeqMetrics.html))\n   2. For non-Duplex-Sequencing data:\n      1. 
Call molecular consensus reads ([`fgbio CallMolecularConsensusReads`](http://fulcrumgenomics.github.io/fgbio/tools/latest/CallMolecularConsensusReads.html))\n6. Align ([`bwa mem`](https://github.com/lh3/bwa))\n7. Filter consensus reads ([`fgbio FilterConsensusReads`](http://fulcrumgenomics.github.io/fgbio/tools/latest/FilterConsensusReads.html))\n8. Present QC ([`MultiQC`](http://multiqc.info/))\n\n## Verified Vendors, Kits, and Assays\n\n\u003e [!WARNING]\n\u003e The following Vendors, Kits, and Assays are provided for informational purposes only.\n\u003e _No warranty for the accuracy or completeness of the information or parameters is implied._\n\n| Verified | Assay                                                     | Company                     | Strand | Randomness | UMI Location     | Read Structure  | URL                                                                                                                                                                                 |\n| -------- | --------------------------------------------------------- | --------------------------- | ------ | ---------- | ---------------- | --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| No       | SureSelect XT HS                                          | Agilent Technologies        | Single | Random     |                  |                 | [link](https://www.agilent.com/en/product/next-generation-sequencing/ngs-library-prep-target-enrichment-reagents/dna-seq-reagents/sureselectxt-hs-reagent-kits-4252208)             |\n| No       | SureSelect XT HS2 (MBC)                                   | Agilent Technologies        | Dual   | Random     |                  |                 | 
[link](https://www.agilent.com/en/product/next-generation-sequencing/ngs-library-prep-target-enrichment-reagents/dna-seq-reagents/sureselect-xt-hs2-dna-reagent-kit-4252207)        |\n| No       | TruSight Oncology (TSO)                                   | Illumina                    | Dual   | Nonrandom  |                  |                 | [link](https://www.illumina.com/products/by-type/clinical-research-products/trusight-oncology-umi.html)                                                                             |\n| No       | xGen dual index UMI Adapters                              | Integrated DNA Technologies | Single | Random     | index1 (i7)      |                 | [link](https://www.idtdna.com/pages/products/next-generation-sequencing/workflow/xgen-ngs-library-preparation/ngs-adapters-indexing-primers/adapters-indexing-primers-for-illumina) |\n| No       | xGen Prism (xGen cfDNA \u0026 FFPE DNA Library Prep MC v2 Kit) | Integrated DNA Technologies | Dual   | Nonrandom  |                  |                 | [link](https://www.idtdna.com/pages/products/next-generation-sequencing/workflow/xgen-ngs-library-preparation/dna-library-preparation/cfdna-ffpe-prep-kit)                          |\n| No       | NEBNext                                                   | New England Biosciences     | Single | Random     | index1 (i7)      |                 | [link](https://www.neb.com/en-us/products/e7874nebnext-multiplex-oligos-for-illumina-unique-dual-index-umi-adaptors-dna-set-2)                                                      |\n| No       | AML MRD                                                   | TwinStrand Biosciences      | Dual   | Random     |                  |                 | [link](https://twinstrandbio.com/aml-assay/)                                                                                                                                        |\n| No       | Mutagenesis                                               | TwinStrand 
Biosciences      | Dual   | Random     |                  |                 | [link](https://twinstrandbio.com/mutagenesis-assay/)                                                                                                                                |\n| No       | UMI Adapter System                                        | Twist Biosciences           | Dual   | Random     | Inline (R1 \u0026 R2) | `5M2S+T 5M2S+T` | [link](https://www.twistbioscience.com/products/ngs/library-preparation/twist-umi-adapter-system)                                                                                   |\n\nColumn Definitions:\n\n- Assay: the name of the assay or kit\n- Company: the name of the company or vendor providing the assay or kit\n- Strand: Dual if both strands of a double-stranded source molecule are sequences (e.g. Duplex Sequencing), Single otherwise\n- Randomness: if the unique molecular identifiers (UMIs) are fully random (degenerate) or are synthesized from a fixed set\n- UMI Location: the location of UMIs within the reads.\n- Read Structure: the [`read_structure`][read-structure-link] describes how the bases in a sequencing run should be allocated into logical reads, including the unique molecular index(es)\n- URL: link(s) to vendor documentation or further information\n\nTo become \"Verified\" by `nf-core/fastquorum`, please open an issue and provide the maintainers with an example dataset that can be shared publicly.\nThe dataset or a subset will be added to [nf-core/test-datasets](https://github.com/nf-core/test-datasets/tree/fastquorum).\nPlease reach out to maintainers if additional support is needed to prepare or select such data.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow.Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on 
actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,read_structure\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,5M2S+T 5M2S+T\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\nThe `sample` column provides a unique identifier for the given sample, while the `read_structure` describes how the bases in a sequencing run should be allocated into logical reads, including the unique molecular index(es).\n(Please see the [fgbio documentation](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) for detailed information on read structure syntax and formatting.)\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/fastquorum \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --genome GRCh38 \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nTwo modes of running this pipeline are supported:\n\n1. Research and Development (R\u0026D): use `--mode rd` or `params.mode=rd`. This mode is desirable to be able to branch off from the pipeline and test e.g. multiple consensus calling or filtering parameters\n2. High Throughput (HT): use `--mode ht` or `params.mode=ht`. 
This mode is intended for high throughput production environments where performance and throughput take precedence over flexibility\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/fastquorum/usage) and the [parameter documentation](https://nf-co.re/fastquorum/parameters).\n\nSee also:\n\n1. The [fgbio Best Practice FASTQ -\u003e Consensus Pipeline][fgbio-best-practices-link]\n2. [Read structures](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) as required in the input sample sheet.\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/fastquorum/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/fastquorum/output).\n\n## Credits\n\nnf-core/fastquorum was originally written and is primarily maintained by Nils Homer.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Nils Homer](https://github.com/nh13)\n\n## Acknowledgements\n\nWe thank [Fulcrum Genomics](https://www.fulcrumgenomics.com/) for their extensive assistance in the development of this pipeline.\n\n\u003cp align=\"left\"\u003e\n\u003ca href=\"https://fulcrumgenomics.com\"\u003e\n\u003cimg width=\"500\" height=\"100\" src=\"docs/images/Fulcrum.svg\" alt=\"Fulcrum Genomics\"/\u003e\n\u003c/a\u003e\n\u003c/p\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#fastquorum` channel](https://nfcore.slack.com/channels/fastquorum) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/fastquorum for your analysis, please cite 
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11267672.svg)](https://doi.org/10.5281/zenodo.11267672) for this pipeline and [![DOI](https://zenodo.org/badge/53011104.svg)](https://zenodo.org/doi/10.5281/zenodo.10456900) for [`fgbio`](https://github.com/fulcrumgenomics/fgbio).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\n[fgbio-best-practices-link]: https://github.com/fulcrumgenomics/fgbio/blob/main/docs/best-practice-consensus-pipeline.md\n[duplex-seq-link]: https://en.wikipedia.org/wiki/Duplex_sequencing\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/985?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/985?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/985?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/985?version=4","name":"1.2.0","author":[],"descriptor_type":["NFL"]}]},{"id":"986","url":"https://workflowhub.eu/workflows/986","name":"nf-core/fetchngs","description":"Pipeline to fetch metadata and raw FastQ files from public databases","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/986?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/986?version=2","name":"1.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/986?version=3","name":"1.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/986?version=4","name":"1.3","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/986?version=5","name":"1.4","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/986?version=6","name":"1.5","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/986?version=7","name":"1.6","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/986?version=8","name":"1.7","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/986?version=9","name":"1.8","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/986?version=10","name":"1.9","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/986?version=11","name":"1.10.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/986?version=12","name":"1.10.1","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/986?version=13","name":"1.11.0","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/986?version=14","name":"1.12.0","author":[],"descriptor_type":["NFL"]}]},{"id":"987","url":"https://workflowhub.eu/workflows/987","name":"nf-core/funcscan","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-funcscan_logo_flat_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/funcscan\" 
src=\"nf-core-funcscan_logo_flat_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/funcscan)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23funcscan-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/funcscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?style=plastic\u0026label=HiRSE\u0026labelColor=005aa0\u0026link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/funcscan** is a bioinformatics best-practice analysis pipeline for the screening of nucleotide sequences such as assembled contigs for functional genes. It currently features mining for antimicrobial peptides, antibiotic resistance genes and biosynthetic gene clusters.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/funcscan/results).\n\nThe nf-core/funcscan AWS full test dataset are contigs generated by the MGnify service from the ENA. We used contigs generated from assemblies of chicken cecum shotgun metagenomes (study accession: MGYS00005631).\n\n## Pipeline summary\n\n1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/)\n2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2)\n3. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta)\n4. Annotation of coding sequences from 3. to obtain general protein families and domains with [`InterProScan`](https://github.com/ebi-pf-team/interproscan)\n5. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify)\n6. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). 
[`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms.\n7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/)\n8. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs\n9. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/)\n\n![funcscan metro workflow](docs/images/funcscan_metro_workflow.png)\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nCONTROL_REP1,AEG588A1_001.fasta\nCONTROL_REP2,AEG588A1_002.fasta\nCONTROL_REP3,AEG588A1_003.fasta\n```\n\nEach row represents a (multi-)fasta file of assembled contig sequences.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/funcscan \\\n   -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e \\\n   --run_amp_screening \\\n   --run_arg_screening \\\n   --run_bgc_screening\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/funcscan/output).\n\n## Credits\n\nnf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. 
Fellows Yates.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\nAdam Talbot, Alexandru Mizeranschi, Hugo Tavares, Júlia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#funcscan` channel](https://nfcore.slack.com/channels/funcscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/funcscan for your analysis, please cite it using the following doi: [10.5281/zenodo.7643099](https://doi.org/10.5281/zenodo.7643099)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/987?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/987?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/987?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/987?version=4","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/987?version=5","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/987?version=6","name":"1.1.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/987?version=7","name":"1.1.4","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/987?version=8","name":"1.1.5","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/987?version=9","name":"1.1.6","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/987?version=10","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/987?version=11","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/987?version=12","name":"3.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"988","url":"https://workflowhub.eu/workflows/988","name":"nf-core/hgtseq","description":"A pipeline to investigate horizontal gene transfer from NGS data","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/988?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/988?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"989","url":"https://workflowhub.eu/workflows/989","name":"nf-core/hic","description":"Analysis of Chromosome Conformation Capture data (Hi-C)","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/989?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/989?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/989?version=3","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/989?version=4","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/989?version=5","name":"1.2.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/989?version=6","name":"1.3.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/989?version=7","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/989?version=8","name":"2.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"990","url":"https://workflowhub.eu/workflows/990","name":"nf-core/hicar","description":"This pipeline analyses data for HiCAR data, a robust and sensitive multi-omic co-assay for simultaneous measurement of transcriptome, chromatin accessibility and cis-regulatory chromatin contacts.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/990?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"991","url":"https://workflowhub.eu/workflows/991","name":"nf-core/hlatyping","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-hlatyping_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/hlatyping\" src=\"docs/images/nf-core-hlatyping_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/hlatyping/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/hlatyping/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/hlatyping/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/hlatyping/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/hlatyping/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/hlatyping)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23hlatyping-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/hlatyping)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/hlatyping** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/hlatyping \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/hlatyping/usage) and the [parameter documentation](https://nf-co.re/hlatyping/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/hlatyping/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/hlatyping/output).\n\n## Credits\n\nnf-core/hlatyping was originally written by Christopher Mohr, Alexander Peltzer, Sven Fillinger.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#hlatyping` channel](https://nfcore.slack.com/channels/hlatyping) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/hlatyping for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/991?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/991?version=2","name":"1.0.0-rc1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/991?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/991?version=4","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/991?version=5","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/991?version=6","name":"1.1.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/991?version=7","name":"1.1.4","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/991?version=8","name":"1.1.5","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/991?version=9","name":"1.2.0","aut
hor":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/991?version=10","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/991?version=11","name":"2.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"992","url":"https://workflowhub.eu/workflows/992","name":"nf-core/imcyto","description":"Image Mass Cytometry analysis pipeline.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/992?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"993","url":"https://workflowhub.eu/workflows/993","name":"nf-core/isoseq","description":"Genes and transcripts annotation with Isoseq using uLTRA and TAMA","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/993?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/993?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/993?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/993?version=4","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/993?version=5","name":"1.1.3","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/993?version=6","name":"1.1.4","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/993?version=7","name":"1.1.5","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/993?version=8","name":"2.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"994","url":"https://workflowhub.eu/workflows/994","name":"nf-core/kmermaid","description":"Compare 
DNA/RNA/protein sequences on k-mer content","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/994?version=1","name":"0.1.0-alpha","author":[],"descriptor_type":["NFL"]}]},{"id":"995","url":"https://workflowhub.eu/workflows/995","name":"nf-core/mag","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/mag_logo_mascot_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/mag\" src=\"docs/images/mag_logo_mascot_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3589527-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3589527)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-orange)](https://doi.org/10.1093/nargab/lqac007)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.10.0-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/mag)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mag-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/mag)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/mag** is a bioinformatics best-practise analysis pipeline for assembly, binning and annotation of metagenomes.\n\n\u003cp align=\"center\"\u003e\n    \u003cimg src=\"docs/images/mag_workflow.png\" alt=\"nf-core/mag workflow overview\" width=\"90%\"\u003e\n\u003c/p\u003e\n\n## Pipeline summary\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nBy default, the pipeline currently performs the following: it supports both short and long reads, quality trims the reads and adapters with [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval](https://github.com/MikkelSchubert/adapterremoval), or [trimmomatic](https://github.com/usadellab/Trimmomatic) and [Porechop](https://github.com/rrwick/Porechop), and performs basic QC with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), and merges multiple sequencing runs.\n\nThe pipeline then:\n\n- assigns taxonomy to reads using [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) and/or [Kraken2](https://github.com/DerrickWood/kraken2/wiki)\n- performs assembly using [MEGAHIT](https://github.com/voutcn/megahit) and [SPAdes](http://cab.spbu.ru/software/spades/), and checks their quality using [Quast](http://quast.sourceforge.net/quast)\n- (optionally) performs ancient DNA assembly validation using [PyDamage](https://github.com/maxibor/pydamage) and contig consensus sequence recalling with [Freebayes](https://github.com/freebayes/freebayes) and [BCFtools](http://samtools.github.io/bcftools/bcftools.html)\n- predicts protein-coding genes for the assemblies using [Prodigal](https://github.com/hyattpd/Prodigal), and bins with [Prokka](https://github.com/tseemann/prokka) and optionally [MetaEuk](https://www.google.com/search?channel=fs\u0026client=ubuntu-sn\u0026q=MetaEuk)\n- performs metagenome binning using [MetaBAT2](https://bitbucket.org/berkeleylab/metabat/src/master/), [MaxBin2](https://sourceforge.net/projects/maxbin2/), and/or with [CONCOCT](https://github.com/BinPro/CONCOCT), and checks the quality of the genome bins using [Busco](https://busco.ezlab.org/), [CheckM](https://ecogenomics.github.io/CheckM/), or [CheckM2](https://github.com/chklovski/CheckM2) and optionally 
[GUNC](https://grp-bork.embl-community.io/gunc/).\n- Performs ancient DNA validation and repair with [pyDamage](https://github.com/maxibor/pydamage) and [freebayes](https://github.com/freebayes/freebayes)\n- optionally refines bins with [DAS Tool](https://github.com/cmks/DAS_Tool)\n- assigns taxonomy to bins using [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) and/or [CAT](https://github.com/dutilh/CAT) and optionally identifies viruses in assemblies using [geNomad](https://github.com/apcamargo/genomad), or Eukaryotes with [Tiara](https://github.com/ibe-uw/tiara)\n\nFurthermore, the pipeline creates various reports in the results directory specified, including a [MultiQC](https://multiqc.info/) report summarizing some of the findings and software versions.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n```bash\nnextflow run nf-core/mag -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e --input '*_R{1,2}.fastq.gz' --outdir \u003cOUTDIR\u003e\n```\n\nor\n\n```bash\nnextflow run nf-core/mag -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e --input samplesheet.csv --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mag/usage) and the [parameter documentation](https://nf-co.re/mag/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/mag/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/mag/output).\n\n### Group-wise co-assembly and co-abundance computation\n\nEach sample has an associated group ID (see [input specifications](https://nf-co.re/mag/usage#input_specifications)). This group information can be used for group-wise co-assembly with `MEGAHIT` or `SPAdes` and/or to compute co-abundances for the binning step with `MetaBAT2`. By default, group-wise co-assembly is disabled, while the computation of group-wise co-abundances is enabled. For more information about how this group information can be used see the documentation for the parameters [`--coassemble_group`](https://nf-co.re/mag/parameters#coassemble_group) and [`--binning_map_mode`](https://nf-co.re/mag/parameters#binning_map_mode).\n\nWhen group-wise co-assembly is enabled, `SPAdes` is run on accordingly pooled read files, since `metaSPAdes` does not yet allow the input of multiple samples or libraries. In contrast, `MEGAHIT` is run for each group while supplying lists of the individual readfiles.\n\n## Credits\n\nnf-core/mag was written by [Hadrien Gourlé](https://hadriengourle.com) at [SLU](https://slu.se), [Daniel Straub](https://github.com/d4straub) and [Sabrina Krakau](https://github.com/skrakau) at the [Quantitative Biology Center (QBiC)](http://qbic.life). [James A. 
Fellows Yates](https://github.com/jfy133) and [Maxime Borry](https://github.com/maxibor) at the [Max Planck Institute for Evolutionary Anthropology](https://www.eva.mpg.de) joined in version 2.2.0.\n\nOther code contributors include:\n\n- [Antonia Schuster](https://github.com/AntoniaSchuster)\n- [Alexander Ramos](https://github.com/alxndrdiaz)\n- [Carson Miller](https://github.com/CarsonJM)\n- [Daniel Lundin](https://github.com/erikrikarddaniel)\n- [Danielle Callan](https://github.com/d-callan)\n- [Gregory Sprenger](https://github.com/gregorysprenger)\n- [Jim Downie](https://github.com/prototaxites)\n- [Phil Palmer](https://github.com/PhilPalmer)\n- [@willros](https://github.com/willros)\n- [Adam Rosenbaum](https://github.com/muabnezor)\n- [Diego Alvarez](https://github.com/dialvarezs)\n- [Nikolaos Vergoulidis](https://github.com/IceGreb)\n\nLong read processing was inspired by [caspargross/HybridAssembly](https://github.com/caspargross/HybridAssembly) written by Caspar Gross [@caspargross](https://github.com/caspargross)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Alexander Peltzer](https://github.com/apeltzer)\n- [Phil Ewels](https://github.com/ewels)\n- [Gisela Gabernet](https://github.com/ggabernet)\n- [Harshil Patel](https://github.com/drpatelh)\n- [Johannes Alneberg](https://github.com/alneberg)\n- [Maxime Garcia](https://github.com/MaxUlysse)\n- [Michael L Heuer](https://github.com/heuermh)\n- [Alex Hübner](https://github.com/alexhbnr)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#mag` channel](https://nfcore.slack.com/channels/mag) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/mag for your analysis, please cite the preprint as follows:\n\n\u003e 
**nf-core/mag: a best-practice pipeline for metagenome hybrid assembly and binning**\n\u003e\n\u003e Sabrina Krakau, Daniel Straub, Hadrien Gourlé, Gisela Gabernet, Sven Nahnsen.\n\u003e\n\u003e NAR Genom Bioinform. 2022 Feb 2;4(1):lqac007. doi: [10.1093/nargab/lqac007](https://doi.org/10.1093/nargab/lqac007).\n\nAdditionally you can cite the pipeline directly with the following doi: [10.5281/zenodo.3589527](https://doi.org/10.5281/zenodo.3589527)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/995?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/995?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/995?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/995?version=4","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/995?version=5","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/995?version=6","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/995?version=7","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/995?version=8","name":"2.1.1","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/995?version=9","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/995?version=10","name":"2.2.1","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/995?version=11","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/995?version=12","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/995?version=13","name":"2.3.2","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/995?version=14","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/995?version=15","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/995?version=16","name":"2.5.1","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/995?vers
ion=17","name":"2.5.2","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/995?version=18","name":"2.5.3","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/995?version=19","name":"2.5.4","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/995?version=20","name":"3.0.0","author":[],"descriptor_type":["NFL"]},{"id":"21","url":"https://workflowhub.eu/workflows/995?version=21","name":"3.0.1","author":[],"descriptor_type":["NFL"]},{"id":"22","url":"https://workflowhub.eu/workflows/995?version=22","name":"3.0.2","author":[],"descriptor_type":["NFL"]},{"id":"23","url":"https://workflowhub.eu/workflows/995?version=23","name":"3.0.3","author":[],"descriptor_type":["NFL"]},{"id":"24","url":"https://workflowhub.eu/workflows/995?version=24","name":"3.1.0","author":[],"descriptor_type":["NFL"]},{"id":"25","url":"https://workflowhub.eu/workflows/995?version=25","name":"3.2.0","author":[],"descriptor_type":["NFL"]},{"id":"26","url":"https://workflowhub.eu/workflows/995?version=26","name":"3.2.1","author":[],"descriptor_type":["NFL"]},{"id":"27","url":"https://workflowhub.eu/workflows/995?version=27","name":"3.3.0","author":[],"descriptor_type":["NFL"]},{"id":"28","url":"https://workflowhub.eu/workflows/995?version=28","name":"3.3.1","author":[],"descriptor_type":["NFL"]},{"id":"29","url":"https://workflowhub.eu/workflows/995?version=29","name":"3.4.0","author":[],"descriptor_type":["NFL"]},{"id":"30","url":"https://workflowhub.eu/workflows/995?version=30","name":"4.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"996","url":"https://workflowhub.eu/workflows/996","name":"nf-core/marsseq","description":"MARS-seq v2 preprocessing pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/996?version=1","name":"v0.1","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/996?version=2","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/996?version=3","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/996?version=4","name":"1.0.2","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/996?version=5","name":"1.0.3","author":[],"descriptor_type":["NFL"]}]},{"id":"997","url":"https://workflowhub.eu/workflows/997","name":"nf-core/metatdenovo","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-metatdenovo_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/metatdenovo\" src=\"docs/images/nf-core-metatdenovo_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/metatdenovo/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/metatdenovo/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/metatdenovo/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/metatdenovo/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/metatdenovo/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10666590-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10666590)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/metatdenovo)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23metatdenovo-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/metatdenovo)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/metatdenovo** is a bioinformatics best-practice 
analysis pipeline for assembly and annotation of metatranscriptomic and metagenomic data from prokaryotes, eukaryotes or viruses.\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/metatdenovo/results).\n\n## Usage\n\n![nf-core/metatdenovo metro map](docs/images/metat-metromap.png)\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n3. Quality trimming and adapter removal for raw reads ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))\n4. Optional: Filter sequences with [`BBduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/)\n5. Optional: Normalize the sequencing depth with [`BBnorm`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbnorm-guide/)\n6. Merge trimmed, pair-end reads ([`Seqtk`](https://github.com/lh3/seqtk))\n7. Choice of de novo assembly programs:\n   1. [`RNAspades`](https://cab.spbu.ru/software/rnaspades/) suggested for both prokaryote and eukaryote assembly\n   2. [`Megahit`](https://github.com/voutcn/megahit) suggested for both prokaryote and eukaryote assembly; requires less resources\n8. Choice of orf caller:\n   1. [`TransDecoder`](https://github.com/TransDecoder/TransDecoder) suggested for eukaryotes; only ORFs\n   2. [`Prokka`](https://github.com/tseemann/prokka) suggested for prokaryotes; ORFs and other features plus functional annotation\n   3. 
[`Prodigal`](https://github.com/hyattpd/Prodigal) suggested for Prokaryotes; only ORFs\n9. Quantification of genes identified in assemblies:\n   1. Generate index of assembly ([`BBmap index`](https://sourceforge.net/projects/bbmap/))\n   2. Mapping cleaned reads to the assembly for quantification ([`BBmap`](https://sourceforge.net/projects/bbmap/))\n   3. Get raw counts per each gene present in the assembly ([`Featurecounts`](http://subread.sourceforge.net)) -\u003e TSV table with collected featurecounts output\n10. Functional annotation:\n    1. [`Prokka`](https://github.com/tseemann/prokka) feature identification and annotation for prokaryotes\n    2. [`eggNOG-mapper`](https://github.com/eggnogdb/eggnog-mapper)\n    3. [`KofamScan`](https://github.com/takaram/kofam_scan)\n    4. [`HMMER`](https://www.ebi.ac.uk/Tools/hmmer/search/hmmsearch) search ORFs with a set of HMM profiles, and rank results\n11. Taxonomic annotation:\n    1. [`EUKulele`](https://github.com/AlexanderLabWHOI/EUKulele)\n    2. [`Diamond`](https://github.com/bbuchfink/diamond)\n12. Summary statistics.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```\nsample,fastq_1,fastq_2\nsample1,./data/S1_R1_001.fastq.gz,./data/S1_R2_001.fastq.gz\nsample2,./data/S2_fw.fastq.gz,./data/S2_rv.fastq.gz\nsample3,./S4x.fastq.gz,./S4y.fastq.gz\nsample3,./a.fastq.gz,./b.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired-end).\nThe fastq files need to end with `.fq` or `.fastq`, followed by `.gz` if gzipped.\nRead files from multiple rows with the same sample name will be concatenated and treated as a single sample.\nA mix of single-end and paired-end files is allowed, but do not mix single-end and paired-end for the same sample name.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/metatdenovo \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/metatdenovo/usage) and the [parameter documentation](https://nf-co.re/metatdenovo/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/metatdenovo/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/metatdenovo/output).\n\n\u003e [!NOTE]\n\u003e Tables in the `summary_tables` directory under the output directory are made especially for further analysis in tools like R or Python.\n\u003e Their formats are standardized and column names consistent between tables.\n\n## Credits\n\nnf-core/metatdenovo was originally written by Danilo Di Leo (@danilodileo), Emelie Nilsson (@emnilsson) \u0026 Daniel Lundin (@erikrikarddaniel).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#metatdenovo` channel](https://nfcore.slack.com/channels/metatdenovo) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/metatdenovo for your analysis, please cite it using the following doi: [10.5281/zenodo.10666590](https://doi.org/10.5281/zenodo.10666590)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics 
pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/997?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/997?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/997?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/997?version=4","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/997?version=5","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/997?version=6","name":"1.3.0","author":[],"descriptor_type":["NFL"]}]},{"id":"998","url":"https://workflowhub.eu/workflows/998","name":"nf-core/methylseq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-methylseq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/methylseq\" src=\"docs/images/nf-core-methylseq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/methylseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/methylseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/methylseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/methylseq/actions/workflows/linting.yml)[![AWS 
CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/methylseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1343417-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1343417)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/methylseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23methylseq-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/methylseq)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/methylseq** is a bioinformatics analysis pipeline used for Methylation (Bisulfite) sequencing data. It pre-processes raw data from FastQ inputs, aligns the reads and performs extensive quality-control on the results.\n\n![nf-core/methylseq metro map](docs/images/4.0.0_metromap.png)\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker / Singularity / Podman / Charliecloud / Apptainer containers making installation trivial and results highly reproducible.\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/methylseq/results).\n\n\u003e Read more about **Bisulfite Sequencing \u0026 Three-Base Aligners** used in this pipeline [here](docs/usage/bs-seq-primer.md)\n\n## Pipeline Summary\n\nThe pipeline allows you to choose between running either [Bismark](https://github.com/FelixKrueger/Bismark) or [bwa-meth](https://github.com/brentp/bwa-meth) / [MethylDackel](https://github.com/dpryan79/methyldackel).\n\nChoose between workflows by using `--aligner bismark` (default, uses bowtie2 for alignment), `--aligner bismark_hisat` or `--aligner bwameth`. 
For higher performance, the pipeline can leverage the [Parabricks implementation of bwa-meth (fq2bammeth)](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam_meth.html), which implements the baseline tool `bwa-meth` in a performant method using fq2bam (BWA-MEM + GATK) as a backend for processing on GPU. To use this option, include the `gpu` profile along with `--aligner bwameth`.\n\nNote: For faster CPU runs with BWA-Meth, enable the BWA-MEM2 algorithm using `--use_mem2`. The GPU pathway (Parabricks) requires `-profile gpu` and a container runtime (Docker, Singularity, or Podman); Conda/Mamba are not supported for the GPU module.\n\n| Step                                         | Bismark workflow         | bwa-meth workflow     |\n| -------------------------------------------- | ------------------------ | --------------------- |\n| Generate Reference Genome Index _(optional)_ | Bismark                  | bwa-meth              |\n| Merge re-sequenced FastQ files               | cat                      | cat                   |\n| Raw data QC                                  | FastQC                   | FastQC                |\n| Adapter sequence trimming                    | Trim Galore!             | Trim Galore!          
|\n| Align Reads                                  | Bismark (bowtie2/hisat2) | bwa-meth              |\n| Deduplicate Alignments                       | Bismark                  | Picard MarkDuplicates |\n| Extract methylation calls                    | Bismark                  | MethylDackel          |\n| Sample report                                | Bismark                  | -                     |\n| Summary Report                               | Bismark                  | -                     |\n| Alignment QC                                 | Qualimap _(optional)_    | Qualimap _(optional)_ |\n| Sample complexity                            | Preseq _(optional)_      | Preseq _(optional)_   |\n| Project Report                               | MultiQC                  | MultiQC               |\n\nOptional targeted sequencing analysis is available via `--run_targeted_sequencing` and `--target_regions_file`; see the [usage documentation](https://nf-co.re/methylseq/usage) for details.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,genome\nSRR389222_sub1,https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz,,\nSRR389222_sub2,https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz,,\nSRR389222_sub3,https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub3.fastq.gz,,\nEcoli_10K_methylated,https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz,https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz,\n```\n\n\u003e Each row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\nNow, you can run the pipeline using default parameters as:\n\n```bash\nnextflow run nf-core/methylseq --input samplesheet.csv --outdir \u003cOUTDIR\u003e --genome GRCh37 -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/methylseq/usage) and the [parameter documentation](https://nf-co.re/methylseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/methylseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/methylseq/output).\n\n## Credits\n\nnf-core/methylseq was originally written by Phil Ewels ([@ewels](https://github.com/ewels)), and Sateesh Peri ([@sateeshperi](https://github.com/sateeshperi)) is its active maintainer.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Felix Krueger ([@FelixKrueger](https://github.com/FelixKrueger))\n- Edmund Miller ([@EMiller88](https://github.com/emiller88))\n- Rickard Hammarén ([@Hammarn](https://github.com/Hammarn/))\n- Alexander Peltzer ([@apeltzer](https://github.com/apeltzer/))\n- Patrick Hüther ([@phue](https://github.com/phue/))\n- Maxime U Garcia ([@maxulysse](https://github.com/maxulysse/))\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#methylseq` channel](https://nfcore.slack.com/channels/methylseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/methylseq for your analysis, please cite it using the following doi: [10.5281/zenodo.1343417](https://doi.org/10.5281/zenodo.1343417)\n\nAn extensive 
list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/998?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/998?version=2","name":"1.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/998?version=3","name":"1.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/998?version=4","name":"1.3","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/998?version=5","name":"1.4","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/998?version=6","name":"1.5","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/998?version=7","name":"1.6","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/998?version=8","name":"1.6.1","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/998?version=9","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/998?version=10","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/998?version=11","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://work
flowhub.eu/workflows/998?version=12","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/998?version=13","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/998?version=14","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/998?version=15","name":"2.6.0","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/998?version=16","name":"2.7.0","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/998?version=17","name":"2.7.1","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/998?version=18","name":"3.0.0","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/998?version=19","name":"4.0.0","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/998?version=20","name":"4.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"999","url":"https://workflowhub.eu/workflows/999","name":"nf-core/mhcquant","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-mhcquant_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/mhcquant\" src=\"docs/images/nf-core-mhcquant_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/mhcquant/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mhcquant/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/mhcquant/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mhcquant/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.8427707-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.8427707)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/mhcquant)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mhcquant-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/mhcquant)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/mhcquant** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. 
For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/mhcquant \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mhcquant/usage) and the [parameter documentation](https://nf-co.re/mhcquant/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/mhcquant/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/mhcquant/output).\n\n## Credits\n\nnf-core/mhcquant was originally written by Jonas Scheid, Steffen Lemke, Leon Bichmann, Marissa Dubbelaar.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#mhcquant` channel](https://nfcore.slack.com/channels/mhcquant) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/mhcquant for your analysis, please cite it using the following doi: [10.5281/zenodo.8427707](https://doi.org/10.5281/zenodo.8427707) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/999?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/999?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/999?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/999?version=4","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/999?version=5","name":"1.2.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/999?version=6","name":"1.2.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/999?version=7","name":"1.2.4","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/999?version=8","name":"1.2.5","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/999?version=9","name":"1.2.6","author
":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/999?version=10","name":"1.3","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/999?version=11","name":"1.4","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/999?version=12","name":"1.5","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/999?version=13","name":"1.5.1","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/999?version=14","name":"1.6.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/999?version=15","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/999?version=16","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/999?version=17","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/999?version=18","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/999?version=19","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/999?version=20","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"21","url":"https://workflowhub.eu/workflows/999?version=21","name":"2.4.1","author":[],"descriptor_type":["NFL"]},{"id":"22","url":"https://workflowhub.eu/workflows/999?version=22","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"23","url":"https://workflowhub.eu/workflows/999?version=23","name":"2.6.0","author":[],"descriptor_type":["NFL"]},{"id":"24","url":"https://workflowhub.eu/workflows/999?version=24","name":"3.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1000","url":"https://workflowhub.eu/workflows/1000","name":"nf-core/mnaseseq","description":"MNase-seq analysis pipeline using BWA and 
DANPOS2.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1000?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1001","url":"https://workflowhub.eu/workflows/1001","name":"nf-core/molkart","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-molkart_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/molkart\" src=\"docs/images/nf-core-molkart_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/molkart/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/molkart/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/molkart/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/molkart/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/molkart/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10650748-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10650748)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/molkart)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23molkart-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/molkart)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/molkart** is a pipeline for processing Molecular Cartography data from Resolve Bioscience (combinatorial FISH). It takes as input a table of FISH spot positions (x,y,z,gene), a corresponding DAPI image (`TIFF` format) and optionally an additional staining image in the `TIFF` format. nf-core/molkart performs end-to-end processing of the data including image processing, QC filtering of spots, cell segmentation, spot-to-cell assignment and reports quality metrics such as the spot assignment rate, average spots per cell and segmentation mask size ranges.\n\n\u003cp align=\"center\"\u003e\n    \u003cimg title=\"Molkart Workflow\" src=\"docs/images/molkart_workflow.png\" width=100%\u003e\n\u003c/p\u003e\n\nImage preprocessing\n\n- Fill the grid pattern in provided images ([`Mindagap`](https://github.com/ViriatoII/MindaGap))\n- Optionally apply contrast-limited adaptive histogram equalization\n- If a second (membrane) image is present, combine images into a multichannel stack (if required for segmentation)\n\nCell segmentation\n\n- Apply cell segmentation based on provided images, available options are: - [`Cellpose`](https://www.cellpose.org/) - [`Mesmer`](https://deepcell.readthedocs.io/en/master/API/deepcell.applications.html#mesmer) - [`ilastik`](https://www.ilastik.org/) - [`Stardist`](https://github.com/stardist/stardist)\n- Filter cells based on cell size to 
remove artifacts\n\nSpot processing\n\n- Find duplicated spots near grid lines ([`Mindagap`](https://github.com/ViriatoII/MindaGap))\n- Assign spots to segmented cells\n\nQuality control\n\n- Create quality-control metrics specific to this pipeline\n- provide them to ([`MultiQC`](http://multiqc.info/)) to create a report\n\n## Usage\n\n:::note\nIf you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how\nto set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)\nwith `-profile test` before running the workflow on actual data.\n:::\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,nuclear_image,spot_locations,membrane_image\nsample0,sample0_DAPI.tiff,sample0_spots.txt,sample0_WGA.tiff\n```\n\nEach row represents an FOV (field-of-view). Columns represent the sample ID (all must be unique), the path to the respective nuclear image, the spot table, and optionally the path to the respective membrane image (or any additional image to improve segmentation).\n\nNow, you can run the pipeline using all default values with:\n\n```bash\nnextflow run nf-core/molkart \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/molkart/usage) and the [parameter documentation](https://nf-co.re/molkart/parameters).\n\n## Pipeline output\n\nThe pipeline outputs a matched cell-by-transcript table based on deduplicated spots and segmented cells, as well as preprocessing and segmentation intermediaries.\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/molkart/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/molkart/output).\n\n## Credits\n\nnf-core/molkart was originally written by @kbestak, @FloWuenne.\n\nWe thank [Maxime U Garcia](https://github.com/maxulysse) for his assistance and support in the development of this pipeline.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#molkart` channel](https://nfcore.slack.com/channels/molkart) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/molkart for your analysis, please cite it using the following doi: [10.5281/zenodo.10650749](https://doi.org/10.5281/zenodo.10650749)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil 
Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1001?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1001?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1002","url":"https://workflowhub.eu/workflows/1002","name":"nf-core/nanoseq","description":"A pipeline to demultiplex, QC and map Nanopore data","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1002?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1002?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1002?version=3","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1002?version=4","name":"2.0.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1002?version=5","name":"3.0.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1002?version=6","name":"3.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1003","url":"https://workflowhub.eu/workflows/1003","name":"nf-core/nanostring","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-nanostring_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/nanostring\" src=\"docs/images/nf-core-nanostring_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub 
Actions CI Status](https://github.com/nf-core/nanostring/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/nanostring/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/nanostring/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/nanostring/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/nanostring/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/nanostring)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23nanostring-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/nanostring)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/nanostring** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. 
For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/nanostring \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/nanostring/usage) and the [parameter documentation](https://nf-co.re/nanostring/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/nanostring/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/nanostring/output).\n\n## Credits\n\nnf-core/nanostring was originally written by Peltzer, Alexander \u0026 Mohr, Christopher.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#nanostring` channel](https://nfcore.slack.com/channels/nanostring) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. --\u003e\n\u003c!-- If you use nf-core/nanostring for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1003?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1003?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1003?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1003?version=4","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1003?version=5","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1003?version=6","name":"1.3.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1003?version=7","name":"1.3.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1004","url":"https://workflowhub.eu/workflows/1004","name":"nf-core/nascent","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-nascent_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/nascent\" src=\"docs/images/nf-core-nascent_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/nascent/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/nascent/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/nascent/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/nascent/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/nascent/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/nascent)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23nascent-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/nascent)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/nascent** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. 
For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/nascent \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/nascent/usage) and the [parameter documentation](https://nf-co.re/nascent/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/nascent/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/nascent/output).\n\n## Credits\n\nnf-core/nascent was originally written by Edmund Miller, Ignacio Tripodi, Margaret Gruca.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#nascent` channel](https://nfcore.slack.com/channels/nascent) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/nascent for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1004?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1004?version=2","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1004?version=3","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1004?version=4","name":"2.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1004?version=5","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1004?version=6","name":"2.3.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1005","url":"https://workflowhub.eu/workflows/1005","name":"nf-core/neutronstar","description":"De novo assembly pipeline for 10X linked-reads, used at the SciLifeLab National Genomics Infrastructure.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1005?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1006","url":"https://workflowhub.eu/workflows/1006","name":"nf-core/oncoanalyser","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-oncoanalyser_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/oncoanalyser\" src=\"docs/images/nf-core-oncoanalyser_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/oncoanalyser/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.15189386-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.15189386)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/oncoanalyser)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23oncoanalyser-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/oncoanalyser)\n[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer DNA and RNA sequencing data\nusing the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. 
The pipeline\nsupports a wide range of experimental setups:\n\n- FASTQ, BAM, and / or CRAM input files\n- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing\u003csup\u003e1\u003c/sup\u003e\n- Paired tumor / normal and tumor-only samples, and support for donor samples for further normal subtraction\n- Purity estimate for longitudinal samples using genomic features of the primary sample from the same patient\u003csup\u003e2\u003c/sup\u003e\n- UMI (unique molecular identifier) processing supported for DNA sequencing data\n- Most GRCh37 and GRCh38 reference genome builds\n\n\u003csub\u003e\u003csup\u003e1\u003c/sup\u003e built-in support for the [TSO500\npanel](https://www.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other\npanels and exomes requiring [creation of custom panel reference\ndata](https://nf-co.re/oncoanalyser/usage#custom-panels)\u003c/sub\u003e\n\u003cbr /\u003e\n\u003csub\u003e\u003csup\u003e2\u003c/sup\u003e for example a primary WGS tissue biospy and longitudinal low-pass WGS ccfDNA sample taken from the\nsame patient\u003c/sub\u003e\n\n## Pipeline overview\n\n\u003cp align=\"center\"\u003e\u003cimg src=\"docs/images/oncoanalyser_pipeline.png\"\u003e\u003c/p\u003e\n\nThe pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some other external\ntools. 
There are [several workflows available](https://nf-co.re/oncoanalyser/usage#introduction) in `oncoanalyser` and\nthe tool information below primarily relates to the `wgts` and `targeted` analysis modes.\n\n\u003e [!NOTE]\n\u003e Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode.\n\n- Read alignment: [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) (DNA), [STAR](https://github.com/alexdobin/STAR) (RNA)\n- Read post-processing: [REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) (DNA), [Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) (RNA)\n- SNV, MNV, INDEL calling: [SAGE](https://github.com/hartwigmedical/hmftools/tree/master/sage), [PAVE](https://github.com/hartwigmedical/hmftools/tree/master/pave)\n- SV calling: [ESVEE](https://github.com/hartwigmedical/hmftools/tree/master/esvee)\n- CNV calling: [AMBER](https://github.com/hartwigmedical/hmftools/tree/master/amber), [COBALT](https://github.com/hartwigmedical/hmftools/tree/master/cobalt), [PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purple)\n- SV and driver event interpretation: [LINX](https://github.com/hartwigmedical/hmftools/tree/master/linx)\n- RNA transcript analysis: [ISOFOX](https://github.com/hartwigmedical/hmftools/tree/master/isofox)\n- Oncoviral detection: [VIRUSbreakend](https://github.com/PapenfussLab/gridss)\\*, [VirusInterpreter](https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter)\\*\n- Telomere characterisation: [TEAL](https://github.com/hartwigmedical/hmftools/tree/master/teal)\\*\n- Immune analysis: [LILAC](https://github.com/hartwigmedical/hmftools/tree/master/lilac), [CIDER](https://github.com/hartwigmedical/hmftools/tree/master/cider), [NEO](https://github.com/hartwigmedical/hmftools/tree/master/neo)\\*\n- Mutational signature fitting: [SIGS](https://github.com/hartwigmedical/hmftools/tree/master/sigs)\\*\n- HRD 
prediction: [CHORD](https://github.com/hartwigmedical/hmftools/tree/master/chord)\\*\n- Tissue of origin prediction: [CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa)\\*\n- Pharmacogenomics: [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach)\n- Summary report: [ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange), [linxreport](https://github.com/umccr/linxreport)\n\nFor the `purity_estimate` mode, several of the above tools are run with adjusted configuration in addition to the following.\n\n- Tumor fraction estimation: [WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp)\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCreate a samplesheet with your inputs (WGS/WTS BAMs in this example):\n\n```csv\ngroup_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath\nPATIENT1_WGTS,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T-RNA,tumor,rna,bam,/path/to/PATIENT1-T.rna.bam\n```\n\nLaunch `oncoanalyser`:\n\n```bash\nnextflow run nf-core/oncoanalyser \\\n  -profile \u003cdocker/singularity/.../institute\u003e \\\n  -revision 2.2.0 \\\n  --mode \u003cwgts/targeted\u003e \\\n  --genome \u003cGRCh37_hmf/GRCh38_hmf\u003e \\\n  --input samplesheet.csv \\\n  --outdir output/\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/oncoanalyser/usage) and the [parameter documentation](https://nf-co.re/oncoanalyser/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/oncoanalyser/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/oncoanalyser/output).\n\n## Version information\n\n### Extended support\n\nAs `oncoanalyser` is used in clinical settings and subject to accreditation standards in some instances, there is a need\nfor long-term stability and reliability for feature releases in order to meet operational requirements. This is\naccomplished through long-term support of several nominated feature releases, which all receive bug fixes and security\nfixes during the period of extended support.\n\nEach release that is given extended support is allocated a separate long-lived git branch with the 'stable' prefix, e.g.\n`stable/1.2.x`, `stable/1.5.x`. 
Feature development otherwise occurs on the `dev` branch with stable releases pushed to\n`master`.\n\nVersions nominated to have current long-term support:\n\n- TBD\n\n## Known issues\n\nPlease refer to [this page](https://github.com/nf-core/oncoanalyser/issues/177) for details regarding any known issues.\n\n## Credits\n\nThe `oncoanalyser` pipeline was written and is maintained by Stephen Watts ([@scwatts](https://github.com/scwatts)) from\nthe [Genomics Platform\nGroup](https://mdhs.unimelb.edu.au/centre-for-cancer-research/our-research/genomics-platform-group) at the [University\nof Melbourne Centre for Cancer Research](https://mdhs.unimelb.edu.au/centre-for-cancer-research).\n\nWe thank the following organisations and people for their extensive assistance in the development of this pipeline,\nlisted in alphabetical order:\n\n- [Hartwig Medical Foundation\n  Australia](https://www.hartwigmedicalfoundation.nl/en/partnerships/hartwig-medical-foundation-australia/)\n- Oliver Hofmann\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#oncoanalyser`\nchannel](https://nfcore.slack.com/channels/oncoanalyser) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nYou can cite the `oncoanalyser` Zenodo record for a specific version using the following DOI:\n[10.5281/zenodo.15189386](https://doi.org/10.5281/zenodo.15189386)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md)\nfile.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia,\n\u003e Paolo Di Tommaso \u0026 Sven 
Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1006?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1006?version=2","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1006?version=3","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1006?version=4","name":"2.2.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1007","url":"https://workflowhub.eu/workflows/1007","name":"nf-core/pangenome","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-pangenome_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/pangenome\" src=\"docs/images/nf-core-pangenome_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/pangenome/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/pangenome/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/pangenome/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/pangenome/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/pangenome/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.8202636-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.8202636)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/pangenome)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23pangenome-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/pangenome)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/pangenome** is a bioinformatics best-practice analysis pipeline for pangenome graph construction. The pipeline renders a collection of sequences into a pangenome graph. Its goal is to build a graph that is locally directed and acyclic while preserving large-scale variation. 
Maintaining local linearity is important for interpretation, visualization, mapping, comparative genomics, and reuse of pangenome graphs.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/pangenome/results).\n\n\u003cp align=\"center\"\u003e\n    \u003cimg title=\"Pangenome Workflow\" src=\"docs/images/pangenome_workflow.png\" width=100%\u003e\n\u003c/p\u003e\n\n## Pipeline summary\n\n- All versus all alignment (`WFMASH`)\n- Graph induction (`SEQWISH`)\n- Graph normalization (`SMOOTHXG`)\n- Remove redundancy (`GFAFFIX`)\n- Graph statistics and qualitative visualizations (`ODGI`)\n- Combine diagnostic information into a report (`MULTIQC`)\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/pangenome -r dev --input \u003cBGZIPPED_FASTA\u003e --n_haplotypes \u003cNUM_HAPS_IN_FASTA\u003e --outdir \u003cOUTDIR\u003e -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/pangenome/usage) and the [parameter documentation](https://nf-co.re/pangenome/parameters).\n\n## Advantages over [`PGGB`](https://github.com/pangenome/pggb)\n\nThis Nextflow pipeline version's major advantage is that it can distribute the usually computationally heavy all versus all alignment step across a whole cluster. It is capable of splitting the initial approximate alignments into problems of equal size. The base-level alignments are then distributed across several processes. 
Assuming you have a cluster with 10 nodes and you are the only one using it, we would recommend to set `--wfmash_chunks 10`.\nIf you have a cluster with 20 nodes, but you have to share it with others, maybe setting it to `--wfmash_chunks 10` could be a good fit, because then you don't have to wait too long for your jobs to finish.\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/pangenome/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/pangenome/output).\n\n## Credits\n\nnf-core/pangenome was originally adapted from [PGGB](https://github.com/pangenome/pggb) by [Simon Heumos](https://github.com/subwaystation), [Michael Heuer](https://github.com/heuermh).\n\n\u003e [Simon Heumos](https://github.com/subwaystation) is currently the sole developer.\n\nMany thanks to all who have helped out and contributed along the way, including (but not limited to)\\*:\n\n| Name                                                       | Affiliation                                                                                                                                                                                                                                                                                                                                                                       |\n| ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| [Philipp Ehmele](https://github.com/imipenem)              
| [Institute of Computational Biology, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/icb/index.html) |\n| [Gisela Gabernet](https://github.com/ggabernet)            | [Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/) \u003cbr\u003e [Department of Pathology, Yale School of Medicine, New Haven, USA](https://medicine.yale.edu/pathology/) |\n| [Erik Garrison](https://github.com/ekg)                    | [University of Tennessee Health Science Center, Memphis, Tennessee, TN, USA](https://uthsc.edu/) |\n| [Andrea Guarracino](https://github.com/AndreaGuarracino)   | [University of Tennessee Health Science Center, Memphis, Tennessee, TN, USA](https://uthsc.edu/) |\n| [Friederike Hanssen](https://github.com/FriederikeHanssen) | [Seqera](https://seqera.io)
                                                                                                                              |\n| [Peter Heringer](https://github.com/heringerp)             | [Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/) \u003cbr\u003e [Biomedical Data Science, Department of Computer Science, University of Tübingen, Germany](https://uni-tuebingen.de/en/faculties/faculty-of-science/departments/computer-science/department/) |\n| [Michael Heuer](https://github.com/heuermh)                | [Mammoth Biosciences, Inc., San Francisco, CA, USA](https://mammoth.bio)                                                                                                                                                                                                                                                                                                          |\n| [Lukas Heumos](https://github.com/zethson)                 | [Institute of Computational Biology, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/icb/index.html) \u003cbr\u003e [Institute of Lung Biology and Disease and Comprehensive Pneumology Center, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/ilbd/the-institute/cpc/index.html)                                              |\n| [Simon Heumos](https://github.com/subwaystation)           | [Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/) \u003cbr\u003e [Biomedical Data Science, Department of Computer Science, University of Tübingen, Germany](https://uni-tuebingen.de/en/faculties/faculty-of-science/departments/computer-science/department/) |\n| [Susanne Jodoin](https://github.com/SusiJo)                | [Quantitative Biology Center 
(QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/)                                                                                                                                                                                                    |\n| [Júlia Mir Pedrol](https://github.com/mirpedrol)           | [Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/)                                                                                                                                                                                                    |\n\n\u003e \\* Listed in alphabetical order\n\n## Acknowledgments\n\n- [QBiC](https://www.qbic.uni-tuebingen.de)\n- [deNBI](https://www.denbi.de/)\n- [Human Pangenome Reference Consortium](https://humanpangenome.org)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#pangenome` channel](https://nfcore.slack.com/channels/pangenome) (you can join with [this invite](https://nf-co.re/join/slack)), or contact me [Simon Heumos](mailto:simon.heumos@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/pangenome).\n\n## Citations\n\nIf you use nf-core/pangenome for your analysis, please cite it using the following doi: [10.5281/zenodo.8202636](https://doi.org/10.5281/zenodo.8202636)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes 
Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\n## Changelog\n\n[CHANGELOG](CHANGELOG.md)\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1007?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1007?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1007?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1007?version=4","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1007?version=5","name":"1.1.3","author":[],"descriptor_type":["NFL"]}]},{"id":"1008","url":"https://workflowhub.eu/workflows/1008","name":"nf-core/pgdb","description":"Proteogenomics database creation workflow using pypgatk framework. 
","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1008?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1009","url":"https://workflowhub.eu/workflows/1009","name":"nf-core/phyloplace","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-phyloplace_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/phyloplace\" src=\"docs/images/nf-core-phyloplace_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/phyloplace/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/phyloplace/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/phyloplace/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/phyloplace/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/phyloplace/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/phyloplace)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23phyloplace-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/phyloplace)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/phyloplace** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/phyloplace \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/phyloplace/usage) and the [parameter documentation](https://nf-co.re/phyloplace/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/phyloplace/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/phyloplace/output).\n\n## Credits\n\nnf-core/phyloplace was originally written by Daniel Lundin.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#phyloplace` channel](https://nfcore.slack.com/channels/phyloplace) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/phyloplace for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1009?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1009?version=2","name":"2.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1010","url":"https://workflowhub.eu/workflows/1010","name":"nf-core/pixelator","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-pixelator_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/pixelator\" src=\"docs/images/nf-core-pixelator_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/pixelator/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/pixelator/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/pixelator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/pixelator/actions/workflows/linting.yml)\n[![AWS 
CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/pixelator/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/pixelator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23pixelator-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/pixelator)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/pixelator** is a bioinformatics best-practice analysis pipeline for analysis of data from the\nMolecular Pixelation (MPX) and Proximity Network (PNA) assays. 
It takes a samplesheet as input and will process your data\nusing `pixelator` to produce a PXL file containing single-cell protein abundance and protein interactomics data.\n\n![](./docs/images/nf-core-pixelator-metromap.svg)\n\nDepending on the input data the pipeline will run different steps.\n\nFor PNA data, the pipeline will run the following steps:\n\n1. Do quality control checks of input reads and build amplicons ([`pixelator single-cell-pna amplicon`](https://github.com/PixelgenTechnologies/pixelator))\n2. Create groups of amplicons based on their marker assignments ([`pixelator single-cell-pna demux`](https://github.com/PixelgenTechnologies/pixelator))\n3. Derive original molecules to use as edge list downstream by error correcting, and counting input amplicons ([`pixelator single-cell-pna collapse`](https://github.com/PixelgenTechnologies/pixelator))\n4. Compute the components of the graph from the edge list in order to create putative cells ([`pixelator single-cell-pna graph`](https://github.com/PixelgenTechnologies/pixelator))\n5. Analyze the spatial information in the cell graphs ([`pixelator single-cell-pna analysis`](https://github.com/PixelgenTechnologies/pixelator))\n6. Generate 3D graph layouts for visualization of cells ([`pixelator single-cell-pna layout`](https://github.com/PixelgenTechnologies/pixelator))\n7. Report generation ([`pixelator single-cell-pna report`](https://github.com/PixelgenTechnologies/pixelator))\n\nFor MPX data, the pipeline will run the following steps:\n\n1. Build an amplicons from the input reads ([`pixelator single-cell-mpx amplicon`](https://github.com/PixelgenTechnologies/pixelator))\n2. Read QC and filtering, correctness of the pixel binding sequence sequences ([`pixelator single-cell-mpx preqc | pixelator adapterqc`](https://github.com/PixelgenTechnologies/pixelator))\n3. Assign a marker (barcode) to each read ([`pixelator single-cell-mpx demux`](https://github.com/PixelgenTechnologies/pixelator))\n4. 
Error correction, duplicate removal, compute read counts ([`pixelator single-cell-mpx collapse`](https://github.com/PixelgenTechnologies/pixelator))\n5. Compute the components of the graph from the edge list in order to create putative cells ([`pixelator single-cell-mpx graph`](https://github.com/PixelgenTechnologies/pixelator))\n6. Call and annotate cells ([`pixelator single-cell-mpx annotate`](https://github.com/PixelgenTechnologies/pixelator))\n7. Analyze the cells for polarization and colocalization ([`pixelator single-cell-mpx analysis`](https://github.com/PixelgenTechnologies/pixelator))\n8. Generate 3D graph layouts for visualization of cells ([`pixelator single-cell-mpx layout`](https://github.com/PixelgenTechnologies/pixelator))\n9. Report generation ([`pixelator single-cell-mpx report`](https://github.com/PixelgenTechnologies/pixelator))\n\n\u003e [!WARNING]\n\u003e Since Nextflow 23.07.0-edge, Nextflow no longer mounts the host's home directory when using Apptainer or Singularity.\n\u003e This causes issues in some dependencies. As a workaround, you can revert to the old behavior by setting the environment variable\n\u003e `NXF_APPTAINER_HOME_MOUNT` or `NXF_SINGULARITY_HOME_MOUNT` to `true` in the machine from which you launch the pipeline.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows (the exact values you need to input depend on the design and panel you are using):\n\n`samplesheet.csv`:\n\n```csv\nsample,design,panel,fastq_1,fastq_2\nsample1,pna-2,proxiome-immuno-155,sample1_R1_001.fastq.gz,sample1_R2_001.fastq.gz\n```\n\nEach row represents a sample and gives the design, a panel file and the input fastq files.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/pixelator \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e This version of the pipeline does not support conda environments, due to issues with upstream dependencies.\n\u003e This means you cannot use the `conda` and `mamba` profiles. Please use `docker` or `singularity` instead.\n\u003e We hope to add support for conda environments in the future.\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/pixelator/usage) and the [parameter documentation](https://nf-co.re/pixelator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/pixelator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/pixelator/output).\n\n## Credits\n\nnf-core/pixelator was originally written for [Pixelgen Technologies AB](https://www.pixelgen.com/) by:\n\n- Florian De Temmerman\n- Johan Dahlberg\n- Alvaro Martinez Barrio\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#pixelator` channel](https://nfcore.slack.com/channels/pixelator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/pixelator for your analysis, please cite it using the following doi: [10.5281/zenodo.10015112](https://doi.org/10.5281/zenodo.10015112)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\nYou can cite the molecular pixelation technology as follows:\n\n\u003e **Molecular pixelation: spatial proteomics of single cells by sequencing.**\n\u003e\n\u003e Filip Karlsson, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, Sylvain Geny, Erik Pettersson, Jan Rhomberg-Kauert, Ludvig Larsson, Hanna van Ooijen, Stefan Petkov, Marcela González-Granillo, Jessica Bunz, Johan Dahlberg, Michele Simonetti, Prajakta Sathe, Petter Brodin, Alvaro Martinez Barrio \u0026 Simon Fredriksson\n\u003e\n\u003e _Nat Methods._ 2024 May 08. doi: [10.1038/s41592-024-02268-9](https://doi.org/10.1038/s41592-024-02268-9)\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1010?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1010?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1010?version=3","name":"1.0.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1010?version=4","name":"1.0.3","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1010?version=5","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1010?version=6","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1010?version=7","name":"1.3.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1010?version=8","name":"1.3.1","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1010?version=9","name":"1.4.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflo
ws/1010?version=10","name":"2.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1011","url":"https://workflowhub.eu/workflows/1011","name":"nf-core/proteinfold","description":"Protein 3D structure prediction pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1011?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1011?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1011?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1012","url":"https://workflowhub.eu/workflows/1012","name":"nf-core/proteomicslfq","description":"Proteomics label-free quantification (LFQ) analysis pipeline using OpenMS and MSstats, with feature quantification, feature summarization, quality control and group-based statistical analysis.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1012?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1013","url":"https://workflowhub.eu/workflows/1013","name":"nf-core/quantms","description":"Quantitative Mass Spectrometry nf-core workflow","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1013?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1013?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1013?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1013?version=4","name":"1.2.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1014","url":"https://workflowhub.eu/workflows/1014","name":"nf-core/raredisease","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-raredisease_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/raredisease\" src=\"docs/images/nf-core-raredisease_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/raredisease/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/ci.yml)\n\n[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS 
CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/raredisease)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23raredisease-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/raredisease)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n#### TOC\n\n- [Introduction](#introduction)\n- [Pipeline summary](#pipeline-summary)\n- [Usage](#usage)\n- [Pipeline 
output](#pipeline-output)\n- [Credits](#credits)\n- [Contributions and Support](#contributions-and-support)\n- [Citations](#citations)\n\n## Introduction\n\n**nf-core/raredisease** is a best-practice bioinformatic pipeline for calling and scoring variants from WGS/WES data from rare disease patients. This pipeline is heavily inspired by [MIP](https://github.com/Clinical-Genomics/MIP).\n\n\u003e [!NOTE]\n\u003e Right now, we only support paired-end data from Illumina. If you've got other types of data and the pipeline doesn't work for you, just open an issue. We'd be happy to chat about a solution.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/raredisease/results).\n\n## Pipeline summary\n\n  \u003cpicture align=\"center\"\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/raredisease_metromap_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/raredisease workflow\" src=\"docs/images/raredisease_metromap_light.png\"\u003e\n  \u003c/picture\u003e\n\n**1. Metrics:**\n\n- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\n- [Mosdepth](https://github.com/brentp/mosdepth)\n- [MultiQC](http://multiqc.info/)\n- [Picard's CollectMutipleMetrics, CollectHsMetrics, and CollectWgsMetrics](https://broadinstitute.github.io/picard/)\n- [Qualimap](http://qualimap.conesalab.org/)\n- [Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/)\n- [TIDDIT's cov](https://github.com/J35P312/)\n- [VerifyBamID2](https://github.com/Griffan/VerifyBamID)\n\n**2. Alignment:**\n\n- [Bwa-mem2](https://github.com/bwa-mem2/bwa-mem2)\n- [BWA-MEME](https://github.com/kaist-ina/BWA-MEME)\n- [BWA](https://github.com/lh3/bwa)\n- [Sentieon DNAseq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/)\n\n**3. Variant calling - SNV:**\n\n- [DeepVariant](https://github.com/google/deepvariant)\n- [Sentieon DNAscope](https://support.sentieon.com/manual/DNAscope_usage/dnascope/)\n\n**4. Variant calling - SV:**\n\n- [Manta](https://github.com/Illumina/manta)\n- [TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT)\n- Copy number variant calling:\n  - [CNVnator](https://github.com/abyzovlab/CNVnator)\n  - [GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk)\n\n**5. 
Annotation - SNV:**\n\n- [bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh)\n- [vcfanno](https://github.com/brentp/vcfanno)\n- [CADD](https://cadd.gs.washington.edu/)\n- [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n- [UPD](https://github.com/bjhall/upd)\n- [Chromograph](https://github.com/Clinical-Genomics/chromograph)\n\n**6. Annotation - SV:**\n\n- [SVDB query](https://github.com/J35P312/SVDB#Query)\n- [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n\n**7. Mitochondrial analysis:**\n\n- [Alignment and variant calling - GATK Mitochondrial short variant discovery pipeline ](https://gatk.broadinstitute.org/hc/en-us/articles/4403870837275-Mitochondrial-short-variant-discovery-SNVs-Indels-)\n- [eKLIPse](https://github.com/dooguypapua/eKLIPse/tree/master)\n- Annotation:\n  - [HaploGrep2](https://github.com/seppinho/haplogrep-cmd)\n  - [Hmtnote](https://github.com/robertopreste/HmtNote)\n  - [vcfanno](https://github.com/brentp/vcfanno)\n  - [CADD](https://cadd.gs.washington.edu/)\n  - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n\n**8. Variant calling - repeat expansions:**\n\n- [Expansion Hunter](https://github.com/Illumina/ExpansionHunter)\n- [Stranger](https://github.com/Clinical-Genomics/stranger)\n\n**9. Variant calling - mobile elements:**\n\n- [RetroSeq](https://github.com/tk2/RetroSeq)\n\n**10. Rank variants - SV and SNV:**\n\n- [GENMOD](https://github.com/Clinical-Genomics/genmod)\n\n**11. Variant evaluation:**\n\n- [RTG Tools](https://github.com/RealTimeGenomics/rtg-tools)\n\nNote that it is possible to include/exclude certain tools or steps.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,lane,fastq_1,fastq_2,sex,phenotype,paternal_id,maternal_id,case_id\nhugelymodelbat,1,reads_1.fastq.gz,reads_2.fastq.gz,1,2,,,justhusky\n```\n\nEach row represents a pair of fastq files (paired end).\n\nSecond, ensure that you have defined the path to reference files and parameters required for the type of analysis that you want to perform. More information about this can be found [here](https://github.com/nf-core/raredisease/blob/dev/docs/usage.md).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/raredisease \\\n   -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/raredisease/usage) and the [parameter documentation](https://nf-co.re/raredisease/parameters).\n\n## Pipeline output\n\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/raredisease/output).\n\n## Credits\n\nnf-core/raredisease was written in a collaboration between the Clinical Genomics nodes in Sweden, with major contributions from [Ramprasad Neethiraj](https://github.com/ramprasadn), [Anders Jemt](https://github.com/jemten), [Lucia Pena Perez](https://github.com/Lucpen), and [Mei Wu](https://github.com/projectoriented) at Clinical Genomics Stockholm.\n\nAdditional contributors were [Sima Rahimi](https://github.com/sima-r), [Gwenna Breton](https://github.com/Gwennid) and [Emma Västerviga](https://github.com/EmmaCAndersson) (Clinical Genomics Gothenburg); [Halfdan Rydbeck](https://github.com/hrydbeck) and [Lauri Mesilaakso](https://github.com/ljmesi) (Clinical Genomics Linköping); [Subazini Thankaswamy Kosalai](https://github.com/sysbiocoder) (Clinical Genomics Örebro); [Annick Renevey](https://github.com/rannick), [Peter Pruisscher](https://github.com/peterpru) and [Eva Caceres](https://github.com/fevac) (Clinical Genomics Stockholm); [Ryan Kennedy](https://github.com/ryanjameskennedy) (Clinical Genomics Lund); [Anders Sune Pedersen](https://github.com/asp8200) (Danish National Genome Center) and [Lucas Taniguti](https://github.com/lmtani).\n\nWe thank the nf-core community for their extensive assistance in the development of this pipeline.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#raredisease` channel](https://nfcore.slack.com/channels/raredisease) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/raredisease for your analysis, please cite it using the following doi: [10.5281/zenodo.7995798](https://doi.org/10.5281/zenodo.7995798)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\nYou can read more about MIP's use in healthcare in,\n\n\u003e Stranneheim H, Lagerstedt-Robinson K, Magnusson M, et al. Integration of whole genome sequencing into a healthcare setting: high diagnostic rates across multiple clinical entities in 3219 rare disease patients. Genome Med. 2021;13(1):40. 
doi:10.1186/s13073-021-00855-5\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1014?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1014?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1014?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1014?version=4","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1014?version=5","name":"2.0.1","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1014?version=6","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1014?version=7","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1014?version=8","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1014?version=9","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1014?version=10","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1014?version=11","name":"2.6.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1015","url":"https://workflowhub.eu/workflows/1015","name":"nf-core/readsimulator","description":"A workflow to simulate reads","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1015?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1015?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1016","url":"https://workflowhub.eu/workflows/1016","name":"nf-core/riboseq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-riboseq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/riboseq\" src=\"docs/images/nf-core-riboseq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/riboseq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/riboseq/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/riboseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/riboseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/riboseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/riboseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23riboseq-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/riboseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/riboseq** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/riboseq \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/riboseq/usage) and the [parameter documentation](https://nf-co.re/riboseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/riboseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/riboseq/output).\n\n## Credits\n\nnf-core/riboseq was originally written by Jonathan Manning.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#riboseq` channel](https://nfcore.slack.com/channels/riboseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/riboseq for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1016?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1016?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1016?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1017","url":"https://workflowhub.eu/workflows/1017","name":"nf-core/rnafusion","description":"Nextflow rnafusion analysis pipeline, part of the nf-core community.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1017?version=1","name":"1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1017?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1017?version=3","name":"1.0.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1017?version=4","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1017?version=5","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1017?version=6","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1017?version=7","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1017?version=8","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1017?version=9","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1017?version=10","name":"2.3.4","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1017?version=11","name":"2.3.3","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/1017?version=12","name":"2.3.2","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/1017?version=13","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/1017?version=14","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/1017?version=15","name":"3.0.0","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/1017?version=16","name":"3.0.1","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/work
flows/1017?version=17","name":"3.0.2","author":[],"descriptor_type":["NFL"]}]},{"id":"1018","url":"https://workflowhub.eu/workflows/1018","name":"nf-core/rnasplice","description":"Alternative splicing analysis using RNA-seq.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1018?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1018?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1018?version=3","name":"1.0.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1018?version=4","name":"1.0.3","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1018?version=5","name":"1.0.4","author":[],"descriptor_type":["NFL"]}]},{"id":"1019","url":"https://workflowhub.eu/workflows/1019","name":"nf-core/rnavar","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-rnavar_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/rnavar\" src=\"docs/images/nf-core-rnavar_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/rnavar/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/rnavar/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/rnavar/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnavar/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/rnavar/results)\n[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6669636-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.6669636)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnavar)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnavar-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/rnavar)\n[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/rnavar** is a bioinformatics pipeline for RNA variant calling 
analysis following GATK4 best practices.\n\n## Pipeline summary\n\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\n2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n3. (Optionally) Extract UMIs from FASTQ reads ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n4. (Optionally) HLATyping from FASTQ reads ([`Seq2HLA`](https://github.com/TRON-Bioinformatics/seq2HLA))\n5. Align reads to reference genome ([`STAR`](https://github.com/alexdobin/STAR))\n6. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n7. Duplicate read marking ([`Picard MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard))\n8. Scatter one interval-list into many interval-files ([`GATK4 IntervalListTools`](https://gatk.broadinstitute.org/hc/en-us/articles/4409917392155-IntervalListTools-Picard-))\n9. Splits reads that contain Ns in their cigar string ([`GATK4 SplitNCigarReads`](https://gatk.broadinstitute.org/hc/en-us/articles/4409917482651-SplitNCigarReads))\n10. Estimate and correct systematic bias using base quality score recalibration ([`GATK4 BaseRecalibrator`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897206043-BaseRecalibrator), [`GATK4 ApplyBQSR`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897168667-ApplyBQSR))\n11. Convert a BED file to a Picard Interval List ([`GATK4 BedToIntervalList`](https://gatk.broadinstitute.org/hc/en-us/articles/4409924780827-BedToIntervalList-Picard-))\n12. Call SNPs and indels ([`GATK4 HaplotypeCaller`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897180827-HaplotypeCaller))\n13. Merge multiple VCF files into one VCF ([`GATK4 MergeVCFs`](https://gatk.broadinstitute.org/hc/en-us/articles/4409924817691-MergeVcfs-Picard-))\n14. Index the VCF ([`Tabix`](http://www.htslib.org/doc/tabix.html))\n15. 
Filter variant calls based on certain criteria ([`GATK4 VariantFiltration`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897204763-VariantFiltration))\n16. Annotate variants ([`BCFtools Annotate`](https://samtools.github.io/bcftools/bcftools.html), [`snpEff`](https://pcingola.github.io/SnpEff/se_introduction/), [Ensembl VEP](https://www.ensembl.org/info/docs/tools/vep/index.html))\n17. Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/))\n\n### Summary of tools and version used in the pipeline\n\n| Tool        | Version |\n| ----------- | ------- |\n| BCFtools    | 1.21    |\n| BEDtools    | 2.31.1  |\n| Ensembl VEP | 114.2   |\n| FastQC      | 0.12.1  |\n| GATK        | 4.6.1.0 |\n| mosdepth    | 0.3.10  |\n| MultiQC     | 1.29    |\n| Picard      | 3.3.0   |\n| Samtools    | 1.21    |\n| Seq2HLA     | 2.3     |\n| SnpEff      | 5.1     |\n| STAR        | 2.7.11b |\n| Tabix       | 1.20    |\n| UMI-tools   | 1.1.5   |\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow.Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```console\nnextflow run nf-core/rnavar -profile \u003cdocker/singularity/podman/shifter/charliecloud/conda/institute\u003e --input samplesheet.csv  --outdir \u003cOUTDIR\u003e --genome GRCh38\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline 
parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnavar/usage) and the [parameter documentation](https://nf-co.re/rnavar/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnavar/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/rnavar/output).\n\n## Credits\n\nrnavar was originally written by Praveen Raj and Maxime U Garcia at [The Swedish Childhood Tumor Biobank (Barntumörbanken), Karolinska Institutet](https://ki.se/forskning/barntumorbanken).\nNicolas Vannieuwkerke at [CMGG](https://www.cmgg.be/en/) later joined and helped with further development (v 1.1.0 and forward).\n\nMaintenance is now lead by Maxime U Garcia (now at [Seqera](https://seqera.io))\n\nMain developers:\n\n- [Maxime U Garcia](https://github.com/maxulysse)\n- [Nicolas Vannieuwkerke](https://github.com/nvnieuwk)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Harshil Patel](https://github.com/drpatelh)\n- [Nicolás Schcolnicov](https://github.com/nschcolnicov)\n- [Ömer An](https://github.com/bounlu)\n- [Phil Ewels](https://github.com/ewels)\n- [Praveen Raj](https://github.com/praveenraj2018)\n- [Sarah Maman](https://github.com/SarahMaman)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#rnavar` 
channel](https://nfcore.slack.com/channels/rnavar) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/rnavar for your analysis, please cite it using the following doi: [10.5281/zenodo.6669636](https://doi.org/10.5281/zenodo.6669636)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1019?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1019?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1019?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1019?version=4","name":"1.2.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1020","url":"https://workflowhub.eu/workflows/1020","name":"nf-core/sarek","description":"An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1020?version=1","name":"2.5","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1020?version=2","name":"2.5.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1020?version=3","name":"2.5.2","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1020?version=4","name":"2.6","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1020?version=5","name":"2.6.1","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1020?version=6","name":"2.7","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1020?version=7","name":"2.7.1","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1020?version=8","name":"2.7.2","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1020?version=9","name":"3.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1020?version=10","name":"3.0.1","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1020?version=11","name":"3.0.2","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/1020?version=12","name":"3.1","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/1020?version=13","name":"3.1.1","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/1020?version=14","name":"3.1.2","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/1020?version=15","name":"3.2.0","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/1020?version=16","name":"3.2.1","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/10
20?version=17","name":"3.2.2","author":[],"descriptor_type":["NFL"]},{"id":"18","url":"https://workflowhub.eu/workflows/1020?version=18","name":"3.2.3","author":[],"descriptor_type":["NFL"]},{"id":"19","url":"https://workflowhub.eu/workflows/1020?version=19","name":"3.3.0","author":[],"descriptor_type":["NFL"]},{"id":"20","url":"https://workflowhub.eu/workflows/1020?version=20","name":"3.3.1","author":[],"descriptor_type":["NFL"]},{"id":"21","url":"https://workflowhub.eu/workflows/1020?version=21","name":"3.3.2","author":[],"descriptor_type":["NFL"]},{"id":"22","url":"https://workflowhub.eu/workflows/1020?version=22","name":"3.4.0","author":[],"descriptor_type":["NFL"]},{"id":"23","url":"https://workflowhub.eu/workflows/1020?version=23","name":"3.4.1","author":[],"descriptor_type":["NFL"]},{"id":"24","url":"https://workflowhub.eu/workflows/1020?version=24","name":"3.4.2","author":[],"descriptor_type":["NFL"]},{"id":"25","url":"https://workflowhub.eu/workflows/1020?version=25","name":"3.4.3","author":[],"descriptor_type":["NFL"]},{"id":"26","url":"https://workflowhub.eu/workflows/1020?version=26","name":"3.4.4","author":[],"descriptor_type":["NFL"]},{"id":"27","url":"https://workflowhub.eu/workflows/1020?version=27","name":"3.5.0","author":[],"descriptor_type":["NFL"]},{"id":"28","url":"https://workflowhub.eu/workflows/1020?version=28","name":"3.5.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1021","url":"https://workflowhub.eu/workflows/1021","name":"nf-core/scrnaseq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-scrnaseq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/scrnaseq\" src=\"docs/images/nf-core-scrnaseq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/scrnaseq/actions/workflows/ci.yml)\n[![GitHub Actions 
Linting Status](https://github.com/nf-core/scrnaseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scrnaseq/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3568187-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3568187)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scrnaseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scrnaseq-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/scrnaseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for processing 10x 
Genomics single-cell RNA-seq data.\n\nThis is a community effort in building a pipeline capable to support:\n\n- SimpleAF(Alevin-Fry) + AlevinQC\n- STARSolo\n- Kallisto + BUStools\n- Cellranger\n\n\u003e [!IMPORTANT]\n\u003e Cellranger is a commercial tool from 10X Genomics Inc. and falls under the EULA from 10X Genomics Inc. The container provided for the CellRanger functionality in this pipeline has been built by the nf-core community and is therefore _not supported by 10X genomics_ directly. We are in discussions with 10X on how to improve the user experience and licence situation for both us as a community as well as 10X and end users and will update this statement here accordingly.\n\n## Documentation\n\nThe nf-core/scrnaseq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/scrnaseq/usage), [parameters](https://nf-co.re/scrnaseq/parameters) and [output](https://nf-co.re/scrnaseq/output).\n\n![scrnaseq workflow](docs/images/scrnaseq_pipeline_V3.0-metro_clean.png)\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,expected_cells\npbmc8k,pbmc8k_S1_L007_R1_001.fastq.gz,pbmc8k_S1_L007_R2_001.fastq.gz,10000\npbmc8k,pbmc8k_S1_L008_R1_001.fastq.gz,pbmc8k_S1_L008_R2_001.fastq.gz,10000\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scrnaseq \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --fasta GRCm38.p6.genome.chr19.fa \\\n   --gtf gencode.vM19.annotation.chr19.gtf \\\n   --protocol 10XV2 \\\n   --aligner \u003csimpleaf/kallisto/star/cellranger\u003e \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scrnaseq/usage) and the [parameter documentation](https://nf-co.re/scrnaseq/parameters).\n\n## Decision Tree for users\n\nThe nf-core/scrnaseq pipeline features several paths to analyze your single cell data. Future additions will also be done soon, e.g. the addition of multi-ome analysis types. 
To aid users in analyzing their data, we have added a decision tree to help people decide on what type of analysis they want to run and how to choose appropriate parameters for that.\n\n```mermaid\ngraph TD\n    A[sc RNA] --\u003e|alevin-fry| B(h5ad/seurat/mtx matrices)\n    A[sc RNA] --\u003e|CellRanger| B(h5ad/seurat/mtx matrices)\n    A[sc RNA] --\u003e|kbpython| B(h5ad/seurat/mtx matrices)\n    A[sc RNA] --\u003e|STARsolo| B(h5ad/seurat/mtx matrices)\n```\n\nOptions for the respective alignment method can be found [here](https://github.com/nf-core/scrnaseq/blob/dev/docs/usage.md#aligning-options) to choose between methods.\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scrnaseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scrnaseq/output).\n\n## Credits\n\nnf-core/scrnaseq was originally written by Bailey PJ, Botvinnik O, Marques de Almeida F, Gabernet G, Peltzer A, Sturm G.\n\nWe thank the following people and teams for their extensive assistance in the development of this pipeline:\n\n- @heylf\n- @KevinMenden\n- @FloWuenne\n- @rob-p\n- [GHGA](https://www.ghga.de/)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scrnaseq` channel](https://nfcore.slack.com/channels/scrnaseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/scrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3568187](https://doi.org/10.5281/zenodo.3568187)\n\nThe basic benchmarks that were used as motivation for incorporating the available modular workflows can be found in [this 
publication](https://www.biorxiv.org/content/10.1101/673285v2).\n\nWe offer all three paths for the processing of scRNAseq data so it remains up to the user to decide which pipeline workflow is chosen for a particular analysis question.\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1021?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1021?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1021?version=3","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1021?version=4","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1021?version=5","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1021?version=6","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1021?version=7","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1021?version=8","name":"2.3.2","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1021?version=9","name":"2.4.0","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1021?version=10","name":"2.4.1","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1021?version=11","name":"2.5.0","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/1021?version=12","name":"2.5.1","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/1021?version=13",
"name":"2.6.0","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/1021?version=14","name":"2.7.0","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/1021?version=15","name":"2.7.1","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/1021?version=16","name":"3.0.0","author":[],"descriptor_type":["NFL"]},{"id":"17","url":"https://workflowhub.eu/workflows/1021?version=17","name":"4.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1022","url":"https://workflowhub.eu/workflows/1022","name":"nf-core/slamseq","description":"SLAMseq analysis using Slamdunk with various T\u003eC conversion quantifications and QC","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1022?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1023","url":"https://workflowhub.eu/workflows/1023","name":"nf-core/smrnaseq","description":"Small RNA-Seq Best Practice Analysis Pipeline.","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1023?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1023?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1023?version=3","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1023?version=4","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1023?version=5","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1023?version=6","name":"2.2.1","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1023?version=7","name":"2.2.2","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1023?version=8","name":"2.2.3","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1023?version=9","name":"2.2.4","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1023?version=10","name":"2.3.0","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1023?version=11","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/1023?version=12","name":"2.4.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1025","url":"https://workflowhub.eu/workflows/1025","name":"nf-core/taxprofiler","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-taxprofiler_logo_custom_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/taxprofiler\" src=\"docs/images/nf-core-taxprofiler_logo_custom_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI 
Status](https://github.com/nf-core/taxprofiler/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/taxprofiler/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/taxprofiler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/taxprofiler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7728364-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7728364)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.2-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/taxprofiler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/taxprofiler)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n[![Cite Preprint](https://img.shields.io/badge/Cite%20Us!-Cite%20Preprint-orange)](https://doi.org/10.1101/2023.10.20.563221)\n\n## Introduction\n\n**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic classification and profiling of shotgun short- and long-read metagenomic data. It allows for in-parallel taxonomic identification of reads or taxonomic abundance estimation with multiple classification and profiling tools against multiple databases, and produces standardised output tables for facilitating results comparison between different tools and databases.\n\n## Pipeline summary\n\n![](docs/images/taxprofiler_tube.png)\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)\n2. 
Performs optional read pre-processing\n   - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop), [Porechop_ABI](https://github.com/bonsai-team/Porechop_ABI))\n   - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong)), [Nanoq](https://github.com/esteinig/nanoq)\n   - Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))\n   - Run merging\n3. Supports statistics metagenome coverage estimation ([Nonpareil](https://nonpareil.readthedocs.io/en/latest/)) and for host-read removal ([Samtools](http://www.htslib.org/))\n4. Performs taxonomic classification and/or profiling using one or more of:\n   - [Kraken2](https://ccb.jhu.edu/software/kraken2/)\n   - [MetaPhlAn](https://huttenhower.sph.harvard.edu/metaphlan/)\n   - [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)\n   - [DIAMOND](https://github.com/bbuchfink/diamond)\n   - [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)\n   - [Kaiju](https://kaiju.binf.ku.dk/)\n   - [mOTUs](https://motu-tool.org/)\n   - [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)\n   - [KMCP](https://github.com/shenwei356/kmcp)\n   - [ganon](https://pirovc.github.io/ganon/)\n5. Perform optional post-processing with:\n   - [bracken](https://ccb.jhu.edu/software/bracken/)\n6. Standardises output tables ([`Taxpasta`](https://taxpasta.readthedocs.io))\n7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n8. 
Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,run_accession,instrument_platform,fastq_1,fastq_2,fasta\n2612,run1,ILLUMINA,2612_run1_R1.fq.gz,,\n2612,run2,ILLUMINA,2612_run2_R1.fq.gz,,\n2612,run3,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz,\n```\n\nEach row represents a fastq file (single-end), a pair of fastq files (paired end), or a fasta (with long reads).\n\nAdditionally, you will need a database sheet that looks as follows:\n\n```csv title=\"databases.csv\"\ntool,db_name,db_params,db_path\nkraken2,db2,--quick,/\u003cpath\u003e/\u003cto\u003e/kraken2/testdb-kraken2.tar.gz\nmetaphlan,db1,,/\u003cpath\u003e/\u003cto\u003e/metaphlan/metaphlan_database/\n```\n\nThat includes directories or `.tar.gz` archives containing databases for the tools you wish to run the pipeline against.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/taxprofiler \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --databases databases.csv \\\n   --outdir \u003cOUTDIR\u003e  \\\n   --run_kraken2 --run_metaphlan\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/taxprofiler/usage) and the [parameter documentation](https://nf-co.re/taxprofiler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/taxprofiler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/taxprofiler/output).\n\n## Credits\n\nnf-core/taxprofiler was originally written by James A. Fellows Yates, Sofia Stamouli, Moritz E. Beber, Lili Andersson-Li, and the nf-core/taxprofiler team.\n\n### Team\n\n- [James A. Fellows Yates](https://github.com/jfy133)\n- [Sofia Stamouli](https://github.com/sofstam)\n- [Moritz E. Beber](https://github.com/Midnighter)\n- [Lili Andersson-Li](https://github.com/LilyAnderssonLee)\n\nWe thank the following people for their contributions to the development of this pipeline:\n\n- [Lauri Mesilaakso](https://github.com/ljmesi)\n- [Tanja Normark](https://github.com/talnor)\n- [Maxime Borry](https://github.com/maxibor)\n- [Thomas A. 
Christensen II](https://github.com/MillironX)\n- [Jianhong Ou](https://github.com/jianhong)\n- [Rafal Stepien](https://github.com/rafalstepien)\n- [Mahwash Jamy](https://github.com/mjamy)\n- [Alex Caswell](https://github.com/AlexHoratio)\n- [Aidan Epstein](https://github.com/epstein6)\n\n### Acknowledgments\n\nWe also are grateful for the feedback and comments from:\n\n- The general [nf-core/community](https://nf-co.re/community)\n\nAnd specifically to\n\n- [Alex Hübner](https://github.com/alexhbnr)\n\n❤️ also goes to [Zandra Fagernäs](https://github.com/ZandraFagernas) for the logo.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#taxprofiler` channel](https://nfcore.slack.com/channels/taxprofiler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/taxprofiler for your analysis, please cite it using the following doi: [10.1101/2023.10.20.563221](https://doi.org/10.1101/2023.10.20.563221).\n\n\u003e Stamouli, S., Beber, M. E., Normark, T., Christensen II, T. A., Andersson-Li, L., Borry, M., Jamy, M., nf-core community, \u0026 Fellows Yates, J. A. (2023). nf-core/taxprofiler: Highly parallelised and flexible pipeline for metagenomic taxonomic classification and profiling. In bioRxiv (p. 2023.10.20.563221). 
https://doi.org/10.1101/2023.10.20.563221\n\nFor the latest version of the code, cite the Zenodo doi: [10.5281/zenodo.7728364](https://doi.org/10.5281/zenodo.7728364)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1025?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1025?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1025?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1025?version=4","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1025?version=5","name":"1.1.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1025?version=6","name":"1.1.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1025?version=7","name":"1.1.4","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1025?version=8","name":"1.1.5","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1025?version=9","name":"1.1.6","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1025?version=10
","name":"1.1.7","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1025?version=11","name":"1.1.8","author":[],"descriptor_type":["NFL"]},{"id":"12","url":"https://workflowhub.eu/workflows/1025?version=12","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"13","url":"https://workflowhub.eu/workflows/1025?version=13","name":"1.2.1","author":[],"descriptor_type":["NFL"]},{"id":"14","url":"https://workflowhub.eu/workflows/1025?version=14","name":"1.2.2","author":[],"descriptor_type":["NFL"]},{"id":"15","url":"https://workflowhub.eu/workflows/1025?version=15","name":"1.2.3","author":[],"descriptor_type":["NFL"]},{"id":"16","url":"https://workflowhub.eu/workflows/1025?version=16","name":"1.2.4","author":[],"descriptor_type":["NFL"]}]},{"id":"1026","url":"https://workflowhub.eu/workflows/1026","name":"nf-core/viralintegration","description":"Integration of viral sequences in genomic data","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1026?version=1","name":"0.1.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1026?version=2","name":"0.1.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1027","url":"https://workflowhub.eu/workflows/1027","name":"nf-core/viralrecon","description":"Assembly and intrahost/low-frequency variant calling for viral samples","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1027?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1027?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1027?version=3","name":"2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1027?version=4","name":"2.1","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1027?version=5","name":"2.2","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1027?version=6","name":"2.3","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1027?version=7","name":"2.3.1","author":[],"descriptor_type":["NFL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1027?version=8","name":"2.4","author":[],"descriptor_type":["NFL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1027?version=9","name":"2.4.1","author":[],"descriptor_type":["NFL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1027?version=10","name":"2.5","author":[],"descriptor_type":["NFL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1027?version=11","name":"2.6.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1030","url":"https://workflowhub.eu/workflows/1030","name":"IMPaCT-Data quality control workflow implementation in nf-core/Sarek","description":"# ![IMPaCT 
program](https://github.com/EGA-archive/sarek-IMPaCT-data-QC/blob/master/impact_qc/docs/png/impact_data_logo_pink_horitzontal.png)\r\n\r\n[![IMPaCT](https://img.shields.io/badge/Web%20-IMPaCT-blue)](https://impact.isciii.es/)\r\n[![IMPaCT-isciii](https://img.shields.io/badge/Web%20-IMPaCT--isciii-red)](https://www.isciii.es/QueHacemos/Financiacion/IMPaCT/Paginas/default.aspx)\r\n[![IMPaCT-Data](https://img.shields.io/badge/Web%20-IMPaCT--Data-1d355c.svg?labelColor=000000)](https://impact-data.bsc.es/)\r\n\r\n## Introduction of the project\r\n\r\nIMPaCT-Data is the IMPaCT program that aims to support the development of a common, interoperable and integrated system for the collection and analysis of clinical and molecular data by providing the knowledge and resources available in the Spanish Science and Technology System. This development will make it possible to answer research questions based on the different clinical and molecular information systems available. Fundamentally, it aims to provide researchers with a population perspective based on individual data.\r\n\r\nThe IMPaCT-Data project is divided into different work packages (WP). In the context of IMPaCT-Data WP3 (Genomics), a working group of experts worked on the generation of a specific quality control (QC) workflow for germline exome samples.\r\n\r\nTo achieve this, a set of metrics related to human genomic data was decided upon, and the toolset or software to extract these metrics was implemented in an existing variant calling workflow called Sarek, part of the nf-core community. 
The final outcome is a Nextflow subworkflow, called IMPaCT-QC implemented in the Sarek pipeline.\r\n\r\nBelow you can find the explanation of this workflow (raw pipeline), the link to the documentation of the IMPaCT QC subworkflow and a linked documentation associated to the QC metrics added in the mentioned workflow.\r\n\r\n- [IMPaCT-data subworkflow documentation](https://github.com/EGA-archive/sarek-IMPaCT-data-QC/tree/master/impact_qc)\r\n\r\n- [Metrics documentation](https://github.com/EGA-archive/sarek-IMPaCT-data-QC/blob/master/impact_qc/docs/QC_Sarek_supporing_documentation.pdf)\r\n\r\n\u003ch1\u003e\r\n  \u003cpicture\u003e\r\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-sarek_logo_dark.png\"\u003e\r\n    \u003cimg alt=\"nf-core/sarek\" src=\"docs/images/nf-core-sarek_logo_light.png\"\u003e\r\n  \u003c/picture\u003e\r\n\u003c/h1\u003e\r\n\r\n[![GitHub Actions CI Status](https://github.com/nf-core/sarek/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/ci.yml)\r\n[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/linting.yml)\r\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/sarek/results)\r\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\r\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3476425-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3476425)\r\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/sarek)\r\n\r\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/sarek)\r\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)\r\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)\r\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\r\n\r\n## Introduction\r\n\r\n**nf-core/sarek** is a workflow designed to detect variants on whole genome or targeted sequencing data. Initially designed for Human, and Mouse, it can work on any species with a reference genome. Sarek can also handle tumour / normal pairs and could include additional relapses.\r\n\r\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. 
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\r\n\r\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/sarek/results).\r\n\r\nIt's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek).\r\n\r\n\u003cp align=\"center\"\u003e\r\n    \u003cimg title=\"Sarek Workflow\" src=\"docs/images/sarek_workflow.png\" width=30%\u003e\r\n\u003c/p\u003e\r\n\r\n## Pipeline summary\r\n\r\nDepending on the options and samples provided, the pipeline can currently perform the following:\r\n\r\n- Form consensus reads from UMI sequences (`fgbio`)\r\n- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`)\r\n- Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`)\r\n- Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`)\r\n- Summarise alignment statistics (`samtools stats`, `mosdepth`)\r\n- Variant calling (enabled by `--tools`, see [compatibility](https://nf-co.re/sarek/latest/docs/usage#which-variant-calling-tool-is-implemented-for-which-data-type)):\r\n  - `ASCAT`\r\n  - `CNVkit`\r\n  - `Control-FREEC`\r\n  - `DeepVariant`\r\n  - `freebayes`\r\n  - `GATK HaplotypeCaller`\r\n  - `Manta`\r\n  - `mpileup`\r\n  - `MSIsensor-pro`\r\n  - `Mutect2`\r\n  - 
`Sentieon Haplotyper`\r\n  - `Strelka2`\r\n  - `TIDDIT`\r\n- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`)\r\n- Summarise and represent QC (`MultiQC`)\r\n\r\n\u003cp align=\"center\"\u003e\r\n    \u003cimg title=\"Sarek Workflow\" src=\"docs/images/sarek_subway.png\" width=60%\u003e\r\n\u003c/p\u003e\r\n\r\n## Usage\r\n\r\n\u003e [!NOTE]\r\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\r\n\r\nFirst, prepare a samplesheet with your input data that looks as follows:\r\n\r\n`samplesheet.csv`:\r\n\r\n```csv\r\npatient,sample,lane,fastq_1,fastq_2\r\nID1,S1,L002,ID1_S1_L002_R1_001.fastq.gz,ID1_S1_L002_R2_001.fastq.gz\r\n```\r\n\r\nEach row represents a pair of fastq files (paired end).\r\n\r\nNow, you can run the pipeline using:\r\n\r\n```bash\r\nnextflow run nf-core/sarek \\\r\n   -profile \u003cdocker/singularity/.../institute\u003e \\\r\n   --input samplesheet.csv \\\r\n   --outdir \u003cOUTDIR\u003e\r\n```\r\n\r\n\u003e [!WARNING]\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\u003e see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).\r\n\r\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/sarek/usage) and the [parameter documentation](https://nf-co.re/sarek/parameters).\r\n\r\n## Pipeline output\r\n\r\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/sarek/results) tab on the nf-core website pipeline page.\r\nFor more details about the output files and reports, please refer to the\r\n[output documentation](https://nf-co.re/sarek/output).\r\n\r\n## Benchmarking\r\n\r\nOn each release, the pipeline is run on 3 full size tests:\r\n\r\n- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium\r\n- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle(NA12878) dataset\r\n- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available [here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). 
The results are uploaded to Zenodo, evaluated against a truth dataset, and results are made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#).\r\n\r\n## Credits\r\n\r\nSarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken).\r\nFriederike Hanssen and Gisela Gabernet at [QBiC](https://www.qbic.uni-tuebingen.de/) later joined and helped with further development.\r\n\r\nThe Nextflow DSL2 conversion of the pipeline was lead by Friederike Hanssen and Maxime U Garcia.\r\n\r\nMaintenance is now lead by Friederike Hanssen and Maxime U Garcia (now at [Seqera Labs](https://seqera/io))\r\n\r\nMain developers:\r\n\r\n- [Maxime U Garcia](https://github.com/maxulysse)\r\n- [Friederike Hanssen](https://github.com/FriederikeHanssen)\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n- [Abhinav Sharma](https://github.com/abhi18av)\r\n- [Adam Talbot](https://github.com/adamrtalbot)\r\n- [Adrian Lärkeryd](https://github.com/adrlar)\r\n- [Alexander Peltzer](https://github.com/apeltzer)\r\n- [Alison Meynert](https://github.com/ameynert)\r\n- [Anders Sune Pedersen](https://github.com/asp8200)\r\n- [arontommi](https://github.com/arontommi)\r\n- [BarryDigby](https://github.com/BarryDigby)\r\n- [Bekir Ergüner](https://github.com/berguner)\r\n- [bjornnystedt](https://github.com/bjornnystedt)\r\n- [cgpu](https://github.com/cgpu)\r\n- [Chela James](https://github.com/chelauk)\r\n- [David Mas-Ponte](https://github.com/davidmasp)\r\n- [Edmund Miller](https://github.com/edmundmiller)\r\n- [Francesco Lescai](https://github.com/lescai)\r\n- [Gavin 
Mackenzie](https://github.com/GCJMackenzie)\r\n- [Gisela Gabernet](https://github.com/ggabernet)\r\n- [Grant Neilson](https://github.com/grantn5)\r\n- [gulfshores](https://github.com/gulfshores)\r\n- [Harshil Patel](https://github.com/drpatelh)\r\n- [James A. Fellows Yates](https://github.com/jfy133)\r\n- [Jesper Eisfeldt](https://github.com/J35P312)\r\n- [Johannes Alneberg](https://github.com/alneberg)\r\n- [José Fernández Navarro](https://github.com/jfnavarro)\r\n- [Júlia Mir Pedrol](https://github.com/mirpedrol)\r\n- [Ken Brewer](https://github.com/kenibrewer)\r\n- [Lasse Westergaard Folkersen](https://github.com/lassefolkersen)\r\n- [Lucia Conde](https://github.com/lconde-ucl)\r\n- [Malin Larsson](https://github.com/malinlarsson)\r\n- [Marcel Martin](https://github.com/marcelm)\r\n- [Nick Smith](https://github.com/nickhsmith)\r\n- [Nicolas Schcolnicov](https://github.com/nschcolnicov)\r\n- [Nilesh Tawari](https://github.com/nilesh-tawari)\r\n- [Nils Homer](https://github.com/nh13)\r\n- [Olga Botvinnik](https://github.com/olgabot)\r\n- [Oskar Wacker](https://github.com/WackerO)\r\n- [pallolason](https://github.com/pallolason)\r\n- [Paul Cantalupo](https://github.com/pcantalupo)\r\n- [Phil Ewels](https://github.com/ewels)\r\n- [Sabrina Krakau](https://github.com/skrakau)\r\n- [Sam Minot](https://github.com/sminot)\r\n- [Sebastian-D](https://github.com/Sebastian-D)\r\n- [Silvia Morini](https://github.com/silviamorins)\r\n- [Simon Pearce](https://github.com/SPPearce)\r\n- [Solenne Correard](https://github.com/scorreard)\r\n- [Susanne Jodoin](https://github.com/SusiJo)\r\n- [Szilveszter Juhos](https://github.com/szilvajuhos)\r\n- [Tobias Koch](https://github.com/KochTobi)\r\n- [Winni Kretzschmar](https://github.com/winni2k)\r\n\r\n## Acknowledgements\r\n\r\n|      [![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken)      |            [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se)             |\r\n| 
:-----------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: |\r\n| [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) |\r\n|              [![QBiC](docs/images/QBiC_logo.png)](https://www.qbic.uni-tuebingen.de)              |                   [![GHGA](docs/images/GHGA_logo.png)](https://www.ghga.de/)                   |\r\n|                     [![DNGC](docs/images/DNGC_logo.png)](https://eng.ngc.dk/)                     |                                                                                                |\r\n\r\n## Contributions \u0026 Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime U Garcia](mailto:maxime.garcia@seqera.io?subject=[GitHub]%20nf-core/sarek), [Friederike Hanssen](mailto:friederike.hanssen@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek)\r\n\r\n## Citations\r\n\r\nIf you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:\r\n\r\n\u003e Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _NAR Genomics and Bioinformatics_ Volume 6, Issue 2, June 2024, lqae031, [doi: 10.1093/nargab/lqae031](https://doi.org/10.1093/nargab/lqae031).\r\n\r\n\u003e Garcia M, Juhos 
S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2).\r\n\r\nYou can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nYou can cite the `nf-core` publication as follows:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n\r\n## CHANGELOG\r\n\r\n- [CHANGELOG](CHANGELOG.md)\r\n","organization":"EGA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1030?version=1","name":"IMPaCT-data QC + sarek (master @ fc7059f)","author":["Arnau Soler Costa","Amy Curwin","Jordi Rambla"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1030?version=2","name":"IMPaCT-Data QC + sarek (master @ fc7059f)","author":["Arnau Soler Costa","Amy Curwin","Jordi Rambla"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1030?version=3","name":"IMPaCT-Data QC + sarek (master @ 81c1fc9)","author":["Arnau Soler Costa","Amy Curwin","Jordi Rambla"],"descriptor_type":["NFL"]}]},{"id":"1034","url":"https://workflowhub.eu/workflows/1034","name":"Monte Carlo Simulation for Pi Estimation","description":"# Monte Carlo Pi Estimation Program Description\r\n\r\nThis program is a 
Monte Carlo simulation designed to estimate the value of Pi using PyCOMPSs.\r\n\r\n## Tasks in the Program\r\n\r\n1. **Count Points in Circle Task (`count_points_in_circle`)**:\r\n   - Generates random points within a square with side length 1.\r\n   - Counts points falling within the inscribed circle (x^2 + y^2 \u003c= 1).\r\n   - Input: Number of points to generate (num_points)\r\n   - Output: Tuple containing count of points within the circle and list of generated points\r\n\r\n2. **Write Points to File Task (`write_points_to_file`)**:\r\n   - Writes a list of points to a file named according to the task ID.\r\n   - Input: List of points and task ID\r\n   - Output: None\r\n\r\n## Main Function Operation\r\n\r\n- Takes num_points and num_tasks as input.\r\n- Divides points among tasks for parallel processing.\r\n- Launches count_points_in_circle tasks in parallel.\r\n- Launches write_points_to_file tasks after count tasks complete.\r\n- Calculates Pi estimate and writes to Result.txt.\r\n\r\n\r\n## Execution\r\n\r\nTo execute the script, use the following command-line format:\r\n\r\n```bash\r\npython script_name.py num_points num_tasks\r\n","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1034?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1035","url":"https://workflowhub.eu/workflows/1035","name":"TB Variant Analysis v1.0","description":"Predict variants and drug resistance from M. 
tuberculosis sequence samples (Illumina)","organization":"SANBI Pathogen Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1035?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1039","url":"https://workflowhub.eu/workflows/1039","name":"CNVand","description":"# CNVand\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥8.0.0-brightgreen.svg?style=flat-square)](https://snakemake.bitbucket.io)\r\n[![Conda](https://img.shields.io/badge/conda-≥23.11.0-brightgreen.svg?style=flat-square)](https://anaconda.org/conda-forge/mamba)\r\n![Docker](https://img.shields.io/badge/docker-≥26.1.4-brightgreen.svg?style=flat-square)\r\n![License](https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square)\r\n[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md) \r\n\r\nCNVand is a snakemake workflow for CNV analysis, tailored for preparing data used by the [CNVizard](https://github.com/IHGGM-Aachen/CNVizard) CNV visualization tool. Given a set of BAM and VCF files, it utilizes the tools `CNVkit` and `AnnotSV` to analyze and annotate copy number variations.\r\n\r\n## General Settings and Samplesheet\r\nTo configure this pipeline, modify the config under `config/config.yaml` as needed. Detailed explanations for each setting are provided within the file.\r\n\r\nAdd samples to the pipeline by completing `config/samplesheet.tsv`. Each `sample` should be associated with a `path` to the corresponding BAM and VCF file.\r\n\r\nFor detailed instructions on how to configure CNVand see `config/README.md`.\r\n\r\n## Reference Files\r\nTo use CNVand some external reference files are needed alongside your sample data.\r\n\r\n### Genome\r\n\r\nFor `cnvkit_fix` to work, you need to specify a reference genome in the config file. 
Take care to use the same reference file for your entire workflow!\r\n\r\n### Annotations\r\n\r\nFor AnnotSV to work, the annotation files must be downloaded separately and be referenced in the config file under the respective key. For human annotations, this can be done [here](https://www.lbgi.fr/~geoffroy/Annotations/Annotations_Human_3.4.2.tar.gz). In case this link is not working, check the original [AnnotSV](https://github.com/lgmgeo/AnnotSV/tree/master) repository for updates on how to obtain the annotations.\r\n\r\n## Pipeline Setup\r\nCNVand can be executed using mamba environments or a pre-built docker container.\r\n\r\n### Mamba (Snakedeploy)\r\nFor a one-click installation, snakedeploy can be used. For further information, see the entry for CNVand in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?repo=IHGGM-Aachen/CNVand)\r\n\r\n### Mamba (Manual)\r\nThis workflow can easily setup manually with the given environment file. Install Snakemake and dependencies using the command:\r\n\r\n```bash\r\nmamba env create -f environment.yml\r\n```\r\n\r\nThen activate the newly created environment with: \r\n\r\n```bash\r\nmamba activate cnvand\r\n```\r\n\r\nNow configure the pipeline and download the needed annotation and refenrece files. When everything is set up, Execute the pipeline with:\r\n\r\n```bash\r\nsnakemake --cores all --use-conda\r\n```\r\n\r\nGenerate a comprehensive execution report by running:\r\n\r\n```bash\r\nsnakemake --report report.zip\r\n```\r\n\r\n\r\n### Docker\r\n\r\nCNVand can also be used inside a Docker container. To do so, first pull the Docker image with:\r\n\r\n```bash\r\ndocker pull ghcr.io/ihggm-aachen/cnvand:latest\r\n```\r\n\r\nThen run the container with the bind mounts needed in your setup:\r\n\r\n```bash\r\ndocker run -it -v /path/to/your/data:/data ghcr.io/ihggm-aachen/cnvand:latest /bin/bash\r\n```\r\n\r\nThis command opens an interactive shell inside the Docker container. 
Once inside the container, you are placed inside the `/cnvand` directory. From there, run the pipeline once you have set an appropriate configuration:\r\n\r\n```bash\r\nsnakemake --cores all --use-conda\r\n```\r\n\r\n## Contributing\r\n\r\nWe welcome contributions to improve CNVand. Please see our [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to get started.\r\n\r\n## Code of Conduct\r\n\r\nWe are committed to fostering an open and welcoming environment. Please see our [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) for our community guidelines.\r\n\r\n## Documentation\r\n\r\nDetailed documentation for the workflow can be found in `workflow/documentation.md`.\r\n\r\n## Testing\r\n\r\nTo ensure the pipeline runs correctly, we have set up both unit and integration tests. Unit tests are generated from successful workflow runs, and integration tests are configured to run the entire workflow with test data.\r\n\r\n### Integration Tests\r\n\r\nThe integration test can be run using the data and config provided. Remember to download the correct reference/annotations (GRCh38 in case of the bundled NIST data) by yourself and adjust your local paths as necessary!\r\n\r\n### Unit Tests\r\n\r\nRun the unit tests with:\r\n\r\n```bash\r\npytest -v .tests/unit\r\n```\r\n\r\nThis will check for the correct CNVand output per rule.\r\n\r\n## License\r\n\r\nThis project is licensed under the MIT License. 
See the [LICENSE](LICENSE.md) file for details.\r\n","organization":"Institute for Human Genetics and Genomic Medicine Aachen","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1039?version=1","name":"main @ ccafcfb","author":["Carlos Classen"],"descriptor_type":["SMK"]}]},{"id":"1040","url":"https://workflowhub.eu/workflows/1040","name":"Porto-Sinusoidal Vascular Disease transcriptomics analysis workflow","description":"Workflow for gene set enrichment analysis (GSEA) and co-expression analysis (WGCNA) on transcriptomics data to analyze pathways affected in Porto-Sinusoidal Vascular Disease.","organization":"EJPRD WP13 case-studies workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1040?version=1","name":"Version 1","author":["Aishwarya Iyer","Friederike Ehrhart"],"descriptor_type":["CWL"]}]},{"id":"1041","url":"https://workflowhub.eu/workflows/1041","name":"nf-core/reportho","description":"A pipeline for ortholog fetching and analysis","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1041?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1041?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1043","url":"https://workflowhub.eu/workflows/1043","name":"bacterial-genome-assembly/main","description":"Assembly of bacterial paired-end short read data with generation of quality metrics and reports","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1043?version=1","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1043?version=2","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1043?version=3","name":"v1.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1043?version=4","name":"v1.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1043?version=5","name":"v1.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1043?version=6","name":"v1.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1043?version=7","name":"v1.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/1043?version=8","name":"v1.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/1043?version=9","name":"v1.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/1043?version=10","name":"v1.1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/1043?version=11","name":"v2.0","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/1043?version=12","name":"v2.0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1046","url":"https://workflowhub.eu/workflows/1046","name":"PyCOMPSs Matrix Multiplication, out-of-core using files. Example using DIRECTORY parameters executed at laptop, data persistence True.","description":"COMPSs Matrix Multiplication, out-of-core using files. 
Hypermatrix size used 2x2 blocks (MSIZE=2), block size used 2x2 elements (BSIZE=2)","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1046?version=1","name":"COMPSs 3.3.1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"1047","url":"https://workflowhub.eu/workflows/1047","name":"Java COMPSs LU Factorization for Sparse Matrices, MareNostrum V, 3 nodes, no data persistence","description":"**Name:** SparseLU \r\n**Contact Person:** support-compss@bsc.es \r\n**Access Level:** public \r\n**License Agreement:** Apache2 \r\n**Platform:** COMPSs \r\n\r\n# Description\r\nThe Sparse LU application computes an LU matrix factorization on a sparse blocked matrix. The matrix size (number of blocks) and the block size are parameters of the application. \r\n\r\nAs the algorithm progresses, the area of the matrix that is accessed is smaller; concretely, at each iteration, the 0th row and column of the current matrix are discarded. 
On the other hand, due to the sparseness of the matrix, some of its blocks might not be allocated and, therefore, no work is generated for them.\r\n\r\nWhen executed with COMPSs, Sparse LU produces several types of task with different granularity and numerous dependencies between them.\r\n\r\n# Versions\r\nThere are three versions of Sparse LU, depending on the data types used to store the blocks.\r\n## Version 1\r\n''files'', where the matrix blocks are stored in files.\r\n## Version 2\r\n''objects'', where the matrix blocks are represented by objects.\r\n## Version 3\r\n''arrays'', where the matrix blocks are stored in arrays.\r\n\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss sparseLU.files.SparseLU numberOfBlocks blockSize\r\nruncompss sparseLU.objects.SparseLU numberOfBlocks blockSize\r\nruncompss sparseLU.arrays.SparseLU numberOfBlocks blockSize\r\n```\r\n\r\nwhere:\r\n  * numberOfBlocks: Number of blocks inside each matrix\r\n  * blockSize: Size of each block\r\n\r\n\r\n# Execution Example\r\n```\r\nruncompss sparseLU.objects.SparseLU 16 4 \r\nruncompss sparseLU.files.SparseLU 16 4\r\nruncompss sparseLU.arrays.SparseLU 16 4 \r\n```\r\n\r\n\r\n# Build\r\n## Option 1: Native java\r\n```\r\ncd application_sources/; javac src/main/java/sparseLU/*/*.java\r\ncd src/main/java/; jar cf sparseLU.jar sparseLU/\r\ncd ../../../; mv src/main/java/sparseLU.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\ncd application_sources/\r\nmvn clean package\r\n```\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1047?version=1","name":"COMPSs 3.3.1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"1049","url":"https://workflowhub.eu/workflows/1049","name":"amr_gene_detection/main","description":"Antimicrobial resistance gene detection from assembled bacterial genomes","organization":"Intergalactic 
Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1049?version=1","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1049?version=2","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1049?version=3","name":"v1.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1049?version=4","name":"v1.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1049?version=5","name":"v1.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1049?version=6","name":"v1.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1049?version=7","name":"v1.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/1049?version=8","name":"v1.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/1049?version=9","name":"v1.1.7","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1050","url":"https://workflowhub.eu/workflows/1050","name":"bacterial_genome_annotation/main","description":"Annotation of an assembled bacterial genomes to detect genes, potential plasmids, integrons and Insertion sequence (IS) elements.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1050?version=1","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1050?version=2","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1050?version=3","name":"v1.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1050?version=4","name":"v1.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1050?version=5","name":"v1.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1050?version=6","name":"v1.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1050?version=7","name":"v1.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/1050?version=8","name":"v1.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/1050?version=9","name":"v1.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/1050?version=10","name":"v1.1.9","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/1050?version=11","name":"v1.1.10","author":[],"descriptor_type":["GALAXY"]},{"id":"12","url":"https://workflowhub.eu/workflows/1050?version=12","name":"v1.1.11","author":[],"descriptor_type":["GALAXY"]},{"id":"13","url":"https://workflowhub.eu/workflows/1050?version=13","name":"v1.2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1051","url":"https://workflowhub.eu/workflows/1051","name":"brew3r/main","description":"This workflow takes a collection of BAM (output of STAR) and a gtf. 
It extends the input gtf using de novo annotation.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1051?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1051?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1052","url":"https://workflowhub.eu/workflows/1052","name":"quality-and-contamination-control/main","description":"Short paired-end read analysis to provide quality analysis, read cleaning and taxonomy assignation","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1052?version=1","name":"v1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1052?version=2","name":"v1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1052?version=3","name":"v1.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1052?version=4","name":"v1.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1052?version=5","name":"v1.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1052?version=6","name":"v1.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1052?version=7","name":"v1.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/1052?version=8","name":"v1.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/1052?version=9","name":"v1.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"10","url":"https://workflowhub.eu/workflows/1052?version=10
","name":"v1.1.8","author":[],"descriptor_type":["GALAXY"]},{"id":"11","url":"https://workflowhub.eu/workflows/1052?version=11","name":"v1.1.9","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1054","url":"https://workflowhub.eu/workflows/1054","name":"TSI-Scaffolding-with-HiC (based on VGP-HiC-scaffolding)","description":"# Scaffolding using HiC data with YAHS\r\n\r\nThis workflow has been created from a Vertebrate Genomes Project (VGP) scaffolding workflow. \r\n\r\n* For more information about the VGP project see https://galaxyproject.org/projects/vgp/. \r\n* The scaffolding workflow is at https://dockstore.org/workflows/github.com/iwc-workflows/Scaffolding-HiC-VGP8/main:main?tab=info\r\n* Please see that link for the workflow diagram. \r\n\r\nSome minor changes have been made to better fit with TSI project data: \r\n\r\n* optional inputs of SAK info and sequence graph have been removed\r\n* the required input format for the genome is changed from gfa to fasta\r\n* the estimated genome size now requires user input rather than being extracted from output of a previous workflow.  \r\n\r\nInputs: \r\n\r\n* assembly.fasta  [note - scaffolding is done only one haplotype at a time. 
eg hap1 or primary]\r\n* Concatenated HiC forward reads in fastqsanger.gz\r\n* Concatenated HiC reverse reads in fastqsanger.gz\r\n* Restriction enzyme sequence\r\n* Estimated genome size (enter as integer)\r\n* Lineage for busco \r\n\r\nOutputs: the main outputs are: \r\n\r\n* scaffolded_assmbly.fasta\r\n* comparison of pre- post- scaffolding contact maps\r\n\r\n\r\n\r\n\r\n","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1054?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1055","url":"https://workflowhub.eu/workflows/1055","name":"nf-core/demo","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-demo_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/demo\" src=\"docs/images/nf-core-demo_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/demo/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/demo/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/demo/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/demo/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/demo/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12192442-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12192442)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/demo)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23demo-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/demo)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/demo** is a simple nf-core style bioinformatics pipeline for workshops and demonstrations. It was created using the nf-core template and is designed to run quickly using small test data files.\n\n![nf-core/demo metro map](docs/images/nf-core-demo-subway.png)\n\n1. Read QC ([`FASTQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Adapter and quality trimming ([`SEQTK_TRIM`](https://github.com/lh3/seqtk))\n3. 
Present QC for raw reads ([`MULTIQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nSAMPLE1_PE,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R2.fastq.gz\nSAMPLE2_PE,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R2.fastq.gz\nSAMPLE3_SE,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz,\nSAMPLE3_SE,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R1.fastq.gz,\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/demo \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/demo/usage) and the [parameter documentation](https://nf-co.re/demo/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/demo/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/demo/output).\n\n## Credits\n\nnf-core/demo was originally written by Chris Hakkaart ([@christopher-hakkaart](https://github.com/christopher-hakkaart)).\n\nThe pipeline is currently being maintained by the Nextflow community team as well as [Geraldine Van der Auwera](https://github.com/vdauwera) and [Florian Wuennemann](https://github.com/FloWuenne).\n\n\u003c!-- We thank the following people for their extensive assistance in the development of this pipeline: --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#demo` channel](https://nfcore.slack.com/channels/demo) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/demo for your analysis, please cite it using the following doi: [10.5281/zenodo.12192442](https://doi.org/10.5281/zenodo.12192442)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics 
pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1055?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1055?version=2","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1055?version=3","name":"1.0.2","author":[],"descriptor_type":["NFL"]}]},{"id":"1057","url":"https://workflowhub.eu/workflows/1057","name":"Plot-Nx-Size/main","description":"## Generate Nx and Size plot for multiple assemblies\n\n\n### Inputs\n\nCollection of fasta files. The name of each item in the collection will be used as label for the Nx and Size plots.\n\n### Outputs\n\n\n1. Nx plot \n2. 
Size plot ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1057?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1057?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1057?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1057?version=4","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1057?version=5","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1057?version=6","name":"v0.1.5","author":[],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1057?version=7","name":"v0.1.6","author":[],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/1057?version=8","name":"v0.1.7","author":[],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/1057?version=9","name":"v0.1.8","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1058","url":"https://workflowhub.eu/workflows/1058","name":"AssemblyQC: A NextFlow pipeline for evaluating assembly quality","description":"[![GitHub Actions CI Status](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/ci.yml/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/ci.yml)\r\n[![GitHub Actions Linting Status](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/linting.yml/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/linting.yml)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10647870-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10647870)\r\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with conda ❌](http://img.shields.io/badge/run%20with-conda%20❌-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/plant-food-research-open/assemblyqc)\r\n\r\n## Introduction\r\n\r\n**plant-food-research-open/assemblyqc** is a [NextFlow](https://www.nextflow.io/docs/latest/index.html) pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report. 
The tools are shown in the [Pipeline Flowchart](#pipeline-flowchart) and their references are listed in [CITATIONS.md](./CITATIONS.md).\r\n\r\n## Pipeline Flowchart\r\n\r\n```mermaid\r\n%%{init: {\r\n    'theme': 'base',\r\n    'themeVariables': {\r\n    'fontSize': '52px',\r\n    'primaryColor': '#9A6421',\r\n    'primaryTextColor': '#ffffff',\r\n    'primaryBorderColor': '#9A6421',\r\n    'lineColor': '#B180A8',\r\n    'secondaryColor': '#455C58',\r\n    'tertiaryColor': '#ffffff'\r\n  }\r\n}}%%\r\nflowchart LR\r\n  forEachTag(Assembly) ==\u003e VALIDATE_FORMAT[VALIDATE FORMAT]\r\n\r\n  VALIDATE_FORMAT ==\u003e ncbiFCS[NCBI FCS\\nADAPTOR]\r\n  ncbiFCS ==\u003e Check{Check}\r\n\r\n  VALIDATE_FORMAT ==\u003e ncbiGX[NCBI FCS GX]\r\n  ncbiGX ==\u003e Check\r\n  Check ==\u003e |Clean|Run(Run)\r\n\r\n  Check ==\u003e |Contamination|Skip(Skip All)\r\n  Skip ==\u003e REPORT\r\n\r\n  VALIDATE_FORMAT ==\u003e GFF_STATS[GENOMETOOLS GT STAT]\r\n\r\n  Run ==\u003e ASS_STATS[ASSEMBLATHON STATS]\r\n  Run ==\u003e BUSCO\r\n  Run ==\u003e TIDK\r\n  Run ==\u003e LAI\r\n  Run ==\u003e KRAKEN2\r\n  Run ==\u003e HIC_CONTACT_MAP[HIC CONTACT MAP]\r\n  Run ==\u003e MUMMER\r\n  Run ==\u003e MINIMAP2\r\n  Run ==\u003e MERQURY\r\n\r\n  MUMMER ==\u003e CIRCOS\r\n  MUMMER ==\u003e DOTPLOT\r\n\r\n  MINIMAP2 ==\u003e PLOTSR\r\n\r\n  ASS_STATS ==\u003e REPORT\r\n  GFF_STATS ==\u003e REPORT\r\n  BUSCO ==\u003e REPORT\r\n  TIDK ==\u003e REPORT\r\n  LAI ==\u003e REPORT\r\n  KRAKEN2 ==\u003e REPORT\r\n  HIC_CONTACT_MAP ==\u003e REPORT\r\n  CIRCOS ==\u003e REPORT\r\n  DOTPLOT ==\u003e REPORT\r\n  PLOTSR ==\u003e REPORT\r\n  MERQURY ==\u003e REPORT\r\n```\r\n\r\n- [FASTA VALIDATOR](https://github.com/linsalrob/fasta_validator) + [SEQKIT RMDUP](https://github.com/shenwei356/seqkit): FASTA validation\r\n- [GENOMETOOLS GT GFF3VALIDATOR](https://genometools.org/tools/gt_gff3validator.html): GFF3 validation\r\n- [ASSEMBLATHON 
STATS](https://github.com/PlantandFoodResearch/assemblathon2-analysis/blob/a93cba25d847434f7eadc04e63b58c567c46a56d/assemblathon_stats.pl): Assembly statistics\r\n- [GENOMETOOLS GT STAT](https://genometools.org/tools/gt_stat.html): Annotation statistics\r\n- [NCBI FCS ADAPTOR](https://github.com/ncbi/fcs): Adaptor contamination pass/fail\r\n- [NCBI FCS GX](https://github.com/ncbi/fcs): Foreign organism contamination pass/fail\r\n- [BUSCO](https://gitlab.com/ezlab/busco): Gene-space completeness estimation\r\n- [TIDK](https://github.com/tolkit/telomeric-identifier): Telomere repeat identification\r\n- [LAI](https://github.com/oushujun/LTR_retriever/blob/master/LAI): Continuity of repetitive sequences\r\n- [KRAKEN2](https://github.com/DerrickWood/kraken2): Taxonomy classification\r\n- [HIC CONTACT MAP](https://github.com/igvteam/juicebox.js): Alignment and visualisation of HiC data\r\n- [MUMMER](https://github.com/mummer4/mummer) → [CIRCOS](http://circos.ca/documentation/) + [DOTPLOT](https://plotly.com) \u0026 [MINIMAP2](https://github.com/lh3/minimap2) → [PLOTSR](https://github.com/schneebergerlab/plotsr): Synteny analysis\r\n- [MERQURY](https://github.com/marbl/merqury): K-mer completeness, consensus quality and phasing assessment\r\n\r\n## Usage\r\n\r\nRefer to [usage](./docs/usage.md), [parameters](./docs/parameters.md) and [output](./docs/output.md) documents for details.\r\n\r\n\u003e [!NOTE]\r\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\r\n\r\nPrepare an `assemblysheet.csv` file with following columns representing target assemblies and associated meta-data.\r\n\r\n- `tag:` A unique tag which represents the target assembly throughout the pipeline and in the final report\r\n- `fasta:` FASTA file\r\n\r\nNow, you can run the pipeline using:\r\n\r\n```bash\r\nnextflow run plant-food-research-open/assemblyqc \\\r\n   -profile \u003cdocker/singularity/.../institute\u003e \\\r\n   --input assemblysheet.csv \\\r\n   --outdir \u003cOUTDIR\u003e\r\n```\r\n\r\n\u003e [!WARNING]\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\u003e see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).\r\n\r\n### Plant\u0026Food Users\r\n\r\nDownload the pipeline to your `/workspace/$USER` folder. Change the parameters defined in the [pfr/params.json](./pfr/params.json) file. 
Submit the pipeline to SLURM for execution.\r\n\r\n```bash\r\nsbatch ./pfr_assemblyqc\r\n```\r\n\r\n## Credits\r\n\r\nplant-food-research-open/assemblyqc was originally written by Usman Rashid ([@gallvp](https://github.com/gallvp)) and Ken Smith ([@hzlnutspread](https://github.com/hzlnutspread)).\r\n\r\nRoss Crowhurst ([@rosscrowhurst](https://github.com/rosscrowhurst)), Chen Wu ([@christinawu2008](https://github.com/christinawu2008)) and Marcus Davy ([@mdavy86](https://github.com/mdavy86)) generously contributed their QC scripts.\r\n\r\nMahesh Binzer-Panchal ([@mahesh-panchal](https://github.com/mahesh-panchal)) helped port the pipeline modules and sub-workflows to [nf-core](https://nf-co.re) schema.\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n- [Cecilia Deng](https://github.com/CeciliaDeng)\r\n- [Ignacio Carvajal](https://github.com/ignacio3437)\r\n- [Jason Shiller](https://github.com/jasonshiller)\r\n- [Sarah Bailey](https://github.com/SarahBailey1998)\r\n- [Susan Thomson](https://github.com/cflsjt)\r\n- [Ting-Hsuan Chen](https://github.com/ting-hsuan-chen)\r\n\r\nThe pipeline uses nf-core modules contributed by following authors:\r\n\r\n\u003ca href=\"https://github.com/gallvp\"\u003e\u003cimg src=\"https://github.com/gallvp.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/drpatelh\"\u003e\u003cimg src=\"https://github.com/drpatelh.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/midnighter\"\u003e\u003cimg src=\"https://github.com/midnighter.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/mahesh-panchal\"\u003e\u003cimg src=\"https://github.com/mahesh-panchal.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/jfy133\"\u003e\u003cimg src=\"https://github.com/jfy133.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca 
href=\"https://github.com/adamrtalbot\"\u003e\u003cimg src=\"https://github.com/adamrtalbot.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/maxulysse\"\u003e\u003cimg src=\"https://github.com/maxulysse.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/matthdsm\"\u003e\u003cimg src=\"https://github.com/matthdsm.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/joseespinosa\"\u003e\u003cimg src=\"https://github.com/joseespinosa.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/ewels\"\u003e\u003cimg src=\"https://github.com/ewels.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/sofstam\"\u003e\u003cimg src=\"https://github.com/sofstam.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/sateeshperi\"\u003e\u003cimg src=\"https://github.com/sateeshperi.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/priyanka-surana\"\u003e\u003cimg src=\"https://github.com/priyanka-surana.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/phue\"\u003e\u003cimg src=\"https://github.com/phue.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/muffato\"\u003e\u003cimg src=\"https://github.com/muffato.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/lescai\"\u003e\u003cimg src=\"https://github.com/lescai.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/kevinmenden\"\u003e\u003cimg src=\"https://github.com/kevinmenden.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/jvhagey\"\u003e\u003cimg src=\"https://github.com/jvhagey.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca 
href=\"https://github.com/joon-klaps\"\u003e\u003cimg src=\"https://github.com/joon-klaps.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/jeremy1805\"\u003e\u003cimg src=\"https://github.com/jeremy1805.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/heuermh\"\u003e\u003cimg src=\"https://github.com/heuermh.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/grst\"\u003e\u003cimg src=\"https://github.com/grst.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/friederikehanssen\"\u003e\u003cimg src=\"https://github.com/friederikehanssen.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/felixkrueger\"\u003e\u003cimg src=\"https://github.com/felixkrueger.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/erikrikarddaniel\"\u003e\u003cimg src=\"https://github.com/erikrikarddaniel.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/edmundmiller\"\u003e\u003cimg src=\"https://github.com/edmundmiller.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/d4straub\"\u003e\u003cimg src=\"https://github.com/d4straub.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\u003ca href=\"https://github.com/charles-plessy\"\u003e\u003cimg src=\"https://github.com/charles-plessy.png\" width=\"50\" height=\"50\"\u003e\u003c/a\u003e\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\n## Citations\r\n\r\nIf you use plant-food-research-open/assemblyqc for your analysis, please cite it as:\r\n\r\n\u003e Rashid, U., Wu, C., Shiller, J., Smith, K., Crowhurst, R., Davy, M., Chen, T.-H., Thomson, S., \u0026 Deng, C. (2024). 
AssemblyQC: A NextFlow pipeline for evaluating assembly quality (2.0.0). Zenodo. https://doi.org/10.5281/zenodo.10647870\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Plant-Food-Research-Open","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1058?version=1","name":"main @ 690e56b","author":[],"descriptor_type":["NFL"]}]},{"id":"1059","url":"https://workflowhub.eu/workflows/1059","name":"taxonomy-profiling-and-visualization-with-krona/main","description":"Microbiome - Taxonomy Profiling","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1059?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1060","url":"https://workflowhub.eu/workflows/1060","name":"pathogen-detection-pathogfair-samples-aggregation-and-visualisation/main","description":"Pathogens of all samples report generation and visualization","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1060?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1061","url":"https://workflowhub.eu/workflows/1061","name":"nanopore-pre-processing/main","description":"Microbiome - QC and Contamination Filtering","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1061?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1062","url":"https://workflowhub.eu/workflows/1062","name":"gene-based-pathogen-identification/main","description":"Nanopore datasets analysis - Phylogenetic Identification - antibiotic resistance genes detection and contigs building","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1062?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1063","url":"https://workflowhub.eu/workflows/1063","name":"allele-based-pathogen-identification/main","description":"Microbiome - Variant calling and Consensus Building","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1063?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1063?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1063?version=3","name":"v0.1.2","author":[],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1063?version=4","name":"v0.1.3","author":[],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1063?version=5","name":"v0.1.4","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1068","url":"https://workflowhub.eu/workflows/1068","name":"Wordcount","description":"Application that counts the number of words in the passed a file or a group of files.","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1068?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1069","url":"https://workflowhub.eu/workflows/1069","name":"mettannotator","description":"[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n\r\n# mettannotator\r\n\r\n\u003cimg align=\"right\" width=\"162\" height=\"149\" src=\"media/mettannotator-logo.png\"\u003e\r\n\r\n- [ Introduction ](#intro)\r\n- [ Workflow and tools](#wf)\r\n- [ Installation and dependencies ](#install)\r\n  - [Reference databases](#reference-databases)\r\n- [ Usage ](#usage)\r\n- [ Test ](#test)\r\n- [ Outputs ](#out)\r\n- [Preparing annotations for ENA or GenBank submission](#submission)\r\n- [ 
Mobilome annotation ](#mobilome)\r\n- [ Credits ](#credit)\r\n- [ Contributions and Support ](#contribute)\r\n- [ Citation ](#cite)\r\n\r\n\u003ca name=\"intro\"\u003e\u003c/a\u003e\r\n\r\n## Introduction\r\n\r\n**mettannotator** is a bioinformatics pipeline that generates an exhaustive annotation of prokaryotic genomes using existing tools. The output is a GFF file that integrates the results of all pipeline components. Results of each individual tool are also provided.\r\n\r\n\u003ca name=\"wf\"\u003e\u003c/a\u003e\r\n\r\n## Workflow and tools\r\n\r\n\u003cimg src=\"media/mettannotator-schema.png\"\u003e\r\n\u003cbr /\u003e\r\n\u003cbr /\u003e\r\n\r\nThe workflow uses the following tools and databases:\r\n\r\n| Tool/Database                                                                                    | Version                                       | Purpose                                                                                                                |\r\n| ------------------------------------------------------------------------------------------------ | --------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- |\r\n| [Prokka](https://github.com/tseemann/prokka)                                                     | 1.14.6                                        | CDS calling and functional annotation (default)                                                                        |\r\n| [Bakta](https://github.com/oschwengers/bakta)                                                    | 1.9.3                                         | CDS calling and functional annotation (if --bakta flag is used)                                                        |\r\n| [Bakta db](https://zenodo.org/record/10522951/)                                                  | 2024-01-19 with AMRFinderPlus DB 2024-01-31.1 | Bakta DB (when Bakta is used as the 
gene caller)                                                                       |\r\n| [Pseudofinder](https://github.com/filip-husnik/pseudofinder)                                     | v1.1.0                                        | Identification of possible pseudogenes                                                                                 |\r\n| [Swiss-Prot](https://www.uniprot.org/help/downloads)                                             | 2024_06                                       | Database for Pseudofinder                                                                                              |\r\n| [InterProScan](https://www.ebi.ac.uk/interpro/about/interproscan/)                               | 5.62-94.0                                     | Protein annotation (InterPro, Pfam)                                                                                    |\r\n| [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper)                                       | 2.1.11                                        | Protein annotation (eggNOG, KEGG, COG, GO-terms)                                                                       |\r\n| [eggNOG DB](http://eggnog6.embl.de/download/)                                                    | 5.0.2                                         | Database for eggNOG-mapper                                                                                             |\r\n| [UniFIRE](https://gitlab.ebi.ac.uk/uniprot-public/unifire)                                       | 2023.4                                        | Protein annotation                                                                                                     |\r\n| [AMRFinderPlus](https://github.com/ncbi/amr)                                                     | 3.12.8                                        | Antimicrobial resistance gene annotation; virulence factors, biocide, heat, acid, and metal resistance gene annotation 
|\r\n| [AMRFinderPlus DB](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/)              | 3.12 2024-01-31.1                             | Database for AMRFinderPlus                                                                                             |\r\n| [DefenseFinder](https://github.com/mdmparis/defense-finder)                                      | 1.2.0                                         | Annotation of anti-phage systems                                                                                       |\r\n| [DefenseFinder models](https://github.com/mdmparis/defense-finder-models)                        | 1.2.3                                         | Database for DefenseFinder                                                                                             |\r\n| [GECCO](https://github.com/zellerlab/GECCO)                                                      | 0.9.8                                         | Biosynthetic gene cluster annotation                                                                                   |\r\n| [antiSMASH](https://antismash.secondarymetabolites.org/#!/download)                              | 7.1.0                                         | Biosynthetic gene cluster annotation                                                                                   |\r\n| [SanntiS](https://github.com/Finn-Lab/SanntiS)                                                   | 0.9.3.4                                       | Biosynthetic gene cluster annotation                                                                                   |\r\n| [run_dbCAN](https://github.com/linnabrown/run_dbcan)                                             | 4.1.2                                         | PUL prediction                                                                                                         |\r\n| [dbCAN DB](https://bcb.unl.edu/dbCAN2/download/Databases/)                        
               | V12                                           | Database for run_dbCAN                                                                                                 |\r\n| [CRISPRCasFinder](https://github.com/dcouvin/CRISPRCasFinder)                                    | 4.3.2                                         | Annotation of CRISPR arrays                                                                                            |\r\n| [cmscan](http://eddylab.org/infernal/)                                                           | 1.1.5                                         | ncRNA predictions                                                                                                      |\r\n| [Rfam](https://rfam.org/)                                                                        | 14.9                                          | Identification of SSU/LSU rRNA and other ncRNAs                                                                        |\r\n| [tRNAscan-SE](https://github.com/UCSC-LoweLab/tRNAscan-SE)                                       | 2.0.9                                         | tRNA predictions                                                                                                       |\r\n| [pyCirclize](https://github.com/moshi4/pyCirclize)                                               | 1.4.0                                         | Visualise the merged GFF file                                                                                          |\r\n| [VIRify](https://github.com/EBI-Metagenomics/emg-viral-pipeline)                                 | 2.0.0                                         | Viral sequence annotation (runs separately)                                                                            |\r\n| [Mobilome annotation pipeline](https://github.com/EBI-Metagenomics/mobilome-annotation-pipeline) | 2.0                                           | Mobilome annotation 
(runs separately)                                                                                  |\r\n\r\n\u003ca name=\"install\"\u003e\u003c/a\u003e\r\n\r\n## Installation and dependencies\r\n\r\nThis workflow is built using [Nextflow](https://www.nextflow.io/). It uses containers (Docker or Singularity) making installation simple and results highly reproducible.\r\n\r\n- Install [Nextflow version \u003e=21.10](https://www.nextflow.io/docs/latest/getstarted.html#installation)\r\n- Install [Singularity](https://github.com/apptainer/singularity/blob/master/INSTALL.md)\r\n- Install [Docker](https://docs.docker.com/engine/install/)\r\n\r\nAlthough it's possible to run the pipeline on a personal computer, due to the compute requirements, we encourage users to run it on HPC clusters. Any HPC scheduler supported by [Nextflow](https://www.nextflow.io/) is compatible; however, our team primarily uses [Slurm](https://slurm.schedmd.com/) and [IBM LSF](https://www.ibm.com/docs/en/spectrum-lsf) for the EBI HPC cluster, so those are the profiles we ship with the pipeline.\r\n\r\n\u003ca name=\"reference-databases\"\u003e\u003c/a\u003e\r\n\r\n### Reference databases\r\n\r\nThe pipeline needs reference databases in order to work, they take roughly 180G.\r\n\r\n| Path                | Size |\r\n| ------------------- | ---- |\r\n| amrfinder           | 217M |\r\n| antismash           | 9.4G |\r\n| bakta               | 71G  |\r\n| dbcan               | 7.5G |\r\n| defense_finder      | 242M |\r\n| eggnog              | 48G  |\r\n| interproscan        | 45G  |\r\n| interpro_entry_list | 2.6M |\r\n| rfam_models         | 637M |\r\n| pseudofinder        | 273M |\r\n| total               | 182G |\r\n\r\n`mettannotator` has an automated mechanism to download the databases using the `--dbs \u003cdb_path\u003e` flag. When this flag is provided, the pipeline inspects the folder to verify if the required databases are already present. 
If any of the databases are missing, the pipeline will automatically download them.\r\n\r\nUsers can also provide individual paths to each reference database and its version if needed. For detailed instructions, please refer to the Reference databases section in the `--help` of the pipeline.\r\n\r\nIt's important to note that users are not allowed to mix the `--dbs` flag with individual database paths and versions; they are mutually exclusive. We recommend users to run the pipeline with the `--dbs` flag for the first time in an appropriate path and to avoid downloading the individual databases separately.\r\n\r\n\u003ca name=\"usage\"\u003e\u003c/a\u003e\r\n\r\n## Usage\r\n\r\n### Input file\r\n\r\nFirst, prepare an input file in the CSV format that looks as follows:\r\n\r\n`assemblies_sheet.csv`:\r\n\r\n```csv\r\nprefix,assembly,taxid\r\nBU_ATCC8492VPI0062,/path/to/BU_ATCC8492VPI0062_NT5002.fa,820\r\nEC_ASM584v2,/path/to/GCF_000005845.2.fna,562\r\n...\r\n```\r\n\r\nHere,\r\n`prefix` is the prefix and the locus tag that will be assigned to output files and proteins during the annotation process;\r\nmaximum length is 24 characters;\r\n\r\n`assembly` is the path to where the assembly file in FASTA format is located;\r\n\r\n`taxid` is the NCBI TaxId (if the species-level TaxId is not known, a TaxId for a higher taxonomic level can be used). 
If the taxonomy is known, look up the TaxID [here](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi).\r\n\r\n#### Finding TaxIds\r\n\r\nIf NCBI taxonomies of input genomes are not known, a tool such as [CAT/BAT](https://github.com/MGXlab/CAT_pack) can be used.\r\nFollow the [instructions](https://github.com/MGXlab/CAT_pack?tab=readme-ov-file#installation) for getting the tool and downloading the NCBI nr database for it.\r\n\r\nIf using CAT/BAT, here is the suggested process for making the `mettannotator` input file:\r\n\r\n```bash\r\n# Run BAT on each input genome, saving all results to the same folder\r\nCAT bins -b ${genome_name}.fna -d ${path_to_CAT_database} -t ${path_to_CAT_tax_folder} -o BAT_results/${genome_name}\r\n\r\n# Optional: to check what taxa were assigned, you can add names to them\r\nCAT add_names -i BAT_results/${genome_name}.bin2classification.txt -o BAT_results/${genome_name}.name.txt -t ${path_to_CAT_tax_folder}\r\n```\r\n\r\nTo generate an input file for `mettannotator`, use [generate_input_file.py](preprocessing/generate_input_file.py):\r\n\r\n```\r\npython3 preprocessing/generate_input_file.py -h\r\nusage: generate_input_file.py [-h] -i INFILE -d INPUT_DIR -b BAT_DIR -o OUTFILE [--no-prefix]\r\n\r\nThe script takes a list of genomes and the taxonomy results generated by BAT and makes a\r\nmettannotator input csv file. The user has the option to either use the genome file name\r\n(minus the extension) as the prefix for mettannotator or leave the prefix off and fill it\r\nout themselves after the script generates an input file with just the FASTA location and\r\nthe taxid. It is expected that for all genomes, BAT results are stored in the same folder\r\nand are named as {fasta_base_name}.bin2classification.txt. 
The script will use the lowest-\r\nlevel taxid without an asterisk as the taxid for the genome.\r\n\r\noptional arguments:\r\n  -h, --help    show this help message and exit\r\n  -i INFILE     A file containing a list of genome files to include (file name only, with file\r\n                extension, unzipped, one file per line).\r\n  -d INPUT_DIR  Full path to the directory where the input FASTA files are located.\r\n  -b BAT_DIR    Folder with BAT results. Results for all genomes should be in the same folder\r\n                and should be named {fasta_base_name}.bin2classification.txt\r\n  -o OUTFILE    Path to the file where the output will be saved to.\r\n  --no-prefix   Skip prefix generation and leave the first column of the output file empty for\r\n                the user to fill out. Default: False\r\n```\r\n\r\nFor example:\r\n\r\n```bash\r\npython3 generate_input_file.py -i list_of_genome_fasta_files.txt -d /path/to/the/fasta/files/folder/ -b BAT_results/ -o mettannotator_input.csv\r\n```\r\n\r\nIt is always best to check the outputs to ensure the results are as expected. 
Correct any wrongly detected taxa before starting `mettannotator`.\r\n\r\nNote, that by default the script uses FASTA file names as prefixes and truncates them to 24 characters if they exceed the limit.\r\n\r\n### Running mettannotator\r\n\r\nRunning `mettannotator` with the `--help` option will pull the repository and display the help message:\r\n\r\n\u003e [!NOTE]\r\n\u003e We use the `-latest` flag with the `nextflow run` command, which ensures that the latest available version of the pipeline is pulled.\r\n\u003e If you encounter any issues with the `nextflow run` command, please refer to the [Nextflow documentation](https://www.nextflow.io/docs/latest/reference/cli.html#run).\r\n\r\n```angular2html\r\n$ nextflow run -latest ebi-metagenomics/mettannotator/main.nf --help\r\nN E X T F L O W  ~  version 23.04.3\r\nLaunching `mettannotator/main.nf` [disturbed_davinci] DSL2 - revision: f2a0e51af6\r\n\r\n\r\n------------------------------------------------------\r\n  ebi-metagenomics/mettannotator \u003cversion\u003e\r\n------------------------------------------------------\r\nTypical pipeline command:\r\n\r\n  nextflow run ebi-metagenomics/mettannotator --input assemblies_sheet.csv -profile docker\r\n\r\nInput/output options\r\n  --input                            [string]  Path to comma-separated file containing information about the assemblies with the prefix to be used.\r\n  --outdir                           [string]  The output directory where the results will be saved. You have to use absolute paths to storage on Cloud\r\n                                               infrastructure.\r\n  --fast                             [boolean] Run the pipeline in fast mode. 
In this mode, InterProScan, UniFIRE, and SanntiS won't be executed, saving\r\n                                               resources and speeding up the pipeline.\r\n  --email                            [string]  Email address for completion summary.\r\n  --multiqc_title                    [string]  MultiQC report title. Printed as page header, used for filename if not otherwise specified.\r\n\r\nReference databases\r\n  --dbs                              [string]  Folder for the tools' reference databases used by the pipeline for downloading. It's important to note that\r\n                                               mixing the --dbs flag with individual database paths and versions is not allowed; they are mutually\r\n                                               exclusive.\r\n  --interproscan_db                  [string]  The InterProScan reference database, ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/\r\n  --interproscan_db_version          [string]  The InterProScan reference database version. [default: 5.62-94.0]\r\n  --interpro_entry_list              [string]  TSV file listing basic InterPro entry information - the accessions, types and names,\r\n                                               ftp://ftp.ebi.ac.uk/pub/databases/interpro/releases/94.0/entry.list\r\n  --interpro_entry_list_version      [string]  InterPro entry list version [default: 94]\r\n  --eggnog_db                        [string]  The EggNOG reference database folder,\r\n                                               https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.12#requirements\r\n  --eggnog_db_version                [string]  The EggNOG reference database version. [default: 5.0.2]\r\n  --rfam_ncrna_models                [string]  Rfam ncRNA models, ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/genomes-pipeline/ncrna/\r\n  --rfam_ncrna_models_rfam_version   [string]  Rfam release version where the models come from. 
[default: 14.9]\r\n  --amrfinder_plus_db                [string]  AMRFinderPlus reference database,\r\n                                               https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/. Go to the following\r\n                                               documentation for the db setup https://github.com/ncbi/amr/wiki/Upgrading#database-updates.\r\n  --amrfinder_plus_db_version        [string]  The AMRFinderPlus reference database version. [default: 2023-02-23.1]\r\n  --defense_finder_db                [string]  Defense Finder reference models, https://github.com/mdmparis/defense-finder#updating-defensefinder. The\r\n                                               Microbiome Informatics team provides a pre-indexed version of the models for version 1.2.3 on this ftp location:\r\n                                               ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/defense-finder/defense-finder-models_1.2.3.tar.gz.\r\n  --defense_finder_db_version        [string]  The Defense Finder models version. [default: 1.2.3]\r\n  --antismash_db                     [string]  antiSMASH reference database, go to this documentation to do the database setup\r\n                                               https://docs.antismash.secondarymetabolites.org/install/#installing-the-latest-antismash-release.\r\n  --antismash_db_version             [string]  The antiSMASH reference database version. [default: 7.1.0]\r\n  --dbcan_db                         [string]  dbCAN indexed reference database, please go to the documentation for the setup\r\n                                               https://dbcan.readthedocs.io/en/latest/. 
The Microbiome Informatics team provides a pre-indexed version of the\r\n                                               database for version 4.0 on this ftp location:\r\n                                               ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/dbcan/dbcan_4.0.tar.gz\r\n  --dbcan_db_version                 [string]  The dbCAN reference database version. [default: 4.1.3_V12]\r\n  --pseudofinder_db                  [string]  Pseudofinder reference database. Mettannotator uses SwissProt as the database for Pseudofinder.\r\n  --pseudofinder_db_version          [string]  SwissProt version. [default: 2024_06]\r\n\r\nGeneric options\r\n  --multiqc_methods_description      [string]  Custom MultiQC yaml file containing HTML including a methods description.\r\n\r\nOther parameters\r\n  --bakta                            [boolean] Use Bakta instead of Prokka for CDS annotation. Prokka will still be used for archaeal genomes.\r\n\r\n !! Hiding 17 params, use --validationShowHiddenParams to show them !!\r\n------------------------------------------------------\r\nIf you use ebi-metagenomics/mettannotator for your analysis please cite:\r\n\r\n* The nf-core framework\r\n  https://doi.org/10.1038/s41587-020-0439-x\r\n\r\n* Software dependencies\r\n  https://github.com/ebi-metagenomics/mettannotator/blob/master/CITATIONS.md\r\n------------------------------------------------------\r\n\r\n```\r\n\r\nNow, you can run the pipeline using:\r\n\r\n```bash\r\nnextflow run ebi-metagenomics/mettannotator \\\r\n   -profile \u003cdocker/singularity/...\u003e \\\r\n   --input assemblies_sheet.csv \\\r\n   --outdir \u003cOUTDIR\u003e \\\r\n   --dbs \u003cPATH/TO/WHERE/DBS/WILL/BE/SAVED\u003e\r\n```\r\n\r\n\u003e [!WARNING]\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\r\n\u003e provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\u003e see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).\r\n\r\n#### Running the pipeline from the source code\r\n\r\nIf the Nextflow integration with Git does not work, users can download the tarball from the releases page. After extracting the tarball, the pipeline can be run directly by executing the following command:\r\n\r\n```bash\r\n$ nextflow run path-to-source-code/main.nf --help\r\n```\r\n\r\n#### Local execution\r\n\r\nThe pipeline can be run on a desktop or laptop, with the caveat that it will take a few hours to complete depending on the resources. There is a local profile in the Nextflow config that limits the total resources the pipeline can use to 8 cores and 12 GB of RAM. In order to run it (Docker or Singularity are still required):\r\n\r\n```bash\r\nnextflow run -latest ebi-metagenomics/mettannotator \\\r\n   -profile local,\u003cdocker or singulairty\u003e \\\r\n   --input assemblies_sheet.csv \\\r\n   --outdir \u003cOUTDIR\u003e \\\r\n   --dbs \u003cPATH/TO/WHERE/DBS/WILL/BE/SAVED\u003e\r\n```\r\n\r\n### Gene caller choice\r\n\r\nBy default, `mettannotator` uses Prokka to identify protein-coding genes. Users can choose to use Bakta instead by\r\nrunning `mettannotator` with the `--bakta` flag. `mettannotator` runs Bakta without ncRNA and CRISPR\r\nannotation as these are produced by separate tools in the pipeline. Archaeal genomes will continue to be annotated using\r\nProkka as Bakta is only intended for annotation of bacterial genomes.\r\n\r\n### Fast mode\r\n\r\nTo reduce the compute time and the amount of resources used, the pipeline can be executed with the `--fast` flag. When\r\nrun in the fast mode, `mettannotator` will skip InterProScan, UniFIRE and SanntiS. 
This could be a suitable option\r\nfor a first-pass of annotation or if computational resources are limited, however, we recommend running the full version\r\nof the pipeline whenever possible.\r\n\r\nWhen generating an input file for a fast mode run, it is sufficient to indicate the taxid of the superkingdom (`2` for\r\nbacteria and `2157` for Archaea) in the \"taxid\" column rather than the taxid of the lowest known taxon.\r\n\r\n\u003ca name=\"test\"\u003e\u003c/a\u003e\r\n\r\n## Test\r\n\r\nTo run the pipeline using a test dataset, execute the following command:\r\n\r\n```bash\r\nwget https://raw.githubusercontent.com/EBI-Metagenomics/mettannotator/master/tests/test.csv\r\n\r\nnextflow run -latest ebi-metagenomics/mettannotator \\\r\n   -profile \u003cdocker/singularity/...\u003e \\\r\n   --input test.csv \\\r\n   --outdir \u003cOUTDIR\u003e \\\r\n   --dbs \u003cPATH/TO/WHERE/DBS/WILL/BE/SAVED\u003e\r\n```\r\n\r\n\u003ca name=\"out\"\u003e\u003c/a\u003e\r\n\r\n## Outputs\r\n\r\nThe output folder structure will look as follows:\r\n\r\n```\r\n└─\u003cPREFIX\u003e\r\n   ├─antimicrobial_resistance\r\n   │  └─amrfinder_plus\r\n   ├─antiphage_defense\r\n   │  └─defense_finder\r\n   ├─biosynthetic_gene_clusters\r\n   │  ├─antismash\r\n   │  ├─gecco\r\n   │  └─sanntis\r\n   ├─functional_annotation\r\n   │  ├─dbcan\r\n   │  ├─eggnog_mapper\r\n   │  ├─interproscan\r\n   │  ├─merged_gff\r\n   │  ├─prokka\r\n   │  ├─pseudofinder\r\n   │  └─unifire\r\n   ├─mobilome\r\n   │  └─crisprcas_finder\r\n   ├─quast\r\n   │  └─\u003cPREFIX\u003e\r\n   │      ├─basic_stats\r\n   │      └─icarus_viewers\r\n   ├─rnas\r\n   │  ├─ncrna\r\n   │  └─trna\r\n   ├─multiqc\r\n   │  ├─multiqc_data\r\n   │  └─multiqc_plots\r\n   │      ├─pdf\r\n   │      ├─png\r\n   │      └─svg\r\n   ├─pipeline_info\r\n   │  ├─software_versions.yml\r\n   │  ├─execution_report_\u003ctimestamp\u003e.txt\r\n   │  ├─execution_report_\u003ctimestamp\u003e.html\r\n   │  
├─execution_timeline_\u003ctimestamp\u003e.txt\r\n   │  ├─execution_timeline_\u003ctimestamp\u003e.html\r\n   │  ├─execution_trace_\u003ctimestamp\u003e.txt\r\n   │  ├─execution_trace_\u003ctimestamp\u003e.html\r\n   │  └─pipeline_dag_\u003ctimestamp\u003e.html\r\n\r\n```\r\n\r\n### Merged GFF\r\n\r\nThe two main output files for each genome are located in `\u003cOUTDIR\u003e/\u003cPREFIX\u003e/functional_annotation/merged_gff/`:\r\n\r\n- `\u003cPREFIX\u003e_annotations.gff`: annotations produced by all tools merged into a single file\r\n\r\n- `\u003cPREFIX\u003e_annotations_with_descriptions.gff`: a version of the GFF file above that includes descriptions of all InterPro terms to make the annotations human-readable. Not generated if `--fast` flag was used.\r\n\r\nBoth files include the genome sequence in the FASTA format at the bottom of the file.\r\n\r\nAdditionally, for genomes with no more than 50 annotated contigs, a Circos plot of the `\u003cPREFIX\u003e_annotations.gff` file is generated and included in the same folder. An example of such plot is shown below:\r\n\r\n\u003cimg src=\"media/circos-plot-example.png\"\u003e\r\n\r\n#### Data sources\r\n\r\nBelow is an explanation of how each field in column 3 and 9 of the final GFF file is populated. 
In most cases, information is taken as is from the reporting tool's output.\r\n\r\n| Feature (column 3)    | Attribute Name (column 9)                                               | Reporting Tool  | Description                                                                                                                                                                                                 |\r\n| --------------------- | ----------------------------------------------------------------------- | --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\r\n| ncRNA                 | all\\*                                                                   | cmscan + Rfam   | ncRNA annotation (excluding tRNA)                                                                                                                                                                           |\r\n| tRNA                  | all\\*                                                                   | tRNAscan-SE     | tRNA annotation                                                                                                                                                                                             |\r\n| LeftFLANK, RightFLANK | all\\*                                                                   | CRISPRCasFinder | CRISPR array flanking sequence                                                                                                                                                                              |\r\n| CRISPRdr              | all\\*                                                                   | CRISPRCasFinder | Direct repeat region of a CRISPR array                                                                                                                          
                                            |\r\n| CRISPRspacer          | all\\*                                                                   | CRISPRCasFinder | CRISPR spacer                                                                                                                                                                                               |\r\n| CDS                   | `ID`, `eC_number`, `Name`, `Dbxref`, `gene`, `inference`, `locus_tag`   | Prokka/Bakta    | Protein annotation                                                                                                                                                                                          |\r\n| CDS                   | `product`                                                               | mettannotator   | Product assigned as described in [ Determining the product ](#product)                                                                                                                                      |\r\n| CDS                   | `product_source`                                                        | mettannotator   | Tool that reported the product chosen by mettannotator                                                                                                                                                      |\r\n| CDS                   | `eggNOG`                                                                | eggNOG-mapper   | Seed ortholog from eggNOG                                                                                                                                                                                   |\r\n| CDS                   | `cog`                                                                   | eggNOG-mapper   | COG category                                                                                                                                                                                         
       |\r\n| CDS                   | `kegg`                                                                  | eggNOG-mapper   | KEGG orthology term                                                                                                                                                                                         |\r\n| CDS                   | `Ontology_term`                                                         | eggNOG-mapper   | GO associations                                                                                                                                                                                             |\r\n| CDS                   | `pfam`                                                                  | InterProScan    | Pfam accessions                                                                                                                                                                                             |\r\n| CDS                   | `interpro`                                                              | InterProScan    | InterPro accessions. 
In `\u003cPREFIX\u003e_annotations_with_descriptions.gff` each accession is followed by its description and entry type: Domain [D], Family [F], Homologous Superfamily [H], Repeat [R], Site [S] |\r\n| CDS                   | `nearest_MiBIG`                                                         | SanntiS         | MiBIG accession of the nearest BGC to the cluster in the MIBIG space                                                                                                                                        |\r\n| CDS                   | `nearest_MiBIG_class`                                                   | SanntiS         | BGC class of nearest_MiBIG                                                                                                                                                                                  |\r\n| CDS                   | `gecco_bgc_type`                                                        | GECCO           | BGC type                                                                                                                                                                                                    |\r\n| CDS                   | `antismash_bgc_function`                                                | antiSMASH       | BGC function                                                                                                                                                                                                |\r\n| CDS                   | `amrfinderplus_gene_symbol`                                             | AMRFinderPlus   | Gene symbol according to AMRFinderPlus                                                                                                                                                                      |\r\n| CDS                   | `amrfinderplus_sequence_name`                                           | AMRFinderPlus   | Product description                              
                                                                                                                                                           |\r\n| CDS                   | `amrfinderplus_scope`                                                   | AMRFinderPlus   | AMRFinderPlus database (core or plus)                                                                                                                                                                       |\r\n| CDS                   | `element_type`, `element_subtype`                                       | AMRFinderPlus   | Functional category                                                                                                                                                                                         |\r\n| CDS                   | `drug_class`, `drug_subclass`                                           | AMRFinderPlus   | Class and subclass of drugs that this gene is known to contribute to resistance of                                                                                                                          |\r\n| CDS                   | `dbcan_prot_type`                                                       | run_dbCAN       | Predicted protein function: transporter (TC), transcription factor (TF), signal transduction protein (STP), CAZyme                                                                                          |\r\n| CDS                   | `dbcan_prot_family`                                                     | run_dbCAN       | Predicted protein family                                                                                                                                                                                    |\r\n| CDS                   | `substrate_dbcan-pul`                                                   | run_dbCAN       | Substrate predicted by dbCAN-PUL search                                                
                                                                                                                     |\r\n| CDS                   | `substrate_dbcan-sub`                                                   | run_dbCAN       | Substrate predicted by dbCAN-subfam                                                                                                                                                                         |\r\n| CDS                   | `defense_finder_type`, `defense_finder_subtype`                         | DefenseFinder   | Type and subtype of the anti-phage system found                                                                                                                                                             |\r\n| CDS                   | `uf_prot_rec_fullname`, `uf_prot_rec_shortname`, `uf_prot_rec_ecnumber` | UniFIRE         | Protein recommended full name, short name and EC number according to UniFIRE                                                                                                                                |\r\n| CDS                   | `uf_prot_alt_fullname`, `uf_prot_alt_shortname`, `uf_prot_alt_ecnumber` | UniFIRE         | Protein alternative full name, short name and EC number according to UniFIRE                                                                                                                                |\r\n| CDS                   | `uf_chebi`                                                              | UniFIRE         | ChEBI identifiers                                                                                                                                                                                           |\r\n| CDS                   | `uf_ontology_term`                                                      | UniFIRE         | GO associations                                                                                                              
                                                                               |\r\n| CDS                   | `uf_keyword`                                                            | UniFIRE         | UniFIRE keywords                                                                                                                                                                                            |\r\n| CDS                   | `uf_gene_name`, `uf_gene_name_synonym`                                  | UniFIRE         | Gene name and gene name synonym according to UniFIRE                                                                                                                                                        |\r\n| CDS                   | `uf_pirsr_cofactor`                                                     | UniFIRE         | Cofactor names from PIRSR                                                                                                                                                                                   |\r\n\r\n\\*all attributes in column 9 are populated by the tool\r\n\u003cbr\u003e\r\n\u003cbr\u003e\r\n\r\n\u003ca name=\"product\"\u003e\u003c/a\u003e\r\n\r\n#### Determining the product\r\n\r\nThe following logic is used by `mettannotator` to fill out the `product` field in the 9th column of the GFF:\r\n\r\n\u003cimg src=\"media/mettannotator-product.png\"\u003e\r\n\r\nIf the pipeline is executed with the `--fast` flag, only the output of eggNOG-mapper is used to determine the product of proteins that were labeled as hypothetical by the gene caller.\r\n\r\n#### Detection of pseudogenes and spurious ORFs\r\n\r\n`mettannotator` uses several approaches to detect pseudogenes and spurious ORFs:\r\n\r\n- If Bakta is used as the initial annotation tool, `mettannotator` will inherit the pseudogene labels assigned by Bakta.\r\n- `mettannotator` runs Pseudofinder and labels genes that Pseudofinder predicts to be pseudogenes by 
adding `\"pseudo=true\"` to the 9th column of the final merged GFF file. If there is a disagreement between Pseudofinder and Bakta and one of the tools calls a gene a pseudogene, it will be labeled as a pseudogene.\r\n- AntiFam, which is a part of InterPro, is used to identify potential spurious ORFs. If an ORF has an AntiFam hit, `mettannotator` will remove it from the final merged GFF file. These ORFs will still appear in the raw outputs of Bakta/Prokka and may appear in other tool outputs.\r\n\r\n`mettannotator` produces a report file which is located in the `merged_gff` folder and includes a list of CDS with AntiFam hits and pseudogenes. For each pseudogene, the report shows which tool predicted it.\r\n\r\n### Contents of the tool output folders\r\n\r\nThe output folders of each individual tool contain select output files of the third-party tools used by `mettannotator`. For file descriptions, please refer to the tool documentation. For some tools that don't output a GFF, `mettannotator` converts the output into a GFF.\r\n\r\nNote: if the pipeline completed without errors but some of the tool-specific output folders are empty, those particular tools did not generate any annotations to output.\r\n\r\n\u003ca name=\"submission\"\u003e\u003c/a\u003e\r\n\r\n## Preparing annotations for ENA or GenBank submission\r\n\r\n`mettannotator` produces a final annotation file in GFF3 format. To submit the annotations to data archives, it is first necessary to convert the GFF3 file into the required format, using third-party tools available. `mettannotator` outputs a specially formatted GFF3 file, named `\u003cprefix\u003e_submission.gff` to be used with converters.\r\n\r\n### ENA\r\n\r\nENA accepts annotations in the EMBL flat-file format.\r\nPlease use [EMBLmyGFF3](https://github.com/NBISweden/EMBLmyGFF3) to perform the conversion; the repository includes detailed instructions. 
The two files required for conversion are:\r\n\r\n- the genome FASTA file\r\n- `\u003cmettannotator_results_folder\u003e/\u003cprefix\u003e/functional_annotation/merged_gff/\u003cprefix\u003e_submission.gff`\r\n\r\nPlease note that it is necessary to register the project and locus tags in ENA prior to conversion. Follow links in the [EMBLmyGFF3](https://github.com/NBISweden/EMBLmyGFF3) repository for more details.\r\n\r\n### GenBank\r\n\r\nTo convert annotations for GenBank submission, please use [table2asn](https://www.ncbi.nlm.nih.gov/genbank/table2asn/).\r\nThree files are required:\r\n\r\n- the genome FASTA file\r\n- `\u003cmettannotator_results_folder\u003e/\u003cprefix\u003e/functional_annotation/merged_gff/\u003cprefix\u003e_submission.gff`\r\n- Submission template file (can be generated [here](https://submit.ncbi.nlm.nih.gov/genbank/template/submission/))\r\n\r\nMore instructions on running `table2asn` are available via [GenBank](https://www.ncbi.nlm.nih.gov/genbank/genomes_gff/).\r\n\r\n\u003ca name=\"mobilome\"\u003e\u003c/a\u003e\r\n\r\n## Mobilome annotation\r\n\r\nThe mobilome annotation workflow is not currently integrated into `mettannotator`. However, the outputs produced by `mettannotator` can be used to run [VIRify](https://github.com/EBI-Metagenomics/emg-viral-pipeline) and the [mobilome annotation pipeline](https://github.com/EBI-Metagenomics/mobilome-annotation-pipeline) and the outputs of these tools can be integrated back into the GFF file produced by `mettannotator`.\r\n\r\nAfter installing both tools, follow these steps to add the mobilome annotation:\r\n\r\n1. Run the [viral annotation pipeline](https://github.com/EBI-Metagenomics/emg-viral-pipeline):\r\n\r\n```bash\r\nnextflow run \\\r\n    emg-viral-pipeline/virify.nf \\\r\n    -profile \u003cprofile\u003e \\\r\n    --fasta \u003cgenome_fasta.fna\u003e \\\r\n    --output \u003cprefix\u003e\r\n```\r\n\r\n2. 
Run the [mobilome annotation pipeline](https://github.com/EBI-Metagenomics/mobilome-annotation-pipeline):\r\n\r\n```bash\r\nnextflow run mobilome-annotation-pipeline/main.nf \\\r\n    --assembly \u003cgenome_fasta.fna\u003e \\\r\n    --user_genes true \\\r\n    --prot_gff \u003cmettannotator_results_folder/\u003cprefix\u003e/functional_annotation/merged_gff/\u003cprefix\u003e_annotations.gff \\\r\n    --virify true # only if the next two VIRify files exist, otherwise skip this line \\\r\n    --vir_gff Virify_output_folder/08-final/gff/\u003cprefix\u003e_virify.gff # only if file exists, otherwise skip this line \\\r\n    --vir_checkv Virify_output_folder/07-checkv/\\*quality_summary.tsv # only if the GFF file above exists, otherwise skip this line \\\r\n    --outdir \u003cmobilome_output_folder\u003e \\\r\n    --skip_crispr true \\\r\n    --skip_amr true \\\r\n    -profile \u003cprofile\u003e\r\n```\r\n\r\n3. Integrate the output into the `mettannotator` GFF\r\n\r\n```bash\r\n# Add mobilome to the merged GFF produced by mettannotator\r\npython3 postprocessing/add_mobilome_to_gff.py \\\r\n    -m \u003cmobilome_output_folder\u003e/gff_output_files/mobilome_nogenes.gff \\\r\n    -i \u003cmettannotator_results_folder\u003e/\u003cprefix\u003e/functional_annotation/merged_gff/\u003cprefix\u003e_annotations.gff \\\r\n    -o \u003cprefix\u003e_annotations_with_mobilome.gff\r\n\r\n# Add mobilome to the GFF with descriptions produced by mettannotator\r\npython3 postprocessing/add_mobilome_to_gff.py \\\r\n    -m \u003cmobilome_output_folder\u003e/gff_output_files/mobilome_nogenes.gff \\\r\n    -i \u003cmettannotator_results_folder\u003e/\u003cprefix\u003e/functional_annotation/merged_gff/\u003cprefix\u003e_annotations_with_descriptions.gff \\\r\n    -o \u003cprefix\u003e_annotations_with_descriptions_with_mobilome.gff\r\n```\r\n\r\n4. 
Optional: regenerate the Circos plot with the mobilome track added\r\n\r\n```bash\r\npip install pycirclize\r\npip install matplotlib\r\n\r\npython3 bin/circos_plot.py \\\r\n    -i \u003cprefix\u003e_annotations_with_mobilome.gff \\\r\n    -o plot.png \\\r\n    -p \u003cprefix\u003e \\\r\n    --mobilome\r\n```\r\n\r\n\u003ca name=\"credit\"\u003e\u003c/a\u003e\r\n\r\n## Credits\r\n\r\nebi-metagenomics/mettannotator was originally written by the Microbiome Informatics Team at [EMBL-EBI](https://www.ebi.ac.uk/about/teams/microbiome-informatics/)\r\n\r\n\u003ca name=\"contribute\"\u003e\u003c/a\u003e\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\n\u003ca name=\"cite\"\u003e\u003c/a\u003e\r\n\r\n## Citations\r\n\r\nIf you use the software, please cite:\r\n\r\nGurbich TA, Beracochea M, De Silva NH, Finn RD. mettannotator: a comprehensive and scalable Nextflow annotation pipeline for prokaryotic assemblies. bioRxiv 2024.07.11.603040; doi: https://doi.org/10.1101/2024.07.11.603040\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nThis pipeline uses code developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"MGnify","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1069?version=1","name":"v1.1","author":["Tatiana Gurbich","Martin Beracochea"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1069?version=2","name":"v1.2","author":["Tatiana Gurbich","Martin Beracochea"],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1069?version=3","name":"v1.4.0","author":["Tatiana Gurbich","Martin Beracochea"],"descriptor_type":["NFL"]}]},{"id":"1070","url":"https://workflowhub.eu/workflows/1070","name":"Randomized SVD","description":"**Name:** TruncatedSVD (Randomized SVD)  \r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nTruncatedSVD (Randomized SVD) for computing just 456 singular values out of a (4.5M x 850) size matrix.  \r\nThe input matrix represents a CFD transient simulation of air moving past a cylinder.  
\r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1070?version=1","name":"COMPSs 3.3.1","author":["Cristian Tatu"],"descriptor_type":[]}]},{"id":"1072","url":"https://workflowhub.eu/workflows/1072","name":"KMeans housing","description":"**Name:** KMeans\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nKMeans for clustering the housing.csv dataset (https://github.com/sonarsushant/California-House-Price-Prediction/blob/master/housing.csv). \r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1072?version=1","name":"COMPSs 3.3.1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1072?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1073","url":"https://workflowhub.eu/workflows/1073","name":"CRYOSPARC: acquire -\u003e motionCorr -\u003e ctf -\u003e report","description":"SPA workflow using cryosparc processing engine","organization":"CEMCOF","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1073?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1075","url":"https://workflowhub.eu/workflows/1075","name":"Deepconsensus for Sequel2/2e subreads","description":"# deepconsensus 1.2 snakemake pipeline\r\nThis snakemake-based workflow takes in a subreads.bam and results in a 
deepconsensus.fastq\r\n- no methylation calls !\r\n\r\nThe metadata id of the subreads file needs to be: \"m[numeric]_[numeric]_[numeric].subreads.bam\"\r\n\r\nChunking (how many subjobs) and ccs min quality filter can be adjusted in the config.yaml\r\n\r\nthe checkpoint model for deepconsensus1.2 should be accessible like this:\r\ngsutil cp -r gs://brain-genomics-public/research/deepconsensus/models/v1.2/model_checkpoint/* \"${QS_DIR}\"/model/\r\nif that does not work, try to download all at:\r\nhttps://console.cloud.google.com/storage/browser/brain-genomics-public/research/deepconsensus/models?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))\u0026prefix=\u0026forceOnObjectsSortingFiltering=false\r\n\r\nA run example is included in the run_snake.sh\r\n\r\nFeedback / pull requests welcome!\r\n\r\nDeveloped by Daniel Rickert @ WGGC Düsseldorf\r\n\r\nmore to look at:\r\n\r\nhttps://www.youtube.com/watch?v=TlWtIao2i9E\r\n\r\nhttps://www.nature.com/articles/s41587-022-01435-7\r\n","organization":"WGGC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1075?version=1","name":"main @ fea7480","author":[],"descriptor_type":["SMK"]}]},{"id":"1076","url":"https://workflowhub.eu/workflows/1076","name":"Lysozyme in water full (GitHub)","description":"Lysozyme in water full COMPSs application","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1076?version=1","name":"main @ 8a3f3ef","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1076?version=2","name":"main @ 4d44d8b","author":[],"descriptor_type":[]}]},{"id":"1077","url":"https://workflowhub.eu/workflows/1077","name":"Workflow - Standard processing of 10X single cell ATAC-seq data with SnapATAC2","description":"Workflow for Single-cell 
ATAC-seq standard processing with SnapATAC2.\r\nThis workflow takes a fragment file as input and performs the standard steps of scATAC-seq analysis: filtering, dimension reduction, embedding and visualization of marker genes with SnapATAC2. Finally, the clusters are manually annotated with the help of marker genes. \r\nIn an alternative step, the fragment file can also be generated from a BAM file. \r\n* newer Version: Updated SnapATAC2 version from 2.5.3 to 2.6.4","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1077?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1078","url":"https://workflowhub.eu/workflows/1078","name":"Multisample Batch Correction with SnapATAC2 and Harmony","description":"This Workflow takes a dataset collection of single-cell ATAC-seq fragments and performs:\r\n- preprocessing\r\n- filtering\r\n- concatenation\r\n- dimension reduction\r\n- batch correction (with Harmony and optionally Scanorama and MNC-correct)\r\n- leiden clustering\r\n\r\n* new SnapATAC2 version: from 2.5.3 to 2.6.4","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1078?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1080","url":"https://workflowhub.eu/workflows/1080","name":"Lysozyme in water full, no MPI","description":"Lysozyme in water full COMPSs application","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1080?version=1","name":"Version 1","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"1081","url":"https://workflowhub.eu/workflows/1081","name":"cocomico_wf_simulated","description":"CoCoMiCo analyses on collections of 
simulated communities.\r\nCommunity: scatterplots and KW tests of cooperation and competition\r\npotentials, for each collection.\r\nSimilarity: cooperation and competition potentials vs model\r\nsimilarity, defined as the Jaccard distance on sets of reactions.\r\nAdded value: boxplots comparing the added value of models in\r\ncommunities.","organization":"MISTIC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1081?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"1083","url":"https://workflowhub.eu/workflows/1083","name":"miniTurboID of RAI14, EPHA2 and PHACTR4","description":"KNIME workflow describing the analysis of mass spectrometry dataset related to the publication \"Proximity interactomics identifies RAI14, EPHA2 and PHACTR4 as essential components of Wnt/planar cell polarity pathway in vertebrates\". Workflow was built using the KNIME software container environment, version 4.7.7a, which can be created using \"docker pull cfprot/knime:4.7.7a\" command in Docker. 
The input data for the KNIME workflow (the report.tsv from DIA-NN) can be found on PRIDE repository under the identifier PXD048678.","organization":"Proteomics CEITEC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1083?version=1","name":"Version 1","author":["Kristina Gomoryova"],"descriptor_type":[]}]},{"id":"1086","url":"https://workflowhub.eu/workflows/1086","name":"Java COMPSs Matrix Multiplication, out-of-core using files, reproducible example, data persistence True","description":"**Name:** Matrix Multiplication  \r\n**Contact Person:** support-compss@bsc.es  \r\n**Access Level:** public  \r\n**License Agreement:** Apache2  \r\n**Platform:** COMPSs  \r\n\r\n# Description\r\nMatrix multiplication is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles. The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. 
When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\nN and M have been hardcoded to 6 and 8 respectively.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --classpath=application_sources/jar/matmul.jar matmul.files.Matmul inputFolder/ outputFolder/\r\n``` \r\n\r\nwhere:\r\n  * inputFolder: folder where input files are located\r\n  * outputFolder: folder where output files are located\r\n\r\n# Build\r\n## Option 1: Native java\r\n```\r\njavac src/main/java/matmul/*/*.java\r\ncd src/main/java/; jar cf matmul.jar matmul/\r\ncd ../../../; mv src/main/java/matmul.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\nmvn clean package\r\n```\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1086?version=1","name":"COMPSs 3.3.1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"1088","url":"https://workflowhub.eu/workflows/1088","name":"Java COMPSs Matrix Multiplication, out-of-core using files, reproducible example, data persistence False, MareNostrum V","description":"**Name:** Matrix Multiplication  \r\n**Contact Person:** support-compss@bsc.es  \r\n**Access Level:** public  \r\n**License Agreement:** Apache2  \r\n**Platform:** COMPSs  \r\n\r\n# Description\r\nMatrix multiplication is a binary operation that takes a pair of matrices and produces another matrix.\r\n\r\nIf A is an n×m matrix and B is an m×p matrix, the result AB of their multiplication is an n×p matrix defined only if the number of columns m in A is equal to the number of rows m in B. When multiplying A and B, the elements of the rows in A are multiplied with corresponding columns in B.\r\n\r\nIn this implementation, A and B are square matrices (same number of rows and columns), and so it is the result matrix C. Each matrix is divided in N blocks of M doubles. 
The multiplication of two blocks is done by a multiply task method with a simple three-nested-loop implementation. When executed with COMPSs, the main program generates N^3^ tasks arranged as N^2^ chains of N tasks in the dependency graph.\r\n\r\nN and M have been hardcoded to 6 and 8 respectively.\r\n\r\n# Execution instructions\r\nUsage:\r\n```\r\nruncompss --classpath=application_sources/jar/matmul.jar matmul.files.Matmul inputFolder/ outputFolder/\r\n``` \r\n\r\nwhere:\r\n  * inputFolder: folder where input files are located\r\n  * outputFolder: folder where output files are located\r\n\r\n# Build\r\n## Option 1: Native java\r\n```\r\njavac src/main/java/matmul/*/*.java\r\ncd src/main/java/; jar cf matmul.jar matmul/\r\ncd ../../../; mv src/main/java/matmul.jar jar/\r\n```\r\n\r\n## Option 2: Maven\r\n```\r\nmvn clean package\r\n```\r\n","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1088?version=1","name":"COMPSs 3.3.1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"1089","url":"https://workflowhub.eu/workflows/1089","name":"Differential peak analysis with SnapATAC2","description":"This workflow takes a cell-type-annotated AnnData object (processed with SnapATAC2) and performs peak calling with MACS3 on the cell types. Next, a cell-by-peak matrix is constructed and differential accessibility tests are performed for comparison of either two cell types or one cell type with a background of all other cells. \r\nLastly, differentially accessible marker regions for each cell type are identified. 
","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1089?version=1","name":"Version 1","author":["Timon Schlegel"],"descriptor_type":["GALAXY"]}]},{"id":"1090","url":"https://workflowhub.eu/workflows/1090","name":"qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis","description":"This workflow \n- Reconstruct phylogeny (insert fragments in a reference)\n- Alpha rarefaction analysis\n- Taxonomic analysis","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1090?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1090?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1091","url":"https://workflowhub.eu/workflows/1091","name":"cfDNA UniFlow: A unified preprocessing pipeline for cell-free DNA data from liquid biopsies","description":"cfDNA UniFlow is a unified, standardized, and ready-to-use workflow for processing whole genome sequencing (WGS) cfDNA samples from liquid biopsies. It includes essential steps for pre-processing raw cfDNA samples, quality control and reporting. Additionally, several optional utility functions like GC bias correction and estimation of copy number state are included. 
Finally, we provide specialized methods for extracting coverage derived signals and visualizations comparing cases and controls.\r\n\r\nMore Information can be found in the official [documentation](https://github.com/kircherlab/cfDNA-UniFlow).\r\n","organization":"KircherLab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1091?version=1","name":"main @ 6e11331","author":["Sebastian Röner"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/1091?version=2","name":"v1.0.0","author":["Sebastian Röner"],"descriptor_type":["SMK"]}]},{"id":"1094","url":"https://workflowhub.eu/workflows/1094","name":"GridSearch on kNN","description":"**Name:** GridSearchCV\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nGridSearch of kNN algorithm for the iris.csv dataset (https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv). \r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1094?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1095","url":"https://workflowhub.eu/workflows/1095","name":"GridSearch on kNN non data persistence","description":"**Name:** GridSearchCV\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nGridSearch of kNN algorithm for the iris.csv dataset (https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv). 
\r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"Workflows and Distributed Computing, eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1095?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1096","url":"https://workflowhub.eu/workflows/1096","name":"ERGA-BGE Genome Report ANNOT analyses","description":"The workflow requires the user to provide:\r\n* ENSEMBL link address of the annotation GFF3 file\r\n* ENSEMBL link address of the assembly FASTA file\r\n* NCBI taxonomy ID\r\n* BUSCO lineage\r\n* OMArk database\r\n\r\nThe workflow will produce statistics of the annotation based on AGAT, BUSCO and OMArk.","organization":"ERGA Annotation","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1096?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1096?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"1100","url":"https://workflowhub.eu/workflows/1100","name":"nf-core/denovotranscript","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-denovotranscript_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/denovotranscript\" src=\"docs/images/nf-core-denovotranscript_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n[![GitHub Actions CI Status](https://github.com/nf-core/denovotranscript/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/denovotranscript/actions/workflows/ci.yml)\n[![GitHub Actions Linting 
Status](https://github.com/nf-core/denovotranscript/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/denovotranscript/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/denovotranscript/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13324371-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13324371)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/denovotranscript)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23denovotranscript-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/denovotranscript)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/denovotranscript** is a bioinformatics pipeline 
for de novo transcriptome assembly of paired-end short reads from bulk RNA-seq. It takes a samplesheet and FASTQ files as input, performs quality control (QC), trimming, assembly, redundancy reduction, pseudoalignment, and quantification. It outputs a transcriptome assembly FASTA file, a transcript abundance TSV file, and a MultiQC report with assembly quality and read QC metrics.\n\n![nf-core/transfuse metro map](docs/images/denovotranscript_metro_map.drawio.svg)\n\n1. Read QC of raw reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Adapter and quality trimming ([`fastp`](https://github.com/OpenGene/fastp))\n3. Read QC of trimmed reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n4. Remove rRNA or mitochondrial DNA (optional) ([`SortMeRNA`](https://hpc.nih.gov/apps/sortmeRNA.html))\n5. Transcriptome assembly using any combination of the following:\n\n   - [`Trinity`](https://github.com/trinityrnaseq/trinityrnaseq/wiki) with normalised reads (default=True)\n   - [`Trinity`](https://github.com/trinityrnaseq/trinityrnaseq/wiki) with non-normalised reads\n   - [`rnaSPAdes`](https://ablab.github.io/spades/rna.html) medium filtered transcripts outputted (default=True)\n   - [`rnaSPAdes`](https://ablab.github.io/spades/rna.html) soft filtered transcripts outputted\n   - [`rnaSPAdes`](https://ablab.github.io/spades/rna.html) hard filtered transcripts outputted\n\n6. Redundancy reduction with [`Evidential Gene tr2aacds`](http://arthropods.eugenes.org/EvidentialGene/). A transcript to gene mapping is produced from Evidential Gene's outputs using [`gawk`](https://www.gnu.org/software/gawk/).\n7. Assembly completeness QC ([`BUSCO`](https://busco.ezlab.org/))\n8. Other assembly quality metrics ([`rnaQUAST`](https://github.com/ablab/rnaquast))\n9. Transcriptome quality assessment with [`TransRate`](https://hibberdlab.com/transrate/), including the use of reads for assembly evaluation. 
This step is not performed if profile is set to `conda` or `mamba`.\n10. Pseudo-alignment and quantification ([`Salmon`](https://combine-lab.github.io/salmon/))\n11. HTML report for raw reads, trimmed reads, BUSCO, and Salmon ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/denovotranscript \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/denovotranscript/usage) and the [parameter documentation](https://nf-co.re/denovotranscript/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/denovotranscript/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/denovotranscript/output).\n\n## Credits\n\nnf-core/denovotranscript was written by Avani Bhojwani ([@avani-bhojwani](https://github.com/avani-bhojwani/)) and Timothy Little ([@timslittle](https://github.com/timslittle/)).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#denovotranscript` channel](https://nfcore.slack.com/channels/denovotranscript) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/denovotranscript for your analysis, please cite it using the following doi: [10.5281/zenodo.13324371](https://doi.org/10.5281/zenodo.13324371)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven 
Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1100?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1100?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1100?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1100?version=4","name":"1.2.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1102","url":"https://workflowhub.eu/workflows/1102","name":"RiboSnake: 16S rRNA analysis workflow with QIIME2 and Snakemake","description":"# RiboSnake: 16S rRNA analysis workflow with QIIME2 and Snakemake\r\n\r\n[![Snakemake](https://img.shields.io/badge/snakemake-≥6.10-brightgreen.svg)](https://snakemake.bitbucket.io)\r\n[![Build Status](https://travis-ci.org/snakemake-workflows/16S.svg?branch=master)](https://travis-ci.org/snakemake-workflows/16S)\r\n\r\nQiime2 workflow for 16S analysis created with snakemake.\r\n\r\n## Authors\r\n\r\n* Ann-Kathrin Dörr (@AKBrueggemann)\r\n\r\n## Usage\r\n\r\nIf you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this (original) repository and, if available, its DOI (see above).\r\n\r\n### Step 1: Obtain a copy of this workflow\r\n\r\nIf you want to use the workflow, please obtain a copy of it by either:\r\n[Cloning](https://help.github.com/en/articles/cloning-a-repository) the repository to your local system, into the place where you want to perform the data analysis or\r\nDownloading a zip-file of the repository to your local machine.\r\n\r\nWhen you have the folder structure added on your local machine, please add a \"data\" folder 
manually.\r\n\r\n### Step 2: Configure workflow\r\n\r\nConfigure the workflow according to your needs via editing the files in the `config/` folder. Adjust `config.yaml` to configure the workflow execution, and `metadata.txt` to specify your sample setup.\r\n\r\nSome important parameters you should check and set according to your own FASTQ-files in the `config.yaml` are primers for the forward and reverse reads, the `datatype`, that should be used by QIIME2 and the `min-seq-length`. Based on the sequencing, the length of the reads can vary.\r\n\r\nThe default parameters for filtering and truncation were validated with the help of a MOCK community and fitted to retrieve all bacteria from that community.\r\n\r\nIn addition to that, you need to fit the metadata-parameters to your data. Please change the names of the used metadata-columns according to your information.\r\nTake special care of the \"remove-columns\" information. Here you can add the columns you don't want to have analyzed or the workflow can't analyze. This can happen when\r\nall of the values in one column are unique or all the same. You should also look out for the information under \"metadata-parameters\" and \"songbird\" as well as \"ancom\".\r\nIn every case you have to specify the column names based on your own data.\r\n\r\nIf your metadata is not containing numeric values, please use the \"reduced-analysis\" option in the config file to run the workflow, as the workflow is currently not able to run only on categorical metadata for the full analysis version. We are going to fix that in the future.\r\n\r\nThe workflow is able to perform clustering and denoising either with vsearch, leading to OTU creation, or with DADA2, creating ASVs. You can decide which modus to use by setting the variable \"DADA2\" to `True` (DADA2 usage) or `False` (vsearch).\r\n\r\nPlease make sure, that the names of your FASTQ files are correctly formatted. 
They should look like this:\r\n\r\n    samplename_SNumber_Lane_R1/R2_001.fastq.gz\r\n\r\nIn the config file you can also set the input and output directory. You can either create a specific directory for your input data and then put that filepath in the config file, or you can put the path to an existing directory where the data is located.\r\nThe data will then be copied to the workflow's data directory. The compressed and final file holding the results will be copied to the directory you specified in \"output\". It will also stay in the local \"results\" folder together with important intermediate results.\r\nThe \"data\" folder is also not provided by the repository. It is the folder the fastq files are copied to before being used in the workflow. It is best if you create the folder inside the workflows folder structure. It must definitely be created on the machine, the workflow is running on.\r\n\r\n### Step 3: Install Snakemake\r\n\r\nCreate a snakemake environment using [mamba](https://mamba.readthedocs.io/en/latest/) via:\r\n\r\n    mamba create -c conda-forge -c bioconda -n snakemake snakemake\r\n\r\nFor installation details, see the [instructions in the Snakemake documentation](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html).\r\n\r\n### Step 4: Execute workflow\r\n\r\nActivate the conda environment:\r\n\r\n    conda activate snakemake\r\n\r\nFill up the `metadata.txt` with the information of your samples:\r\n\r\n    Please be careful to not include spaces between the commas. 
If there is a column, that you don't have any information about, please leave it empty and simply go on with the next column.\r\n\r\nTest your configuration by performing a dry-run via\r\n\r\n    snakemake --use-conda -n\r\n\r\nExecuting the workflow takes two steps:\r\n\r\n    Data preparation: snakemake --cores $N --use-conda data_prep\r\n    Workflow execution: snakemake --cores $N --use-conda\r\n\r\nusing `$N` cores.\r\n\r\nWhen running on snakemake \u003e 8.0 we recommend setting the --shared-fs-usage none as well as setting the environment variable TEMP to a local directory to prevent problems with the usage of the fs-storage system.\r\nThe environment variable can be set like this:\r\n\r\n    conda activate your_environment_name\r\n    export TEMP=/path/to/local/tmp\r\n\r\nThen run the snakemake command like above with the addition of the storage flag:\r\n\r\n    snakemake --cores $N --use-conda --shared-fs-usage none\r\n\r\n### Step 5: Investigate results\r\n\r\nAfter successful execution, the workflow provides you with a compressed folder, holding all interesting results ready to decompress or to download to your local machine.\r\nThe compressed file 16S-report.tar.gz holds several qiime2-artifacts that can be inspected via qiime-view. In the zipped folder report.zip is the snakemake html\r\nreport holding graphics as well as the DAG of the executed jobs and html files leading you directly to the qiime2-results, without the need of using qiime-view.\r\n\r\nThis report can, e.g., be forwarded to your collaborators.\r\n\r\n### Step 6: Obtain updates from upstream\r\n\r\nWhenever you want to synchronize your workflow copy with new developments from upstream, do the following.\r\n\r\n1. Once, register the upstream repository in your local copy: `git remote add -f upstream git@github.com:snakemake-workflows/16S.git` or `git remote add -f upstream https://github.com/snakemake-workflows/16S.git` if you do not have setup ssh keys.\r\n2. 
Update the upstream version: `git fetch upstream`.\r\n3. Create a diff with the current version: `git diff HEAD upstream/master workflow \u003e upstream-changes.diff`.\r\n4. Investigate the changes: `vim upstream-changes.diff`.\r\n5. Apply the modified diff via: `git apply upstream-changes.diff`.\r\n6. Carefully check whether you need to update the config files: `git diff HEAD upstream/master config`. If so, do it manually, and only where necessary, since you would otherwise likely overwrite your settings and samples.\r\n\r\n## Contribute back\r\n\r\nIn case you have also changed or added steps, please consider contributing them back to the original repository:\r\n\r\n### Step 1: Forking the repository\r\n\r\n[Fork](https://help.github.com/en/articles/fork-a-repo) the original repo to a personal or lab account.\r\n\r\n### Step 2: Cloning\r\n\r\n[Clone](https://help.github.com/en/articles/cloning-a-repository) the fork to your local system, to a different place than where you ran your analysis.\r\n\r\n### Step 3: Add changes\r\n\r\n1. Copy the modified files from your analysis to the clone of your fork, e.g., `cp -r workflow path/to/fork`. Make sure to **not** accidentally copy config file contents or sample sheets. Instead, manually update the example config files if necessary.\r\n2. Commit and push your changes to your fork.\r\n3. Create a [pull request](https://help.github.com/en/articles/creating-a-pull-request) against the original repository.\r\n4. If you want to add your config file and the parameters as a new default parameter sets, please do this by opening a pull request adding the file to the \"contributions\" folder.\r\n\r\n## Testing\r\n\r\nTest cases are in the subfolder `.test`. They are automatically executed via continuous integration with [Github Actions](https://github.com/features/actions).\r\nIf you want to test the RiboSnake functions yourself, you can use the same data used for the CI/CD tests. 
The used fastq files can be downloaded [here](https://data.qiime2.org/2022.2/tutorials/importing/casava-18-paired-end-demultiplexed.zip). They have been published by Neilson et al., mSystems, 2017.\r\n\r\n### Example\r\n\r\n1. First clone the repository to your local machine as described above.\r\n2. Download a dataset of your liking, or the data used for testing the pipeline. The FASTQ files can be downloaded with:\r\n    curl -sL \\\r\n          \"https://data.qiime2.org/2022.2/tutorials/importing/casava-18-paired-end-demultiplexed.zip\"\r\n3. Unzip the data into a folder of your liking, it can be called \"incoming\" but it does not have to be.\r\nIf you name your folder differently, please change the \"input\" path in the config file.\r\n4. If you don't want to use the whole dataset for testing, remove some of the FASTQ files from the folder:\r\n    rm PAP*\r\n    rm YUN*\r\n    rm Rep*\r\n    rm blank*\r\n5. Use the information that can be found in [this](https://data.qiime2.org/2024.5/tutorials/atacama-soils/sample_metadata.tsv) file from the Qiime2 tutorial, to fill out your metadata.txt file for the samples starting with \"BAQ\".\r\n6. The default-parameters to be used in the config file can be found in the provided file \"PowerSoil-Illumina-soil.yaml\" in the config folder.\r\n7. 
With these parameters and the previous steps, you should be able to execute the workflow.\r\n\r\n## Tools\r\n\r\nA list of the tools used in this pipeline:\r\n\r\n| Tool         | Link                                              |\r\n|--------------|---------------------------------------------------|\r\n| QIIME2       | www.doi.org/10.1038/s41587-019-0209-9             |\r\n| Snakemake    | www.doi.org/10.12688/f1000research.29032.1        |\r\n| FastQC       | www.bioinformatics.babraham.ac.uk/projects/fastqc |\r\n| MultiQC      | www.doi.org/10.1093/bioinformatics/btw354         |\r\n| pandas       | pandas.pydata.org                                 |\r\n| kraken2      | www.doi.org/10.1186/s13059-019-1891-0             |\r\n| vsearch      | www.github.com/torognes/vsearch                   |\r\n| DADA2        | www.doi.org/10.1038/nmeth.3869                    |\r\n| songbird     | www.doi.org/10.1038/s41467-019-10656-5            |\r\n| bowtie2      | www.doi.org/10.1038/nmeth.1923                    |\r\n| Ancom        | www.doi.org/10.3402/mehd.v26.27663                |\r\n| cutadapt     | www.doi.org/10.14806/ej.17.1.200                  |\r\n| BLAST        | www.doi.org/10.1016/S0022-2836(05)80360-2         |\r\n| gneiss       | www.doi.org/10.1128/mSystems.00162-16             |\r\n| qurro        | www.doi.org/10.1093/nargab/lqaa023                |\r\n| Rescript     | www.doi.org/10.1371/journal.pcbi.1009581          |","organization":"16S rRNA Analysis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1102?version=1","name":"main @ 956dc5c","author":["Ann-Kathrin Dörr"],"descriptor_type":["SMK"]}]},{"id":"1103","url":"https://workflowhub.eu/workflows/1103","name":"ERGA-BGE Genome Report ASM analyses (one-asm WGS Illumina PE + HiC)","description":"**Assembly Evaluation for ERGA-BGE Reports**\r\n\r\n_One Assembly, Illumina WGS reads + HiC reads_\r\n\r\nThe 
workflow requires the following:\r\n* Species Taxonomy ID number\r\n* NCBI Genome assembly accession code\r\n* BUSCO Lineage\r\n* WGS accurate reads accession code\r\n* NCBI HiC reads accession code\r\n\r\nThe workflow will get the data and process it to generate genome profiling (genomescope, smudgeplot -optional-), assembly stats (gfastats), merqury stats (QV, completeness), BUSCO, snailplot, contamination blobplot, and HiC heatmap.\r\n\r\n**Use this workflow for ONT-based assemblies where the WGS accurate reads are Illumina PE**","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1103?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1103?version=2","name":"Version 1.1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1103?version=3","name":"Version 1.2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"1104","url":"https://workflowhub.eu/workflows/1104","name":"ERGA-BGE Genome Report ASM analyses (one-asm HiFi + HiC)","description":"**Assembly Evaluation for ERGA-BGE Reports**\r\n\r\n_One Assembly, HiFi WGS reads + HiC reads_\r\n\r\nThe workflow requires the following:\r\n* Species Taxonomy ID number\r\n* NCBI Genome assembly accession code\r\n* BUSCO Lineage\r\n* WGS accurate reads accession code\r\n* NCBI HiC reads accession code\r\n\r\nThe workflow will get the data and process it to generate genome profiling (genomescope, smudgeplot -optional-), assembly stats (gfastats), merqury stats (QV, completeness), BUSCO, snailplot, contamination blobplot, and HiC heatmap.\r\n\r\n**Use this workflow for HiFi-based assemblies where the WGS accurate reads are PacBio HiFi**","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1104?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1104?version=2","name":"Version 1.1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"1105","url":"https://workflowhub.eu/workflows/1105","name":"Marine Omics identifying biosynthetic gene clusters","description":"Secondary metabolite biosynthetic gene cluster (SMBGC) Annotation using Neural Networks Trained on Interpro Signatures ","organization":"FAIR-EASE, usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1105?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1106","url":"https://workflowhub.eu/workflows/1106","name":"Swedish Earth Biogenome Project Genome Assembly Workflow","description":"# Swedish Earth Biogenome Project - Genome Assembly Workflow\r\n\r\nThe primary genome assembly workflow for the Earth Biogenome Project at NBIS.\r\n\r\n## Workflow overview\r\n\r\nGeneral aim:\r\n\r\n```mermaid\r\nflowchart LR\r\n    hifi[/ HiFi reads /] --\u003e data_inspection\r\n    ont[/ ONT reads /] --\u003e  data_inspection\r\n    hic[/ Hi-C reads /] --\u003e data_inspection\r\n    data_inspection[[ Data inspection ]] --\u003e preprocessing\r\n    preprocessing[[ Preprocessing ]] --\u003e assemble\r\n    assemble[[ Assemble ]] --\u003e validation\r\n    validation[[ Assembly validation ]] --\u003e curation\r\n    curation[[ Assembly curation ]] --\u003e validation\r\n```\r\n\r\nCurrent implementation:\r\n\r\n```mermaid\r\nflowchart TD\r\n    input[/ Input file/] --\u003e hifi\r\n    input --\u003e hic\r\n    input --\u003e taxonkit[[ TaxonKit name2taxid/reformat ]]\r\n    taxonkit --\u003e goat_taxon[[ GOAT taxon search ]]\r\n    goat_taxon --\u003e busco\r\n    goat_taxon --\u003e dtol[[ DToL lookup 
]]\r\n    hifi --\u003e samtools_fa[[ Samtools fasta ]]\r\n    samtools_fa --\u003e fastk_hifi\r\n    samtools_fa --\u003e mash_screen\r\n    hifi[/ HiFi reads /] --\u003e fastk_hifi[[ FastK - HiFi ]]\r\n    hifi --\u003e meryl_hifi[[ Meryl - HiFi ]]\r\n    hic[/ Hi-C reads /] --\u003e fastk_hic[[ FastK - Hi-C ]]\r\n    hifi --\u003e meryl_hic[[ Meryl - Hi-C ]]\r\n    assembly[/ Assembly /] --\u003e quast[[ Quast ]]\r\n    fastk_hifi --\u003e histex[[ Histex ]]\r\n    histex --\u003e genescopefk[[ GeneScopeFK ]]\r\n    fastk_hifi --\u003e ploidyplot[[ PloidyPlot ]]\r\n    fastk_hifi --\u003e katgc[[ KatGC ]]\r\n    fastk_hifi --\u003e merquryfk[[ MerquryFK ]]\r\n    assembly --\u003e merquryfk\r\n    meryl_hifi --\u003e merqury[[ Merqury ]]\r\n    assembly --\u003e merqury\r\n    fastk_hifi --\u003e katcomp[[ KatComp ]]\r\n    fastk_hic --\u003e katcomp\r\n    assembly --\u003e busco[[ Busco ]]\r\n    refseq_sketch[( RefSeq sketch )] --\u003e mash_screen[[ Mash Screen ]]\r\n    hifi --\u003e mash_screen\r\n    fastk_hifi --\u003e hifiasm[[ HiFiasm ]]\r\n    hifiasm --\u003e assembly\r\n    assembly --\u003e purgedups[[ Purgedups ]]\r\n    input --\u003e mitoref[[ Mitohifi - Find reference ]]\r\n    assembly --\u003e mitohifi[[ Mitohifi ]]\r\n    assembly --\u003e fcsgx[[ FCS GX ]]\r\n    fcs_fetchdb[( FCS fetchdb )] --\u003e fcsgx\r\n    mitoref --\u003e mitohifi\r\n    genescopefk --\u003e quarto[[ Quarto ]]\r\n    goat_taxon --\u003e multiqc[[ MultiQC ]]\r\n    quarto --\u003e multiqc\r\n    dtol --\u003e multiqc\r\n    katgc --\u003e multiqc\r\n    ploidyplot --\u003e multiqc\r\n    busco --\u003e multiqc\r\n    quast --\u003e multiqc\r\n```\r\n\r\n## Usage\r\n\r\n```bash\r\nnextflow run -params-file \u003cparams.yml\u003e \\\r\n    [ -c \u003ccustom.config\u003e ] \\\r\n    [ -profile \u003cprofile\u003e ] \\\r\n    NBISweden/Earth-Biogenome-Project-pilot\r\n```\r\n\r\nwhere:\r\n- `params.yml` is a YAML formatted file containing workflow parameters\r\n    such 
as input paths to the assembly specification, and settings for tools within the workflow.\r\n\r\n    Example:\r\n\r\n    ```yml\r\n    input: 'assembly_spec.yml'\r\n    outdir: results\r\n    fastk: # Optional\r\n      kmer_size: 31 # default 31\r\n    genescopefk: # Optional\r\n      kmer_size: 31 # default 31\r\n    hifiasm: # Optional, default = no extra options: Key (e.g. 'opts01') is used in assembly build name (e.g., 'hifiasm-raw-opts01').\r\n      opts01: \"--opts A\"\r\n      opts02: \"--opts B\"\r\n    busco: # Optional, default: retrieved from GOAT\r\n      lineages: 'auto' # comma separated string of lineages or auto.\r\n    ```\r\n\r\n    Alternatively parameters can be provided on the\r\n    command-line using the `--parameter` notation (e.g., `--input \u003cpath\u003e` ).\r\n- `\u003ccustom.config\u003e` is a Nextflow configuration file which provides\r\n    additional configuration. This is used to customise settings other than\r\n    workflow parameters, such as cpus, time, and command-line options to tools.\r\n\r\n    Example:\r\n    ```nextflow\r\n    process {\r\n        withName: 'BUSCO' {  // Selects the process to apply settings.\r\n            cpus     = 6     // Overrides cpu settings defined in nextflow.config\r\n            time     = 4.d   // Overrides time settings defined in nextflow.config to 4 days. Use .h for hours, .m for minutes.\r\n            memory   = '20GB'  // Overrides memory settings defined in nextflow.config to 20 GB.\r\n            // ext.args supplies command-line options to the process tool\r\n            // overrides settings found in configs/modules.config\r\n            ext.args = '--long'  // Supplies these as command-line options to Busco\r\n        }\r\n    }\r\n    ```\r\n- `\u003cprofile\u003e` is one of the preconfigured execution profiles\r\n    (`uppmax`, `singularity_local`, `docker_local`, etc: see nextflow.config). 
Alternatively,\r\n    you can provide a custom configuration to configure this workflow\r\n    to your execution environment. See [Nextflow Configuration](https://www.nextflow.io/docs/latest/config.html#scope-executor)\r\n    for more details.\r\n\r\n\r\n### Workflow parameter inputs\r\n\r\nMandatory:\r\n\r\n- `input`: A YAML formatted input file.\r\n    Example `assembly_spec.yml` (See also [test profile input](assets/test_hsapiens.yml) TODO:: Update test profile):\r\n\r\n    ```yml\r\n    sample:                          # Required: Meta data\r\n      name: 'Laetiporus sulphureus'  # Required: Species name. Correct spelling is important to look up species information.\r\n      ploidy: 2                      # Optional: Estimated ploidy (default: retrieved from GOAT)\r\n      genome_size: 2345              # Optional: Estimated genome size (default: retrieved from GOAT)\r\n      haploid_number: 13             # Optional: Estimated haploid chromosome count (default: retrieved from GOAT)\r\n      taxid: 5630                    # Optional: Taxon ID (default: retrieved with Taxonkit)\r\n      kingdom: Eukaryota             # Optional: (default: retrived with Taxonkit)\r\n    assembly:                        # Optional: List of assemblies to curate and validate.\r\n      - assembler: hifiasm           # For each entry, the assembler,\r\n        stage: raw                   # stage of assembly,\r\n        id: uuid                     # unique id,\r\n        pri_fasta: /path/to/primary_asm.fasta # and paths to sequences are required.\r\n        alt_fasta: /path/to/alternate_asm.fasta\r\n        pri_gfa: /path/to/primary_asm.gfa\r\n        alt_gfa: /path/to/alternate_asm.gfa\r\n      - assembler: ipa\r\n        stage: raw\r\n        id: uuid\r\n        pri_fasta: /path/to/primary_asm.fasta\r\n        alt_fasta: /path/to/alternate_asm.fasta\r\n    hic:                             # Optional: List of hi-c reads to QC and use for scaffolding\r\n      - read1: 
'/path/to/raw/data/hic/LS_HIC_R001_1.fastq.gz'\r\n        read2: '/path/to/raw/data/hic/LS_HIC_R001_2.fastq.gz'\r\n    hifi:                            # Required: List of hifi-reads to QC and use for assembly/validation\r\n      - reads: '/path/to/raw/data/hifi/LS_HIFI_R001.bam'\r\n    rnaseq:                          # Optional: List of Rna-seq reads to use for validation\r\n      - read1: '/path/to/raw/data/rnaseq/LS_RNASEQ_R001_1.fastq.gz'\r\n        read2: '/path/to/raw/data/rnaseq/LS_RNASEQ_R001_2.fastq.gz'\r\n    isoseq:                          # Optional: List of Isoseq reads to use for validation\r\n      - reads: '/path/to/raw/data/isoseq/LS_ISOSEQ_R001.bam'\r\n    ```\r\n\r\n\r\nOptional:\r\n\r\n- `outdir`: The publishing path for results (default: `results`).\r\n- `publish_mode`: (values: `'symlink'` (default), `'copy'`) The file\r\npublishing method from the intermediate results folders\r\n(see [Table of publish modes](https://www.nextflow.io/docs/latest/process.html#publishdir)).\r\n- `steps`: The workflow steps to execute (default is all steps). Choose from:\r\n\r\n    - `inspect`: 01 - Read inspection\r\n    - `preprocess`: 02 - Read preprocessing\r\n    - `assemble`: 03 - Assembly\r\n    - `purge`: 04 - Duplicate purging\r\n    - `polish`: 05 - Error polishing\r\n    - `screen`: 06 - Contamination screening\r\n    - `scaffold`: 07 - Scaffolding\r\n    - `curate`: 08 - Rapid curation\r\n    - `alignRNA`: 09 - Align RNAseq data\r\n\r\nSoftware specific:\r\n\r\nTool specific settings are provided by supplying values to specific keys or supplying an array of\r\nsettings under a tool name. 
The input to `-params-file` would look like this:\r\n\r\n```yml\r\ninput: assembly.yml\r\noutdir: results\r\nfastk:\r\n  kmer_size: 31\r\ngenescopefk:\r\n  kmer_size: 31\r\nhifiasm:\r\n  opts01: \"--opts A\"\r\n  opts02: \"--opts B\"\r\nbusco:\r\n  lineages: 'auto'\r\n```\r\n\r\n- `multiqc_config`: Path to MultiQC configuration file (default: `configs/multiqc_conf.yaml`).\r\n\r\nUppmax and PDC cluster specific:\r\n\r\n- `project`: NAISS Compute allocation number.\r\n\r\n### Workflow outputs\r\n\r\nAll results are published to the path assigned to the workflow parameter `results`.\r\n\r\nTODO:: List folder contents in results file\r\n### Customization for Uppmax\r\n\r\nA custom profile named `uppmax` is available to run this workflow specifically\r\non UPPMAX clusters. The process `executor` is `slurm` so jobs are\r\nsubmitted to the Slurm Queue Manager. All jobs submitted to slurm\r\nmust have a project allocation. This is automatically added to the `clusterOptions`\r\nin the `uppmax` profile. All Uppmax clusters have node local disk space to do\r\ncomputations, and prevent heavy input/output over the network (which\r\nslows down the cluster for all).\r\nThe path to this disk space is provided by the `$SNIC_TMP` variable, used by\r\nthe `process.scratch` directive in the `uppmax` profile. Lastly\r\nthe profile enables the use of Singularity so that all processes must be\r\nexecuted within Singularity containers. See [nextflow.config](nextflow.config)\r\nfor the profile specification.\r\n\r\nThe profile is enabled using the `-profile` parameter to nextflow:\r\n```bash\r\nnextflow run -profile uppmax \u003cnextflow_script\u003e\r\n```\r\n\r\nA NAISS compute allocation should also be supplied using the `--project` parameter.\r\n\r\n### Customization for PDC\r\n\r\nA custom profile named `dardel` is available to run this workflow specifically\r\non the PDC cluster *Dardel*. The process `executor` is `slurm` so jobs are\r\nsubmitted to the Slurm Queue Manager. 
All jobs submitted to slurm\r\nmust have a project allocation. This is automatically added to the `clusterOptions`\r\nin the `dardel` profile. Calculations are performed in the scratch space allocated\r\nby `PDC_TMP` which is also on the lustre file system and is not node local storage.\r\nThe path to this disk space is provided by the `$PDC_TMP` variable, used by\r\nthe `process.scratch` directive in the `dardel` profile. Lastly\r\nthe profile enables the use of Singularity so that all processes must be\r\nexecuted within Singularity containers. See [nextflow.config](nextflow.config)\r\nfor the profile specification.\r\n\r\nThe profile is enabled using the `-profile` parameter to nextflow:\r\n```bash\r\nnextflow run -profile dardel \u003cnextflow_script\u003e\r\n```\r\n\r\nA NAISS compute allocation should also be supplied using the `--project` parameter.\r\n\r\n## Workflow organization\r\n\r\nThe workflows in this folder manage the execution of your analyses\r\nfrom beginning to end.\r\n\r\n```\r\nworkflow/\r\n | - .github/                        Github data such as actions to run\r\n | - assets/                         Workflow assets such as test samplesheets\r\n | - bin/                            Custom workflow scripts\r\n | - configs/                        Configuration files that govern workflow execution\r\n | - dockerfiles/                    Custom container definition files\r\n | - docs/                           Workflow usage and interpretation information\r\n | - modules/                        Process definitions for tools used in the workflow\r\n | - subworkflows/                   Custom workflows for different stages of the main analysis\r\n | - tests/                          Workflow tests\r\n | - main.nf                         The primary analysis script\r\n | - nextflow.config                 General Nextflow configuration\r\n \\ - modules.json                    nf-core file which tracks modules/subworkflows from 
nf-core\r\n```\r\n\r\n","organization":"ERGA Assembly, NBIS","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1106?version=1","name":"main @ bae13f1","author":["Mahesh Binzer-Panchal","Martin Pippel"],"descriptor_type":["NFL"]}]},{"id":"1107","url":"https://workflowhub.eu/workflows/1107","name":"Protein-protein and protein-nucleic acid binding site prediction via interpretable hierarchical geometric deep learning","description":"GraphRBF is a state-of-the-art protein-protein/nucleic acid interaction site prediction model built by enhanced graph neural networks and prioritized radial basis function neural networks. \r\nThis project serves users to use our software to directly predict protein binding sites or train our model on a new database.  \r\nIdentification of protein-protein and protein-nucleic acid binding sites provides insights into biological processes related to protein functions and technical guidance for disease diagnosis and drug design. However, accurate predictions by computational approaches remain highly challenging due to the limited knowledge of residue binding patterns. The binding pattern of a residue should be characterized by the spatial distribution of its neighboring residues combined with their physicochemical information interaction, which yet can not be achieved by previous methods. Here, we design GraphRBF, a hierarchical geometric deep learning model to learn residue binding patterns from big data. To achieve it, GraphRBF describes physicochemical information interactions by designing an enhanced graph neural network and characterizes residue spatial distributions by introducing a prioritized radial basis function neural network. After training and testing, GraphRBF shows great improvements over existing state-of-the-art methods and strong interpretability of its learned representations. 
\r\n","organization":"Protein-protein and protein-nucleic acid binding site prediction research","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1107?version=1","name":"main @ 0fc2d88","author":["仕卓 张"],"descriptor_type":[]}]},{"id":"1108","url":"https://workflowhub.eu/workflows/1108","name":"nf-core/pairgenomealign","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-pairgenomealign_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/pairgenomealign\" src=\"docs/images/nf-core-pairgenomealign_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/pairgenomealign/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/pairgenomealign/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/pairgenomealign/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/pairgenomealign/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/pairgenomealign/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13910535-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13910535)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/pairgenomealign)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23pairgenomealign-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/pairgenomealign)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/pairgenomealign** is a bioinformatics pipeline that aligns one or more _query_ genomes to a _target_ genome, and plots pairwise representations.\n\n![Tubemap workflow summary](docs/images/pairgenomealign-tubemap.png \"Tubemap workflow summary\")\n\nThe main steps of the pipeline are:\n\n1. Genome QC ([`assembly-scan`](https://github.com/rpetit3/assembly-scan)).\n2. Genome indexing ([`lastdb`](https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst)).\n3. Genome pairwise alignments ([`lastal`](https://gitlab.com/mcfrith/last/-/blob/main/doc/lastal.rst)).\n4. Alignment plotting ([`last-dotplot`](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-dotplot.rst)).\n5. 
Alignment export to various formats with [`maf-convert`](https://gitlab.com/mcfrith/last/-/blob/main/doc/maf-convert.rst), plus [`Samtools`](https://www.htslib.org/) for SAM/BAM/CRAM.\n\nThe pipeline can generate four kinds of outputs, called _many-to-many_, _many-to-one_, _one-to-many_ and _one-to-one_, depending on whether sequences of one genome are allowed match the other genome multiple times or not.\n\nThese alignments are output in [MAF](https://genome.ucsc.edu/FAQ/FAQformat.html#format5) format, and optional line plot representations are output in PNG format.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nquery_1,path-to-query-genome-file-one.fasta\nquery_2,path-to-query-genome-file-two.fasta\n```\n\nEach row represents a fasta file, this can also contain multiple rows to accomodate multiple query genomes in fasta format.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/pairgenomealign \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --target sequencefile.fa \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/pairgenomealign/usage) and the [parameter documentation](https://nf-co.re/pairgenomealign/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/pairgenomealign/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/pairgenomealign/output).\n\n## Credits\n\n`nf-core/pairgenomealign` was originally written by [charles-plessy](https://github.com/charles-plessy); the original versions are available at \u003chttps://github.com/oist/plessy_pairwiseGenomeComparison\u003e.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Mahdi Mohammed](https://github.com/U13bs1125) ported the original pipeline to _nf-core_ template 2.14.x.\n- [Martin Frith](https://github.com/mcfrith/), the author of LAST, gave us extensive feedback and advices.\n- [Michael Mansfield](https://github.com/mjmansfi) tested the pipeline and provided critical comments.\n- [Aleksandra Bliznina](https://github.com/aleksandrabliznina) contributed to the creation of the initial `last/*` modules.\n- [Jiashun Miao](https://github.com/miaojiashun) and [Huyen Pham](https://github.com/ngochuyenpham) tested the pipeline on vertebrate genomes.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#pairgenomealign` 
channel](https://nfcore.slack.com/channels/pairgenomealign) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use this pipeline, please cite:\n\n\u003e **Extreme genome scrambling in marine planktonic Oikopleura dioica cryptic species.**\n\u003e Charles Plessy, Michael J. Mansfield, Aleksandra Bliznina, Aki Masunaga, Charlotte West, Yongkai Tan, Andrew W. Liu, Jan Grašič, María Sara del Río Pisula, Gaspar Sánchez-Serna, Marc Fabrega-Torrus, Alfonso Ferrández-Roldán, Vittoria Roncalli, Pavla Navratilova, Eric M. Thompson, Takeshi Onuma, Hiroki Nishida, Cristian Cañestro, Nicholas M. Luscombe.\n\u003e _Genome Res._ 2024. 34: 426-440; doi: [10.1101/2023.05.09.539028](https://doi.org/10.1101/gr.278295.123). PubMed ID: [38621828](https://pubmed.ncbi.nlm.nih.gov/38621828/)\n\n[OIST research news article](https://www.oist.jp/news-center/news/2024/4/25/oikopleura-who-species-identity-crisis-genome-community)\n\nAnd also please cite the [LAST papers](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-papers.rst).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1108?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1108?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1108?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1108?version=4","name":"2.0.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1108?version=5","name":"2.1.0","author":[],"descriptor_type":["NFL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1108?version=6","name":"2.2.0","author":[],"descriptor_type":["NFL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1108?version=7","name":"2.2.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1109","url":"https://workflowhub.eu/workflows/1109","name":"Meta-analysis of permeability literature data shows possibilities and limitations of popular methods","description":"# Article abstract\r\n\r\nPermeability is an important molecular property in drug discovery, as it co-determines pharmacokinetics whenever a drug crosses the phospholipid bilayer, e.g., into the cell, in the gastrointestinal tract or across the blood-brain barrier. Many methods for the determination of permeability have been developed, including cell line assays, cell-free model systems like PAMPA mimicking, e.g., gastrointestinal epithelia or the skin, as well as the Black lipid membrane (BLM) and sub-micrometer liposomes. Furthermore, many in silico approaches have been developed for permeability prediction. Meta-analysis of publicly available databases for permeability data (MolMeDB and ChEMBL) was performed to establish their usability. 
Firstly, experimental data can only be measured between thresholds for the lowest and highest permeation rate obtainable within physical boundaries. These thresholds vary strongly between methods. Secondly, computed data do not obey these thresholds but, on the other hand, can produce incorrect results. Thirdly, even for the same method and molecule, there is often a strong discrepancy between individual measured values. These differences are based not only on the statistics but also on the varying approaches and evaluation of the measured data. Thus, when working with in-house measured or published permeability data, we recommend to be cautious with their interpretation.\r\n\r\n# Keywords\r\nmembrane, permeability, PAMPA, BLM, liposome, CACO-2, MDCK, PerMM, COSMOperm, MolMeDB\r\n\r\n# Please cite the original article:\r\n\r\nStorchmannová K, Balouch M, Juračka J, Štěpánek F, Berka K. Meta-analysis of permeability literature data shows possibilities and limitations of popular methods. ChemRxiv. 2024;[ doi:10.26434/chemrxiv-2024-ndc8k-v2](http://chemrxiv.org/engage/chemrxiv/article-details/66d0e15ff3f4b052908e1f79)\r\n\r\nNote that this article is currently a preprint and has not undergone peer review. This section will be updated once the article is published.","organization":"Chemical Data Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1109?version=1","name":"Version 2","author":["Kateřina Storchmannová"],"descriptor_type":[]}]},{"id":"1110","url":"https://workflowhub.eu/workflows/1110","name":"Workflow for differential expression analysis in proteomics data and overlaying DEPs on COVID-19 Disease Map","description":"A R workflow for proteomics data analysis is reported. This pipeline was basing on protein expression projects, stored on the PRIDE database and reported on the COVID-19 Data portal. 
This is an R pipeline to analyze protein expression data, built on lung cell lines infected by SARS-CoV-2 variants: ​B.1, Delta, and Omicron BA.1 (Mezler et al. 2023) https://www.ebi.ac.uk/pride/archive/projects/PXD037265. This pipeline can obtain DEPs for each variant, starting from normalized protein expression data, and it enables to obtain LogFC and FDR values highly overlapping with DEPs in Mezler et al. 2023, as well as building an input file to overlay the DEPs on COVID-19 Disease Map (C19DM) (https://covid19map.elixir-luxembourg.org/minerva/). ","organization":"BY-COVID (general)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1110?version=1","name":"main @ e91c0ae","author":["Francesco Messina"],"descriptor_type":[]}]},{"id":"1112","url":"https://workflowhub.eu/workflows/1112","name":"EuCanImage FHIR ETL Implementation I: Hepatocellular Carcinoma","description":"# EuCanImage FHIR ETL Implementation\r\n\r\nThis repository contains the ETL implementation for EuCanImage, encouraging semantic interoperability of the clinical data obtained in the studies by transforming it into a machine-readable format following FHIR standards. This parser uses [FHIR Resources](https://github.com/nazrulworld/fhir.resources) in order to create the dictionaries following a FHIR compliant structure.\r\n- Code Language is written in [Python 3.11](https://www.python.org/downloads/release/python-3110/).\r\n- The outputs are JSON files compliant with [FHIR 4.3](https://hl7.org/fhir/R4B/) schemas.\r\n- This script is specifically created for the Extract, Transform and Load implementation for EuCanImage, and will follow the structures obtained from the REDCap databases within the study. 
To create your own implementation in a different study, you may use the previously mentioned [FHIR Resources](https://github.com/nazrulworld/fhir.resources).\r\n\r\n#### Data conversion process:\r\nThis code followed the structure to go through the following steps:\r\n- Importing and transforming CSV with patient data\r\n- Defining dictionaries for ontologies and functions to populate FHIR dictionaries\r\n- Transforming dictionaries into FHIR resources\r\n- Grouping FHIR resources into a defined bundle/envelope of resources\r\n- Exporting as json file\r\n\r\n#### Input \u0026 Output\r\n- CSV file for each use case (CSV folder)\r\n- JSON file following FHIR standards (OUTPUT folder)\r\n\r\n## Installation and Guide\r\nThe first step is to clone or download the repository to your computer\r\n```bash\r\ngit clone https://github.com/EGA-archive/EuCanImage-FHIR.git\r\n```\r\n#### Requirements\r\n- Python 3.11.2\r\n- [FHIR Resources](https://github.com/nazrulworld/fhir.resources) 6.5.0\r\n- pandas 2.1.3\r\n- numpy 1.26.2\r\n\r\nIn order to use these scripts, you will need to have access to [Python 3.11](https://www.python.org/downloads/release/python-3110/) in your systems.\r\n\r\nTo install the libraries used for this study, it can easily be done with `pip install`. The latest versions of each library should not cause any incompatibility.\r\n```bash\r\npip install fhir.resources\r\npip install pandas\r\npip install numpy\r\n```\r\n### Instructions\r\nThe steps are the same on each Use Case, so we will be using Use Case 1 as an example for the steps to follow.\r\n\r\nFirst of all, you will need to provide with a [CSV file](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC1_Hepatocellular_Carcinoma/CSV/UseCase1_testdata.csv) that follows the structure of the eCRF of the study. Each use case will have its own eCRF. 
Save the CSV file in the [CSV folder](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC1_Hepatocellular_Carcinoma/CSV) of the specific use case you will be using.\r\n\r\nNext, in the beginning of each python file (For example, for Use Case 1 it would be [UC1-ETL.py](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC1_Hepatocellular_Carcinoma/UC1-ETL.py), you will need to change the variable `relative_path_csv` to change the name of the file matching the one of the input.\r\n```bash\r\nrelative_path_csv = \"/UC1_Hepatocellular_Carcinoma/CSV/UseCase1_testdata.csv\"\r\n```\r\nThen, you can run the parser in the terminal, changing `PATH-TO-FOLDER` to the specific folder the parser is in, unless the terminal is run in the folder itself.\r\n```bash\r\npython PATH-TO-FOLDER/UC1-ETL.py\r\n```\r\nOnce it is finished, you will have all of the parsed JSON files in the [OUTPUT](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC1_Hepatocellular_Carcinoma/OUTPUT) folder\r\n","organization":"EGA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1112?version=1","name":"main @ 294527d","author":["Aldar Cabrelles"],"descriptor_type":[]}]},{"id":"1113","url":"https://workflowhub.eu/workflows/1113","name":"Full Analyse Argo data","description":"Process argo data with the Pangeo Ecosystem and visualise them with Ocean Data View (ODV)","organization":"FAIR-EASE, usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1113?version=1","name":"Version 1","author":["Marie Jossé"],"descriptor_type":["GALAXY"]}]},{"id":"1114","url":"https://workflowhub.eu/workflows/1114","name":"Genome assembly workflow for nanopore reads, for TSI","description":"**Genome assembly workflow for nanopore reads, for TSI**\r\n\r\nInput: \r\n* Nanopore reads (can be in format: fastq, fastq.gz, 
fastqsanger, or fastqsanger.gz) \r\n\r\nOptional settings to specify when the workflow is run:\r\n* [1] how many input files to split the original input into (to speed up the workflow). default = 0. example: set to 2000 to split a 60 GB read file into 2000 files of ~ 30 MB. \r\n* [2] filtering: min average read quality score. default = 10\r\n* [3] filtering: min read length. default = 200\r\n* [4] trimming: trim this many nucleotides from start of read. default = 50\r\n* [5] note: these are suggestions and will depend on the characteristics of your raw reads and downstream aims. If filtering and trimming settings are too stringent, there may be no reads remaining and workflow will fail. \r\n\r\nWorkflow steps:\r\n* [1] runs FastQC on raw reads\r\n* [2] splits input reads file into separate files to speed up the next step of Porechop\r\n* [3] trims nanopore adapters using Porechop\r\n* [4] trims and filters nanopore reads by quality and length using Nanofilt\r\n* [5] collapses back into a single read file, fastqsanger format\r\n* [6] runs FastqQC on trimmed/filtered reads\r\n* [7] assembles genome with Flye\r\n* [8] calculates statistics on genome assembly contigs with Fasta Statistics\r\n* [9] draws genome assembly graph with Bandage \r\n\r\nMain outputs:\r\n* [1] FastQC report on raw reads, html\r\n* [2] Adpater-chopped, trimmed, filtered reads in fastqsanger format\r\n* [3] FastQC report on filtered reads, html\r\n* [4] genome assembly contigs in fasta format (primary assembly)\r\n* [5] genome assembly statistics\r\n* [6] genome assembly graph in Bandage format \r\n\r\nNote: You may wish to plot raw reads first (e.g. 
using the tool NanoPlot), to get a better of idea of read lengths and quality, to decide on filtering/trimming settings.","organization":"Australian BioCommons, Galaxy Australia","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1114?version=1","name":"Version 1","author":["Anna Syme"],"descriptor_type":["GALAXY"]}]},{"id":"1122","url":"https://workflowhub.eu/workflows/1122","name":"PhysioNet RF Kfold","description":"**Name:** PhysioNet RF Kfold\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nKfold to evaluate RandomForest accuracy on PhysioNet dataset (https://b2drop.bsc.es/index.php/s/8Q8MefXX2rrzaWs). \r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1122?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1123","url":"https://workflowhub.eu/workflows/1123","name":"PhysioNet kNN Kfold","description":"**Name:** PhysioNet kNN Kfold\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nKfold to evaluate kNN accuracy on PhysioNet dataset (https://b2drop.bsc.es/index.php/s/8Q8MefXX2rrzaWs). 
\r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1123?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1123?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1124","url":"https://workflowhub.eu/workflows/1124","name":"PhysioNet CascadeSVM Kfold","description":"**Name:** PhysioNet CascadeCSVM Kfold\r\n**Contact Person**: support-compss@bsc.es  \r\n**Access Level**: public  \r\n**License Agreement**: Apache2  \r\n**Platform**: COMPSs  \r\n**Machine**: MareNostrum5 \r\n\r\nKfold to evaluate CascadeCSVM accuracy on PhysioNet dataset (https://b2drop.bsc.es/index.php/s/8Q8MefXX2rrzaWs). \r\nThis application used [dislib-0.9.0](https://github.com/bsc-wdc/dislib/tree/release-0.9)\r\n","organization":"eFlows4HPC general","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1124?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1125","url":"https://workflowhub.eu/workflows/1125","name":"GADES reproducibility workflow","description":"# Article-GADES\r\n\r\nThis repository represents generating and benchmarking the results of the [GADES](https://github.com/lab-medvedeva/GADES-main) package for Distance Matrix Calculation\r\n\r\n## Installation\r\n\r\n```shell\r\ngit lfs install\r\ngit clone https://github.com/lab-medvedeva/Article-GADES.git\r\ncd Article-GADES\r\n```\r\n\r\nPut the Real datasets in the MEX format to the folder `Datasets/Real`.\r\n\r\n### Running benchmark using Docker Deployment\r\n```shell\r\ndocker run --gpus all \\\r\n    -v $PWD/Datasets:/workspace/Article-GADES/Datasets \\\r\n    -v $PWD/results:/workspace/Article-GADES/results 
\\\r\n    akhtyamovpavel/article-gades \r\n```\r\n\r\n## Step 01. Generation of the datasets\r\n\r\n### Step 01.1. Generated Dense Datasets\r\n\r\n```shell\r\ncd ./scripts/MatricesGeneration\r\n./generate_dense.sh ../../Datasets/\r\n```\r\n\r\n### Step 01.2. Generated Sparse Datasets\r\n\r\n```shell\r\ncd ./scripts/MatricesGeneration\r\n./generate_sparse.sh ../../Datasets/\r\n```\r\n\r\n## Step 02. Benchmarking\r\n\r\n### Step 02.1. Generated Dense Datasets\r\n```shell\r\ncd ./scripts/Benchmarking\r\n\r\n./run_benchmark_generated_dense.sh ../../\r\n\r\n./run_benchmark_python_dense.sh ../../\r\n```\r\n\r\n### Step 02.2. Generated Sparse Datasets\r\n\r\n```shell\r\n\r\ncd ./scripts/Benchmarking/\r\n\r\n./run_benchmark_generated_sparse.sh ../../\r\n```\r\n\r\n### Step 02.3 Real Datasets\r\n```shell\r\n\r\ncd ./Scripts/Benchmarking/\r\n./run_benchmark_real_python.sh \u003cpath to dataset\u003e ../../results/RealDatasets/\u003cname of the real dataset\u003e/\r\n./run_benchmark_real_R.sh \u003cpath to dataset\u003e ../../results/RealDatasets/\u003cname of the dataset\u003e/\r\n```\r\n\r\nExample:\r\n```shell\r\n./run_benchmark.sh ../../Datasets/Real/HLCA_marrow.mtx ../results/RealDatasets/HLCA_marrow/\r\n```\r\n\r\n\r\n### Step 02.4. Ablation Study for the Batch Size Usage\r\n\r\n### Step 02.5. Ablation Study for the Memory Usage\r\n```\r\ncd ./Scripts/Benchmarking\r\n./run_benchmark_real_python_memory_usage.sh \u003cpath to dataset\u003e ../../results/RealDatasetsBatchSizeFixedMemory/\u003cname of the real dataset\u003e/500/\r\n```\r\n\r\nExample:\r\n```\r\n./run_benchmark_real_python_memory_usage.sh ../../Datasets/CellLines.mtx ../../results/RealDatasetsBatchSizeFixedMemory/CellLines/500/\r\n```\r\n\r\n## Step 03. Drawing charts\r\n\r\nWe split reproducibility notebooks into two parts:\r\n* Aggregation over datasets\r\n* Plotting charts\r\n\r\n### Aggregation\r\n\r\n1. 
For Generated Dense datasets you could use the [GeneratedDatasetsCollector](/reproducibility/00-GeneratedDatasetsCollector.ipynb) notebook.\r\n2. For Generated Sparse datasets you could use the [GeneratedSparseCollector](/reproducibility/00-GeneratedSparseCollector.ipynb) notebook.\r\n\r\n### Analyzing datasets\r\n\r\n1. Generated datasets analyzed in the [GeneratedDatasetAnalysis](/reproducibility/GeneratedDatasetAnalysis.ipynb) notebook.\r\n2. Real datasets analyzed in the [RealDatasetAnalysis](/reproducibility/RealDatasetsAnalysis.ipynb) notebook.\r\n3. Analysis of ablation study could be found in the [reproducibility](/reproducibility/03-AblationStudy.ipynb) notebook.\r\n","organization":"Medvedeva Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1125?version=1","name":"main @ 39b937a","author":["Pavel Akhtyamov"],"descriptor_type":[]}]},{"id":"1156","url":"https://workflowhub.eu/workflows/1156","name":"EuCanImage FHIR ETL Implementation III: Colorectal Liver metastasis","description":"# EuCanImage FHIR ETL Implementation\r\n\r\nThis repository contains the ETL implementation for EuCanImage, encouraging semantic interoperability of the clinical data obtained in the studies by transforming it into a machine-readable format following FHIR standards. This parser uses [FHIR Resources](https://github.com/nazrulworld/fhir.resources) in order to create the dictionaries following a FHIR compliant structure.\r\n- Code Language is written in [Python 3.11](https://www.python.org/downloads/release/python-3110/).\r\n- The outputs are JSON files compliant with [FHIR 4.3](https://hl7.org/fhir/R4B/) schemas.\r\n- This script is specifically created for the Extract, Transform and Load implementation for EuCanImage, and will follow the structures obtained from the REDCap databases within the study. 
To create your own implementation in a different study, you may use the previously mentioned [FHIR Resources](https://github.com/nazrulworld/fhir.resources).\r\n\r\n#### Data conversion process:\r\nThis code followed the structure to go through the following steps:\r\n- Importing and transforming CSV with patient data\r\n- Defining dictionaries for ontologies and functions to populate FHIR dictionaries\r\n- Transforming dictionaries into FHIR resources\r\n- Grouping FHIR resources into a defined bundle/envelope of resources\r\n- Exporting as json file\r\n\r\n#### Input \u0026 Output\r\n- CSV file for each use case (CSV folder)\r\n- JSON file following FHIR standards (OUTPUT folder)\r\n\r\n## Installation and Guide\r\nThe first step is to clone or download the repository to your computer\r\n```bash\r\ngit clone https://github.com/EGA-archive/EuCanImage-FHIR.git\r\n```\r\n#### Requirements\r\n- Python 3.11.2\r\n- [FHIR Resources](https://github.com/nazrulworld/fhir.resources) 6.5.0\r\n- pandas 2.1.3\r\n- numpy 1.26.2\r\n\r\nIn order to use these scripts, you will need to have access to [Python 3.11](https://www.python.org/downloads/release/python-3110/) in your systems.\r\n\r\nTo install the libraries used for this study, it can easily be done with `pip install`. The latest versions of each library should not cause any incompatibility.\r\n```bash\r\npip install fhir.resources\r\npip install pandas\r\npip install numpy\r\n```\r\n### Instructions\r\nThe steps are the same on each Use Case, so we will be using Use Case 3 as an example for the steps to follow.\r\n\r\nFirst of all, you will need to provide with a [CSV file](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC3_Colorectal_Liver_metastasis/CSV/UseCase3_testdata.csv) that follows the structure of the eCRF of the study. Each use case will have its own eCRF. 
Save the CSV file in the [CSV folder](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC3_Colorectal_Liver_metastasis/CSV/) of the specific use case you will be using.\r\n\r\nNext, in the beginning of each python file (For example, for Use Case 3 it would be [UC3-ETL.py](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC3_Colorectal_Liver_metastasis/UC3-ETL.py), you will need to change the variable `relative_path_csv` to change the name of the file matching the one of the input.\r\n```bash\r\nrelative_path_csv = \"/UC3_Colorectal_Liver_metastasis/CSV/UseCase3_testdata.csv\"\r\n```\r\nThen, you can run the parser in the terminal, changing `PATH-TO-FOLDER` to the specific folder the parser is in, unless the terminal is run in the folder itself.\r\n```bash\r\npython PATH-TO-FOLDER/UC3-ETL.py\r\n```\r\nOnce it is finished, you will have all of the parsed JSON files in the [OUTPUT](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC3_Colorectal_Liver_metastasis/OUTPUT) folder\r\n","organization":"EGA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1156?version=1","name":"main @ 294527d","author":["Aldar Cabrelles"],"descriptor_type":[]}]},{"id":"1157","url":"https://workflowhub.eu/workflows/1157","name":"EuCanImage FHIR ETL Implementation IV \u0026 V: Rectal Cancer","description":"# EuCanImage FHIR ETL Implementation\r\n\r\nThis repository contains the ETL implementation for EuCanImage, encouraging semantic interoperability of the clinical data obtained in the studies by transforming it into a machine-readable format following FHIR standards. 
This parser uses [FHIR Resources](https://github.com/nazrulworld/fhir.resources) in order to create the dictionaries following a FHIR compliant structure.\r\n- Code Language is written in [Python 3.11](https://www.python.org/downloads/release/python-3110/).\r\n- The outputs are JSON files compliant with [FHIR 4.3](https://hl7.org/fhir/R4B/) schemas.\r\n- This script is specifically created for the Extract, Transform and Load implementation for EuCanImage, and will follow the structures obtained from the REDCap databases within the study. To create your own implementation in a different study, you may use the previously mentioned [FHIR Resources](https://github.com/nazrulworld/fhir.resources).\r\n\r\n#### Data conversion process:\r\nThis code followed the structure to go through the following steps:\r\n- Importing and transforming CSV with patient data\r\n- Defining dictionaries for ontologies and functions to populate FHIR dictionaries\r\n- Transforming dictionaries into FHIR resources\r\n- Grouping FHIR resources into a defined bundle/envelope of resources\r\n- Exporting as json file\r\n\r\n#### Input \u0026 Output\r\n- CSV file for each use case (CSV folder)\r\n- JSON file following FHIR standards (OUTPUT folder)\r\n\r\n## Installation and Guide\r\nThe first step is to clone or download the repository to your computer\r\n```bash\r\ngit clone https://github.com/EGA-archive/EuCanImage-FHIR.git\r\n```\r\n#### Requirements\r\n- Python 3.11.2\r\n- [FHIR Resources](https://github.com/nazrulworld/fhir.resources) 6.5.0\r\n- pandas 2.1.3\r\n- numpy 1.26.2\r\n\r\nIn order to use these scripts, you will need to have access to [Python 3.11](https://www.python.org/downloads/release/python-3110/) in your systems.\r\n\r\nTo install the libraries used for this study, it can easily be done with `pip install`. 
The latest versions of each library should not cause any incompatibility.\r\n```bash\r\npip install fhir.resources\r\npip install pandas\r\npip install numpy\r\n```\r\n### Instructions\r\nThe steps are the same on each Use Case, so we will be using Use Case 4 \u0026 5 as an example for the steps to follow.\r\n\r\nFirst of all, you will need to provide with a [CSV file](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC4%265_Rectal_Cancer/CSV/UseCase45_testdata.csv) that follows the structure of the eCRF of the study. Each use case will have its own eCRF. Save the CSV file in the [CSV folder](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC4%265_Rectal_Cancer/CSV) of the specific use case you will be using.\r\n\r\nNext, in the beginning of each python file (For example, for Use Case 4 \u0026 5 it would be [UC45-ETL.py](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC4%265_Rectal_Cancer/UC45-ETL.py), you will need to change the variable `relative_path_csv` to change the name of the file matching the one of the input.\r\n```bash\r\nrelative_path_csv = \"/UC4\u00265_Rectal_Cancer/CSV/UseCase45_testdata.csv\"\r\n```\r\nThen, you can run the parser in the terminal, changing `PATH-TO-FOLDER` to the specific folder the parser is in, unless the terminal is run in the folder itself.\r\n```bash\r\npython PATH-TO-FOLDER/UC45-ETL.py\r\n```\r\nOnce it is finished, you will have all of the parsed JSON files in the [OUTPUT](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC4%265_Rectal_Cancer/OUTPUT) folder\r\n","organization":"EGA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1157?version=1","name":"main @ 294527d","author":["Aldar Cabrelles"],"descriptor_type":[]}]},{"id":"1158","url":"https://workflowhub.eu/workflows/1158","name":"EuCanImage FHIR ETL Implementation VI \u0026 VIII: Breast Cancer MMG","description":"# EuCanImage FHIR ETL 
Implementation\r\n\r\nThis repository contains the ETL implementation for EuCanImage, encouraging semantic interoperability of the clinical data obtained in the studies by transforming it into a machine-readable format following FHIR standards. This parser uses [FHIR Resources](https://github.com/nazrulworld/fhir.resources) in order to create the dictionaries following a FHIR compliant structure.\r\n- Code Language is written in [Python 3.11](https://www.python.org/downloads/release/python-3110/).\r\n- The outputs are JSON files compliant with [FHIR 4.3](https://hl7.org/fhir/R4B/) schemas.\r\n- This script is specifically created for the Extract, Transform and Load implementation for EuCanImage, and will follow the structures obtained from the REDCap databases within the study. To create your own implementation in a different study, you may use the previously mentioned [FHIR Resources](https://github.com/nazrulworld/fhir.resources).\r\n\r\n#### Data conversion process:\r\nThis code followed the structure to go through the following steps:\r\n- Importing and transforming CSV with patient data\r\n- Defining dictionaries for ontologies and functions to populate FHIR dictionaries\r\n- Transforming dictionaries into FHIR resources\r\n- Grouping FHIR resources into a defined bundle/envelope of resources\r\n- Exporting as json file\r\n\r\n#### Input \u0026 Output\r\n- CSV file for each use case (CSV folder)\r\n- JSON file following FHIR standards (OUTPUT folder)\r\n\r\n## Installation and Guide\r\nThe first step is to clone or download the repository to your computer\r\n```bash\r\ngit clone https://github.com/EGA-archive/EuCanImage-FHIR.git\r\n```\r\n#### Requirements\r\n- Python 3.11.2\r\n- [FHIR Resources](https://github.com/nazrulworld/fhir.resources) 6.5.0\r\n- pandas 2.1.3\r\n- numpy 1.26.2\r\n\r\nIn order to use these scripts, you will need to have access to [Python 3.11](https://www.python.org/downloads/release/python-3110/) in your systems.\r\n\r\nTo install the 
libraries used for this study, it can easily be done with `pip install`. The latest versions of each library should not cause any incompatibility.\r\n```bash\r\npip install fhir.resources\r\npip install pandas\r\npip install numpy\r\n```\r\n### Instructions\r\nThe steps are the same on each Use Case, so we will be using Use Case 6 \u0026 8 as an example for the steps to follow.\r\n\r\nFirst of all, you will need to provide with a [CSV file](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC6%268_Breast_Cancer_MMG/CSV/UseCase68_testdata.csv) that follows the structure of the eCRF of the study. Each use case will have its own eCRF. Save the CSV file in the [CSV folder](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC6%268_Breast_Cancer_MMG/CSV) of the specific use case you will be using.\r\n\r\nNext, in the beginning of each python file (For example, for Use Case 6 \u0026 8 it would be [UC68-ETL.py](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC6%268_Breast_Cancer_MMG/UC68-ETL.py), you will need to change the variable `relative_path_csv` to change the name of the file matching the one of the input.\r\n```bash\r\nrelative_path_csv = \"UC6\u00268_Breast_Cancer_MMG/CSV/UseCase68_testdata.csv\"\r\n```\r\nThen, you can run the parser in the terminal, changing `PATH-TO-FOLDER` to the specific folder the parser is in, unless the terminal is run in the folder itself.\r\n```bash\r\npython PATH-TO-FOLDER/UC68-ETL.py\r\n```\r\nOnce it is finished, you will have all of the parsed JSON files in the [OUTPUT](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC6%268_Breast_Cancer_MMG/OUTPUT) folder\r\n","organization":"EGA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1158?version=1","name":"main @ 294527d","author":["Aldar Cabrelles"],"descriptor_type":[]}]},{"id":"1159","url":"https://workflowhub.eu/workflows/1159","name":"EuCanImage FHIR ETL 
Implementation VII: Breast Cancer MRI","description":"# EuCanImage FHIR ETL Implementation\r\n\r\nThis repository contains the ETL implementation for EuCanImage, encouraging semantic interoperability of the clinical data obtained in the studies by transforming it into a machine-readable format following FHIR standards. This parser uses [FHIR Resources](https://github.com/nazrulworld/fhir.resources) in order to create the dictionaries following a FHIR compliant structure.\r\n- Code Language is written in [Python 3.11](https://www.python.org/downloads/release/python-3110/).\r\n- The outputs are JSON files compliant with [FHIR 4.3](https://hl7.org/fhir/R4B/) schemas.\r\n- This script is specifically created for the Extract, Transform and Load implementation for EuCanImage, and will follow the structures obtained from the REDCap databases within the study. To create your own implementation in a different study, you may use the previously mentioned [FHIR Resources](https://github.com/nazrulworld/fhir.resources).\r\n\r\n#### Data conversion process:\r\nThis code followed the structure to go through the following steps:\r\n- Importing and transforming CSV with patient data\r\n- Defining dictionaries for ontologies and functions to populate FHIR dictionaries\r\n- Transforming dictionaries into FHIR resources\r\n- Grouping FHIR resources into a defined bundle/envelope of resources\r\n- Exporting as json file\r\n\r\n#### Input \u0026 Output\r\n- CSV file for each use case (CSV folder)\r\n- JSON file following FHIR standards (OUTPUT folder)\r\n\r\n## Installation and Guide\r\nThe first step is to clone or download the repository to your computer\r\n```bash\r\ngit clone https://github.com/EGA-archive/EuCanImage-FHIR.git\r\n```\r\n#### Requirements\r\n- Python 3.11.2\r\n- [FHIR Resources](https://github.com/nazrulworld/fhir.resources) 6.5.0\r\n- pandas 2.1.3\r\n- numpy 1.26.2\r\n\r\nIn order to use these scripts, you will need to have access to [Python 
3.11](https://www.python.org/downloads/release/python-3110/) in your systems.\r\n\r\nTo install the libraries used for this study, it can easily be done with `pip install`. The latest versions of each library should not cause any incompatibility.\r\n```bash\r\npip install fhir.resources\r\npip install pandas\r\npip install numpy\r\n```\r\n### Instructions\r\nThe steps are the same on each Use Case, so we will be using Use Case 7 as an example for the steps to follow.\r\n\r\nFirst of all, you will need to provide with a [CSV file](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC7_Breast_Cancer_MRI/CSV/UseCase7_testdata.csv) that follows the structure of the eCRF of the study. Each use case will have its own eCRF. Save the CSV file in the [CSV folder](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC7_Breast_Cancer_MRI/CSV) of the specific use case you will be using.\r\n\r\nNext, in the beginning of each python file (For example, for Use Case 7 it would be [UC7-ETL.py](https://github.com/EGA-archive/EuCanImage-FHIR/blob/main/UC7_Breast_Cancer_MRI/UC7-ETL.py), you will need to change the variable `relative_path_csv` to change the name of the file matching the one of the input.\r\n```bash\r\nrelative_path_csv = \"/UC7_Breast_Cancer_MRI/CSV/UseCase7_testdata.csv\"\r\n```\r\nThen, you can run the parser in the terminal, changing `PATH-TO-FOLDER` to the specific folder the parser is in, unless the terminal is run in the folder itself.\r\n```bash\r\npython PATH-TO-FOLDER/UC7-ETL.py\r\n```\r\nOnce it is finished, you will have all of the parsed JSON files in the [OUTPUT](https://github.com/EGA-archive/EuCanImage-FHIR/tree/main/UC7_Breast_Cancer_MRI/OUTPUT) folder\r\n","organization":"EGA","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1159?version=1","name":"main @ 294527d","author":["Aldar 
Cabrelles"],"descriptor_type":[]}]},{"id":"1160","url":"https://workflowhub.eu/workflows/1160","name":"Demultiplexing Doublet Benchmark","description":"# demux_doublet_sim\r\n\r\nRepository for Nextflow pipeline used in demuxSNP demultipelxing paper\r\n\r\n## Overall workflow\r\n\r\n1. Simulate doublets\r\n- Add per-sample suffix to barcodes in BAM\r\n- Merge per-sample BAMs\r\n- Generate lookup of barcodes to rename to reach a set % doublets\r\n-  Rename barcodes in BAM as per lookup\r\n2. Benchmark methods\r\n- Experiments 1: Vary doublet rate\r\n- Experiment 2: Vary SNP subsetting\r\n\r\n## Inputs\r\n\r\nMost inputs are specified in nextflow.config:\r\n    container__souporcell: path to souporcell apptainer image, ideally at top level of project.  \r\n    bam_path: Path to demultiplexed bam files.  \r\n    barcodes_path: Path to demultiplexed barcodes.  \r\n    tenx: Path to barcodes.tsv, features.tsv and matrix.mtx files from multiplexed 10X output.  \r\n    common_variants: common variants e.g. from 1K genome project.  \r\n    ref: path to reference genome, ideally in data/input directory.  \r\n\r\nDoublet simulation parameters are specified in params_ccrcc.csv\r\nThe workflow caters for subsampling (also specified in params_ccrcc.csv) although this was not explored in the paper.\r\n\r\n## Outputs\r\n\r\nFolder for each simulated scenario (e.g. 
seed, % doublets, number of genes used to subset)\r\nSingleCellExperiment object in each demuxSNP folder.\r\n\r\n## Known issues\r\n\r\nInput files used by souporcell/apptainer need to be stored below the image.\r\nApptainer must be bound to the project directory (variable in nextflow.config).","organization":"Culhane Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1160?version=1","name":"0.99.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1160?version=2","name":"0.99.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1161","url":"https://workflowhub.eu/workflows/1161","name":"ECTI Atopic Dermatitis","description":"# **Stratum corneum nanotexture feature detection using deep learning and spatial analysis: a non-invasive tool for skin barrier assessment**\r\n\r\n\u003cimg src=\"./source/Overview.png\" alt=\"Data Processing\" width=\"95%\" /\u003e\r\n\r\nThis repository presents an objective, quantifiable method for assessing atopic dermatitis (AD) severity. The program integrates deep learning object detection with spatial analysis algorithms to accurately calculate the density of circular nano-size objects (CNOs), termed the Effective Corneocyte Topographical Index (ECTI). 
The ECTI demonstrates remarkable robustness in overcoming the inherent challenges of nano-imaging, such as environmental noise and structural occlusions on the corneocyte surface, further enhancing its applicability in clinical settings.\r\n\r\n## **Dependencies**\r\n- Python 3.9+\r\n- matplotlib\r\n- numpy\r\n- opencv-python\r\n- scipy\r\n- scikit-image\r\n- ultralytics\r\n- scikit-learn\r\n- customtkinter\r\n\r\n## **Directories**\r\n- `AD_Assessment_GUI.zip` contains a cross-platform executable GUI, sample data, and a tutorial video.\r\n- `utils/Img_Preprocessing.py` demonstrates the image enhancement algorithms applied to the corneocyte nanotexture images.\r\n\r\n## **Usage**\r\n1. Execution via cross-platform executable GUI\r\n    - Download [AD_Assessment_GUI.zip](https://huggingface.co/jenhung/ECTI_Assessment_GUI)\r\n    - Run `AD_Assessment_GUI.exe`\r\n    - Analysis results will be saved within the selected path in a folder titled `CNO_Detection`\r\n\r\n2. Execution via python script\r\n    - Install packages in terminal:\r\n        ```    \r\n        pip install -r requirements.txt\r\n        ```\r\n    - Run `AD_Assessment_GUI.py`\r\n    - Analysis results will be saved within the selected path in a folder titled `CNO_Detection`\r\n\r\n## **Executable**\r\n\r\n1. Install PyInstaller in terminal:\r\n                \r\n    ```    \r\n    pip install pyinstaller\r\n    ```\r\n   \r\n2. 
Run command in terminal:\r\n\r\n    ```    \r\n    pyinstaller --onedir .\\AD_Assessment_GUI.py\r\n    ```\r\n   \r\n## **Performance**\r\n\r\n| Model                                                                | Test Size | #Parameter (M) | FLOPs (G) | AP\u003csup\u003e50\u003c/sup\u003e (%) | AP\u003csup\u003e50-95\u003c/sup\u003e (%) | Latency (ms) |\r\n|:---------------------------------------------------------------------|:---------:|:--------------:|:---------:|:-------------------:|:----------------------:|:------------:|\r\n| [YOLOv10-N](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L)   |    512    |      2.7       |    8.2    |        89.6         |          51.4          |     3.3      |\r\n| [YOLOv10-S](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L)   |    512    |      8.0       |   24.4    |        90.8         |          55.5          |     4.58     |\r\n| [YOLOv10-M](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L)   |    512    |      16.5      |   63.4    |        91.3         |          59.7          |     7.17     |\r\n| [YOLOv10-B](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L)   |    512    |      20.4      |   97.7    |        91.1         |          62.5          |     7.58     |\r\n| [YOLOv10-L](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L)   |    512    |      25.7      |   126.3   |        91.4         |          63.2          |     9.01     |\r\n| [YOLOv10-X](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L)   |    512    |      31.6      |   169.8   |        91.2         |          62.9          |    10.95     |\r\n| [RT-DETRv2-S](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L) |    512    |      20.0      |   60.0    |        87.6         |          39.6          |     5.51     |\r\n| [RT-DETRv2-M](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L) |    512    |      31.0      |   100.0   |        84.0         |          37.2          |     7.48     |\r\n| 
[RT-DETRv2-L](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L) |    512    |      42.0      |   136.0   |        84.3         |          33.4          |    13.50     |\r\n| [RT-DETRv2-X](https://huggingface.co/jenhung/CNO_DETECTION_YOLOv8-L) |    512    |      76.0      |   259.0   |        83.3         |          32.0          |    21.15     |\r\n\r\n## **Dataset**\r\nThe corneocyte nanotexture dataset is available for download at the following link: [Corneocyte Nanotexture Dataset](https://huggingface.co/datasets/jenhung/Corneocyte_Nanotexture_Dataset).\r\n\r\n## **Contributions**\r\n\r\n[1] Liao, H-S., Wang, J-H., Raun, E., Nørgaard, L. O., Dons, F. E., \u0026 Hwu, E. E-T. (2022). Atopic Dermatitis Severity Assessment using High-Speed Dermal Atomic Force Microscope. Abstract from AFM BioMed Conference 2022, Nagoya-Okazaki, Japan.\r\n\r\n[2] Pereda, J., Liao, H-S., Werner, C., Wang, J-H., Huang, K-Y., Raun, E., Nørgaard, L. O., Dons, F. E., \u0026 Hwu, E. E. T. (2022). Hacking Consumer Electronics for Biomedical Imaging. Abstract from 5th Global Conference on Biomedical Engineering \u0026 Annual Meeting of TSBME, Taipei, Taiwan, Province of China.\r\n\r\n[3] Liao, H. S., Akhtar, I., Werner, C., Slipets, R., Pereda, J., Wang, J. H., Raun, E., Nørgaard, L. O., Dons, F. E., \u0026 Hwu, E. E. T. (2022). Open-source controller for low-cost and high-speed atomic force microscopy imaging of skin corneocyte nanotextures. HardwareX, 12, [e00341]. https://doi.org/10.1016/j.ohx.2022.e00341\r\n\r\n----\r\n\r\n### Contact: [Jen-Hung Wang](mailto:jenhw@dtu.dk) / [Assoc. 
Professor En-Te Hwu](mailto:etehw@dtu.dk)\r\n","organization":"IDUN - Drug Delivery and Sensing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1161?version=1","name":"master @ fafd5bf","author":["Jen-Hung Wang"],"descriptor_type":[]}]},{"id":"1162","url":"https://workflowhub.eu/workflows/1162","name":"ERGA Long reads-only Assembly+QC Hifiasm v2505 (WF2)","description":"The workflow takes a long reads collection (HiFi, or ONT also possible now), and max coverage depth (calculated from WF1) to run Hifiasm in solo mode. It produces a Pri/Alt assembly, Bandage plots, and runs all the QC analysis (gfastats, BUSCO, and Merqury).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1162?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1162?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"1163","url":"https://workflowhub.eu/workflows/1163","name":"ERGA Long Reads PriAlt Purge+QC v2505 (WF3)","description":"The workflow takes a Long Reads collection, Pri/Alt contigs, and the values for transition parameter and max coverage depth (calculated from WF1) to run Purge_Dups. 
It produces purged Pri and Alt contigs assemblies, and runs all the QC analysis (gfastats, BUSCO, and Merqury).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1163?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1163?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"1164","url":"https://workflowhub.eu/workflows/1164","name":"ERGA HiC Pri Scaffolding+QC YaHS v2505 (WF4)","description":"The workflow takes trimmed HiC paired-end reads collection, and Pri/Alt assemblies to produce a scaffolded primary assembly (and alternate contigs) using YaHS. It also runs Pretext and all the QC analyses (gfastats, BUSCO, and Merqury).","organization":"ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1164?version=1","name":"Version 1","author":["Diego De Panis"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1164?version=2","name":"Version 2","author":["Diego De Panis"],"descriptor_type":["GALAXY"]}]},{"id":"1169","url":"https://workflowhub.eu/workflows/1169","name":"KNIME workflow to gather ChEMBL permeability data","description":"","organization":"Chemical Data Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1169?version=1","name":"Version 1","author":["Kateřina Storchmannová"],"descriptor_type":[]}]},{"id":"1170","url":"https://workflowhub.eu/workflows/1170","name":"Microbial (meta-) genome annotation","description":"### Workflow for microbial (meta-)genome annotation\r\n\r\nInput is a (meta)genome sequence in fasta format.\r\n\r\n* bakta\r\n* KoFamScan (optional)\r\n* 
InterProScan (optional)\r\n* eggNOG mapper (optional)\r\n\r\n* To RDF conversion with SAPP (optional, default on) --\u003e [SAPP conversion Workflow in WorkflowHub](https://workflowhub.eu/workflows/1174/)\r\n\r\ngit: [https://gitlab.com/m-unlock/cwl](https://gitlab.com/m-unlock/cwl)","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1170?version=1","name":"Version 1","author":["Jasper Koehorst","Bart Nijsse"],"descriptor_type":["CWL"]}]},{"id":"1174","url":"https://workflowhub.eu/workflows/1174","name":"SAPP conversion Workflow","description":"### Workflow for converting (genome) annotation tool output into a GBOL RDF file (TTL/HDT) using SAPP\r\n\r\nCurrent formats / tools:\r\n* \tEMBL format\r\n* \tInterProScan (JSON/TSV)\r\n* \teggNOG-mapper (TSV)\r\n* \tKoFamScan (TSV)\r\n\r\ngit: [https://gitlab.com/m-unlock/cwl](https://gitlab.com/m-unlock/cwl)\r\n\r\n**SAPP** (Semantic Annotation Platform with Provenance): \u003cbr\u003e\r\nhttps://gitlab.com/sapp \u003cbr\u003e\r\nhttps://academic.oup.com/bioinformatics/article/34/8/1401/4653704\r\n\r\n","organization":"UNLOCK","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1174?version=1","name":"Version 1","author":["Bart Nijsse","Jasper Koehorst"],"descriptor_type":["CWL"]}]},{"id":"1177","url":"https://workflowhub.eu/workflows/1177","name":"clinicalmp-quantitation/main","description":"Clinical Metaproteomics 4: Quantitation ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1177?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1178","url":"https://workflowhub.eu/workflows/1178","name":"nf-core/multiplesequencealign","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-multiplesequencealign_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/multiplesequencealign\" src=\"docs/images/nf-core-multiplesequencealign_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/multiplesequencealign/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/multiplesequencealign/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/multiplesequencealign/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/multiplesequencealign/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/multiplesequencealign/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13889386-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13889386)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A525.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/multiplesequencealign)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23multiplesequencealign-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/multiplesequencealign)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\nUse **nf-core/multiplesequencealign** to:\n\n1. **Deploy** one (or many) of the most popular Multiple Sequence Alignment (MSA) tools.\n2. **Benchmark** MSA tools (and their inputs) using various metrics.\n\nMain steps:\n\n  \u003cdetails\u003e\n      \u003csummary\u003e\u003cstrong\u003eInputs summary\u003c/strong\u003e (Optional)\u003c/summary\u003e\n      \u003cp\u003eComputation of summary statistics on the input files (e.g., average sequence similarity across the input sequences, their length, pLDDT extraction if available).\u003c/p\u003e\n  \u003c/details\u003e\n\n  \u003cdetails\u003e\n      \u003csummary\u003e\u003cstrong\u003eGuide Tree\u003c/strong\u003e (Optional)\u003c/summary\u003e\n      \u003cp\u003eRenders a guide tree with a chosen tool (list available in \u003ca href=\"https://nf-co.re/multiplesequencealign/usage#2-guide-trees\"\u003eusage\u003c/a\u003e). 
Some aligners use guide trees to define the order in which the sequences are aligned.\u003c/p\u003e\n  \u003c/details\u003e\n\n  \u003cdetails\u003e\n      \u003csummary\u003e\u003cstrong\u003eAlign\u003c/strong\u003e (Required)\u003c/summary\u003e\n      \u003cp\u003eAligns the sequences with a chosen tool (list available in \u003ca href=\"https://nf-co.re/multiplesequencealign/usage#3-align\"\u003eusage\u003c/a\u003e).\u003c/p\u003e\n  \u003c/details\u003e\n\n  \u003cdetails\u003e\n      \u003csummary\u003e\u003cstrong\u003eEvaluate\u003c/strong\u003e (Optional)\u003c/summary\u003e\n      \u003cp\u003eEvaluates the generated alignments with different metrics: Sum Of Pairs (SoP), Total Column score (TC), iRMSD, Total Consistency Score (TCS), etc.\u003c/p\u003e\n  \u003c/details\u003e\n\n  \u003cdetails\u003e\n      \u003csummary\u003e\u003cstrong\u003eReport\u003c/strong\u003e(Optional)\u003c/summary\u003e\n      \u003cp\u003eReports the collected information of the runs in a Shiny app and a summary table in MultiQC. Optionally, it can also render the \u003ca href=\"https://github.com/steineggerlab/foldmason\"\u003eFoldmason\u003c/a\u003e MSA visualization in HTML format.\u003c/p\u003e\n  \u003c/details\u003e\n\n\u003cbr\u003e\n\nMore introductory material: [bytesize talk](https://youtu.be/iRY-Y1p5gtc), [nextflow summit talk](https://www.youtube.com/watch?v=suNulysHIN0) from the nextlow summit, [poster](https://github.com/nf-core/multiplesequencealign/blob/dev/docs/images/poster-nf-msa.pdf).\n\n![Alt text](docs/images/nf-core-msa_metro_map.png?raw=true \"nf-core-msa metro map\")\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n### Quick start - test run\n\nTo get a feeling of what the pipeline does, run:\n\n(You don't need to download or provide any file, try it!)\n\n```\nnextflow run nf-core/multiplesequencealign \\\n   -profile test_tiny,docker \\\n   --outdir results\n```\n\nand if you want to see what a more complete run looks like, you can try:\n\n```\nnextflow run nf-core/multiplesequencealign \\\n   -profile test,docker \\\n   --outdir results\n```\n\n## How to set up an easy run:\n\n\u003e [!NOTE]\n\u003e We have many more use-case examples under [FAQs](https://nf-co.re/multiplesequencealign/usage/FAQs)\n\n### Input data\n\nYou can provide either (or both) a **fasta** file or a set of **protein structures**.\n\nAlternatively, you can provide a [samplesheet](https://nf-co.re/multiplesequencealign/usage/#samplesheet-input) and a [toolsheet](https://nf-co.re/multiplesequencealign/usage/#toolsheet-input).\n\nSee below how to provide them.\n\n\u003e Find some example input data [here](https://github.com/nf-core/test-datasets/tree/multiplesequencealign)\n\n### CASE 1: One input dataset, one tool.\n\nIf you only have one dataset and want to align it using one specific MSA tool (e.g. FAMSA or FOLDMASON), you can run the pipeline with one single command.\n\nIs your input a fasta file ([example](https://github.com/nf-core/test-datasets/blob/multiplesequencealign/testdata/setoxin-ref.fa))? Then:\n\n```bash\nnextflow run nf-core/multiplesequencealign \\\n   -profile easy_deploy,docker \\\n   --seqs \u003cYOUR_FASTA.fa\u003e \\\n   --aligner FAMSA \\\n   --outdir outdir\n```\n\nIs your input a directory where your PDB files are stored ([example](https://github.com/nf-core/test-datasets/blob/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz))? 
Then:\n\n```bash\nnextflow run nf-core/multiplesequencealign \\\n   -profile easy_deploy,docker \\\n   --pdbs_dir \u003cPATH_TO_YOUR_PDB_DIR\u003e \\\n   --aligner FOLDMASON \\\n   --outdir outdir\n```\n\n\u003cdetails\u003e\n  \u003csummary\u003e FAQ: Which are the available tools I can use?\u003c/summary\u003e\n  Check the list here: \u003ca href=\"https://nf-co.re/multiplesequencealign/usage/#3-align\"\u003e available tools\u003c/a\u003e.\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e FAQ: Can I use both \u003cem\u003e--seqs\u003c/em\u003e and \u003cem\u003e--pdbs_dir\u003c/em\u003e?\u003c/summary\u003e\n  Yes, go for it! This might be useful if you want a structural evaluation of a sequence-based aligner for instance.\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e FAQ: Can I specify also which guidetree to use? \u003c/summary\u003e\n  Yes, use the \u003ccode\u003e--tree\u003c/code\u003e flag. More info: \u003ca href=\"https://nf-co.re/multiplesequencealign/usage\"\u003eusage\u003c/a\u003e and \u003ca href=\"https://nf-co.re/multiplesequencealign/parameters\"\u003eparameters\u003c/a\u003e.\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e FAQ: Can I specify the arguments of the tools (tree and aligner)? \u003c/summary\u003e\n  Yes, use the \u003ccode\u003e--args_tree\u003c/code\u003e and \u003ccode\u003e--args_aligner\u003c/code\u003e flags. 
More info: \u003ca href=\"https://nf-co.re/multiplesequencealign/usage\"\u003eusage\u003c/a\u003e and \u003ca href=\"https://nf-co.re/multiplesequencealign/parameters\"\u003eparameters\u003c/a\u003e.\n\u003c/details\u003e\n\n### CASE 2: Multiple datasets, multiple tools.\n\n```bash\nnextflow run nf-core/multiplesequencealign \\\n   -profile test,docker \\\n   --input \u003csamplesheet.csv\u003e \\\n   --tools \u003ctoolsheet.csv\u003e \\\n   --outdir outdir\n```\n\nYou need **2 input files**:\n\n- **samplesheet** (your datasets)\n- **toolsheet** (which tools you want to use).\n\n\u003cdetails\u003e\n  \u003csummary\u003e What is a samplesheet?\u003c/summary\u003e\n  The sample sheet defines the \u003cb\u003einput datasets\u003c/b\u003e (sequences, structures, etc.) that the pipeline will process.\n\nA minimal version:\n\n```csv\nid,fasta\nseatoxin,seatoxin.fa\ntoxin,toxin.fa\n```\n\nA more complete one:\n\n```csv\nid,fasta,reference,optional_data\nseatoxin,seatoxin.fa,seatoxin-ref.fa,seatoxin_structures\ntoxin,toxin.fa,toxin-ref.fa,toxin_structures\n```\n\nEach row represents a set of sequences (in this case the seatoxin and toxin protein families) to be aligned and the associated (if available) reference alignments and dependency files (this can be anything from protein structure or any other information you would want to use in your favourite MSA tool).\n\nPlease check: \u003ca href=\"https://nf-co.re/multiplesequencealign/usage/#samplesheet-input\"\u003eusage\u003c/a\u003e.\n\n\u003e [!NOTE]\n\u003e The only required input is the id column and either fasta or optional_data.\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e What is a toolsheet?\u003c/summary\u003e\n  The toolsheet specifies \u003cem\u003ewhich combination of tools will be deployed and benchmarked in the pipeline\u003c/em\u003e.\n\nEach line defines a combination of guide tree and multiple sequence aligner to run with the respective arguments to be used.\n\nThe only required 
field is `aligner`. The fields `tree`, `args_tree` and `args_aligner` are optional and can be left empty.\n\nA minimal version:\n\n```csv\ntree,args_tree,aligner,args_aligner\n,,FAMSA,\n```\n\nThis will run the FAMSA aligner.\n\nA more complex one:\n\n```csv\ntree,args_tree,aligner,args_aligner\nFAMSA, -gt upgma -medoidtree, FAMSA,\n, ,TCOFFEE,\nFAMSA,,REGRESSIVE,\n```\n\nThis will run, in parallel:\n\n- the FAMSA guidetree with the arguments \u003cem\u003e-gt upgma -medoidtree\u003c/em\u003e. This guidetree is then used as input for the FAMSA aligner.\n- the TCOFFEE aligner\n- the FAMSA guidetree with default arguments. This guidetree is then used as input for the REGRESSIVE aligner.\n\nPlease check: \u003ca href=\"https://nf-co.re/multiplesequencealign/usage/#toolsheet-input\"\u003eusage\u003c/a\u003e.\n\n\u003e [!NOTE]\n\u003e The only required input is `aligner`.\n\n\u003c/details\u003e\n\nFor more details on more advanced runs: [usage documentation](https://nf-co.re/multiplesequencealign/usage) and the [parameter documentation](https://nf-co.re/multiplesequencealign/parameters).\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Pipeline resources\n\nWhich resources is the pipeline using? You can find the default resources used in [base.config](conf/base.config).\n\nIf you are using specific profiles, e.g. 
[test](conf/test.config), these will overwrite the defaults.\n\nIf you want to modify the needed resources, please refer [usage](https://nf-co.re/multiplesequencealign/docs/usage/#custom-configuration).\n\n## Pipeline output\n\nExample results: [results](https://nf-co.re/multiplesequencealign/results) tab on the nf-core website pipeline page.\nFor more details: [output documentation](https://nf-co.re/multiplesequencealign/output).\n\n## Extending the pipeline\n\nFor details on how to add your favourite guide tree, MSA or evaluation step in nf-core/multiplesequencealign please refer to the [extending documentation](https://nf-co.re/multiplesequencealign/usage/adding_a_tool).\n\n## Credits\n\nnf-core/multiplesequencealign was originally written by Luisa Santus ([@luisas](https://github.com/luisas)) and Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)) from The Comparative Bioinformatics Group at The Centre for Genomic Regulation, Spain.\n\nThe following people have significantly contributed to the development of the pipeline and its modules: Leon Rauschning ([@lrauschning](https://github.com/lrauschning)), Alessio Vignoli ([@alessiovignoli](https://github.com/alessiovignoli)), Igor Trujnara ([@itrujnara](https://github.com/itrujnara)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#multiplesequencealign` channel](https://nfcore.slack.com/channels/multiplesequencealign) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/multiplesequencealign for your analysis, please cite it using the following doi: [10.5281/zenodo.13889386](https://doi.org/10.5281/zenodo.13889386)\n\nAn extensive list of references for the tools used by the pipeline can be 
found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1178?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1178?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1178?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1179","url":"https://workflowhub.eu/workflows/1179","name":"nf-core/scnanoseq","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-scnanoseq_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/scnanoseq\" src=\"docs/images/nf-core-scnanoseq_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/scnanoseq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/scnanoseq/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scnanoseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scnanoseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/scnanoseq/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13899279-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13899279)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scnanoseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scnanoseq-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/scnanoseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scnanoseq** is a bioinformatics best-practice analysis pipeline for 10X Genomics single-cell/nuclei RNA-seq data derived from Oxford Nanopore Q20+ chemistry ([R10.4 flow cells (\u003eQ20)](https://nanoporetech.com/about-us/news/oxford-nanopore-announces-technology-updates-nanopore-community-meeting)). 
Due to the expectation of \u003eQ20 quality, the input data for the pipeline does not depend on Illumina paired data. **Please note `scnanoseq` can also process Oxford data with older chemistry, but we encourage usage of the Q20+ chemistry when possible**.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scnanoseq/results).\n\n## Pipeline summary\n\n![scnanoseq diagram](assets/scnanoseq_tube_map.png)\n\n1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))\n2. Unzip and split FASTQ ([`pigz`](https://github.com/madler/pigz))\n   1. Optional: Split FASTQ for faster processing ([`split`](https://linux.die.net/man/1/split))\n3. 
Trim and filter reads ([`Nanofilt`](https://github.com/wdecoster/nanofilt))\n4. Post trim QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))\n5. Barcode detection using a custom whitelist or 10X whitelist. ([`BLAZE`](https://github.com/shimlab/BLAZE))\n6. Extract barcodes. Consists of the following steps:\n   1. Parse FASTQ files into R1 reads containing barcode and UMI and R2 reads containing sequencing without barcode and UMI (custom script `./bin/pre_extract_barcodes.py`)\n   2. Re-zip FASTQs ([`pigz`](https://github.com/madler/pigz))\n7. Barcode correction (custom script `./bin/correct_barcodes.py`)\n8. Post-extraction QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))\n9. Alignment to the genome, transcriptome, or both ([`minimap2`](https://github.com/lh3/minimap2))\n10. Post-alignment filtering of mapped reads and gathering mapping QC ([`SAMtools`](http://www.htslib.org/doc/samtools.html))\n11. Post-alignment QC in unfiltered BAM files ([`NanoComp`](https://github.com/wdecoster/nanocomp), [`RSeQC`](https://rseqc.sourceforge.net/))\n12. Barcode (BC) tagging with read quality, BC quality, UMI quality (custom script `./bin/tag_barcodes.py`)\n13. Read deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools) OR [`Picard MarkDuplicates`](https://broadinstitute.github.io/picard/))\n14. Gene and transcript level matrices generation with [`IsoQuant`](https://github.com/ablab/IsoQuant) and/or transcript level matrices with [`oarfish`](https://github.com/COMBINE-lab/oarfish)\n15. Preliminary matrix QC ([`Seurat`](https://github.com/satijalab/seurat))\n16. 
Compile QC for raw reads, trimmed reads, pre and post-extracted reads, mapping metrics and preliminary single-cell/nuclei QC ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,fastq,cell_count\nCONTROL_REP1,AEG588A1_S1.fastq.gz,5000\nCONTROL_REP1,AEG588A1_S2.fastq.gz,5000\nCONTROL_REP2,AEG588A2_S1.fastq.gz,5000\nCONTROL_REP3,AEG588A3_S1.fastq.gz,5000\nCONTROL_REP4,AEG588A4_S1.fastq.gz,5000\nCONTROL_REP4,AEG588A4_S2.fastq.gz,5000\nCONTROL_REP4,AEG588A4_S3.fastq.gz,5000\n```\n\nEach row represents a single-end fastq file. Rows with the same sample identifier are considered technical replicates and will be automatically merged. `cell_count` refers to the expected number of cells you expect.\n\n```bash\nnextflow run nf-core/scnanoseq \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scnanoseq/usage) and the [parameter documentation](https://nf-co.re/scnanoseq/parameters).\n\n## Pipeline output\n\nThis pipeline produces feature-barcode matrices as the main output. 
These feature-barcode matrices are able to be ingested directly by most packages used for downstream analyses such as `Seurat`. Additionally, the pipeline produces a number of quality control metrics to ensure that the samples processed meet expected metrics for single-cell/nuclei data.\n\nThe pipeline provides two tools to produce the aforementioned feature-barcode matrices, `IsoQuant` and `oarfish`, and the user is given the ability to choose whether to run both or just one. `IsoQuant` will require a genome fasta to be used as input to the pipeline, and will produce both gene and transcript level matrices. `oarfish` will require a transcriptome fasta to be used as input to the pipeline and will produce only transcript level matrices.\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scnanoseq/results) tab on the nf-core website pipeline page.\nFor more details about the full set of output files and reports, please refer to the\n[output documentation](https://nf-co.re/scnanoseq/output).\n\n## Troubleshooting\n\nIf you experience any issues, please make sure to reach out on the [#scnanoseq slack channel](https://nfcore.slack.com/archives/C03TUE2K6NS) or [open an issue on our GitHub repository](https://github.com/nf-core/scnanoseq/issues/new/choose). However, some resolutions for common issues will be noted below:\n\n- Due to the nature of the data this pipeline analyzes, some tools may experience increased runtimes. For some of the custom tools made for this pipeline (`preextract_fastq.py` and `correct_barcodes.py`), we have leveraged the splitting done via the `split_amount` parameter to decrease their overall runtimes. The `split_amount` parameter will split the input FASTQs into a number of FASTQ files, each containing a number of lines based on the value used for this parameter. 
As a result, it is important not to set this parameter to be too low as doing so would cause the creation of a large number of files for the pipeline to process. While this value can be highly dependent on the data, a good starting point for an analysis would be to set this value to `500000`. If you find that `PREEXTRACT_FASTQ` and `CORRECT_BARCODES` are still taking long amounts of time to run, it would be worth reducing this parameter to `200000` or `100000`, but keeping the value on the order of hundreds of thousands or tens of thousands should help with keeping the total number of processes minimal. An example of setting this parameter to be equal to 500000 is shown below:\n\n```yml title=\"params.yml\"\nsplit_amount: 500000\n```\n\n- We have seen a recurrent node failure on slurm clusters that does seem to be related to submission of Nextflow jobs. This issue is not related to this pipeline per se, but rather to Nextflow itself. We are currently working on a resolution. But we have two methods that appear to help overcome this issue should it arise:\n  1. Provide a custom config that increases the memory request for the job that failed. This may take a couple attempts to find the correct requests, but we have noted that there does appear to be a memory issue occasionally with these errors.\n  2. Request an interactive session with a decent amount of time and memory and CPUs in order to run the pipeline on the single node. Note that this will take time as there will be minimal parallelization, but this does seem to resolve the issue.\n- We note that umitools dedup can take a large amount of time in order to perform deduplication. One approach we have implemented to assist with speed is to split input files based on chromosome. However for the transcriptome aligned bams, there is some additional work required that involves grouping transcripts into appropriate chromosomes. 
In order to accomplish this, the pipeline needs to parse the transcript id from the transcriptome FASTA file. The transcript id is often nested in the sequence identifier with additional data and the data is delimited. We have included the delimiters used by reference files obtained from GENCODE, NCBI, and Ensembl. However in case you wish to explicitly control this or if the reference file source uses a different delimiter, you are able to manually set it via the `--fasta_delimiter` parameter.\n- We acknowledge that analyzing PromethION data is a common use case for this pipeline. Currently, the pipeline has been developed with defaults to analyze GridION and average sized PromethION data. For cases, where jobs have fail due for larger PromethION datasets, the defaults can be overwritten by a custom configuation file (provided by the `-c` Nextflow option) where resources can be increased (substantially in some cases). Below are some of the overrides we have used, and while these amounts may not work on every dataset, these will hopefully at least note which processes will need to have their resources increased:\n\n```groovy title=\"custom.config\"\n\nprocess\n{\n    withName: '.*:.*FASTQC.*'\n    {\n        cpus = 20\n    }\n}\n\nprocess\n{\n    withName: '.*:BLAZE'\n    {\n        cpus = 30\n    }\n}\n\nprocess\n{\n    withName: '.*:TAG_BARCODES'\n    {\n        memory = '60.GB'\n    }\n}\n\nprocess\n{\n    withName: '.*:SAMTOOLS_SORT'\n    {\n        cpus = 20\n    }\n}\n\nprocess\n{\n    withName: '.*:MINIMAP2_ALIGN'\n    {\n        cpus = 20\n    }\n}\n\nprocess\n{\n    withName: '.*:ISOQUANT'\n    {\n        cpus = 30\n        memory = '85.GB'\n    }\n}\n```\n\nWe further note that while we encourage the use of `split_amount` as discussed above for larger datasets, the pipeline can be executed without enabling this parameter. 
When doing this, please consider increasing the time limit to `CORRECT_BARCODES` as it can take hours instead of minutes when `split_amount` is disabled:\n\n```groovy title=\"custom.config\"\n//NOTE: with split_amount disabled, consider increasing the time limit to CORRECT_BARCODES\nprocess\n{\n    withName: '.*:CORRECT_BARCODES'\n    {\n        time = '15.h'\n    }\n}\n```\n\n## Credits\n\nnf-core/scnanoseq was originally written by [Austyn Trull](https://github.com/atrull314), and [Dr. Lara Ianov](https://github.com/lianov).\n\nWe would also like to thank the following people and groups for their support, including financial support:\n\n- Dr. Elizabeth Worthey\n- University of Alabama at Birmingham Biological Data Science Core (U-BDS), RRID:SCR_021766, \u003chttps://github.com/U-BDS\u003e\n- Civitan International Research Center\n- Support from: 3P30CA013148-48S8\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scnanoseq` channel](https://nfcore.slack.com/channels/scnanoseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/scnanoseq for your analysis, please cite the article as follows:\n\n\u003e **scnanoseq: an nf-core pipeline for Oxford Nanopore single-cell RNA-sequencing**\n\u003e\n\u003e Austyn Trull, nf-core community, Elizabeth A. 
Worthey, Lara Ianov\n\u003e\n\u003e bioRxiv 2025.04.08.647887; doi: https://doi.org/10.1101/2025.04.08.647887\n\nThe specific pipeline version can be cited using the following doi: [10.5281/zenodo.13899279](https://doi.org/10.5281/zenodo.13899279)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1179?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1179?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1179?version=3","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1179?version=4","name":"1.2.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1180","url":"https://workflowhub.eu/workflows/1180","name":"beacon-omop-worker-survival","description":"# beacon-omop-worker-survival-analysis","organization":"TRE-FX","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1180?version=1","name":"main @ 8f28313","author":[],"descriptor_type":["CWL"]}]},{"id":"1181","url":"https://workflowhub.eu/workflows/1181","name":"Evaluation IA-Biodiv workflow","description":"Workflow permettant de prendre en entrée les résultats du challenge 
IA-biodiv par tâche, le fichier de référence par tâche afin de faire tourner un jupyter notebook produisant les scores pour chaque consortium participant.","organization":"PNDB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1181?version=1","name":"Version 1","author":["Yvan Le Bras"],"descriptor_type":["GALAXY"]}]},{"id":"1182","url":"https://workflowhub.eu/workflows/1182","name":"Ocean's variables 2.0","description":"Subset data on the Mediterranean Sea and extract and visualise the Phosphate variable","organization":"FAIR-EASE, usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1182?version=1","name":"Version 1","author":["Marie Jossé"],"descriptor_type":["GALAXY"]}]},{"id":"1183","url":"https://workflowhub.eu/workflows/1183","name":"Metagenome-Atlas","description":"# Metagenome-Atlas\r\n\r\n[![Anaconda-Server Badge](https://anaconda.org/bioconda/metagenome-atlas/badges/latest_release_relative_date.svg)](https://anaconda.org/bioconda/metagenome-atlas)\r\n[![Bioconda](https://img.shields.io/conda/dn/bioconda/metagenome-atlas.svg?label=Bioconda )](https://anaconda.org/bioconda/metagenome-atlas)\r\n[![Documentation Status](https://readthedocs.org/projects/metagenome-atlas/badge/?version=latest)](https://metagenome-atlas.readthedocs.io/en/latest/?badge=latest)\r\n![Mastodon Follow](https://img.shields.io/mastodon/follow/109273833677404282?domain=https%3A%2F%2Fmstdn.science\u0026style=social)\r\n\u003c!--[![follow on twitter](https://img.shields.io/twitter/follow/SilasKieser.svg?style=social\u0026label=Follow)](https://twitter.com/search?f=tweets\u0026q=%40SilasKieser%20%23metagenomeAtlas\u0026src=typd) --\u003e\r\n\r\n\r\nMetagenome-atlas is an easy-to-use metagenomic pipeline based on snakemake. 
It handles all steps from QC, Assembly, Binning, to Annotation.\r\n\r\n![scheme of workflow](resources/images/atlas_list.png?raw=true)\r\n\r\nYou can start using atlas with three commands:\r\n```\r\n    mamba install -y -c bioconda -c conda-forge metagenome-atlas={latest_version}\r\n    atlas init --db-dir databases path/to/fastq/files\r\n    atlas run all\r\n```\r\nwhere `{latest_version}` should be replaced by [![Version](https://anaconda.org/bioconda/metagenome-atlas/badges/version.svg)](https://anaconda.org/bioconda/metagenome-atlas)\r\n\r\n\r\n# Webpage\r\n\r\n[metagenome-atlas.github.io](https://metagenome-atlas.github.io/)\r\n\r\n# Documentation\r\n\r\nhttps://metagenome-atlas.readthedocs.io/\r\n\r\n[Tutorial](https://github.com/metagenome-atlas/Tutorial)\r\n\r\n# Citation\r\n\r\n\u003e ATLAS: a Snakemake workflow for assembly, annotation, and genomic binning of metagenome sequence data.  \r\n\u003e Kieser, S., Brown, J., Zdobnov, E. M., Trajkovski, M. \u0026 McCue, L. A.   \r\n\u003e BMC Bioinformatics 21, 257 (2020).  \r\n\u003e doi: [10.1186/s12859-020-03585-4](https://doi.org/10.1186/s12859-020-03585-4)\r\n\r\n\r\n# Development/Extensions\r\n\r\nHere are some ideas I work on or want to work on when I have time. If you want to contribute or have some ideas let me know via a feature request issue.\r\n\r\n- Optimized MAG recovery (e.g. 
[Spacegraphcats](https://github.com/spacegraphcats/spacegraphcats))\r\n- Integration of viruses/plasmid that live for now as [extensions](https://github.com/metagenome-atlas/virome_atlas)\r\n- Add statistics and visualisations as in [atlas_analyze](https://github.com/metagenome-atlas/atlas_analyze)\r\n- Implementation of most rules as snakemake wrapper\r\n- Cloud execution\r\n- Update to new Snakemake version and use cool reports.\r\n","organization":"Snakemake-Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1183?version=1","name":"2.19.0","author":[],"descriptor_type":[]}]},{"id":"1189","url":"https://workflowhub.eu/workflows/1189","name":"AMR-Pathfinder","description":"This is an aggregation of the work done in [Seq4AMR](https://workflowhub.eu/projects/110) consisting of the following workflows:\r\n\r\n- [WF1: AbritAMR / AMRFinderPlus](https://workflowhub.eu/workflows/634)\r\n- [WF2: Sciensano](https://workflowhub.eu/workflows/644) (**not currently included**)\r\n- [WF3: SRST2](https://workflowhub.eu/workflows/407) \r\n- [WF4: StarAMR](https://workflowhub.eu/workflows/470)\r\n\r\n## Installation\r\n\r\n- You will need to:\r\n    - run the [RGI Database Builder](https://my.galaxy.training/?path=?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fcard%2Frgi%2Frgi_database_builder%2F1.2.0) as a Galaxy admin (if this hasn't been done already)\r\n    - [Have the en_US.UTF-8 locale installed](https://github.com/galaxyproject/tools-iuc/issues/6467) on the compute nodes executing cast/melt jobs.\r\n    - Install the requisite tools with e.g. 
[`shed-tools`](https://ephemeris.readthedocs.io/en/latest/commands/shed-tools.html) command from the [`ephemeris`](https://ephemeris.readthedocs.io/en/latest/) suite: `shed-tools install -g https://galaxy.example.com -a API_KEY -t tools.yaml` (tools.yaml is provided in this repository.)\r\n- Then you can import this workflow\r\n    - Navigate to `/workflows/import` of your Galaxy server\r\n    - Select \"GA4GH servers\"\r\n    - Enter `name:\"AMR-Pathfinder\"`\r\n- And run it\r\n    - You must provide a Sequencing collection (list:paired of fastq files)\r\n    - And a Genomes collection (list of fasta files) \r\n    - Both of these should use **identical** collection element identifiers\r\n\r\n## Outputs\r\n\r\nThis will produce two important tables: \"Binary Comparison\" and a \"% Identity Scored Outputs\". \r\n\r\n### Binary comparison\r\n\r\nThis file reports the discovery or absence of specific AMR genes across all tested AMR Analysis tools. You will mostly see 1s (presence) or 0s (absence) but you may occasionally see higher numbers when an AMR tool reports multiple hits for a specific gene.\r\n\r\n### % Identity Scored Outputs\r\n\r\nThis is similar to binary comparison, but using the % identity reported by each AMR tool. For cases where multiple hits were detected, we take the highest.\r\n\r\n## Known Issues\r\n\r\nThe names for identified AMR genes is highly inconsistent across AMR analysis tools. 
We urge the AMR community to rectify this by standardising gene names used in their tooling.","organization":"ErasmusMC Clinical Bioinformatics, Seq4AMR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1189?version=1","name":"Version 4.6","author":["Helena Rasche","Dennis Dollée","Birgit Rijvers"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1189?version=2","name":"Version 4.7","author":["Helena Rasche","Dennis Dollée","Birgit Rijvers"],"descriptor_type":["GALAXY"]}]},{"id":"1190","url":"https://workflowhub.eu/workflows/1190","name":"haploid-variant-calling-wgs-pe/main","description":"Workflow for variant analysis against a reference genome in GenBank format","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1190?version=1","name":"v0.1","author":["Anton Nekrutenko"],"descriptor_type":["GALAXY"]}]},{"id":"1191","url":"https://workflowhub.eu/workflows/1191","name":"Multiomic ALS signatures highlight sex differences and molecular subclusters and identify the MAPK pathway as therapeutic target","description":"This repository contains the analytical pipeline for the MAXOMOD project, which focuses on the multi-omic analysis of axono-synaptic degeneration in the motor neuron disease amyotrophic lateral sclerosis (ALS). 
The project explores sex differences and molecular subclusters in ALS and investigates the MAPK pathway as a potential therapeutic target.\r\n\r\nFor a detailed understanding of the scientific background and the findings, refer to our paper published on [Nature Communications](https://www.nature.com/articles/s41467-024-49196-y).\r\n\r\n## Table of Contents\r\n\r\n- [Getting Started](#getting-started)\r\n  - [Prerequisites](#prerequisites)\r\n  - [Data Preparation](#data-preparation)\r\n  - [Organize Data](#organize-data)\r\n- [Reproducing Results](#reproducing-results)\r\n- [Contributing](#contributing)\r\n- [Collaboration](#collaboration)\r\n\r\n\r\n## Getting Started\r\n### Prerequisites\r\n- Git\r\n- [DVC](https://dvc.org/)\r\n- [Nextflow](https://www.nextflow.io/)\r\n- Container execution engine (e.g. [Docker](https://www.docker.com/) or [Podman](https://podman.io/))\r\n\r\nClone the git repository:\r\n\r\n```\r\ngit clone https://github.com/imsb-uke/MAXOMOD_Pipeline.git ./maxomod\r\n```\r\n\r\nEnter the cloned directory:\r\n\r\n   ``` cd maxomod ```\r\n\r\n### Data Preparation\r\n\r\nHuman sequencing data: [EGAS00001007318](https://ega-archive.org/datasets/) [due to patient data, access is restricted]\r\n\r\nMouse sequencing data: [GSE234246](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234246)\r\n\r\nProteomics data: [PXD043300](https://proteomecentral.proteomexchange.org/cgi/GetDataset?ID=PXD043300)\r\n\r\nPhosphoproteomics data: [PXD043297](https://proteomecentral.proteomexchange.org/cgi/GetDataset?ID=PXD043297)\r\n\r\n### Organize Data\r\n\r\nAll data should be organized in datasets using the following structures\r\n\r\n```\r\ndatasets/\r\n    consortium/\r\n        \u003cmodel\u003e/\r\n            01_received_data/\r\n                \u003comic\u003e/\r\n                cohort/\r\n```\r\n\r\nThe pipeline expects `fastq.gz` files for the sequencing data, `txt` files for the proteomics data and `csv` files for the phosphoproteomics 
data.\r\n\r\nPlease, use DVC to see, which exact file names are required:\r\n\r\n```bash\r\ndvc status srna_organize_samples proteomics_organize_samples phosphoproteomics_organize_samples rnaseq_nextflow\r\n```\r\n\r\n#### Automatic download (optional)\r\n\r\nTo automatically download the RNAseq \u0026 miRNAseq data we provide a download script, which can be executed using the following commands:\r\n\r\n```bash\r\ndvc unfreeze sra_prefetch sra_fastq_dump sra_organize\r\ndvc repro\r\n```\r\n\r\n\r\n## Reproducing Results\r\n\r\nTo reproduce the analysis results, execute the following command:\r\n\r\n```bash\r\ndvc repro\r\n```\r\n\r\nThis command will run the predefined pipelines to process and analyze the data according to the methodology described in the associated publication.\r\nAll steps will be executed in a docker container automatically using the `docker_wrapper.sh` script. All docker images will be automatically downloaded and are available in the [Packages section](https://github.com/orgs/imsb-uke/packages?repo_name=MAXOMOD_Pipeline) on GitHub.\r\n\r\n## Contributing\r\nWe welcome contributions to enhance the reproducibility and scope of the analysis.\r\n\r\n## Collaboration\r\n\r\nFor questions or collaboration offers, please contact the project's principal investigators via email provided on the MAXOMOD project page: [MAXOMOD Contact Information](https://www.gesundheitsforschung-bmbf.de/de/maxomod-multi-omische-analyse-axono-synaptischer-degeneration-bei-motoneuronerkrankungen-9409.php).\r\n\r\n\r\n","organization":"MAXOMOD","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1191?version=1","name":"main @ 82f5e3e","author":["Fabian Hausmann"],"descriptor_type":[]}]},{"id":"1194","url":"https://workflowhub.eu/workflows/1194","name":"goseq/main","description":"This workflow is used for GO and KEGG enrichment analysis using GOseq 
tools.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1194?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1195","url":"https://workflowhub.eu/workflows/1195","name":"CAELESTIS mechanical testing simulation, using SVR model, 8 n_samples, and 2 MN5 nodes, ALYA_PROCS 28","description":"This experiment has been executed using:\r\n  - Model: SVR (test_SVR.yaml)\r\n  - n_samples: 8 (number of ALYA simulations to be run)\r\n  - Nodes used in the MN5 supercomputer: 2\r\n  - ALYA_PROCS = 28\r\n  \r\nAdapted version of a workflow used in CAELESTIS for mechanical testing simulation which was developed by Riccardo Cecco (Workflows and Distributed Computing Group, BSC) with the guidance of Gerard Guillamet (Dual Technologies Research Group, BSC) and Aravind Sasikumar (AMADE Research UDG). The workflow is distributed with a singularity container described by Fernando Vazquez (Workflows and Distributed Computing Group, BSC) and created with the Image Creation Service developed by Jorge Ejarque (Workflows and Distributed Computing Group, BSC) at eFlows4HPC project.\r\n","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1195?version=1","name":"COMPSs 3.3.1","author":["Fernando Vázquez-Novoa","Jorge Ejarque","Rosa M Badia"],"descriptor_type":[]}]},{"id":"1197","url":"https://workflowhub.eu/workflows/1197","name":"SC24 Lysozyme in water full (4 MPI processes)","description":"Lysozyme in water full COMPSs application example, used during the Supercomputing 24 conference at the COMPSs Tutorial session.","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1197?version=1","name":"Version 1","author":["Javier Conejero"],"descriptor_type":[]}]},{"id":"1198","url":"https://workflowhub.eu/workflows/1198","name":"CAELESTIS mechanical testing simulation, using Decision Tree regressor model, 8 n_samples, and 4 MN5 nodes, ALYA_PROCS 56","description":"This experiment has been executed using:\r\n  - Model: Decision Tree regressor (test.yaml)\r\n  - n_samples: 8 (number of ALYA simulations to be run)\r\n  - Nodes used in the MN5 supercomputer: 4\r\n  - ALYA_PROCS = 56\r\n\r\nAdapted version of a workflow used in CAELESTIS for mechanical testing simulation which was developed by Riccardo Cecco (Workflows and Distributed Computing Group, BSC) with the guidance of Gerard Guillamet (Dual Technologies Research Group, BSC) and Aravind Sasikumar (AMADE Research UDG). The workflow is distributed with a singularity container described by Fernando Vazquez (Workflows and Distributed Computing Group, BSC) and created with the Image Creation Service developed by Jorge Ejarque (Workflows and Distributed Computing Group, BSC) at eFlows4HPC project.\r\n","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1198?version=1","name":"COMPSs 3.3.1","author":["Fernando Vázquez-Novoa","Jorge Ejarque","Rosa M Badia"],"descriptor_type":[]}]},{"id":"1199","url":"https://workflowhub.eu/workflows/1199","name":"Taxonomy classification using Kraken2 and Bracken","description":"The aim of this workflow is to handle the routine part of shotgun metagenomics data processing. The workflow is using the tools Kraken2 and Bracken for taxonomy classification and the KrakenTools to evaluate diversity metrics. This workflow was tested on Galaxy Australia. 
\r\nA How-to guide for the workflow can be found at:  https://github.com/vmurigneu/kraken_howto_ga_workflows/blob/main/pages/taxonomy_kraken2_wf_guide.md  ","organization":"QCIF Bioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1199?version=1","name":"Version 1","author":["Valentine Murigneux","Mike Thang"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1199?version=2","name":"Version 2","author":["Valentine Murigneux","Mike Thang"],"descriptor_type":["GALAXY"]}]},{"id":"1200","url":"https://workflowhub.eu/workflows/1200","name":"GALOP - Genome Assembly using Long reads Pipeline","description":"# GALOP - Genome Assembly using Long reads Pipeline\r\n\r\nThis repository contains an exact copy of the standard Genoscope long reads assembly pipeline.\r\n\r\nAt the moment, this is not intended for users to download as it uses grid submission commands that will only work at Genoscope. As time goes on, we intend to make this pipeline available to a broader audience. However, genome assembly and polishing commands are accessible in the `lib/assembly.py` and `lib/polishing.py` files.\r\n\r\n```\r\ngalop.py -h\r\nMandatory arguments:\r\n  --step {assembly,polishing}\r\n                        Defines if the program will launch assembly or polishing scripts (default: None)\r\n\r\nAssembly step arguments:\r\n  --proj PROJECT_CODE, -p PROJECT_CODE\r\n                        Project and material codes, can be given multiple times (eg. 
-p BCM,A,B -p BWW,AB)\r\n                        (default: None)\r\n  -i INPUT_FILE         Nanopore reads fastq file (default: )\r\n  --size GENOME_SIZE, -s GENOME_SIZE\r\n                        Estimated size of the genome in Mb (default: None)\r\n  --cov READSET_COVERAGE, -c READSET_COVERAGE\r\n                        Coverage to use for longest and filtlong subsets (default: 30)\r\n  --assemblers ASSEMBLER_LIST\r\n                        Comma-separated list of assemblers to use (e.g. '--assemblers\r\n                        Smartdenovo,Raven,Wtdbg2'will not launch flye nor Necat. Choices: Flye, Hifiasm, Necat,\r\n                        Nextdenovo, Raven, Shasta,Smartdenovo, Wtdbg2 (default:\r\n                        Smartdenovo,Wtdbg2,Flye,Necat,Nextdenovo)\r\n  --readsets READSET_LIST\r\n                        Comma-separated list of readsets to use (e.g. '--readsets Filtlong,Longest' will not\r\n                        launch assemblies with all reads (default: Full,Filtlong,Longest)\r\n  --no-readset          Disables readset creation (default: False)\r\n  --all-readsets        Disables the use of lsRunProj to check for readset validity and instead use all available\r\n                        readsets (default: False)\r\n  --force               Skips directory creation (default: False)\r\n  --nano-raw            Use --nano-raw instead of --nano-hq in Flye (default: False)\r\n  --pacbio              Look for PacBio runs when building readsets. 
(default: False)\r\n\r\nPolishing step arguments:\r\n  --model MEDAKA_MODEL, -m MEDAKA_MODEL\r\n                        Model to use for medaka polishing (default: r941_prom_sup_g507)\r\n  --pe1 PE1_PATH        Path to the Illumina R1 file (.gz or .fastq) (default: None)\r\n  --pe2 PE2_PATH        Path to the Illumina R2 file (.gz or .fastq) (default: None)\r\n  --assembly ASSEMBLY, -a ASSEMBLY\r\n                        FULL PATH to the assembly to polish (default: )\r\n  --assembly_dir ASSEMBLY_DIR\r\n                        FULL PATH to the directory ouput of the 'nanopore_assembly_pipeline --step assembly'\r\n                        (default: )\r\n  --racon               Enables the racon step (default: False)\r\n  --no_medaka           Skip the medaka step (default: False)\r\n\r\nOptional arguments:\r\n  --dir OUTPUT_DIRECTORY, -d OUTPUT_DIRECTORY\r\n                        Output directory (default: None)\r\n  --help, -h            Show this help message and exit\r\n\r\nSubmission arguments:\r\n  --submode {msub,local}\r\n                        Either submit using ccc_msub or run in local mode (default: msub)\r\n  --nolaunch            Creates submission scripts but does not launch them (default: False)\r\n  --account ACCOUNT     Account to use for submission (default: bistace)\r\n  --qos {long,week,nolimit,xlarge,xxlarge}\r\n                        QoS to use for submission (default: )\r\n  --assembly_queue {normal,xlarge,small,broadwell,xxlarge}\r\n                        Cluster queue to use for the assembly step (default: normal)\r\n  --assembly_core ASSEMBLY_CORE_NUMBER\r\n                        Number of cores to use for the assembly step (default: 36)\r\n  --polishing_queue {normal,xlarge,small,broadwell,xxlarge}\r\n                        Cluster queue to use for the polishing step (default: normal)\r\n  --polishing_core POLISHING_CORE_NUMBER\r\n                        Number of cores to use for the polishing step (default: 36)\r\n  --wait           
     Wait for all jobs to finish before exiting (default: False)\r\n```\r\n","organization":"Bioinformatics Laboratory for Genomics and Biodiversity (LBGB), ERGA Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1200?version=1","name":"main @ aa63fa8","author":["Benjamin Istace","Jean-Marc Aury","Caroline Belser"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1200?version=2","name":"main @ a1c22db","author":["Benjamin Istace","Jean-Marc Aury","Caroline Belser"],"descriptor_type":[]}]},{"id":"1201","url":"https://workflowhub.eu/workflows/1201","name":"rnaseq-de/main","description":"This workflow can only work on an experimental setup with exactly 2 conditions. It takes two collections of count tables as input and performs differential expression analysis. Additionally it filters for DE genes based on adjusted p-value  and log2 fold changes thresholds. It also generates informative plots.\n","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1201?version=1","name":"v0.1","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1201?version=2","name":"v0.2","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1201?version=3","name":"v0.3","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1201?version=4","name":"v0.4","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1201?version=5","name":"v0.5","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1201?version=6","name":"v0.7","author":["Pavankumar 
Videm"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1201?version=7","name":"v0.8","author":["Pavankumar Videm"],"descriptor_type":["GALAXY"]}]},{"id":"1202","url":"https://workflowhub.eu/workflows/1202","name":"Proximity interactomes of human PRICKLE1, PRICKLE2 and PRICKLE3.","description":"KNIME workflow describing the analysis of mass spectrometry dataset related to the publication \"Armed with PRICKLE(3)s: Stabilizing WNT/PCP complexes against RNF43-mediated ubiquitination\". Workflow was built using the KNIME software container environment, version 4.7.7a, which can be created using \"docker pull cfprot/knime:4.7.7a\" command in Docker. The input data for the KNIME workflow (the report.tsv from DIA-NN) can be found on PRIDE repository under the identifier PXD057854.","organization":"Proteomics CEITEC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1202?version=1","name":"Version 1","author":["Kristina Gomoryova"],"descriptor_type":[]}]},{"id":"1205","url":"https://workflowhub.eu/workflows/1205","name":"ONT Artificial Deletion Filter-Delter","description":"# ONT Artificial Deletion Filter-Delter\r\nA tool to filter short artificial deletion variations by Oxford Nanopore Technologies (ONT) R9 and R10 flow cells and chemistries.\r\n## Requirements\r\nThe tool has been tested on Ubuntu 20.04 with 256GB RAM, 64 CPU cores and a NVIDIA GPU with 48GB RAM. The minimal requirements should be \u003e= 64GB RAM and a NVIDIA GPU with \u003e= 8GB RAM. 
Other operating systems like Windows or Mac were not tested.\r\n\r\nONT softwares like [Guppy](https://community.nanoporetech.com/downloads), [Tombo](https://github.com/nanoporetech/tombo), and [ont-fast5-api](https://github.com/nanoporetech/ont_fast5_api) should be pre-installed before generating Tombo-resquiggled single-read fast5 files.\r\nUsers might run following commands to preprocess R9 fast5 files in shell terminal before running our pipeline. As these steps below need GPU support and might take a long time, our pipeline doesn't contain them.\r\n```bash\r\n#===basecalling the fast5 files===\r\nont-guppy/bin/guppy_basecaller -c ont-guppy/data/dna_r9.4.1_450bps_sup.cfg -i $fast5dir/barcode${barcode} -s guppy_sup_basecalled/barcode${barcode} -r --compress_fastq -x cuda:1,2 --gpu_runners_per_device 4 --chunks_per_runner 256 --num_callers 3 --fast5_out\r\n\r\n#===preprocessing R9 fast5 files===\r\nc=$(ls *.fast5 | wc -l)\r\ndeclare -i count=$c-1\r\n#multiread fast5 to single read fast5\r\nmulti_to_single_fast5 -i guppy_sup_basecalled/barcode${barcode}/workspace -s fast5_pass_single --threads 24 \r\n#copy to new directory\r\ncd fast5_pass_single\r\nmkdir all_single_fast5s\r\nfor ((j=0;j\u003c=$count;j=j+1))\r\ndo\r\n  echo $j\r\n  cp -r ./$j/*.fast5 all_single_fast5s\r\n  rm -rf ./$j\r\ndone\r\n#align to reference genome via tombo resquiggle\r\ntombo resquiggle guppy_sup_basecalled/barcode${barcode}/workspace/fast5_pass_single/all_single_fast5s data/Refs/$refseq --processes 24 --overwrite --num-most-common-errors 5  --failed-reads-filename tombo_resquiggle_failed_fast5.txt\r\n```\r\nThe fast5 files in the directory named **all_single_fast5s** could be employed in downstream workflow, which equals the input parameter **Tombo_dir** in shell command line or config yaml (details listed in **Configure input parameters** section). \r\n## Installation\r\nThe tool runs via Snakemake workflows. 
Users must install workflow dependencies including [Snakemake](https://snakemake.readthedocs.io/en/latest/tutorial/tutorial.html) (**Version \u003e= 7.3**) and [R](https://mirrors.tuna.tsinghua.edu.cn/CRAN/) before using the pipeline. The workflow dependencies, which are stored in a file named environment.yaml, are listed as below:\r\n\r\nchannels:\r\n  - conda-forge\r\n  - bioconda\r\n  - anaconda\r\n\r\ndependencies:\r\n  - snakemake-minimal \u003e=7.3\r\n  - graphviz\r\n  - seaborn\r\n  - numpy\r\n  - pandas\r\n  - h5py\r\n  - scipy\r\n  - samtools =1.15\r\n  - r-essentials\r\n  - r-base\r\n  - bioconductor-shortread\r\n  - r-stringr\r\n  - r-dplyr\r\n  - r-vegan \r\n\r\nUsers are suggested to use [Conda](https://docs.conda.io/en/latest/) or [Mamba](https://mamba.readthedocs.io/en/latest/user_guide/mamba.html) to install these dependencies. After users download the working directory containing all the necessary files, the following shell command could install **Delter** in a **conda** environment in less than half an hour.\r\n```bash\r\ncd /path/to/Delter/working/directory\r\nconda env create --name Delter --file environment.yaml\r\n```\r\nor\r\n```bash\r\ncd /path/to/Delter/working/directory\r\nconda create --name Delter\r\nconda activate Delter\r\nconda install -c bioconda -y snakemake-minimal\u003e=7.3\r\nconda install -c anaconda -y numpy pandas \r\nconda install -c anaconda -y h5py seaborn\r\nconda install -c conda-forge -y scipy\r\nconda install -c bioconda -y samtools=1.15\r\nconda install -c conda-forge -y r-essentials r-base\r\nconda install -c conda-forge -y r-dplyr r-vegan r-stringr\r\nconda install -c bioconda -y bioconductor-shortread\r\n```\r\n\r\n## Activate and exit the environment\r\nTo activate the environment \r\n  ```bash\r\n  conda activate Delter\r\n  ```\r\nTo exit the environment (after finishing the usage of the pipeline), just execute\r\n  ```bash\r\n  conda deactivate\r\n  ```\r\n## Run the pipeline\r\nThe whole pipeline could handle ONT 
R9 and R10 sequencing data. The working directory contains file named `Delter.config.yaml`, which stores key input parameters for the workflow. For handling the VCF file(s) generated by [LoFreq](https://csb5.github.io/lofreq/), the **Delter.py** script should be used. For VCF file(s) generated by [Clair3](https://github.com/HKU-BAL/Clair3), the **Delter_clair3.py** script should be used.\r\n\r\nThe Demo data could be accessed via [figshare](https://doi.org/10.6084/m9.figshare.26093869.v1). User should modify the filepaths in the Delter.config.yaml in the snakemakeexample directory.\r\n\r\n### Configure input parameters for the workflow\r\nThere are two ways to configure input parameters for this workflow.\r\n\r\n(1) Via shell command line\r\n\r\nUsers could define customized input paramaters using **--config** option in Snakemake command line.\r\n```bash\r\nUsage:\r\nsnakemake -s Delter.py --cores 8 --config Ref=refname Num=5 Vcf=path/to/VCF Refseq=path/to/refseq Outdir=path/to/outputdir Bam=path/to/sorted/bam Tombo_dir=path/to/tombo_processed/fast5 Subsample=2000 Flowcell=R9 Strategy=Direct MRPPthres=0.001 HomoQthres=23 OtherQthres=20.6\r\nRef=refname                             The value of #CHROM in vcf file, e.g., 'Ref=chr1'\r\nNum=5                                   The number of bases up- and down-stream that are centered around the variation loci, default=5\r\nVcf=path/to/VCF                         The file path to vcf file, e.g., 'Vcf=/data/res/lofreq.vcf'\r\nRefseq=path/to/refseq                   The file path to reference sequence, e.g., 'Refseq=/database/COVID-19.fa'\r\nOutdir=path/to/outputdir                The file path storing the output results and intermediate files, e.g., 'Outdir=/data/res'\r\nBam=path/to/sorted/bam                  The file path to sorted bam files, e.g., 'Bam=/data/res/sorted.bam'\r\nTombo_dir=path/to/tombo_processed/fast5 The file path to tombo-resquiggled single fats5 files, e.g., 'Tombo_dir=/data/fast5'\r\nSubsample=2000    
                      The number to subsample from reads covering variation loci, should be larger than 200, default=2000\r\nFlowcell=R9                             The version of flow cell, should be R9 or R10, default=R9\r\nStrategy=Direct                         The sequencing strategy, should be Amplicon or Direct, default=Direct\r\nMRPPthres=0.001                         The threshold of MRPP A, default=0.001\r\nHomoQthres=23                           The threshold of homo-dels, default=23\r\nOtherQthres=20.6                        The threshold of other-dels, default=20.6\r\n```\r\n(2) Edit config.yaml\r\n\r\nUsers could also define customized input parameters by editing config.yaml.\r\n```yaml\r\nBam: \"/public/data1/yefq/data/fast5/20220703_WGA_twist/processed/20230426_Guppy621_comparison/Sce20_guppy_sup_aligned.softclip_trimmed.endtrim10_minimap2_align.mapped.sorted.bam\"\r\nRef: \"Zymo_Saccharomyces_cerevisiae_Seq5_ref\"\r\nNum: \"5\"\r\nTombo_dir: \"/public/data1/yefq/data/fast5/20220703_WGA_twist/processed/20230426_guppy_sup_basecalled/Sce20/workspace/fast5_pass_single/all_single_fast5s\"\r\nSubsample: \"2000\"\r\nVcf: \"/public/data1/yefq/data/fast5/20220703_WGA_twist/processed/20230426_Guppy621_comparison/Sce20_guppy_sup_aligned.test.vcf\" \r\nRefseq: \"/public/data1/yefq/data/Refs/Zymo_Saccharomyces_cerevisiae_Seq5_ref.fa\" \r\nOutdir: \"/public/data1/yefq/data/fast5/20220703_WGA_twist/processed/20230426_Guppy621_comparison/snakemake-tutorial/data/test\" \r\nFlowcell: \"R9\"\r\nStrategy: \"Direct\"\r\nMRPPthres: \"0.001\"\r\nHomoQthres: \"23\"\r\nOtherQthres: \"20.6\"\r\n```\r\nUsers should note that, **config values can be overwritten via the command line** even when it has been defined in the config.yaml.\r\n### Start a run\r\nOnce the work directory and configuration files are set up, users can run the pipeline as easily as invoking:\r\n```bash\r\ncd /path/to/Delter/working/directory\r\nconda activate Delter\r\nsnakemake -s Delter.py --cores 
8\r\n```\r\nOther Snakemake-related parameters like **--cores** and **--configfile** could be checked via \r\n```bash\r\nsnakemake -h\r\n```\r\n### Output\r\nThere are several outputs according to the indexes used. \r\n\r\n**(1) target.upstream_downstream.bases.comparison.result.txt**. When the workflow used MRPP A, the main output is **target.upstream_downstream.bases.comparison.result.txt**, which contains (1) the variation locus position, (2) group1 (plus.match or minus.match, corresponding to forward-aligned reads supporting the reference allele and reverse-aligned reads supporting the reference allele), (3) group2 (plus.del or minus.del, corresponding to forward-aligned reads supporting the non-reference allele and reverse-aligned reads supporting the non-reference allele), (4) the number of reads supporting group1 (**should always be around or higher than 400 in direct sequencing**), (5) the number of reads supporting group2 (**should always be around or higher than 400 in direct sequencing**), (6) the mean current measurements of upstream and downstream config[\"Num\"] bases centered around variation locus of group1, (7) the mean current measurements of upstream and downstream config[\"Num\"] bases centered around variation locus of group2, (8) P values between current measurements of group1 and group2, (9) MRPP P values, (10) **MRPP A statistic, users could compare this value against the pre-set threshold (WTA/Amplicon sequencing: 0.01; direct sequencing: 0.001) in our article to decide whether the variation locus is artificial**.\r\n\r\n**(2) fq.Qscore.info.txt**. For R9 or R10 data, when sequencing depth is low, Q score might be used to identify artificial deletions. 
The main output is **fq.Qscore.info.txt**, which contains (1) the variation locus position, (2) group1 (corresponding to forward-aligned reads supporting the non-reference allele and reverse-aligned reads supporting the non-reference allele), (3) group2 (plus.match or minus.match, corresponding to forward-aligned reads supporting the reference allele and reverse-aligned reads supporting the reference allele), (4) the number of reads supporting group1 (**should always be ≥20**), (5) the number of reads supporting group2 (**should always be ≥20**), (6) the mean Q scores of upstream and downstream config[\"Num\"] bases centered around variation locus of group1, **users could compare this value against the pre-set threshold in our article to decide whether the variation locus is artificial**, (7) the mean Q scores of upstream and downstream config[\"Num\"] bases centered around variation locus of group2, (8) the P values between group1 and group2.\r\n\r\n**(3) variant.info.txt**. This file stores basic information of each variation output by VCF, which is used by the workflow. The 2nd-10th columns are identical to VCF. Users should note that DP4 could be lower than DP, and **choosing to use MRPP A or Q score mainly depends on DP4 field**. The 12th-17th columns represent the location of deletion (homo or non-homo), the 1-based starting position, the 0-based starting position of homo or non-homo region, the 0-based ending position of homo or non-homo region, the deletion length output by Variation caller, and the length of homo or non-homo region (= 15th-14th+1). Our workflow uses strict criteria to extract reads with and without deletions. For example, if a deletion lacks 3 bases relative to the reference, then reads supporting the non-reference allele should only contain 3-base deletions. Therefore, **some deletions may be omitted due to undesirable read numbers**.     \r\n\r\n**(4) MRPP.filtered.txt** or **Qscore.filtered.txt**. 
Users should note that before comparing the results to pre-set thresholds, they are strongly recommended to filter the 4th and 5th columns in the **target.upstream_downstream.bases.comparison.result.txt** or/and **fq.Qscore.info.txt** according to our article, or the result may be biased due to low sequencing depth. We have provided two accessory scripts bundled in the workflow to pre-filter the results based on sequencing depth and then to compare with user-defined threshold(s). Position(s) with flags marked as \"FP\" are predicted artificial deletions. If neither of the above two files are generated, it indicates that none of potential variations could be removed due to very low sequencing depth or very low threshold(s).\r\n\r\n\r\n","organization":"NkuyfqLab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1205?version=1","name":"main @ be917a7","author":["Qiang Ye"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/1205?version=2","name":"main @ 9ed07fa","author":["Qiang Ye"],"descriptor_type":["SMK"]}]},{"id":"1206","url":"https://workflowhub.eu/workflows/1206","name":"plant2human workflow","description":"# plant2human workflow 🌾 ↔ 🕺\r\n\r\n![GitHub last commit (branch)](https://img.shields.io/github/last-commit/yonesora56/plant2human/main)\r\n![Status](https://img.shields.io/badge/status-development-yellow)\r\n[![cwltool](https://img.shields.io/badge/cwltool-3.1.20250110105449-success)](https://github.com/common-workflow-language/cwltool/releases/tag/3.1.20250110105449)\r\n[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/yonesora56/plant2human/blob/main/LICENSE)\r\n[![Version](https://img.shields.io/badge/version-3.0-brightgreen)](https://github.com/yonesora56/plant2human/releases/tag/v3.0)\r\n[![Open in Dev 
Containers](https://img.shields.io/static/v1?label=Dev%20Containers\u0026message=python3.13\u0026color=blue\u0026logo=docker)](https://github.com/yonesora56/plant2human/tree/main/.devcontainer)\r\n[![X (@sorayone56)](https://img.shields.io/badge/X-sorayone56-black?style=flat\u0026logo=x\u0026logoColor=white)](https://x.com/sorayone56)\r\n\r\n## Introduction\r\n\r\nThis analysis workflow is centered on [foldseek](https://github.com/steineggerlab/foldseek), which enables fast structural similarity searches and supports the discovery of understudied genes by comparing plants, which are distantly related species, with humans, for which there is a wealth of information.\r\nBased on the list of genes you are interested in, you can easily create a scatter plot of **“structural similarity vs. sequence similarity”** by retrieving structural data from the [AlphaFold protein structure database (AFDB)](https://alphafold.ebi.ac.uk/).\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 📣 Report\r\n\r\n- ✔ 2025-12-13: main workflow update! [`plant2human_v3_stringent.cwl` (recommend)](https://github.com/yonesora56/plant2human/blob/main/Workflow/plant2human_v3_stringent.cwl) and [`plant2human_v3_permissive.cwl`](https://github.com/yonesora56/plant2human/blob/main/Workflow/plant2human_v3_permissive.cwl)\r\n- ✔ 2026-01-21: Published in [Bioinformatics Advances](https://doi.org/10.1093/bioadv/vbag013)! The workflow version corresponding to the paper is available on WorkflowHub: [main @ b1c1e73 (latest)](https://doi.org/10.48546/WORKFLOWHUB.WORKFLOW.1206.10). 
We will keep updating it as the project evolves!\r\n- ✔ 2026-03-22: [`Workflow/plant2human_v3_stringent.cwl`](https://github.com/yonesora56/plant2human/blob/main/Workflow/plant2human_v3_stringent.cwl) is update!\r\n- ✔ 2026-03-22: Update *Oryza sativa* 100 genes (Ensembl plants release 62) test example\r\n- ✔ 2026-03-24: Update *Arabidopsis thaliana* 100 genes (Ensembl plants release 62) test example ([details](https://github.com/yonesora56/plant2human/blob/main/test/arabidopsis_test_100genes_202603/README.md))\r\n- ✔ 2025-03-24: Update *Zea mays* 100 genes (Ensembl plants release 62) test example ([details](https://github.com/yonesora56/plant2human/blob/main/test/zea_mays_test_100genes_202603/README.md))\r\n- ✔ 2025-03-24: Update *Solanum lycopersicum* 100 genes (Ensembl plants release 62) test example ([details](https://github.com/yonesora56/plant2human/blob/main/test/solanum_lycopersicum_test_100genes_202603/README.md))\r\n- ✔ 2025-03-24: Update *Glycine max* 100 genes (Ensembl plants release 62) test example ([details](https://github.com/yonesora56/plant2human/blob/main/test/glycine_max_test_100genes_202603/README.md))\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 🔧 Implementation Background\r\n\r\nIn recent years, with the [AlphaFold protein structure database](https://alphafold.ebi.ac.uk/), it has become easier to obtain protein structure prediction data and perform structural similarity searches even for plant species such as rice. Against this background, searching for hits with **“low sequence similarity and high structural similarity”** for the gene groups being focused on has become possible. 
This approach may allow us to discover proteins that are conserved in distantly related species and to consider the characteristics of these proteins based on the wealth of knowledge we have about humans.\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 📈 Analysis Environment\r\n\r\n### Prerequisites\r\n\r\n- Docker / Orbstack\r\n- [cwltool](https://github.com/common-workflow-language/cwltool) \u003e= 3.1.20250110105449\r\n\r\n\u003e **📝 Note:** This workflow is based on Common Workflow Language (CWL). Please see the [Official Document](https://www.commonwl.org/user_guide/)\r\n\r\n\u0026nbsp;\r\n\r\n### ⚠️  Prerequisites (Python Environment)\r\n\r\nI've already checked python 3.11 and packages version below.\r\nPlease install the following packages beforehand!\r\n\r\n(Using [Development Containers](https://github.com/devcontainers/spec) makes it easy to reproduce your execution environment!)\r\n\r\n```python3\r\npolars==1.39.2\r\nmatplotlib==3.10.8\r\nseaborn==0.13.2\r\nunipressed==1.4.0\r\npapermill==2.7.0\r\n```\r\n\r\n\u0026nbsp;\r\n\r\n### **Using Dev Containers (Docker and VScode extension)**\r\n\r\nMost processes, such as Foldseek, use container ([BioContainers](https://quay.io/organization/biocontainers)), but some involve processing with jupyter notebook, which requires the preparation of some python libraries (e.g., polars.).\r\nIf you want to experiment with a simple workflow, you can create an analysis environment easily using [Dev Containers](https://github.com/yonesora56/plant2human/blob/main/.devcontainer/devcontainer.json) system, a VScode extension.\r\nUsing this environment, the version of the python library is probably the one used during development, so errors are unlikely to occur (see [Dockerfile](https://github.com/yonesora56/plant2human/blob/main/.devcontainer/Dockerfile) for the package version).\r\n\r\nPlease check the official website for Dev Container details.\r\n- [Developing inside a 
Container](https://code.visualstudio.com/docs/devcontainers/containers)\r\n- [Development Containers](https://containers.dev/)\r\n\r\n\u0026nbsp;\r\n\r\n### The machine used for testing (2026-03-20)\r\n\r\n- Machine: 🍎 MacBook Pro 🍎\r\n- Chip: Apple M3 Max\r\n- memory: 128 GB\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 🌾 Analysis Example ( *Oryza sativa subsp.japonica* 100 genes vs *Homo sapiens*) 🌾 (ver. 2026-03-20)\r\n\r\nHere, we will explain how to use the list of 100 rice genes as an example.\r\n\r\n\u0026nbsp;\r\n\r\n## 0. Clone Repository\r\n\r\n```bash\r\ngit clone https://github.com/yonesora56/plant2human.git\r\ncd ./plant2human/\r\n```\r\n\r\n\u0026nbsp;\r\n\r\n## 1. Creation of a TSV file of gene and UniProt ID correspondences 🧬\r\n\r\nFirst, you need the following [gene list tsv file](https://github.com/yonesora56/plant2human/blob/main/test/oryza_sativa_test_100genes_202603/oryza_sativa_random_100genes_list.tsv).\r\n\r\n**📝 Note:** Please set the column name as \"From\"\r\n\r\n```tsv\r\nFrom\r\nOs12g0269700\r\nOs10g0410900\r\nOs05g0403000\r\nOs06g0127250\r\nOs02g0249600\r\nOs09g0349700\r\nOs03g0735150\r\nOs08g0547350\r\nOs06g0282400\r\nOs05g0576750\r\nOs07g0216600\r\nOs10g0164500\r\nOs07g0201300\r\nOs01g0567200\r\nOs05g0563050\r\nOs03g0660050\r\nOs11g0436450\r\n...\r\n```\r\n\r\n\u0026nbsp;\r\n\r\nThe following [TSV file](https://github.com/yonesora56/plant2human/blob/main/test/oryza_sativa_test_100genes_202603/os_100_genes_idmapping_all.tsv) is required to execute the following workflow.\r\n\r\n\u003e **📝 Note:** Network access required in this process!\r\n\r\n```tsv\r\nFrom\tUniProt 
Accession\r\nOs01g0104800\tA0A0N7KC66\r\nOs01g0104800\tQ657Z6\r\nOs01g0104800\tQ658C6\r\nOs01g0152300\tQ9LGI2\r\nOs01g0322300\tA0A9K3Y6N1\r\nOs01g0322300\tQ657N1\r\nOs01g0567200\tA0A0N7KD66\r\nOs01g0567200\tQ657K0\r\nOs01g0571133\tA0A0P0V4A8\r\nOs01g0664500\tA0A8J8XFG3\r\nOs01g0664500\tQ5SN58\r\nOs01g0810800\tA0A8J8XDQ1\r\nOs01g0810800\tB7FAC9\r\nOs01g0875300\tA0A0P0VB72\r\nOs01g0924300\tA0A0P0VCB7\r\n...\r\n```\r\n\r\nTo do this, you need to follow the CWL workflow command below.\r\nThis YAML file is the parameter file for the workflow, for example.\r\n\r\n**📁 Where to save:** Place your YAML file in the `job/` directory.\r\n\r\n\u0026nbsp;\r\n\r\n## YAML Template for UniProt ID Mapping\r\n\r\nBelow is a template YAML file for the UniProt ID mapping process.\r\nCopy this template and modify the parameters marked with `# \u003c-- CHANGE THIS!`.\r\n\r\n**Example file:** [`job/os_100genes_uniprot_idmapping.yml`](https://github.com/yonesora56/plant2human/blob/main/job/os_100genes_uniprot_idmapping.yml)\r\n\r\n```YAML\r\n# ---------- OUTPUT SETTINGS ----------\r\n# Output notebook filename (string)\r\noutput_notebook_name: \"your_species_uniprot_idmapping.ipynb\"  # \u003c-- CHANGE THIS!\r\n\r\n# ---------- INPUT FILE ----------\r\n# Your gene list TSV file (column header must be \"From\")\r\ngene_id_file:\r\n  class: File                                # \u003c-- DO NOT CHANGE\r\n  format: edam:format_3475  # \u003c-- DO NOT CHANGE\r\n  location: ./path/to/your_gene_list.tsv     # \u003c-- CHANGE THIS! 
(path to your gene list)\r\n\r\n# ---------- UniProt API SETTINGS ----------\r\n# For plant species, use \"Ensembl_Genomes\" as query database\r\nuniprot_api_query_db: \"Ensembl_Genomes\"  # \u003c-- DO NOT CHANGE (for plants)\r\nuniprot_api_target_db: \"UniProtKB\"       # \u003c-- DO NOT CHANGE\r\n\r\n# ---------- OUTPUT DIRECTORY/FILE NAMES ----------\r\n# Directory for AlphaFold info JSON files\r\njson_dir_name: \"your_species_afinfo_json\"           # \u003c-- CHANGE THIS!\r\n\r\n# Structure file format: \"cifUrl\" for mmCIF file format (recommended), \"pdbUrl\" for PDB file format\r\ndata_url: \"cifUrl\"                                  # \u003c-- Usually DO NOT CHANGE\r\n\r\n# Directory for downloaded structure files\r\nstructure_dir_name: \"your_species_mmcif\"            # \u003c-- CHANGE THIS!\r\n\r\n# Output TSV filename for ID mapping results\r\nid_mapping_all_file_name: \"your_species_idmapping_all.tsv\"  # \u003c-- CHANGE THIS!\r\n```\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## Command Execution Example\r\n\r\n```bash\r\n# test date: 2026-03-20\r\ncwltool --debug --outdir ./test/oryza_sativa_test_100genes_202603/ \\\r\n./Tools/01_uniprot_idmapping.cwl \\\r\n./job/os_100genes_uniprot_idmapping.yml\r\n```\r\n\r\nIn this execution, [mmcif files](https://github.com/yonesora56/plant2human/tree/main/test/oryza_sativa_test_100genes_202603/os_100_genes_mmcif) are also retrieved from AlphaFold Database (version 6).\r\nThe execution results are output with the [jupyter notebook format](https://github.com/yonesora56/plant2human/blob/main/test/oryza_sativa_test_100genes_202603/oryza_sativa_100_genes_uniprot_idmapping.ipynb).\r\n\r\n---\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 2. 
Creating and Preparing Indexes 📂\r\n\r\nI'm sorry, but the [main workflow](https://github.com/yonesora56/plant2human/blob/main/Workflow/plant2human_v3_stringent.cwl) does not currently include the creation of an index process (both for protein structure (foldseek index) and protein sequence (BLAST index)).\r\nPlease perform the following processes in advance.\r\n\r\n\u0026nbsp;\r\n\r\n## ⚠️ Important: Database Version Compatibility ⚠️\r\n\r\nThis workflow uses data from the **AlphaFold Protein Structure Database (AFDB) version 6**. Due to recent database updates (v4 → v6, October 2025), users should be aware of potential version mismatches between different data components.\r\n\r\n## Understanding the Version Issue\r\n\r\n| Component | AFDB Version | Source |\r\n|-----------|-----------------|--------|\r\n| Query structures (your plant proteins) | **v6** | AlphaFold Database API |\r\n| ⚠️ Foldseek pre-built index | **v4** | `foldseek databases` command |\r\n| ⭐ Foldseek index (built myself) | **v6** | FTP download from AFDB |\r\n| `sequences.fasta` (for BLAST index) | **v6** | FTP download from AFDB |\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## Two Main Workflow Options\r\n\r\nWe provide two workflow variants to address this version compatibility issue:\r\n\r\n| Workflow | Index Source | AFDB index Version Match | Database Options | Use Case |\r\n|----------|--------------|:-------------:|------------------|----------|\r\n| **`plant2human_v3_permissive.cwl`** | `foldseek databases` (pre-built) | ❌ v4 vs v6 | UniProt50, Swiss-Prot, Proteome, etc. 
| Exploratory analysis (swiss-prot,TrEMBL) |\r\n| **`plant2human_v3_stringent.cwl`** | `foldseek createdb` (self-built) | ✅ v6 = v6 | Human proteome only | Rigorous analysis |\r\n\r\n\u0026nbsp;\r\n\r\n### Option 1: Permissive Mode\r\n\r\n**Pros:**\r\n- Easy setup with `foldseek databases` command\r\n- Access to diverse databases (UniProt50, Swiss-Prot, etc.)\r\n\r\n**Cons:**\r\n- Version mismatch between query (v6) and index (v4)\r\n- Some proteins may have updated structures in v6 that differ from v4\r\n\r\n**When to use:** broad searches (including swiss-prot, TrEMBL)\r\n\r\n➡️ **Go to:** [2-1a. Creating a Foldseek Index (Option 1: Permissive Mode))](#2-1a-creating-a-foldseek-index-option-1-permissive-mode)\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n### Option 2: Stringent Mode (Recommended)\r\n\r\n**Pros:**\r\n- Full version consistency (v6 query ↔ v6 index)\r\n- Smaller index size (Human proteome only: ~24,000 proteins)\r\n- Reproducible results with matched database versions\r\n\r\n**Cons:**\r\n- Requires manual download and index creation\r\n- Limited to Human proteome only\r\n\r\n**When to use:** Final analysis for publications, when version consistency is critical\r\n\r\n➡️ **Go to:** [2-1b. Creating Index (Stringent Mode)](#2-1b-creating-a-foldseek-index-option-2-stringent-mode)\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 2-1. Creating a Foldseek Index for structural alignment\r\n\r\n## 2-1a. 
Creating a Foldseek Index (Option 1: Permissive Mode)\r\n\r\nIn this workflow, the target of the structural similarity search is specified as the AlphaFold database to perform comparisons across a broader range of species.\r\nIndex creation using the `foldseek databases` command is through the following command.\r\n\r\nPlease select the database you want to use from `Alphafold/UniProt,` `Alphafold/UniProt50-minimal`, `Alphafold/UniProt50`, `Alphafold/Proteome,` `Alphafold/Swiss-Prot.`\r\n\r\n```bash\r\n# Supported databases in this workflow\r\nAlphafold/UniProt\r\nAlphafold/UniProt50-minimal\r\nAlphafold/UniProt50\r\nAlphafold/Proteome\r\nAlphafold/Swiss-Prot\r\n```\r\n\r\n\u0026nbsp;\r\n\r\nYou can check the details of this database using the following command.\r\n\r\n```bash\r\ndocker run --rm quay.io/biocontainers/foldseek:10.941cd33--h5021889_1 foldseek databases --help\r\n```\r\n\r\n\u0026nbsp;\r\n\r\nFor example, if you want to specify AlphaFold/Swiss-Prot as the index, you can do so with the following CWL file;\r\n\r\n```bash\r\n# execute creation of foldseek index using \"foldseek databases\"\r\n# test date: 2025-12-12\r\ncwltool --debug --outdir ./index/ \\\r\n./Tools/02_foldseek_database.cwl \\\r\n--database Alphafold/Swiss-Prot \\\r\n--index_dir_name index_swissprot \\\r\n--index_name swissprot \\\r\n--threads 16\r\n```\r\n---\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 2-1b. 
Creating a Foldseek Index (Option 2: Stringent Mode)\r\n\r\nIn this mode, you download structure files directly from AFDB v6 and create your own index.\r\nThis ensures version consistency between query and target structures.\r\n\r\n\u0026nbsp;\r\n\r\n### Step 1: Download Human proteome from AFDB v6\r\n\r\n```bash\r\n# Download date: 2026-03-20\r\n# file size is ~5GB\r\n\r\ncd ./index\r\n\r\n# curl\r\ncurl -O https://ftp.ebi.ac.uk/pub/databases/alphafold/v6/UP000005640_9606_HUMAN_v6.tar\r\n\r\n# or aria2c\r\naria2c -c --max-connection-per-server=4 \\\r\n--min-split-size=1M \\\r\n-o \"UP000005640_9606_HUMAN_v6.tar\" \\\r\n\"https://ftp.ebi.ac.uk/pub/databases/alphafold/v6/UP000005640_9606_HUMAN_v6.tar\"\r\n\r\ncd ../\r\n```\r\n\r\n### Step 2: Create Foldseek index using `foldseek createdb` command\r\n\r\n```bash\r\n# test date: 2026-03-20\r\n# foldseek version: https://github.com/steineggerlab/foldseek/releases/tag/10-941cd33\r\ncwltool --debug --outdir ./index/ \\\r\n./Tools/02_foldseek_createdb.cwl \\\r\n--input_structure_files ./index/UP000005640_9606_HUMAN_v6.tar \\\r\n--index_dir_name index_human_proteome_v6 \\\r\n--index_name human_proteome_v6 \\\r\n--threads 16\r\n```\r\n---\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 2-2. 
Creating a Index for protein \"sequence\" alignment (Common)\r\n\r\nAn index protein sequence FASTA file must be downloaded to obtain the amino acid sequence using the `blastdbcmd` command from the AlphaFold Protein Structure Database.\r\nThis workflow uses the version of the protein sequence that was used for structure prediction.\r\n\r\n\u003e**📝 Note:**: This FASTA file is extremely large (**\u003e 109GB !**), so it's probably best to delete FASTA file after creating the index.\r\n\r\n```bash\r\n# Preparation for BLAST index\r\n# test date: 2026-03-21\r\ncd ./index\r\n\r\n# curl\r\ncurl -O https://ftp.ebi.ac.uk/pub/databases/alphafold/sequences.fasta # AFDB version 6\r\n\r\n# or aria2c (recommend)\r\naria2c --continue=true \\\r\n--max-connection-per-server=4 \\\r\n--min-split-size=1M \\\r\nhttps://ftp.ebi.ac.uk/pub/databases/alphafold/sequences.fasta # AFDB version 6\r\n\r\n# rename\r\nmv sequences.fasta afdb_all_sequences_v6.fasta\r\ncd ../\r\n```\r\n\r\n\u0026nbsp;\r\n\r\n```bash\r\n# execute creation of BLAST index using \"makeblastdb\"\r\n# test date: 2026-03-21\r\ncwltool --debug \\\r\n--outdir ./index/ \\\r\n./Tools/03_makeblastdb.cwl \\\r\n--index_dir_name index_uniprot_afdb_all_sequences_v6 \\\r\n--input_fasta_file ./index/afdb_all_sequences_v6.fasta\r\n```\r\n\r\n\u0026nbsp;\r\n\r\n\u003e **📝 Note:** It is estimated to take 2~ hours for creating index.\r\n\u003e **This index is about  \u003e 150GB!**\r\n\u003e **We are currently investigating whether it can be executed by another method...**\r\n\r\n---\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 3. 
Execution of the `plant2human` workflow (main workflow)\r\n\r\n\u003e **📝 Note:** Network access required in this process!\r\n\r\n\u0026nbsp;\r\n\r\nIn this process, we perform a structural similarity search using the `foldseek easy-search` command and then perform a pairwise sequence alignment of the amino acid sequences of the hit pairs using the `needle` and `water` commands.\r\nFinally, based on this information, we create a scatter plot and output a jupyter notebook as a report.\r\n\r\n\u0026nbsp;\r\n\r\n\u003e **📝 Note:** For Permissive Mode (using pre-built indexes like Swiss-Prot), see [Workflow/README.md](https://github.com/yonesora56/plant2human/blob/main/Workflow/README.md).\r\n\r\n\u0026nbsp;\r\n\r\n## 📋 YAML Parameter File Reference (Stringent Mode)\r\n\r\nThe main workflow requires a YAML parameter file to specify input files and parameters.\r\nBelow is a detailed explanation of each parameter.\r\n\r\n**Example file (2026-03-22 update!):** [`job/plant2human_v3_stringent_example_os100.yml`](https://github.com/yonesora56/plant2human/blob/main/job/plant2human_v3_stringent_example_os100.yml)\r\n\r\n\u0026nbsp;\r\n\r\n### Input File Parameters\r\n\r\n| Parameter | Type | Description | Example |\r\n|-----------|------|-------------|---------|\r\n| `INPUT_DIRECTORY` | Directory | Directory containing mmCIF structure files from Step 1 | `../test/.../os_100_genes_mmcif/` |\r\n| `FILE_MATCH_PATTERN` | string | File pattern for structure files | `\"*.cif\"` |\r\n| `FOLDSEEK_INDEX` | File | Foldseek index created in Step 2-1b | `../index/index_human_proteome_v6/human_proteome_v6` |\r\n| `QUERY_IDMAPPING_TSV` | File | ID mapping TSV from Step 1 | `..._idmapping_all.tsv` |\r\n| `QUERY_GENE_LIST_TSV` | File | Original gene list TSV | `oryza_sativa_random_100genes_list.tsv` |\r\n\r\n\u0026nbsp;\r\n\r\n### Foldseek Parameters (`foldseek easy-search` command)\r\n\r\nfor more details, please execute the below command.\r\n\r\n```bash\r\ndocker run --rm 
quay.io/biocontainers/foldseek:10.941cd33--h5021889_1 \\\r\nfoldseek easy-search --help\r\n```\r\n\r\n| Parameter | Default | Description |\r\n|-----------|---------|-------------|\r\n| `COVERAGE_THRESHOLD` | `0.75` | (0~1) Coverage threshold for search results |\r\n| `COV_MODE` | `5` | (1,2,3,4,5) Coverage mode for search results. `5` means short sequence needs to be at least x% of the other seq. length |\r\n| `EVALUE` | `0.1` | E-value threshold for structural similarity search |\r\n| `ALIGNMENT_TYPE` | `2` | 0: 3Di only, 1: **TM-align (default)**, 2: 3Di+AA |\r\n| `THREADS` | `16` | Number of CPU threads |\r\n| `SPLIT_MEMORY_LIMIT` | `\"120G\"` | Memory limit for large searches |\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## YAML Template for Stringent Mode\r\n\r\nCopy and modify this template for your analysis:\r\n\r\n```YAML\r\n# ============================================================\r\n# YAML Parameter File for plant2human_v3_stringent.cwl\r\n# Species: [Your Species Name]\r\n# ============================================================\r\n\r\n# ---------- INPUT DIRECTORY ----------\r\nINPUT_DIRECTORY:\r\n  class: Directory\r\n  location: ./path/to/your_mmcif_directory/           # \u003c-- CHANGE THIS!\r\n\r\nFILE_MATCH_PATTERN: \"*.cif\"\r\n\r\n# ---------- FOLDSEEK INDEX (Stringent Mode) ----------\r\nFOLDSEEK_INDEX:\r\n  class: File\r\n  location: ../index/index_human_proteome_v6/human_proteome_v6  # \u003c-- Adjust path if needed\r\n  secondaryFiles:                                               # \u003c-- If you do not place the index in the “index” directory, you must specify the path to all generated index files! 
(This is generally not required.)\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_ca\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_ca.dbtype\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_ca.index\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_h\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_h.dbtype\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_h.index\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_mapping\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_ss\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_ss.dbtype\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6_ss.index\r\n    # No _taxonomy for self-built index\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6.dbtype\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6.index\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6.lookup\r\n    - class: File\r\n      location: ../index/index_human_proteome_v6/human_proteome_v6.source\r\n    # No .version for self-built index\r\n\r\n# ---------- FOLDSEEK DEFAULT PARAMETERS ----------\r\nCOVERAGE_THRESHOLD: 0.75\r\nCOV_MODE: 5\r\nEVALUE: 0.1\r\nALIGNMENT_TYPE: 1  # 1 = TM-align\r\nTHREADS: 16\r\nSPLIT_MEMORY_LIMIT: \"120G\"\r\n\r\n# ---------- EXTRACT ID COLUMNS ----------\r\nWF_COLUMN_NUMBER_QUERY_SPECIES: 1\r\nWF_COLUMN_NUMBER_HIT_SPECIES: 2\r\n\r\nQUERY_IDMAPPING_TSV:\r\n  class: File\r\n  format: edam:format_3475\r\n  location: ./path/to/your_idmapping_all.tsv          # \u003c-- CHANGE 
THIS!\r\n\r\nQUERY_GENE_LIST_TSV:\r\n  class: File\r\n  format: edam:format_3475\r\n  location: ./path/to/your_gene_list.tsv              # \u003c-- CHANGE THIS!\r\n```\r\n\r\n---\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n### Command Execution Example (Stringent Mode - Recommended)\r\n\r\n```bash\r\n# test date: 2026-03-22\r\ncwltool --debug --outdir ./test/oryza_sativa_test_100genes_202603/ \\\r\n./Workflow/plant2human_v3_stringent.cwl \\\r\n./job/plant2human_v3_stringent_example_os100.yml\r\n```\r\n\r\n\u0026nbsp;\r\n\r\nThe execution results are output with the [jupyter notebook](https://github.com/yonesora56/plant2human/blob/main/test/oryza_sativa_test_100genes_202603/plant2human_report.ipynb).\r\n\r\n\u003e **📝 Note:** For more detailed analysis or to modify the parameters in the figure, you can interactively operate this notebook again yourself!\r\n\u003e (2026-03-22) We have configured it to output the TSV files and scatter plot images generated in the Jupyter notebook!\r\n\u003e All result is generated as [TSV file](https://github.com/yonesora56/plant2human/blob/main/test/oryza_sativa_test_100genes_202603/foldseek_result_join_alignment_result_all.tsv)\r\n\r\n\u0026nbsp;\r\n\r\n\u003e **📝 Note:** For Permissive Mode (using pre-built indexes like Swiss-Prot), see [Workflow/README.md](https://github.com/yonesora56/plant2human/blob/main/Workflow/README.md).\r\n\r\n---\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## rice vs human result (strngent mode result) 🌾 ↔ 🕺\r\n\r\nFor example, you can visualize the results of structural similarity and global alignment, as shown below.\r\nIn this case, the x-axis represents the global alignment similarity match (%), and the y-axis represents the average lDDT score (an indicator of structural alignment).\r\n\r\nThe hit pairs in the upper-right plot indicate higher sequence similarity and structural 
similarity.\r\n\r\n![image](https://raw.githubusercontent.com/yonesora56/plant2human/main/test/oryza_sativa_test_100genes_202603/foldseek_result_similarity_percent_needle_lddt_all.png)\r\n\r\n\u0026nbsp;\r\n\r\nIn this case, the x-axis represents the local alignment similarity match (%), and the y-axis represents the average lDDT score (an indicator of structural alignment).\r\n\r\n![image](https://raw.githubusercontent.com/yonesora56/plant2human/main/test/oryza_sativa_test_100genes_202603/foldseek_result_similarity_percent_water_lddt_all.png)\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## After Filtering\r\n\r\nThe report notebook for the plant2human workflow also outputs scatter plots after applying the filtering conditions set in this workflow.\r\n\r\n## Filtering criteria\r\n\r\n1. structural alignment coverage \u003e= 50%\r\n2. If there are hits with the same target for the same gene-derived UniProt ID, the one with the highest qcov is selected, and if the qcov is the same, the one with the highest lDDT is selected.\r\n\r\n\u003e **📝 Note:** In this workflow, we leave the states with the same foldseek hit even if the query genes are different.\r\n\r\n3. 
Select hits that can be converted to Ensembl gene id and HGNC Gene nomenclature with [TogoID API](https://togoid.dbcls.jp/)\r\n\r\n\u0026nbsp;\r\n\r\nBy applying these filtering conditions, you can examine hit pairs that are easier to investigate!\r\n\r\n\u0026nbsp;\r\n\r\n## Global alignment (x-axis) (After Filtering)\r\n\r\n![image](https://raw.githubusercontent.com/yonesora56/plant2human/main/test/oryza_sativa_test_100genes_202603/foldseek_result_similarity_percent_needle_lddt_filter.png)\r\n\r\n\u0026nbsp;\r\n\r\n## local alignment (x-axis) (After Filtering)\r\n\r\n![image](https://raw.githubusercontent.com/yonesora56/plant2human/main/test/oryza_sativa_test_100genes_202603/foldseek_result_similarity_percent_water_lddt_filter.png)\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n\u0026nbsp;\r\n\r\n## 🌿 Running the Pipeline for Another Plant Species 🌿\r\n\r\nThis workflow can be applied to **any plant species** available in the [AlphaFold Protein Structure Database (AFDB)](https://alphafold.ebi.ac.uk/).\r\n\r\n\u0026nbsp;\r\n\r\n### Step 1: Check Species Availability in AFDB\r\n\r\nBefore running the pipeline, verify that your target plant species is available:\r\n\r\n1. Visit [AFDB Page](https://alphafold.ebi.ac.uk/)\r\n2. Search for your species name or UniProt proteome ID\r\n3. 
Confirm protein structures are available for your genes\r\n\r\n**📝 Note:** Most model organisms and many crop species are available in AFDB v6.\r\n\r\n\u0026nbsp;\r\n\r\n### Step 2: Prepare Your Gene List\r\n\r\nCreate a TSV file with column header \"From\" containing your gene IDs:\r\n\r\n| Species | Gene ID Format | Example |\r\n|---------|----------------|---------|\r\n| *Oryza sativa* | RAP-DB format | `Os01g0104800` |\r\n| *Arabidopsis thaliana* | TAIR format | `AT1G01010` |\r\n| *Zea mays* | Ensembl format | `Zm00001eb000010` |\r\n| *Solanum lycopersicum* | Ensembl format | `Solyc01g005000` |\r\n| *Glycine max* | Ensembl format | `Glyma.01G000100` |\r\n\r\n\u0026nbsp;\r\n\r\n### Step 3: Create YAML Parameter Files\r\n\r\n1. **Copy the template** from [Section 1 (UniProt ID Mapping)](#yaml-template-for-uniprot-id-mapping) and [Section 3 (Main Workflow)](#yaml-template-for-stringent-mode)\r\n2. **Modify the paths and filenames** marked with `# \u003c-- CHANGE THIS!`\r\n3. **Save your YAML files** in the `job/` directory\r\n\r\n\u0026nbsp;\r\n\r\n### Step 4: Execute the Workflow\r\n\r\nFollow the same steps as described in Sections 1-3, using your custom YAML files.\r\n\r\n\u0026nbsp;\r\n\r\n### 📚 Example Implementations for Other Species\r\n\r\nWe provide complete examples for multiple plant species. 
Use these as references:\r\n\r\n| Species | Test Directory | YAML Files |\r\n|---------|----------------|------------|\r\n| *Arabidopsis thaliana* | [`test/arabidopsis_test_100genes_202603/`](https://github.com/yonesora56/plant2human/blob/main/test/arabidopsis_test_100genes_202603/README.md) | [`job/at_100genes_*.yml`](https://github.com/yonesora56/plant2human/tree/main/job) |\r\n| *Zea mays* | [`test/zea_mays_test_100genes_202603/`](https://github.com/yonesora56/plant2human/blob/main/test/zea_mays_test_100genes_202603/README.md) | [`job/zm_100genes_*.yml`](https://github.com/yonesora56/plant2human/tree/main/job) |\r\n| *Solanum lycopersicum* | [`test/solanum_lycopersicum_test_100genes_202603/`](https://github.com/yonesora56/plant2human/blob/main/test/solanum_lycopersicum_test_100genes_202603/README.md) | [`job/sl_100genes_*.yml`](https://github.com/yonesora56/plant2human/tree/main/job) |\r\n| *Glycine max* | [`test/glycine_max_test_100genes_202603/`](https://github.com/yonesora56/plant2human/blob/main/test/glycine_max_test_100genes_202603/README.md) | [`job/gm_100genes_*.yml`](https://github.com/yonesora56/plant2human/tree/main/job) |\r\n\r\n---\r\n\r\n\r\n","organization":"bonohulab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1206?version=1","name":"main @ b8c0b1d","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1206?version=2","name":"main @ 1aa2763","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1206?version=3","name":"main @ 76a6471","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1206?version=4","name":"main @ 044221e","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1206?version=5","name":"main @ 11b46d8","author":["Sora 
Yonezawa"],"descriptor_type":["CWL"]},{"id":"6","url":"https://workflowhub.eu/workflows/1206?version=6","name":"main @ 6911e7a","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"7","url":"https://workflowhub.eu/workflows/1206?version=7","name":"main @ 10d8268","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"8","url":"https://workflowhub.eu/workflows/1206?version=8","name":"main @ ad71cdb","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"9","url":"https://workflowhub.eu/workflows/1206?version=9","name":"main @ fc8edcd","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"10","url":"https://workflowhub.eu/workflows/1206?version=10","name":"main @ b1c1e73","author":["Sora Yonezawa"],"descriptor_type":["CWL"]},{"id":"11","url":"https://workflowhub.eu/workflows/1206?version=11","name":"main @ 62a2b67","author":["Sora Yonezawa"],"descriptor_type":["CWL"]}]},{"id":"1207","url":"https://workflowhub.eu/workflows/1207","name":"pseudobulk-worflow-decoupler-edger/main","description":"This workflow uses the decoupler tool in Galaxy to generate pseudobulk counts from an annotated AnnData file obtained from scRNA-seq analysis. Following the pseudobulk step, differential expression genes (DEG) are calculated using the edgeR tool. The workflow also includes data sanitation steps to ensure smooth operation of edgeR and minimizing potential issues. 
Additionally, a Volcano plot tool is used to visualize the results after the DEG analysis.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1207?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1207?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1208","url":"https://workflowhub.eu/workflows/1208","name":"Ecoregionalization workflow (Part 1)","description":"Part 1 of ecoregionalization workflow\r\n\r\nThis workflow allows you to create an ecoregionalization map from occurrences and environmental data using a boosted regression trees model for predictions.\r\n\r\nThe workflow is intended for processing occurrence data, which should include latitude, longitude and species presence or absence. You can use example test data available with the workflow, highlighting a use case centered on the Dumont d'Urville sea region and benthic invertebrates. The primary goal of this workflow is to generate species distribution maps and identify ecoregions within the study area. The project's objective is to offer accessible, reproducible, and transparent IT solutions for processing and analyzing species occurrence data.\r\n\r\nThis workflow is linked to the Galaxy training ecoregionalization tutorial. (https://ecology.usegalaxy.eu/training-material/topics/ecology/tutorials/Ecoregionalization_tutorial/tutorial.html)\r\n\r\nFor practical use the ecoregionalization workflow is split in two worklows and this is the first part. 
","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1208?version=1","name":"Version 1","author":["Pauline Seguineau"],"descriptor_type":["GALAXY"]}]},{"id":"1209","url":"https://workflowhub.eu/workflows/1209","name":"Ecoregionalization workflow (Part 2)","description":"Second part of the ecoregionalization\r\n\r\nThis workflow allows you to create an ecoregionalization map from occurrences and environmental data using a boosted regression trees model for predictions.\r\n\r\nThe workflow is intended for processing occurrence data, which should include latitude, longitude and species presence or absence. You can use example test data available with the workflow, highlighting a use case centered on the Dumont d'Urville sea region and benthic invertebrates. The primary goal of this workflow is to generate species distribution maps and identify ecoregions within the study area. The project's objective is to offer accessible, reproducible, and transparent IT solutions for processing and analyzing species occurrence data.\r\n\r\nThis workflow is linked to the Galaxy training ecoregionalization tutorial. (https://ecology.usegalaxy.eu/training-material/topics/ecology/tutorials/Ecoregionalization_tutorial/tutorial.html)\r\n\r\nFor practical use the ecoregionalization workflow is split in two worklows and this is the second part. 
You can find the first part here : https://workflowhub.eu/workflows/1208?version=1","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1209?version=1","name":"Version 1","author":["Pauline Seguineau"],"descriptor_type":["GALAXY"]}]},{"id":"1210","url":"https://workflowhub.eu/workflows/1210","name":"Test at SC24","description":"","organization":"COMPSs Tutorials","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1210?version=1","name":"Version 1","author":["Daniele Lezzi"],"descriptor_type":[]}]},{"id":"1216","url":"https://workflowhub.eu/workflows/1216","name":"clinicalmp-database-generation/main","description":"The workflow begins with the Database Generation process. The Galaxy-P team has developed a workflow that collects protein sequences from known disease-causing microorganisms to build a comprehensive database. 
This extensive database is then refined into a smaller, more relevant dataset using the Metanovo tool.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1216?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1217","url":"https://workflowhub.eu/workflows/1217","name":"nf-core/metapep","description":"From metagenomes to peptides","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1217?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1218","url":"https://workflowhub.eu/workflows/1218","name":"clinicalmp-verification/main","description":"In proteomics research, verifying detected peptides is essential for ensuring data accuracy and biological relevance. This tutorial continues from the clinical metaproteomics discovery workflow, focusing on verifying identified microbial peptides using the PepQuery tool.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1218?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1219","url":"https://workflowhub.eu/workflows/1219","name":"clinicalmp-data-interpretation/main","description":"This workflow will perform taxonomic and functional annotations using Unipept and statistical analysis using MSstatsTMT.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1219?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1221","url":"https://workflowhub.eu/workflows/1221","name":"EnrichDO: a Global Weighted Model for Disease Ontology Enrichment Analysis","description":"# EnrichDO\r\n\r\n***EnrichDO*** is a double weighted iterative model by integrating the DO graph topology on a global scale. It was based on the latest annotations of the human genome with DO terms, and double weighted the annotated protein-coding genes. On one hand, to reinforce the saliency of direct gene-DO annotations, different initial weights were assigned to directly annotated genes and indirectly annotated genes, respectively. On the other hand, to detect locally most significant node between the parent and its children, less significant nodes were dynamically down-weighted.\r\n\r\n***EnrichDO*** exhibits higher accuracy that often yield more specific significant DO terms, which alleviate the over enriched problem. 
The input data are the protein-coding genes of the human genome, using the ENTREZID format of NCBI.\r\n\r\n## Installation\r\n\r\nTo install this package, start R (version \"4.4\"), BiocManager (version \"3.20\") and enter:\r\n\r\n``` r\r\nif (!require(\"BiocManager\", quietly = TRUE))\r\n    install.packages(\"BiocManager\")\r\n\r\n##Release version\r\nBiocManager::install(\"EnrichDO\")\r\n\r\n\r\n## Devel version\r\nBiocManager::install(version='devel')\r\nBiocManager::install(\"EnrichDO\")\r\n```\r\n\r\n**Note:** for other R and BiocManager versions, need to manually download the source code in the Bioconductor website for installation.\r\n\r\n## Example\r\n\r\nAfter installation, check vignettes with:\r\n\r\n``` r\r\nbrowseVigenttes(\"EnrichDO\")\r\n```\r\n\r\n**Run cases** are stored in inst/scripts/EnrichDO_exampleTest.R\r\n\r\nThe **input data case** is stored at inst/extdata/Alzheimer_curated.csv\r\n\r\n**Output example** of enrichment result is available in inst/examples/result.txt\r\n\r\nThe **thesis data** is in thesisData folder (\u003chttps://github.com/liangcheng-hrbmu/EnrichDO/tree/devel/thesisData\u003e) and extdata_interpretation.txt explains the data source.\r\n","organization":"EnrichDO","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1221?version=1","name":"devel @ 729fe3e","author":["Liang Cheng"],"descriptor_type":[]}]},{"id":"1222","url":"https://workflowhub.eu/workflows/1222","name":"deepvariant-nextflow","description":"# Nextflow Pipeline for DeepVariant\r\n\r\nThis repository contains a Nextflow pipeline for Google’s DeepVariant, optimised for execution on NCI Gadi.\r\n\r\n## Quickstart Guide\r\n\r\n1. 
Edit the `pipeline_params.yml` file to include:\r\n    - `samples`:  a list of samples, where each sample includes the sample name, BAM file path (ensure corresponding .bai is in the same directory), path to an optional regions-of-interest BED file (set to `''` if not required), and the model type.\r\n    - `ref`: path to the reference FASTA (ensure corresponding .fai is in the same directory).\r\n    - `output_dir`: directory path to save output files.\r\n    - `nci_project`, `nci_storage` : NCI project and storage.\r\n\r\n3. Update `nextflow.config` to match the resource requirements for each stage of the pipeline. For NCI Gadi, you may need to adjust only `time` and `disk` (i.e. jobfs) parameters based on the size of the datasets used (the default values are tested to be suitable for a dataset of ~115GB in size).\r\n\r\n4. Load the Nextflow module and run the pipeline using the following commands:\r\n    ```bash\r\n    module load nextflow/24.04.1\r\n    nextflow run main.nf -params-file pipeline_params.yml\r\n    ```\r\n\r\n    Note: Additional Nextflow options can be included (e.g., `-resume` to resume from a previously paused/interrupted run)\r\n\r\n5. For each sample, output files will be stored in the directory `output_dir/sample_name`.\r\n\r\n## Notes  \r\n\r\n1. It is assumed that the user has access to NCI's `if89` project (required for using DeepVariant via `module load`). If not, simply request access using this [link](https://my.nci.org.au/mancini/project/if89).\r\n\r\n## Case Study\r\n\r\nA case study was conducted using a ~115GB BAM alignment file from a HG002 ONT whole genome sequencing (WGS) dataset to evaluate the runtime and service unit (SU) efficiency of *deepvariant-nextflow* compared to the original DeepVariant running on a single node. 
The benchmarking results are summarised in the table below.\r\n\r\n\u003ctable\u003e\r\n    \u003cthead\u003e\r\n        \u003ctr\u003e\r\n            \u003cth\u003eVersion\u003c/th\u003e\r\n            \u003cth\u003eGadi Resources\u003c/th\u003e\r\n            \u003cth\u003eRuntime (hh:mm:ss)\u003c/th\u003e\r\n            \u003cth\u003eSUs\u003c/th\u003e\r\n        \u003c/tr\u003e\r\n    \u003c/thead\u003e\r\n    \u003ctbody\u003e\r\n        \u003ctr\u003e\r\n            \u003ctd rowspan=2\u003eOriginal DeepVariant\u003c/td\u003e\r\n            \u003ctd\u003e\u003ccode\u003egpuvolta\u003c/code\u003e (24 CPUs, 2 GPUs, 192 GB memory)\u003c/td\u003e\r\n            \u003ctd\u003e05:07:21\u003c/td\u003e\r\n            \u003ctd\u003e368.82\u003c/td\u003e\r\n        \u003c/tr\u003e\r\n        \u003ctr\u003e\r\n            \u003ctd\u003e\u003ccode\u003egpuvolta\u003c/code\u003e (48 CPUs, 4 GPUs, 384 GB memory)\u003c/td\u003e\r\n            \u003ctd\u003e03:18:31\u003c/td\u003e\r\n            \u003ctd\u003e476.44\u003c/td\u003e\r\n        \u003c/tr\u003e\r\n        \u003ctr\u003e\r\n            \u003ctd rowspan=2\u003e\u003ci\u003edeepvariant-nextflow\u003c/i\u003e\u003c/td\u003e\r\n            \u003ctd\u003e\u003ccode\u003enormal\u003c/code\u003e (48 CPUs, 192 GB memory) → \u003ccode\u003egpuvolta\u003c/code\u003e (12 CPUs, 1 GPU, 96 GB memory) → \u003ccode\u003enormalbw\u003c/code\u003e (28 CPUs, 256 GB memory) \u003c/td\u003e\r\n            \u003ctd\u003e03:21:01\u003c/td\u003e\r\n            \u003ctd\u003e237.33\u003c/td\u003e\r\n        \u003c/tr\u003e\r\n        \u003ctr\u003e\r\n            \u003ctd\u003e\u003ccode\u003enormalsr\u003c/code\u003e (104 CPUs, 500 GB memory) → \u003ccode\u003egpuvolta\u003c/code\u003e (12 CPUs, 1 GPU, 96 GB memory) → \u003ccode\u003enormalbw\u003c/code\u003e (28 CPUs, 256 GB memory) \u003c/td\u003e\r\n            \u003ctd\u003e02:04:35\u003c/td\u003e\r\n            \u003ctd\u003e199\u003c/td\u003e\r\n        \u003c/tr\u003e\r\n    
\u003c/tbody\u003e\r\n\u003c/table\u003e\r\n\r\n### Notes\r\n- Negligible runtime/SU values for the `DRY_RUN` stage (\u003c1 minute/\u003c1 SU) have been excluded from the results.\r\n- Total queueing times, which were similar across all cases, have been omitted.\r\n\r\n## Acknowledgments\r\n\r\nThe *deepvariant-nextflow* workflow was developed by Dr Kisaru Liyanage and Dr Matthew Downton (National Computational Infrastructure), with support from Australian BioCommons as part of the Workflow Commons project.\r\n\r\nWe thank Leah Kemp (Garvan Institute of Medical Research) for her collaboration in providing test datasets and assisting with pipeline testing.","organization":"National Computational Infrastructure (NCI) WorkflowHub team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1222?version=1","name":"main @ cb940b3","author":["Kisaru Liyanage","Matthew Downton"],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1222?version=2","name":"main @ 7387167","author":["Kisaru Liyanage","Matthew Downton"],"descriptor_type":["NFL"]}]},{"id":"1223","url":"https://workflowhub.eu/workflows/1223","name":"M6Allele","description":"# M6Allele Pipeline \u0026 M6Allele algorithm\r\n\r\n## Introduction\r\nWe have developed an algorithm called **M6Allele** for identifying allele-specific m6A modifications. To facilitate its usage by researchers, we have also encapsulated our analysis process into a pipeline. You can learn more about the pipeline and the algorithm's usage from the following two modules:\r\n* [Pipeline](#m6allele-pipeline)\r\n* [M6Allele algorithm](#m6allele-10)\r\n\r\n## M6Allele Pipeline\r\n### PARAMETER INTRODUCTION\r\n* `-g/--gtf` : required, the file name of your own GTF file\r\n* `-fa/--fasta` : required, the file name of your own reference genome file\r\n* `-sf/--skip_fastqc` : optional, whether to skip the fastqc phase. 
Default false\r\n* `-se/--single_end` : required, the fastq files are single-end or paired-end sequencing\r\n* `-vg/--varscan_or_gatk` : optional, use VarScan or GATK to call snp, v: VarScan, g: GATK. Default v\r\n* `-f/--function` : required, the function of M6Allele, including `AseGeneDetection`, `AsmPeakDetection`, `SampleSpecificASM`. Please refer to [M6Allele 1.0](#m6allele-10) for specific explanation\r\n* `-s/--sample` : required, the name of the file containing the sample name to be processed\r\n* `-gzip/--is_gzip` : required, whether the fastq file is compressed\r\n* `-db/--dbSnp` : optional, the name of dbSNP vcf file\r\n* `-h/--help` : help message of the pipeline\r\n\r\n### USAGE\r\n#### Overview\r\n1. Install [docker(v24.0.7)](https://www.docker.com/get-started/)\r\n2. Download a compressed docker image file from this [link](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/m6allelepipe.tar.gz) and import it using the following command:\r\n   ```shell\r\n      cd your_compressed_file_directory\r\n      gunzip m6allelepipe.tar.gz\r\n      docker load -i m6allelepipe.tar\r\n   ```\r\n    * If you're unable to down the image from above link, you can download the required files for local packaging images from [here](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/docker.tar.gz) and then build the image locally\r\n        ```shell\r\n            # build command\r\n            docker build -t your_image_name .\r\n        ```\r\n3. 
Assuming your current working directory is `your_work_directory`, you need to create the following subdirectories or files and place the required files:\r\n   * `fastq` : **required**, containing the fastq files you need to process\r\n   * `scripts` : **required**, containing three script files: main.sh, main.py and getPeakInMeT.R, which you can download from this [link](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/scripts.tar.gz)\r\n   * `reference` : **required**, containing references files required for the processing workflow\r\n        * `your_gtf_file.gtf` : we provide our default fasta file, you can download it from [this](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/gtfAndfasta.tar.gz)\r\n        * `your_reference_genome_file.fa` : we provide our default gtf file, you can download it from [this](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/gtfAndfasta.tar.gz)\r\n        * If you want to use GATK for SNP calling, you need to provide the dbSNP dataset. Here, we provide our default dbSNP dataset. You can download the `GCF_000001405.39.dbsnp.vcf.gz`from [here](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/GCF_000001405.39.dbsnp.vcf.gz) and `GCF_000001405.39.dbsnp.vcf.gz.tbi` files from [here](https://renlab.oss-cn-shenzhen.aliyuncs.com/M6Allele/GCF_000001405.39.dbsnp.vcf.gz.tbi) and place them in the `reference` folder. If you want to use a different version of the dbSNP dataset, please follow these steps:\r\n          * Download the GCF_XXX.vcf.gz and GCF_XXX.vcf.gz.tbi files from the [dbSNP database](https://ftp.ncbi.nih.gov/snp/latest_release/VCF/), and download the [chromosome conversion files](https://ftp.ncbi.nih.gov/genomes/all/GCF/000/001/405/) corresponding to the above-mentioned files. 
Process the downloaded dbSNP files using the following commands: \r\n              ```shell\r\n                # convert the chromosome names in the GCF_XXX.vcf.gz file to 1, 2, ..., X, Y\r\n                bcftools annotate --rename-chrs chromosome_conversion.txt --threads 10 -Oz -o your_new_process_dbsnp_file.vcf.gz your_downloaded_dbsnp_file.vcf.gz\r\n                # generate .tbi file\r\n                bcftools index -t your_new_process_dbsnp_file.vcf.gz\r\n              ```\r\n          * Move the resulting new dbSNP database `your_new_process_dbsnp_file.vcf.gz` and `your_new_process_dbsnp_file.vcf.gz.tbi` files to the `reference` folder\r\n          * You also need to provide .fai and .dict index files for the reference genome .fa file. If your fasta file was downloaded from the link we provided, you will also download the corresponding .fai and .dict files\r\n          * However, If you have your own fasta file, you can generate the corresponding .fai and .dict files using the following commands. Then move .fa and .dict file to `reference` folder\r\n            ``` shell\r\n                # Please ensure that the .fai, .dict, and .fa files have the same prefix in their filenames\r\n                # .fai generate command\r\n                samtools faidx your_reference_genome_file.fa\r\n                # .dict generate command\r\n                gatk CreateSequenceDictionary -R your_reference_genome_file.fa -O your_reference_genome_file.dict\r\n            ```\r\n   * `your_sample.txt`: **required**, containing the sample names you want to process, which are the prefixes of the fastq files. **Each line is separated by either space or tab.** According to the M6Allele function, there are three formats:\r\n        * `AseGeneDetection` : **Each line represents the name of an RNA-seq sample**. 
If there are multiple duplicates, use multiple lines to represent them\r\n        * `AsmPeakDetection` : **Each line consists of two columns, representing the INPUT sample name and the corresponding IP sample name for MeRIP-seq**. If there are multiple duplicates, use multiple lines to represent them\r\n        * `SampleSpecificASM` : **Each line consists of four columns, representing the INPUT sample name and the corresponding IP sample name for MeRIP-seq of sample 1, as well as the INPUT sample name and the corresponding IP sample name for MeRIP-seq of sample 2**. If there are multiple duplicates, use multiple lines to represent them\r\n\r\n### Specific Example:\r\nHere, we have listed several specific examples of using the pipeline. If you have other requirements, you can achieve them by combining different parameters.\r\n### 1. To use VarScan for calling SNPs and detecting ASE genes\r\n\r\n**data dependency:**\\\r\n`your_work_directory`: there are the following subfolders and files\r\n* `fastq` : It contains two files:\r\n  * input1.fastq.gz\r\n  * input2.fastq.gz\r\n* `scripts` : containing three script files: main.sh, main.py and getPeakInMeT.R\r\n* `reference` : \r\n  * your_fasta_file.fa\r\n  * your_gtf_file.gtf\r\n* `sample.txt` : It contains two lines:\r\n  * First line: input1\r\n  * Second line: input2\r\n\r\n**example:**\r\n```shell\r\n  docker run -v /path/to/your_work_directory:/data renlab303/m6allelepipe -f AseGeneDetection -s sample.txt -gzip true -se true -fa your_fasta_file.fa -g your_gtf_file.gtf\r\n```\r\n\r\n### 2. 
To use GATK for calling SNPs and detecting ASM m6A signals\r\n**data dependency:**\\\r\n`your_work_directory`: there are the following subfolders and files\r\n* `fastq` : It contains eight files: \r\n  * input1_1.fastq.gz\r\n  * input1_2.fastq.gz\r\n  * ip1_1.fastq.gz\r\n  * ip1_2.fastq.gz \r\n  * input2_1.fastq.gz\r\n  * input2_2.fastq.gz\r\n  * ip2_1.fastq.gz\r\n  * ip2_2.fastq.gz\r\n* `scripts` : containing three script files: main.sh, main.py and getPeakInMeT.R\r\n* `reference` :\r\n  * your_gtf_file.gtf\r\n  * your_fasta_file.fa\r\n  * your_fasta_file.fa.fai\r\n  * your_fasta_file.dict\r\n  * your_dbSNP_vcf_file.vcf.gz\r\n  * your_dbSNP_vcf_file.vcf.gz.tbi\r\n* `sample.txt` : It contains two lines\r\n  * input1\u0026emsp;ip1\r\n  * input2\u0026emsp;ip2\r\n\r\n**example:**\r\n```shell\r\n  docker run -v /path/to/your_work_directory:/data renlab303/m6allelepipe -f AsmPeakDetection -s sample.txt -gzip true -se false -g your_gtf_file.gtf -fa your_fasta_file.fa -vg g -db your_dbSNP_vcf_file.vcf.gz\r\n```\r\n\r\n### 3. 
To use GATK for calling SNPs and detecting Sample-specific ASM m6A signals\r\n**data dependency:**\\\r\n`your_work_directory`: there are the following subfolders and files\r\n* `fastq` : It contains eight files: \r\n  * sample1_input1.fastq.gz\r\n  * sample1_ip1.fastq.gz\r\n  * sample2_input1.fastq.gz\r\n  * sample2_ip1.fastq.gz\r\n  * sample1_input2.fastq.gz\r\n  * sample1_ip2.fastq.gz\r\n  * sample2_input2.fastq.gz\r\n  * sample2_ip2.fastq.gz \r\n* `scripts` : containing three script files: main.sh, main.py and getPeakInMeT.R\r\n* `reference` : It contains following files:\r\n    * your_gtf_file.gtf\r\n    * your_fasta_file.fa\r\n    * your_fasta_file.fa.fai\r\n    * your_fasta_file.dict\r\n    * your_dbSNP_vcf_file.vcf.gz\r\n    * your_dbSNP_vcf_file.vcf.gz.tbi\r\n* `sample.txt` : It contains two lines\r\n    * sample1_input1\u0026emsp;sample1_ip1\u0026emsp;sample2_input1\u0026emsp;sample2_ip1\r\n    * sample1_input2\u0026emsp;sample1_ip2\u0026emsp;sample2_input2\u0026emsp;sample2_ip2\r\n\r\n**example:**\r\n```shell\r\n  docker run -v /path/to/your_work_directory:/data renlab303/m6allelepipe -f SampleSpecificASM -s sample.txt -gzip true -se true -g your_gtf_file.gtf -fa your_fasta_file.fa -vg g -db your_sbSNP_vcf_file.vcf.gz\r\n```\r\n\r\n### Pipeline overview\r\n\r\nThis pipeline is built using shell scripts and integrates tools as follows:\r\n\r\n* **Quality control and preprocessing of raw data**\r\n    * [fastp](https://github.com/OpenGene/fastp): quality trimming and adapter clipping\r\n    * [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/): generate quality reports\r\n* **Build STAR index**\r\n  * [STAR](https://github.com/alexdobin/STAR): build index\r\n* **Read alignment**\r\n    * [STAR](https://github.com/alexdobin/STAR): Spliced Transcripts Alignment to a Reference\r\n    * [Samtools](http://www.htslib.org/): Reads sort and remove duplicates\r\n* **SNP calling**\r\n    * [VarScan](https://varscan.sourceforge.net/): Call SNPs from 
MeRIP-seq INPUT sample\r\n    * [bcftools](http://www.htslib.org/doc/1.1/bcftools.html): mark the result of VarScan\r\n    * [vcftools](https://vcftools.github.io/): filter the result of bcftools\r\n    * [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035531192-RNAseq-short-variant-discovery-SNPs-Indels): Call SNPs from MeRIP-seq INPUT sample\r\n* **Peak calling**\r\n    * [MeTPeak](https://github.com/compgenomics/MeTPeak): a novel, graphical model-based peak-calling method\r\n    * [BEDTools](https://bedtools.readthedocs.io/en/latest/): using \"mergeBed\" function\r\n* **ASE or ASM m6A detection**\r\n    * [M6Allele](#M6Allele 1.0): A toolkit for detection of allele-specific RNA N6-methyladenosine modifications \r\n\r\n## M6Allele 1.0\r\n\r\n### HARDWARE/SOFTWARE REQUIREMENTS\r\n* Java 1.8\r\n* Windows / Linux / Mac OS\r\n\r\n### INSTALLATION\r\n* clone the repo,\r\n```\r\ngit clone https://github.com/Jakob666/allele-specificM6A.git\r\n```\r\n* target JAR package\r\n```\r\ncd ./allele-specificM6A\r\n```\r\nmake sure the directory contains `M6Allele.jar`.\r\n\r\n### USAGE\r\n#### Overview\r\n#### Tools Introduction\r\nM6Allele.jar provides the following tools:\r\n\r\nTool | Function\r\n---|---\r\nAseGeneDetection|detect allele-specific expression (ASE) genes (one sample test)\r\nAsmPeakDetection|detect allele-specific modification (ASM) m6A signals  (one sample test)\r\nSampleSpecificASM|detect sample-specific ASM m6A signals  (paired sample test)\r\n\r\n#### parameters description\r\n1. 
**AseGeneDetection**\r\n   - `-vcf/--vcf_file` : required, VCF format file generate by RNA-seq or MeRIP-seq data SNP calling process\r\n   - `-g/--gtf` : required, GTF annotation file\r\n   - `-bam/--bam_files` : required, the alignment file of the FASTQ file, if you have multiple BAM files, please separate them with commas\r\n   - `-bai/--bai_files` : required, the index file of the bam file, If you have multiple BAI files, please separate them with commas\r\n   - `-o/--output` : optional, ASE gene test output file, default `./aseGene.txt`\r\n   - `-rc/--reads_coverage` : optional, reads coverage threshold using for filter RNA-seq or MeRIP-seq data SNVs in VCF file (aim for reducing FDR), default 10\r\n   - `-s/--sampling`: optional, MCMC sampling times, larger than 500, default 50000\r\n   - `-b/--burn` : optional, MCMC burn-in times, more than 100 and less than sampling times. Default 10000\r\n   - `-t/--thread` : optional, thread number for running test. Default 2\r\n   - `-h/--help` : help message of AseGeneDetection\r\n\r\n2. 
**AsmPeakDetection**\r\n   - `-bed/--peak_bed_file` : required, peak calling output result in BED format\r\n   - `-vcf/--vcf_file` : required, VCF format file generate by RNA-seq or MeRIP-seq data SNP calling process\r\n   - `-g/--gtf` : required, GTF annotation file\r\n   - `-inputBam/--input_bam_file` : required, the alignment file of the INPUT sample, if you have multiple BAM files, please separate them with commas\r\n   - `-inputBai/--input_bai_file` : required, the index file of the INPUT bam file, if you have multiple BAI files, please separate them with commas\r\n   - `-ipBam/--ip_bam_file` : required, the alignment file of the Ip sample\r\n   - `-ipBai/--ip_bai_file` : required, the index file of the Ip bam file\r\n   - `-o/--output` : optional, ASM m6A signal test output file, default `./asmPeak.txt`\r\n   - `-rc/--reads_coverage` : optional, reads coverage threshold using for filter RNA-seq or MeRIP-seq data SNVs in VCF file (aim for reducing FDR), default 10\r\n   - `-s/--sampling`: optional, MCMC sampling times, larger than 500, default 50000\r\n   - `-b/--burn` : optional, MCMC burn-in times, more than 100 and less than sampling times. Default 10000\r\n   - `-t/--thread` : optional, thread number for running test. Default 2\r\n   - `-h/--help` : help message of AsmPeakDetection\r\n   \r\n3. 
**SampleSpecificASM**\r\n   - `-s1Vcf/--sample1VcfFile` : required, VCF format file generate by sample1 RNA-seq or MeRIP-seq data SNP calling process\r\n   - `-s2Vcf/--sample2VcfFile` : required, VCF format file generate by sample2 RNA-seq or MeRIP-seq data SNP calling process\r\n   - `-bed/--mergePeakBedFile` : required, the merge result of peak calling for Sample 1 and Sample 2 in BED format\r\n   - `-g/--gtf` : required, GTF annotation file\r\n   - `-s1InputBam/--s1InputBamFiles` : required, the alignment file of the sample1 INPUT sample, if you have multiple BAM files, please separate them with commas\r\n   - `-s1InputBai/--s1InputBaiFiles` : required, the index file of the sample1 INPUT sample bam file, if you have multiple BAI files, please separate them with commas\r\n   - `-s2InputBam/--s2InputBamFiles` : required, the alignment file of the sample2 INPUT sample, if you have multiple BAM files, please separate them with commas\r\n   - `-s2InputBai/--s2InputBaiFiles` : required, the index file of the sample2 INPUT sample bam file, if you have multiple BAI files, please separate them with commas\r\n   - `-s1IpBam/--s1IpBamFiles` : required, the alignment file of the sample1 Ip sample, if you have multiple BAM files, please separate them with commas\r\n   - `-s1IpBai/--s1IpBaiFiles` : required, the index file of the sample1 Ip sample bam file, if you have multiple BAI files, please separate them with commas\r\n   - `-s2IpBam/--s2IpBamFiles` : required, the alignment file of the sample2 Ip sample, if you have multiple BAM files, please separate them with commas\r\n   - `-s2IpBai/--s2IpBaiFiles` : required, the index file of the sample2 Ip sample bam file, if you have multiple BAI files, please separate them with commas\r\n   - `-o/--output` : optional, Sample-specific test output directory, default `.`\r\n   - `-rc/--reads_coverage` : optional, reads coverage threshold using for filter RNA-seq or MeRIP-seq data SNVs in VCF file (aim for reducing FDR), default 
10\r\n   - `-s/--sampling`: optional, MCMC sampling times, larger than 500, default 50000\r\n   - `-b/--burn` : optional, MCMC burn-in times, more than 100 and less than sampling times. Default 10000\r\n   - `-t/--thread` : optional, thread number for running test. Default 2\r\n   - `-threshold/--significantThreshold` : optional, the threshold for determining whether it is a sample-specific Asm6A modification, default 0.05\r\n   - `-h/--help` : help message of SampleSpecificASM\r\n\r\n\r\n### 1. Allele-specific expression (ASE) gene detection (one sample test)\r\n**data dependency**:\r\n1. VCF format file generate by SNV calling process of `RNA-seq data` or `MeRIP-seq INPUT data` (required, the format of the file is described below)\r\n2. GTF file (required)\r\n3. bam file (required)\r\n4. bai file (required, the index file of bam file)\r\n\r\n**examples**:\\\r\nsuppose here exists files below:\r\n1. human genome GTF file `/path/to/Homo_sapiens.GRCh38.93.chr.gtf`\r\n2. VCF format file generate by RNA data `/path/to/rna_filtered.vcf`\r\n3. bam files `/path/to/repeat1.bam (/path/to/repeat2.bam)`\r\n4. bai files `/path/to/repeat1.bam.bai (/path/to/repeat2.bam.bai)`\r\n\r\n* detect ASE gene\r\n```\r\n# command\r\njava -jar ./M6Allele.jar AseGeneDetection \r\n     -g /path/to/Homo_sapiens.GRCh38.93.chr.gtf \r\n     -vcf /path/to/rna_filtered.vcf \r\n     -bam /path/to/repeat1.bam,/path/to/repeat2.bam\r\n     -bai /path/to/repeat1.bam.bai,/path/to/repeat2.bam.bai\r\n     -o /path/to/output_file \r\n     -t 6\r\n```\r\n\r\n\r\n### 2. Allele-specific modification (ASM) m6A signal detection (one sample test)\r\n**data dependency**:\r\n1. GTF format file\r\n2. VCF format file generate by SNV calling process of `RNA-seq data` or `MeRIP-seq INPUT data` (required, the format of the file is described below)\r\n3. BED format peak calling result generate by `MeRIP-seq data` (required, the format of the file is described below)\r\n4. 
The bam file of `MeRIP-seq INPUT data` (required)\r\n5. The bai file of `MeRIP-seq INPUT data` (required)\r\n6. The bam file of `MeRIP-seq Ip data` (required)\r\n7. The bai file of `MeRIP-seq Ip data` (required)\r\n\r\n**examples**:\\\r\nsuppose here exists files below:\r\n1. human genome GTF file `/path/to/Homo_sapiens.GRCh38.93.chr.gtf`\r\n2. VCF format file generate by RNA data `/path/to/rna_filtered.vcf`\r\n3. BED format file generate by peak calling process `/path/to/peak.bed`\r\n4. Bam files generate by `MeRIP-seq INPUT data` `/path/to/repeat1_input.bam (/path/to/repeat2_input.bam)`\r\n5. Bai files generate by `MeRIP-seq INPUT data` `/path/to/repeat1_input.bam.bai (/path/to/repeat2_input.bam.bai)`\r\n6. Bam files generate by `MeRIP-seq Ip data` `/path/to/repeat1_ip.bam (/path/to/repeat2_ip.bam)`\r\n7. Bai files generate by `MeRIP-seq Ip data` `/path/to/repeat1_ip.bam.bai (/path/to/repeat2_ip.bam.bai)`\r\n\r\n* detect ASM m6A signal\r\n```\r\n# command\r\njava -jar ./M6Allele.jar AsmPeakDetection \r\n     -g /path/to/Homo_sapiens.GRCh38.93.chr.gtf \r\n     -inputBam /path/to/repeat1_input.bam,/path/to/repeat2_input.bam\r\n     -inputBai /path/to/repeat1_input.bam.bai,/path/to/repeat2_input.bam.bai\r\n     -ipBam /path/to/repeat1_ip.bam,/path/to/repeat2_ip.bam\r\n     -ipBai /path/to/repeat1_ip.bam.bai,/path/to/repeat2_ip.bam.bai\r\n     -bed /path/to/peak.bed \r\n     -vcf /path/to/rna_filtered.vcf \r\n     -o /path/to/output_file \r\n     -t 6\r\n```\r\n\r\n### 3. Sample-specific ASM m6A signal detection (paired sample test)\r\n**data dependency**:\r\n1. GTF format file\r\n2. paired sample VCF format files generate by SNV calling process of `RNA-seq data` or `MeRIP-seq INPUT data` (required, the format of the file is described below)\r\n3. 
paired sample BED format peak calling results generate by `MeRIP-seq data` (required, the format of the file is described below)\r\n   * After obtaining the m6A peak calling results for two samples separately, you need to merge the results using bedtools.\r\n4. paired sample Bam files generate by `MeRIP-seq INPUT data`(required)\r\n5. paired sample Bai files generate by `MeRIP-seq INPUT data`(required)\r\n6. paired sample Bam files generate by `MeRIP-seq Ip data`(required)\r\n7. paired sample Bai files generate by `MeRIP-seq Ip data`(required)\r\n\r\n**examples**:\\\r\nsuppose here exists files below:\r\n1. human genome GTF file `/path/to/Homo_sapiens.GRCh38.93.chr.gtf`\r\n2. paired sample VCF format files generate by RNA data `/path/to/sample1_rna_filtered.vcf` \u0026 `/path/to/sample2_rna_filtered.vcf`\r\n3. paired sample BED format files generate by peak calling process `/path/to/merge_peak.bed`\r\n4. paired sample Bam files generate by MeRIP-seq INPUT data `/path/to/sample1_repeat1_input.bam (/path/to/sample1_repeat2_input.bam)` \u0026 `/path/to/sample2_repeat1_input.bam (/path/to/sample2_repeat2_input.bam)`\r\n5. paired sample Bai files generate by MeRIP-seq INPUT data `/path/to/sample1_repeat1_input.bam.bai (/path/to/sample1_repeat2_input.bam.bai)` \u0026 `/path/to/sample2_repeat1_input.bam.bai (/path/to/sample2_repeat2_input.bam.bai)`\r\n6. paired sample Bam files generate by MeRIP-seq Ip data `/path/to/sample1_repeat1_ip.bam (/path/to/sample1_repeat2_ip.bam)` \u0026 `/path/to/sample2_repeat1_ip.bam (/path/to/sample2_repeat2_ip.bam)`\r\n7. 
paired sample Bai files generate by MeRIP-seq Ip data `/path/to/sample1_repeat1_ip.bam.bai (/path/to/sample1_repeat2_ip.bam.bai)` \u0026 `/path/to/sample2_repeat1_ip.bam.bai (/path/to/sample2_repeat2_ip.bam.bai)`\r\n\r\n* detect sample-specific ASM m6A signal\r\n```\r\n# command\r\njava -jar ./M6Allele.jar SampleSpecificASM \r\n     -g /path/to/Homo_sapiens.GRCh38.93.chr.gtf \r\n     -bed /path/to/merge_peak.bed\r\n     -s1Vcf /path/to/sample1_rna_filtered.vcf \r\n     -s2Vcf /path/to/sample2_rna_filtered.vcf \r\n     -s1InputBam /path/to/sample1_repeat1_input.bam,/path/to/sample1_repeat2_input.bam\r\n     -s1InputBai /path/to/sample1_repeat1_input.bam.bai,/path/to/sample1_repeat2_input.bam.bai\r\n     -s1IpBam /path/to/sample1_repeat1_ip.bam,/path/to/sample1_repeat2_ip.bam\r\n     -s1IpBai /path/to/sample1_repeat1_ip.bam.bai,/path/to/sample1_repeat2_ip.bam.bai\r\n     -s2InputBam /path/to/sample2_repeat1_input.bam,/path/to/sample2_repeat2_input.bam\r\n     -s2InputBai /path/to/sample2_repeat1_input.bam.bai,/path/to/sample2_repeat2_input.bam.bai\r\n     -s2IpBam /path/to/sample2_repeat1_ip.bam,/path/to/sample2_repeat2_ip.bam\r\n     -s2IpBai /path/to/sample2_repeat1_ip.bam.bai,/path/to/sample2_repeat2_ip.bam.bai\r\n     -o /path/to/output_dir\r\n     -t 6\r\n```\r\n\r\n### FORMAT DECLARATION\r\n### 1. 
VCF generate by SNP calling of RNA and MeRIP sequencing data\r\nAt least 2 columns,\r\n* \\#CHROM: chromosome number, `1,2,3,...X,Y,MT`\r\n* POS: mutation position\r\n* ID (optional): mutation ID, default `.`\r\n* REF (optional): reference nucleotide\r\n* ALT (optional): alternative nucleotide\r\n* QUAL (optional): quality score\r\n* FILTER (optional): `PASS` if SNP sites were filtered, default `.`\r\n* INFO (optional): additional information\r\n* FORMAT (optional): recording variant genotype information for the sample\r\n* Sample (optional): the corresponding data in `FORMAT` field.\r\n\r\n* example\r\n\u003e \\#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample\\\r\n\u003e 1\t3025531\t.\tT\tA\t64.28\tPASS\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;QD=32.14;SOR=0.693\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:76,6,0\\\r\n\u003e 1\t3037125\t.\tA\tC\t68.28\tPASS\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;QD=34.14;SOR=0.693\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:80,6,0\\\r\n\u003e 1\t5170624\t.\tA\tG\t434.6\tSnpCluster\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.479;DP=17;ExcessHet=3.0103;FS=5.315;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=25.56;ReadPosRankSum=-1.640;SOR=0.662\tGT:AD:DP:GQ:PL\t0/1:5,12:17:99:442,0,142\\\r\n\u003e 1\t85864585\t.\tT\tA,C\t771.02\tPASS\tAC=1,1;AF=0.500,0.500;AN=2;DP=20;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=60.00;QD=31.86;SOR=1.022\tGT:AD:DP:GQ:PL\t1/2:0,5,14:19:99:788,579,564,209,0,167\r\n\r\n\r\n### 2. 
BED format file\r\nContains fields below, more details see [BED format demonstration UCSC](http://genome.ucsc.edu/FAQ/FAQformat#format1)\r\n* \\# chr: chromosome number, `1,2,3,...X,Y,MT`\r\n* chromStart: m6A signal start position on chromosome\r\n* chromEnd: m6A signal end position on chromosome\r\n* name: ENSEMBL gene ID\r\n* score: significant score(adjusted p.value), generate by peak calling tools, less than `0.05`\r\n* strand: `+` or `-`\r\n* thickStart: The starting position at which the feature is drawn thickly\r\n* thickEnd: The ending position at which the feature is drawn thickly\r\n* itemRgb: An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to \"On\", this RBG value will determine the display color of the data contained in this BED line. \r\n* blockCount: sub-block number of the m6A signal peak, integer, `≥1`\r\n* blockSizes: block size of each sub-block, separate by `,`\r\n* blockStarts: block start position on chromosome, separate by `,`\r\n\r\n\u003e \\# chr\tchromStart\tchromEnd\tname\tscore\tstrand\tthickStart\tthickEnd\titemRgb\tblockCount\tblockSizes\tblockStarts\\\r\n\u003e 1\t9747647\t9747845\tENSMUSG00000097893\t7.1e-05\t+\t9747647\t9747845\t0\t1\t198,\t0\\\r\n\u003e 1\t16105773\t16105923\tENSMUSG00000025921\t4.9e-05\t+\t16105773\t16105923\t0\t1\t150,\t0\\\r\n\u003e 1\t33739519\t33739819\tENSMUSG00000004768\t0.0032\t+\t33739519\t33739819\t0\t1\t300,\t0\\\r\n\u003e 1\t34180162\t34180463\tENSMUSG00000026131\t0.00022\t+\t34180162\t34180463\t0\t1\t301,\t0\\\r\n\u003e 1\t34306583\t34307612\tENSMUSG00000026131\t0.00038\t+\t34306583\t34307612\t0\t2\t68,283,\t0,746\r\n\r\n### OUTPUT FILE DESCRIPTION\r\n### 1. 
ASE gene detection output\r\nWhen the algorithm finishes running, the following files will be in your output folder:\r\n* error.log: error logs generated during program execution\r\n* logout.log: normal logs generated during program execution\r\n* snp_location.txt: the information of SNP loci used in the algorithmic computation process\r\n* aseGene.txt (if you specify an output filename, it will be the name you specified): the result of ASE gene detect\r\n   * geneId: The Ensembl ID of the gene\r\n   * geneName: The name of the gene being tested\r\n   * pValue: The p-value computed by the algorithm\r\n   * qValue: The result of the p-value after undergoing Benjamini-Hochberg (BH) correction\r\n   * snpNum: The number of SNP loci used in the algorithm for identifying the gene\r\n   * major/minorAlleleReads: The number of reads available for calculation on the major/minor allele of the gene\r\n   * majorAlleleFrequency: The frequency of the major allele\r\n   * majorAlleleBase: The major allele base at the SNP loci used for calculation\r\n\r\n### 2. 
ASM m6A signal detection output\r\nWhen the algorithm finishes running, the following files will be in your output folder:\r\n* error.log: error logs generated during program execution\r\n* logout.log: normal logs generated during program execution\r\n* snp_location.txt: the information of SNP loci used in the ASE algorithmic computation process\r\n* aseRes.txt: the ASE gene identified by the algorithm\r\n* peak_with_snp.txt: the information of SNP loci covered by the m6A peak\r\n* asmPeak.txt (if you specify an output filename, it will be the name you specified): the result of ASM m6A signal detect\r\n   * chr: Chromosome\r\n   * peakStart: The genomic position of the start point of the m6A peak\r\n   * peakEnd: The genomic position of the end point of the m6A peak\r\n   * geneId: The Ensembl ID of the gene to which the m6A peak belongs\r\n   * geneName: The name of the gene to which the m6A peak belongs\r\n   * pValue: The p-value computed by the algorithm\r\n   * qValue: The result of the p-value after undergoing Benjamini-Hochberg (BH) correction\r\n   * snpNum: The number of SNP loci used in the algorithm for identifying the m6A peak\r\n   * major/minorAlleleReads: The number of reads available for calculation on the major/minor allele of the peak\r\n   * majorAlleleFrequency: Major allele frequency, where the major allele is based on the genotype of the INPUT sample. **If this value is less than 0.5**, it indicates allele-specific m6A modification occurring on the minor allele. Conversely, if it's greater than 0.5, it indicates allele-specific m6A modification occurring on the major allele\r\n   * majorAlleleBase: The major allele base at the SNP loci covered by the identified m6A peak\r\n\r\n### 3. 
Sample-Specific ASM m6A signal detection output\r\nWhen the algorithm finishes running, the following files will be in your output folder:\r\n* error.log: error logs generated during program execution\r\n* logout.log: normal logs generated during program execution\r\n* sample1/snp_location.txt: the information of SNP loci used in the ASE algorithmic computation process for sample1\r\n* sample1/peak_with_snp.txt: the information of SNP loci covered by the m6A peak in sample1\r\n* sample2/snp_location.txt: the information of SNP loci used in the ASE algorithmic computation process for sample2\r\n* sample2/peak_with_snp.txt: the information of SNP loci covered by the m6A peak in sample2\r\n* sampleSpecificAsm6A.txt: the identification results of sample-specific ASM m6A signal\r\n    * chr: Chromosome\r\n    * peakStart: The genomic position of the start point of the m6A peak\r\n    * peakEnd: The genomic position of the end point of the m6A peak\r\n    * geneId: The Ensembl ID of the gene to which the m6A peak belongs\r\n    * geneName: The name of the gene to which the m6A peak belongs\r\n    * sample1MajorFrequency: The major allele frequency of peak m6A in sample 1, where the major allele is based on the genotype of the INPUT sample. **If this value is less than 0.5**, it indicates allele-specific m6A modification occurring on the minor allele. Conversely, if it's greater than 0.5, it indicates allele-specific m6A modification occurring on the major allele. **If the m6A peak has no snp loci in sample1 that can be used for calculation, it will be represented as -.**\r\n    * sample2MajorFrequency: The major allele frequency of peak m6A in sample 2, where the major allele is based on the genotype of the INPUT sample. **If this value is less than 0.5**, it indicates allele-specific m6A modification occurring on the minor allele. Conversely, if it's greater than 0.5, it indicates allele-specific m6A modification occurring on the major allele. 
**If the m6A peak has no snp loci in sample2 that can be used for calculation, it will be represented as -.**\r\n    * sample1MajorHaplotype: The major allele genotype within the m6A peak in sample1. **If the m6A peak has no snp loci in sample1 that can be used for calculation, it will be represented as -.**\r\n    * sample2MajorHaplotype: The major allele genotype within the m6A peak in sample2. **If the m6A peak has no snp loci in sample2 that can be used for calculation, it will be represented as -.**\r\n    * sample1PValue: \r\n        * If the m6A peak has no snp loci in sample1 that can be used for calculation, it will be represented as -\r\n        * If the specifically modified haplotypes of the m6A peak are the same in both sample1 and sample2, then the value represents the result recalculated using the data from both samples simultaneously\r\n        * If there is no resampling calculation performed and the peak has SNP loci available for calculation in sample1, then this value is calculated solely based on the data sampled from sample1\r\n    * sample1QValue: The Benjamini-Hochberg corrected value for the sample1PValue\r\n    * sample2PValue: \r\n        * If the m6A peak has no snp loci in sample2 that can be used for calculation, it will be represented as -\r\n        * If the specifically modified haplotypes of the m6A peak are the same in both sample1 and sample2, then the value represents the result recalculated using the data from both samples simultaneously\r\n        * If there is no resampling calculation performed and the peak has SNP loci available for calculation in sample2, then this value is calculated solely based on the data sampled from sample2\r\n    * sample2QValue: The Benjamini-Hochberg corrected value for the sample2PValue\r\n    * specificSample: In which sample does the peak have sample-specific ASM\r\n        * -: There is no sample-specific m6A modification in either of the two samples\r\n        * sample1: There is 
sample-specific m6A modification in sample1\r\n        * sample2: There is sample-specific m6A modification in sample2\r\n        * sample1/sample2: There is sample-specific m6A modification in both samples","organization":"RenLabBioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1223?version=1","name":"main @ c98f6fe","author":[],"descriptor_type":[]}]},{"id":"1224","url":"https://workflowhub.eu/workflows/1224","name":"Sample Python Crypt4GH workflow","description":"","organization":"Application Security - Test Crypt4GH solutions","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1224?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1225","url":"https://workflowhub.eu/workflows/1225","name":"clinicalmp-discovery/main","description":"Workflow for clinical metaproteomics database searching","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1225?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1226","url":"https://workflowhub.eu/workflows/1226","name":"nf-core/phaseimpute","description":"Phasing and imputation pipeline","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1226?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1229","url":"https://workflowhub.eu/workflows/1229","name":"GIMP Image Annotator","description":"# gimp-image-annotator\r\n*gimp-image-annotator or GIÀ, a lightweight GIMP plug-in to allow for computer vision-assisted image annotation using the powerful GIMP selection 
toolbox.*\r\n\r\n**Installation**\r\n\r\nFollow the guide here: https://en.wikibooks.org/wiki/GIMP/Installing_Plugins to find how to install GIMP plug-ins on your system, save the file `image-annotator.py` in GIMP's plug-in folder. \r\n\r\nIn GIMP v2.x, the plug-in system relies on deprecated python2. On Windows, a version of python2 is included in the installation of GIMP, so you only need to follow the plug-in installation. On Linux, we recommend using the Flatpak version of GIMP, as it comes with the correct python2 binaries included. On Linux, the plug-in may need to be made executable with the command `chmod a+x /path/to/image-annotator.py` in order to be seen by GIMP.\r\n\r\n**Using the software**\r\nOnce installed, navigate to *Toolbox* then *Image Annotator*, add the labels you want, select one, use GIMP's selection tools (e.g. The Fuzzy Select tool - a guide can be found here: https://docs.gimp.org/en/gimp-tools-selection.html) to select an area (use Quick Mask or Shift+Q to quickly see the mask you have created). **Make sure antialiasing and feathering is off, you cannot turn it off for rectangle select however it isn't used**. Once you have your desired selected area, press *Save selected mask*. Repeat until all objects are annotated.\r\n\r\n**How do I use the data?**\r\n\r\n*gimp-image-annotator* saves a binary mask of each annotation, with class of mask stored in the `_annotations.json` file. The `_annotations.json` file is structured as follows\r\n\r\n\r\n`\r\n[{\r\n\"label\": \"label\",\r\n\"id\": \"0\",\r\n\"filename\": \"image.png\"\r\n}]\r\n`\r\n\r\nThe masks can be inputted using most image processing software. 
For example in `opencv-python` it would be:\r\n\r\n\r\n`\r\nimport cv2;\r\nmask = cv2.imread(PATH, cv2.IMREAD_GRAYSCALE)\r\n`\r\n","organization":"Into the deep","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1229?version=1","name":"main @ 0b0f345","author":["Kieran Atkins"],"descriptor_type":[]}]},{"id":"1230","url":"https://workflowhub.eu/workflows/1230","name":"[DTC-V2] WF5201: Forecast volcanic ash fallout and dispersal in the atmosphere","description":"Forecasting the atmospheric dispersal of volcanic products requires accurate input parameters for transport models, including meteorological data and ash/gas emission terms. Such forecasting builds upon three basic ingredients:\r\n1. Meteorological Data. Typically derived from global, regional, or local-scale models, meteorological data drive the transport and deposition of volcanic particles.\r\n2. Transport Models. These models simulate atmospheric dispersal, incorporating processes such as wind advection, turbulent diffusion, and particle sedimentation. Besides atmospheric density and viscosity, sedimentation velocities depend also on particle size, density, and shape, which affect the particle drag coefficients and atmospheric residence times. Some transport models (e.g. FALL3D) include volcanic ash aggregation and aerosol chemistry, two aspects that can improve the accuracy of dispersal forecasts.\r\n3. Emission Models. Emission models provide the so-called Eruption Source Parameters (ESP), needed to characterize the near vent properties of volcanic plumes (that are a kind of clouds which are connected to their source). The ESP, including eruption start and duration, cloud injection height, vertical mass distribution across the eruption column, Total Grain Size Distribution (TGSD), and Mass Eruption Rate (MER) are essential inputs to transport models. 
While some parameters (e.g., eruption start and end times, column height) can be directly observed, others like MER must be estimated by means of indirect methods, which introduces significant input uncertainties that propagate through the modelling workflow.\r\n\r\nThe DTC-V2 workflow (WF5201) configures and runs an ensemble of FALL3D model realizations, potentially assimilating in the model different sources of data (groundbased or satellite-based observations). A major advantage of the ensemble model approach is that it allows delivering both deterministic (e.g. ensemble mean) and probabilistic (e.g. fraction of ensemble members exceeding a given condition) forecasts, reflecting the inherent uncertainties in the ESPs. On a coarse-grain, the workflow requires four main steps:\r\n1. Get atmospheric data from Numerical Weather Prediction (NWP) models, assumed to be run and delivered by a third party. DTC-V2 can ingest either NWP forecast data (up to a few days ahead) or reanalyses (for past events or “what if” scenarios). Different agencies (e.g. ECMWF, National Weather Services, etc.) serve global/regional data at various spatial and temporal resolutions.\r\n2. Get the Eruption Source Parameters (ESP) from different sources; e.g. from volcano monitoring data provided by State Volcano Observatories (SVO). In case ESPs from several sources are available, the workflow assigns a ranked\r\npriority as shown in Table 1.\r\n3. Setup the FALL3D model and run an ensemble of simulations in the FENIX RI or HPC (leonardo@CINECA). The different ensemble members, each representing a single deterministic scenario, are set by perturbing some critical ESPs (e.g. eruption column height) within their uncertainty range. The model allows introducing both absolute and relative errors for all ESP around the observed value.\r\n4. 
Post-process the results","organization":"WP5 - Volcanoes","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1230?version=1","name":"main @ 3923678","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1230?version=2","name":"main @ c324ab2","author":[],"descriptor_type":["CWL"]}]},{"id":"1231","url":"https://workflowhub.eu/workflows/1231","name":"[DTC-T1] WF6101: Tsunami impact forecasting","description":"# [DTC-T1] WF6101: Tsunami impact forecasting\r\n\r\nThis repository contains a Common Workflow Language (CWL) and Ro-Crate metadata definition for DTC-T1 workflow 6101, which is designed for \r\nproviding tsunami impact forecasting following a tsunamigenic earthquake event, based on a probabilistic approach. The workflow integrates real-time\r\nearthquake data, runs HPC simulations, and generates tsunami hazard maps. \r\n\r\nThe main CWL implementation is found in WF6101.cwl, together with ST610106 and ST610109 describing complex steps. The file mini-ST610106.cwl refers to a specific\r\nrealization of the ST610106, where different alternative options are included.\r\n\r\nTo see a preview of the RO-Crate metadata the `ro-crate-preview.html` file can be opened in the browser.\r\n\r\n## Overview\r\n\r\nThe DTC-T1 Digital Twin component operates a single workflow (WF6101) that runs 11 main steps. DTC-T1 is also referred to as PTF,\r\nProbabilistic Tsunami Forecasting. \r\n\r\nThe workflow is initialized by a potentially tsunamigenic earthquake event. Based on the real-time event parameters together with long-term information \r\nretrieved from a regional-scale tsunami hazard model, an ensemble of earthquake scenarios compatible with the occurring event is built, where each scenario\r\nis weighted by the probability of being a good representation of the actual event. 
The tsunami impact computed by numerical modeling and the scenario \r\nprobabilities are aggregated into exceedance probabilities for given tsunami intensities at given coastal points to calculate the hazard curves and\r\nsubsequently the hazard maps. \r\n\r\nThe workflow is designed to operate in (near-)real-time and to be employed at Tsunami Warning Centres for operational tsunami early warning and forecasting,\r\nas well as for rapid post-event assessment.\r\n\r\n## Workflow structure\r\n\r\nThe workflow consists of multiple steps (ST), datasets (DT), and software services (SS). Below is a simplified breakdown:\r\n\r\n- The Scenario Player (ST610101) acts like a data archive, storing observations (synthetic or measured) (DT6101) recorded by external data providers\r\n(i.e. seismic data, labeled as DT6103, sea level data, labeled as DT6104, and GNSS data, labeled as DT6105) and sending them to specific data listeners.\r\n\r\n- The Listener EQ (ST610102) evaluates the earthquake information that initialise the workflow execution, while Listeners SL (ST610103) and GNSS (ST610104)\r\nelaborate observational sea level and ground deformation data for further comparison with simulated data. \r\n\r\n- The Ensemble Manager (ST610105) combines the information from the Listener EQ with the long-term information from a regional hazard model (DT6102) and \r\ndefines a list of scenarios (DT6106) compatible with the triggering event, with their associated probabilities (DT6107). \r\n\r\n- The ensemble of earthquake scenarios is provided as input to the Scenario Modelling (ST610106), which makes use of pre-defined topo-bathymetric grids (DT6109) \r\nand computes tsunami intensities (DT6110) and ground deformation (DT6111). In the modeling step different alternative and/or combined models can be used \r\nin each single workflow realization. 
More specifically, different source and tsunami models can be combined through the following software: \r\nTsunami-HySEA (SS6107), SeisSol (SS6106), BingClaw (SS6109), Shaltop (SS6110), Landslide-HySEA (SS6108), Source-to-wave filter (SS6112), \r\nand the Inundation AI module (SS6111). User settings will dictate which model combinations are invoked. As an alternative, precomputed scenarios \r\ncan be simply retrieved (SS6119). \r\n\r\n- The Misfit Evaluator (ST610107) combines simulation outputs with eventual SL and GNSS data to evaluate the degree of confidence between the simulated \r\nscenarios and observations. According to the computed misfit, the initial scenario ensemble and related probabilities can be updated. \r\n\r\n- The Hazard Aggregator (ST610108) aggregates scenario probabilities and impact metrics to calculate hazard curves at multiple forecast points (DT6112).\r\n\r\n-  The AL (Alert level) and Visualization step (ST610109) makes the final processing of the results, converting hazard into Alert Levels and producing \r\nprobabilistic visual maps (DT6113). 
It is worth noting that only the Tsunami Service Providers (TSP) are allowed in producing and issuing the AL for \r\noperational tsunami early warning and forecasting.\r\n\r\n- In case of landslide-triggered tsunamis, input in the form of shake maps generated for the occurring event through the Earthquake Modeling \r\n(ST610111) is forwarded to the Landslide Scenario Manager (ST610110), which in turn feeds landslide scenarios (DT6108) to the Scenario Modelling.\r\n\r\n\r\n## Workflow diagram\r\n\r\nHere is a visual overview of the described workflow, and the corresponding CWL graph automatically generated from the metadata information stored in the \r\nCWL workflow file.\r\n\r\n![wf_diag](images/PTF_Workflow_Diagram_FROZEN.drawio.png)\r\n\r\n![cwl_wf](images/cwl_graph_wf.png)\r\n\r\n## The mini-ST610106 for operationalization\r\n\r\nWe focused on one realization of the Scenario Modeling step (hereinafter called mini-ST610106), where the T-HySEA code (SS6107) is used\r\nto model both the earthquake source and the following tsunami, and only tsunami intensities (DT6110) are saved. In this experiment, the singularity \r\nimage of the T-HySEA code created with the eFlows4HPC image service creation is used. Moreover, PyCOMPSs functionalities have been implemented in \r\norder to execute the mini-ST610106 as a COMPSs job on the Booster partition of the Leonardo cluster at the HPC CINECA infrastructure.\r\n\r\n![cwl_mini](images/ministep_graph.png)\r\n\r\nTo enable execution, an executable CWL file containing only the information required to run the process has been produced. In this case, \r\nthe processing step is implemented by the bash script `run_gpu_compss.sh` that uses PyCOMPSs to submit a job to the Leonardo cluster queue through \r\nthe Slurm scheduler. The executable CWL file does not explicitly specify input and output data because the underlying processing step manages \r\nthese details internally. 
An input configuration file `input.yaml` provides the necessary global input data.\r\n\r\n### How to execute the mini-ST610106\r\n\r\nThe mini-workflow depicted is executed using the following command:\r\n\r\n`cwltool --cachedir cache --preserve-entire-environment mini-ST610106.cwl input.yaml`\r\n\r\nIn this command, the --cachedir flag is employed to save all files generated during execution for debugging purposes, while the \r\n--preserve-entire-environment flag ensures that the current environment variables are transferred into the custom execution environment \r\nestablished by cwltool. Once the command is launched, cwltool creates an isolated execution environment where it runs the bash script, which, in turn, \r\nsubmits the PyCOMPSs job to the computational node via Slurm, with the standard output and error captured in the cwl.out and cwl.err files, respectively.\r\n\r\nThe execution exploits the singularity image of the Tsunami-HySEA code, built with the image creation service of eFlows4HPC using\r\nparameters that fit the architecture of the Cineca cluster Leonardo (i.e., linux/amd64, x86_64, openmpi@4.1.4, cuda@11.8).","organization":"WP6 - Tsunamis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1231?version=1","name":"main @ 3923678","author":[],"descriptor_type":["CWL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1231?version=2","name":"main @ c324ab2","author":[],"descriptor_type":["CWL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1231?version=3","name":"main @ b8047c3","author":[],"descriptor_type":["CWL"]}]},{"id":"1234","url":"https://workflowhub.eu/workflows/1234","name":"Connectome-based predictive modeling (CPM) with Python using Ray for parallel processing","description":"## Installation\r\n\r\nOther than cloning this repository, you need to have bash installed (which is most likely the case if you use Linux, *BSD or even 
MacOS).\r\nFor the Python code, the arguably easiest and cleanest way is to set up a Python virtual environment and install the dependencies there:\r\n\r\n``` shell\r\n$ python3 -m venv ./hcp-suite-venv # Setup the virtual environment\r\n$ source ./hcp-suite-venv/bin/activate # Activate the virtual environment\r\n$ pip install pandas pingouin networkx nilearn nibabel ray # Install dependencies within the virtual environment\r\n$ pip install ipython jupyterlab # Install interactive Python shells to run hcp-suite code in\r\n```\r\n\r\n## Usage: CPM tutorial\r\n\r\nThe following tutorial uses the gambling task as an example, the variable to be predicted is BMI. Differing commands for resting-state fMRI are provided for the corresponding steps.\r\n\r\n### Overview\r\n\r\nPython code in this repository is written to be used in an interactive Python shell. Either Jupyter Lab or Jupyter Notebook is recommended as plots and images are conveniently displayed in-line but any Python shell (e.g. iPython) should work.\r\n\r\nGenerally speaking, the procedure is as follows:\r\n\r\n1. Downloading data of subjects to be included in the analysis\r\n2. Parcellation of CIFTI files with `task.sh prepare` (at the code's current state, the HCP's folder structure is assumed)\r\n3a. Extraction of task-based fMRI time series based on EV files provided by the HCP with `get_ev_timeseries()` from `hcpsuite.py`\r\n3b. \r\n4. Computing of correlation matrices via `compute_correlations()` from `hcpsuite.py`\r\n5. Performing of CPM, the functions are provided by `cpm_ray.py`\r\n6. Establishing of statistical significance by way of permutation testing (also `cpm_ray.py`)\r\n\r\nThe following code snippets are to be run in an interactive Python shell (e.g. the \"Console\" of Jupyter Lab):\r\n\r\n#### Downloading data\r\n\r\nWe will use Amazon Web Services to download HCP data. 
Set up access to HCP data via Amazon Web Services by following their [documentation](https://wiki.humanconnectome.org/docs/How%20To%20Connect%20to%20Connectome%20Data%20via%20AWS.html). You should be provided with an AWS Access Key ID and Secret Access Key we are going to put into Python variables (in quotation marks) for easy access:\r\n\r\n``` python\r\naws_access_key_id=\"Replace with your access key ID\"\r\naws_secret_access_key=\"Replace with your secret access key\"\r\n\r\n# Also, specify a location you want to download files to. We will use this variable repeatedly.\r\ndata_dir = \"./HCP_1200\"\r\n```\r\n\r\nWe now need a list of subject IDs to be included in the analysis. Save them in a simple text file with one line per subject ID. For the rest of this tutorial, we will use the gambling task as an example (the list of IDs will be called ```gambling_subjects.ids```). As preprocessing for resting-state data differs, we will provide resting-state specific instructions when needed (the list of IDs will be called ```rest_subjects.ids```).\r\n\r\n``` python\r\nimport sys\r\n\r\nsys.path.append(\"../hcp-suite/lib\") # This assumes the hcp-suite directory is in the current working directory's parent directory. You can also use absolute paths.\r\nfrom hcpsuite import *\r\nfrom cpm_ray import *\r\n\r\nsubj_ids = load_list_from_file(\"./gambling_subjects.ids\") # Change the path to gambling_subjects.ids if it resides elsewhere\r\n\r\n# Check the number of subjects\r\nlen(subj_ids)\r\n```\r\n\r\nNext, we will use the function download_hcp_files() to download all needed files from the HCP's AWS storage. This function takes the following arguments in this order: List of subject IDs, download location, task name, AWS access key ID, and AWS secret access key. 
It will return a list of missing subjects we capture in the variable missing_subjs:\r\n\r\n``` python\r\nmissing_subjs = download_hcp_files(subj_ids, data_dir, \"gambling\", aws_access_key_id, aws_secret_access_key)\r\n```\r\n\r\nThe previous command will print a list of missing subjects (if any) since not all data from all subjects is available on AWS, unfortunately. You can either obtain data from other sources or remove the missing subjects from our list of subject IDs:\r\n\r\n\r\n``` python\r\nfor missing_subj in missing_subjs:\r\n    subj_ids.remove(missing_subj)\r\n```\r\n\r\n#### Parcellation\r\n\r\nParcellation involves combining voxels of the \"raw\" CIFTI files into parcels as specified by the combined cortical, subcortical, and cerebellar parcellation we dubbed \"RenTianGlasser\" after the authors of the individual parcellations.\r\n\r\n##### Task-based fMRI\r\n``` python\r\nparcellate_ciftis(subj_ids=subj_ids,\r\n                  parcellation_fname=\"./hcp-suite/data/parcellations/RenTianGlasser.dlabel.nii\",\r\n                  task=\"gambling\",\r\n                  data_dir=data_dir) # If you have installed Connectome Workbench in a non-standard way,\r\n                                     # so that 'wb_command' is not in your $PATH, add\r\n                                     # 'wb_command=\"/replace/with/path/to/wb_command\"'\r\n```\r\n\r\n##### Resting-state fMRI\r\n\r\nAs resting-state fMRI data was collected in two separate sessions called REST1 and REST2, we need to parcellate twice:\r\n\r\n```\r\n# First for REST1\r\n\r\nparcellate_ciftis(subj_ids=subj_ids, \r\n                  parcellation_fname=\"./hcp-suite/data/parcellations/RenTianGlasser.dlabel.nii\", \r\n                  task=\"REST1\", \r\n                  data_dir=data_dir) # If you have installed Connectome Workbench in a non-standard way, \r\n                                     # so that 'wb_command' is not in your $PATH, add \r\n                                     # 
'wb_command=\"/replace/with/path/to/wb_command\"'\r\n\r\n# Now for REST2\r\nparcellate_ciftis(subj_ids=subj_ids, \r\n                  parcellation_fname=\"./hcp-suite/data/parcellations/RenTianGlasser.dlabel.nii\", \r\n                  task=\"REST2\", \r\n                  data_dir=data_dir) # If you have installed Connectome Workbench in a non-standard way, \r\n                                     # so that 'wb_command' is not in your $PATH, add \r\n                                     # 'wb_command=\"/replace/with/path/to/wb_command\"'\t\t\t\t\t\t\t\t\t\t\t \r\n```\r\n\r\n\r\n#### Extraction of time series\r\n\r\n##### Task-based fMRI\r\n``` python\r\nev_data_dict = get_ev_timeseries(subj_ids, [\"win.txt\"], task=\"gambling\",\r\n                                 runs=('LR', 'RL'),\r\n                                 parcellation=\"RenTianGlasser\",\r\n                                 data_dir=data_dir) # If you have installed Connectome Workbench in a non-standard way,\r\n                                                    # so that 'wb_command' is not in your $PATH, add\r\n                                                    # 'wb_command=\"/replace/with/path/to/wb_command\"'\r\n\r\n# Now, we save the extraced time series as text files in a directory of our choice\r\n# (in this case: ./GAMBLING_win)\r\nsave_data_dict(ev_data_dict, path=\"./GAMBLING_win\")\r\n```\r\n\r\n##### Resting-state fMRI\r\n\r\nExtraction of time series for resting-state fMRI is less complicated via the `get_rest_timeseries` function:\r\n\r\n```\r\nts_dict = get_rest_timeseries(subj_ids, data_dir)\r\n\r\n# Save time series files in directory \"REST\"\r\nsave_data_dict(ts_dict, path=\"./REST\")\r\n```\r\n\r\n\r\n#### Computation of correlation matrices\r\n\r\nWe continue in our Python shell to compute correlation matrices:\r\n\r\n``` python\r\n# We load the saved time series from the previous step\r\ncd GAMBLING_win # save_data_dict() writes file names into file \"ts_files\" but without 
paths,\r\n                # thus the easiest way is to change into the directory containing the files\r\ntime_series, time_series_files = load_time_series(\"./ts_files\") # ... and read them from there\r\n\r\ncorrelation_measure, correlation_matrices = compute_correlations(time_series, kind='tangent')\r\n# Tangent in our experience provides the best results, but there are alternatives:\r\n# https://nilearn.github.io/dev/modules/generated/nilearn.connectome.ConnectivityMeasure.html\r\n\r\n# We then save the matrices into a single file in the NIfTI format for downstream processing\r\nsave_matrix(cifti_dim_to_nifti_dim(correlation_matrices), \"./GAMBLING_win-tangent.nii.gz\")\r\n```\r\n\r\n#### CPM\r\n\r\nFor actual CPM, we need to install and run [Ray](https://ray.io) (run this e.g. in your Python virtual environment as described in [Installation](#installation)):\r\n\r\n``` shell\r\n$ pip install ray\r\n$ ray start --head --num-cpus=16 # Run this on your main node.\r\n                                 # Processes take up a lot of RAM, be careful not to use too many CPUs\r\n```\r\n\r\nOptionally add more Ray nodes to form a cluster, see the [Ray documentation](https://docs.ray.io/en/latest/cluster/vms/user-guides/launching-clusters/on-premises.html) for details.\r\n\r\n##### Merging behavioral/biometric data in Python\r\n\r\nFor our analysis, we need values from both the unrestricted and [restricted HCP data](https://www.humanconnectome.org/study/hcp-young-adult/document/restricted-data-usage), which are available as separate CSV files. 
For easier handling, we merge them into a single CSV file:\r\n\r\n``` python\r\nimport pandas as pd\r\nunrestricted = pd.read_csv(\"/path/to/unrestricted.csv\")\r\nrestricted = pd.read_csv(\"/path/to/restricted.csv\")\r\nmerged = pd.merge(restricted, unrestricted, on=\"Subject\")\r\nmerged.to_csv(\"./merged.csv\") # Save the merged DataFrame as a CSV file in the current directory\r\n```\r\n\r\n##### Loading and preparing data in Python\r\n\r\n``` python\r\nbehav = 'BMI' # Which variable do we want to predict?\r\ncovars = ['Age_in_Yrs', 'Gender' ] # What variables do we want to correct for?\r\n\r\nbehav_data = get_behav_data(\"./path/to/merged.csv\", ids) # Loading of behavioral and biometrical data as a Pandas DataFrame from a CSV file\r\n# We need to binarize gender for our analysis\r\nbehav_data[\"Gender\"] = behav_data[\"Gender\"].replace(\"F\", 0)\r\nbehav_data[\"Gender\"] = behav_data[\"Gender\"].replace(\"M\", 1)\r\n\r\nfunctional_data = convert_matrices_to_dataframe(nifti_dim_to_cifti_dim(get_nimg_data(\"./path/to/GAMBLING_win-tangent.nii.gz\")), subj_ids) # Loading of correlation matrices as Pandas DataFrame\r\n```\r\n\r\n##### Starting the Ray handler\r\n\r\nray_handler() is a Python class through which data management and the starting and coordination of Ray Actors (i.e. 
the processes working in parallel) is being handled.\r\n\r\n``` python\r\nn_folds = 128 # In this example, we use 128 folds, which is a good starting point\r\nn_perm = 1000 # How many permutations are we planning to do later on?\r\n\r\nray_handler = RayHandler(\r\n    functional_data.copy(),\r\n    behav_data.copy(),\r\n    behav,\r\n    covars,\r\n    address=\"auto\", # We assume that the main Ray process runs on the same host\r\n    n_perm=n_perm,\r\n) # You can safely ignore the PerformanceWarning messages\r\n\r\nray_handler.add_kfold_indices(n_folds, clean=True) # By setting \"clean\" to True, we remove twins from the fold so they don't predict each other\r\nray_handler.upload_data() # Functional and behavioral data are uploaded into the shared storage to which Ray Actors have access\r\n```\r\n\r\n##### Starting the analysis\r\n\r\nFirst we define the jobs for the actors; a job is a Python list object consisting of the following items: \"job type\", \"fold number\", \"permutation number\". The permutation number for the actual, i.e. unpermutated, data is \"-1\".\r\n\r\n``` python\r\njob_list = [[\"fselection_and_prediction\", fold, perm] for perm in [-1] for fold in range(n_folds)] # This is the job list without permutations\r\n\r\n# If we wanted to also compute the permutations (which takes a very long time), the job list can be created as follows:\r\n#job_list = [[\"fselection_and_prediction\", fold, perm] for perm in [-1, *range(n_perm)] for fold in range(n_folds)]\r\n\r\nray_handler.start_actors(job_list) # Start computing\r\n```\r\n\r\n##### Monitoring progress and retrieving results\r\n\r\n``` python\r\nn_done, n_remaining, n_held = ray_handler.status() # Prints a status report (see screenshot)\r\n\r\nresults = ray_handler.get_results(n=100) # Retrieving a huge number of results (e.g. when performing permutation analysis)\r\n                                         # and especially from distributed Ray actors can take a long time. 
Specifying the\r\n                                         # number of results (e.g. n=100) to be retrieved from the results store at once\r\n                                         # allows for a display of progress\r\n\r\n# Optional: Save results into a file (NumPy format)\r\nnp.save(\"./GAMBLING_win-results.npy\", results)\r\n\r\n# This file can be used to restore results\r\nresults = np.load(\"./GAMBLING_win-results.npy\", allow_pickle=True).item()\r\n```\r\n\r\nYou might consider fetching results and saving them periodically with a simple loop:\r\n\r\n``` python\r\nsleep_time = 1200 # Sleep for 20 minutes and then rerun loop\r\nn_remaining = 1 # Set to something \u003e 0 to get the loop started\r\nresults_path = get_safe_path(\"./GAMBLING_win-results\", \".npy\")\r\nwhile n_remaining \u003e 0: # Run until no jobs to be fetched are remaining\r\n    n_done, n_remaining, n_held = ray_handler.status()\r\n    if n_held \u003e 0:\r\n        results = ray_handler.get_results(n=100)\r\n        # BEWARE: the file in results_path will be overwritten without asking\r\n        # but we have used get_safe_path for risk mitigation\r\n        print(\"\\nSaving results to {}...\".format(results_path), end='', flush=True)\r\n        np.save(results_path, results)\r\n        print(\" done.\")\r\n    else:\r\n        print(\"\\nNo results to fetch and save.\")\r\n    print(\"Sleeping for {} seconds...\".format(sleep_time))\r\n    sleep(sleep_time) # Will sleep for sleep_time seconds\r\n```\r\n\r\n\r\n#### Check for completion\r\n\r\nRarely single jobs or actors die before completion. 
Having run your analyses, you can check your results for completion as follows and rerun analyses as needed (the function ```check_for_completion``` will advise you on how to do this):\r\n\r\n``` python\r\nincomplete_jobs = check_for_completion(results)\r\n```\r\n\r\n#### Presenting results\r\n\r\nTo plot observed values against values as predicted by the GLM, use plot_predictions():\r\n\r\n``` python\r\nperm = -1 # Permutation -1 selects unpermutated results\r\ng = plot_predictions(results['prediction_results'][perm], tail=\"glm\", color=\"green\")\r\n```\r\n\r\n\r\nTo plot permutation results, use plot_permutation_results(), which will take either a list of paths to saved results files (f.i. when you want to combine multiple permutation runs) or the prediction results dictionary, which we will use in the following example:\r\n\r\n``` python\r\n# plot_permutation_results() will automatically remove incomplete permutations\r\n# and return/print basic descriptive statistics (minimum and maximum r value, total\r\n# number of permutations, and the minimum p value to be achieved with these permutations\r\n\r\nplot, min_value, max_value, count, min_p = plot_permutation_results(results['prediction_results'])\r\n```\r\n\r\nFor more presentation and plotting examples including static and interactive plots of predictive networks, see ```save.py``` in folder ```utils```.\r\n\r\n#### Overlap of predictive networks\r\n\r\ncpm_ray.py has a function ```get_overlap()``` to create overlap networks of different predictive networks. 
For example, if you have two different CPM results as generated by ```results = ray_handler.get_results()``` and saved in two files ```results_A.npy``` and ```results_B.npy```, you can do the following:\r\n\r\n``` python\r\n# Load results into Python\r\nresults_A = np.load(\"/path/to/results_A.npy\", allow_pickle=True).item()\r\nresults_B = np.load(\"/path/to/results_B.npy\", allow_pickle=True).item()\r\n\r\n# Load coordinates of parcels for plotting\r\ncoords = np.loadtxt(\"/path/to/hcp-suite/data/parcellations/RenTianGlasser.coords\")\r\n\r\n# Use overlap() function, which takes as its main input a list of results, in this case\r\n# [results_A, results_B]. You can specify a result as the odd one out (e.g. odd_one_out=0\r\n# for results_A), which means that result_A's tails will be switched, so that the positive\r\n# tail result A will be overlapped with the negative tail of result B and vice versa.\r\n#\r\n# overlap() returns a bunch of dictionaries with the tail (positive and negative predictive\r\n# networks)  as a primary key and usually several subkeys. 
These are more or less\r\n# self-explanatory.\r\n\r\noverlap, degrees_sorted, top_n_edges, plot_dict, m_cons = get_overlap([results_A, results_B], odd_one_out=None, coords=coords, plot_top_nodes=True, top_n_edges=50)\r\n\r\n```\r\n","organization":"CPM","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1234?version=1","name":"master @ 75f720e","author":["Tobias Bachmann"],"descriptor_type":[]}]},{"id":"1236","url":"https://workflowhub.eu/workflows/1236","name":"nf-core/references","description":"help community build references","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1236?version=1","name":"0.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1237","url":"https://workflowhub.eu/workflows/1237","name":"Use Case 1: Explain Drug-Drug Interactions","description":"The workflow starts with selecting Inflammation as the search term. The workflow starts with selecting Penicillin as the search term. The workflow starts with selecting Cortisol as the search term. Gene sets with set labels containing Inflammation were queried from Enrichr[1]. Identified matching terms from the GWAS Catalog 2019[2] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for GWAS_Catalog_2019. All the identified gene sets were combined using the union set operation. Identified matching terms from the MGI Mammalian Phenotype Level 4 2019[4] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for MGI_Mammalian_Phenotype_Level_4_2019. All the identified gene sets were combined using the union set operation. Identified matching terms from the Human Phenotype Ontology[5] library were assembled into a collection of gene sets. 
A GMT was extracted from the Enrichr results for Human_Phenotype_Ontology. All the identified gene sets were combined using the union set operation. Gene sets with set labels containing Penicillin were queried from Enrichr[1]. Identified matching terms from the LINCS L1000 Chem Pert Consensus Sigs[6] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for LINCS_L1000_Chem_Pert_Consensus_Sigs. Gene sets with set labels containing Cortisol were queried from Enrichr[1]. Identified matching terms from the LINCS L1000 Chem Pert Consensus Sigs[6] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for LINCS_L1000_Chem_Pert_Consensus_Sigs. The gene sets collected were combined into one gene set library. Multiple GMTs were combined into one GMT. The collection of gene sets was then visualized with a Supervenn diagram Fig.. \r\n1. Xie, Z. et al. Gene Set Knowledge Discovery with Enrichr. Current Protocols vol. 1 (2021). doi:10.1002/cpz1.90\r\n2. Sollis, E. et al. The NHGRI-EBI GWAS Catalog: knowledgebase and deposition resource. Nucleic Acids Research vol. 51 D977–D985 (2022). doi:10.1093/nar/gkac1010\r\n4. Blake, J. A. et al. Mouse Genome Database (MGD): Knowledgebase for mouse–human comparative biology. Nucleic Acids Research vol. 49 D981–D987 (2020). doi:10.1093/nar/gkaa1083\r\n5. Köhler, S. et al. The Human Phenotype Ontology in 2021. Nucleic Acids Research vol. 49 D1207–D1217 (2020). doi:10.1093/nar/gkaa1043\r\n6. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). 
doi:10.1093/nar/gkac328","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1237?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1237?version=2","name":"Version 2","author":[],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/1237?version=3","name":"Version 3","author":[],"descriptor_type":[]}]},{"id":"1238","url":"https://workflowhub.eu/workflows/1238","name":"Use Case 2: Explain MOAs of Side Effects for Approved Drugs","description":"The workflow starts with selecting atrial fibrillation as the search term. The workflow starts with selecting Ibrutinib as the search term. Gene sets with set labels containing atrial fibrillation were queried from Enrichr[1]. Identified matching terms from the MGI Mammalian Phenotype Level 4 2021[2] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for MGI_Mammalian_Phenotype_Level_4_2021. A consensus gene set was created by only retaining genes that appear in at least two sets. Identified matching terms from the GWAS Catalog 2019[4] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for GWAS_Catalog_2019. A consensus gene set was created by only retaining genes that appear in at least two sets. The gene sets collected were combined into one gene set library. Gene sets with set labels containing Ibrutinib were queried from Enrichr[1]. Identified matching terms from the LINCS L1000 Chem Pert Consensus Sigs[5] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for LINCS_L1000_Chem_Pert_Consensus_Sigs. Multiple GMTs were combined into one GMT. The collection of gene sets was then visualized with a Supervenn diagram Fig.. \r\n1. Xie, Z. et al. 
Gene Set Knowledge Discovery with Enrichr. Current Protocols vol. 1 (2021). doi:10.1002/cpz1.90\r\n2. Blake, J. A. et al. Mouse Genome Database (MGD): Knowledgebase for mouse–human comparative biology. Nucleic Acids Research vol. 49 D981–D987 (2020). doi:10.1093/nar/gkaa1083\r\n4. Sollis, E. et al. The NHGRI-EBI GWAS Catalog: knowledgebase and deposition resource. Nucleic Acids Research vol. 51 D977–D985 (2022). doi:10.1093/nar/gkac1010\r\n5. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1238?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1238?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1239","url":"https://workflowhub.eu/workflows/1239","name":"Use Case 4: Identify the Tissue Activity for a TF based on its Targets","description":"The workflow starts with selecting KLF4 as the search term. Gene sets with set labels containing KLF4 were queried from Enrichr[1]. Identified matching terms from the ENCODE TF ChIP-seq 2015[2] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for ENCODE_TF_ChIP-seq_2015. Identified matching terms from the ChEA 2022[4] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for ChEA_2022. Identified matching terms from the ARCHS4 TF Co-Expression[5] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for ARCHS4_TFs_Coexp. Multiple GMTs were combined into one GMT. A consensus gene set was created by only retaining genes that appear in at least two sets. 
The gene set was submitted to Enrichr[1]. The gene set was enriched against the GTEx Tissues V8 2023[6] library to identify statistically significant GTEx Tissue Signatures. \r\n1. Xie, Z. et al. Gene Set Knowledge Discovery with Enrichr. Current Protocols vol. 1 (2021). doi:10.1002/cpz1.90\r\n2. An integrated encyclopedia of DNA elements in the human genome. Nature vol. 489 57–74 (2012). doi:10.1038/nature11247\r\n4. Keenan, A. B. et al. ChEA3: transcription factor enrichment analysis by orthogonal omics integration. Nucleic Acids Research vol. 47 W212–W224 (2019). doi:10.1093/nar/gkz446\r\n5. Lachmann, A. et al. Massive mining of publicly available RNA-seq data from human and mouse. Nature Communications vol. 9 (2018). doi:10.1038/s41467-018-03751-6\r\n6. Lonsdale, J. et al. The Genotype-Tissue Expression (GTEx) project. Nature Genetics vol. 45 580–585 (2013). doi:10.1038/ng.2653","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1239?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1239?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1240","url":"https://workflowhub.eu/workflows/1240","name":"Use Case 5: Small Molecules to Induce a Biological Process","description":"The workflow starts with selecting Autophagy as the search term. Gene sets with set labels containing Autophagy were queried from Enrichr[1]. Identified matching terms from the MGI Mammalian Phenotype Level 4 2019[2] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for MGI_Mammalian_Phenotype_Level_4_2019. All the identified gene sets were combined using the union set operation. Reversers and mimickers from over 1 million signatures were identified using SigCom LINCS[4]. 
Resolved drugs from the LINCS L1000 Chemical Perturbagens library. Identified matching terms from the KEGG 2021 Human[6] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for KEGG_2021_Human. All the identified gene sets were combined using the union set operation. Reversers and mimickers from over 1 million signatures were identified using SigCom LINCS[4]. Identified matching terms from the GO Biological Process 2021[7] library were assembled into a collection of gene sets. A GMT was extracted from the Enrichr results for GO_Biological_Process_2021. All the identified gene sets were combined using the union set operation. Reversers and mimickers from over 1 million signatures were identified using SigCom LINCS[4]. Resolved drugs from the LINCS L1000 Chemical Perturbagens library. Resolved drugs from the LINCS L1000 Chemical Perturbagens library. The mean across multiple Scored Drugs is computed. The drugs were filtered by FDA Approved Drugs with the help of PubChem APIs[8]. \r\n1. Xie, Z. et al. Gene Set Knowledge Discovery with Enrichr. Current Protocols vol. 1 (2021). doi:10.1002/cpz1.90\r\n2. Blake, J. A. et al. Mouse Genome Database (MGD): Knowledgebase for mouse–human comparative biology. Nucleic Acids Research vol. 49 D981–D987 (2020). doi:10.1093/nar/gkaa1083\r\n4. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n6. Kanehisa, M., Furumichi, M., Sato, Y., Kawashima, M. \u0026 Ishiguro-Watanabe, M. KEGG for taxonomy-based analysis of pathways and genomes. Nucleic Acids Research vol. 51 D587–D592 (2022). doi:10.1093/nar/gkac963\r\n7. Ashburner, M. et al. Gene Ontology: tool for the unification of biology. Nature Genetics vol. 25 25–29 (2000). doi:10.1038/75556\r\n8. Kim, S. et al. PubChem 2023 update. Nucleic Acids Research vol. 51 D1373–D1380 (2022). 
doi:10.1093/nar/gkac956","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1240?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1240?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1241","url":"https://workflowhub.eu/workflows/1241","name":"Use Case 6: CFDE Knowledge about a Variant","description":"The workflow starts with selecting chr10:g.3823823G\u003eA as the search term. The closest gene to the variant was found using MyVariant.info[1]. RNA-seq-like LINCS L1000 Signatures[3] which mimick or reverse the the expression of KLF6 were visualized. Median expression of KLF6 was obtained from the GTEx Portal[8] using the portal's API. To visualize the scored tissues, a vertical bar plot was created Fig.. \r\n1. Lelong, S. et al. BioThings SDK: a toolkit for building high-performance data APIs in biomedical research. Bioinformatics vol. 38 2077–2079 (2022). doi:10.1093/bioinformatics/btac017\r\n3. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n8. Lonsdale, J. et al. The Genotype-Tissue Expression (GTEx) project. Nature Genetics vol. 45 580–585 (2013). 
doi:10.1038/ng.2653","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1241?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1241?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1242","url":"https://workflowhub.eu/workflows/1242","name":"Use Case 6: CFDE Knowledge about a Gene","description":"The workflow starts with selecting KLF6 as the search term. RNA-seq-like LINCS L1000 Signatures[1] which mimick or reverse the the expression of KLF6 were visualized. Median expression of KLF6 was obtained from the GTEx Portal[6] using the portal's API. To visualize the scored tissues, a vertical bar plot was created Fig.. \r\n1. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n6. Lonsdale, J. et al. The Genotype-Tissue Expression (GTEx) project. Nature Genetics vol. 45 580–585 (2013). doi:10.1038/ng.2653","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1242?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1242?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1243","url":"https://workflowhub.eu/workflows/1243","name":"Use Case 7: Variant Expression in Tumor/Healthy","description":"The workflow starts with selecting chr2:g.39417578C\u003eG as the search term. The closest gene to the variant was found using MyVariant.info[1]. Gene expression in tumors for CDKL4 were queried from the Open Pediatric Cancer Atlas API[3]. 
Median expression of CDKL4 was obtained from the GTEx Portal[4] using the portal's API. To visualize the level of expression across tumor gene expression, a bar plot was created Fig.. \r\n1. Lelong, S. et al. BioThings SDK: a toolkit for building high-performance data APIs in biomedical research. Bioinformatics vol. 38 2077–2079 (2022). doi:10.1093/bioinformatics/btac017\r\n3. Shapiro, J. A. et al. OpenPBTA: The Open Pediatric Brain Tumor Atlas. Cell Genomics vol. 3 100340 (2023). doi:10.1016/j.xgen.2023.100340\r\n4. Lonsdale, J. et al. The Genotype-Tissue Expression (GTEx) project. Nature Genetics vol. 45 580–585 (2013). doi:10.1038/ng.2653","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1243?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1243?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1244","url":"https://workflowhub.eu/workflows/1244","name":"Use Case 10: Guilt by Association","description":"The workflow starts with a gene set created from Example gene set. CTD is applied which diffuses through all nodes in STRING[1] to identify nodes that are \"guilty by association\" and highly connected to the initial gene set of interest[2][3]. A list of Highly Connected Genes was obtained from the CTD output. A list of Guilty By Association Genes was obtained from the CTD output. \r\n1. Szklarczyk, D. et al. STRING v10: protein–protein interaction networks, integrated over the tree of life. Nucleic Acids Research vol. 43 D447–D452 (2014). doi:10.1093/nar/gku1003\r\n2. Thistlethwaite, L. R. et al. Correction: CTD: An information-theoretic algorithm to interpret sets of metabolomic and transcriptomic perturbations in the context of graphical models. PLOS Computational Biology vol. 17 e1009551 (2021). 
doi:10.1371/journal.pcbi.1009551\r\n3. Petrosyan, V. et al. Identifying biomarkers of differential chemotherapy response in TNBC patient-derived xenografts with a CTD/WGCNA approach. iScience vol. 26 105799 (2023). doi:10.1016/j.isci.2022.105799","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1244?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1244?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1245","url":"https://workflowhub.eu/workflows/1245","name":"Use Case 11: Related Proteins/Metabolites across DCCs","description":"The workflow starts with selecting RPE as the search term. For the given gene ID (SYMBOL), StringDB PPI was extracted using their API[1]. For the Given StringDB PPI, the list of nodes (Gene Set) is generated. For the Given StringDB PPI, the list of nodes (GeneSet) is generated. Reversers and mimickers from over 1 million signatures were identified using SigCom LINCS[2]. The gene set was submitted to Enrichr[4]. The gene set was then searched in the Metabolomics Workbench[5] to identify relevant reactions. The gene set was then searched in the Metabolomics Workbench [Metabolomics Workbench, [7] to identify associated metabolites. The gene set was then searched in the Metabolomics Workbench[5] to identify relevant studies related to the genes. \r\n1. Szklarczyk, D. et al. The STRING database in 2023: protein–protein association networks and functional enrichment analyses for any sequenced genome of interest. Nucleic Acids Research vol. 51 D638–D646 (2022). doi:10.1093/nar/gkac1000\r\n2. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n4. Xie, Z. et al. 
Gene Set Knowledge Discovery with Enrichr. Current Protocols vol. 1 (2021). doi:10.1002/cpz1.90\r\n5. The Metabolomics Workbench, https://www.metabolomicsworkbench.org/\r\n7. https://www.metabolomicsworkbench.org/","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1245?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1245?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1246","url":"https://workflowhub.eu/workflows/1246","name":"Use Case 13: Novel Cell Surface Targets for Individual Cancer Patients Analyzed with Common Fund Datasets","description":"A file was first uploaded. The file was parsed as a gene count matrix. Significantly over-expressed genes when compared to tissue expression in GTEx[1] were identified. RNA-seq-like LINCS L1000 Signatures[3] which mimick or reverse the the expression of IMP3 were visualized. Drugs which down-regulate the expression of IMP3 were identified from the RNA-seq-like LINCS L1000 Chemical Perturbagens[3]. Genes which down-regulate the expression of IMP3 were identified from the RNA-seq-like LINCS L1000 CRISPR Knockouts[3]. Genes were filtered by IDG Understudied Proteins[8]. The gene was searched with the MetGENE tool providing pathways, reactions, metabolites, and studies from the Metabolomics Workbench[9]. IMP3 was then searched in the Metabolomics Workbench[11] to identify associated metabolites. IMP3 was then searched in the Metabolomics Workbench[11] to identify relevant reactions. A list of regulatory elements in the vicinity of the gene were retrieved from the CFDE Linked Data Hub[14]. The GlyGen database[18] was searched to identify a relevant set of protein products that originate from IMP3. \r\n1. Lonsdale, J. et al. The Genotype-Tissue Expression (GTEx) project. Nature Genetics vol. 
45 580–585 (2013). doi:10.1038/ng.2653\r\n3. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n8. IDG Protein List, https://druggablegenome.net/IDGProteinList\r\n9. MetGENE, https://sc-cfdewebdev.sdsc.edu/MetGENE/metGene.php\r\n11. The Metabolomics Workbench, https://www.metabolomicsworkbench.org/\r\n14. CFDE Linked Data Hub, https://ldh.genome.network/cfde/ldh/\r\n18. York, W. S. et al. GlyGen: Computational and Informatics Resources for Glycoscience. Glycobiology vol. 30 72–73 (2019). doi:10.1093/glycob/cwz080","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1246?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1246?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1248","url":"https://workflowhub.eu/workflows/1248","name":"Use Case 3: Compounds to Reverse Disease Signatures","description":"\u003chttps://playbook-workflow-builder.cloud/report/f43bfb7a-557a-76d4-250c-7a44c81d70d7\u003e\r\n\r\nA file containing GEO Aging Signatures was first uploaded. The file containing GEO Aging Signatures was loaded as a gene signature. A file containing GTEx Aging Signatures was first uploaded. The file containing GTEx Aging Signatures was loaded as a gene signature. Significant genes were extracted from the GEO Aging Signatures. Significant genes were extracted from the GTEx Aging Signatures. Reversers and mimickers from over 1 million signatures were identified using SigCom LINCS[1]. Resolved drugs from the LINCS L1000 Chemical Perturbagens library. Reversers and mimickers from over 1 million signatures were identified using SigCom LINCS[1]. 
Resolved drugs from the LINCS L1000 Chemical Perturbagens library. The mean across multiple Scored Drugs is computed. The drugs were filtered by FDA Approved Drugs with the help of PubChem APIs[3]. \r\n1. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709 (2022). doi:10.1093/nar/gkac328\r\n3. Kim, S. et al. PubChem 2023 update. Nucleic Acids Research vol. 51 D1373–D1380 (2022). doi:10.1093/nar/gkac956","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1248?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1248?version=2","name":"Version 2","author":[],"descriptor_type":[]}]},{"id":"1249","url":"https://workflowhub.eu/workflows/1249","name":"Use case 9: Explore regulatory information associated with a regulatory element","description":"The workflow starts with selecting EH38E2924876 as the search term. Genomic position of provided unique regulatory element identifier was retrieved from CFDE Linked Data Hub[1]. A list of variants in the region of the regulatory element was retrieved from CFDE Linked Data Hub[1]. Variant/variant set associated allele specific epigenomic signatures were retrieved from CFDE LDH[5] based on Roadmap and ENTEx data[6], [4]. GTEx eQTL and sQTL evidence for the given variant(s) were retrieved from CFDE LDH[5][3]. MyVariant.info, dbSNP, gnomAD, and other common identifiers for the given variant(s) were retrieved from ClinGen Allele Registry[7]. Gene(s) in the vicinity of the given variant(s) were retrieved from MyVariant.info API results[8]. A list of genes in the 10kbps region of the given regulatory element was retrieved from CFDE Linked Data Hub[1]. \r\n1. CFDE Linked Data Hub, https://ldh.genome.network/cfde/ldh/\r\n3. 
The GTEx Consortium atlas of genetic regulatory effects across human tissues. Science vol. 369 1318–1330 (2020). doi:10.1126/science.aaz1776\r\n4. Onuchic, V. et al. Allele-specific epigenome maps reveal sequence-dependent stochastic switching at regulatory loci. Science vol. 361 (2018). doi:10.1126/science.aar3146\r\n5. Genomic Location Registry, https://reg.genome.network/reg/loc/\r\n6. Integrative analysis of 111 reference human epigenomes. Nature vol. 518 317–330 (2015). doi:10.1038/nature14248\r\n7. Pawliczek, P. et al. ClinGen Allele Registry links information about genetic variants. Human Mutation vol. 39 1690–1701 (2018). doi:10.1002/humu.23637\r\n8. Lelong, S. et al. BioThings SDK: a toolkit for building high-performance data APIs in biomedical research. Bioinformatics vol. 38 2077–2079 (2022). doi:10.1093/bioinformatics/btac017","organization":"NIH CFDE Playbook Workflow Partnership","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1249?version=1","name":"Version 1","author":[],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1249?version=2","name":"Version 2","author":[],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/1249?version=3","name":"Version 3","author":[],"descriptor_type":[]}]},{"id":"1250","url":"https://workflowhub.eu/workflows/1250","name":"nf-core/rangeland","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-rangeland_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/rangeland\" src=\"docs/images/nf-core-rangeland_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e[![GitHub Actions CI Status](https://github.com/nf-core/rangeland/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/rangeland/actions/workflows/ci.yml)\n[![GitHub Actions Linting 
Status](https://github.com/nf-core/rangeland/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rangeland/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/rangeland/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rangeland)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rangeland-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/rangeland)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/rangeland** is a geographical best-practice analysis pipeline for remotely sensed 
imagery.\nThe pipeline processes satellite imagery alongside auxiliary data in multiple steps to arrive at a set of trend files related to land-cover changes. The main pipeline steps are:\n\n1. Read satellite imagery, digital elevation model (dem), endmember definition, water vapor database (wvdb), datacube definition and area of interest definition (aoi)\n2. Generate allow list and analysis mask to determine which pixels from the satellite data can be used\n3. Preprocess data to obtain atmospherically corrected images alongside quality assurance information (aka. level 2 analysis read data)\n4. Merge spatially and temporally overlapping preprocessed data\n5. Classify pixels by applying linear spectral unmixing\n6. Time series analyses to obtain trends in vegetation dynamics to derive level 3 data\n7. Create mosaic and pyramid visualizations of the results\n8. Version reporting with MultiQC ([`MultiQC`](http://multiqc.info/))\n\n\u003cp align=\"center\"\u003e\n    \u003cimg title=\"nf-core/rangeland diagram\" src=\"docs/images/rangeland_diagram.png\" width=95%\u003e\n\u003c/p\u003e\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow.Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nTo run, satellite imagery, water vapor data, a digital elevation model, endmember definitions, a datacube specification, and a area-of-interest specification are required as input data.\nPlease refer to the [usage documentation](https://nf-co.re/rangeland/usage) for details on the input structure.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/rangeland \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input \u003cSATELLITE IMAGES\u003e \\\n   --dem \u003cDIGITAL ELEVATION MODEL\u003e \\\n   --wvdb \u003cWATER 
VAPOR DATA\u003e \\\n   --data_cube \u003cDATA CUBE\u003e \\\n   --aoi \u003cAREA OF INTEREST\u003e \\\n   --endmember \u003cENDMEMBER SPECIFICATION\u003e \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rangeland/usage) and the [parameter documentation](https://nf-co.re/rangeland/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rangeland/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/rangeland/output).\n\n## Credits\n\nThe rangeland workflow was originally written by:\n\n- [Fabian Lehmann](https://github.com/Lehmann-Fabian)\n- [David Frantz](https://github.com/davidfrantz)\n\nThe original workflow can be found on [github](https://github.com/CRC-FONDA/FORCE2NXF-Rangeland).\n\nTransformation to nf-core/rangeland was conducted by [Felix Kummer](https://github.com/Felix-Kummer).\nnf-core alignment started on the [nf-core branch of the original repository](https://github.com/CRC-FONDA/FORCE2NXF-Rangeland/tree/nf-core).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Fabian Lehmann](https://github.com/Lehmann-Fabian)\n- [Katarzyna Ewa Lewinska](https://github.com/kelewinska).\n\n## Acknowledgements\n\nThis pipeline was developed and aligned with nf-core as part of the [Foundations of Workflows for Large-Scale Scientific Data Analysis 
(FONDA)](https://fonda.hu-berlin.de/) initiative.\n\n[![FONDA](docs/images/fonda_logo2_cropped.png)](https://fonda.hu-berlin.de/)\n\nFONDA can be cited as follows:\n\n\u003e **The Collaborative Research Center FONDA.**\n\u003e\n\u003e Ulf Leser, Marcus Hilbrich, Claudia Draxl, Peter Eisert, Lars Grunske, Patrick Hostert, Dagmar Kainmüller, Odej Kao, Birte Kehr, Timo Kehrer, Christoph Koch, Volker Markl, Henning Meyerhenke, Tilmann Rabl, Alexander Reinefeld, Knut Reinert, Kerstin Ritter, Björn Scheuermann, Florian Schintke, Nicole Schweikardt, Matthias Weidlich.\n\u003e\n\u003e _Datenbank Spektrum_ 2021 doi: [10.1007/s13222-021-00397-5](https://doi.org/10.1007/s13222-021-00397-5)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#rangeland` channel](https://nfcore.slack.com/channels/rangeland) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. --\u003e\n\u003c!-- If you use nf-core/rangeland for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\nThis pipeline is based one the publication listed below.\nThe publication can be cited as follows:\n\n\u003e **FORCE on Nextflow: Scalable Analysis of Earth Observation Data on Commodity Clusters**\n\u003e\n\u003e [Lehmann, F., Frantz, D., Becker, S., Leser, U., Hostert, P. (2021). FORCE on Nextflow: Scalable Analysis of Earth Observation Data on Commodity Clusters. In CIKM Workshops.](https://www.informatik.hu-berlin.de/de/forschung/gebiete/wbi/research/publications/2021/force_nextflow.pdf/@@download/file/force_nextflow.pdf)\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1250?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1251","url":"https://workflowhub.eu/workflows/1251","name":"A workflow to extract individual trees from 3D point cloud data obtained by airborne laser scanning","description":"# Point-based Individual Tree Delineation from 3D LiDAR Point Cloud Data.\r\n\r\nThis module implements a lightweight and easy-to-use **_Point-based_** method for individual tree delineation from 3D point cloud data using pure C/C++.\r\n\r\nThe source code files are included in folder **[TreeSeparation]**, which consists of a project generated from _Visual Studio 2015_. The **CLASS** for tree separation is named \"FoxTree\" and can be found in the respect **_FoxTree.h_** and **_FoxTree.cpp_** files. 
\r\n\r\n## Inupt\r\n The input of this module is **TREE Points** only, as illustrated by the figures below.\r\n\r\n![test-02](TestDatasets/test-02.png)\r\n\r\n![Another test data](TestDatasets/test-03.png)\r\n\r\n![One more test data](TestDatasets/test-04.png)\r\n \r\n The format of the tree points is **_*.xyz_**, such as:\r\n ``` javascript {.line-numbers}\r\n          x            y         z          \r\n      623772.9200 4834465.5900 77.7409     \r\n         ...         ...        ...       \r\n```\r\nNote that if the original data does not have color information, either initiate the last three columns with arbitrary integers or modify the code on data loading. \r\n\r\n\r\n## Parameters\r\nThere are three parameters have to be initialized for optimal individualization results:\r\n\r\n     \r\n     *  Searching radius;\r\n     *  Vertical resolution;\r\n     *  Minimum number of points per cluster;\r\n     \r\n\r\nAs demonstrated by the **code snippet** below (Note that the parameters are based on geo-referenced point cloud data.):\r\n\r\n\r\n``` javascript {.line-numbers}\r\n\t//Parameter settings\r\n\t\r\n\tconst double radius = 1.0;  \r\n\t//Searching Radius, 1.0 meter;\r\n\t\r\n\tconst double verticalResolution = 1.0;  \r\n\t//Vertical resolution of the layers, 1.0 meter;\r\n\t\r\n\tconst int miniPtsPerCluster = 5; \r\n\t// Minimum number of points per cluster, 5 points;\r\n```\r\n\r\n**Hints on Parameter Settings:**\r\n\r\n**_Radius_** should be in accordance with the average point density, i.e. to ensure there are a certain number of points within the radius.\r\n\r\n**_VerticalResolution_** depends on the overall point density and fineness of results.\r\n\r\n## Output\r\nThe output of this implementation is an ASCII format **_*.xyz_** file as well:\r\n```javascript {.line-numbers}\r\n\ttreeID        x            y         z         r      g     b \r\n\t  89     623942.8999 4833932.5500   77.8399   36      76    89\r\n\t  ...         ...        ...         
...      ...    ...    ...\r\n```\r\nNotably, the first column is the **index of tree** of which this point is assigned. The last three columns are randomly designated color for the points of a same tree.\r\n\r\nThe individual tree delineation results are given as the figures below:\r\n![Individual tree delineation results](Results/test-02-results-1.0-0.7-3.png)\r\n![Individual tree delineation results](Results/test-02-results-1.0-0.7-3_01.png)\r\n![Individual tree delineation results](Results/test-03-results-1.0-0.5-3.png)\r\n![Individual tree delineation results](Results/test-04-results-1.0-0.8-5.png)\r\n\r\n## Declarations\r\nIn this implementation,  **nanoflann** is employed for **_KNN_** searching, which can be found from here [link](https://github.com/jlblancoc/nanoflann). \r\n\r\n## Support \u0026 Contact\r\n\r\nShould you have any questions, comments, BUG(s) reporting, or IDEAS for further improvements? Please contact:\r\n\r\n**Jinhu Wang**\r\n\r\n\r\njinhu.wang (at) hotmail.com  \r\n\r\n\r\n\r\n\r\nor \r\n \r\n \r\n**Roderik Lindenbergh**\r\n\r\n\r\nr.c.lindenbergh (at) tudelft.nl. \r\n\r\n\r\nhttp://doris.tudelft.nl/~rlindenbergh/\r\n\r\nDec. 
9, 2018\r\n\r\nCopyright (C) 2018\r\n\r\n","organization":"LiDAR","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1251?version=1","name":"master @ e5828a7","author":[],"descriptor_type":[]}]},{"id":"1253","url":"https://workflowhub.eu/workflows/1253","name":"Allele-specific expression-network clustering analysis for Huntington's Disease","description":"","organization":"EJPRD WP13 case-studies workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1253?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"1255","url":"https://workflowhub.eu/workflows/1255","name":"annotation-helixer/main","description":"This workflow allows you to annotate a genome with Helixer and evaluate the quality of the annotation using BUSCO and Genome Annotation statistics. GFFRead is also used to predict protein sequences derived from this annotation, and BUSCO and OMArk are used to assess proteome quality. ","organization":"EuroScienceGateway, Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1255?version=1","name":"v0.1","author":["Romane Libouban"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1255?version=2","name":"v0.2","author":["Romane Libouban"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1255?version=3","name":"v0.3","author":["Romane Libouban"],"descriptor_type":["GALAXY"]}]},{"id":"1256","url":"https://workflowhub.eu/workflows/1256","name":"scanpy-clustering/main","description":"Single-cell RNA-seq workflow with Scanpy and Anndata. Based on the 3k PBMC clustering tutorial from Scanpy. 
It takes count matrix, barcodes and feature files as input and creates an Anndata object out of them. It then performs QC and filters for lowly expressed genes and cells. Then the data is normalized and scaled. Then PCs are computed to further cluster using louvain algorithm. It also generated various plots of clustering colored with highly ranked genes.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1256?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1257","url":"https://workflowhub.eu/workflows/1257","name":"OMERO Image Import and Annotation Workflow","description":"**General workflow to upload data into OMERO using KNIME**\r\n\r\nThe workflow consists of two main branches: the Green Branch, which imports a folder containing images, and the Purple Branch, which enables the annotation of metadata as key-value pairs.\r\n\r\n* **Fetching Images:** The first step involves fetching images from a locally accessible folder.\r\n* **User Authentication:** Users are prompted to input their OMERO username and password through a Java snippet. This information is then converted into variables that can be used by the Python script node.\r\n* **Image Import:** The Python script node utilizes ezomero to execute the image import process.\r\n* **Temporary Folder Deletion:** After the import process is complete, the temporary folder is deleted.\r\n\r\nA dataset for testing can be found at: https://zenodo.org/records/14205500\r\n\r\n**Important Security Note:** It is crucial to be aware that storing credentials as variables can pose security risks, particularly if accessed by administrators. 
Therefore, it is essential to handle user credentials securely and in accordance with best practices.\r\n\r\n","organization":"UFZ - Image Data Management and Processing Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1257?version=1","name":"Version 1","author":["Riccardo Massei"],"descriptor_type":[]}]},{"id":"1258","url":"https://workflowhub.eu/workflows/1258","name":"OMERO Image Import, Annotation and Region of Interests (ROIs) Workflow","description":"General workflow to upload data into OMERO using Galaxy\r\n\r\nA dataset for testing can be found at: https://zenodo.org/records/14205500\r\n\r\n**Important Security Note:** It is crucial to be aware that storing credentials as variables can pose security risks, particularly if accessed by administrators. Therefore, it is essential to handle user credentials securely and in accordance with best practices.\r\n","organization":"UFZ - Image Data Management and Processing Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1258?version=1","name":"Version 1","author":["Riccardo Massei"],"descriptor_type":["GALAXY"]}]},{"id":"1259","url":"https://workflowhub.eu/workflows/1259","name":"OMERO Nuclei Cell Counting, Image Import, Annotation and Region of Interests (ROIs) Workflow","description":"Workflow to perform nuclei cell counting on High Content Screening (HCS) Data and upload result into OMERO\r\n\r\nIn this workflow, cell images are first uploaded to both Galaxy and OMERO using the  “OMERO Image Import” tool. \r\nConcurrently, image processing is performed. After thresholding and binarization, key features of nuclei, such as area, label number, and perimeter, are computed from the processed images and saved as a CSV file. 
\r\nThe result file is then attached to each image stored in OMERO using the “OMERO Metadata Import” tool. \r\nThe “Label Extraction” tool generates ROI coordinates from the binary image, which are subsequently uploaded into OMERO using the “OMERO ROI Import” tool. \r\n\r\nA dataset for testing can be found at: https://zenodo.org/records/14205500\r\n\r\n**Important Security Note:** It is crucial to be aware that storing credentials as variables can pose security risks, particularly if accessed by administrators. Therefore, it is essential to handle user credentials securely and in accordance with best practices.","organization":"UFZ - Image Data Management and Processing Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1259?version=1","name":"Version 1","author":["Riccardo Massei"],"descriptor_type":["GALAXY"]}]},{"id":"1260","url":"https://workflowhub.eu/workflows/1260","name":"influenza-isolates-consensus-and-subtyping/main","description":"This workflow performs subtyping and consensus sequence generation for batches of Illumina PE sequenced Influenza A isolates.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1260?version=1","name":"v0.1","author":["Wolfgang Maier","Saim Momin"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1260?version=2","name":"v0.2","author":["Wolfgang Maier","Saim Momin"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1260?version=3","name":"v0.3","author":["Wolfgang Maier","Saim Momin"],"descriptor_type":["GALAXY"]}]},{"id":"1261","url":"https://workflowhub.eu/workflows/1261","name":"Workflow for species distribution modeling with ModGP","description":"Crop Wild Relatives distribution modeling workflow using the ModGP; 
a prototype Digital Twin from BioDT.","organization":"Senckenberg Digital Collection and Biodiversity Information Technologies","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1261?version=1","name":"Version 1","author":["Daniel Bauer"],"descriptor_type":[]}]},{"id":"1262","url":"https://workflowhub.eu/workflows/1262","name":"functional-annotation-protein-sequences/main","description":"This workflow uses eggNOG mapper and InterProScan for functional annotation of protein sequences.","organization":"EuroScienceGateway, Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1262?version=1","name":"v0.1","author":["Romane Libouban","Anthony Bretaudeau"],"descriptor_type":["GALAXY"]}]},{"id":"1263","url":"https://workflowhub.eu/workflows/1263","name":"ONT-bacpac-nf","description":"A rapid and portable workflow for pond-side sequencing of bacterial pathogens for sustainable aquaculture using ONT long-read sequencing.","organization":"Sydney Informatics Hub","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1263?version=1","name":"main @ dfabc50","author":["Georgina Samaha","Mitchell J O'Brien"],"descriptor_type":["NFL"]}]},{"id":"1265","url":"https://workflowhub.eu/workflows/1265","name":"OMERO: VAST Data Preparation, Metadata Creation, and ROI Upload with FishInspector Annotations","description":"This KNIME workflow is designed to facilitate the loading of image data from OMERO.\r\nIt includes key preprocessing steps for VAST data, such as metadata creation and the linking of Key-Value Pairs. 
\r\n\r\n\r\n* **Fetching Images**: The first step involves fetching images from a locally accessible folder.\r\n* **User Authentication:** Users are prompted to input their OMERO username and password through a Java snippet. This information is then converted into variables that can be used by the Python script node.\r\n* **Image Import:** The Python script node utilizes ezomero to execute the image import process.\r\n* **Import Metadata:** Purple and Green branches allow uploading metadata as Key-Value Pairs and Tables\r\n* **Import ROIs:** ROI created with Fish Inspector will be automatically uploaded into OMERO. (ROIs need to be reported in a json file)\r\n* **Temporary Folder Deletion:** After the import process is complete, the temporary folder is deleted.\r\n\r\nA dataset for testing can be found at: https://zenodo.org/records/14790777\r\n\r\n**Important Security Note:** It is crucial to be aware that storing credentials as variables can pose security risks, particularly if accessed by administrators. 
Therefore, it is essential to handle user credentials securely and in accordance with best practices.","organization":"UFZ - Image Data Management and Processing Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1265?version=1","name":"Version 1","author":["Riccardo Massei"],"descriptor_type":[]}]},{"id":"1266","url":"https://workflowhub.eu/workflows/1266","name":"nf-core/drugresponseeval","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-drugresponseeval_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/drugresponseeval\" src=\"docs/images/nf-core-drugresponseeval_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/drugresponseeval/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/drugresponseeval/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/drugresponseeval/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/drugresponseeval/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/drugresponseeval/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14779984-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14779984)\n\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/drugresponseeval)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23drugresponseeval-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/drugresponseeval)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n# ![drevalpy_summary](assets/dreval_summary.svg)\n\n**DrEval** is a bioinformatics framework that includes a PyPI package (drevalpy) and a Nextflow\npipeline (this repo). DrEval ensures that evaluations are statistically sound, biologically\nmeaningful, and reproducible. DrEval simplifies the implementation of drug response prediction\nmodels, allowing researchers to focus on advancing their modeling innovations by automating\nstandardized evaluation protocols and preprocessing workflows. 
With DrEval, hyperparameter\ntuning is fair and consistent. With its flexible model interface, DrEval supports any model type,\nranging from statistical models to complex neural networks. By contributing your model to the\nDrEval catalog, you can increase your work's exposure, reusability, and transferability.\n\n1. The response data is loaded\n2. All models are trained and evaluated in a cross-validation setting\n3. For each CV split, the best hyperparameters are determined using a grid search per model\n4. The model is trained on the full training set (train \u0026 validation) with the best\n   hyperparameters to predict the test set\n5. If randomization tests are enabled, the model is trained on the full training set with the best\n   hyperparameters to predict the randomized test set\n6. If robustness tests are enabled, the model is trained N times on the full training set with the\n   best hyperparameters\n7. Plots are created summarizing the results\n\nFor baseline models, no randomization or robustness tests are performed.\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/drugresponseeval \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --models \u003cRandomForest,model2,...\u003e \\\n   --baselines \u003cNaiveMeanEffectsPredictor,baseline2,...\u003e \\\n   --dataset_name \u003cCTRPv2|CTRPv1|CCLE|GDSC1|GDSC2|custom_dataset\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/drugresponseeval/usage) and the [parameter documentation](https://nf-co.re/drugresponseeval/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/drugresponseeval/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/drugresponseeval/output).\n\n## Credits\n\nnf-core/drugresponseeval was originally written by Judith Bernett (TUM) and Pascal Iversen (FU\nBerlin).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n## Contributions and Support\n\nContributors to nf-core/drugresponseeval and the drevalpy PyPI package:\n\n- [Judith Bernett](https://github.com/JudithBernett) (TUM)\n- [Pascal Iversen](https://github.com/PascalIversen) (FU Berlin)\n- [Mario Picciani](https://github.com/picciama) (TUM)\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#drugresponseeval` channel](https://nfcore.slack.com/channels/drugresponseeval) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/drugresponseeval for your analysis, please cite it using the following doi: [10.5281/zenodo.14779984](https://doi.org/10.5281/zenodo.14779984)\n\n\u003e Our corresponding publication is at doi [10.1101/2025.05.26.655288](doi.org/10.1101/2025.05.26.655288)\n\u003e\n\u003e Bernett, J., Iversen, P., Picciani, 
M., Wilhelm, M., Baum, K., \u0026 List, M. **From Hype to Health Check: Critical Evaluation of Drug Response Prediction Models with DrEval.**\n\u003e\n\u003e _bioRxiv_, 2025-05.\n\nThe underlying data is available at doi: [10.5281/zenodo.12633909](https://doi.org/10.5281/zenodo.12633909).\n\nThe underlying python package is drevalpy, available on [PyPI](https://pypi.org/project/drevalpy/) as standalone, for which we also have an extensive [ReadTheDocs Documentation](https://drevalpy.readthedocs.io/en/latest/).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1266?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1266?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1267","url":"https://workflowhub.eu/workflows/1267","name":"Source halo","description":"Calculate extended gamma-ray source halo using crbeam simulation","organization":"Astroparticle Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1267?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1269","url":"https://workflowhub.eu/workflows/1269","name":"mgnify-amplicon-taxonomic-summary-tables/main","description":"This 
workflow creates taxonomic summary tables out of the amplicon pipeline results. ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1269?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1269?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1270","url":"https://workflowhub.eu/workflows/1270","name":"mgnify-amplicon-pipeline-v5-rrna-prediction/main","description":"Classification and visualization of SSU, LSU sequences.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1270?version=1","name":"v0.1","author":["Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1270?version=2","name":"v0.2","author":["Paul Zierep"],"descriptor_type":["GALAXY"]}]},{"id":"1271","url":"https://workflowhub.eu/workflows/1271","name":"mgnify-amplicon-pipeline-v5-quality-control-single-end/main","description":"Quality control subworkflow for single-end reads.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1271?version=1","name":"v0.1","author":["Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1271?version=2","name":"v0.2","author":["Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1271?version=3","name":"v0.3","author":["Paul Zierep"],"descriptor_type":["GALAXY"]}]},{"id":"1272","url":"https://workflowhub.eu/workflows/1272","name":"mgnify-amplicon-pipeline-v5-quality-control-paired-end/main","description":"Quality 
control subworkflow for paired-end reads. ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1272?version=1","name":"v0.1","author":["Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1272?version=2","name":"v0.2","author":["Paul Zierep"],"descriptor_type":["GALAXY"]}]},{"id":"1273","url":"https://workflowhub.eu/workflows/1273","name":"mgnify-amplicon-pipeline-v5-its/main","description":"Classification and visualization of ITS regions.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1273?version=1","name":"v0.1","author":["Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1273?version=2","name":"v0.2","author":["Paul Zierep"],"descriptor_type":["GALAXY"]}]},{"id":"1274","url":"https://workflowhub.eu/workflows/1274","name":"mgnify-amplicon-pipeline-v5-complete/main","description":"MGnify's amplicon pipeline v5.0. Including the Quality control for single-end and paired-end reads, rRNA-prediction, and ITS sub-WFs.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1274?version=1","name":"v0.1","author":["Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1274?version=2","name":"v0.2","author":["Paul Zierep"],"descriptor_type":["GALAXY"]}]},{"id":"1275","url":"https://workflowhub.eu/workflows/1275","name":"mapseq-to-ampvis2/main","description":"The MAPseq to Ampvis workflow processes MAPseq OTU tables and associated metadata for analysis in Ampvis2. 
This workflow involves reformatting MAPseq output datasets to produce structured output files suitable for Ampvis2.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1275?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1275?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1276","url":"https://workflowhub.eu/workflows/1276","name":"nf-core/fastqrepair","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-fastqrepair_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/fastqrepair\" src=\"docs/images/nf-core-fastqrepair_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/fastqrepair/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/fastqrepair/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/fastqrepair/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/fastqrepair/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/fastqrepair/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/fastqrepair)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23fastqrepair-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/fastqrepair)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/fastqrepair** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. 
Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/fastqrepair \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/fastqrepair/usage) and the [parameter documentation](https://nf-co.re/fastqrepair/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/fastqrepair/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/fastqrepair/output).\n\n## Credits\n\nnf-core/fastqrepair was originally written by Tommaso Mazza.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#fastqrepair` channel](https://nfcore.slack.com/channels/fastqrepair) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/fastqrepair for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1276?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1293","url":"https://workflowhub.eu/workflows/1293","name":"nf-core/pacvar","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-pacvar_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/pacvar\" src=\"docs/images/nf-core-pacvar_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/pacvar/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/pacvar/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/pacvar/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/pacvar/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/pacvar/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/pacvar)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23pacvar-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/pacvar)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\nnf-core/pacvar is a bioinformatics pipeline that processes long-read PacBio data. Specifically, the pipeline provides two workflows: one for processing whole-genome sequencing data, and another for processing reads from the PureTarget expansion panel offered by PacBio. This second workflow characterizes tandem repeats. 
Because the pipeline is designed for PacBio reads, it uses PacBio’s officially released tools.\n\n![nf-core/pacvar metro map](docs/images/pacvar_white_background.png)\n\nWorkflow Overview\n\n1. Demultiplex reads ([`lima`](https://lima.how))\n2. Align reads ([`pbmm2`](https://github.com/PacificBiosciences/pbmm2))\n3. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n\nWGS Workflow Overview\n\n1. Choice of SNP calling routes:\n   a. ([`deepvariant`](https://github.com/google/deepvariant))\n   b. ([`HaplotypeCaller`](https://gatk.broadinstitute.org/hc/en-us/articles/360037225632-HaplotypeCaller))\n2. Call SVs ([`pbsv`](https://github.com/PacificBiosciences/pbsv))\n3. Index VCF files ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html))\n4. Phase SNPs, SVs and BAM files ([`hiphase`](https://github.com/PacificBiosciences/HiPhase))\n\nTandem Repeat Workflow Overview\n\n1. Genotype tandem repeats - produce spanning bams and vcf ([`TRGT`](https://github.com/PacificBiosciences/trgt))\n2. Index and Sort tandem repeat spanning bam ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n3. Plot repeat motif plots ([`TRGT`](https://github.com/PacificBiosciences/trgt))\n4. Sort spanning VCF ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,pbi\nCONTROL,AEG588A1_S1_L002_R1_001.bam,AEG588A1_S1_L002_R1_001.pbi\n```\n\nNote that the `.pbi` file is not required. 
If you choose not to include it, your input file might look like this:\n\n```csv\nsample,bam,pbi\nCONTROL,AEG588A1_S1_L002_R1_001.bam\n```\n\nEach row represents an unaligned bam file and its associated index (optional).\n\nNow, you can run the pipeline. Below is an example\n\n```bash\nnextflow run nf-core/pacvar \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --workflow \u003cwgs/repeat\u003e \\\n   --barcodes barcodes.bed \\\n   --intervals intervals.bed \\\n   --genome \u003cGENOME NAME (e.g. GATK.GRCh38)\u003e \\\n   --outdir \u003cOUTDIR\u003e\n```\n\noptional parameters include: `--skip_demultiplexing`, `--skip_snp`, `--skip_sv`, `--skip_phase`.\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/pacvar/usage) and the [parameter documentation](https://nf-co.re/pacvar/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/pacvar/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/pacvar/output).\n\n## Credits\n\nnf-core/pacvar was originally written by Tanya Sarkin Jain.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#pacvar` channel](https://nfcore.slack.com/channels/pacvar) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/pacvar for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1293?version=1","name":"v1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1293?version=2","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1293?version=3","name":"1.0.1","author":[],"descriptor_type":["NFL"]}]},{"id":"1294","url":"https://workflowhub.eu/workflows/1294","name":"nf-core/proteinfamilies","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinfamilies_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/proteinfamilies\" src=\"docs/images/nf-core-proteinfamilies_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinfamilies/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinfamilies/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting 
Status](https://github.com/nf-core/proteinfamilies/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfamilies/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/proteinfamilies/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14881993-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14881993)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfamilies)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfamilies-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/proteinfamilies)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on 
Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinfamilies** is a bioinformatics pipeline that generates protein families from amino acid sequences and/or updates existing families with new sequences.\nIt takes a protein fasta file as input, clusters the sequences and then generates protein family Hiden Markov Models (HMMs) along with their multiple sequence alignments (MSAs).\nOptionally, paths to existing family HMMs and MSAs can be given (must have matching base filenames one-to-one) in order to update with new sequences in case of matching hits.\n\n\u003cp align=\"center\"\u003e\n    \u003cimg src=\"docs/images/proteinfamilies_workflow.png\" alt=\"nf-core/proteinfamilies workflow overview\"\u003e\n\u003c/p\u003e\n\n### Check quality\n\nGenerate input amino acid sequence statistics with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Create families\n\n1. Cluster sequences ([`MMseqs2`](https://github.com/soedinglab/MMseqs2/))\n2. Perform multiple sequence alignment (MSA) ([`FAMSA`](https://github.com/refresh-bio/FAMSA/) or [`mafft`](https://github.com/GSLBiotech/mafft/))\n3. Optionally, clip gap parts of the MSA ([`ClipKIT`](https://github.com/JLSteenwyk/ClipKIT/))\n4. Generate family HMMs and fish additional sequences into the family ([`hmmer`](https://github.com/EddyRivasLab/hmmer/))\n5. Optionally, remove redundant families by comparing family representative sequences against family models with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/))\n6. Optionally, from the remaining families, remove in-family redundant sequences by strictly clustering with ([`MMseqs2`](https://github.com/soedinglab/MMseqs2/)) and keep cluster representatives\n7. 
Optionally, if in-family redundancy was not removed, reformat the `.sto` full MSAs to `.fas` with ([`HH-suite3`](https://github.com/soedinglab/hh-suite))\n8. Present statistics for remaining/updated family size distributions and representative sequence lengths ([`MultiQC`](http://multiqc.info/))\n\n### Update families\n\n1. Find which families to update by comparing the input sequences against existing family models with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/))\n2. For non hit sequences continue with the above: A. Create families. For hit sequences and families continue to: 3\n3. Extract family sequences ([`SeqKit`](https://github.com/shenwei356/seqkit/)) and concatenate with filtered hit sequences of each family\n4. Optionally, remove in-family redundant sequences by strictly clustering with ([`MMseqs2`](https://github.com/soedinglab/MMseqs2/)) and keeping cluster representatives\n5. Perform multiple sequence alignment (MSA) ([`FAMSA`](https://github.com/refresh-bio/FAMSA/) or [`mafft`](https://github.com/GSLBiotech/mafft/))\n6. Optionally, clip gap parts of the MSA ([`ClipKIT`](https://github.com/JLSteenwyk/ClipKIT/))\n7. Update family HMM with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta,existing_hmms_to_update,existing_msas_to_update\nCONTROL_REP1,input/mgnifams_input_small.fa,,\n```\n\nEach row contains a fasta file with amino acid sequences (can be zipped or unzipped).\nOptionally, a row may contain tarball archives (tar.gz) of existing families' HMM and MSA folders, in order to be updated.\nIn this case, the HMM and MSA files must be matching in numbers and in base filenames (not the extension).\nHit families/sequences will be updated, while no hit sequences will create new families.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinfamilies \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfamilies/usage) and the [parameter documentation](https://nf-co.re/proteinfamilies/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinfamilies/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinfamilies/output).\n\n## Credits\n\nnf-core/proteinfamilies was originally written by Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinfamilies` channel](https://nfcore.slack.com/channels/proteinfamilies) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinfamilies for your analysis, please cite it using the following doi: [10.5281/zenodo.14881993](https://doi.org/10.5281/zenodo.14881993).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di 
Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1294?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1294?version=2","name":"1.1.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1294?version=3","name":"1.1.1","author":[],"descriptor_type":["NFL"]},{"id":"4","url":"https://workflowhub.eu/workflows/1294?version=4","name":"1.2.0","author":[],"descriptor_type":["NFL"]},{"id":"5","url":"https://workflowhub.eu/workflows/1294?version=5","name":"1.3.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1295","url":"https://workflowhub.eu/workflows/1295","name":"Matrix Multiplication","description":"Application that perform the multiplication between matrices.","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1295?version=1","name":"Version 1","author":["Nicolò Giacomini"],"descriptor_type":[]}]},{"id":"1296","url":"https://workflowhub.eu/workflows/1296","name":"taxonomic-rank-abundance-summary-table/main","description":"This workflow creates taxonomic summary tables for a specified taxonomic rank out of MAPseq's OTU tables output collection.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1296?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1296?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1297","url":"https://workflowhub.eu/workflows/1297","name":"Laserfarm Jupyter Notebooks for Reserve Naturelle Nationale du Bagnas","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. The workflow example here shows the application of the Laserfarm workflow to the 7.5 km2 large Reserve Naturelle Nationale du Bagnas ('Bagnas') in France (3.514360 E, 43.314332 N). The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data is stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1). 
The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548).\r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for the Reserve Naturelle Nationale du Bagnas:\r\n\r\n1_Retiling.ipynb: \r\nRe-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Reserve Naturelle Nationale du Bagnas). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’). \r\n\r\n2_Normalization.ipynb: \r\nNormalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m). \r\n\r\n3_Feature_extraction_veg.ipynb: \r\nCalculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. 
Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: \r\nCalculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: \r\nRasterizes the extracted features of vegetation (e.g. related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: \r\nRasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). 
Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1297?version=1","name":"Version 1","author":["W. Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1298","url":"https://workflowhub.eu/workflows/1298","name":"Laserfarm Jupyter Notebooks for Comino","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. The workflow example here shows the application of the Laserfarm workflow to the 3.5 km2 large island of Comino in Malta (36.0113 E, 14.3362 N). The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data are stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1). 
The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548).  \r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for Comino:\r\n\r\n1_Retiling.ipynb: Re-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Comino). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’).\r\n\r\n2_Normalization.ipynb: Normalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m).\r\n\r\n3_Feature_extraction_veg.ipynb: Calculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: Calculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. 
the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: Rasterizes the extracted features of vegetation (e.g. related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: Rasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1298?version=1","name":"Version 1","author":["W. 
Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1299","url":"https://workflowhub.eu/workflows/1299","name":"Laserfarm Jupyter Notebooks for Knepp Estate","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. The workflow example here shows the application of the Laserfarm workflow to the 5.55 km2 large Knepp Estate in the United Kingdom (0.3758 W, 50.969859 N). The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data are stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1). The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548). \r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for Knepp Estate:\r\n\r\n1_Retiling.ipynb: Re-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. 
The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Knepp Estate). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’).\r\n\r\n2_Normalization.ipynb: Normalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m).\r\n\r\n3_Feature_extraction_veg.ipynb: Calculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: Calculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: Rasterizes the extracted features of vegetation (e.g. 
related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: Rasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1299?version=1","name":"Version 1","author":["W. Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1300","url":"https://workflowhub.eu/workflows/1300","name":"Laserfarm Jupyter Notebooks for Mols Bjerge National Park","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. 
The workflow example here shows the application of the Laserfarm workflow to the 1.29 km2 large Mols Bjerge National Park in Denmark (10.478393 E, 56.289050 N). The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data are stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1). The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548).\r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for Mols Bjerge National Park:\r\n\r\n1_Retiling.ipynb: Re-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Mols Bjerge National Park). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). 
The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’).\r\n\r\n2_Normalization.ipynb: Normalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m).\r\n\r\n3_Feature_extraction_veg.ipynb: Calculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: Calculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: Rasterizes the extracted features of vegetation (e.g. related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). 
Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: Rasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1300?version=1","name":"Version 1","author":["W. Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1301","url":"https://workflowhub.eu/workflows/1301","name":"Laserfarm Jupyter Notebooks for Monks Wood","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. The workflow example here shows the application of the Laserfarm workflow to the 0.08 km2 large Monks Wood in the United Kingdom (10.478393 E, 56.289050 N). 
The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data are stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1). The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548). \r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for Monks Wood:\r\n\r\n1_Retiling.ipynb: Re-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Monks Wood). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. 
The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’).\r\n\r\n2_Normalization.ipynb: Normalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m).\r\n\r\n3_Feature_extraction_veg.ipynb: Calculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: Calculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: Rasterizes the extracted features of vegetation (e.g. related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: Rasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). 
Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1301?version=1","name":"Version 1","author":["W. Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1302","url":"https://workflowhub.eu/workflows/1302","name":"Laserfarm Jupyter Notebooks for Oostvaardersplassen","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. The workflow example here shows the application of the Laserfarm workflow to the 54 km2 large Oostvaardersplassen in the Netherlands (5.418842 E, 52.456870 N). The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. 
In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data are stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1). The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548).  \r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for Oostvaardersplassen:\r\n\r\n1_Retiling.ipynb: Re-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Oostvaardersplassen). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. 
The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’).\r\n\r\n2_Normalization.ipynb: Normalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m).\r\n\r\n3_Feature_extraction_veg.ipynb: Calculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: Calculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: Rasterizes the extracted features of vegetation (e.g. related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: Rasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). 
Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1302?version=1","name":"Version 1","author":["W. Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1303","url":"https://workflowhub.eu/workflows/1303","name":"Laserfarm Jupyter Notebooks for Salisbury Plain","description":"Laserfarm (https://doi.org/10.1016/j.ecoinf.2022.101836) is a high-throughput workflow for generating geospatial data products of ecosystem structure using LiDAR point clouds from national or regional airborne laser scanning (ALS) surveys. The workflow example here shows the application of the Laserfarm workflow to the 7.95 km2 large Salisbury Plain in the United Kingdom (1.866189 W, 51.220814 N). The work has been performed in the context of the EU project MAMBO (Modern Approaches to the Monitoring of Biоdiversity, https://doi.org/10.3897/rio.9.e116951), in which the automated execution of workflows for habitat condition metrics from LiDAR is tested for EU habitat monitoring. 
In this context, the Laserfarm workflow has been applied to a number of MAMBO demonstration sites from different European countries (Denmark, France, Netherlands, United Kingdom, Malta). For each demonstration site, the Jupyter Notebooks for the Laserfarm workflow, the derived data products (GeoTIFF files) and their visualization (maps in PDF format), and the study site boundaries (shapefiles) are stored in a Zenodo repository (https://doi.org/10.5281/zenodo.14745309). The Zenodo repository also provides a detailed methodology description. The raw (input) data are stored on a public data repository (https://doi.org/10.48546/workflowhub.datafile.5.1).  The data of vegetation structure metrics from each demonstration site have been described in an accompanying data paper (https://doi.org/10.1016/j.dib.2025.111548).\r\n\r\nHere on WorkflowHub, the following Jupyter Notebooks are provided to show how the Laserfarm workflow has been implemented for Salisbury Plain:\r\n\r\n1_Retiling.ipynb: Re-tiles the raw LiDAR point clouds into smaller chunks for further efficient, scalable and distributed processing. The raw LiDAR point clouds were accessed from a national LiDAR repository and subsequently clipped to the boundaries of the study area (here Salisbury Plain). The Jupyter Notebook requires to define a regular grid for re-tiling and a spatial resolution (tile mesh size) for the final output of the workflow (GeoTIFF files). The regular grid is specified with the minimum and maximum of the X and Y coordinates (min_x, max_x, min_y, max_y) of the bounding box around the region of interest and the number of tiles (n_tiles_side) along the side of the bounding box. 
The specifications of the re-tiling grid are accessed through a text file in the Jupyter Notebook (‘grids.txt’).\r\n\r\n2_Normalization.ipynb: Normalizes the point cloud heights (z-values) relative to the terrain surface by calculating the normalized height for each individual point as the height relative to the lowest point within a grid cell. Requires defining a spatial resolution of the grid cell size for normalization (here 1 m).\r\n\r\n3_Feature_extraction_veg.ipynb: Calculates LiDAR metrics (‘features’) with vegetation points, e.g. related to vegetation height, density, and vertical variability. Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m), the list of features, and the ASPRS standard point classes which are considered to be vegetation points.\r\n\r\n4_Feature_extraction_all.ipynb: Calculates LiDAR metrics (‘features’) of openness which use all points (not only vegetation points), namely the pulse penetration ratio (i.e. the ratio of the number of ground points to the total number of points within a grid cell). Requires defining the spatial resolution (tile mesh size) for the metric calculation (here 10 m) and the list of features (here only the pulse penetration ratio).\r\n\r\n5_Geotiff_export_veg.ipynb: Rasterizes the extracted features of vegetation (e.g. related to vegetation height, density, and vertical variability) and exports them as raster layers (here GeoTIFF format). Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\n6_Geotiff_export_all.ipynb: Rasterizes the extracted features of openness (here pulse penetration ratio) and exports them as raster layers (here GeoTIFF format). 
Requires defining a country-specific code of the coordinate reference system (EPSG code) which is loaded with a text file (‘epsgs.txt’).\r\n\r\nMore general information about the Laserfarm workflow can be found in the user manual (https://laserfarm.readthedocs.io/en/latest/) and on GitHub (https://github.com/eEcoLiDAR/Laserfarm). The current version of Laserfarm is available from PyPI (https://pypi.org/project/laserfarm/) or Zenodo (https://doi.org/10.5281/zenodo.3842780). An example of a country-wide dataset that has been produced with the Laserfarm workflow has been published in a data paper (https://doi.org/10.1016/j.dib.2022.108798). Additional information on the Jupyter Notebooks provided here is available from a Zenodo repository (https://doi.org/10.5281/zenodo.14745309).\r\n","organization":"Laserfarm applications to European demonstration sites","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1303?version=1","name":"Version 1","author":["W. Daniel Kissling","Jinhu Wang"],"descriptor_type":[]}]},{"id":"1304","url":"https://workflowhub.eu/workflows/1304","name":"Provenance generation when running SUNSET with Autosubmit","description":"This workflow demonstrates the integration of FAIR principles into the workflow management ecosystem through provenance integration in Autosubmit, a workflow manager developed at the Barcelona Supercomputing Center (BSC), and SUNSET (SUbseasoNal to decadal climate forecast post-processing and aSSEssmenT suite), an R-based verification workflow also developed at BSC.\r\n\r\nAutosubmit supports the generation of data provenance information based on RO-Crate, facilitating the creation of machine-actionable digital objects that encapsulate detailed metadata about its executions. However, the provenance metadata provided by Autosubmit focuses on the workflow process and does not encapsulate the details of the data transformation processes. 
This is where SUNSET plays a complementary role. SUNSET’s approach to provenance information is based on the METACLIP (METAdata for CLImate Products) ontologies. METACLIP offers a semantic approach to describing climate products and their provenance. This framework enables SUNSET to provide specific, high-resolution provenance metadata for its operations, improving transparency and compliance with FAIR principles. The generated files provide detailed information about each transformation the data has undergone, as well as additional details about the data's state, location, structure, and associated source code, all represented in a tree-like structure.\r\n\r\nThe workflow uses a SUNSET configuration file, referred to as a \"recipe,\" to generate a set of JSON files containing the provenance information of the workflow execution based on the METACLIP ontologies. For this, we compute some skill metrics and scorecard plots with SUNSET, using Autosubmit to dispatch jobs in parallel. In the recipe, we request three start dates for January, February, and March (0101, 0201, 0301). SUNSET will split the recipe into three atomic recipes, and Autosubmit will run three jobs, processing the verification for each recipe in parallel. When all the scorecards are generated, the \"transfer_provenance\" job will be triggered, transferring the SUNSET-generated provenance files to the Autosubmit experiment folder. Finally, an RO-Crate object will be created, encapsulating the entire process description.\r\n\r\nCurrently, this workflow can only be executed within the BSC infrastructure. 
Here is the complete use case: [Use Case Documentation](https://earth.bsc.es/gitlab/es/sunset/-/blob/Dev-Provenance/use_cases/ex1_4_provenance_autosubmit/ex1_4-handson.md)\r\n\r\nThe METACLIP-based JSON files can be interactively visualized using the [METACLIP Interpreter.](http://metaclip.org)","organization":"BSC-CES","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1304?version=1","name":"Version 1","author":["Albert Puiggros"],"descriptor_type":[]}]},{"id":"1305","url":"https://workflowhub.eu/workflows/1305","name":"PVGA","description":"PVGA is a powerful virus-focused assembler that does both assembly and polishing. For virus genomes, small changes will lead to significant differences in terms of viral function and pathogenicity. Thus, for virus-focused assemblers, high-accuracy results are crucial. Our approach heavily depends on the input reads as evidence to produce the reported genome. It first adopts a reference genome to start with. We then align all the reads against the reference genome to get an alignment graph. After that, we use a dynamic programming algorithm to compute a path with the maximum weight of edges supported by reads. Most importantly, the obtained path is used as the new reference genome and the process is repeated until no further improvement is possible.","organization":"Virus sequencing team","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1305?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1306","url":"https://workflowhub.eu/workflows/1306","name":"tcga-data-nf","description":"Workflow to download and prepare TCGA data.\r\n\r\nThe workflow divides the process of generating Gene Regulatory networks from TCGA cancer data in three steps:\r\n\r\n1. 
Downloading the raw data from GDC and saving the rds/tables needed later\r\n2. Preparing the data. This step includes filtering the data, normalizing it...\r\n3. Analysis of gene regulatory networks","organization":"QuackenbushLab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1306?version=1","name":"main @ b7cb7f8","author":["Viola Fanfani"],"descriptor_type":["NFL"]}]},{"id":"1307","url":"https://workflowhub.eu/workflows/1307","name":"nf-core/variantbenchmarking","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-variantbenchmarking_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/variantbenchmarking\" src=\"docs/images/nf-core-variantbenchmarking_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/variantbenchmarking/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/variantbenchmarking/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/variantbenchmarking/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/variantbenchmarking/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/variantbenchmarking/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14916661-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14916661)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/variantbenchmarking)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23variantbenchmarking-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/variantbenchmarking)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/variantbenchmarking** is designed to evaluate and validate the accuracy of variant calling methods in genomic research. Initially, the pipeline is tuned well for available gold standard truth sets (for example, Genome in a Bottle and SEQC2 samples) but it can be used to compare any two variant calling results. 
The workflow provides benchmarking tools for small variants including SNVs and INDELs, Structural Variants (SVs) and Copy Number Variations (CNVs) for germline and somatic analysis.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\n\u003cp align=\"center\"\u003e\n    \u003cimg title=\"variantbenchmarking metro map\" src=\"docs/images/variantbenchmarking_metromap.png\" width=100%\u003e\n\u003c/p\u003e\n\nThe workflow involves several key processes to ensure reliable and reproducible results as follows:\n\n### Standardization and normalization of variants:\n\nThis initial step ensures consistent formatting and alignment of variants in test and truth VCF files for accurate comparison.\n\n- Subsample if input test vcf is multisample ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n- Homogenization of multi-allelic variants, MNPs and SVs (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))\n- Reformatting test VCF files from different SV callers ([svync](https://github.com/nvnieuwk/svync))\n- Rename sample names in test and truth VCF files ([bcftools reheader](https://samtools.github.io/bcftools/bcftools.html#reheader))\n- Splitting multi-allelic variants in test and truth VCF files ([bcftools 
norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Deduplication of variants in test and truth VCF files ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Left aligning of variants in test and truth VCF files ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Use prepy in order to normalize test files. This option is only applicable for happy benchmarking of germline analysis ([prepy](https://github.com/Illumina/hap.py/tree/master))\n- Split SNVs and indels if the given test VCF contains both. This is only applicable for somatic analysis ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n\n### Filtering options:\n\nApplying filtering on the process of benchmarking itself might makes it impossible to compare different benchmarking strategies. Therefore, for whom like to compare benchmarking methods this subworkflow aims to provide filtering options for variants.\n\n- Filtration of contigs ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n- Include or exclude SNVs and INDELs ([bcftools filter](https://samtools.github.io/bcftools/bcftools.html#filter))\n- Size and quality filtering for SVs ([SURVIVOR filter](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n\n### Liftover of vcfs:\n\nThis sub-workflow provides option to convert genome coordinates of truth VCF and test VCFs and high confidence BED file to a new assembly. Golden standard truth files are build upon specific reference genomes which makes the necessity of lifting over depending on the test VCF in query. Lifting over one or more test VCFs is also possible.\n\n- Create sequence dictionary for the reference ([picard CreateSequenceDictionary](https://gatk.broadinstitute.org/hc/en-us/articles/360037068312-CreateSequenceDictionary-Picard)). 
This file can be saved and reused.\n- Lifting over VCFs ([picard LiftoverVcf](https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard))\n- Lifting over high confidence coordinates ([UCSC liftover](http://hgdownload.cse.ucsc.edu/admin/exe))\n\n### Statistical inference of input test and truth variants:\n\nThis step provides insights into the distribution of variants before benchmarking by extracting variant statistics:.\n\n- SNVs, INDELs and complex variants ([bcftools stats](https://samtools.github.io/bcftools/bcftools.html#stats))\n- SVs by type ([SURVIVOR stats](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n\n### Benchmarking of variants:\n\nActual benchmarking of variants are split between SVs and small variants:\n\nAvailable methods for germline and somatic _structural variant (SV)_ benchmarking are:\n\n- Truvari ([truvari bench](https://github.com/acenglish/truvari/wiki/bench))\n- SVanalyzer ([svanalyzer benchmark](https://github.com/nhansen/SVanalyzer/blob/master/docs/svbenchmark.rst))\n- Rtgtools (only for BND) ([rtg bndeval](https://realtimegenomics.com/products/rtg-tools))\n\n\u003e [!NOTE]\n\u003e Please note that there is no somatic specific tool for SV benchmarking in this pipeline.\n\nAvailable methods for germline and somatic _CNVs (copy number variations)_ are:\n\n- Truvari ([truvari bench](https://github.com/acenglish/truvari/wiki/bench))\n- Wittyer ([witty.er](https://github.com/Illumina/witty.er/tree/master))\n- Intersection ([bedtools intersect](https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html))\n\n\u003e [!NOTE]\n\u003e Please note that there is no somatic specific tool for CNV benchmarking in this pipeline.\n\nAvailable methods for *small variants: SNVs and INDEL*s:\n\n- Germline variant benchmarking using ([rtg vcfeval](https://realtimegenomics.com/products/rtg-tools))\n- Germline variant benchmarking using ([hap.py](https://github.com/Illumina/hap.py/blob/master/doc/happy.md))\n- Somatic 
variant benchmarking using ([rtg vcfeval --squash-ploidy](https://realtimegenomics.com/products/rtg-tools))\n- Somatic variant benchmarking using ([som.py](https://github.com/Illumina/hap.py/tree/master?tab=readme-ov-file#sompy))\n\n\u003e [!NOTE]\n\u003e Please note that using happ.py and som.py with rtgtools as comparison engine is also possible. Check conf/tests/test_ga4gh.config as an example.\n\n### Intersection of benchmark regions:\n\nIntersecting test and truth BED regions produces benchmark metrics. Intersection analysis is especially recommended for _CNV benchmarking_ where result reports may variate per tool.\n\n- Convert SV or CNV VCF file to BED file, if no regions file is provided for test case using ([SVTK vcf2bed](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))\n- Convert VCF file to BED file, if no regions file is provided for test case using ([Bedops convert2bed](https://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/convert2bed.html#convert2bed))\n- Intersect the regions and gether benchmarking statistics using ([bedtools intersect](https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html))\n\n### Comparison of benchmarking results per TP, FP and FN files\n\nIt is essential to compare benchmarking results in order to infer uniquely or commonly seen TPs, FPs and FNs.\n\n- Merging TP, FP and FN results for happy, rtgtools and sompy ([bcftools merge](https://samtools.github.io/bcftools/bcftools.html#merge))\n- Merging TP, FP and FN results for Truvari and SVanalyzer ([SURVIVOR merge](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n- Conversion of VCF files to CSV to infer common and unique variants per caller (python script)\n\n### Reporting of benchmark results\n\nThe generation of comprehensive report that consolidates all benchmarking results.\n\n- Merging summary statistics per benchmarking tool (python script)\n- Plotting benchmark metrics per benchmarking tool (R 
script)\n- Create visual HTML report for the integration of NCBENCH ([datavzrd](https://datavzrd.github.io/docs/index.html))\n- Apply _MultiQC_ to visualize results\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,test_vcf,caller\ntest1,test1.vcf.gz,delly\ntest2,test2.vcf,gatk\ntest3,test3.vcf.gz,cnvkit\n```\n\nEach row represents a vcf file (test-query file). For each vcf file and variant calling method (caller) have to be defined.\n\nUser _has to provide truth_vcf and truth_id in config files_.\n\n\u003e [!NOTE]\n\u003e There are publicly available truth sources. For germline analysis, it is common to use [genome in a bottle (GiAB)](https://www.nist.gov/programs-projects/genome-bottle) variants. There are variate type of golden truths and high confidence regions for hg37 and hg38 references. Please select and use carefully.\n\u003e For somatic analysis, [SEQC2 project](https://sites.google.com/view/seqc2/home/data-analysis/high-confidence-somatic-snv-and-indel-v1-2) released SNV, INDEL and CNV regions. 
One, can select and use those files.\n\nHere you can find example combinations of [truth files](docs/truth.md)\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/variantbenchmarking/usage) and the [parameter documentation](https://nf-co.re/variantbenchmarking/parameters).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/variantbenchmarking \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e \\\n   --genome GRCh37 \\\n   --analysis germline \\\n   --truth_id HG002 \\\n   --truth_vcf truth.vcf.gz\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\u003e Conda profile is not available for SVanalyzer (SVBenchmark) tool, if you are planing to use the tool either choose docker or singularity.\n\n### Example usages\n\nThis pipeline enables quite a number of subworkflows suitable for different benchmarking senarios. 
Please go through [this documentation](docs/testcases.md) to learn some example usages which discusses about the test config files under conf/tests and tests/.\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/variantbenchmarking/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/variantbenchmarking/output).\n\nThis pipeline outputs benchmarking results per method besides to the inferred and compared statistics.\n\n## Credits\n\nnf-core/variantbenchmarking was originally written by Kübra Narcı ([@kubranarci](https://github.com/kubranarci)) as a part of benchmarking studies in German Human Genome Phenome Archieve Project ([GHGA](https://www.ghga.de/)).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Nicolas Vannieuwkerke ([@nvnienwk](https://github.com/nvnieuwk)),\n- Maxime Garcia ([@maxulysse](https://github.com/maxulysse)),\n- Sameesh Kher ([@khersameesh24](https://github.com/khersameesh24))\n- Florian Heyl ([@heylf](https://github.com/heyl))\n- Krešimir Beštak ([@kbestak](https://github.com/kbestak))\n- Elad Herz ([@EladH1](https://github.com/EladH1))\n\n## Acknowledgements\n\n\u003ca href=\"https://www.ghga.de/\"\u003e\n  \u003cimg src=\"docs/images/GHGA_short_Logo_orange.png\" alt=\"GHGA\" width=\"200\"/\u003e\n\u003c/a\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#variantbenchmarking` channel](https://nfcore.slack.com/channels/variantbenchmarking) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/variantbenchmarking for your analysis, please cite it using the 
following doi: [10.5281/zenodo.14916661]
Optionally, the edges of the settlement graph can be visualized in a separate file as well.\r\n# Implementation\r\nThe workflow comprises four main stages, each implemented as a standalone executable, with the interface wrapped in a CWL definition file.\r\n* **Filter**: At first, the raster input is polygonized, and settlements not fulfilling the user-specified filter condition are dropped\r\n* **Neighbours**:  The adjacencies of the settlements are computed according to a user-configurable criterion. The graph is stored in a central graph database.\r\n* **Contraction**: The settlement graph is  contracted with edges fulfilling the contraction criterion being removed from the graph and the incident vertices merged. The connectivity to former neighbors of aggregated vertices is restored afterwards. \r\n* **Analysis**: The final stage computes selected centrality measures on the contracted graph and writes the centrality values in the output shapefile. Optionally, this stage can visualize the graph's edges in a separate output file.\r\n\r\nAdditionally, three orchestrational task are needed:\r\n* **Split**: Splits the input _geotiff_ or _shapefile_ to enable concurrent processing.\r\n* **Components**: Identifies connected components in the settlement graph, which are the unit of parallelization for the **Contraction** and **Analysis** stage.\r\n* **Merge**: Merges the output files of the **Analysis** stage into a single file. \r\n\r\nThe **Job Generator** creates CWL jobs for the aformentionend tasks and models their dependencies in a directed acyclic graph (DAG). The **Scheduler** queues the jobs according to the DAG and submits them to an CWL executor, which runs the job in a separate child process. Both the job generator and scheduler are utilized by the workflow's _main_ file (SettlementDelineation.h/cpp). 
\r\n\r\n# Deployment\r\nTo run the **Settlement Delineation and Analysis** workflow, the easiest way is to use ```SettlementDelineationAnalysis.py```, which wraps the command line interface of the main binary and executes it in a container using _docker_. Please use the latest version from the [Fishnet Repository](https://gitlab2.informatik.uni-wuerzburg.de/descartes/sos/fishnet)\r\n### Software Requirements\r\n- _Docker_\r\n- _Python 3.x_\r\n### Running the Workflow\r\n```\r\npython3 SettlementDelineationAnalysis.py -i INPUT_FILE -c CONFIG_FILE.json -o OUTPUT_FILE.shp\r\n``` \r\n- **Input**: Input GIS file (*GeoTIFF* | *Shapefile*) on settlement location (e.g. [WSF](https://geoservice.dlr.de/web/maps/eoc:wsf2019))\r\n- **Config**: JSON file containing the config for the workflow run (e.g. [Example Config](https://gitlab2.informatik.uni-wuerzburg.de/descartes/sos/fishnet/-/blob/main/app/sda-workflow/sda-docker.json?ref_type=heads) )\r\n- **Output**: Path of output shapefile  \r\n\r\n\r\n_Please refer to Version 1.1 of the Workflow:_ [https://doi.org/10.48546/WORKFLOWHUB.WORKFLOW.1308.2](https://doi.org/10.48546/WORKFLOWHUB.WORKFLOW.1308.2)\r\n","organization":"SOS","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1308?version=1","name":"1.0","author":["Lorenz Gruber"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1308?version=2","name":"1.1","author":["Lorenz Gruber"],"descriptor_type":[]}]},{"id":"1309","url":"https://workflowhub.eu/workflows/1309","name":"Anti-Cancer potential of a new Derivative of Caffeic Acid Phenethyl Ester targeting the centrosome - DIA-NN data KNIME processing workflow","description":"KNIME workflow describing the analysis of mass spectrometry dataset related to the publication \"Anti-Cancer potential of a new Derivative of Caffeic Acid Phenethyl Ester targeting the centrosome\". 
Workflow was built using the [KNIME software container environment](https://github.com/OmicsWorkflows/KNIME_docker_vnc/tree/version_4.1.3a), version 4.1.3a, which can be created using \"docker pull cfprot/knime:4.1.3a\" command in Docker. Please consult Github pages for more information on how to use the container.\r\n\r\nBriefly, the KNIME workflow contains the contaminants removal, log2 intensities transformation, data filtering, normalization and statistical evaluation using the limma test.\r\n\r\nThe input data for the KNIME workflow (the report.tsv from DIA-NN) as well as raw LC-MS data can be found on PRIDE repository under the identifier PXD061079. Processed data and figures from the data quality control are located in the \\_\\_outputs\\_\\_ folder, sobfolder tables and figures, respectively.","organization":"Proteomics CEITEC","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1309?version=1","name":"Version 1","author":["Katerina Hanakova","David Potesil"],"descriptor_type":[]}]},{"id":"1314","url":"https://workflowhub.eu/workflows/1314","name":"Galaxy Workflow for CNV Detection with CNVkit and Conversion to Beacon JSON Using cnv-vcf2json","description":"This Galaxy workflow streamlines comprehensive copy number variation (CNV) analysis by integrating CNVkit’s robust detection capabilities with an efficient conversion step using cnv-vcf2json to format results into Beacon JSON. Designed for computational biologists and bioinformaticians, the workflow standardizes CNV identification and output formatting to enhance interoperability with Beacon networks. 
It is specifically optimized for use with mapped BAM files from the EGAD00001008392 synthetic dataset, leveraging spiked-in variant data from RD-Connect GPAP synthetic data (available at [Zenodo](https://zenodo.org/records/7273767)).","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1314?version=1","name":"Version 1","author":["Khaled Jum'ah","Krzysztof Poterlowicz"],"descriptor_type":["GALAXY"]}]},{"id":"1315","url":"https://workflowhub.eu/workflows/1315","name":"RDM_system_connector","description":"![Lin_X_NFDI4BIOIMAGE](/docs/imgs/lin_x_nfdi4bioimage.png)\r\n# RDM_system_connector\r\n# **WARNING** \r\nThis is a proof of concept, it has not been decided whether it will be developed into a fully functional tool. \r\nFeedback is therefore essential, especially as it is unclear whether this type of tool is useful at all, and if so, which parts, as the concept consists of many different parts.\r\n(source code readme: \r\n- [installation guide and short description](/docs/src_README.md)\r\n- [sphinx code documentation](/docs/_build/html/index.html)\r\n\r\n---\r\n\r\n# Table of Contents:\r\n- [RDM\\_system\\_connector](#rdm_system_connector)\r\n- [**WARNING**](#warning)\r\n- [Table of Contents:](#table-of-contents)\r\n- [RDM system connector](#rdm-system-connector)\r\n\t\t\t- [overview graph](#overview-graph)\r\n\t- [Internal project study registration](#internal-project-study-registration)\r\n\t- [ELN (e.g. 
RSpace, elabFTW) \\\u0026 inventory](#eln-eg-rspace-elabftw--inventory)\r\n\t- [Omero Image \\\u0026 metadata hub](#omero-image--metadata-hub)\r\n\t- [long-term archive storage](#long-term-archive-storage)\r\n\t- [matching](#matching)\r\n\t- [(fuzzy similarity matching, direct matching, manual\\_linking)](#fuzzy-similarity-matching-direct-matching-manual_linking)\r\n- [long-term vision](#long-term-vision)\r\n\t- [of a connected RDM \\_structure](#of-a-connected-rdm-_structure)\r\n# RDM system connector\r\n\r\n- The purpose of this tool will be to connect different platforms that have been or will be used as part of research data management. \r\n- Every part of the system is replaceable as the connection is the central point of the tool. \r\n- the benefits in day-to-day research result from the cooperation of different stakeholders who work together on a project and do not necessarily have access to the same systems or do not use them in their work process despite having access\r\n- making essential information usable in all connected systems makes it possible to have it available more quickly and clearly\r\n- in the best case scenario, stakeholders receive information that they were previously unable to obtain\r\n[see a real practical example](practical%20example%20lin.md)\r\n\r\n#### overview graph\r\n\r\n```mermaid\r\ngraph TD\r\n    A[project registration] --\u003e B[ELN e.g. RSpace]\r\n    B --\u003e C[Omero hub]\r\n    C --\u003e D[Long-term archive storage]\r\n    A -- matching e.g. fuzzy_similarity_matching --\u003e D\r\n```\r\n\r\n## Internal project study registration\r\n- the main point of this part is that every scientific project has a study registration somewhere\r\n- the registration can be a proposal (e.g. a pdf/text file to apply for a funding programme or a thesis)\r\n- we use a separate platform (egroupware) where people can register their study and book time slots for specific instruments (e.g. 
MR, EEG, microscopes, computer servers)\r\n## ELN (e.g. RSpace, elabFTW) \u0026 inventory\r\n- a platform where protocols of preparation procedures or plans for procedures can be written\r\n- there should be basic protocols and subject-specific ones (e.g. keeping track of daily events)\r\n- be used to plan and structure the interaction between people working on different parts of a project (e.g. principal investigators set the protocol and delegate work; technical assistants prepare the tissue; doctoral candidates take the images). \r\n\r\n## Omero Image \u0026 metadata hub\r\n- use inplace import to link the images from [long-term_archive_storage](#long-term archive storage) to Omero\r\n- use key-value pairs to display the metadata\r\n- create tags from [(semi-)automatic tag creation](/docs/(semi-)%20automatic%20tag%20creation.md) including tag descriptions from [(semi-)_automatic_description\u0026_ontology_linking_creation](/docs/(semi-)%20automatic%20description%20\u0026%20ontology%20linking%20creation.md)\r\n## long-term archive storage\r\n- crawl a mounted drive to find images, metadata files, projects, studies and add them to [ELN_(e.g._RSpace,_elabFTW)_+_inventory](#ELN%20(e.g.%20RSpace,%20elabFTW)%20\u0026%20inventory) and [Omero_Image_+_metadata_hub](#Omero%20Image%20\u0026%20metadata%20hub)\r\n- use file names, folder names, metadata for [(semi-)_automatic_tag_creation](/docs/(semi-)%20automatic%20tag%20creation.md) and [(semi-)_automatic_description_\u0026_ontology_linking_creation](/docs/(semi-)%20automatic%20description%20\u0026%20ontology%20linking%20creation.md)\r\n## matching \r\n## (fuzzy similarity matching, direct matching, manual_linking)\r\n- **fuzzy** = Calculate the overlap of project names (from [internal_project_study_registration](#internal%20project%20study%20registration) and folder names (from [long-term_archive_storage](#long-term%20archive%20storage)); \r\n\t- where a percentage of overlap of consecutive letters is specified; if the 
shortest name (either projectname or foldername) is completely contained in the other (~one is the substring of the other), by convention the overlap is set to 100% \r\n- TODO: **direct matching** = define a file (TODO: metadata entry) Define a file or a metadata entry from a file as the project name, which must be identical to that of the study application, character for character; i.e. a 100% match is assumed\r\n\t- e.g. our project leaders have to sign an application letter which is included in [internal_project_study_registration](#internal%20project%20study%20registration) and in [long-term_archive_storage](#long-term%20archive%20storage) for every new project or study. \r\n\t- as both files are identical, the project duration, project manager and project name can be read from them\r\n- TODO: **manual linking** = the linking table in the database could be filled manually to force a specific project/study to be linked to another;  \r\n\t- however, a browser interface is planned to display the automatically generated matches, validate them by eye and create your own\r\n\r\n\r\n# long-term vision\r\n## of a connected RDM _structure\r\n![vision](/docs/imgs/longterm_vision_rdm.png)","organization":"BioImage Informatics and Analysis Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1315?version=1","name":"main @ 287d562","author":[],"descriptor_type":[]}]},{"id":"1316","url":"https://workflowhub.eu/workflows/1316","name":"FAIR Statistics Aggregator for DOIs","description":"![Lin_X_NFDI4BIOIMAGE](/imgs/lin_x_nfdi4bioimage.png)\r\n# FAIR Statistics Aggregator for DOIs\r\n\r\n## Table of Contents\r\n1. [Introduction](#introduction)\r\n2. [Features](#features)\r\n3. [Requirements](#requirements)\r\n4. [Installation](#installation)\r\n5. [Usage](#usage)\r\n6. [Output](#output)\r\n7. [Limitations](#limitations)\r\n8. 
[License](#license)\r\n\r\n## Introduction\r\nThis repository hosts a prototype tool designed to analyze and aggregate FAIR (Findable, Accessible, Interoperable, and Reusable) statistics for a list of Digital Object Identifiers (DOIs). The tool currently utilizes the F-UJI FAIR checker to evaluate the FAIRness of the metadata associated with each DOI. Future versions aim to incorporate additional FAIR checkers to provide a more comprehensive analysis.\r\n\r\nThe tool processes a list of DOIs, which can be sourced from a website or fetched using a metasearch API like Crossref or DataCite. It calculates FAIR statistics for each DOI, aggregates these statistics by publication year, and identifies common metadata errors that impact FAIRness. The results are presented in an aggregated FAIR-statistic per publication year diagram and a summary of the most frequent metadata issues.\r\n\r\nThis tool also serves as a justification for metadata providers (e.g., Springer, Nature) to ensure their metadata is hosted in a machine-readable format, as this is crucial for optimal FAIRness evaluation.\r\n\r\n**Warning**: The F-UJI FAIR checker must be initialized beforehand using a Docker container. Instructions for setting up the F-UJI checker can be found [here](https://github.com/FAIR-IMPACT/fuji). 
Please note that F-UJI and other FAIR checkers are in a very early beta status.\r\n\r\n## Features\r\n- **DOI List Processing**: Accepts a list of DOIs from a file or fetched via APIs like Crossref or DataCite.\r\n- **FAIR Evaluation**: Uses the F-UJI FAIR checker to evaluate the FAIRness of each DOI's metadata.\r\n- **Aggregation**: Aggregates FAIR statistics by publication year.\r\n- **Error Summary**: Identifies and summarizes the most common metadata errors affecting FAIRness.\r\n- **Visualization**: Generates an aggregated FAIR-statistic per publication year diagram.\r\n\r\n## Requirements\r\n- Python 3.x\r\n- Docker (for running the F-UJI FAIR checker)\r\n- Required Python packages (listed in `requirements.txt`)\r\n\r\n## Installation\r\n1. Clone the repository:\r\n   ```bash\r\n   git clone https://github.com/saibotmagd/fair_stats_aggregator.git\r\n   cd fair_stats_aggregator\r\n   ```\r\n2. Install the required Python packages:\r\n   ```bash\r\n   pip install -r requirements.txt\r\n   ```\r\n3. Set up the F-UJI FAIR checker (https://github.com/FAIR-IMPACT/fuji) using Docker:\r\n   ```bash\r\n   docker pull fairimpact/fuji\r\n   docker run -d -p 1071:1071 fairimpact/fuji\r\n   ```\r\n\r\n## Usage\r\n1. Prepare a list of DOIs in a text file (one DOI per line) or use an API to fetch DOIs.\r\n2. Run the tool:\r\n   ```bash\r\n   python fair_stats_agg.py --doi-file path/to/doi_list.txt\r\n   ```\r\n   There's an \"example_DOI_list.txt\" including the publications of the Leibniz Institute for Neurobiology Magdeburg.\r\n3. The tool will output the aggregated FAIR statistics and a summary of metadata errors. 
\r\n\r\n## Output\r\n- **Aggregated FAIR-statistic per Publication Year Diagram**: A visual representation of FAIR statistics aggregated by publication year.\r\n- **Metadata Error Summary**: A list of the most common metadata errors affecting FAIRness.\r\n- **Justification for Metadata Providers**: A summary highlighting the importance of machine-readable metadata for optimal FAIRness evaluation.\r\n\r\n## Limitations\r\n- **Beta Status**: The F-UJI FAIR checker and other FAIR checkers are in a very early beta status. Results may vary and should be interpreted with caution.\r\n- **Dependency on Docker**: The F-UJI FAIR checker requires Docker to be initialized beforehand.\r\n\r\n## License\r\n\r\n# CC BY-NC License\r\n\r\nThis project is licensed under the **Creative Commons Attribution-NonCommercial 4.0 International License** (CC BY-NC 4.0).\r\n\r\n## You are free to:\r\n\r\n- **Share** — Copy and redistribute the material in any medium or format.\r\n- **Adapt** — Remix, transform, and build upon the material.\r\n\r\nThe licensor cannot revoke these freedoms as long as you follow the license terms.\r\n\r\n## Under the following terms:\r\n\r\n- **Attribution** — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.\r\n- **NonCommercial** — You may not use the material for commercial purposes.\r\n\r\n## Notices:\r\n\r\n- You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation.\r\n- No warranties are given. The license may not give you all of the permissions necessary for your intended use. 
For example, other rights such as publicity, privacy, or moral rights may limit how you use the material.\r\n\r\nFor more details, please refer to the full license text: [CC BY-NC 4.0 License](https://creativecommons.org/licenses/by-nc/4.0/).\r\n\r\n[Back to Top](#fair-statistics-aggregator-for-dois)\r\n","organization":"BioImage Informatics and Analysis Workflows","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1316?version=1","name":"main @ 42fecb6","author":[],"descriptor_type":[]}]},{"id":"1319","url":"https://workflowhub.eu/workflows/1319","name":"ethos.REFLOW: Renewable Energy potentials workFLOW manager","description":"REFLOW is a workflow manager tool designed to streamline and automate tasks related to renewable energy potential analyses. It is built with Luigi and provides an automated, robust framework for data acquisition, processing, land/sea eligibility analysis, technology placements, simulations and visualizations. 
It is built with transparency and reproducibility in mind.
coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\r\n\r\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\r\n\r\n![Workflow Diagram](./assets/CurationPretext.png)\r\n\r\n1. Generate Maps - Generates pretext maps as well as a static image.\r\n\r\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\r\n\r\n## Usage\r\n\r\n\u003e **Note**\r\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how\r\n\u003e to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)\r\n\u003e with `-profile test` before running the workflow on actual data.\r\n\r\nCurrently, the pipeline uses the following flags:\r\n\r\n- `--input`\r\n\r\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\r\n\r\n- `--reads`\r\n\r\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\r\n\r\n- `--read_type`\r\n\r\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\r\n\r\n- `--aligner`\r\n\r\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\r\n\r\n- `--cram`\r\n\r\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\r\n\r\n- `--map_order`\r\n\r\n  - hic map scaffold order, input either `length` or `unsorted`\r\n\r\n- `--teloseq`\r\n\r\n  - A telomeric sequence, e.g., `TTAGGG`\r\n\r\n- `-entry`\r\n  - ALL_FILES is the default and generates all accessory files as well as pretext maps\r\n  - MAPS_ONLY generates only the pretext maps and static 
images\r\n\r\nNow, you can run the pipeline using:\r\n\r\n#### For ALL_FILES run\r\n\r\n```bash\r\nnextflow run sanger-tol/curationpretext \\\r\n  --input { input.fasta } \\\r\n  --cram { path/to/cram/ } \\\r\n  --reads { path/to/longread/fasta/ } \\\r\n  --read_type { default is \"hifi\" }\r\n  --sample { default is \"pretext_rerun\" } \\\r\n  --teloseq { default is \"TTAGGG\" } \\\r\n  --map_order { default is \"unsorted\" } \\\r\n  --outdir { OUTDIR } \\\r\n  -profile \u003cdocker/singularity/{institute}\u003e\r\n\r\n```\r\n\r\n#### For MAPS_ONLY run\r\n\r\n```bash\r\nnextflow run sanger-tol/curationpretext \\\r\n  --input { input.fasta } \\\r\n  --cram { path/to/cram/ } \\\r\n  --reads { path/to/longread/fasta/ } \\\r\n  --read_type { default is \"hifi\" }\r\n  --sample { default is \"pretext_rerun\" } \\\r\n  --teloseq { default is \"TTAGGG\" } \\\r\n  --map_order { default is \"unsorted\" } \\\r\n  --outdir { OUTDIR } \\\r\n  -profile \u003cdocker/singularity/{institute}\u003e \\\r\n  -entry MAPS_ONLY \\\r\n```\r\n\r\n\u003e **Warning:**\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\r\n\u003e provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\r\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\r\n\r\n## Pipeline output\r\n\r\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\r\nFor more details about the output files and reports, please refer to the\r\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\r\n\r\n## Credits\r\n\r\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n- @muffato - For reviews.\r\n\r\n- @yumisims - TreeVal and Software.\r\n\r\n- @weaglesBio - TreeVal and Software.\r\n\r\n- @josieparis - Help with better docs and testing.\r\n\r\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#curationpretext` channel](https://nfcore.slack.com/channels/curationpretext) (you can join with [this invite](https://nf-co.re/join/slack)).\r\n\r\n## Citations\r\n\r\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) 
file.\r\n\r\nYou can cite the `nf-core` publication as follows:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Analysis, Tree of Life Genome Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1320?version=1","name":"main @ 1cc6a31","author":["Damon-Lee Pointon"],"descriptor_type":["NFL"]}]},{"id":"1321","url":"https://workflowhub.eu/workflows/1321","name":"sanger-tol/curationpretext","description":"# sanger-tol/curationpretext\r\n\r\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/curationpretext/actions?query=workflow%3A%22nf-core+CI%22)\r\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/curationpretext/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\r\n\r\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\r\n[![Launch on Nextflow 
Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/curationpretext)\r\n\r\n## Introduction\r\n\r\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\r\n\r\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\r\n\r\n![Workflow Diagram](./assets/CurationPretext.png)\r\n\r\n1. Generate Maps - Generates pretext maps as well as a static image.\r\n\r\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\r\n\r\n## Usage\r\n\r\n\u003e **Note**\r\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how\r\n\u003e to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)\r\n\u003e with `-profile test` before running the workflow on actual data.\r\n\r\nCurrently, the pipeline uses the following flags:\r\n\r\n- `--input`\r\n\r\n  - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\r\n\r\n- `--reads`\r\n\r\n  - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\r\n\r\n- `--read_type`\r\n\r\n  - The type of longread data you are utilising, e.g., ont, illumina, hifi.\r\n\r\n- `--aligner`\r\n\r\n  - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\r\n\r\n- `--cram`\r\n\r\n  - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\r\n\r\n- `--map_order`\r\n\r\n  - hic map scaffold order, input either `length` or `unsorted`\r\n\r\n- `--teloseq`\r\n\r\n  - A telomeric sequence, e.g., `TTAGGG`\r\n\r\n- `-entry`\r\n  - ALL_FILES is the default and generates all accessory files as well as pretext maps\r\n  - MAPS_ONLY generates only the pretext maps and static images\r\n\r\nNow, you can run the pipeline using:\r\n\r\n#### For ALL_FILES run\r\n\r\n```bash\r\nnextflow run sanger-tol/curationpretext \\\r\n  --input { input.fasta } \\\r\n  --cram { path/to/cram/ } \\\r\n  --reads { path/to/longread/fasta/ } \\\r\n  --read_type { default is \"hifi\" }\r\n  --sample { default is \"pretext_rerun\" } \\\r\n  --teloseq { default is \"TTAGGG\" } \\\r\n  --map_order { default is \"unsorted\" } \\\r\n  --outdir { OUTDIR } \\\r\n  -profile \u003cdocker/singularity/{institute}\u003e\r\n\r\n```\r\n\r\n#### For MAPS_ONLY run\r\n\r\n```bash\r\nnextflow run sanger-tol/curationpretext \\\r\n  --input { input.fasta } \\\r\n  --cram { path/to/cram/ } \\\r\n  --reads { path/to/longread/fasta/ } \\\r\n  --read_type { default is \"hifi\" }\r\n  --sample { default is \"pretext_rerun\" } \\\r\n  --teloseq { default is \"TTAGGG\" } \\\r\n  
--map_order { default is \"unsorted\" } \\\r\n  --outdir { OUTDIR } \\\r\n  -profile \u003cdocker/singularity/{institute}\u003e \\\r\n  -entry MAPS_ONLY \\\r\n```\r\n\r\n\u003e **Warning:**\r\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\r\n\u003e provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\r\n\r\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\r\n\r\n## Pipeline output\r\n\r\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\r\nFor more details about the output files and reports, please refer to the\r\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\r\n\r\n## Credits\r\n\r\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\r\n\r\nWe thank the following people for their extensive assistance in the development of this pipeline:\r\n\r\n- @muffato - For reviews.\r\n\r\n- @yumisims - TreeVal and Software.\r\n\r\n- @weaglesBio - TreeVal and Software.\r\n\r\n- @josieparis - Help with better docs and testing.\r\n\r\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\r\n\r\n## Contributions and Support\r\n\r\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\r\n\r\nFor further information or help, don't hesitate to get in touch on the [Slack `#curationpretext` channel](https://nfcore.slack.com/channels/curationpretext) (you can join with [this invite](https://nf-co.re/join/slack)).\r\n\r\n## Citations\r\n\r\nIf you use 
sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\r\n\r\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\r\n\r\nYou can cite the `nf-core` publication as follows:\r\n\r\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\r\n\u003e\r\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\r\n\u003e\r\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\r\n","organization":"Tree of Life Genome Analysis, Tree of Life Genome Assembly","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1321?version=1","name":"1.2.0","author":["Damon-Lee Pointon"],"descriptor_type":["NFL"]}]},{"id":"1322","url":"https://workflowhub.eu/workflows/1322","name":"PISAD - Phsaed Intraspecies Sample Anomalies Detection tool","description":"# PISAD - Phsaed Intraspecies Sample Anomalies Detection tool\r\n\r\n## Summary\r\n\r\nWe developed PISAD, a tool designed to detect anomalies in cohort samples without requiring reference information. It is primarily divided into two stages. Stage 1: We select low-error data from the cohort and conduct reference-free SNP calling to construct a variant sketch. 
Stage 2: By comparing the k-mer counts of other cohort data to the variant sketch, we infer the relationships between the sample and other samples to detect the sample swap.\r\n\r\n## Dependencies\r\n\r\nrecommend use conda to install\r\n\r\n- GCC (Tested on 8.5.0)\r\n- gperftools(2.10)\r\n- hdf5(1.14.3)\r\n- boost(1.85.0)\r\n\r\n## Installation\r\n\r\ncloning the PISAD repository to your machine and enter its directory.\r\n\r\n```bash\r\n git clone https://github.com/ZhantianXu/PISAD.git\r\n cd pisad/\r\n```\r\n\r\nCompiling should be as easy as:\r\n\r\n```bash\r\n./configure \u0026\u0026 make\r\n```\r\n\r\nTo install in a specified directory:\r\n\r\n```bash\r\n./configure --prefix=/PATH \u0026\u0026 make install\r\n```\r\n\r\n## Usage\r\n\r\n##### Stage1: SNP callng :\r\n\r\nFirst, we select a low-error-rate sequencing dataset as the target sample for rapid SNP calling. It supports multi-threaded processing.\r\n\r\nExample:\r\n\r\n```bash\r\n./run.sh -i /data/hg002.fastq.gz -m 0\r\n```\r\n\r\n```bash\r\n    Required parameters:\r\n      -i:        Input files ( *.fastq or *.fastq.gz files)\r\n      -m:        Heterozygosity parameter (0 for \u003c1.2%, 1 otherwise)\r\n    Optional parameters:\r\n      -k:        kmer-size (default: 21)\r\n      -t:        thread (default: 8)\"\r\n      -o:        Output prefix (defaults: first input file's prefix)\r\n      -d1:       Directory for dsk files (default: current directory)\r\n      -d2:       Directory for output plot (default: current directory)\r\n      -d3:       Directory for SNP output (default: current directory)\r\n      -h:        Show this help message\r\n    Advanced optional parameters:\r\n      -est:      est_kmercov (default: Estimated by algorithm)\r\n      -cutoff:   cutoff threshold (defaults: 0.95)\r\n      -het:      Initial heterozygosity (defaults: 0/0.12)\r\n      -rho:      Initial rho value (defaults: 0.2)\r\n      -setleft:  Left boundary of the heterozygous region (defaults: Estimated by 
algorithm)\r\n      -setright: Right boundary of the heterozygous region (defaults: Estimated by algorithm)\r\n```\r\n\r\n##### Stage1: construct variant sketch:\r\n\r\nNext, we convert the called SNPs into a variant sketch.\r\n\r\n```bash\r\n./create -i /snp/hg002_21_2_4_pairex.snp\r\n```\r\n\r\n```bash\r\n    Required parameters:\r\n      -i:        Input files ( .snp file)\r\n    Optional parameters:\r\n      -k:        kmer-size (default: 21)\r\n      -l:        Filtering threshold (default: 21)\r\n      -o:        Output prefix (defaults: current directory)\r\n```\r\n\r\n##### Stage2: count the k-mers:\r\n\r\nwe compare the k-mer counts of other cohort samples to the variant sketch to infer relationships between them. Files may be gzipped and multiple threads can be used.\r\n\r\n```bash\r\n./pisadCount -s /fa/hg002.fa /data/hg003.fastq.gz\r\n```\r\n\r\n```bash\r\n    Usage: ./pisadCount -s [FASTA] [OPTION]... [FILES...]\r\n    Required options:\r\n        -s:         variant sketch (one or more)\r\n    Optional options:\r\n        -t:      Number of threads to run (default: 1)\r\n        -m:      k-mer coverage threshold for early termination (default: inf)\r\n        -i:      extra debug information\r\n        -k:      k-mer size used (default: 21)\r\n        -o:      Evaluation file path (defaults: current directory)\r\n        -h:      Display this dialog\r\n\r\n```\r\n\r\nHere, the -s option allows inputting multiple FA files for variant sketching, separated by commas, such as `-s /fa/hg002.fa,/fa/hg001.fa`.\r\nIf your input file has a high coverage, you can also add the `-m` parameter to control the reading process and save time, such as `-m 10`.\r\n\r\n##### Stage2:Evaluate the samples:\r\n\r\nInput the statistics of samples to calculate their relationship and detect sample swaps.\r\n\r\n```bash\r\n./pisadEval /homeb/xuzt/coverage/eval/hg002_hg003.txt \u003e summary.tsv\r\n```\r\n\r\n```bash\r\n    Usage: ./pisadEval [OPTION]... 
[FILES...]\r\n    Optional options:\r\n        -t:      Number of threads to run(default: 1)\r\n        -h:      Display this dialog\r\n\r\n```\r\n","organization":"CSUbioinformatics","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1322?version=1","name":"main @ 01f7a4a","author":[],"descriptor_type":[]}]},{"id":"1323","url":"https://workflowhub.eu/workflows/1323","name":"annotation-maker/main","description":"This workflow allows for genome annotation using Maker and evaluates the quality of the annotation.","organization":"EuroScienceGateway, Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1323?version=1","name":"v0.1","author":["Romane Libouban"],"descriptor_type":["GALAXY"]}]},{"id":"1324","url":"https://workflowhub.eu/workflows/1324","name":"lncRNAs-annotation/main","description":"This workflow runs the FEELnc tool to annotate long non-coding RNAs. Before annotating these long non-coding RNAs, StringTie will be used to assemble the RNA-seq alignments into potential trancriptions. 
The gffread tool provides a genome annotation file in GTF format.","organization":"EuroScienceGateway, Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1324?version=1","name":"v0.1","author":["Romane Libouban"],"descriptor_type":["GALAXY"]}]},{"id":"1325","url":"https://workflowhub.eu/workflows/1325","name":"nf-core/genomeassembler","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-genomeassembler_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/genomeassembler\" src=\"docs/images/nf-core-genomeassembler_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat\u0026logo=nextflow\u0026logoColor=white\u0026color=%230DC09D\u0026link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat\u0026logo=nfcore\u0026logoColor=white\u0026color=%2324B064\u0026link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000\u0026logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\u003cpicture\u003e\n  \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/genomeassembler.dark.png\"\u003e\n  \u003cimg alt=\"nf-core/genomeassembler\" src=\"docs/images/genomeassembler.light.png\"\u003e\n\u003c/picture\u003e\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias Hörtenhuber](https://github.com/mashehu)\n- [Louis Le Nézet](https://github.com/LouisLeNezet)\n- [Júlia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1325?version=1","name":"1.0.1","author":[],"descriptor_type":["NFL"]},{"id":"2","url":"https://workflowhub.eu/workflows/1325?version=2","name":"1.0.0","author":[],"descriptor_type":["NFL"]},{"id":"3","url":"https://workflowhub.eu/workflows/1325?version=3","name":"1.1.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1326","url":"https://workflowhub.eu/workflows/1326","name":"Lysozyme in water full with new provenance run features","description":"Lysozyme in water full COMPSs application. 
Added new WRROC profile: [Provenance Run Crate](https://www.researchobject.org/workflow-run-crate/profiles/provenance_run_crate/)","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1326?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1327","url":"https://workflowhub.eu/workflows/1327","name":"hi-c-contact-map-for-assembly-manual-curation/main","description":"This workflow generates Hi-C contact maps for genome assemblies in the Pretext format. It is compatible with one or 2 haplotypes. It includes tracks for PacBio read coverage, Gaps, and telomeres. The Pretext files can be open in PretextView for the manual curation of genome assemblies.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1327?version=1","name":"v1.0beta1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1327?version=2","name":"v1.0beta2","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1327?version=3","name":"v1.0beta3","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1327?version=4","name":"v1.0beta4","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"5","url":"https://workflowhub.eu/workflows/1327?version=5","name":"v1.0beta5","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"6","url":"https://workflowhub.eu/workflows/1327?version=6","name":"v1.0beta6","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"7","url":"https://workflowhub.eu/workflows/1327?version=7","name":"v2.0","author":["Delphine 
Lariviere"],"descriptor_type":["GALAXY"]},{"id":"8","url":"https://workflowhub.eu/workflows/1327?version=8","name":"v2.1","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]},{"id":"9","url":"https://workflowhub.eu/workflows/1327?version=9","name":"v2.2","author":["Delphine Lariviere"],"descriptor_type":["GALAXY"]}]},{"id":"1328","url":"https://workflowhub.eu/workflows/1328","name":"Tissue segmentation workflow","description":"# Tissue segmentation workflow\r\n\r\nThis workflow performs tissue segmentation on H\u0026E whole slide images using AI.\r\n","organization":"CDPP","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1328?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"1329","url":"https://workflowhub.eu/workflows/1329","name":"Prostate cancer classification workflow","description":"# Prostate cancer classification workflow\r\n\r\nThis workflow segments tissue regions and classifies prostate cancer on H\u0026E whole slide images, using AI. It consists of three steps:\r\n\r\n1. low-resolution tissue segmentation to select areas for further processing;\r\n\r\n2. high-resolution tissue segmentation to refine borders - it uses step 1 as input;\r\n\r\n3. 
high-resolution normal/cancer classification - it uses step 1 as input.\r\n","organization":"CDPP","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1329?version=1","name":"Version 1","author":[],"descriptor_type":["CWL"]}]},{"id":"1330","url":"https://workflowhub.eu/workflows/1330","name":"AnnoAudit - Annotation Auditor","description":"# AnnoAudit - Annotation Auditor\r\n\r\nAnnoAudit is a robust Nextflow pipeline designed to evaluate the quality of genomic annotations through a multifaceted approach.\r\n\r\n## Overview of the workflow\r\n\r\nThe workflow assess the annotation quality based on different criteria:\r\n- Protein evidence support\r\n- RNASeq evidence support\r\n- Statistics of the predictions (i.e., gene length, exon number, etc.)\r\n- Ortholog analysis (BUSCO, OMArk)\r\n\r\n### Input data\r\n\r\n- Reference genome `genome.[.fna, .fa, .fasta]`\r\n- Annotation output `annotation.gff`\r\n- RNAseq data listed in a metadata csv file. Input type can be mixed between long and short reads, with the option of single-end read. 
The input file should follow the format below:\r\n\r\n```\r\nsample_id,R1_path,R2_path,read_type\r\nSAM1,/path/to/R1,,long             # For long reads\r\nSAM2,/path/to/R1,/path/to/R2,short # For PE reads\r\nSAM3,/path/to/R1,,short            # For SE reads\r\n```\r\n\r\n- Protein reference data in `fasta` format for evaluation, if not given, then the `Uniprot-SwissProt` will be downloaded and used.\r\n\r\n### Pipeline steps\r\n\r\n![Pipeline](./assets/images/annoaudit-workflow.svg)\r\n\r\nThe main pipeline is divided into five different subworkflows.\r\n- `General statistics`: Calculate the statistics obtained from the GFF file.\r\n- `RNASeq analysis`: Map the RNASeq data to the genome (or with provided mapping bam file) to generate exon, intron, transcript coverage.\r\n- `Ortholog analysis`: Compare the predicted proteome to known database using BUSCO and OMArk (OMA database).\r\n- `Protein analysis`: Blast the predicted proteome to a known database (could be of relative species) to obtain best reciprocal hits (BRH), then generate statistics based on the BRH results.\r\n\r\n### Output data\r\n\r\n- Output text file contain the statistic calculated from the input `GFF` file: \r\n  - General statistics\r\n  - BUSCO\r\n  - OMArk\r\n  - PSAURON\r\n  - Best reciprocal hits\r\n  - RNASeq analysis\r\n\r\n## Prerequisites\r\n\r\nThe following programs are required to run the workflow and the listed version were tested. 
\r\n\r\n`nextflow v23.04.0 or higher`\r\n\r\n`singularity`\r\n\r\n`docker` (have not been tested but in theory should work fine)\r\n\r\n## Installation\r\n\r\nSimply get the code from github or workflowhub and directly use it for the analysis with `nextflow`.\r\n\r\n```\r\ngit clone https://github.com/ERGA-consortium/pipelines\r\n```\r\n\r\n## Running AnnoAudit\r\n\r\n### Before running the pipeline (IMPORTANT)\r\n\r\nOne thing with Nextflow is that it is running off a Java Virtual Machine (JVM), and it will try to use all available memory for Nextflow even though it is unnecessary (for workflow management and job control). This will cause much trouble if you run a job on an HPC cluster. Thus, to minimize the effect of it, we need to limit the maximum memory the JVM can use.\r\n\r\n```\r\nexport NFX_OPTS=\"-Xms=512m -Xmx=3g\"\r\n```\r\n\r\n`-Xms` is the lower limit, which is set as 512 MB.\r\n`-Xmx` is the upper limit, which in this case is set as 3 GB.\r\nPlease modify this according to your situation.\r\n\r\n### How to run the code\r\n\r\n```\r\nnextflow run main.nf --genome genome.fasta \\\r\n      --gff annotation.gff3 \\\r\n      --rnaseq metadata.csv [--genome_bam path/to/the/mapped/bam]\\\r\n      --outdir OUTDIR_NAME \\\r\n      --taxon_id 9606 [Optional] \\\r\n      --ncbi_query_email xxxx \\\r\n      --rm -resume\r\n```\r\n\r\n### Other parameters for running the analysis\r\n\r\n```\r\nInput parameter:\r\n--genome                  Draft genome fasta file contain the assembled contigs/scaffolds.\r\n--gff                     Annotation file that need to be evaluated.\r\n--genome_bam              BAM file contain the mapped information from the RNASeq to the genome FASTA.\r\n--rnaseq                  A metadata CSV file following the pattern: sample_id,R1_path,R2_path,read_type. 
Required if `genome_bam` is not provided.\r\n--taxon_id                Taxon ID for identifying BUSCO lineage and download protein data from NCBI if needed.\r\n--ncbi_query_email        Email for querying protein from NCBI database.\r\n\r\nOptional input:\r\n--protein                  Fasta file containing translated protein sequences from the GFF for running evaluation. If not specified, the workflow will automatically extract it from the\r\n `genome` and `gff`.\r\n--ref_protein              Fasta file containing the reference protein sequences to be used for evaluation. Ideally this should come from the same species and/or closely related specie\r\ns. If not provided, the workflow will download the proteome from NCBI or using Uniprot SwissProt database.\r\n--lineage                  Lineage information providing for BUSCO, if not provided, the workflow will automatically search for the closest lineage. Example: eudicots_odb10.\r\n--genetic_code             Genetic code for translation of protein.\r\n--stranding                Strandness of the RNASeq reads used for extraction of junction position using `regtools`.\r\n\r\nDatabase input:\r\n--odb_version              odb version to choose to run BUSCO, option: odb12, odb10. [default: odb12]\r\n--busco_database           Pathway to the BUSCO databse store locally. [default: null]\r\n--oma_database             Pathway to the OMA database, if not specified, the workflow will download it automatically. [default: null]\r\n--ref_protein              Pathway to the reference proteome for comparison. [default: null]\r\n--ncbi_query_count         Number of protein to extract from the NCBI database. [default: 100000]\r\n--ncbi_query_batch         Number of protein to query for each batch. [default: 1000]\r\n\r\nOutput option:\r\n--pdf                      Output PDF name. [default: AnnoAudit_Report.pdf]\r\n--outdir                   Output directory. 
[default: /env/export/bigtmp2/pdoan/evaluate_pipeline]\r\n--tracedir                 Pipeline information. [default: /env/export/bigtmp2/pdoan/evaluate_pipeline/pipeline_info]\r\n--publish_dir_mode         Option for nextflow to move data to the output directory. [default: copy]\r\n--tmpdir                   Database directory. [default: /env/export/bigtmp2/pdoan/evaluate_pipeline/tmpdir]\r\n\r\nConditioning options:\r\n--rnaseq_single             If specify, will run `featureCounts` in single read mode, this is necessary if the mapped RNASeq is single-ended. [default: false]\r\n--run_blast                 If specify, will use `blast` for running best reciprocal hits instead of DIAMOND. [default: false]\r\n--query_ncbi_prot           If specify, will download the reference proteome from NCBI, other wise, will use the provided proteom or Uniprot SwissProt. [default: true]\r\n--cds_only                  If specify, only extracting information from the GFF file using the CDS line. [default: \"False\"]\r\n\r\n--help                   Print help message.\r\n\r\nExecution/Engine profiles:\r\nThe pipeline supports profiles to run via different Executers and Engines e.g.: -profile local,conda\r\n\r\nExecuter (choose one):\r\n  local\r\n  slurm\r\n\r\nEngines (choose one):\r\n  docker\r\n  singularity\r\n  apptainer\r\n\r\nPer default: -profile slurm,singularity is executed.\r\n```\r\n\r\n## Example output\r\n\r\nBelow is the sample output of this workflow. 
The example PDF output is located in the `assets` folder.
Value           |\r\n-------------------------------------------------------\r\n|lineage_dataset                    | poales_odb10    |\r\n|complete                           | 97.6%           |\r\n|single_copy                        | 95.8%           |\r\n|multi_copy                         | 1.8%            |\r\n|fragmented                         | 0.2%            |\r\n|missing                            | 2.2%            |\r\n|num_markers                        | 4896            |\r\n|domain                             | eukaryota       |\r\n\r\n\r\n|OMARK                              | Value           |\r\n-------------------------------------------------------\r\n|OMA_clade                          | Oryza           |\r\n|num_conserved_hogs                 | 15087           |\r\n|single                             | 13316 (88.26%)  |\r\n|duplicated                         | 1353 (8.97%)    |\r\n|duplicated_unexpected              | 1101 (7.30%)    |\r\n|duplicated_expected                | 252 (1.67%)     |\r\n|missing                            | 418 (2.77%)     |\r\n|num_proteins_in_proteome           | 36387           |\r\n|total_consistent                   | 30365 (83.45%)  |\r\n|consistent_partial_hits            | 1803 (4.96%)    |\r\n|consistent_fragmented              | 1625 (4.47%)    |\r\n|total_inconsistent                 | 2283 (6.27%)    |\r\n|inconsistent_partial_hits          | 517 (1.42%)     |\r\n|inconsistent_fragmented            | 1444 (3.97%)    |\r\n|total_contaminants                 | 0 (0.00%)       |\r\n|contaminants_partial_hits          | 0 (0.00%)       |\r\n|contaminants_fragmented            | 0 (0.00%)       |\r\n|total_unknown                      | 3739 (10.28%)   |\r\n\r\n|PSAURON                            | Value           |\r\n-------------------------------------------------------\r\n|psauron_score                      | 83.8            |\r\n|true_count                         | 30494           |\r\n|false_count        
                | 5893            |\r\n|median_score                       | 0.98278         |\r\n|max_score                          | 1.0             |\r\n|min_score                          | 0.00022         |\r\n\r\n\r\n|Best Reciprocal Hits               | Value           |\r\n-------------------------------------------------------\r\n|num_best_reciprocal_hits           | 29185           |\r\n|num_splitting_genes_08             | 932 (3.19%)     |\r\n|num_splitting_genes_05             | 0 (0.0%)        |\r\n|num_fusion_genes_12                | 437 (1.5%)      |\r\n|num_fusion_genes_15                | 482 (1.65%)     |\r\n|KL_divergence_normalized           | 0.0105          |\r\n|JS_divergence_normalized           | 0.0023          |\r\n|Wasserstein_distance               | 2.480915        |\r\n\r\n\r\n|RNASeq                             | Value           |\r\n-------------------------------------------------------\r\n|mapping_rate                       | 96.27%          |\r\n|primary_mapping_rate               | 95.83%          |\r\n|properly_paired                    | 92.47%          |\r\n|num_gene_unsupported               | 9445 (25.95%)   |\r\n|num_exon_unsupported               | 20232 (13.51%)  |\r\n|num_intron_supported               | 107202          |\r\n|num_intron_supported_canonical     | 107131 (99.93%) |\r\n|num_intron_supported_non_canonical | 71 (0.07%)      |\r\n```\r\n\r\n## Performance of the workflow on assessing annotation\r\n\r\nTo be added\r\n\r\n## Future work\r\n\r\n- Adding other plots for easier evaluation\r\n- Perform comparative performance with different genomes","organization":"Bioinformatics Laboratory for Genomics and Biodiversity (LBGB)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1330?version=1","name":"main @ a25f396","author":["Phuong 
Doan"],"descriptor_type":["NFL"]}]},{"id":"1332","url":"https://workflowhub.eu/workflows/1332","name":"[DTC-E5] WF7502: Synthetic shaking simulation or Event-DT","description":"Digital Twin Component for Earthquakes (DTC-E) WorkFlow 2 (WF7502) generates synthetic shaking simulations (shakemaps) for seismic sources at different time scales, and automatically incorporates new data as it becomes available.\r\n\r\nThe workflow consists of multiple steps (ST), datasets (DT), and software services (SS). Below is a simplified breakdown:\r\n\r\n**Data Ingestion \u0026 Preprocessing**\r\n* ST750201: Assimilates real-time earthquake data from external sources.\r\n* ST750202: Extracts earthquake source parameters.\r\n* SS7502 - SS7504: Software services supporting data extraction.\r\n\r\n**HPC Simulations \u0026 Hazard Mapping**\r\n* ST750203: Prepares input parameters for simulations.\r\n* ST750204: Runs high-performance computing (HPC) earthquake simulations.\r\n* SS7501: Simulating software salvus by mondaic.com.\r\n* ST750207: Generates synthetic shaking maps based on ML (MLESmap).\r\n\r\n**Dynamic Updates \u0026 Urgent Computing**\r\n* ST750205: Evaluates real-time event updates.\r\n* ST750206: Enables urgent computing for time-sensitive simulations.\r\n\r\n**Post-Processing \u0026 Output Storage**\r\n* ST750208: Post-processes the results.\r\n* ST750209: Gathers final outputs.\r\n* ST750210: Updates the Shake Maps Library (DT7502).","organization":"WP7 - Earthquakes","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1332?version=1","name":"Version 2","author":["Johannes Kemper","Cedric Bhihe","Georgina Díez"],"descriptor_type":["CWL"]}]},{"id":"1333","url":"https://workflowhub.eu/workflows/1333","name":"[DTC-E5] WF7501: Full waveform inversion of Earth model or Earth-DT","description":"Digitial Twin Component for Earthquakes (DTC-E5) WorkFlow 1 (WF7501) uses new data to create 
a multi-scale model of the Earth, which is used to update the REVEAL/CSEMv3 velocity model. \r\nThe WF7501 workflow models an inversion-based update process for CSEMv3 (Computational Seismic Earth Model).\r\nBelow is a simple explanation of the STEPs:\r\n\r\n* **ST750101 - Data Catalog Update**: Collects external data sources and updates the database for further processing.\r\n* **ST750102 - Inversion Setup**: Configures inversion parameters at local, regional, and global levels.\r\n* **ST750103 - Model Extraction**: Extracts an initial computational model from the setup parameters.\r\n* **ST750104 - Inversion Iterations**: Performs multiple iterations to refine the extracted model.\r\n* **ST750105 - Model Update**: Updates the CSEM model using the results from the inversion process.\r\n* **ST750106 - User Model Validation**: The refined model undergoes external validation to ensure accuracy and usability.","organization":"WP7 - Earthquakes","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1333?version=1","name":"Version 2","author":["Johannes Kemper"],"descriptor_type":["CWL"]}]},{"id":"1336","url":"https://workflowhub.eu/workflows/1336","name":"High-precision machine learning identifies a reproducible functional connectivity signature of autism spectrum diagnosis in a subset of individuals","description":"# Code for the high risk autism phenotype paper\r\n[![MIT license](https://img.shields.io/badge/License-MIT-blue.svg)](https://lbesson.mit-license.org/)\r\n\r\nThis repository implements a **fully reproducible pipeline** for the autism signature project. 
It uses `invoke` tasks and a Docker container for **consistent, cross-platform execution.**\r\n\r\nThe entire workflow—data fetching, processing, and figure generation—can be **reproduced in a few commands.**\r\nMuch of the code in this repo originated from [ASD High Risk Endophenotype Code Supplement](https://github.com/surchs/ASD_high_risk_endophenotype_code_supplement) and was written by Sebastian Urchs and Hien Nguyen.\r\nAll data to reproduce the analysis can be downloaded from [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15517157.svg)](https://doi.org/10.5281/zenodo.15517157)  \r\n\r\n- The study uses data from ABIDE 1 and 2 datasets. Participants were matched using propensity score matching as part of another project, matching scripts can be found here [ASD Subtype Code Supplement](https://github.com/surchs/ASD_subtype_code_supplement/tree/master/scripts/pheno).\r\n- Resting state functional connectivity data was preprocessed using NIAK, described in the paper. This study uses the seed maps.\r\n- Using the following scripts the full analysis can be reproduced. Alternatively, to skip the data analysis part and recreate the figures, download only the results and atlas data from Zenodo.\r\n\r\n\r\n## 🚀 Quick Start\r\n\r\n### 1⃣ Install invoke\r\n\r\nFirst, install `invoke`:\r\n\r\n```bash\r\npip install invoke\r\n```\r\n\r\nYou must also have either Docker or Apptainer installed to use container-based execution.\r\n\r\n### 4⃣ Clean Everything\r\n\r\nTo remove all generated data:\r\n\r\n```bash\r\ninvoke clean-all\r\n```\r\n\r\n## 📁 Folder Structure\r\n\r\n| Folder                        | Description                                              |\r\n| ----------------------------- | -------------------------------------------------------- |\r\n| `source_data/`                | Raw data: Atlases \u0026 fMRI data.                           |\r\n| `output_data/`                | All generated outputs: Discovery results, figures, etc.  
|\r\n| `code/figures/`               | Jupyter notebooks used to generate all figures.          |\r\n| `output_data/Figures/`        | Output folders for each figure notebook.                 |\r\n| `tasks.py` / `tasks_utils.py` | The heart of the pipeline: all `invoke` tasks live here. |\r\n\r\n## Building the environment\r\n\r\n### Installing dependencies\r\nTo set up everything, ensure functional Python and R environments and run:\r\n\r\n```bash\r\ninvoke setup-all\r\n```\r\n\r\nThis:\r\n\r\n* sets up Python \u0026 R environments (if running locally);\r\n* prepares the folder structure.\r\n\r\n**Note:**\r\nThis task assumes an Ubuntu-like OS. You still need to install R, Python, etc. See the Dockerfile for complete setup info.\r\n\r\n**Note 2:**\r\nYou can skip this if using the Docker container and running `docker-run` directly.\r\n\r\n### Create a Docker image\r\nTo build a Docker image:\r\n\r\n```bash\r\ninvoke docker-build\r\n```\r\n\r\nTo generate a compressed archive:\r\n\r\n```bash\r\ninvoke docker-archive\r\n```\r\n\r\n### Create an Apptainer image\r\nAfter building the Docker image, run:\r\n\r\n```bash\r\ninvoke apptainer-archive\r\n```\r\n\r\nThis builds the `.sif` image from the Docker daemon.\r\n","organization":"SIMEXP","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1336?version=1","name":"main @ d423b33","author":["Natasha Clarke","Lune Bellec"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1336?version=2","name":"main @ 4b6e6b9","author":["Natasha Clarke","Lune Bellec"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/1336?version=3","name":"main @ 4b6e6b9","author":["Natasha Clarke","Lune Bellec"],"descriptor_type":[]}]},{"id":"1337","url":"https://workflowhub.eu/workflows/1337","name":"tissue-microarray-analysis/main","description":"Complete multiplex tissue image (MTI) analysis pipeline for tissue 
microarray (TMA) data imaged using cyclic immunofluorescence: Performs illumination correction, stitching and registration, and tissue microarray segmentation. Tissue-segmented images undergo nuclear segmentation, cell/nuclei feature quantification (mean marker intensities, cell coordinates, and morphological features), and cell phenotyping. Produces outputs that are compatible with downstream single-cell/spatial analysis and interactive image viewers including: Pyramidal OME-TIFF images, nuclear segmentation masks (TIFF), quantified feature tables (CSV, h5ad) with cell type annotations, and an interactive Vitessce dashboard that combines image viewing with linked single-cell data visualizations. ","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1337?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1337?version=2","name":"v0.1.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1338","url":"https://workflowhub.eu/workflows/1338","name":"Matrix Multiplication – resource usage visualization","description":"Application that perform the multiplication between matrices.\r\nIn this experiment, a new profiling visualization is available, showing the resource usage such as CPU, memory, data read and written to disk, and data sent and received over the network.","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1338?version=1","name":"Version 1","author":["Raül Sirvent","Nicolò Giacomini"],"descriptor_type":[]}]},{"id":"1339","url":"https://workflowhub.eu/workflows/1339","name":"COMPSs Matrix Multiplication resourceUsage profiling example MN5 MSIZE=20 BSIZE=768 7 Nodes total","description":"COMPSs Matrix 
Multiplication resourceUsage profiling example.\r\n\r\nMN5 MSIZE=20 BSIZE=768 7 Nodes (6 workers) (--num_nodes=7 --worker_in_master_cpus=0). \r\n\r\n* Total number of tasks: 20^3 = 8000\r\n* Maximum code parallelism: 20^2 = 400\r\n* Total cores: 112*6 = 672 \r\n* Maximum utilisation: 400 / 112 = 3,57 Nodes\r\n\r\nOverall stats from \"pycompss inspect\":\r\n\r\n```\r\n    │   └── overall\r\n    │       ├── matmul_tasks\r\n    │       │   └── multiply\r\n    │       │       ├── maxTime = 91,111 ms\r\n    │       │       ├── executions = 8,000 \r\n    │       │       ├── avgTime = 84,839 ms\r\n    │       │       └── minTime = 79,278 ms\r\n    │       └── executionTime = 1,929,944 ms\r\n```\r\n\r\nThis example shows misuse of resources (from 6 workers, only 3 and a half can be exploited due to the application's inherent parallelism), which can be seen in the profiling folder plots.","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1339?version=1","name":"Version 1","author":["Raül Sirvent","Rosa M Badia"],"descriptor_type":[]}]},{"id":"1340","url":"https://workflowhub.eu/workflows/1340","name":"gSpreadComp","description":"## gSpreadComp: Streamlining Microbial Community Analysis for Resistance, Virulence, and Plasmid-Mediated Spread\r\n\r\n\r\n\u003cp align=\"center\" width=\"100%\"\u003e\r\n\t\u003cimg width=\"30%\" src=\"/gspreadcomp_logo_noback.png\"\u003e\r\n\u003c/p\u003e\r\n\r\n### Overview\r\n\r\ngSpreadComp is a UNIX-based, modular bioinformatics toolkit designed to streamline comparative genomics for analyzing microbial communities. 
It integrates genome annotation, gene spread calculation, plasmid-mediated horizontal gene transfer (HGT) detection and resistance-virulence ranking within the analysed microbial community to help researchers identify potential resistance-virulence hotspots in complex microbial datasets.\r\n\r\n\u003e [!TIP]\r\n\u003e After installation, the user may want to check a detailed tutorial with example input and output data [here](usage_tutorial.md)\r\n\r\n### Objectives and Features\r\n- **Six Integrated Modules**: Offers modules for taxonomy assignment, genome quality estimation, ARG annotation, plasmid/chromosome classification, virulence factor annotation, and in-depth downstream analysis, including target-based gene spread analysis and prokaryotic resistance-virulence ranking.\r\n- **Weighted Average Prevalence (WAP)**: Employs WAP for calculating the spread of target genes at different taxonomical levels or target groups, enabling refined analyses and interpretations of microbial communities.\r\n- **Reference Pathogen Identification**: Compares genomes to the NCBI pathogens database to create a resistance-virulence ranking within the community.\r\n- **HTML Reporting**: Culminates in a structured HTML report after the complete downstream analysis, providing users with an overview of the results.\r\n\r\n### Modular Approach and Flexibility\r\n`gSpreadComp`’s modular nature enables researchers to use the tool's main analysis and report generation steps independently or to integrate only specific pieces of `gSpreadComp` into their pipelines, providing flexibility and accommodating the varying software management needs of investigators.\r\n\r\n#### Using other annotation tools with gSpreadComp\r\n\u003e [!TIP]\r\n\u003e Users can incorporate results from other annotation tools within gSpreadComp's workflow, provided the input is formatted according to gSpreadComp's specifications. 
This allows for the integration of preferred or specialized tools for specific steps (e.g., alternative ARG or plasmid detection methods) while still benefiting from gSpreadComp's downstream analysis capabilities.\r\n\u003e \r\n\u003e For the quality data it should look like: [Quality DataFrame Format](test_data/checkm_df_format_gSpread.csv)\r\n\u003e \r\n\u003e For the taxonomy data it should look like: [Taxonomy DataFrame Format](test_data/gtdb_df_format_gSpread.csv)\r\n\u003e \r\n\u003e For the gene annotation (e.g. ARGs) data it should look like: [Gene annotation DataFrame Format](test_data/deeparg_df_format_gSpread.csv)\r\n\u003e \r\n\u003e For the plasmid identification data it should look like: [Plasmid identification DataFrame Format](test_data/plasflow_combined_format_gSpread.csv)\r\n\u003e\r\n\u003e Metadata information data should look like: [Metadata Sample](test_data/02_metadata_gspread_sample.csv)\r\n\r\nBy the end of a successful run, you should have a report that looks like this: [Download Example Report](https://raw.githubusercontent.com/mdsufz/gSpreadComp/refs/heads/main/test_data/gSpread_example_result_report.html)\r\n\r\n### Comprehensive Workflow\r\n\r\n![ScreenShot](/test_data/01_Kasmanas_gSpread_Fig_1.png)\r\n\r\ngSpreadComp consists of the following modules:\r\n\r\n1. **Taxonomy Assignment**: Uses [GTDBtk v2](https://academic.oup.com/bioinformatics/article/38/23/5315/6758240) for taxonomic classification.\r\n2. **Genome Quality Estimation**: Employs [CheckM](https://genome.cshlp.org/content/25/7/1043) for assessing genome completeness and contamination.\r\n3. **ARG Annotation**: Utilizes [DeepARG](https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-018-0401-z) for antimicrobial resistance gene prediction.\r\n4. **Plasmid Classification**: Implements [Plasflow](https://academic.oup.com/nar/article/46/6/e35/4807335) for plasmid sequence identification.\r\n5. 
**Virulence Factor Annotation**: Annotates virulence factors using the [Victors](https://academic.oup.com/nar/article/47/D1/D693/5144967?login=false) and/or [VFDB](http://www.mgc.ac.cn/VFs/main.htm) databases.\r\n6. **Downstream Analysis**: Performs gene spread analysis, resistance-virulence ranking, and potential plasmid-mediated HGT detection.\r\n\r\n\r\n# Requirements\r\n\r\nBefore installing and running `gSpreadComp`, ensure that your system meets the following requirements:\r\n\r\n## 1. Operating System\r\n- Linux x64 system\r\n\r\n## 2. Package Managers\r\n- [Miniconda](https://docs.conda.io/en/latest/miniconda.html): Required for creating environments and managing packages.\r\n- [Mamba](https://mamba.readthedocs.io/en/latest/user_guide/mamba.html): A faster package manager used within the `gSpreadComp` installation.\r\n\r\n## 3. Storage\r\n- Approximately 15 GB for software installation.\r\n- Around 92 GB for the entire database requirements.\r\n\r\n# Installation\r\n\r\n## Database Management\r\n`gSpreadComp` includes an easy-to-use script for automatic download and configuration of the required databases, with scheduled updates every January and July.\r\n\r\n## Compatibility and Requirements\r\nDesigned to support Linux x64 systems, requiring approximately 15 GB for software installation and around 92 GB for the entire database requirements.\r\n\r\n## 1 - Install miniconda\r\n\r\nTo bypass conflicting dependencies, the gSpreadComp approach uses miniconda to create automatically orchestrated environments. [Mamba](https://mamba.readthedocs.io/en/latest/user_guide/mamba.html) is a much faster package manager than conda and is used within the gSpreadComp installation. Consequently, miniconda and mamba are required to be previously installed in your system. Below is a possible way of installing miniconda and mamba. 
Please, be aware that mamba works best when installed in your base environment.\r\n\r\n```console\r\n# See documentation: https://docs.conda.io/en/latest/miniconda.html\r\n\r\n$ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh\r\n$ chmod +x Miniconda3-latest-Linux-x86_64.sh\r\n$ ./Miniconda3-latest-Linux-x86_64.sh\r\n$ export PATH=~/miniconda3/bin:$PATH\r\n\r\n# Install mamba. See documentation: https://mamba.readthedocs.io/en/latest/installation.html\r\n$ conda install mamba -n base -c conda-forge\r\n```\r\n\r\n## 2 - Install gSpreadComp\r\n\r\nOnce you have miniconda and mamba installed and on your PATH, you can proceed to install gSpreadComp.\r\nThe installation script was designed to install and set up all necessary tools and packages.\r\n\r\n```console\r\n# Clone repository\r\n$ git clone https://github.com/mdsufz/gSpreadComp.git\r\n\r\n# Go to the gSpreadComp cloned repository folder\r\n$ cd gSpreadComp\r\n\r\n# Make sure you have conda ready and that you are in your base environment.\r\n$ conda activate base\r\n$ echo $CONDA_PREFIX\r\n\r\n# You should see something like the following:\r\n/path/to/miniconda3\r\n\r\n# Run the installation script as follows\r\n$ bash -i installation/install.sh\r\n\r\n# Follow the instructions on the screen:\r\n# Enter \"y\" if you want to install all modules; otherwise, enter \"n\".\r\n# If you entered \"n\", enter \"y\" for each of the modules you would like to install individually.\r\n\r\n\tThe MuDoGeR's installation will begin..\r\n\r\n\r\n\t      (  )   (   )  )\t\t\t\r\n\t       ) (   )  (  (\t\t\t\r\n\t       ( )  (    ) )\t\t\t\r\n\t       _____________\t\t\t\r\n\t      \u003c_____________\u003e ___\t\t\r\n\t      |             |/ _ \\\t\t\r\n\t      |               | | |\t\t\r\n\t      |               |_| |\t\t\r\n\t   ___|             |\\___/\t\t\r\n\t  /    \\___________/    \\\t\t\r\n\t  \\_____________________/\t\t\r\n\r\n\tThis might take a while. 
Time to grab a coffee...\r\n```\r\n\r\n## 3 - Install necessary databases\r\n\r\n**Make sure to run the database setup after gSpreadComp is installed.**\r\n\r\nSome bioinformatics tools used within gSpreadComp require specific databases to work. We developed a database download and set up tool to make our lives easier. You can choose to install only the databases you intend to use. You can use the flag `--dbs` to choose and set up the selected databases (all [default], install all databases).\r\n\r\nUse this script if you want gSpreadComp to take care of everything.\r\n\r\n```console\r\n# Make sure gSpreadComp_env is activated. It should have been created when you ran 'bash -i installation/install.sh'\r\n$ conda activate gspreadcomp_env\r\n\r\n# Go to gSpreadComp cloned directory\r\n$ cd gSpreadComp\r\n\r\n# Run the database setup script\r\n$ bash -i installation/database-setup.sh --dbs all -o /path/to/save/databases\r\n\r\n# You can also check out the database-setup help information\r\n$ bash -i installation/database-setup.sh --help\r\n\r\n        gSpreadComp database script v=1.0\r\n        Usage: bash -i database-setup.sh --dbs [module] -o output_folder_for_dbs\r\n\t\t    USE THE SAME DATABASE LOCATION OUTPUT FOLDER FOR ALL DATABASES USED WITH gSpreadComp\r\n          --dbs all\t\t\t\tdownload and install the required and optional databases [default]\"\r\n          --dbs required              \t\tdownload and install the required databases (Victors and VFDB) for gSpreadComp\r\n          --dbs optional              \t\tdownload and install all the optional (ARGs, GTDB-tk, CheckM) databases for gSpreadComp\r\n          --dbs args\t\t\t\tdownload and install the required and the ARGs databases.\r\n          -o path/folder/to/save/dbs\t\toutput folder where you want to save the downloaded databases\r\n          --help | -h\t\t\t\tshow this help message\r\n          --version | -v\t\t\tshow database install script version\r\n\r\n\r\n```\r\n\r\n## Usage\r\n\r\n### 
Activating the Conda Environment\r\nBefore using `gSpreadComp`, activate the appropriate conda environment using the following command:\r\n```sh\r\nconda activate gSpreadComp_env\r\n```\r\n\r\n### Command-Line Usage\r\n`gSpreadComp` provides several modules, each performing a specific task within the pipeline. The quick command-line usage is as follows:\r\n```sh\r\ngspreadcomp --help\r\n```\r\n\r\n### Modules and Their Descriptions\r\n`gSpreadComp` comprises several modules, each serving a specific purpose in the genome analysis workflow:\r\n\r\n#### 1. Taxonomy Assignment\r\n```sh\r\ngspreadcomp taxonomy [options] --genome_dir genome_folder -o output_dir\r\n```\r\n- Assigns taxonomy to genomes using [GTDBtk v2](https://academic.oup.com/bioinformatics/article/38/23/5315/6758240).\r\n- Options:\r\n  - `--genome_dir STR`: folder with the bins to be classified (in fasta format)\r\n  - `--extension STR`: fasta file extension (e.g. fa or fasta) [default: fa]\r\n  - `-o STR`: output directory\r\n  - `-t INT`: number of threads\r\n\r\n#### 2. Genome Quality Estimation\r\n```sh\r\ngspreadcomp quality [options] --genome_dir genome_folder -o output_dir\r\n```\r\n- Estimates genome completeness and contamination using [CheckM](https://genome.cshlp.org/content/25/7/1043).\r\n- Options:\r\n  - `--genome_dir STR`: folder with the genomes to estimate quality (in fasta format)\r\n  - `--extension STR`: fasta file extension (e.g. fa or fasta) [default: fa]\r\n  - `-o STR`: output directory\r\n  - `-t INT`: number of threads [default: 1]\r\n  - `-h --help`: print this message\r\n\r\n#### 3. 
ARG Prediction\r\n```sh\r\ngspreadcomp args [options] --genome_dir genome_folder -o output_dir\r\n```\r\n- Predicts the Antimicrobial Resistance Genes (ARGs) in a genome using [DeepARG](https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-018-0401-z).\r\n- Options:\r\n  - `--genome_dir STR`: folder with the genomes to be classified (in fasta format)\r\n  - `--extension STR`: fasta file extension (e.g. fa or fasta) [default: fa]\r\n  - `--min_prob NUM`: Minimum probability cutoff for DeepARG [Default: 0.8]\r\n  - `--arg_alignment_identity NUM`: Identity cutoff for sequence alignment for DeepARG [Default: 35]\r\n  - `--arg_alignment_evalue NUM`: Evalue cutoff for DeepARG [Default: 1e-10]\r\n  - `--arg_alignment_overlap NUM`: Alignment read overlap for DeepARG [Default: 0.8]\r\n  - `--arg_num_alignments_per_entry NUM`: Diamond, minimum number of alignments per entry [Default: 1000]\r\n  - `-o STR`: output directory\r\n  - `-h --help`: print this message\r\n\r\n#### 4. Plasmid Prediction\r\n```sh\r\ngspreadcomp plasmid [options] --genome_dir genome_folder -o output_dir\r\n```\r\n- Predicts if a sequence within a fasta file is a chromosome, plasmid, or undetermined using [Plasflow](https://academic.oup.com/nar/article/46/6/e35/4807335).\r\n- Options:\r\n  - `--genome_dir STR`: folder with the genomes to be classified (in fasta format)\r\n  - `--extension STR`: fasta file extension (e.g. fa or fasta) [default: fa]\r\n  - `--threshold NUM`: threshold for probability filtering [default: 0.7]\r\n  - `-o STR`: output directory\r\n  - `-h --help`: print this message\r\n\r\n#### 5. Virulence Factor annotation\r\n```sh\r\ngspreadcomp pathogens [options] --genome_dir genome_folder -o output_dir\r\n```\r\n- Aligns provided genomes to Virulence Factors databases and formats the output.\r\n- Options:\r\n  - `--genome_dir STR`: folder with the genomes to be aligned against Virulence factors (in fasta format)\r\n  - `--extension STR`: fasta file extension (e.g. 
fa or fasta) [default: fa]\r\n  - `--evalue NUM`: evalue, expect value, threshold as defined by NCBI-BLAST [default: 1e-50]\r\n  - `-t INT`: number of threads\r\n  - `-o STR`: output directory\r\n  - `-h --help`: print this message\r\n\r\n#### 6. Main Analysis\r\n```sh\r\ngspreadcomp gspread [options] -o output_dir\r\n```\r\n- Runs the main `gSpreadComp` to compare spread and plasmid-mediated HGT.\r\n- Options:\r\n  - `--checkm STR`: Path to the formatted Quality estimation dataframe\r\n  - `--gene STR`: Path to the formatted target Gene dataframe to calculate the spread\r\n  - `--gtdbtk STR`: Path to the formatted Taxonomy assignment dataframe\r\n  - `--meta STR`: Path to the formatted Sample's Metadata dataframe\r\n  - `--vf STR`: Path to the formatted Virulence Factors assignment dataframe\r\n  - `--plasmid STR`: Path to the formatted Plasmid prediction dataframe\r\n  - `--nmag INT`: Minimum number of Genomes per Library accepted [default=0]\r\n  - `--spread_taxa STR`: Taxonomic level to check gene spread [default=Phylum]\r\n  - `--target_gene_col STR`: Name of the column from the gene dataset with the Gene_ids to analyse [default=Gene_id]\r\n  - `-t INT`: number of threads\r\n  - `-o STR`: output directory\r\n  - `-h --help`: print this message\r\n\r\n\r\n## Important Considerations\r\n\r\n- gSpreadComp is designed for hypothesis generation and is not a standalone risk assessment tool.\r\n- Results should be interpreted cautiously and used to guide further experimental validation.\r\n- The tool provides relative rankings within analyzed communities, not absolute risk assessments.\r\n\r\n## Citation\r\n\r\nIf you use gSpreadComp in your research, please cite:\r\n\r\n[Citation information will be added upon publication]\r\n\r\n","organization":"Kasmanas","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1340?version=1","name":"main @ d34bd3f","author":["Jonas 
Kasmanas"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1340?version=2","name":"main @ 880fcc4","author":["Jonas Kasmanas"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/1340?version=3","name":"main @ 75874e5","author":["Jonas Kasmanas"],"descriptor_type":[]}]},{"id":"1342","url":"https://workflowhub.eu/workflows/1342","name":"Gene Fetch","description":"\u003cp align=\"center\"\u003e\r\n  \u003cimg src=\"gene_fetch_logo.svg\" width=\"400\" alt=\"gene_fetch_logo\"\u003e\r\n\u003c/p\u003e\r\n\r\n[![PyPI version](https://img.shields.io/pypi/v/gene-fetch.svg)](https://pypi.org/project/gene-fetch/)\r\n[![Install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/gene-fetch/README.html)\r\n[![JOSS DOI](https://joss.theoj.org/papers/10.21105/joss.08456/status.svg)](https://doi.org/10.21105/joss.08456)\r\n[![Github Action test](https://github.com/bge-barcoding/gene_fetch/workflows/Test%20gene-fetch/badge.svg)](https://github.com/bge-barcoding/gene_fetch/actions)\r\n[![Zenodo archive DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16759414.svg)](https://doi.org/10.5281/zenodo.16759414)\r\n\r\n\r\n# GeneFetch \r\nGene Fetch enables high-throughput retreival of sequence data from NCBI's GenBank sequence database based on taxonomy IDs (taxids) or taxonomic heirarchies (phylum-\u003especies). It can retrieve protein and/or nucleotide sequences for various 'supported' loci (including protein-coding genes (e.g., cox1, cytb, rbcl, matk) and rRNA genes (e.g., 16S, 18S). Gene Fetch can be run for 'unsupported' loci, although the quality of the returned sequence data cannot be guaranteed. 
\r\n\r\n---\r\n\r\n## Highlight features\r\n- Fetch protein and/or nucleotide sequences from NCBI's GenBank database without constructing NCBI search terms.\r\n- Handles both direct nucleotide sequence searching, and protein-linked nucleotide searches (CDS extraction includes fallback mechanisms for atypical annotation formats).\r\n- Seqeunce matches are made by searching for the target gene and/ protein in the GenBank annotation (feature table). \r\n- Contains customisable length filtering thresholds for protein and nucleotide sequences.\r\n- Default \"batch\" mode processes multiple input taxa based on a user-specified CSV file, as well as \"single\" mode (-s/--single) for retrieving a specified number of target sequences for a particular taxon.\r\n- Implements automatic taxonomy traversal (\"batch\" mode only), utilising the returned NCBI taxonomic lineage for a given taxid when sequences are not found at the input taxonomic level (i.e. If searching at a given taxid level (e.g., species) and no sequences are found, traverse 'up' a rank (species-\u003ephylum) until a suitable sequence is found).\r\n- Validates fetched sequence taxonomy against input taxonomic heirarchy, avoiding potential taxonomic homonyms (i.e. when the same taxa name is used for different taxa across the tree of life).\r\n- Handles complex sequence features (e.g., complement strands, joined sequences, WGS entries) in addition to 'simple' cds extaction (if --type nucleotide/both). The tool avoids \"unverified\" sequences and WGS entries not containing sequence data (i.e. 
master records).\r\n- 'Checkpointing' functionality, so that if a run fails/crashes, gene-fetch can be rerun using the same arguments and parameters to resume from where it stopped (unless `--clean` is specified).\r\n- When more than 50 matching GenBank records are found for a sample, the tool fetches summary information for all matches (using NCBI esummary API), orders the records by sequence length, and processes the longest sequences first.\r\n- Can output corresponding genbank (.gb) files for each fetched nucleotide and/or protein sequences.\r\n- Optional detail in FASTA sequence headers of retrieved sequences.\r\n- Robust error handling, progress tracking, and logging, with compliance to NCBI API rate limits (10 requests/second). Caches taxonomy lookups for reduced API calls.\r\n\r\n---\r\n\r\n## Contents\r\n - [Installation](#installation)\r\n - [Usage](#usage)\r\n - [Examples](#examples)\r\n - [Input](#input)\r\n - [Output](#output)\r\n - [Cluster](#running-gene-fetch-on-a-cluster)\r\n - [Supported targets](#supported-targets)\r\n - [Benchmarking](#benchmarking)\r\n - [Future developments](#future-development)\r\n -  [Contributions and guidelines](#contributions-and-guidelines)\r\n\r\n\r\n---\r\n\r\n## Installation\r\n- Due to the risk of dependency conflicts, it's recommended to install Gene Fetch in a Conda environment.\r\n- First Conda needs to be installed, which can be done from [here](https://www.anaconda.com/docs/getting-started/miniconda/install).\r\n- Once installed:\r\n```bash\r\n# Create new environment\r\nconda create -n gene-fetch\r\n\r\n# Activate environment\r\nconda activate gene-fetch\r\n```\r\n\r\n- Gene Fetch and all necessary dependencies can then be installed via [Bioconda](https://anaconda.org/bioconda/gene-fetch), [PyPI](https://pypi.org/project/gene-fetch/#description), or by specifying `environment.yaml`:\r\n```bash\r\n# Install via bioconda\r\nconda install bioconda::gene-fetch\r\n\r\n# Or, install via pip\r\npip install 
gene-fetch\r\n\r\n# Or, via environment specification\r\nconda env update --name gene-fetch -f environment.yaml --prune\r\n\r\n# Verify installation\r\ngene-fetch --help\r\n```\r\n\r\n- If you would rather clone this repository and run a standalone version of Gene Fetch for some reason, you can do that as follows:\r\n```bash\r\n# Clone the repository\r\ngit clone https://github.com/bge-barcoding/gene_fetch.git\r\ncd gene_fetch\r\n\r\n# Activate conda environment (once created), and install gene-fetch (+ dependencies) via your preferred method. See `environment.yaml` for list of dependencies.\r\n\r\n# Run standalone Gene Fetch:\r\npython /path/to/gene_fetch.py [options]\r\n\r\n```\r\n\r\n---\r\n\r\n## Recommended: Testing\r\n- The Gene Fetch package includes some basic tests for each module that we recommend are run after installation.\r\n```bash\r\n# Clone the repository\r\ngit clone https://github.com/bge-barcoding/gene_fetch.git\r\ncd gene_fetch\r\n\r\n# Install pytest\r\npip install pytest\r\n\r\n# [Optional] Locally install Gene Fetch in editable mode from source (when inside `gene_fetch`) - enables testing of source code in development\r\npip install -e .\r\n\r\n# Run tests\r\npytest\r\n```\r\n* This will take a few minutes to run the tests. 
You will get 1 warning regarding API credentials as these are not provided in the basic tests.\r\n\r\n---\r\n\r\n## Usage\r\n```bash\r\ngene-fetch --gene \u003cgene_name\u003e --type \u003csequence_type\u003e --in \u003csamples.csv\u003e --out \u003coutput_directory\u003e --email example@example.co.uk --api-key 1234567890\r\n```\r\n* `--help`: Show usage help and exit.\r\n\r\n### Required arguments\r\n* `-g/--gene`: Name of gene to search for in NCBI GenBank database (e.g., cox1/16s/rbcl).\r\n* `-t/--type`: Sequence type to fetch; 'protein', 'nucleotide', or 'both' ('both' will initially search and fetch a protein sequence, and then fetches the corresponding nucleotide CDS for that protein sequence).\r\n* `-i/--in`: Path to input CSV file containing sample IDs and TaxIDs (see [Input](#input) section below).\r\n* `-i2/--in2`: Path to alternative input CSV file containing sample IDs and taxonomic information for each sample (see [Input](#input) section below).\r\n* `o/--out`: Path to output directory. The directory will be created if it does not exist.\r\n* `e/--email` and `-k/--api-key`: Email address and associated API key for NCBI account. An NCBI account is required to run this tool (due to otherwise strict API limitations) - information on how to create an NCBI account and find your API key can be found [here](https://support.nlm.nih.gov/kbArticle/?pn=KA-05317).\r\n### Optional arguments\r\n* `-ps/--protein-size`: Minimum protein sequence length filter. Applicable to mode 'batch' and 'single' search modes (default: 500aa).\r\n* `-ns/--nucleotide-size`: Minimum nucleotide sequence length filter. Applicable to mode 'batch' and 'single' search modes (default: 1000bp).\r\n* `s/--single`: Taxonomic ID for 'single' sequence search mode (`-i` and `-i2` are ignored when run with `-s` mode). 
'single' mode will fetch all (or N if specifying `--max-sequences`) target gene or protein sequences on GenBank for a specific taxonomic ID.\r\n* `-ms/--max-sequences`: Maximum number of sequences to fetch for a specific taxonomic ID (only applies when run in 'single' mode).\r\n* `-b/--genbank`: Saves genbank (.gb) files for fetched nucleotide and/or protein sequences to `genbank/` (applies when run in 'batch' or 'single' mode).\r\n* `-c/--clear`: Forces clean (re)start by clearing output directory regardless of previous run parameters. If ommiting `--clear` and rerunning gene-fetch with the same arguments and parameters, checkpointing will be enabled.\r\n* `--header`: Dictates the format of sequence headers in output FASTA files. 'basic' = '\u003eID' (default). 'detailed' = '\u003eID|taxid|accession_number|genbank_description|length'.\r\n\r\n---\r\n\r\n## Examples\r\nFetch both protein and nucleotide sequences for COI with default sequence length thresholds, and store the corresponding genbank records.\r\n```\r\ngene-fetch -e your.email@domain.com -k your_api_key \\\r\n            -g cox1 -o ./output_dir -i ./data/samples.csv \\\r\n            --type both --genbank\r\n```\r\n\r\nFetch COI nucleotide sequences using sample taxonomic information, applying a minimum nucleotide sequence length of 1000bp\r\n```\r\ngene-fetch -e your.email@domain.com -k your_api_key \\\r\n            -g cox1 -o ./output_dir -i2 ./data/samples_taxonomy.csv \\\r\n            --type nucleotide --nucleotide-size 1000\r\n```\r\n\r\nRetrieve 100 available rbcL protein sequences \u003e400aa for _Arabidopsis thaliana_ (taxid: 3702).\r\n```\r\ngene-fetch -e your.email@domain.com -k your_api_key \\\r\n            -g rbcL -o ./output_dir -s 3702 \\\r\n            --type protein --protein-size 400 --max-sequences 100\r\n```\r\n\r\n---\r\n\r\n## Input\r\n**Example 'samples.csv' input file (-i/--in)**\r\n| ID | taxid |\r\n| --- | --- |\r\n| sample-1  | 177658 |\r\n| sample-2 | 177627 |\r\n| sample-3 
| 3084599 |\r\n\r\n**Example 'samples_taxonomy.csv' input file (-i2/--in2)**\r\n| ID | phylum | class | order | family | genus | species |\r\n| --- | --- | --- | --- | --- | --- | --- |\r\n| sample-1  | Arthropoda | Insecta | Diptera | Acroceridae | Astomella | |\r\n| sample-2 | Arthropoda | Insecta | Hemiptera | Cicadellidae | Psammotettix | Psammotettix sabulicola |\r\n| sample-3 | Arthropoda | Insecta | Trichoptera | Limnephilidae | Dicosmoecus | Dicosmoecus palatus |\r\n* Leave blank if taxonomic information not known/needed. At least one rank must be supplied for each sample.\r\n\r\n## Output\r\n### 'Batch' mode\r\n```\r\noutput_dir/\r\n├── genbank/                    # Genbank (.gb) files for each fetched nucleotide and/or protein sequence.\r\n│   ├── nucleotide/  \r\n│   ├── protein/  \r\n├── nucleotide/                 # Nucleotide sequences. Only populated if '--type nucleotide/both' utilised.\r\n│   ├── sample-1.fasta   \r\n│   ├── sample-2.fasta\r\n│   └── ...\r\n├── protein/                    # Protein sequences. 
Only populated if '--type protein/both' utilised.\r\n│   ├── sample-1.fasta   \r\n│   ├── sample-2.fasta\r\n│   └── ...\r\n├── sequence_references.csv     # Sequence metadata.\r\n├── failed_searches.csv         # Failed search attempts (if any).\r\n└── gene_fetch.log              # Log.\r\n```\r\n\r\n**sequence_references.csv output example**\r\n| ID | input_taxa | first_matched_taxid | first_matched_taxid_rank | protein_accession | protein_length | nucleotide_accession | nucleotide_length | matched_rank | ncbi_taxonomy | reference_name | protein_reference_path | nucleotide_reference_path |\r\n| --- | --- | --- | --- | --- | --- | ---| --- | --- | --- | --- | --- | --- |\r\n| sample-1 | Apatania | 177658 | genus:Apatania | AHF21732.1 | 510 | KF756944.1 | 1530 | genus:Apatania | Eukaryota; ...; Apataniinae; Apatania | sample-1 | abs/path/to/protein_references/sample-1.fasta | abs/path/to/protein_references/sample-1_dna.fasta |\r\n| sample-2 | Isoptena serricornis | 2719103 | species:Isoptena serricornis | QNE85983.1 | 518 | MT410852.1 | 1557 | species:Isoptena serricornis | Eukaryota; ...; Chloroperlinae; Isoptena | sample-2 | abs/path/to/protein_references/sample-2.fasta | abs/path/to/protein_references/sample-2_dna.fasta |\r\n| sample-3 | Triaenodes conspersus | 1876143 | species:Triaenodes conspersus | YP_009526503.1 | 512 | NC_039659.1 | 1539 | genus:Triaenodes | Eukaryota; ...; Triaenodini; Triaenodes | sample-3 | abs/path/to/protein_references/sample-3.fasta | abs/path/to/protein_references/sample-3_dna.fasta |\r\n```\r\n* ID - The unique identifier (ID) for each sample (from the input CSV)\r\n* input_taxa - The taxon name searched for (e.g., \"Apatania\" == taxid 177658), or the taxon name for the closest valid taxid found.\r\n* first_matched_taxid - The NCBI taxonomic ID that was searched (same as the taxid from the --in CSV, or the closest valid taxid if using --in2 as input)\r\n* first_matched_taxid_rank - The taxonomic rank and name of the 
first_matched_taxid (e.g., \"genus:Astomella\")\r\n* protein_accession - The NCBI accession number of the protein sequence retrieved (if applicable)\r\n* protein_length - Length of the protein sequence in amino acids (if applicable)\r\n* nucleotide_accession - The NCBI accession number of the nucleotide sequence retrieved (if applicable)\r\n* nucleotide_length - Length of the nucleotide sequence in base pairs (if applicable)\r\n* matched_rank - The taxonomic rank where sequences were actually found (e.g., \"family:Acroceridae\" if no sequences existed at the the proceeding rank, and the search traversed up the taxonomy tree)\r\n* ncbi_taxonomy - The complete NCBI taxonomic lineage for the retrieved sequence (semicolon-separated)\r\n* reference_name - Copy of the ID (for reference purposes)\r\n* protein_reference_path - Full file path to the saved protein FASTA file (if applicable)\r\n* nucleotide_reference_path - Full file path to the saved nucleotide FASTA file (if applicable)\r\n```\r\n\r\n### 'Single' mode\r\n```\r\noutput_dir/\r\n├── genbank/                         # Genbank (.gb) files for each fetched nucleotide and/or protein sequence.\r\n├── nucleotide/                      # Nucleotide sequences. Only populated if '--type nucleotide/both' utilised.\r\n│   ├── ACCESSION1_dna.fasta   \r\n│   ├── ACCESSION2_dna.fasta\r\n│   └── ...\r\n├── ACCESSION1.fasta                 # Protein sequences.\r\n├── ACCESSION2.fasta\r\n├── fetched_nucleotide_sequences.csv # Sequence metadata. Only populated if '--type nucleotide/both' utilised.\r\n├── fetched_protein_sequences.csv    # Sequence metadata. 
Only populated if '--type protein/both' utilised.\r\n├── failed_searches.csv              # Failed search attempts (if any).\r\n└── gene_fetch.log                   # Log.\r\n```\r\n\r\n**fetched_protein|nucleotide_sequences.csv output example**\r\n| ID | length | Description | searched_taxid\r\n| --- | --- | --- | --- |\r\n| PQ645072.1 | 1501 | Ochlerotatus nigripes isolate Pool11 cytochrome c oxidase subunit I (COX1) gene, partial cds; mitochondrial | 508662 |\r\n| PQ645071.1 | 1537 | Ochlerotatus nigripes isolate Pool10 cytochrome c oxidase subunit I (COX1) gene, partial cds; mitochondrial | 508662 |\r\n| PQ645070.1 | 1501 | Ochlerotatus impiger isolate Pool2 cytochrome c oxidase subunit I (COX1) gene, partial cds; mitochondrial | 508662 |\r\n| PQ645069.1 | 1518\t| Ochlerotatus impiger isolate Pool1 cytochrome c oxidase subunit I (COX1) gene, partial cds; mitochondrial | 508662 |\r\n\r\n---\r\n\r\n## Running Gene Fetch on a cluster\r\n- See 'gene_fetch.sh' for running gene_fetch.py on a HPC cluster (SLURM job schedular). \r\n- Edit 'mem' and/or 'cpus-per-task' to set memory and CPU/threads - allocating lots of CPUs is unecessary as Gene Fetch is not paralellised (yet). The tool should run well with 4-10G memory and 1-2 CPUs.\r\n- Change paths and variables as needed.\r\n- Run 'gene_fetch.sh' with:\r\n```\r\nsbatch gene_fetch.sh\r\n```\r\n\r\n---\r\n\r\n## Supported targets\r\nGeneFetch includes the following 'hard-coded' search terms with common name variations for 'smarter' searching of the targets listed below. 
\r\n- cox1/COI/cytochrome c oxidase subunit I\r\n- cox2/COII/cytochrome c oxidase subunit II\r\n- cox3/COIII/cytochrome c oxidase subunit III\r\n- cytb/cob/cytochrome b\r\n- nd1/NAD1/NADH dehydrogenase subunit 1\r\n- nd2/NAD2/NADH dehydrogenase subunit 2\r\n- rbcL/RuBisCO/ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit\r\n- matK/maturase K\r\n- psbA/photosystem II protein D1\r\n- 16S ribosomal RNA/16s\r\n- SSU/18s\r\n- LSU/28s\r\n- 23s\r\n- 12S ribosomal RNA/12s\r\n- ITS (ITS1-5.8S-ITS2)\r\n- ITS1/internal transcribed spacer 1\r\n- ITS2/internal transcribed spacer 2\r\n- tRNA-Leucine/trnL\r\n\r\n\r\nGene/protein targets not listed can also be searched, however, Gene Fetch will implement a more generic search term/strategy with `{target}[Title] OR {target}[Gene] OR {target}[Protein Name]`.\r\nAdditional targets can be added if required - see `self._rRNA_genes` and '`self._protein_coding_genes` dictionaries within 'class config' (in `src/gene_fetch/core.py`) for example search terms to construct your own. 
You are welcome to open an [Issue](https://github.com/bge-barcoding/gene_fetch/issues/new) or create a pull request with your search term for inclusion into the main Gene Fetch release (see [Contributions and guidelines](https://github.com/bge-barcoding/gene_fetch?tab=readme-ov-file#contributions-and-guidelines) section below.\r\n\r\n---\r\n\r\n## Benchmarking\r\n| Sample Description | Run Mode | Target | Input File | Data Type | Memory | CPUs | Run Time (hh:mm:ss) |\r\n|--------------------|----------|--------|------------|-----------|--------|------|----------|\r\n| 570 Arthropod samples | Batch | COI | taxonomy.csv | Both | 4G | 1 | 01:34:47 |\r\n| 570 Arthropod samples | Batch | COI | samples.csv | Both (+ genbank) | 4G | 1 | 01:42:37 |\r\n| 570 Arthropod samples | Batch | COI | samples.csv | Nucleotide | 4G | 1 | 1:07:53  |\r\n| 570 Arthropod samples | Batch | ND1 | samples.csv | Nucleotide (\u003e500bp) | 4G | 1 | 1:23:26 |\r\n| All available (30) _A. thaliana_ sequences | Single | rbcL | N/A | Protein (\u003e300aa) | 4G | 1 | 00:00:25 |\r\n| 1000 Culicidae sequences | Single | COI | N/A | nucleotide (\u003e500bp) | 4G | 1 | 0031:05 |\r\n| 1000 _M. tubercolisis_ sequences | Single | 16S | N/A | nucleotide | 4G | 1 | 01:23:54 |\r\n* All benchmarking runs were performed on a SLURM-managed HPC cluster running Debian 12 (\"Bookworm), with each job allocated a modest 1 CPU and 4 GB RAM.\r\n\r\n---\r\n\r\n## Future Development\r\n- Add optional alignment of retrieved sequences [Ben].\r\n- Further improve efficiency of record searching and selecting the longest sequence [Dan].\r\n- Add support for additional genetic markers beyond the currently supported set [Dan].\r\n- Add optional HMM profile alignment that will attempt to extract the barcode region from certain support target genes (e.g. 658bp COI-5P barcode) [Ben].\r\n\r\n---\r\n\r\n## Contributions and guidelines\r\nFirst off, thanks for taking the time to contribute! 
❤️\r\n\r\n- If you hav any questions, we assume that you have read the available [Documentation](https://github.com/bge-barcoding/gene_fetch/blob/main/README.md). It may also be worth searching for existing [Issues](https://github.com/bge-barcoding/gene_fetch/issues) that might awnser your question(s).\r\n- If you feel you still need clarification or want to report a possible bug/unexpected behaviour, we recommend opening an [Issue](https://github.com/bge-barcoding/gene_fetch/issues/) and provide as much context as you can about what behaviour you were expecting and the behaviour you're running into.\r\n- If you want to suggest a novel feature or minor improvements to existing functionality, please make your case for the feature/enchanment by opening an [Issue](https://github.com/bge-barcoding/gene_fetch/issues/new) or create a pull request with your contribution (at which point it will be evaluated as a possible addition). We aim to address any issues as soon as possible.\r\n\r\n## Authorship \u0026 citation\r\nGeneFetch was written by Dan Parsons \u0026 Ben Price @ NHMUK (2025).\r\n\r\nIf you use GeneFetch, please cite our publication: Parsons and Price (2025). Gene Fetch: A Python tool for sequence retrieval from GenBank across the tree of life. 
Journal of Open Source Software, 10(112), 8456, https://doi.org/10.21105/joss.08456\r\n","organization":"Biodiversity Genomics Europe (general), iBOL Europe Museum Skimming","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1342?version=1","name":"main @ 9adc5e6","author":["Dan Parsons","Ben Price"],"descriptor_type":[]},{"id":"2","url":"https://workflowhub.eu/workflows/1342?version=2","name":"main @ 976e79d","author":["Dan Parsons","Ben Price"],"descriptor_type":[]},{"id":"3","url":"https://workflowhub.eu/workflows/1342?version=3","name":"v1.0.20","author":["Dan Parsons","Ben Price"],"descriptor_type":[]}]},{"id":"1343","url":"https://workflowhub.eu/workflows/1343","name":"CWL4IncorporateTSSintoGXF (paired-end file)","description":"[![License](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE)\r\n![GitHub last commit (branch)](https://img.shields.io/github/last-commit/RyoNozu/CWL4IncorporateTSSintoGXF/main)\r\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\r\n[![Lab Website](https://img.shields.io/badge/Lab%20Website-bonohulab-informational?style=flat-square)](https://bonohu.hiroshima-u.ac.jp/)\r\n\r\n\u0026nbsp;\r\n\r\n# CWL4IncorporateTSSintoGXF\r\n\r\nThis workflow determines TSS based on the analysis of CAGE-seq data and incorporates TSS information and 5'UTR information calculated based on TSS information into the gene annotation file (gff/gtf). The R package, [TSSr](https://github.com/Linlab-slu/TSSr), is used to determine TSS.  
\r\n\r\n## Requirements\r\n\r\n- [cwltool](https://github.com/common-workflow-language/cwltool)  \r\n\r\n    Install using pip  \r\n    ```\r\n    pip  install cwltool  \r\n    ```\r\n\r\n    Install using conda  \r\n    ```\r\n    conda create -n cwltool  \r\n    conda activate cwltool  \r\n    conda install -c conda-forge cwltool \r\n    ``` \r\n\r\n- [docker](https://www.docker.com/)  \r\n\r\n    † and Docker Desktop must be running  \r\n\r\n## Simple usage  \r\n\r\n- Clone this repository  \r\n\r\n    ```\r\n    git clone https://github.com/RyoNozu/CWL4IncorporateTSSintoGXF.git\r\n    cd CWL4IncorporateTSSintoGXF\r\n    ```\r\n\r\n- Run workflow  \r\n\r\n    ```\r\n    # for paired-end reads case\r\n    cwltool --debug --cachedir ./cwl_cache/ --outdir ./test/ ./workflow/cageseq_gtf_update_pe.cwl ./config/Workflow_config/cageseq_gtf_update_pe.yml\r\n    ```\r\n    - Prep your case yml file referring to the [template](https://github.com/RyoNozu/CWL4IncorporateTSSintoGXF/blob/main/config/workflow_template.yml)  \r\n        • Refer to the [Link](https://view.commonwl.org/workflows/github.com/RyoNozu/CWL4IncorporateTSSintoGXF/blob/main/workflow/cageseq_gtf_update_pe.cwl) for details on each parameter that needs to be specified  \r\n    - A single-ended version (cageseq_gtf_update_se.cwl) is in prep as of 20240417  \r\n\r\n## Input files  \r\n\r\n- CAGE-seq Read (fastq, paried/single-end)  \r\n- reference genome (fasta)  \r\n- gene annotation file (gff/gtf)  \r\n- (BSgenome_data_package_seed_file (.txt))  \r\n        \u003e refere to forgeBSgenomeDataPkg function in [BSgenomeForge](https://bioconductor.org/packages/release/bioc/html/BSgenomeForge.html) package  \r\n\r\n## Output files  \r\n\r\n- updated gxf file (.gff/gtf)  \r\n\r\n## FYI: Running time\r\n\r\n***\r\n","organization":"bonohulab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1343?version=1","name":"main @ 
6f4b47f","author":["Ryo Nozu","Sora Yonezawa"],"descriptor_type":["CWL"]}]},{"id":"1344","url":"https://workflowhub.eu/workflows/1344","name":"NGS Equimolar Pooling Calculator","description":"# NGS Equimolar Pooling Calculator\r\n\r\nA web-based tool for calculating equimolar pooling volumes from complex sub-pools for Next-Generation Sequencing (NGS).\r\n\r\n## About\r\n\r\nThis calculator helps researchers determine the correct volumes to take from each pool when combining multiple complex sub-pools of NGS libraries for sequencing. It ensures that each individual sample contributes an equal number of molecules to the final sequencing run, resulting in more balanced coverage.\r\n\r\n## Features\r\n\r\n- Calculate equimolar pooling volumes for any number of input pools\r\n- Support for both calculated (estimated / target) and measured concentrations\r\n- Option to maximize the usage of original pools\r\n- Adjustable final volume\r\n- Real-time results with detailed metrics\r\n- Verification of equimolarity across samples\r\n\r\n## Live Calculator\r\n\r\nThe calculator is available at: https://bwprice.github.io/ngs-equimolar-pooling-calculator/\r\n\r\n## Usage\r\n\r\n1. Enter information for each input pool:\r\n   - Molarity that each sample was normalized to in the sub-pool - i.e. the target molarity when pooling (nM)\r\n   - Total pool volume (μl)\r\n   - Number of samples in the pool\r\n   - (Optional) Measured concentration of each sub-pool from lab quantification\r\n\r\n2. Choose whether to:\r\n   - Use measured concentrations (if available from Qubit, Bioanalyzer, qPCR, etc.)\r\n   - Maximize usage of original pools\r\n   - Set a specific final pool volume\r\n\r\n3. The calculator will automatically determine:\r\n   - How much volume to take from each sub-pool\r\n   - The percentage of each sub-pool that will be used\r\n   - The final nM contribution per sample\r\n   - Total samples in the final pool\r\n\r\n## Installation for Local Development\r\n\r\n1. 
Clone this repository:\r\n   ```\r\n   git clone https://github.com/bwprice/ngs-equimolar-pooling-calculator.git\r\n   cd ngs-equimolar-pooling-calculator\r\n   ```\r\n\r\n2. Install dependencies:\r\n   ```\r\n   npm install\r\n   ```\r\n\r\n3. Start the development server:\r\n   ```\r\n   npm start\r\n   ```\r\n\r\n4. Open [http://localhost:3000](http://localhost:3000) to view it in your browser.\r\n\r\n## Deployment\r\n\r\nTo deploy to GitHub Pages:\r\n\r\n```\r\nnpm run deploy\r\n```\r\n\r\n## Contributing\r\n\r\nContributions are welcome! Please feel free to submit a Pull Request.\r\n\r\n## License\r\n\r\nThis project is licensed under the MIT License - see the LICENSE file for details.\r\n\r\n## Citation\r\n\r\nIf you use this tool in your research, please cite:\r\n\r\n```\r\nBen Price, NGS Equimolar Pooling Calculator, GitHub Repository, 2025, \r\nAvailable at: https://github.com/bwprice/ngs-equimolar-pooling-calculator\r\n```\r\n","organization":"iBOL Europe Museum Skimming","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1344?version=1","name":"main @ 9f81504","author":["Ben Price"],"descriptor_type":[]}]},{"id":"1346","url":"https://workflowhub.eu/workflows/1346","name":"Barcode gene Extraction and Evaluation from Genome Skims (BeeGees) Snakemake workflow","description":"# Barcode gene Extraction and Evaluation from Genome Skims (BeeGees) Snakemake workflow #\r\nSnakemake workflow for recovering high-quality barcode sequences at scale, built around MitoGeneExtractor and adapted for genome skims of museum specimens.\r\n\r\n\r\n# Contents # \r\n - [Requirements](#Requirements)\r\n - [Workflow](#Workflow)\r\n - [Installation and set up](#Installation-and-set-up)\r\n - [Cluster configuration](#Cluster-configuration-using-Snakemake-profiles)\r\n - [Results structure](#Results-structure)\r\n - [Validation process](#Validation-process)\r\n - 
[Contributing](#Contributing)\r\n - [Future development](#Future-development)\r\n\r\n\r\n# Requirements #\r\n- [MitoGeneExtractor](https://github.com/cmayer/MitoGeneExtractor) version 1.9.6 installed.\r\n- Paired-end reads in .fastq.gz or .fastq format.\r\n- samples.csv (generated manually, or as outlined below if working from BOLD sample metadata).\r\n- sequence_references.csv (generated manually, or using [Gene Fetch](https://github.com/bge-barcoding/gene_fetch?tab=readme-ov-file) within the workflow).\r\n- Activated conda env (see BeeGees_env.yaml).\r\n\r\n\r\n# Workflow #\r\n\u003cdiv align=\"center\"\u003e\r\n  \u003cimg width=\"1819\" height=\"858\" src=\"https://github.com/user-attachments/assets/ad5f64e7-f253-4801-98fb-859a031de56b\"\u003e\r\n\u003c/div\u003e\r\n\r\n\r\n1. **Preprocessing modes** (both modes run in parallel to optimise barcode recovery from degraded hDNA):\r\n   - **concat**: Adapter trimming, quality filtering, poly-G trimming, deduplication of paired-end reads using [fastp](https://github.com/OpenGene/fastp), followed by concatenation of R1+R2 reads, a secondary quality trimming with [TrimGalore](https://github.com/FelixKrueger/TrimGalore), and optional read downsampling.\r\n   - **merge**: Quality control and merging of overlapping paired-end reads using [fastp](https://github.com/OpenGene/fastp), with header cleaning for MitoGeneExtractor compatibility, and optional read downsampling.\r\n2. **Sample-specific reference retrieval**: Automated retrieval of taxonomically-appropriate protein reference sequences from GenBank using [Gene-Fetch](https://github.com/bge-barcoding/gene_fetch).\r\n3. **Barcode recovery**: Protein reference-guided extraction of barcode sequences from preprocessed reads using [MitoGeneExtractor](https://github.com/cmayer/MitoGeneExtractor), producing initial consensus sequences for both preprocessing modes.\r\n4. 
**Consensus sequence preparation**: Header standardisation and concatenation of raw consensus sequences into multi-FASTA format for downstream processing.\r\n5. **Consensus cleaning and filtering pipeline (fasta_cleaner)**: Sequential quality filters applied to MGE read alignments to remove contaminants and outliers before generating cleaned consensus sequences:\r\n   - Human COI contamination removal (common in museum specimens) ([01_human_cox1_filter.py](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/workflow/scripts/01_human_cox1_filter.py))\r\n   - AT content filtering (removes suspected fungal/bacterial contamination) ([02_at_content_filter.py](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/workflow/scripts/02_at_content_filter.py))\r\n   - Statistical outlier removal (eliminates reads dissimilar to initial consensus) ([03_statistical_outliers.py](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/workflow/scripts/03_statistical_outlier_filter.py))\r\n   - Optional: Custom reference-based filtering ([04_reference_filter.py](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/workflow/scripts/04_reference_filter.py))\r\n   - Cleaned consensus generation and metrics aggregation ([05_consensus_generator.py](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/workflow/scripts/05_consensus_generator.py))\r\n6. 
**Barcode validation and selection** (see Validation Process section for more detail):\r\n   - **Structural validation**: HMM-based barcode extraction, reading frame analysis, stop codon detection, and quality ranking of all generated barcode consensus sequences ([structural_validation.py](https://github.com/bge-barcoding/BeeGees/blob/main/workflow/scripts/structural_validation.py))\r\n   - **Local BLASTn search**: Parallel BLASTn searches of structurally validated barcodes against local reference database ([tv_local_blast.py](https://github.com/bge-barcoding/BeeGees/blob/main/workflow/scripts/tv_local_blast.py))\r\n   - **Taxonomic validation**: Hierarchical matching of BLAST results against expected taxonomy, selecting the best sequence per sample based on taxonomic match quality and alignment metrics ([tv_blast2taxonomy.py](https://github.com/bge-barcoding/BeeGees/blob/main/workflow/scripts/tv_blast2taxonomy.py))\r\n7. **Statistics compilation**: Aggregation of QC, recovery, cleaning, filtering, and validation metrics into comprehensive CSV reports for both preprocessing modes ([mge_stats.py](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/workflow/scripts/mge_stats.py)).\r\n8. **Final integration**: Merging of all pipeline metrics (read QC, MGE, fasta_cleaner, structural validation, taxonomic validation) into a unified output CSV ([val_csv_merger.py](https://github.com/bge-barcoding/BeeGees/blob/main/workflow/scripts/val_csv_merger.py)).\r\n9. **Evaluate barcoding outcome**: Take unified CSV file and determine barcoding success (PASS/PARTIAL/FAIL) for each sample ([barcoding_outcome.py](https://github.com/SchistoDan/BeeGees/blob/main/workflow/scripts/barcoding_outcome.py)).\r\n10. 
**Cleanup**: Removal of temporary files and redundant sample-specific logs.\r\n\r\n\r\n# Installation and set up: #\r\n## Install MitoGeneExtractor ##\r\n-  Navigate to the [MitoGeneExtractor](https://github.com/cmayer/MitoGeneExtractor) repository and follow the [installation](https://github.com/cmayer/MitoGeneExtractor?tab=readme-ov-file#installation) instructions.\r\n## Clone BeeGees github repository and set up conda environment ##\r\n- [Install miniconda](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions).\r\n```bash\r\ngit clone https://github.com/bge-barcoding/BeeGees.git [path/to/desired/install/location/]\r\ncd installation/dir/BeeGees\r\nconda env create -f BeeGees_env.yaml\r\ngit status\r\n```\r\n## Generate input sample CSV file ##\r\n- This can be created manually, or via [sample-processing](https://github.com/bge-barcoding/sample-processing) workflow.\r\n- The file must contain the following headers: **'ID', 'forward', 'reverse', and 'taxid' _OR_ 'phylum-\u003especies' (see below for more information).**\r\n  - `ID`: Unique sample identifier. Due to regex matching and statistics aggregation, the sample ID will be considered as the string before the first underscore. **It is therefore recommended that sample names do not use '_' characters.** E.g. BSNHM002-24 instead of BSNHM002_24, or P3-1-A10-2-G1 instead of P3_1_A10_2_G1.\r\n  - `forward` \u0026 `reverse`: Absolute paths to forward (R1) and reverse (R2) PE read files. In fastq/fq format, either gzipped or not.\r\n  - `taxid` _OR_ `hierarchical taxonomy`: Unique taxonomic identifier or taxonomic lineage for sample. Taxid's can be found manually by searching the expected species/genus/family of each sample in the [NCBI taxonomy database](https://www.ncbi.nlm.nih.gov/taxonomy). 
Alternatively, you can provide the taxonomic lineages of each sample (with the headers phylum, class, order, family, genus, species) and the corresponding taxid of the lowest identified taxonomic rank will be retrieved.\r\n  \r\n**samples.csv example (taxid)**\r\n| ID | forward | reverse | taxid |\r\n| --- | --- | --- | --- |\r\n| BSNHM002-24  | abs/path/to/R1.fq.gz | abs/path/to/R2.fq.gz | 177658 |\r\n| BSNHM038-24 | abs/path/to/R1.fq.gz | abs/path/to/R2.fq.gz | 177627 |\r\n| BSNHM046-24 | abs/path/to/R1.fq.gz | abs/path/to/R2.fq.gz | 3084599 |\r\n\r\n**samples.csv example (hierarchical taxonomy)**\r\n| ID | forward | reverse | phylum | class | order | family | genus | species |\r\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | \r\n| BSNHM002-24  | abs/path/to/R1.fq.gz | abs/path/to/R2.fq.gz | Arthropoda | Insecta | Hemiptera | Cicadidae | Tibicina | Tibicina tomentosa |\r\n| BSNHM038-24 | abs/path/to/R1.fq.gz | abs/path/to/R2.fq.gz | Tracheophyta | Pinopsida | Pinales | Pinaceae | Abies |  |\r\n| BSNHM046-24 | abs/path/to/R1.fq.gz | abs/path/to/R2.fq.gz | Annelida | Polychaeta | Terebellida | Ampharetidae | Samytha | Samytha sexcirrata |\r\n\r\n## Gathering sample-specific pseudo-references ##\r\n- The sample_references.csv file can be created manually, or using [Gene-fetch](https://github.com/bge-barcoding/gene_fetch) integrated into the workflow (highly recommended). If enabled in the config.yaml by setting `run_gene_fetch` to 'true', Gene-fetch will retrieve the necessary protein pseudo-references for each sample from NCBI GenBank using the sample's taxonomic identifier (taxid) or taxonomic hierarchy. A sequence target (e.g. 
COI) must be specified in the config.yaml, as well as your NCBI API credentials (email address \u0026 API key - see [guidance](https://support.nlm.nih.gov/kbArticle/?pn=KA-05317) on getting a key).\r\n- The file must contain the following headers: **'ID' and 'protein_reference_path'.**\r\n  - `ID`: Unique sample identifier. This **must** be the same string as the 'ID' column in the input samples.csv file.\r\n  - `protein_reference_path`: Absolute path to the protein pseudo-reference sequence used for sample-specific protein-guided read alignment.\r\n\r\n**sample_references.csv example**\r\n| ID | protein_reference_path | \r\n| --- | --- |\r\n| BSNHM002-24 | path/to/BSNHM002-24.fasta |\r\n| BSNHM038-24 | path/to/BSNHM038-24.fasta |\r\n| BSNHM046-24 | path/to/BSNHM046-24.fasta |\r\n* **Currently, it is crucial that the sample ID (ID), the reference sequence FASTA filename, and corresponding reference sequence FASTA header are all identical for correct sample-reference file mapping.** Gene-fetch will handle this for you.\r\n\r\n## Customising snakemake configuration file ##\r\n- Update [config/config.yaml](https://github.com/bge-barcoding/BeeGees/blob/main/config/config.yaml) with the necessary paths and variables.\r\n```\r\n## General BeeGees pipeline parameters and paths\r\nrun_name: BeeGees run identifier\r\nmge_path: Path to MGE install (MitoGeneExtractor-vX.X.X file)\r\nsamples_file: Path to samples.csv (see above for formatting)\r\nsequence_reference_file: Path to sequence_references.csv (leave path blank/empty if 'run_gene_fetch' == true) (see above for formatting)\r\noutput_dir: Path to output directory. If any directories in the path do not already exist, then they will be created\r\n\r\n## Gene Fetch parameters (https://github.com/bge-barcoding/gene_fetch)\r\nrun_gene_fetch: Set to true to use gene-fetch to generate reference sequences (default: true)\r\nemail: Email for NCBI API. Required if run_gene_fetch == true. \r\napi_key: NCBI API key. 
Required if run_gene_fetch == true.\r\ngene: Target gene (cox1 or rbcl)\r\nminimum_length: Minimum length (in amino acids) of protein pseudo-reference(s) to fetch (default: 500)\r\ninput_type: Taxonomic identification column(s) (taxid/hierarchical). i.e. Does the 'samples.csv' contain a 'taxid' column or 'hierarchical' taxonomic information columns (default: taxid)? (see above for formatting)\r\ngenbank: Download complete GenBank records of retrieved protein pseudo-references\r\n\r\n## Downsampling parameters\r\nenabled: Set to true to enable downsampling (default: false)\r\nmax_reads: Maximum number of read PAIRS to process (e.g. 25M read pairs = 25M fwd + 25M rev reads, = 50M reads in total). Setting this to zero is equivilent to 'enabled: false'\r\n\r\n## MitoGeneExtractor parameters (https://github.com/cmayer/MitoGeneExtractor/tree/main?tab=readme-ov-file#command-line-options)\r\nr: Exonerate relative score threshold parameter\r\ns: Exonerate minimum score threshold parameter\r\nn: Number of base pairs to extend beyond the Exonerate alignment\r\nC: Genetic code to use for Exonerate (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) \r\nt: Consensus threshold (e.g. 0.5 = 50%)\r\n\r\n## Post-processing of aligned reads for cleaning and filtering\r\n# human coi filtering -\u003e at content filtering -\u003e statistical outlier filtering -\u003e (optional) reference-base filtering -\u003e 'cleaned' consensus generation\r\nconsensus_threshold: Threshold at which bases at each positional 'column' must 'agree' to be incldued in the consensus (e.g. 0.5 = ≥50% of bases at each position must agree)\r\nhuman_threshold: Threshold at which reads are removed due to similarity with the human COI reference sequence (e.g. 0.95 = reads with ≥95% similarity are removed)\r\nat_difference: Threshold at which reads are removed due to AT content variation (e.g. 
0.1 = reads with AT% differing by 10% from the consensus are removed)\r\nat_mode: AT content filtering mode (Absolute/Higher/Lower). Absolute = Remove sequences if AT content differs from consensus by more than threshold in either direction. Higher = Only remove sequences with AT content above at_difference threshold. Lower = Only remove sequences with AT content below at_difference threshold\r\noutlier_percentile: Threshold at which reads are flagged as statistical outliers compared to the consensus and removed (e.g. 90.0 = reads \u003c 90% 'similar' to the consensus are removed)\r\nreference_dir: Path to directory containing at least one [ID].fasta file with known contaminant or target species genome(s) to be filtered or retained (see reference_filter_mode below)\r\nreference_filter_mode: Either keep sequences that map to the supplied reference sequence (reference-based retention) or remove sequences that map to the supplied reference sequence (contaminant removal) (\"keep_similar\"/\"remove_similar\")\r\n\r\n## Structural validation\r\nrun_structural_validation: Set false to skip structural validation step (default: true)\r\ntarget: Barcode marker to extract. Corresponds to HMM files in 'resources/hmm' (cox1/coi or rbcl)\r\nverbose: Enable verbose logging (default: false)\r\ngenetic_code: Genetic code for translation table (must be the same as 'C' MitoGeneExtractor parameter)\r\n\r\n## Taxonomic validation\r\nrun_taxonomic_validation: Set false to skip taxonomic validation step (default: true). If run_structural_validation == false, run_taxonomic_validation MUST also == false\r\ndatabase: Path to directory containing BLASTn database, or to a specific FASTA file to make a BLASTn database from (using makeblastdb)\r\ndatabase_taxonomy: Path to TSV file containing taxonomic mappings corresponding to records in the BLASTn 'database'\r\ntaxval_rank: Taxonomic rank to stop validating at (e.g. 
family, genus, species) (default \u0026 recommended: family)\r\nexpected_taxonomy: Expected taxonomy file must contain the following columns: Process ID,phylum,class,order,family,genus,speces. Process ID must equal 'ID' from the samples_file above. If heirarchical taxonomy information was provided in the `samples.csv` file, this file can be reused as the expected_taxonomy CSV file required for taxonomic validation of barcode consensus sequences.\r\nverbose: Enable verbose logging (default: true)\r\nmin_pident: Minimum percent identity (pident) threshold to be considered for returned BLAST hits. Any hit with a pident below this value is removed\r\nmin_length: Minimum length of alignment to be considered for returned BLAST hits. Any hit with a length below this value is removed\r\n\r\n## Resource allocation for each rule\r\nrules: Each of the main rules in the config.yaml can specify the number of requested threads and memory resources (in Mb) for every job (e.g. specifying 4 threads and 4G memory for fastp_pe_merge would allocate those resources for every 'fastp_pe_merge' job). Rules have dynamic memory scaling upon retry (mem_mb * retry #). Make sure to change 'PARTITION' for MitoGeneExtractor, structural_validation, and taxonomic_validation rules. \r\n```\r\n**Currently, BeeGees barcode validaition only works for COI-5P and rbcL barcodes due to HMM and BLAST database availability (to be expanded with future updates).**\r\n\r\n\r\n## Cluster configuration using Snakemake profiles ##\r\n- See `profiles/` directory for config.yaml files for 'SLURM' or 'local' cluster submission parameters. Other than the default `slurm_partition` and `jobs` parameters, all other parameters can likely stay as they are unless you experience issues.\r\n  - The default `slurm_partition` determines the SLURM cluster partition for each snakemake job, unless otherwise specified (in the config/config.yaml). 
It is recommended to set this to a partition with at least 12-24 hour time limits.\r\n  - The `jobs` parameter dictates the maximum number of workflow jobs that can be run concurrently. The value to set jobs to depends on your specific cluster. If the value is too low, it will create a bottleneck and reduce run speed/efficiency. If the value is too high, you may hit filesystem limits, job submission limits, user resource quotas, and fairshare policies, resulting in many pending or idle jobs. For example, if your cluster had a per-user memory limit of 256G, setting jobs to 20 and allocating 32G memory to each MitoGeneExtractor job would result in only 8 MitoGeneExtractor jobs running in parallel and the remaining 12 jobs to be pending until memory is available.\r\n- The profile (`profiles/local` or `profiles/slurm`) will need to be changed in `snakemake_run.sh` depending on your system and which one you use (see `$PROFILE` variable).\r\n\r\n### Cluster submission ###\r\n- Depending on your system and whether you are using the 'SLURM' or 'local' snakemake profile, there are two ways to run the BeeGees pipeline:\r\n  - **SLURM**: Use [snakemake_run-sbatch.sh](https://github.com/SchistoDan/BeeGees/blob/main/snakemake_run-sbatch.sh). Run `sbatch snakemake_run-sbatch.sh` on the head/login node of your cluster. Submits the main snakemake coordinating job to the SLURM cluster using SBATCH, and will 'farm out' each job in the workflow to a new SBATCH job for increased parallelisation. Please change `--partition` in the SBATCH header section of the script to an appropriate cluster partition. The main snakemake coordinating job needs to run throughout the entire BeeGees run. It is therefore recommended to set this to a partition with at least 1 day-1 week time limits.\r\n  - **local**: Use [snakemake_run.sh](https://github.com/bge-barcoding/MitoGeneExtractor-BGE/blob/main/snakemake_run.sh). Simply run `./snakemake_run.sh` on your desired cluster compute node. 
This node will handle all job scheduling and job computation.\r\n\r\n\r\n\r\n# Results structure #\r\n```\r\noutput_dir/\r\n├── **01_preprocessing/**\r\n│   ├── merge_mode/\r\n│   │   ├── trimmed_data/\r\n│   │   │   ├── {sample}_merged.fq                             # Merged paired-end reads\r\n│   │   │   ├── {sample}_merged_clean.fq                       # Header-cleaned merged reads\r\n│   │   │   ├── {sample}_fastp_report.html                     # FastP HTML report\r\n│   │   │   ├── {sample}_fastp_report.json                     # FastP JSON report\r\n│   │   │   └── unpaired/                                      # Unpaired reads from merging\r\n│   │   └── logs/\r\n│   │       ├── clean_headers/\r\n│   │       │   └── clean_headers.log                          # Aggregated header cleaning logs\r\n│   │       ├── fastp/                                         # Individual FastP logs per sample\r\n│   │       └── final_cleanup_complete.txt\r\n│   └── concat_mode/\r\n│       ├── trimmed_data/\r\n│       │   └── {sample}/\r\n│       │       ├── {sample}_R1_trimmed.fastq.gz               # Trimmed forward reads\r\n│       │       ├── {sample}_R2_trimmed.fastq.gz               # Trimmed reverse reads\r\n│       │       ├── {sample}_concat_trimmed.fq                 # Quality-trimmed concatenated reads\r\n│       │       ├── {sample}_fastp_report.html                 # FastP HTML report\r\n│       │       ├── {sample}_fastp_report.json                 # FastP JSON report\r\n│       │       └── {sample}_concat.fastq_trimming_report.txt  # Trim Galore report\r\n│       └── logs/\r\n│           ├── concat/\r\n│           │   └── concat_reads.log                           # Aggregated concatenation logs\r\n│           ├── trim_galore/\r\n│           │   └── trim_galore.log                            # Aggregated Trim Galore logs\r\n│           ├── fastp/                                         # Individual FastP logs per sample\r\n│           ├── gzip/                 
                         # Compression logs per sample\r\n│           └── final_cleanup_complete.txt\r\n│\r\n├── **02_references/**                                             # Only if run_gene_fetch = true\r\n│   ├── protein/\r\n│   │   └── {sample}.fasta                                     # Protein references for each sample\r\n│   ├── genbank/                                               # GenBank records (if genbank: true)\r\n│   └── sequence_references.csv                                # Reference metadata\r\n│\r\n├── **03_barcode_recovery/**\r\n│   ├── merge_mode/\r\n│   │   ├── alignment/\r\n│   │   │   └── {sample}_r_{r}_s_{s}_align_{sample}.fas        # MGE alignment files\r\n│   │   ├── consensus/\r\n│   │   │   ├── {sample}_r_{r}_s_{s}_con_{sample}.fas          # Individual consensus files\r\n│   │   │   └── {run_name}_cons_combined-merge.fasta           # Combined consensus sequences\r\n│   │   ├── fasta_cleaner/\r\n│   │   │   ├── 01_human_filtered/\r\n│   │   │   │   ├── human_filtered.txt                         # List of filtered files\r\n│   │   │   │   └── human_filter_metrics.csv                   # Human filtering metrics\r\n│   │   │   ├── 02_at_filtered/\r\n│   │   │   │   ├── at_filtered_sequences/                     # Individual filtered files\r\n│   │   │   │   ├── at_filtered.txt                            # List of filtered files\r\n│   │   │   │   └── at_filter_summary.csv                      # AT filtering summary\r\n│   │   │   ├── 03_outlier_filtered/\r\n│   │   │   │   ├── outlier_filtered.txt                       # List of filtered files\r\n│   │   │   │   ├── outlier_filter_summary_metrics.csv         # Summary metrics\r\n│   │   │   │   └── outlier_filter_individual_metrics.csv      # Individual metrics\r\n│   │   │   ├── 04_reference_filtered/                         # Optional - if reference filtering enabled\r\n│   │   │   │   ├── reference_filtered.txt                     # List of filtered files\r\n│   │   │   │   └── 
reference_filter_metrics.csv               # Reference filtering metrics\r\n│   │   │   ├── 05_cleaned_consensus/\r\n│   │   │   │   └── cleaned_cons_metrics-merge.csv             # Consensus generation metrics\r\n│   │   │   ├── combined_statistics.csv                        # Aggregated cleaning statistics\r\n│   │   │   └── cleaned_cons_combined.fasta                    # Final cleaned consensus sequences\r\n│   │   ├── logs/\r\n│   │   │   ├── mge/\r\n│   │   │   │   ├── alignment_files.log                        # List of alignment files\r\n│   │   │   │   ├── mge_stats.log                              # MGE statistics log\r\n│   │   │   │   └── {sample}_r_{r}_s_{s}/                      # MGE vulgar files per sample\r\n│   │   │   ├── fasta_cleaner/\r\n│   │   │   │   └── fasta_cleaner_complete.txt                 # Cleaner completion flag\r\n│   │   │   ├── rename_consensus/\r\n│   │   │   │   └── rename_fasta.log                           # Header renaming logs\r\n│   │   │   ├── fasta_cleaner_complete.txt                     # Main cleaner completion\r\n│   │   │   └── exonerate_int_cleanup_complete.txt             # Intermediate cleanup completion\r\n│   │   ├── out/                                               # MGE output files per sample/parameter\r\n│   │   ├── err/                                               # MGE error logs per sample/parameter\r\n│   │   └── {run_name}_merge-stats.csv                         # Mode-specific statistics\r\n│   └── concat_mode/\r\n│       ├── alignment/\r\n│       │   └── {sample}_r_{r}_s_{s}_align_{sample}.fas        # MGE alignment files\r\n│       ├── consensus/\r\n│       │   ├── {sample}_r_{r}_s_{s}_con_{sample}.fas          # Individual consensus files\r\n│       │   └── {run_name}_cons_combined-concat.fasta          # Combined consensus sequences\r\n│       ├── fasta_cleaner/\r\n│       │   ├── 01_human_filtered/\r\n│       │   │   ├── human_filtered.txt                         # List of filtered files\r\n│  
     │   │   └── human_filter_metrics.csv                   # Human filtering metrics\r\n│       │   ├── 02_at_filtered/\r\n│       │   │   ├── at_filtered_sequences/                     # Individual filtered files\r\n│       │   │   ├── at_filtered.txt                            # List of filtered files\r\n│       │   │   └── at_filter_summary.csv                      # AT filtering summary\r\n│       │   ├── 03_outlier_filtered/\r\n│       │   │   ├── outlier_filtered.txt                       # List of filtered files\r\n│       │   │   ├── outlier_filter_summary_metrics.csv         # Summary metrics\r\n│       │   │   └── outlier_filter_individual_metrics.csv      # Individual metrics\r\n│       │   ├── 04_reference_filtered/                         # Optional - if reference filtering enabled\r\n│       │   │   ├── reference_filtered.txt                     # List of filtered files\r\n│       │   │   └── reference_filter_metrics.csv               # Reference filtering metrics\r\n│       │   ├── 05_cleaned_consensus/\r\n│       │   │   └── cleaned_cons_metrics-concat.csv            # Consensus generation metrics\r\n│       │   ├── combined_statistics.csv                        # Aggregated cleaning statistics\r\n│       │   └── cleaned_cons_combined.fasta                    # Final cleaned consensus sequences\r\n│       ├── logs/\r\n│       │   ├── mge/\r\n│       │   │   ├── alignment_files.log                        # List of alignment files\r\n│       │   │   ├── mge_stats.log                              # MGE statistics log\r\n│       │   │   └── {sample}_r_{r}_s_{s}/                      # MGE vulgar files per sample\r\n│       │   ├── fasta_cleaner/\r\n│       │   │   └── fasta_cleaner_complete.txt                 # Cleaner completion flag\r\n│       │   ├── rename_consensus/\r\n│       │   │   └── rename_fasta.log                           # Header renaming logs\r\n│       │   ├── fasta_cleaner_complete.txt                     # Main cleaner 
completion\r\n│       │   └── exonerate_int_cleanup_complete.txt             # Intermediate cleanup completion\r\n│       ├── out/                                               # MGE output files per sample/parameter\r\n│       ├── err/                                               # MGE error logs per sample/parameter\r\n│       └── {run_name}_concat-stats.csv                        # Mode-specific statistics\r\n│   ├── {run_name}_BeeGees_stats.csv                              # Combined statistics from both modes\r\n│   └── {run_name}_all_cons_combined.fasta                     # All consensus sequences from both modes\r\n│\r\n├── **04_barcode_validation/**\r\n│   ├── structural/                                            # Only if run_structural_validation = true\r\n│   │   ├── structural_validation.csv                          # Structural validation results\r\n│   │   ├── {run_name}_full_sequences.fasta                    # Full sequences passing validation\r\n│   │   └── {run_name}_barcode_sequences.fasta                 # Barcode sequences passing validation\r\n│   └── taxonomic/                                             # Only if run_taxonomic_validation = true (\u0026 run_structural_validation = true)\r\n│       ├── 01_local_blast_output.csv                          # BLAST results\r\n│       ├── 02_taxonomic_validation.csv                        # Taxonomic validation results\r\n│       └── {run_name}_barcode_sequences.fasta                 # Final validated barcode sequences\r\n│\r\n├── **05_barcoding_outcome/**\r\n│   ├── baroding_outcome.log                                   # Summary and logging of barcoding outcome analysis\r\n│   └── barcoding-outcome.tsv                                  # Overview of barcoding outcomes\r\n|\r\n├── {run_name}_final_validated_barcodes.fasta                  # Only if both validations run\r\n├── {run_name}_final_stats.csv                                 # Only if both validations run\r\n└── logs/                     
                                 # Top-level logs directory\r\n```\r\n\r\n\r\n# Validation process\r\nThe BeeGees pipeline contains an optional barcode validation process (see [Workflow](#Workflow) section and [config.yaml](https://github.com/SchistoDan/BeeGees/blob/main/config/config.yaml)) to ensure output barcode quality is maximised through sequential structural and taxonomic validation steos, selecting the best barcode consensus sequences for downstream analyses. \r\n- The BeeGees pipeline has the capacity to validate the following barcode markers:\r\n  - **COI-5P**: Requires the [BOLDistilled](https://boldsystems.org/data/boldistilled/) BLASTn COI database and corresponding taxonomy mapping TSV file (*_SEQUENCES.fasta \u0026 *_TAXONOMY.tsv files) downloaded via the ['Download Source Data'](https://us-sea-1.linodeobjects.com/boldistilled/source.zip) button. Utilised the [COI-5p.hmm](https://github.com/SchistoDan/BeeGees/blob/main/resources/hmm/README.md) for structural validation.\r\n  - **rbcL**: Requires the [custom reference](https://doi.org/10.6084/m9.figshare.17040680.v5) BLASTn rbcL database and corresponding taxonomy mapping TSV file (*\\_dereplicated_\\*.fasta \u0026 *\\_dereplicated_\\*.tsv), downloaded [here](https://figshare.com/ndownloader/files/56104238). Utilised the [rbcL.hmm](https://github.com/SchistoDan/BeeGees/blob/main/resources/hmm/README.md) for structural validation.\r\n \r\n## Structural validation\r\nStructural validation (via `structural_validation.py`) evaluates all generated barcode consensus sequences (from both pre-processing mode and all fasta_cleaner variants) through structural and functional analysis to identify high-quality, protein-coding sequences suitable for taxonomic assignment and species identification. 
Outputs  a validation CSV containing comprehensive metrics for all sequences, including structural features, translation analysis, and quality ranks, and 'output_barcode_all_passing.fasta' containing ALL barcode sequences that pass the five quality criteria (multiple barcode sequences per process_id may pass)\r\n\r\n**Process:**\r\n1. Barcode region extraction: Remove tilde characters (~) representing missing gene regions, replace gap ('-') characters with ambiguous bases (N's), use nhmmer to align sequences against marker-specific HMM profiles, constructs barcode sequences in HMM coordinate space, and trims leading/trailing N's while preserving internal ambiguous bases.\r\n2. Structural analysis: Calculates sequence length, gap distribution (leading/trailing/internal), N base count, and distinguishes 'original' N's (barcode_ambiguous_bases_original, representing quality issues) from processing-introduced N's (barcode_ambiguous_bases, representing all N's in final sequence).\r\n3. Translation analysis: Evaluates all three reading frames (0, 1 2), translates sequences using specified genetic code, counts stop codons in each frame, and selects the optimal frame with the fewest stop codons.\r\n4. Quality ranking: Assigns barcode ranks (1-6) based on original N's, stop codons, reading frame validity, and base count (lower = better):\r\n     - Rank 1: Perfect sequences (no original N's, no stop codons, valid frame, ≥500bp)\r\n     - Rank 2: High quality (no original N's, no stop codons, valid frame, 400-499bp)\r\n     - Rank 3: Good quality (no original N's, no stop codons, valid frame, 300-399bp)\r\n     - Rank 4: Acceptable (no original N's, no stop codons, valid frame, 200-299bp)\r\n     - Rank 5: Minimal (no original N's, no stop codons, valid frame, 1-199bp)\r\n     - Rank 6: Problematic (contains original N's or translation issues)\r\n5. 
Sequence selection: To be considered structurally validated and proceed to taxonomic validation, sequences must pass ALL of the following criteria:\r\n     - No original N's (barcode_ambiguous_bases_original == 0)\r\n     - No stop codons (stop_codons == 0)\r\n     - Sequence is in a valid reading frame (reading_frame \u003e= 0)\r\n     - Sufficient informative nucleotide base content (barcode_base_count \u003e 300bp)\r\n     - Acceptable post-processing sequence 'quality' (barcode_ambiguous_bases \u003c 30% of barcode_base_count)\r\n\r\n\r\n## Taxonomic validation\r\nTaxonomic validation is a two-step process (via `tv_local_blast.py` and `tv_blast2taxonomy.py`) for verifying barcode identity through local BLAST searches and hierarchical taxonomic matching.\r\n\r\n**Process:**\r\n1. Local BLASTn search: Perform parallel BLASTn searches against a local database, either created from a multi-FASTA file (using makeblastdb), or using a pre-constructed BLAST database. The e-value threshold is hardcoded to 1e-5. In sequence-specific TSV output files (output format 6), the top 500 BLAST hits are then ordered by percent identity (in descending order). These are then filtered to the top 100 hits and are output to the summary CSV.\r\n2. Taxonomic assignment validation: Validates BLASTn results against expected taxonomy using hierarchical matching and quality-based filtering to confirm barcode identity.\r\n  1. Parses local BLASTn summary CSV, expected taxonomic lineage for each sample, BLAST database taxonomy mappings, and structurally validated sequences in FASTA format.\r\n  2. Filter top 100 BLASTn hits to remove those with percent identity values below the specified threshold (\u003c `min_pident`), as well as hits below a specified minimum alignment length (\u003c `min_length`).\r\n  3. Assess taxonomy of remaining BLAST hits for each sequence via hierarchical taxonomic (exact string) matching between the BLAST database taxonomy mapping and expected taxonomic lineage. 
Looks for matches between the expected taxonomy and database taxonomy mapping at family, genus, or species-level (highest rank to consider set with `taxval_rank`). The first (i.e. top) hit with a taxonomy match at any of the allowed ranks is accepted.\r\n  4. Sequence selection for each sample (Process ID): Among the structurally validated consensus sequences with taxonomy matches, the 'best' sequence is selected based on the following criteria:\r\n     - Lowest matched_rank (species \u003e genus \u003e family - more specific preferred)\r\n     - Lowest gaps (alignment quality)\r\n     - Lowest mismatches (sequence variability)\r\n     - Highest percent identity (overall sequence similarity)\r\n     - Lowest e-value (statistical significance)\r\n     - Highest alignment length (matching hit confidence)\r\n     - Highest s value (MitoGeneExtractor parameter)\r\n     - Highest r value (MitoGeneExtractor parameter)\r\n     - Has \"fcleaner\" in seq_id (prioritises cleaned consensus sequences)\r\n  5. Generation of taxonomic validation CSV file\r\n\r\n## Final metric integration\r\nThe barcode validation outputs are merged with pre-processing and barcode recovery statistics (via `val_csv_merger.py`) to create the final comprehensive BeeGees output ({run_name}_final_stats.csv), consolidating:\r\n- Read QC metrics (fastp, TrimGalore)\r\n- Reference retrieval results (Gene Fetch)\r\n- Barcode recovery statistics (MGE, fasta_cleaner)\r\n- Structural validation metrics\r\n- Taxonomic validation results\r\n\r\n\r\n# Contributing #\r\n- Please feel free to submit issues, fork the repository, and create pull requests for any improvements.\r\n- This snakemake pipeline was produced by Dan Parsons @ NHMUK for the Biodiversity Genomics Europe (BGE) consortium. 
If you use BeeGees in your work, please cite our paper at ...\r\n- Since BeeGees uses [MitogeneExtractor](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.14075) at its core, please cite:\r\n  Brasseur, M.V., Astrin, J.J., Geiger, M.F., Mayer, C., 2023. MitoGeneExtractor: Efficient extraction of mitochondrial genes from next-generation sequencing libraries. Methods in Ecology and Evolution.\r\n\r\n\r\n# Future development #\r\n- Expand supported markers beyond COI-5P and rbcL. Will require marker-specific HMMs, BLAST databases and associated taxonomy files for barcode validation. Next likely maker to be added = Matk.\r\n- Increase flexibility of input sequence_references CSV headers, so that ID/id/Process ID/PROCESS ID/process_id/sample/sample_id/SAMPLE ID/etc are accepted.\r\n- Update 01_human_cox1_filter.py so it does not solely filter aligned reads against human COI, but instead against the whole human mitogenome.\r\n- Integrate pre-MGE contamination screening step (e.g. using BBDuk).\r\n- Output simple plots. \r\n","organization":"iBOL Europe Museum Skimming","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1346?version=1","name":"main @ c0f7c36","author":["Dan Parsons"],"descriptor_type":["SMK"]},{"id":"2","url":"https://workflowhub.eu/workflows/1346?version=2","name":"v1.1.0","author":["Dan Parsons"],"descriptor_type":["SMK"]}]},{"id":"1350","url":"https://workflowhub.eu/workflows/1350","name":"qcxms-sdf/main","description":"Workflow to predict EI mass spectra using QCxMS starting from a single SDF file, containing the 3D coordinates of all atoms in the molecule. 
These files can typically be obtained from PubChem.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1350?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1350?version=2","name":"v0.2","author":[],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1350?version=3","name":"v0.3","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1351","url":"https://workflowhub.eu/workflows/1351","name":"mfassignr/main","description":"This workflow can be used to assign multi-element molecular formulas to ultrahigh resolution mass spectra.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1351?version=1","name":"v0.1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1352","url":"https://workflowhub.eu/workflows/1352","name":"mags-building/main","description":"This workflow constructs Metagenome-Assembled Genomes (MAGs) using SPAdes or MEGAHIT as assemblers, followed by binning with four different tools and refinement using Binette. The resulting MAGs are dereplicated across the entire input sample set, then annotated and evaluated for quality.\nYou can provide pooled reads (for co-assembly/binning), individual read sets, or a combination of both. The input samples must consist of the original reads, which are used for abundance estimation. 
In all cases, reads should be trimmed, adapters removed, and cleaned of host or other contaminants before processing.","organization":"Intergalactic Workflow Commission (IWC)","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1352?version=1","name":"v0.1","author":["Bérénice Batut","Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1352?version=2","name":"v0.2","author":["Bérénice Batut","Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"3","url":"https://workflowhub.eu/workflows/1352?version=3","name":"v0.3","author":["Bérénice Batut","Paul Zierep"],"descriptor_type":["GALAXY"]},{"id":"4","url":"https://workflowhub.eu/workflows/1352?version=4","name":"v0.4","author":["Bérénice Batut","Paul Zierep"],"descriptor_type":["GALAXY"]}]},{"id":"1353","url":"https://workflowhub.eu/workflows/1353","name":"ATel Follow-up Photometric data Legacy Survey Photo-z","description":"This workflow analyses a given astrophysics text (e.g. Astronomer's Telegram https://astronomerstelegram.org/). It extracts positions of mentioned astronomical sources and provides possible optical counter-parts with photometric data. 
The corresponding photometric data can be further used to estimate the redshift of the optical sources, that is a measure of the distance between the Earth and the optical source.\r\n\r\nGiven the fact that the tool that estimates the photometric redshift is still in staging mode on a local instance, due to non-standard package channels, the entire workflow can be found on the local galaxy instance:\r\nhttps://galaxy.odahub.fr/u/andreiv/w/atel-follow-up-photometric-data-legacy-survey-photoz\r\n\r\nThe workflow without the last tool can be found on the usegalaxy.eu instance:\r\nhttps://usegalaxy.eu/u/avariu/w/atel-follow-up-photometric-data-legacy-survey
Key steps in the workflow include:\r\n* **Data Input**: The raw mutation data from cBioPortal and the uploaded copy number alteration data are ingested.\r\n* **Query Tabular**: This step extracts and formats the necessary data for PyClone-VI, a tool used for inferring clonal population structures.\r\n* **Data Merging and Filtering**: The workflow merges mutation data with copy number data and filters incomplete entries to ensure quality inputs for analysis.\r\n* **Running PyClone-VI**: The formatted data is then analyzed using PyClone-VI to model clonal evolution.\r\n* **Visualization**: Outputs from PyClone-VI are visualized through cellular prevalence graphs to illustrate clonal dynamics over time.\r\n* **Export to cBioPortal**: Finally, the results, along with relevant plots, are exported back to cBioPortal, allowing researchers to link findings directly within the resource.\r\n\r\nThe design of this workflow enhances the analytical capabilities available to researchers by integrating diverse cancer data and providing a seamless pathway for data management and visualization across platforms, all achieved as part of the EOSC4Cancer initiative.\r\n\r\n**Requirements**\r\n\r\nTo successfully deploy the Galaxy workflow and the associated components, the following requirements must be met:\r\n* **Intermediary Server**: A custom server developed to act as a bridge between cBioPortal and Galaxy. This server handles API requests from cBioPortal and interacts with Galaxy using Bioblend. It is essential for the integration of the two platforms.\r\n* **Docker**: The workflow components can be orchestrated using Docker. 
\r\n\r\nA full example of the setup can be found at the following repository: https://github.com/elixir-oslo/cbioportal-docker-compose\r\n\r\nFor more details on the tools and wrappers used in this workflow, please visit our GitHub repositories:\r\ncBioPortal Frontend: https://github.com/cBioPortal/cbioportal-frontend\r\ncBioPortal Backend: https://github.com/cBioPortal/cbioportal/tree/tmr\r\nWrapper for PyClone-VI: https://github.com/iuc/galaxy-tool-pyclone-vi\r\nIntermediary Server: https://github.com/your-repo/cbioportal-galaxy-connector\r\nPlotting for PyClone-VI Output: https://github.com/your-repo/galaxy-tool-plot-cluster-prevalence\r\nExport Resource Image to cBioPortal: https://github.com/your-repo/galaxy-tool-export-cbioportal-image\r\nExport Timeline Data to cBioPortal: https://github.com/your-repo/galaxy-tool-export-cbioportal-timeline\r\nGalaxy Server with Tools: https://github.com/your-repo/docker-galaxy-pyclone","organization":"EOSC4Cancer","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1355?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1356","url":"https://workflowhub.eu/workflows/1356","name":"Import and Analyse from EBI - Universal","description":"This workflow was developed for the 2024 Bioinformatics Bootcamp at The Open University. 
It imports datasets from the EBI SCXA, reformats them, and analyses them similarly to the Filter, plot and explore Galaxy tutorial.
Then, we exclude features that are not clinical measurements or patient outcomes, and convert discrete variables into factors.\r\n\r\n\r\n``` r\r\ndata \u003c- read.csv('cvd/whas500.csv', row.names=1)\r\n\r\ndata$LengthOfStay \u003c- Surv(data$los, 1-data$dstat)\r\n\r\ndata$Survival \u003c- Surv(data$lenfol, data$fstat)\r\n\r\ndata \u003c- data %\u003e% select(-c(\"admitdate\", \"disdate\", \"fdate\", \"dstat\", \"lenfol\", \"fstat\", \"year\", \"los\"))\r\n\r\ndata \u003c- data %\u003e% mutate_at(c(\"cvd\", \"afb\", \"sho\", \"chf\", \"av3\", \"gender\", \"miord\", \"mitype\"), factor)\r\n\r\nhead(data)\r\n```\r\n\r\n```\r\n##   age gender hr sysbp diasbp      bmi cvd afb sho chf av3 miord mitype\r\n## 1  83      0 89   152     78 25.54051   1   1   0   0   0     1      0\r\n## 2  49      0 84   120     60 24.02398   1   0   0   0   0     0      1\r\n## 3  70      1 83   147     88 22.14290   0   0   0   0   0     0      1\r\n## 4  70      0 65   123     76 26.63187   1   0   0   1   0     0      1\r\n## 5  70      0 63   135     85 24.41255   1   0   0   0   0     0      1\r\n## 6  70      0 76    83     54 23.24236   1   0   0   0   1     0      0\r\n##   LengthOfStay Survival\r\n## 1            5    2178+\r\n## 2            5    2172+\r\n## 3            5    2190+\r\n## 4           10      297\r\n## 5            6    2131+\r\n## 6           1+        1\r\n```\r\n\r\nNext, we perform stratified 5-fold cross-validation to select model hyperparameters. Cross-validation folds are stratified to have approximately the same number of all-cause mortality and hospital discharge events. We measure model performance based on the total deviance of the censored outcomes. 
Rather than performing a grid search over the hyperparameters, we perform a random search.\r\n\r\n\r\n``` r\r\nset.seed(43)\r\n\r\nidx00 \u003c- which(data$Survival[,2]==0 \u0026 data$LengthOfStay[,2]==0)\r\nidx10 \u003c- which(data$Survival[,2]==1 \u0026 data$LengthOfStay[,2]==0)\r\nidx01 \u003c- which(data$Survival[,2]==0 \u0026 data$LengthOfStay[,2]==1)\r\nidx11 \u003c- which(data$Survival[,2]==1 \u0026 data$LengthOfStay[,2]==1)\r\n\r\nfoldid \u003c- rep(0, 500)\r\nfoldid[idx00] \u003c- sample(((1:length(idx00))-1) %% 5 + 1)\r\nfoldid[idx10] \u003c- sample(((1:length(idx10))-1) %% 5 + 1)\r\nfoldid[idx01] \u003c- sample(((1:length(idx01))-1) %% 5 + 1)\r\nfoldid[idx11] \u003c- sample(((1:length(idx11))-1) %% 5 + 1)\r\n\r\ntable(foldid, data$Survival[,2])\r\n```\r\n\r\n```\r\n##       \r\n## foldid  0  1\r\n##      1 57 44\r\n##      2 57 43\r\n##      3 57 43\r\n##      4 57 43\r\n##      5 57 42\r\n```\r\n\r\n``` r\r\ntable(foldid, data$LengthOfStay[,2])\r\n```\r\n\r\n```\r\n##       \r\n## foldid  0  1\r\n##      1  8 93\r\n##      2  8 92\r\n##      3  8 92\r\n##      4  8 92\r\n##      5  7 92\r\n```\r\n\r\n``` r\r\nlambdas \u003c- runif(100, 0.05, 0.5)\r\n\r\nalphas \u003c- runif(100, 0.01, 0.25)\r\n\r\nloglik \u003c- matrix(0, 100, 5)\r\nsize \u003c- matrix(0, 100, 5)\r\nfor (k in 1:5) {\r\n    ig.path \u003c- coxmgmPath(data[foldid!=k,], lambdas=lambdas, rank=F)\r\n    idx \u003c- 0\r\n    for (ig in ig.path$graphs) {\r\n        idx \u003c- idx + 1\r\n        g \u003c- fciStable(data[foldid!=k,], initialGraph=ig,\r\n                       alpha=alphas[idx], orientRule=\"maxp\", rank=F)\r\n        mb \u003c- g$markov.blankets$Survival\r\n        size[idx,k] \u003c- length(mb)\r\n        if (length(mb)==1) {\r\n            mb \u003c- c(1)\r\n        }\r\n        f \u003c- as.formula(paste(\"Survival ~\", paste(mb, collapse=\" + \")))\r\n        res \u003c- coxph(f, data[foldid!=k,])\r\n        test.risk \u003c- predict(res, newdata=data[foldid==k,])\r\n       
 res.test \u003c- coxph(Survival ~ offset(test.risk), data[foldid==k,])\r\n        loglik[idx,k] \u003c- -as.numeric(logLik(res.test))\r\n\r\n        mb \u003c- g$markov.blankets$LengthOfStay\r\n        size[idx,k] \u003c- size[idx,k] + length(mb)\r\n        if (length(mb)==1) {\r\n            mb \u003c- c(1)\r\n        }\r\n        f \u003c- as.formula(paste(\"LengthOfStay ~\", paste(mb, collapse=\" + \")))\r\n        res \u003c- coxph(f, data[foldid!=k,])\r\n        test.risk \u003c- predict(res, newdata=data[foldid==k,])\r\n        res.test \u003c- coxph(LengthOfStay ~ offset(test.risk), data[foldid==k,])\r\n        loglik[idx,k] \u003c- loglik[idx,k] + -as.numeric(logLik(res.test))\r\n    }\r\n}\r\n\r\nsizeMean \u003c- rowMeans(size)\r\nloglikMean \u003c- rowMeans(loglik)\r\nloglikSd \u003c- apply(loglik, 1, sd)\r\n\r\nplot(sizeMean, loglikMean, pch=19, col='red')\r\n```\r\n\r\n![plot of chunk modelselectcv](figure/modelselectcv-1.png)\r\n\r\n``` r\r\nminIdx \u003c- which.min(loglikMean)\r\n```\r\n\r\nNow that we have selected the best set of hyperparameters, we learn the final causal graphical model of all-cause mortality and hospital discharge after hospitilazation with acute myocardial infarction. This is done in two stages: first, we learn the undirected CoxMGM that serves as an initial estimate of the adjacencies in the causal graph. 
Second, we use FCI-Max to prune adjacencies and orient edges.\r\n\r\n\r\n``` r\r\nig.path \u003c- coxmgmPath(data, lambda=lambdas)\r\n\r\ng \u003c- fciStable(data, initialGraph=ig.path$graphs[[minIdx]],\r\n               alpha=alphas[minIdx], verbose=T, orientRule=\"maxp\")\r\n```\r\n\r\n```\r\n## Starting FCI-Stable algorithm...\r\n##   Starting FAS Stable...\r\n##     Searching at depth 0...\r\n##     Searching at depth 1...\r\n##     Searching at depth 2...\r\n##     Searching at depth 3...\r\n##     Searching at depth 4...\r\n##   FAS Stable Elapsed Time =  0.029 s\r\n##   RFCI adjacency pruning...\r\n##   Starting Posssible DSep search\r\n##     Starting Conservative Orientations...\r\n##       Filling Triple Map...\r\n##     Orienting colliders...\r\n##     Checking Possible-Dsep sets...\r\n##   Starting Final Orientations...\r\n##     Filling Triple Map...\r\n##   Orienting colliders...\r\n##   Orienting implied edges...\r\n##   FCI-Stable Elapsed Time =  0.25 s\r\n```\r\n\r\n``` r\r\ng\r\n```\r\n\r\n```\r\n## Algorithm:  CoxMGM-FCI-Max \r\n## Nodes:  15 \r\n## Edges:  17 \r\n##   Unoriented:  4 \r\n##   Partially Oriented:  6 \r\n##   Directed:  6 \r\n##   Bidirected:  1 \r\n## lambda = {0.3173977, 0.3173977, 0.3173977, 0.3173977, 0.3173977}\r\n## alpha =  0.02566852\r\n```\r\n\r\nFinally, we can generate a simple plot of the causal graphical model in R.\r\n\r\n\r\n``` r\r\nplot(g, nodeAttr=list(fontsize=36))\r\n```\r\n\r\n![plot of chunk finalmodelplot](figure/finalmodelplot-1.png)\r\n\r\n## License\r\n\r\nThe following data files are under the CC0 public domain:\r\n\r\n```\r\nwhas500.csv\r\nmetabric.rna.full.csv\r\nmetabric.rna.erp.full.csv\r\nmetabric.rna.ern.full.csv\r\nmeta_cohort_common_genes.rds\r\n```\r\n\r\nWhile all other files composing CausalCoxMGM are under the GPL-3.0 license.","organization":"CausalCoxMGM Team","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1359?version=1","name":"main @ ff02ff5","author":[],"descriptor_type":[]}]},{"id":"1360","url":"https://workflowhub.eu/workflows/1360","name":"nf-core/methylong","description":"\u003ch1\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-methylong_logo_dark.png\"\u003e\n    \u003cimg alt=\"nf-core/methylong\" src=\"docs/images/nf-core-methylong_logo_light.png\"\u003e\n  \u003c/picture\u003e\n\u003c/h1\u003e\n\n[![GitHub Actions CI Status](https://github.com/nf-core/methylong/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/methylong/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/methylong/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/methylong/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000\u0026logo=Amazon%20AWS)](https://nf-co.re/methylong/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000\u0026logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000\u0026logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/methylong)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23methylong-4A154B?labelColor=000000\u0026logo=slack)](https://nfcore.slack.com/channels/methylong)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000\u0026logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF\u0026logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000\u0026logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/methylong** is a bioinformatics pipeline that ...\n\n\u003c!-- TODO nf-core:\n   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the\n   major pipeline sections and the types of output it produces. You're giving an overview to someone new\n   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction\n--\u003e\n\n\u003c!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core\n     workflows use the \"tube map\" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   --\u003e\n\u003c!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline --\u003e1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n\u003e [!NOTE]\n\u003e If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\u003c!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.\n     Explain what rows and columns represent. For instance (please edit as appropriate):\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\n--\u003e\n\nNow, you can run the pipeline using:\n\n\u003c!-- TODO nf-core: update the following command to include all required parameters for a minimal example --\u003e\n\n```bash\nnextflow run nf-core/methylong \\\n   -profile \u003cdocker/singularity/.../institute\u003e \\\n   --input samplesheet.csv \\\n   --outdir \u003cOUTDIR\u003e\n```\n\n\u003e [!WARNING]\n\u003e Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/methylong/usage) and the [parameter documentation](https://nf-co.re/methylong/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/methylong/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/methylong/output).\n\n## Credits\n\nnf-core/methylong was originally written by Jin Yan Khoo.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\u003c!-- TODO nf-core: If applicable, make list of people who have also contributed --\u003e\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#methylong` channel](https://nfcore.slack.com/channels/methylong) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\u003c!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
--\u003e\n\u003c!-- If you use nf-core/methylong for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) --\u003e\n\n\u003c!-- TODO nf-core: Add bibliography of tools and data used in your pipeline --\u003e\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n\u003e **The nf-core framework for community-curated bioinformatics pipelines.**\n\u003e\n\u003e Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso \u0026 Sven Nahnsen.\n\u003e\n\u003e _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n","organization":"nf-core","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1360?version=1","name":"1.0.0","author":[],"descriptor_type":["NFL"]}]},{"id":"1361","url":"https://workflowhub.eu/workflows/1361","name":"BVSim: A Benchmarking Variation Simulator Mimicking Human Variation Spectrum","description":"# BVSim: A Benchmarking Variation Simulator Mimicking Human Variation Spectrum\r\n\r\n[![Profile views](https://komarev.com/ghpvc/?username=YongyiLuo98\u0026repo=BVSim\u0026label=Profile%20views\u0026color=0e75b6\u0026style=flat)](https://github.com/YongyiLuo98/BVSim)\r\n## Table of Contents\r\n\r\n- [Getting Started](#getting-started)\r\n- [Installation](#installation)\r\n- [General Functions and Parameters](#parameters)\r\n  - [Shared Parameters](#shared-parameters)\r\n    - [Output Naming Conventions](#output)\r\n    - [Write the Relative Positions of Simulated Variations](#write)\r\n    - [User-defined Block Regions with No Variations](#block)\r\n  - [Uniform Mode](#uniform-mode)\r\n  - [Complex SV Mode](#complex-sv-mode)\r\n    - [Parameters 
for CSV Mode](#parameters-for-csv-mode)\r\n  - [Uniform Parallel Mode](#uniform-parallel-mode)\r\n    - [Parameters for Uniform parallel Mode](#parameters-for-uniform-parallel-mode)\r\n  - [Wave Mode](#wave-mode)\r\n    - [User-defined Sample(s) and Input BED File Requirements](#requirements-for-the-bed-file)\r\n    - [Generate a BED File for a Single Sample](#generating-a-bed-file-for-a-single-sample-in-wave-mode)\r\n    - [Job Submission for Single Sample (BED Format)](#job-submission-for-wave-mode-single-sample)\r\n    - [Generating BED Files for Multiple Samples](#generating-bed-files-for-multiple-samples-in-wave-mode)\r\n    - [Job Submission for Multiple Samples (BED Format)](#job-submission-for-wave-mode-multiple-samples)\r\n    - [Important Note on File Placement](#important-note-on-file-placement)\r\n    - [Parameters for Wave Mode](#parameters-for-wave-mode)\r\n  - [Wave Region Mode](#wave-region-mode)\r\n    - [Extract User-defined Regions (e.g. TR region) and Generate the BED File](#step-1-extract-tr-regions)\r\n    - [Job Submission for Single Sample (BED Format)](#job-submission-for-wave-region-mode-single-sample)\r\n    - [Parameters for Wave Region Mode](#parameters-for-wave-region-mode)\r\n  - [Human Genome](#human-genome)\r\n- [Uninstallation for Updates](#uninstallation)\r\n- [Workflow of BVSim](#workflow)\r\n- [Definitions of SVs Simulated by BVSim](#definitions)\r\n\r\n## \u003ca name=\"getting-started\"\u003e\u003c/a\u003eGetting Started\r\n\r\nTo get started with BVSim, follow these steps to install and run the simulator:\r\n\r\n```sh\r\n# Create an envrionment called BVSim and install the dependencies\r\nconda create -n BVSim python=3.11 numpy pandas biopython scipy seaborn psutil\r\nconda activate BVSim\r\n# Run the following to install pysam or use the latest guide\r\nconda config --add channels defaults\r\nconda config --add channels conda-forge\r\nconda config --add channels bioconda\r\nconda install pysam\r\n# Installzation\r\n## Clone 
the repository in your home path\r\ncd your_home_path\r\ngit clone https://github.com/YongyiLuo98/BVSim.git\r\n## Navigate to the ~/BVSim/main directory and install the package\r\npip install your_home_path/BVSim/main/.\r\n\r\n# Verify the installation in your home path\r\ncd your_home_path\r\npython -m BVSim --help\r\npython -m BVSim -h\r\n\r\n## Run a toy example with a specified reference in the cloned folder\r\nconda activate BVSim\r\npython -m BVSim -ref 'your_home_path/BVSim/empirical/sub_hg19_chr1.fasta' -seed 0 -rep 0 -write -snp 2000\r\n## If you prefer using the default reference, simply execute\r\ncd your_home_path\r\npython -m BVSim\r\n\r\n\r\n# Generate variations with specific parameters\r\ncd your_home_path\r\npython -m BVSim -seed 1 -rep 1 -snp 2000\r\n\r\n# To write out the relative positions, use the following command\r\npython your_home_path/BVSim/main/write_SV.py your_home_path/BVSim/save/ BV_1_con0_chr1_SVtable.csv BV_1_con0_chr1_tem_ins_dic.npy\r\n\r\n# Create a block intervals BED file\r\ncd your_home_path\r\necho -e \"0\\t1000\\n3000\\t4000\" \u003e block_intervals.bed\r\n\r\n# Run the simulator with block regions\r\ncd your_home_path\r\npython -m BVSim -seed 1 -rep 1 -write -snp 2000 -block_region_bed_url block_intervals.bed\r\n```\r\n\r\n## \u003ca name=\"Installation\"\u003e\u003c/a\u003eInstallation\r\n### Create an envrionment called BVSim and install the dependencies\r\nTo start with, you need to install the dependent packages in an environment, for example called BVSim.\r\n```bash\r\n# Create an envrionment called BVSim and install the dependencies\r\nconda create -n BVSim python=3.11 numpy pandas biopython scipy seaborn psutil\r\nconda activate BVSim\r\n# Run the following to install pysam or use the latest guide\r\nconda config --add channels defaults\r\nconda config --add channels conda-forge\r\nconda config --add channels bioconda\r\nconda install pysam\r\n```\r\n### Clone the Repository\r\nNext, you need to clone the BVSim 
repository to your local machine. Execute the following command in your home directory:\r\n```bash\r\ncd your_home_path\r\ngit clone https://github.com/YongyiLuo98/BVSim.git\r\n```\r\n### Navigate to the Main Directory and Install the Package\r\nNext, navigate to the .../BVSim/main/ directory to install the package:\r\n```bash\r\npip install your_home_path/BVSim/main/.\r\n```\r\n### Verify the Installation\r\nAfter installation, you can verify it from your home directory. Execute the following commands:\r\n```bash\r\ncd\r\npython -m BVSim --help\r\npython -m BVSim -h\r\n```\r\nNote: You can only call BVSim in the cloned repository directory, while the installation must take place in the BVSim/main/ directory.\r\n#### Toy Example (Uniform mode):\r\n```bash\r\nconda activate BVSim\r\npython -m BVSim -ref 'your_home_path/BVSim/empirical/sub_hg19_chr1.fasta' -seed 0 -rep 0 -write -snp 2000\r\n```\r\nor you can use the default reference to test the installation by type the following in your home path. If you do not give a saving path, the outputs will go to \"your_home_path\\BVSim\\save\\\".\r\n\r\n```bash\r\ncd your_home_path\r\npython -m BVSim \r\n```\r\n## \u003ca name=\"parameters\"\u003e\u003c/a\u003eFunctions and Parameters\r\n\r\nFive modes: uniform, uniform parallel, csv, wave, wave_region\r\n\r\n### \u003ca name=\"shared-parameters\"\u003e\u003c/a\u003eShared Parameters\r\nThe BVSim package provides several functions (modes) and parameters for simulating genetic variations. 
Here is a table that introduces all the functions and different parameters:\r\n\r\n| Parameter | Type | Description | Default |\r\n| --- | --- | --- | --- |\r\n| `-ref` | str | Input reference file | '.../BVSim/empirical/sub_hg19_chr1.fasta' |\r\n| `-save` | str | Saving path | .../BVSim/save/ |\r\n| `-seed` | int | Seed for random number generator | 999 |\r\n| `-times` | int | Number of times | 10 |\r\n| `-rep` | int | Replication ID | 5 |\r\n| `-sv_trans` | int | Number of trans SV | 5 |\r\n| `-sv_inver` | int | Number of inversion SV | 5 |\r\n| `-sv_dup` | int | Number of tandem duplication | 5 |\r\n| `-sv_del` | int | Number of SV deletion | 5 |\r\n| `-sv_ins` | int | Number of SV insertion | 5 |\r\n| `-snp` | float | SNV number or probability | 5 |\r\n| `-snv_del` | float | SNV deletion number or probability | 5 |\r\n| `-snv_ins` | float | SNV insertion number or probability | 5 |\r\n| `-notblockN` | bool | Do not Block N positions | False |\r\n| `-write` | bool | Write full results | False |\r\n| `-delmin` | int | Minimum deletion length | 50 |\r\n| `-delmax` | int | Maximum deletion length | 60 |\r\n| `-insmin` | int | Minimum insertion length | 50 |\r\n| `-insmax` | int | Maximum insertion length | 450 |\r\n| `-dupmin` | int | Minimum duplication length | 50 |\r\n| `-dupmax` | int | Maximum duplication length | 450 |\r\n| `-invmin` | int | Minimum inversion length | 50 |\r\n| `-invmax` | int | Maximum inversion length | 450 |\r\n| `-dupmin` | int | Minimum duplication length | 50 |\r\n| `-dupmax` | int | Maximum duplication length | 450 |\r\n| `-transmin` | int | Minimum translocation length | 50 |\r\n| `-transmax` | int | Maximum translocation length | 450 |\r\n| `-block_region_bed_url` | str | local path of the block region BED file | None |\r\n\r\n#### \u003ca name=\"output\"\u003e\u003c/a\u003eOutput Naming Conventions\r\nWhen you run the simulation tool, the output files are named based on the sequence name you input or the parameter `rep` you set 
(repetition number). Below is a summary of the output files you can expect:\r\n\r\n1. **FASTA File**:  \r\n   The output FASTA file will be named as follows:\r\n```\r\nBV_\u003crep\u003e_seq_\u003cseqname\u003e.fasta\r\n```\r\nThis file contains the simulated sequence.\r\n\r\n2. **VCF File**:  \r\nThe VCF file will be named:\r\n```\r\nBV_\u003crep\u003e_seq_\u003cseqname\u003e.vcf\r\n```\r\nThis file stores the simulated variations.\r\n\r\n3. **SV Table**:  \r\nThe SV table will have different naming conventions depending on whether you choose to include relative positions:\r\n- If you include relative positions (by using the `-write` flag):\r\n  ```\r\n  BV_\u003crep\u003e_seq_\u003cseqname\u003e_SVtable_full.csv\r\n  ```\r\n- If you do not include relative positions:\r\n  ```\r\n  BV_\u003crep\u003e_seq_\u003cseqname\u003e_SVtable.csv\r\n  ```\r\n\r\n4. **Numpy File**:  \r\nThe numpy file that records all inserted segments we need to update the relative positions will be named:\r\n```\r\nBV_\u003crep\u003e_seq_\u003cseqname\u003e_tem_ins_dic.npy\r\n```\r\n#### \u003ca name=\"write\"\u003e\u003c/a\u003eWrite the Relative Positions of Simulated Variations\r\nIf you choose not to generate the relative positions during the initial simulation run (i.e., you do not include the `-write` flag), the columns for relative positions in the SV table will be empty. However, you can still update these relative positions later using the saved intermediate files.\r\n##### Steps to Write Relative Positions After Simulation\r\n1. **Run the Initial Simulation**:  \r\nFor example, you can execute:\r\n```bash\r\ncd your_home_path\r\npython -m BVSim -seed 1 -rep 1 -snp 2000\r\n```\r\nIn this case you generated the default number of elementary SVs and micro indels, as well as 2000 SNPs saved in the default directory with `BV_1_seq_chr1_SVtable.csv`, `BV_1_seq_chr1_tem_ins_dic.npy`.\r\n\r\n2. 
**Update Relative Positions**:\r\nYou can then run the following command to generate a table with the relative positions:\r\n```bash\r\npython your_home_path/BVSim/main/write_SV.py your_home_path/BVSim/save/ BV_1_seq_chr1_SVtable.csv BV_1_seq_chr1_tem_ins_dic.npy\r\n```\r\nThis command will create a file called `full_BV_1_seq_chr1_SVtable.csv` in the same directory, which will contain the relative positions for all variations with respect to the consensus sequence.\r\nBy following this naming convention and steps, you can easily manage and update your output files as needed.\r\n#### \u003ca name=\"block\"\u003e\u003c/a\u003eUser-defined Block Regions with No Variations\r\nThe input of the '-block_region_bed_url' should be two columns of positions (start; end) without headers separated by '\\t'. To create a bed file, you can refer to the following example. In this case, positions from 0 to 999, from 3000 to 3999 cannot have any variation, so called blocked.\r\n\r\n#### Toy Example:\r\n```bash\r\ncd your_home_path\r\necho -e \"0\\t1000\\n3000\\t4000\" \u003e block_intervals.bed\r\n# uniform.py\r\ncd your_home_path\r\npython -m BVSim -seed 1 -rep 1 -write -snp 2000 -block_region_bed_url block_intervals.bed\r\n```\r\n\r\n### \u003ca name=\"uniform-mode\"\u003e\u003c/a\u003eUniform Mode\r\nIf you do not call any of the following parameters (-csv, -cores, -len_bins, -wave), the simulation will be generated one by one uniformly.\r\n\r\n#### Toy Example (Uniform mode):\r\n```bash\r\nconda activate BVSim\r\npython -m BVSim -ref 'hg19_chr1.fasta' -seed 0 -rep 0 -write -snp 2000\r\n```\r\n### \u003ca name=\"complex-sv-mode\"\u003e\u003c/a\u003eComplex SV Mode\r\nAdd -csv to your command, 18 types of Complex Structure Variations can be generated.\r\n\r\n* ID1: Tandem Inverted Duplication (TanInvDup)\r\n* ID2: Dispersed Inverted Duplication (DisInvDup)\r\n* ID3: Dispersed Duplication (DisDup)\r\n* ID4: Inversion with 5’ or 3’ Flanking Deletion (DEL+INV/INV+DEL)\r\n* ID5: 
5’ Deletion and Dispersed Inverted Duplication (DEL+DisInvDup)\r\n* ID6: 5’ Deletion and Dispersed Duplication (DEL+DisDup)\r\n* ID7: Tandem Duplication and 3’ Deletion (TanDup+DEL)\r\n* ID8: Tandem Inverted Duplication and 3’ Deletion (TanInvDup+DEL)\r\n* ID9: Tandem Duplication, Deletion and Inversion (TanDup+DEL+INV)\r\n* ID10: Tandem Inverted Duplication, Deletion and Inversion (TanInvDup+DEL+INV)\r\n* ID11: Paired-Deletion Inversion (DEL+INV+DEL)\r\n* ID12: Inversion with 5’ Flanking Duplication (DUP+INV)\r\n* ID13: Inversion with 3’ Flanking Duplication (INV+DUP)\r\n* ID14: Paired-Duplication Inversion (DUP+INV+DUP)\r\n* ID15: Inversion with 5’ Flanking Duplication and 3’ Flanking Deletion (DUP+INV+DEL)\r\n* ID16: Inversion with 5’ Flanking Deletion and 3’ Flanking Duplication (DEL+INV+DUP)\r\n* ID17: Inverted Duplication with Flanking Triplication (DupTripDup-INV)\r\n* ID18: Insertion with Deletion (INSdel)\r\n#### Toy Example (CSV mode):\r\n```bash\r\ncd your_home_path\r\npython -m BVSim -ref 'your_home_path/BVSim/empirical/sub_hg19_chr1.fasta' -save your_saving_url -seed 1 -rep 1 -csv -write -snp 2000\r\n```\r\n#### \u003ca name=\"parameters-for-csv-mode\"\u003e\u003c/a\u003eParameters for CSV Mode\r\nThe lengths of the CSVs follow different Gaussian distributions with modifiable means (-mu) and standard deviations (-sigma).\r\n| Parameter | Type | Description | Default |\r\n| --- | --- | --- | --- |\r\n| `-csv_num` | int | Number for each type of CSV, superior to -csv_total_num | 0 |\r\n| `-csv_total_num` | int | Total number for CSV, assign number of each type by empirical weights | 0 |\r\n| `-num_ID1_csv to -num_ID18_csv` | int | Number of respective CSV types | 5 |\r\n| `-mu_ID1 to -mu_ID18` | int | Mean of Gaussian distribution of CSV length | 1000 |\r\n| `-sigma_ID1 to -sigma_ID18` | int | Standard deviation of Gaussian distribution of CSV length | 100 |\r\n\r\n### \u003ca name=\"uniform-parallel-mode\"\u003e\u003c/a\u003eUniform Parallel Mode\r\nAdd 
-cores, -len_bins to your command, and write a .job file (task01.job) as follows (-c 5 means 5 cores, should be the same as -cores 5), parallel simulation will be allowed.\r\n\r\n#### Toy Example (Uniform-parallel mode): task01.job\r\n```bash\r\n#!/bin/bash\r\n#SBATCH -J uniform_parallel\r\n#SBATCH -N 1 -c 5\r\n#SBATCH --output=output.txt\r\n#SBATCH --error=err.txt\r\n\r\nsource /opt/share/etc/miniconda3-py39.sh\r\nconda activate BVSim\r\ncd your_home_path\r\npython -m BVSim -ref your_home_path/hg19/hg19_chr21.fasta -save your_home_path/test_data/BVSim/task03/ -cores 5 -len_bins 500000 -rep 3 -snp 200 -snv_del 200 -snv_ins 200 -write\r\nconda deactivate\r\n```\r\nSubmit the job file by:\r\n```bash\r\nsbatch task01.job\r\n```\r\n#### \u003ca name=\"parameters-for-uniform-parallel-mode\"\u003e\u003c/a\u003eParameters for Uniform parallel Mode\r\n\r\n| Parameter | Type | Description | Default |\r\n| --- | --- | --- | --- |\r\n| `-cores` | int | Number of kernels for parallel processing | 1 |\r\n| `-len_bins` | int | Length of bins for parallel processing | 50000 |\r\n\r\n### \u003ca name=\"wave-mode\"\u003e\u003c/a\u003eWave Mode\r\n\r\nIn Wave mode, users can provide a `.bed` file generated from an empirical `.vcf` file (for example, from HG002) or multiple BED files derived from samples of a selected population (such as the 15 Cell samples). This functionality allows you to generate non-uniform insertions and deletions with various options.\r\n\r\n#### \u003ca name=\"requirements-for-the-bed-file\"\u003e\u003c/a\u003eUser-defined Sample(s) and Input BED File Requirements\r\n\r\nThe BED file must adhere to the following requirements:\r\n\r\n- **First Column**: Location (genomic position)\r\n- **Second Column**: DEL/INS label (indicating if the variation is a deletion or insertion)\r\n- **Third Column**: Length (absolute value of the variation)\r\n\r\nEach column should be separated by a tab character (`\\t`) and must not include headers. 
Additionally, each BED file should represent variations on the same sequence.\r\n\r\n#### \u003ca name=\"generating-a-bed-file-for-a-single-sample-in-wave-mode\"\u003e\u003c/a\u003eGenerate a BED File for a Single Sample\r\n\r\nTo generate a single input BED file from the HG002 `.vcf` file of chromosome 21, you can use the following commands in your terminal:\r\n\r\n```bash\r\n# Download the VCF file and its index\r\nwget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz \r\nwget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz.tbi\r\n\r\n# Activate the bcftools environment\r\nconda activate bcftools\r\n\r\n# Generate the BED file using bcftools and awk\r\nbcftools view -H -r 21 -i 'SVTYPE=\"INS\" || SVTYPE=\"DEL\"' /home/adduser/data/test_data/TGS/hg002/HG002_SVs_Tier1_v0.6.vcf.gz | \\\r\nawk -v OFS='\\t' '{\r\n    split($8, a, \";\");\r\n    for (i in a) {\r\n        if (a[i] ~ /^SVTYPE/) {\r\n            split(a[i], b, \"=\");\r\n            svtype = b[2];\r\n        }\r\n        else if (a[i] ~ /^SVLEN/) {\r\n            split(a[i], c, \"=\");\r\n            svlen = c[2];\r\n            if (svlen \u003c 0) svlen = -svlen;  # Extract the absolute value of SV length\r\n        }\r\n    }\r\n    print $2, svtype, svlen;  # Print the location, SV type, and absolute SV length\r\n}' \u003e /home/adduser/data/test_data/TGS/hg002/chr21_SV_Tier1.bed\r\n```\r\n##### \u003ca name=\"job-submission-for-wave-mode-single-sample\"\u003e\u003c/a\u003eJob Submission for Single Sample (BED Format)\r\n\r\nTo utilize this single BED file, users should call '-indel_input_bed' in the command. 
Below is the example of a SLURM job script that you can use to run the Wave mode simulation with single empirical data:\r\n\r\n```bash\r\n#!/bin/bash\r\n#SBATCH -J full_chr21_parallel\r\n#SBATCH -N 1 -c 5\r\n#SBATCH --output=output_chr21_wave.txt\r\n#SBATCH --error=err_chr21_wave.txt\r\n\r\nsource /opt/share/etc/miniconda3-py39.sh\r\nconda activate BVSim\r\ncd your_home_path\r\npython -m BVSim -ref your_home_path/hg19/hg19_chr21.fasta -save your_home_path/test_data/BVSim -seed 0 -rep 2 -cores 5 -len_bins 500000 -wave -indel_input_bed your_home_path/hg002/chr21_SV_Tier1_2.bed -mode empirical -snp 2000 -snv_del 1000 -snv_ins 100 -write\r\nconda deactivate\r\n```\r\nSubmit the job file by:\r\n```bash\r\nsbatch task02_single.job\r\n```\r\n#### \u003ca name=\"generating-bed-files-for-multiple-samples-in-wave-mode\"\u003e\u003c/a\u003eGenerating BED Files for Multiple Samples\r\n\r\nIn this section, we will outline the steps to generate `.bed` files for multiple cell samples from the original Excel spreadsheet, using the 15 Cell samples as an example.\r\n\r\n##### Step 1: Download the Original Excel File\r\n\r\nFirst, download the Excel file containing the cell samples data:\r\n\r\n```python\r\nimport os\r\n\r\n# Download the Excel file\r\nos.system('wget https://ars.els-cdn.com/content/image/1-s2.0-S0092867418316337-mmc1.xlsx')\r\n\r\n```\r\n##### Step 2: Load and View the Data\r\nNext, load the Excel file into a Pandas DataFrame and view the first few rows:\r\n```python\r\nimport pandas as pd\r\n\r\n# Read the Excel file into a DataFrame\r\nfile_path = '1-s2.0-S0092867418316337-mmc1.xlsx'\r\ndf = pd.read_excel(file_path, sheet_name=0)  # Choose the correct sheet based on the file\r\n\r\n# Display the first 5 rows of the DataFrame\r\nprint(df.head(5))\r\n```\r\n##### Step 3: Filter the Data\r\nExtract the required columns and rename the first column:\r\n```python\r\n# Extract the necessary columns\r\ncolumns_to_keep = ['#CHROM', 'POS', 'END', 'ID', 'SVTYPE', 'SVLEN', 
'MERGE_SAMPLES']\r\ncell_df = df[columns_to_keep]\r\n\r\n# List of all sample strings\r\nsamples = ['CHM1', 'CHM13', 'HG00514', 'HG00733', 'NA19240', 'HG02818', 'NA19434', 'HG01352', 'HG02059', 'NA12878', 'HG04217', 'HG02106', 'HG00268', 'AK1', 'HX1']\r\n# selected population: the African population\r\nAFR_samples = ['NA19240', 'HG02818', 'NA19434']\r\n\r\n# Specify the columns to save in the BED file\r\ncolumns_to_save = ['POS', 'SVTYPE', 'SVLEN']\r\n\r\n# Extract rows where CHROM equals 'chr21'\r\nchr21_df = cell_df[cell_df['CHROM'] == 'chr21']\r\n\r\n# Display the first 10 rows for verification\r\nprint(chr21_df.head(10))\r\n\r\n# Generate BED files for each sample in the AFR_samples list\r\nfor sample in AFR_samples:\r\n    # Create a new DataFrame containing only rows where 'MERGE_SAMPLES' contains the current sample\r\n    sample_df = chr21_df[chr21_df['MERGE_SAMPLES'].str.contains(sample)]\r\n\r\n    # Specify the path for the new BED file\r\n    bed_file_path = f'.../BVSim/empirical/{sample}_chr21.bed'\r\n\r\n    # Save the specified columns to a BED file\r\n    sample_df[columns_to_save].to_csv(bed_file_path, sep='\\t', header=False, index=False)\r\n\r\n```\r\n#### \u003ca name=\"job-submission-for-wave-mode-multiple-samples\"\u003e\u003c/a\u003eJob Submission for Multiple Samples (BED Format)\r\n\r\nWe provide an example of a Job submission script using SLURM for running the Wave mode with BVSim. This script utilizes the generated multiple sample BED files. 
Below is the example of a SLURM job script that you can use to run the Wave mode simulation with multiple samples:\r\n\r\n```bash\r\n#!/bin/bash\r\n#SBATCH -J wave\r\n#SBATCH -N 1 -c 5\r\n#SBATCH --output=/home/project18/code/BVSim_code/wave2_out.txt\r\n#SBATCH --error=/home/project18/code/BVSim_code/wave2_err.txt\r\n\r\nsource /opt/share/etc/miniconda3-py39.sh\r\nconda activate BVSim\r\ncd /home/project18/\r\n\r\npython -m BVSim -ref your_home_path/hg38/chr21.fasta \\\r\n-save your_home_path/BVSim/task01/ -seed 0 -rep 1 -cores 5 \\\r\n-len_bins 500000 -wave -mode empirical -snp 2000 -snv_del 1000 -snv_ins 100 \\\r\n-write -file_list NA19240_chr21 HG02818_chr21 NA19434_chr21\r\n\r\nconda deactivate\r\n```\r\n#### \u003ca name=\"important-note-on-file-placement\"\u003e\u003c/a\u003eImportant Note on File Placement\r\nEnsure that both the single sample and multiple sample BED files are placed in the .../BVSim/empirical/ directory. This organization simplifies the command structure, allowing you to specify only the base names of the files (without extensions) directly in the -file_list option, as demonstrated in the script above.\r\n\r\n#### \u003ca name=\"parameters-for-wave-mode\"\u003e\u003c/a\u003eParameters for Wave Mode\r\n\r\n| Parameter | Type | Description | Default |\r\n| --- | --- | --- | --- |\r\n| `-cores` | int | Number of kernels for parallel processing | 1 |\r\n| `-len_bins` | int | Length of bins for parallel processing | 50000 |\r\n| `-wave` | bool | Run Wave.py script | False |\r\n| `-mode` | str | Mode for calculating probabilities | 'probability' |\r\n| `-sum` | bool | Total indel SV equals sum of the input bed | False |\r\n| `-indel_input_bed` | str | Input single BED file | None |\r\n| `-file_list` | str | Input list of multiple BED files | None |\r\n\r\n##### Mode and Sum Parameters\r\n\r\nThe `-mode` parameter determines how the simulation calculates probabilities for insertions and deletions. 
It accepts two values:\r\n\r\n- **'probability'**: In this mode, probabilities for insertions and deletions are derived from the empirical data provided in the input BED files. The total number of variations can be defined by the `-sum` parameter. If `-sum` is set to `True`, the total number of insertions or deletions will be the maximum of the calculated empirical total or the specified values in `-sv_ins` or `-sv_del`. This allows for flexibility in controlling the total number of SVs in the simulation.\r\n\r\n- **'empirical'**: When set to this mode, the simulation directly uses the empirical values from the input data without any probability calculations. The total number of variations will be the sum of the provided empirical data.\r\n\r\nThe `-sum` parameter, when enabled, alters the total number of insertions and deletions based on the specified empirical data. If disabled, the simulation uses the fixed total values defined in `-sv_ins` and `-sv_del`, regardless of the empirical input.\r\n\r\n\r\n### \u003ca name=\"wave-region-mode\"\u003e\u003c/a\u003eWave Region Mode\r\n\r\nIn Wave region mode, you can specify different INDEL probabilities using a BED file defined by `region_bed_url`. For example, if you want to increase the insertion and deletion probabilities in the tandem repeat (TR) regions of hg19, you can follow these steps.\r\n\r\n#### \u003ca name=\"step-1-extract-tr-regions\"\u003e\u003c/a\u003eExtract User-defined Regions (e.g. 
TR region) and Generate the BED File\r\n\r\nFirst, extract the TR regions' positions from UCSC and create a BED file with two columns (start; end) separated by a tab character (`\\t`).\r\n\r\nYou can generate the BED file using the following commands:\r\n\r\n```bash\r\n# Download the TR regions data\r\nwget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/references/GRCh37/resources/hg19.simpleRepeat.bed.gz \r\nwget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/references/GRCh37/resources/hg19.simpleRepeat.bed.gz.tbi\r\n\r\n# Extract the relevant columns and create the BED file\r\nzcat hg19.simpleRepeat.bed.gz | awk 'BEGIN{OFS=\"\\t\"} {print $1, $2, $3}' \u003e your_home_path/hg002/windows_TR.bed\r\n\r\n# Merge overlapping intervals and remove duplicates\r\nbedtools sort -i your_home_path/hg002/windows_TR.bed | bedtools merge -i stdin | awk 'BEGIN{OFS=\"\\t\"} {$4=\"TR\"; print}' | uniq \u003e your_home_path/hg002/windows_TR_unique.bed\r\n\r\n# Filter for chromosome 21\r\nawk '$1 == \"chr21\"' your_home_path/hg002/windows_TR_unique.bed \u003e your_home_path/hg002/windows_TR_unique_chr21.bed\r\n\r\n# Create a final BED file with start and end positions\r\nawk '{print $2 \"\\t\" $3}' your_home_path/hg002/windows_TR_unique_chr21.bed \u003e your_home_path/hg002/chr21_TR_unique.bed\r\n```\r\n#### \u003ca name=\"job-submission-for-wave-region-mode-single-sample\"\u003e\u003c/a\u003eJob Submission for Single Sample (BED Format)\r\nIn this example, we set the seed to `0` and use a replication ID of `4`. The job is configured to utilize `5` cores for parallel processing, with a bin size of `500,000`. We will generate `10,000` SNPs, along with `100` micro deletions and `100` micro insertions. The probabilities for these insertions and deletions are specified in the input BED file (`-indel_input_bed`) using the empirical mode (`-mode`). 
Additionally, we have set the probabilities for insertions (`-p_ins_region`) and deletions (`-p_del_region`) to approximately `0.6` for the total located in the TR region defined by `-region_bed_url`.\r\n\r\n```bash\r\n#!/bin/bash\r\n#SBATCH -J full_chr21_parallel\r\n#SBATCH -N 1 -c 5\r\n#SBATCH --output=output_chr21_wave_region.txt\r\n#SBATCH --error=err_chr21_wave_region.txt\r\n\r\nsource /opt/share/etc/miniconda3-py39.sh\r\nconda activate BVSim\r\ncd your_home_path\r\npython -m BVSim -ref your_home_path/hg19/hg19_chr21.fasta -save your_home_path/test_data/BVSim -seed 0 -rep 4 -cores 5 -len_bins 500000 -wave_region -indel_input_bed your_home_path/hg002/chr21_SV_Tier1.bed -mode empirical -snp 10000 -snv_del 100 -snv_ins 100 -write -p_del_region 0.6 -p_ins_region 0.6 -region_bed_url your_home_path/hg002/chr21_TR_unique.bed\r\nconda deactivate\r\n```\r\n\r\nSubmit the job file using the following command:\r\n```bash\r\nsbatch task03.job\r\n```\r\n#### \u003ca name=\"parameters-for-wave-region-mode\"\u003e\u003c/a\u003eParameters for Wave Region Mode\r\nThe table below summarizes the parameters available for Wave region mode:\r\n| Parameter | Type | Description | Default |\r\n| --- | --- | --- | --- |\r\n| `-cores` | int | Number of kernels for parallel processing | 1 |\r\n| `-len_bins` | int | Length of bins for parallel processing | 50000 |\r\n| `-wave` | bool | Run Wave.py script | False |\r\n| `-mode` | str | Mode for calculating probabilities | 'probability' |\r\n| `-sum` | bool | Total number of insertions and deletions equals sum of the input bed | False |\r\n| `-indel_input_bed` | str | Input single BED file | None |\r\n| `-file_list` | str | Input list of multiple BED files | None |\r\n| `-wave_region` | bool | Run Wave_TR.py script | False |\r\n| `-p_del_region` | float | Probability of SV DEL in the user-defined region for deletion | 0.5 |\r\n| `-p_ins_region` | float | Probability of SV INS in the user-defined region for insertion | 0.5 |\r\n| 
`-region_bed_url` | str | Path of the BED file for the user-defined region | 'your_home_path/hg002/chr21_TR_unique.bed' |\r\n\r\n### \u003ca name=\"human-genome\"\u003e\u003c/a\u003eHuman Genome\r\nFor the human genome, we derive the length distributions of SVs from HG002 and the 15 representative samples. For SNPs, we embed a learned substitution transition matrix from the dbSNP database. With a user-specified bin size, BVSim learns the distribution of SV positions per interval. It can model the SVs per interval as a multinomial distribution parameterized by the observed frequencies in HG002 (GRCh37/hg19 as reference) or sample the SV numbers per interval from a Gaussian distribution with the mean and standard deviation computed across the 15 samples (GRCh38/hg38 as reference). Calling ‘-hg19’ or ‘-hg38’ and specifying the chromosome name can activate the above procedures automatically for the human genome.\r\n\r\nIn the following example, we use 5 cores and 500,000 as length of the intervals. The reference is chromosome 21 of hg19, so we call \"-hg19 chr21\" in the command line to utilize the default procedure. In addition, we generated 1,000 SNPs, 99 duplications, 7 inversions, 280 deletions, and 202 insertions. The ratio of deletions/insertions in the tandem repeat regions with respect to the total number is 0.810/0.828. 
We also set the minimum and maximum lengths of some SVs.\r\n#### Toy example (-hg19)\r\n```bash\r\n#!/bin/bash\r\n#SBATCH -J 0_hg19_chr21\r\n#SBATCH -N 1 -c 5\r\n#SBATCH --output=output.txt\r\n#SBATCH --error=err.txt\r\n\r\nsource /opt/share/etc/miniconda3-py39.sh\r\nconda activate BVSim\r\ncd your_home_path\r\npython -m BVSim -ref your_home_path/hg19/hg19_chr21.fasta -save your_home_path/test_data/BVSim/ -seed 0 -rep 0 -cores 5 -len_bins 500000 -hg19 chr21 -mode probability -snp 1000 -sv_trans 0 -dup 99 -sv_inver 7 -sv_del 280 -sv_ins 202 -snv_del 0 -snv_ins 0 -p_del_region 0.810 -p_ins_region 0.828 -region_bed_url /home/project18/data/test_data/TGS/hg002/chr21_TR_unique.bed -delmin 50 -delmax 2964912 -insmin 50 -insmax 187524\r\nconda deactivate\r\n```\r\n\r\n\r\n## \u003ca name=\"uninstallation\"\u003e\u003c/a\u003eUninstallation for Updates\r\nTo update to the latest version of BVSim, you can uninstall and delete the cloned files. Then, try to clone from the new repository and install again.\r\n```bash\r\ncd your_home_path\r\npip uninstall BVSim\r\n```\r\n## \u003ca name=\"workflow\"\u003e\u003c/a\u003eWorkflow of BVSim\r\nThe following figure illustrates the workflow of BVSim, encapsulated within a dashed box, and demonstrates how the output files interact with read simulators, the alignment tool Minimap2, Samtools, and evaluation tools.\r\n![Workflow of BVSim](flow_chart_pipline.png)\r\n## \u003ca name=\"definitions\"\u003e\u003c/a\u003eDefinitions of SVs Simulated by BVSim\r\n![Definitions of SVs](Fig1a_BVSim.png)\r\n","organization":"Structural Variation Analysis","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1361?version=1","name":"main @ 72a3760","author":[],"descriptor_type":[]}]},{"id":"1362","url":"https://workflowhub.eu/workflows/1362","name":"Subclustering","description":"This workflow was built for the 2024 Bioinformatics Bootcamp at The Open 
University. It is meant to occur after the (universal) Filter, plot and explore tutorial to allow analysis of a single cluster.","organization":"usegalaxy-eu","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1362?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1364","url":"https://workflowhub.eu/workflows/1364","name":"Basic Virtual Drug Screening Workflow","description":"This workflow performs the most basic Virtual Drug Screening Pipeline to import a set of small molecules and dock them to an imported protein structure. \r\n","organization":"Scipion CNB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1364?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1365","url":"https://workflowhub.eu/workflows/1365","name":"Consensus Virtual Drug Screening Workflow","description":"This workflows contains a pipeline in Scipion that performs the following steps:\r\n\r\n1.1) Import small molecules: introduces a set of small molecular structures in the pipeline as prospective ligands\r\n\r\n1.2) Import atomic structure: introduces a protein atomic structure in the pipeline as receptor.\r\n\r\n2.1) Ligand preparation: uses RDKit to prepare the small molecules optimizing their 3D structure.\r\n\r\n2.2) Receptor preparation: uses bioPython to prepare the receptor structure, removing waters, adding hydrogens and removing unnecessary chains if asked. 
Also, uses PDBFixer to optimize the structure if selected.\r\n\r\n3.1) Ligand filters: uses RDKit to perform ADME and PAINS filters on the prepared ligands to remove undesired molecules\r\n\r\n3.2) Protein pocket search: uses 3 different software (P2Rank, AutoSite and FPocket) for predicting the receptor pockets.\r\n\r\n4.2) Consensus pockets: common pockets are computed by clustering their contact residues in order to obtain the most promising pocket predicted by all 3 programs.\r\n\r\n5) Receptor-ligands docking: uses 3 different software (AutoDock-GPU, AutoDock-Vina and LeDock) to dock the prepared ligands onto the receptor pockets. \r\n\r\n6) Docked poses rescoring: uses ODDT Vina scoring to rescore the poses coming from all 3 different software in order to have a comparable score of the poses.\r\n\r\n7.1) Consensus docking:  common ligand poses are computed clustering by RMSD the different molecules in order to obtain the most promising predicted poses.\r\n\r\n7.2) Ranx scoring: the scores of the different programs are combined using Ranx in order to obtain a final score for each of the molecules.\r\n","organization":"Scipion CNB","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1365?version=1","name":"Version 1","author":[],"descriptor_type":[]}]},{"id":"1369","url":"https://workflowhub.eu/workflows/1369","name":"Extended Source","description":"Calculate extended gamma-ray source halo using crbeam simulation","organization":"Astroparticle Lab","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1369?version=1","name":"master @ ef683ae","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1377","url":"https://workflowhub.eu/workflows/1377","name":"FAIR data pipeline for marine acoustic data","description":"This is the workflow for thesis: A FAIR Data Pipeline for Ecosystem 
Research, with Machine Learning and Marine Acoustic Data (Jiamian He)","organization":"FAIR_thesis: Marine acoustic data","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1377?version=1","name":"Version 1","author":["Jiamian He"],"descriptor_type":["SMK"]}]},{"id":"1380","url":"https://workflowhub.eu/workflows/1380","name":"FAIR data pipeline for marine acoustic data","description":"","organization":"FAIR_thesis: Marine acoustic data","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1380?version=1","name":"Version 1","author":[],"descriptor_type":["SMK"]}]},{"id":"1381","url":"https://workflowhub.eu/workflows/1381","name":"EOSC4Cancer D2.2","description":"# EOSC4Cancer_D2.2\r\nGalaxy workflow used for EOSC4Cancer D2.2 - Analytical methods for data extraction, processing and sharing using biomedical images - demonstrator\r\n\r\nAn overview of the components for the image processing demonstrator can be seen in the diagram below\r\n![diagram](https://github.com/rnavest/EOSC4Cancer_D2.2/blob/main/Demonstrator_diagram.png)\r\n\r\nLink to deliverable report on Zenodo: https://doi.org/10.5281/zenodo.15704480\r\n","organization":"EOSC4Cancer","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1381?version=1","name":"main @ b01b771","author":["Robin Navest"],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1381?version=2","name":"main @ 3317f1b","author":["Robin Navest"],"descriptor_type":["GALAXY"]}]},{"id":"1382","url":"https://workflowhub.eu/workflows/1382","name":"Annual percentage Change for population in Germany (1950 - 2025)","description":"# Annual percentage Change for population in Germany (1950 - 2025)\r\n\r\nThis workflow run was executed on Galaxy 
(Workflow Rerun Information)\r\n\r\n**Workflow:** Climate Stripes\r\n\r\n**Execution Status:** scheduled\r\n\r\n**Executed:** 2025-05-27 14:14:07.240149\r\n\r\n\r\n## Workflow Inputs\r\n\r\n### Formal Input Definitions\r\n\r\n- **Germany-Population-Annual--Change-2025-05-27-15-17.csv** (File)\r\n\r\n- **Column name to use for plotting** (Text)\r\n\r\n- **Plot Title** (Text)\r\n\r\n- **nxsplit** (Integer)\r\n  - Description: Number of values per intervals\r\n\r\n- **xname (column name for the x-axis)** (Text)\r\n  - Description: The column name to use for the x-axis.\r\n\r\n- **Date/time format for the x-axis column** (Text)\r\n  - Description: Date/time format to use when reading the column to be used for the x-axis.\r\n\r\n- **dates format for xlabels** (Text)\r\n  - Description: Format for plotting dates on the x-axis\r\n\r\n- **Matplotlib colormap** (Text)\r\n  - Description: Parameter 'colormap': valid options: winter,PRGn,hsv,gist_ncar,RdYlGn,summer,BrBG,Pastel1,autumn,PuBuGn,seismic,YlOrRd,Purples,Wistia,YlOrBr,tab10,tab20c,gist_heat,bone,gist_yarg,ocean,flag,RdGy,gist_earth,coolwarm,spring,PuBu,cool,gist_stern,gray,Reds,Greens,Accent,BuGn,RdGy_r,Set3,Pastel2,pink,OrRd,gist_rainbow,Blues,binary,gist_gray,PuOr,Set2,rainbow,copper,RdBu_r,Oranges,Set1,afmhot,BuPu,gnuplot2,brg,terrain,YlGnBu,tab20,Greys,bwr,RdPu,PuRd,tab20b,PiYG,hot,gnuplot,YlGn,Dark2,prism,Spectral,Paired,RdPu_r,RdBu,RdYlBu,GnBu,cubehelix,CMRmap,jet,nipy_spectral) Using default: 'RdBu_r'.\r\n\r\n### Actual Input Files Used\r\n\r\n- **Germany-Population-Annual--Change-2025-05-27-15-17.csv**\r\n  - Format: `text/plain`\r\n  - Path: `datasets/Germany-Population-Annual--Change-2025-05-27-15-17.csv_31e7840b5aedca43e53def1f4fdf0064.csv`\r\n\r\n\r\n## Workflow Parameters\r\n\r\n- **input:**\r\n  - __class__: `NoReplacement`\r\n\r\n- **adv:**\r\n  - colormap: `{'__class__': 'ConnectedValue'}`\r\n  - format_date: `{'__class__': 'ConnectedValue'}`\r\n  - format_plot: `{'__class__': 'ConnectedValue'}`\r\n  
- nxsplit: `{'__class__': 'ConnectedValue'}`\r\n  - xname: `{'__class__': 'ConnectedValue'}`\r\n\r\n- **ifilename:**\r\n  - __class__: `ConnectedValue`\r\n\r\n- **title:**\r\n  - __class__: `ConnectedValue`\r\n\r\n- **variable:**\r\n  - __class__: `ConnectedValue`\r\n\r\n\r\n## Workflow Outputs\r\n\r\n### Formal Output Definitions\r\n\r\n- **stripes.png** (File)\r\n\r\n### Actual Output Files Generated\r\n\r\n- **stripes.png**\r\n  - Format: `application/octet-stream`\r\n  - Path: `datasets/stripes.png_31e7840b5aedca437fdd404c00a60dc0.png`\r\n\r\n\r\n## Rerun Template\r\n\r\nTo rerun this workflow:\r\n\r\n1. **Workflow:** Climate Stripes\r\n\r\n2. **Required inputs:**\r\n   - Germany-Population-Annual--Change-2025-05-27-15-17.csv (type: `File`)\r\n   - Column name to use for plotting (type: `Text`)\r\n   - Plot Title (type: `Text`)\r\n   - nxsplit (type: `Integer`)\r\n   - xname (column name for the x-axis) (type: `Text`)\r\n   - Date/time format for the x-axis column (type: `Text`)\r\n   - dates format for xlabels (type: `Text`)\r\n   - Matplotlib colormap (type: `Text`)\r\n\r\n3. **Parameters to set:**\r\n   - input:\r\n     - __class__: `NoReplacement`\r\n   - adv:\r\n     - colormap: `{'__class__': 'ConnectedValue'}`\r\n     - format_date: `{'__class__': 'ConnectedValue'}`\r\n     - format_plot: `{'__class__': 'ConnectedValue'}`\r\n     - nxsplit: `{'__class__': 'ConnectedValue'}`\r\n     - xname: `{'__class__': 'ConnectedValue'}`\r\n   - ifilename:\r\n     - __class__: `ConnectedValue`\r\n   - title:\r\n     - __class__: `ConnectedValue`\r\n   - variable:\r\n     - __class__: `ConnectedValue`\r\n\r\n4. 
**Expected outputs:**\r\n   - stripes.png (type: `File`)","organization":"Galaxy Climate","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1382?version=1","name":"Version 1","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1383","url":"https://workflowhub.eu/workflows/1383","name":"Pure COMPSs Kmeans with Provenance Run","description":"K-means COMPSs application","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1383?version=1","name":"Version 1","author":["Rosa M Badia"],"descriptor_type":[]}]},{"id":"1384","url":"https://workflowhub.eu/workflows/1384","name":"Matrix Multiplication - improved resources visualization","description":"Application that perform the multiplication between matrices.","organization":"Workflows and Distributed Computing","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1384?version=1","name":"Version 1","author":["Raül Sirvent"],"descriptor_type":[]}]},{"id":"1385","url":"https://workflowhub.eu/workflows/1385","name":"SeuratExtend","description":"# SeuratExtend: An Enhanced Toolkit for scRNA-seq Analysis\r\n\r\n## Overview\r\n\r\n`SeuratExtend` is an R package designed to provide an improved and easy-to-use toolkit for scRNA-seq analysis and visualization, built upon the Seurat object. While `Seurat` is a widely-used tool in the R community that offers a foundational framework for scRNA-seq analysis, it has limitations when it comes to more advanced analysis and customized visualization. 
`SeuratExtend` expands upon `Seurat` by offering an array of enhanced visualization tools, an integrated functional and pathway analysis pipeline, seamless integration with popular Python tools, and a suite of utility functions for data manipulation and presentation. Designed to be user-friendly even for beginners, the package retains a level of professionalism that ensures rigorous analysis.\r\n\r\n**Key Features**:\r\n\r\n- **Enhanced Data Visualization**: Includes heatmaps, violin plots, dimensional reduction (UMAP) plots, waterfall plots, dot plots, proportion bars, volcano plots, and GSEA plots.\r\n- **Integrated Functional and Pathway Analysis**: Supports GO and Reactome databases, with the option to use custom databases.\r\n- **Python Tool Integration**: Easily apply tools like scVelo, SCENIC, and Palantir within R using the Seurat object.\r\n- **Utility Functions**: Assorted functions for calculations and color selections to streamline your scRNA-seq analysis.\r\n\r\n## Resources\r\n\r\n- **GitHub Repository**: Access the source code and contribute to SeuratExtend on [GitHub](https://github.com/huayc09/SeuratExtend).\r\n- **Online Tutorial**: For a comprehensive guide on using SeuratExtend, visit our [tutorial website](https://huayc09.github.io/SeuratExtend/).\r\n- **SeuratExtend Chatbot**: Try our AI-powered assistant (beta version, powered by ChatGPT) for help with scRNA-seq analysis: [scRNA-seq Assistant](https://chatgpt.com/g/g-8scQjmzkd-scrna-seq-assistant).\r\n\r\n## Citation\r\n\r\nIf you use SeuratExtend in your research, please cite:\r\n\r\nHua, Y., Weng, L., Zhao, F., and Rambow, F. (2024). SeuratExtend: Streamlining Single-Cell RNA-Seq Analysis Through an Integrated and Intuitive Framework. bioRxiv, 2024.08.01.606144. 
https://doi.org/10.1101/2024.08.01.606144\r\n\r\n## Installation\r\n\r\nInstall `SeuratExtend` directly from GitHub:\r\n\r\n```R\r\nif (!requireNamespace(\"remotes\", quietly = TRUE)) {\r\n    install.packages(\"remotes\")\r\n}\r\nremotes::install_github(\"huayc09/SeuratExtend\")\r\n```\r\n\r\n## Vignettes and Tutorials\r\n\r\n### [Quick Start-Up Guide](#quick-start-up-guide-1)\r\n\r\n### [What's New in v1.2.0](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/News.md)\r\n\r\n### [Enhanced Visualization](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md)\r\n- [Create an Enhanced Dimensional Reduction Plot](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#create-an-enhanced-dimensional-reduction-plot) `DimPlot2` `FeaturePlot3` `FeaturePlot3.grid` `theme_umap_arrows`\r\n- [Generate a Heatmap Plot](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#generate-a-heatmap-plot) `Heatmap`\r\n- [Create Enhanced Dot Plots](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#create-enhanced-dot-plots-new-in-v110) **(New in v1.1.0)** `DotPlot2`\r\n- [Create an Enhanced Violin Plot](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#create-an-enhanced-violin-plot) `VlnPlot2`\r\n- [Visualize Cluster Distribution in Samples](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#visualize-cluster-distribution-in-samples) `ClusterDistrBar`\r\n- [Generate a Waterfall Plot](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#generate-a-waterfall-plot) `WaterfallPlot`\r\n- [Create Volcano Plots](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#create-volcano-plots-new-in-v110) **(New in v1.1.0)** `VolcanoPlot`\r\n- [Explore Color Functions](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Visualization.md#explore-color-functions) `color_pro` 
`color_iwh` `ryb2rgb` `save_colors`\r\n\r\n### [Geneset Enrichment Analysis (GSEA)](vignettes/GSEA.md)\r\n- [Conduct GSEA using the GO or Reactome database](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/GSEA.md#conduct-gsea-using-the-go-or-reactome-database) `GeneSetAnalysisGO` `GeneSetAnalysisReactome`\r\n- [Perform GSEA using customized genesets](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/GSEA.md#perform-gsea-using-customized-genesets) `GeneSetAnalysis`\r\n- [Find pathways in the GO/Reactome database or customized genesets](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/GSEA.md#find-pathways-in-the-goreactome-database-or-customized-genesets) `SearchDatabase` `SearchPathways`\r\n- [Convert GO/Reactome pathway IDs to pathway names](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/GSEA.md#convert-goreactome-pathway-ids-to-pathway-names) `RenameGO` `RenameReactome`\r\n- [Filter the GO/Reactome pathway list based on certain criteria](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/GSEA.md#filter-the-goreactome-pathway-list-based-on-certain-criteria) `FilterGOTerms` `FilterReactomeTerms`\r\n- [Create a GSEA plot emulating the Broad Institute analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/GSEA.md#create-a-gsea-plot-emulating-the-broad-institute-analysis) `GSEAplot`\r\n\r\n### [Trajectory and Pseudotime Analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md)\r\n-  [scVelo Tutorial for Trajectory Analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#analyzing-single-cell-trajectories-with-scvelo) `scVelo.SeuratToAnndata` `scVelo.Plot` \r\n-  [Palantir Tutorial for Trajectory and Pseudotime Analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#palantir-tutorial-for-trajectory-and-pseudotime-analysis) `Palantir.RunDM` `Palantir.Pseudotime`\r\n-  [MAGIC for Denoising and Smoothing 
Gene Expression](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#magic-for-denoising-and-smoothing-gene-expression) `Palantir.Magic`\r\n-  [CellRank Tutorial for Trajectory Analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#cellrank-tutorial-for-trajectory-analysis) `Cellrank.Compute` `Cellrank.Plot`\r\n-  [Gene Expression Dynamics Along Differentiation Trajectories](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#gene-expression-dynamics-along-differentiation-trajectories) `GeneTrendCurve.Palantir` `GeneTrendHeatmap.Palantir` `GeneTrendCurve.Slingshot` `GeneTrendHeatmap.Slingshot`\r\n-  [Slingshot Tutorial for Pseudotime Analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#slingshot-tutorial-for-pseudotime-analysis) `RunSlingshot` \r\n-  [Integration of Seurat with Python Tools](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Trajectory.md#integration-of-seurat-with-python-tools) `create_condaenv_seuratextend` `Seu2Adata` `Seu2Loom` `adata.LoadLoom` `adata.AddDR` `adata.AddMetadata` `adata.Save` `adata.Load`\r\n\r\n### [SCENIC for Gene Regulatory Networks Analysis](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/SCENIC.md)\r\n- [Importing SCENIC Loom Files into Seurat](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/SCENIC.md#importing-scenic-loom-files-into-seurat) `ImportPyscenicLoom`\r\n- [Visualizing SCENIC Results](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/SCENIC.md#visualizing-scenic-results) \r\n\r\n### [Utility Tools and Functions](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Utilities.md)\r\n- [Facilitate Gene Naming Conversions](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Utilities.md#facilitate-gene-naming-conversions) `HumanToMouseGenesymbol` `MouseToHumanGenesymbol` `EnsemblToGenesymbol` `GenesymbolToEnsembl` 
`UniprotToGenesymbol`\r\n- [Compute Statistics Grouped by Clusters](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Utilities.md#compute-statistics-grouped-by-clusters) `CalcStats`\r\n- [Assess Proportion of Positive Cells in Clusters](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Utilities.md#assess-proportion-of-positive-cells-in-clusters) `feature_percent`\r\n- [Run Standard Seurat Pipeline](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/Utilities.md#run-standard-seurat-pipeline) `RunBasicSeurat`\r\n\r\n### [Single-Cell RNA-seq Analysis Course](#single-cell-rna-seq-analysis-course-new-in-v110-1) **(New in v1.1.0)**\r\n\r\n### [FAQ](https://github.com/huayc09/SeuratExtend/blob/master/vignettes/FAQ.md)\r\n\r\n## Quick Start-Up Guide\r\n\r\nThis quick start-up guide provides an overview of the most frequently\r\nused functions in single-cell RNA sequencing (scRNA-seq) analysis. After\r\nrunning the standard Seurat pipeline (refer to this [Seurat pbmc3k\r\ntutorial](https://satijalab.org/seurat/articles/pbmc3k_tutorial)), you\r\nshould have a Seurat object ready for further analysis. 
Below, we\r\nillustrate the use of a subset of the pbmc dataset as an example to\r\ndemonstrate various functionalities of the `SeuratExtend` package.\r\n\r\n### Visualizing Clusters\r\n\r\n``` r\r\nlibrary(Seurat)\r\nlibrary(SeuratExtend)\r\n\r\n# Visualizing cell clusters using DimPlot2\r\nDimPlot2(pbmc, theme = theme_umap_arrows())\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-1-1.png)\u003c!-- --\u003e\r\n\r\n### Analyzing Cluster Distribution\r\n\r\nTo check the percentage of each cluster within different samples:\r\n\r\n``` r\r\n# Cluster distribution bar plot\r\nClusterDistrBar(pbmc$orig.ident, pbmc$cluster)\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-2-1.png)\u003c!-- --\u003e\r\n\r\n### Marker Gene Analysis with Heatmap\r\n\r\nTo examine the marker genes of each cluster and visualize them using a\r\nheatmap:\r\n\r\n``` r\r\n# Calculating z-scores for variable features\r\ngenes.zscore \u003c- CalcStats(\r\n  pbmc,\r\n  features = VariableFeatures(pbmc),\r\n  group.by = \"cluster\",\r\n  order = \"p\",\r\n  n = 4)\r\n  \r\n# Displaying heatmap\r\nHeatmap(genes.zscore, lab_fill = \"zscore\")\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-3-1.png)\u003c!-- --\u003e\r\n\r\n### Enhanced Dot Plots (New in v1.1.0)\r\n\r\n``` r\r\n# Create grouped features\r\ngrouped_features \u003c- list(\r\n  \"B_cell_markers\" = c(\"MS4A1\", \"CD79A\"),\r\n  \"T_cell_markers\" = c(\"CD3D\", \"CD8A\", \"IL7R\"),\r\n  \"Myeloid_markers\" = c(\"CD14\", \"FCGR3A\", \"S100A8\")\r\n)\r\n\r\nDotPlot2(pbmc, features = grouped_features)\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-4-1.png)\u003c!-- --\u003e\r\n\r\n### Enhanced Visualization of Marker Genes\r\n\r\nFor visualizing specific markers via a violin plot that incorporates box\r\nplots, median lines, and performs statistical testing:\r\n\r\n``` r\r\n# Specifying genes and cells of interest\r\ngenes \u003c- c(\"CD3D\", \"CD14\", 
\"CD79A\")\r\ncells \u003c- WhichCells(pbmc, idents = c(\"B cell\", \"CD8 T cell\", \"Mono CD14\"))\r\n\r\n# Violin plot with statistical analysis\r\nVlnPlot2(\r\n  pbmc,\r\n  features = genes,\r\n  group.by = \"cluster\",\r\n  cells = cells,\r\n  stat.method = \"wilcox.test\")\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-5-1.png)\u003c!-- --\u003e\r\n\r\n### Visualizing Multiple Markers on UMAP\r\n\r\nDisplaying three markers on a single UMAP, using RYB coloring for each\r\nmarker:\r\n\r\n``` r\r\nFeaturePlot3(pbmc, feature.1 = \"CD3D\", feature.2 = \"CD14\", feature.3 = \"CD79A\", pt.size = 1)\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-6-1.png)\u003c!-- --\u003e\r\n\r\n### Create Volcano Plots (New in v1.1.0)\r\n\r\nCreate a basic volcano plot comparing two cell types:\r\n\r\n``` r\r\nVolcanoPlot(pbmc, \r\n            ident.1 = \"B cell\",\r\n            ident.2 = \"CD8 T cell\")\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-7-1.png)\u003c!-- --\u003e\r\n\r\n### Conducting Geneset Enrichment Analysis (GSEA)\r\n\r\nExamining all the pathways of the immune process in the Gene Ontology\r\n(GO) database, and visualizing by a heatmap that displays the top\r\npathways of each cluster across multiple cell types:\r\n\r\n``` r\r\noptions(spe = \"human\")\r\npbmc \u003c- GeneSetAnalysisGO(pbmc, parent = \"immune_system_process\", n.min = 5)\r\nmatr \u003c- RenameGO(pbmc@misc$AUCell$GO$immune_system_process)\r\ngo_zscore \u003c- CalcStats(\r\n  matr,\r\n  f = pbmc$cluster,\r\n  order = \"p\",\r\n  n = 3)\r\nHeatmap(go_zscore, lab_fill = \"zscore\")\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-8-1.png)\u003c!-- --\u003e\r\n\r\n### Detailed Comparison of Two Cell Types\r\n\r\nUsing a GSEA plot to focus on a specific pathway for deeper comparative\r\nanalysis:\r\n\r\n``` r\r\nGSEAplot(\r\n  pbmc,\r\n  ident.1 = \"B cell\",\r\n  ident.2 = \"CD8 T cell\",\r\n  title = 
\"GO:0042113 B cell activation (335g)\",\r\n  geneset = GO_Data$human$GO2Gene[[\"GO:0042113\"]])\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-9-1.png)\u003c!-- --\u003e\r\n\r\n### Importing and Visualizing SCENIC Analysis\r\n\r\nAfter conducting Gene Regulatory Networks Analysis using pySCENIC,\r\nimport the output and visualize various aspects within Seurat:\r\n\r\n``` r\r\n# Downloading a pre-computed SCENIC loom file\r\nscenic_loom_path \u003c- file.path(tempdir(), \"pyscenic_integrated-output.loom\")\r\ndownload.file(\"https://zenodo.org/records/10944066/files/pbmc3k_small_pyscenic_integrated-output.loom\", scenic_loom_path, mode = \"wb\")\r\n\r\n# Importing SCENIC Loom Files into Seurat\r\npbmc \u003c- ImportPyscenicLoom(scenic_loom_path, seu = pbmc)\r\n\r\n# Visualizing variables such as cluster, gene expression, and SCENIC regulon activity with customized colors\r\nDimPlot2(\r\n  pbmc,\r\n  features = c(\"cluster\", \"orig.ident\", \"CEBPA\", \"tf_CEBPA\"),\r\n  cols = list(\"tf_CEBPA\" = \"OrRd\"),\r\n  theme = NoAxes()\r\n) + theme_umap_arrows()\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-10-1.png)\u003c!-- --\u003e\r\n\r\n``` r\r\n# Creating a waterfall plot to compare regulon activity between cell types\r\nDefaultAssay(pbmc) \u003c- \"TF\"\r\nWaterfallPlot(\r\n  pbmc,\r\n  features = rownames(pbmc),\r\n  ident.1 = \"Mono CD14\",\r\n  ident.2 = \"CD8 T cell\",\r\n  exp.transform = FALSE,\r\n  top.n = 20)\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-11-1.png)\u003c!-- --\u003e\r\n\r\n### Trajectory Analysis with Palantir in R\r\n\r\nTrajectory analysis helps identify developmental pathways and\r\ntransitions between different cell states. 
In this section, we\r\ndemonstrate how to perform trajectory analysis using the Palantir\r\nalgorithm on a subset of myeloid cells, integrating everything within\r\nthe R environment.\r\n\r\n#### Download and Prepare the Data\r\n\r\nFirst, we download a small subset of myeloid cells to illustrate the\r\nanalysis:\r\n\r\n``` r\r\n# Download the example Seurat Object with myeloid cells\r\nmye_small \u003c- readRDS(url(\"https://zenodo.org/records/10944066/files/pbmc10k_mye_small_velocyto.rds\", \"rb\"))\r\n```\r\n\r\n#### Diffusion Map Calculation\r\n\r\nPalantir uses diffusion maps for dimensionality reduction to infer\r\ntrajectories. Here’s how to compute and visualize them:\r\n\r\n``` r\r\n# Compute diffusion map\r\nmye_small \u003c- Palantir.RunDM(mye_small)\r\n```\r\n\r\n    ## Determing nearest neighbor graph...\r\n\r\n``` r\r\n# Visualize the first two diffusion map dimensions\r\nDimPlot2(mye_small, reduction = \"ms\")\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-13-1.png)\u003c!-- --\u003e\r\n\r\n#### Pseudotime Calculation\r\n\r\nPseudotime ordering assigns each cell a time point in a trajectory,\r\nindicating its progression along a developmental path:\r\n\r\n``` r\r\n# Calculate pseudotime with a specified start cell\r\nmye_small \u003c- Palantir.Pseudotime(mye_small, start_cell = \"sample1_GAGAGGTAGCAGTACG-1\")\r\n```\r\n\r\n    ## Sampling and flocking waypoints...\r\n    ## Time for determining waypoints: 0.00112607479095459 minutes\r\n    ## Determining pseudotime...\r\n    ## Shortest path distances using 30-nearest neighbor graph...\r\n    ## Time for shortest paths: 0.014574062824249268 minutes\r\n    ## Iteratively refining the pseudotime...\r\n    ## Correlation at iteration 1: 1.0000\r\n    ## Entropy and branch probabilities...\r\n    ## Markov chain construction...\r\n    ## Identification of terminal states...\r\n    ## Computing fundamental matrix and absorption probabilities...\r\n    ## Project results to all 
cells...\r\n\r\n``` r\r\n# Store pseudotime results in meta.data for easy plotting\r\nps \u003c- mye_small@misc$Palantir$Pseudotime\r\ncolnames(ps)[3:4] \u003c- c(\"fate1\", \"fate2\")\r\nmye_small@meta.data[,colnames(ps)] \u003c- ps\r\n\r\n# Visualize pseudotime and cell fates\r\nDimPlot2(\r\n  mye_small,\r\n  features = colnames(ps),\r\n  reduction = \"ms\",\r\n  cols = list(continuous = \"A\", Entropy = \"D\"),\r\n  theme = NoAxes())\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-14-1.png)\u003c!-- --\u003e\r\n\r\n#### Visualization Along Trajectories\r\n\r\nVisualizing gene expression or regulon activity along calculated\r\ntrajectories can provide insights into dynamic changes:\r\n\r\n``` r\r\n# Create smoothed gene expression curves along trajectory\r\nGeneTrendCurve.Palantir(\r\n  mye_small,\r\n  pseudotime.data = ps,\r\n  features = c(\"CD14\", \"FCGR3A\")\r\n)\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-15-1.png)\u003c!-- --\u003e\r\n\r\n``` r\r\n# Create a gene trend heatmap for different fates\r\nGeneTrendHeatmap.Palantir(\r\n  mye_small,\r\n  features = VariableFeatures(mye_small)[1:10],\r\n  pseudotime.data = ps,\r\n  lineage = \"fate1\"\r\n)\r\n```\r\n\r\n![](vignettes/quick_start_files/figure-gfm/unnamed-chunk-16-1.png)\u003c!-- --\u003e\r\n\r\n### scVelo Analysis\r\n\r\nscVelo is a Python tool used for RNA velocity analysis. 
We demonstrate\r\nhow to integrate and analyze velocyto-generated data within the Seurat\r\nworkflow using scVelo.\r\n\r\n#### Preparing for scVelo\r\n\r\nFirst, download the pre-calculated velocyto loom file:\r\n\r\n``` r\r\n# Download velocyto loom file\r\nloom_path \u003c- file.path(tempdir(), \"pbmc10k_mye_small.loom\")\r\ndownload.file(\"https://zenodo.org/records/10944066/files/pbmc10k_mye_small.loom\", \r\n              loom_path,\r\n              mode = \"wb\")  # Use binary mode for Windows compatibility\r\n\r\n# Set up the path for saving the AnnData object in the HDF5 (h5ad) format\r\nif (.Platform$OS.type == \"windows\") {\r\n    adata_path \u003c- normalizePath(file.path(tempdir(), \"mye_small.h5ad\"), winslash = \"/\")\r\n} else {\r\n    adata_path \u003c- file.path(tempdir(), \"mye_small.h5ad\")\r\n}\r\n\r\n# Integrate Seurat Object and velocyto loom into an AnnData object\r\nscVelo.SeuratToAnndata(\r\n  mye_small,\r\n  filename = adata_path,\r\n  velocyto.loompath = loom_path,\r\n  prefix = \"sample1_\",\r\n  postfix = \"-1\"\r\n)\r\n```\r\n\r\n    ## scVelo version: 0.3.0\r\n    ## Filtered out 10891 genes that are detected 20 counts (shared).\r\n    ## Normalized count data: X, spliced, unspliced.\r\n    ## Extracted 2000 highly variable genes.\r\n    ## Logarithmized X.\r\n    ## computing neighbors\r\n    ##     finished (0:00:00) --\u003e added \r\n    ##     'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)\r\n    ## computing moments based on connectivities\r\n    ##     finished (0:00:00) --\u003e added \r\n    ##     'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)\r\n    ## computing velocities\r\n    ##     finished (0:00:00) --\u003e added \r\n    ##     'velocity', velocity vectors for each individual cell (adata.layers)\r\n    ## computing velocity graph (using 1/28 cores)\r\n    ## WARNING: Unable to create progress bar. 
Consider installing `tqdm` as `pip install tqdm` and `ipywidgets` as `pip install ipywidgets`,\r\n    ## or disable the progress bar using `show_progress_bar=False`.\r\n    ##     finished (0:00:01) --\u003e added \r\n    ##     'velocity_graph', sparse matrix with cosine correlations (adata.uns)\r\n\r\n    ## NULL\r\n\r\n#### Plotting scVelo Results\r\n\r\nOnce the data is processed, visualize the RNA velocity:\r\n\r\n``` r\r\n# Plot RNA velocity\r\nscVelo.Plot(color = \"cluster\", basis = \"ms_cell_embeddings\", \r\n            save = \"quick_start_scvelo.png\", figsize = c(5,4))\r\n```\r\n\r\n\u003cimg src=\"vignettes/figures/scvelo_quick_start_scvelo.png\" width=\"700\" /\u003e\r\n\r\nFor detailed usage of the functions and more advanced analysis, please refer to the vignettes and tutorials.\r\n\r\n## Single-Cell RNA-seq Analysis Course (New in v1.1.0)\r\n\r\nA comprehensive 6-lesson course originally presented at the [Institute for AI in Medicine (IKIM)](https://www.ikim.uk-essen.de/institute), University Hospital Essen on October 8, 2024, organized by the [Department of Applied Computational Cancer Research](https://www.ikim.uk-essen.de/groups/accr). The course materials have been updated for SeuratExtend v1.1.0 and are now freely available online. Starting with fundamentals of R and Seurat, the course progressively builds to cover enhanced visualization, functional analysis, quality control, and cutting-edge methods including trajectory analysis, regulatory networks, and cell-cell communication. Perfect for beginners while providing depth needed for advanced applications.\r\n\r\n### [Lesson 1: Introduction to R Programming](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/1.basic-R.html)\r\nEssential R programming fundamentals tailored for scRNA-seq analysis. Covers basic data types, data structures (vectors, matrices, data frames), file operations, and package management. 
Perfect for beginners starting their journey in bioinformatics.\r\n\r\n### [Lesson 2: Basic Single-Cell Analysis with Seurat](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/2.Seurat.html)\r\nComprehensive walkthrough of the standard Seurat workflow, from raw count matrix to cell type annotation. Learn about data normalization, dimensionality reduction, clustering, and visualization through hands-on analysis of PBMC data.\r\n\r\n### [Lesson 3: Advanced Visualization with SeuratExtend](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/3.Visualization.html)\r\nMaster advanced visualization techniques using SeuratExtend's enhanced plotting functions. Explore DimPlot2, FeaturePlot3, Heatmap, and other tools to create publication-ready figures. Includes practical examples of customizing plots and color schemes.\r\n\r\n### [Lesson 4: Gene Set Enrichment Analysis and Utilities](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/4.GSEA.html)\r\nMaster functional enrichment analysis using GO and Reactome databases through SeuratExtend's integrated GSEA pipeline. Learn to perform custom gene set analysis, interpret enrichment scores, and utilize helpful utility functions for gene naming conversions and cell proportions.\r\n\r\n### [Lesson 5: Core Workflow Enhancements](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/5.Core-Enhancement.html)\r\nElevate your scRNA-seq analysis with advanced quality control, doublet removal, data integration using Harmony, cell cycle analysis, and alternative normalization methods like SCTransform. 
Understand key considerations for processing and analyzing multi-sample datasets.\r\n\r\n### [Lesson 6: Advanced Analytical Methods (Part 1)](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/6.Advanced.html) [(Part 2)](https://huayc09.github.io/SeuratExtend/articles/single-cell-course/6.Advanced-2.html)\r\nExplore cutting-edge techniques including trajectory analysis with scVelo/Palantir, cell-cell communication using CellChat/NicheNet, regulatory network inference with SCENIC, and specialized analyses for TCR/BCR data and copy number variations.\r\n\r\n## License\r\n\r\nThe SeuratExtend R package code is licensed under GPL-3.0.\r\n\r\nThe data files (*.rda files in the 'data' folder) are released under CC0 1.0 Universal (CC0 1.0) Public Domain Dedication, meaning they are in the public domain and can be used without any restrictions.\r\n\r\n## Publications Using SeuratExtend\r\n\r\n1. Hua, Y., Vella, G., Rambow, F., et al. (2022). Cancer immunotherapies transition endothelial cells into HEVs that generate TCF1+ T lymphocyte niches through a feed-forward loop. **Cancer Cell** 40, 1600-1618. https://doi.org/10.1016/j.ccell.2022.11.002\r\n2. Hua, Y., Wu, N., Miao, J., Shen, M. (2023). Single-cell transcriptomic analysis in two patients with rare systemic autoinflammatory diseases treated with anti-TNF therapy. **Front. Immunol.** 14. https://doi.org/10.3389/fimmu.2023.1091336\r\n3. Verhoeven, J., Jacobs, K.A., Rizzollo, F., Lodi, F., Hua, Y., Poźniak, J., Narayanan Srinivasan, A., Houbaert, D., Shankar, G., More, S., et al. (2023). Tumor endothelial cell autophagy is a key vascular-immune checkpoint in melanoma. **EMBO Mol. Med.** 15, e18028. https://doi.org/10.15252/emmm.202318028\r\n4. Dobersalske, C., Rauschenbach, L., Hua, Y., Berliner, C., Steinbach, A., Grüneboom, A., Kokkaliaris, K.D., Heiland, D.H., Berger, P., Langer, S., et al. (2024). Cranioencephalic functional lymphoid units in glioblastoma. **Nat. 
Med.** https://doi.org/10.1038/s41591-024-03152-x\r\n","organization":"Applied Computational Cancer Research","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1385?version=1","name":"master @ b8ad0dc","author":[],"descriptor_type":[]}]},{"id":"1388","url":"https://workflowhub.eu/workflows/1388","name":"MultiGSEA","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Multiomics data analysis using MultiGSEA](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/multiGSEA-tutorial/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Matthias Bernt\n\n**Tutorial Author(s)**: [Thorben Stehling](https://training.galaxyproject.org/training-material/hall-of-fame/tStehling/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/)\n\n**Tutorial Contributor(s)**: [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1388?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1389","url":"https://workflowhub.eu/workflows/1389","name":"Copy of PeptideDataAnalysis (imported from uploaded file)","description":"Peptide Library Data Analysis\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Peptide Library Data Analysis](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/peptide-library-data-analysis/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Jayadev Joshi](https://training.galaxyproject.org/training-material/hall-of-fame/jaidevjoshi83/), [Daniel Blankenberg](https://training.galaxyproject.org/training-material/hall-of-fame/blankenberg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1389?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1390","url":"https://workflowhub.eu/workflows/1390","name":"Assembly of metagenomic sequencing data","description":"Assembly of metagenomic sequencing data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Assembly of metagenomic sequencing data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metagenomics-assembly/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow 
Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Polina Polunina, Bérénice Batut\n\n**Tutorial Author(s)**: [Polina Polunina](https://training.galaxyproject.org/training-material/hall-of-fame/plushz/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n**Tutorial Contributor(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Polina Polunina](https://training.galaxyproject.org/training-material/hall-of-fame/plushz/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1390?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1390?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1391","url":"https://workflowhub.eu/workflows/1391","name":"vcf2lineage","description":"workflow-automation\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Automating Galaxy workflows using the command line](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/workflow-automation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## 
Thanks to...\n\n**Workflow Author(s)**: Wolfgang Maier\n\n**Tutorial Author(s)**: [Simon Bray](https://training.galaxyproject.org/training-material/hall-of-fame/simonbray/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n**Grants(s)**: [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1391?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1392","url":"https://workflowhub.eu/workflows/1392","name":"Tutorial workflow","description":"workflow-editor\n\n## Associated Tutorial\n\nThis 
workflows is part of the tutorial [Creating, Editing and Importing Galaxy Workflows](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/workflow-editor/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1392?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1393","url":"https://workflowhub.eu/workflows/1393","name":"Subworkflow","description":"workflow-editor\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Creating, Editing and Importing Galaxy Workflows](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/workflow-editor/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Uses [subworkflows](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_subworkflows.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1393?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1394","url":"https://workflowhub.eu/workflows/1394","name":"Peptide And Protein ID Tutorial","description":"Peptide and Protein ID using SearchGUI and PeptideShaker\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Peptide and Protein ID using SearchGUI and PeptideShaker](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [William Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Martin 
Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1394?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1395","url":"https://workflowhub.eu/workflows/1395","name":"Building an amplicon sequence variant (ASV) table from 16S data using DADA2","description":"Building an amplicon sequence variant (ASV) table from 16S data using DADA2\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Building an amplicon sequence variant (ASV) table from 16S data using DADA2](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/dada-16S/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: , Bérénice Batut\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n**Tutorial Contributor(s)**: [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Clea 
Siguret](https://training.galaxyproject.org/training-material/hall-of-fame/clsiguret/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Linelle Abueg](https://training.galaxyproject.org/training-material/hall-of-fame/abueg/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Santino Faack](https://training.galaxyproject.org/training-material/hall-of-fame/santamccloud/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1395?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1395?version=2","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1396","url":"https://workflowhub.eu/workflows/1396","name":"WF2_Discovery-Workflow","description":"Discovery workflow with SG/PS and MaxQuant to generate microbial peptides\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 2: Discovery](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/clinical-mp-2-discovery/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), 
[Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1396?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1397","url":"https://workflowhub.eu/workflows/1397","name":"Select First N Lines","description":"Workflows: Using Workflow Parameters\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Using Workflow Parameters](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/workflow-parameters/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/), [Helena 
Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1397?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1398","url":"https://workflowhub.eu/workflows/1398","name":"Intermine import/export","description":"Intermine import/export\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [InterMine integration with Galaxy](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/intermine/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Daniela Butano](https://training.galaxyproject.org/training-material/hall-of-fame/danielabutano/), [Yo Yehudi](https://training.galaxyproject.org/training-material/hall-of-fame/yochannah/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Daniela Butano](https://training.galaxyproject.org/training-material/hall-of-fame/danielabutano/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training 
Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1398?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1399","url":"https://workflowhub.eu/workflows/1399","name":"ProteinID SG PS Tutorial WF datasetCollection","description":"Peptide and Protein ID using SearchGUI and PeptideShaker\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Peptide and Protein ID using SearchGUI and PeptideShaker](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [William Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Helena 
Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1399?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1400","url":"https://workflowhub.eu/workflows/1400","name":"Workflow 2: Data Cleaning And Chimera Removal [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word 
workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1400?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1401","url":"https://workflowhub.eu/workflows/1401","name":"MaxQuant MSstatsTMT Training","description":"MaxQuant and MSstats for the analysis of TMT data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [MaxQuant and MSstats for the analysis of TMT data](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/maxquant-msstats-tmt/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Klemens Fröhlich](https://training.galaxyproject.org/training-material/hall-of-fame/KlemensFroehlich/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1401?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1402","url":"https://workflowhub.eu/workflows/1402","name":"GTN Training: Workflow Reports","description":"introduction\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Workflow 
Reports](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/workflow-reports/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [John Chilton](https://training.galaxyproject.org/training-material/hall-of-fame/jmchilton/), [Aysam Guerler](https://training.galaxyproject.org/training-material/hall-of-fame/guerler/), [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1402?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1403","url":"https://workflowhub.eu/workflows/1403","name":"'Biomarkers4Paris' Workflow","description":"Biomarker candidate identification\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Biomarker candidate identification](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/biomarker_selection/tutorial.html), 
available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florence Combes](https://training.galaxyproject.org/training-material/hall-of-fame/combesf/), [David Christiany](https://training.galaxyproject.org/training-material/hall-of-fame/davidchristiany/), [Valentin Loux](https://training.galaxyproject.org/training-material/hall-of-fame/vloux/), [Yves Vandenbrouck](https://training.galaxyproject.org/training-material/hall-of-fame/yvandenb/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1403?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1404","url":"https://workflowhub.eu/workflows/1404","name":"Workflow 5: OTU Clustering [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word 
workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1404?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1405","url":"https://workflowhub.eu/workflows/1405","name":"GTN Training: Workflow Reports - Galaxy 101 For Everyone","description":"introduction\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Workflow Reports](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/workflow-reports/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [John Chilton](https://training.galaxyproject.org/training-material/hall-of-fame/jmchilton/), [Aysam Guerler](https://training.galaxyproject.org/training-material/hall-of-fame/guerler/), [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1405?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1406","url":"https://workflowhub.eu/workflows/1406","name":"GigaScience-IEDB-PepQuery-Neoantigen","description":"Predict binding using IEDB and check novelty peptides with PepQuery\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Neoantigen 7: IEDB binding PepQuery Validated Neopeptides](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-7-hla-binding-novel-peptides/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1406?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1407","url":"https://workflowhub.eu/workflows/1407","name":"Proteogenomics 1: Database Creation","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Proteogenomics 1: Database Creation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/proteogenomics-dbcreation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1407?version=1","name":"10.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1408","url":"https://workflowhub.eu/workflows/1408","name":"Workflow 4: Mock OTU Clustering [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word 
workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1408?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1409","url":"https://workflowhub.eu/workflows/1409","name":"GTN_ENA_upload_workflow","description":"Submitting SARS-CoV-2 sequences to ENA\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Submitting sequence data to ENA](https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/upload-data-to-ena/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Miguel Roncoroni](https://training.galaxyproject.org/training-material/hall-of-fame/roncoronimiguel/), [Bert Droesbeke](https://training.galaxyproject.org/training-material/hall-of-fame/bedroesb/), [Boris Depoortere](https://training.galaxyproject.org/training-material/hall-of-fame/B0r1sD/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1409?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1410","url":"https://workflowhub.eu/workflows/1410","name":"GigaScience_Database_merge_FragPipe_STS26T_demonstration","description":"Merging Fusion and non-normal databases + Discovery peptidomics using FragPipe\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Neoantigen 3: Database merge and FragPipe 
discovery](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-3-fragpipe-discovery/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1410?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1411","url":"https://workflowhub.eu/workflows/1411","name":"ProteoRE ProteomeAnnotation Tutorial (release 2.0)","description":"Annotating a protein list identified by LC-MS/MS experiments\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Annotating a protein list identified by LC-MS/MS experiments](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/proteome_annotation/tutorial.html), available in the 
[GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Valentin Loux](https://training.galaxyproject.org/training-material/hall-of-fame/vloux/), [Florence Combes](https://training.galaxyproject.org/training-material/hall-of-fame/combesf/), [David Christiany](https://training.galaxyproject.org/training-material/hall-of-fame/davidchristiany/), [Yves Vandenbrouck](https://training.galaxyproject.org/training-material/hall-of-fame/yvandenb/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1411?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1412","url":"https://workflowhub.eu/workflows/1412","name":"Workflow 6: Alpha Diversity [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word 
workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1412?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1413","url":"https://workflowhub.eu/workflows/1413","name":"Circos tutorial","description":"Visualisation with Circos\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Visualisation with Circos](https://training.galaxyproject.org/training-material/topics/visualisation/tutorials/circos/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Helena Rasche, Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Nate Coraor](https://training.galaxyproject.org/training-material/hall-of-fame/natefoo/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and 
Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1413?version=1","name":"7.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1414","url":"https://workflowhub.eu/workflows/1414","name":"'Proteome Annotation'","description":"Annotating a protein list identified by LC-MS/MS experiments\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Annotating a protein list identified by LC-MS/MS experiments](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/proteome_annotation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Valentin Loux](https://training.galaxyproject.org/training-material/hall-of-fame/vloux/), [Florence Combes](https://training.galaxyproject.org/training-material/hall-of-fame/combesf/), [David Christiany](https://training.galaxyproject.org/training-material/hall-of-fame/davidchristiany/), [Yves Vandenbrouck](https://training.galaxyproject.org/training-material/hall-of-fame/yvandenb/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1414?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1415","url":"https://workflowhub.eu/workflows/1415","name":"Circos: Nature 
workflow","description":"Visualisation with Circos\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Visualisation with Circos](https://training.galaxyproject.org/training-material/topics/visualisation/tutorials/circos/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Helena Rasche, Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Nate Coraor](https://training.galaxyproject.org/training-material/hall-of-fame/natefoo/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1415?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1416","url":"https://workflowhub.eu/workflows/1416","name":"GigaScience_PepQuery2_demonstration_STS26T_neoantigen_candidates_workflow","description":"Validate the NeoAntigen Candidates from FragPipe discovery through the PepQuery Novel search\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Neoantigen 4: PepQuery2 Verification](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-4-peptide-verification/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1416?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1417","url":"https://workflowhub.eu/workflows/1417","name":"Secreted Proteins Via GO Annotation And WoLF PSORT For shCTSB Paper","description":"version 1.0, 160318, published at https://github.com/Stortebecker/secretome_prediction\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Secretome Prediction](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/secretome-prediction/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [William 
Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1417?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1418","url":"https://workflowhub.eu/workflows/1418","name":"Workflow7: Beta Diversity [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia 
Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1418?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1419","url":"https://workflowhub.eu/workflows/1419","name":"Circos for E. Coli","description":"This workflow will create a simple plot of a microbial sized genome (e.g. E. coli) using a couple of datasets like sequencing depth (bigwigs), gff3 formatted annotations, and some variants.\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Ploting a Microbial Genome with Circos](https://training.galaxyproject.org/training-material/topics/visualisation/tutorials/circos-microbial/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Helena Rasche\n\n**Tutorial Author(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Tutorial Contributor(s)**: [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and 
Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1419?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1420","url":"https://workflowhub.eu/workflows/1420","name":"WF4_Quantitation_Workflow","description":"Quantification using the MaxQuant tool\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 4: Quantitation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/clinical-mp-4-quantitation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1420?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1421","url":"https://workflowhub.eu/workflows/1421","name":"DIA_lib_OSW","description":"DIA library Training HEK Ecoli data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Library Generation for DIA Analysis](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/DIA_lib_OSW/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1421?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1422","url":"https://workflowhub.eu/workflows/1422","name":"Workflow 1: Quality Control [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1422?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1423","url":"https://workflowhub.eu/workflows/1423","name":"Pangeo Jupyter Notebook","description":"Workflow for GTN Pangeo Notebook in Galaxy - Introduction to Xarray\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pangeo Notebook in Galaxy - Introduction to Xarray](https://training.galaxyproject.org/training-material/topics/climate/tutorials/pangeo-notebook/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow 
Author(s)**: Anne Fouilloux\n\n**Tutorial Author(s)**: [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1423?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1424","url":"https://workflowhub.eu/workflows/1424","name":"Tails Triple Dimethyl OpenMS2.1","description":"Detection and quantitation of N-termini (degradomics) via N-TAILS\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Detection and quantitation of N-termini (degradomics) via N-TAILS](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/ntails/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Anton Nekrutenko](https://training.galaxyproject.org/training-material/hall-of-fame/nekrut/), [William 
Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1424?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1425","url":"https://workflowhub.eu/workflows/1425","name":"WF3_VERIFICATION_WORKFLOW","description":"WF3- Peptide verification/validaiton workflow\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 3: Verification](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/clinical-mp-3-verification/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina 
Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1425?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1426","url":"https://workflowhub.eu/workflows/1426","name":"Proteomics: MaxQuant and MSstats for the analysis of label-free data","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [MaxQuant and MSstats for the analysis of label-free data](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow 
Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1426?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1427","url":"https://workflowhub.eu/workflows/1427","name":"CLM-FATES_ ALP1 simulation (5 years)","description":"Functionally Assembled Terrestrial Ecosystem Simulator (FATES)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Functionally Assembled Terrestrial Ecosystem Simulator (FATES)](https://training.galaxyproject.org/training-material/topics/climate/tutorials/fates/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Anne Fouilloux\n\n**Tutorial Author(s)**: [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/), [Hui Tang](https://training.galaxyproject.org/training-material/hall-of-fame/huitang-earth/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1427?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1428","url":"https://workflowhub.eu/workflows/1428","name":"Workflow 3: Classification [Galaxy Training: 16S Microbial Analysis With Mothur]","description":"16S Microbial Analysis with mothur (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Saskia Hiltemann\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1428?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1429","url":"https://workflowhub.eu/workflows/1429","name":"Proteogenomics 2: Database Search","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Proteogenomics 2: Database Search](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/proteogenomics-dbsearch/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow 
Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/), [Delphine Lariviere](https://training.galaxyproject.org/training-material/hall-of-fame/delphine-l/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1429?version=1","name":"8.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1430","url":"https://workflowhub.eu/workflows/1430","name":"GigaScience-RNAseq-Optitype-seq2HLA-to-IEDB-alleles","description":"Prediction of HLA binding for verified candidates\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Neoantigen 6: Predicting HLA Binding](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-6-predicting-hla-binding/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: galaxyp\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1430?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1431","url":"https://workflowhub.eu/workflows/1431","name":"Calculating diversity from microbiome taxonomic data","description":"Calculating diversity from bracken output\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Calculating α and β diversity from microbiome taxonomic data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/diversity/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Paul Zierep, Sophia Hampe, Bérénice Batut\n\n**Tutorial Author(s)**: [Sophia Hampe](https://training.galaxyproject.org/training-material/hall-of-fame/sophia120199/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Saskia 
Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Sophia Hampe](https://training.galaxyproject.org/training-material/hall-of-fame/sophia120199/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1431?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1432","url":"https://workflowhub.eu/workflows/1432","name":"GTN 'Pangeo 101 for everyone - Xarray'","description":"Workflow for GTN Pangeo 101 for everyone - Introduction to Xarray\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pangeo ecosystem 101 for everyone - Introduction to Xarray Galaxy Tools](https://training.galaxyproject.org/training-material/topics/climate/tutorials/pangeo/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Anne Fouilloux\n\n**Tutorial Author(s)**: [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1432?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1433","url":"https://workflowhub.eu/workflows/1433","name":"WF5_Data_Interpretation_Worklow","description":"Interpreting MaxQuant data using MSstats involves applying a rigorous statistical framework to glean meaningful insights from quantitative proteomic datasets\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 5: Data Interpretation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/clinical-mp-5-data-interpretation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1433?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1434","url":"https://workflowhub.eu/workflows/1434","name":"Proteomics: database handling","description":"Protein FASTA Database Handling\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Protein FASTA Database Handling](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/database-handling/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Helena 
Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [William Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1434?version=1","name":"5.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1435","url":"https://workflowhub.eu/workflows/1435","name":"WF2_Discovery-Workflow","description":"Discovery workflow with SG/PS and MaxQuant to generate microbial peptides\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 2: Discovery](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/clinical-mp-2-discovery/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks 
to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1435?version=1","name":"0.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1436","url":"https://workflowhub.eu/workflows/1436","name":"Analyse Argo data","description":"Process argo data with the Pangeo Ecosystem and visualise them with Ocean Data View (ODV)\r\n\r\n## Associated Tutorial\r\n\r\nThis workflows is part of the tutorial [Analyse Argo data](https://training.galaxyproject.org/training-material/topics/climate/tutorials/argo_pangeo/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n\r\n\r\n\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Marie Jossé\r\n\r\n**Tutorial Author(s)**: [Marie Josse](https://training.galaxyproject.org/training-material/hall-of-fame/Marie59/)\r\n\r\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Marie Josse](https://training.galaxyproject.org/training-material/hall-of-fame/Marie59/), [Björn 
Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\r\n\r\n**Grants(s)**: [Fair-Ease](https://training.galaxyproject.org/training-material/hall-of-fame/fairease/), [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1436?version=1","name":"1.0","author":["Marie Jossé"],"descriptor_type":["GALAXY"]}]},{"id":"1437","url":"https://workflowhub.eu/workflows/1437","name":"Gigascience_Indels_SAV_non-reference_demonstration_STS26T-Gent_Workflow","description":"Generating non-reference protein database for FragPipe discovery\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Neoantigen 2: Non-Reference-Database-Generation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-2-non-reference-database-generation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik 
Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1437?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1438","url":"https://workflowhub.eu/workflows/1438","name":"Proteomics: database handling including mycoplasma","description":"Protein FASTA Database Handling\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Protein FASTA Database Handling](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/database-handling/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Melanie 
Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [William Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1438?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1439","url":"https://workflowhub.eu/workflows/1439","name":"Identification of the micro-organisms in a beer using Nanopore sequencing","description":"Identification of the micro-organisms in a beer using Nanopore sequencing\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Identification of the micro-organisms in a beer using Nanopore sequencing](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/beer-data-analysis/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Teresa Müller, Polina 
Polunina\n\n**Tutorial Author(s)**: [Polina Polunina](https://training.galaxyproject.org/training-material/hall-of-fame/plushz/), [Siyu Chen](https://training.galaxyproject.org/training-material/hall-of-fame/chensy96/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Siyu Chen](https://training.galaxyproject.org/training-material/hall-of-fame/chensy96/), [Nuwan Goonasekera](https://training.galaxyproject.org/training-material/hall-of-fame/nuwang/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1439?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1440","url":"https://workflowhub.eu/workflows/1440","name":"Sentinel5 volcanic data","description":"From Copernicus Sentinel 5P data to panoply visualization of volcanic activity impact to atmosphere\r\n\r\n## Associated Tutorial\r\n\r\nThis 
workflows is part of the tutorial [Sentinel 5P data visualisation](https://training.galaxyproject.org/training-material/topics/climate/tutorials/sentinel5_data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n\r\n\r\n\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Marie Jossé\r\n\r\n**Tutorial Author(s)**: [Marie Josse](https://training.galaxyproject.org/training-material/hall-of-fame/Marie59/)\r\n\r\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\r\n\r\n**Grants(s)**: [Fair-Ease](https://training.galaxyproject.org/training-material/hall-of-fame/fairease/), [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1440?version=1","name":"1.0","author":["Marie Jossé"],"descriptor_type":["GALAXY"]}]},{"id":"1441","url":"https://workflowhub.eu/workflows/1441","name":"Climate 101","description":"Visualize Climate data with Panoply netCDF viewer\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Getting your hands-on climate data](https://training.galaxyproject.org/training-material/topics/climate/tutorials/climate-101/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Anne Fouilloux\n\n**Tutorial Author(s)**: [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/)\n\n[![gtn star 
logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1441?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1442","url":"https://workflowhub.eu/workflows/1442","name":"Gigascience_Fusions_demonstration_STS26T-Gent_Workflow","description":"Create a protein Fusion database through the Arriba workflow\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Neoantigen 1: Fusion-Database-Generation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-1-fusion-database-generation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1442?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1443","url":"https://workflowhub.eu/workflows/1443","name":"Metaproteomics_GTN","description":"Metaproteomics tutorial\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Metaproteomics tutorial](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/metaproteomics/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1443?version=1","name":"7.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1444","url":"https://workflowhub.eu/workflows/1444","name":"Metatranscriptomics analysis using microbiome RNA-seq data - Workflow 1: Preprocessing","description":"Metatranscriptomics analysis using microbiome RNA-seq data (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Metatranscriptomics analysis using microbiome RNA-seq data (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metatranscriptomics-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Pratik Jagtap, Subina Mehta, Saskia Hiltemann, Paul 
Zierep\n\n**Tutorial Author(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/)\n\n**Tutorial Contributor(s)**: [Christine Oger](https://training.galaxyproject.org/training-material/hall-of-fame/ogerdfx/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1444?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1444?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1445","url":"https://workflowhub.eu/workflows/1445","name":"Ocean's variables 2.0","description":"Subset data on the Mediterreanean see and extract and visualise the Phosphate variable\r\n\r\n## Associated Tutorial\r\n\r\nThis workflows is part of the tutorial [Ocean's variables study](https://training.galaxyproject.org/training-material/topics/climate/tutorials/ocean-variables/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n## Features\r\n\r\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\r\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\r\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Marie Jossé\r\n\r\n**Tutorial Author(s)**: [Marie Josse](https://training.galaxyproject.org/training-material/hall-of-fame/Marie59/)\r\n\r\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Marie Josse](https://training.galaxyproject.org/training-material/hall-of-fame/Marie59/)\r\n\r\n**Grants(s)**: [Fair-Ease](https://training.galaxyproject.org/training-material/hall-of-fame/fairease/), [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the 
word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1445?version=1","name":"2.0","author":["Marie Jossé"],"descriptor_type":["GALAXY"]}]},{"id":"1446","url":"https://workflowhub.eu/workflows/1446","name":"Proteomics: Peptide and Protein ID using OpenMS","description":"Peptide and Protein ID using OpenMS tools\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Peptide and Protein ID using OpenMS tools](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/protein-id-oms/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Niall 
Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1446?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1447","url":"https://workflowhub.eu/workflows/1447","name":"Workflow 3: Functional Information (quick)","description":"Metatranscriptomics analysis using microbiome RNA-seq data (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Metatranscriptomics analysis using microbiome RNA-seq data (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metatranscriptomics-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Pratik Jagtap, Subina Mehta, Ray Sajulga, Emma Leith, Praveen Kumar, Saskia Hiltemann, Paul Zierep\n\n**Tutorial Author(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Subina 
Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/)\n\n**Tutorial Contributor(s)**: [Christine Oger](https://training.galaxyproject.org/training-material/hall-of-fame/ogerdfx/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1447?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1447?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1448","url":"https://workflowhub.eu/workflows/1448","name":"Proteomics: MaxQuant workflow","description":"Label-free data analysis using MaxQuant\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Label-free data analysis using MaxQuant](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/maxquant-label-free/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1448?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1449","url":"https://workflowhub.eu/workflows/1449","name":"Proteomics: Peptide and Protein Quantification via stable istobe labeling","description":"Peptide and Protein Quantification via Stable Isotope Labelling (SIL)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Peptide and Protein Quantification via Stable Isotope Labelling 
(SIL)](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/protein-quant-sil/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [William Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of 
Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1449?version=1","name":"7.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1450","url":"https://workflowhub.eu/workflows/1450","name":"metaQuantome_datacreation_workflow","description":"metaquantome-data-creation\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [metaQuantome 1: Data creation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/metaquantome-data-creation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Marie Crane](https://training.galaxyproject.org/training-material/hall-of-fame/mariecrane/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1450?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1451","url":"https://workflowhub.eu/workflows/1451","name":"Metatranscriptomics analysis using microbiome RNA-seq data - Workflow 2: Community profile","description":"Metatranscriptomics analysis using microbiome RNA-seq data (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Metatranscriptomics analysis using microbiome RNA-seq data (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metatranscriptomics-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Pratik Jagtap, Subina Mehta, Saskia Hiltemann, Paul Zierep\n\n**Tutorial Author(s)**: [Pratik 
Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/)\n\n**Tutorial Contributor(s)**: [Christine Oger](https://training.galaxyproject.org/training-material/hall-of-fame/ogerdfx/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1451?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1451?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1452","url":"https://workflowhub.eu/workflows/1452","name":"Peptide And Protein ID Via OMS Using Two Search Engines","description":"Peptide and Protein ID using OpenMS tools\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Peptide and Protein ID using OpenMS tools](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/protein-id-oms/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Florian Christoph Sigloch](https://training.galaxyproject.org/training-material/hall-of-fame/stortebecker/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin 
Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1452?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1453","url":"https://workflowhub.eu/workflows/1453","name":"metaquantome-function-worklow","description":"metaquantome-function\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [metaQuantome 2: Function](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/metaquantome-function/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Marie Crane](https://training.galaxyproject.org/training-material/hall-of-fame/mariecrane/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1453?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1454","url":"https://workflowhub.eu/workflows/1454","name":"PeptideML","description":"ML Modeling of Anti-cancer Peptides\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Machine Learning Modeling of Anticancer Peptides](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/ml-modeling-of-anti-cancer-peptides/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Jayadev Joshi](https://training.galaxyproject.org/training-material/hall-of-fame/jaidevjoshi83/), [Daniel Blankenberg](https://training.galaxyproject.org/training-material/hall-of-fame/blankenberg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1454?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1455","url":"https://workflowhub.eu/workflows/1455","name":"MS Imaging Loading Exploring Data","description":"Mass spectrometry imaging: Loading and exploring MSI data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Mass spectrometry imaging: Loading and exploring MSI data](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/mass-spectrometry-imaging-loading-exploring-data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Tutorial Contributor(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of 
Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1455?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1456","url":"https://workflowhub.eu/workflows/1456","name":"Metatranscriptomics analysis using microbiome RNA-seq data - Workflow 3: Functional Information","description":"Metatranscriptomics analysis using microbiome RNA-seq data (short)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Metatranscriptomics analysis using microbiome RNA-seq data (short)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metatranscriptomics-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Pratik Jagtap, Subina Mehta, Saskia Hiltemann, Paul Zierep\n\n**Tutorial Author(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [Bérénice 
Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/)\n\n**Tutorial Contributor(s)**: [Christine Oger](https://training.galaxyproject.org/training-material/hall-of-fame/ogerdfx/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1456?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1456?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1457","url":"https://workflowhub.eu/workflows/1457","name":"GigaScience_Peptide_Annotation_demonstration_STS26T_neoantigen_candidates_workflow","description":"Annotating the novel peptides\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial 
[Neoantigen 5: Variant Annotation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/neoantigen-5-variant-annotation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1457?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1458","url":"https://workflowhub.eu/workflows/1458","name":"DIA_analysis_MSstats","description":"DIA MSstats Training export tabular\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Statistical analysis of DIA data](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/DIA_Analysis_MSstats/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks 
to...\n\n**Tutorial Author(s)**: [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1458?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1459","url":"https://workflowhub.eu/workflows/1459","name":"metaquantome-taxonomy-workflow","description":"metaquantome-taxonomy\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [metaQuantome 3: Taxonomy](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/metaquantome-taxonomy/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Marie Crane](https://training.galaxyproject.org/training-material/hall-of-fame/mariecrane/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1459?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1460","url":"https://workflowhub.eu/workflows/1460","name":"Workflow for Identifying MF from ITS2 sequencing using LotuS2 - tutorial example run'","description":"Workflow for running LotuS2 tool on fungal ITS paired-end sequencing data, to identify the fungi present in the samples\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Identifying Mycorrhizal Fungi from ITS2 sequencing using LotuS2](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/lotus2-identifying-fungi/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Society for the Protection of Underground Networks, Sujai Kumar, Bethan Manley\n\n**Tutorial Author(s)**: [Sujai 
Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/sujaikumar/)\n\n**Tutorial Contributor(s)**: [Bethan Manley](https://training.galaxyproject.org/training-material/hall-of-fame/bethanmanley/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Sujai Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/sujaikumar/)\n\n**Funder(s)**: [Society for the Protection of Underground Networks](https://training.galaxyproject.org/training-material/hall-of-fame/societyprotectionundergroundnetworks/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1460?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1461","url":"https://workflowhub.eu/workflows/1461","name":"WF1_Database_Generation_Workflow","description":"Generating a large database and then reducing it to a compact database using Metanovo\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 1: Database-Generation](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/clinical-mp-1-database-generation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine 
Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1461?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1462","url":"https://workflowhub.eu/workflows/1462","name":"DIA_analysis_MSstats","description":"DIA MSstats Training msstats input tabular\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Statistical analysis of DIA data](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/DIA_Analysis_MSstats/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Matthias 
Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1462?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1463","url":"https://workflowhub.eu/workflows/1463","name":"DIA_Analysis_OSW","description":"DIA Analysis Training using HEK Ecoli data in OSW\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [DIA Analysis using OpenSwathWorkflow](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/DIA_Analysis_OSW/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Matthias Fahrner](https://training.galaxyproject.org/training-material/hall-of-fame/matthias313/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1463?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1464","url":"https://workflowhub.eu/workflows/1464","name":"WF3_VERIFICATION_WORKFLOW","description":"WF3- Peptide verification/validaiton workflow\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial 
[Clinical Metaproteomics 3: Verification](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/clinical-mp-3-verification/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1464?version=1","name":"0.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1465","url":"https://workflowhub.eu/workflows/1465","name":"Training: 16S rRNA Sequencing With Mothur: Main Tutorial","description":"16S Microbial Analysis with mothur (extended)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S Microbial Analysis with mothur (extended)](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/mothur-miseq-sop/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice 
Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1465?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1466","url":"https://workflowhub.eu/workflows/1466","name":"Metatranscriptomics analysis using microbiome RNA-seq data","description":"Metatranscriptomics analysis using microbiome RNA-seq data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Metatranscriptomics analysis using microbiome RNA-seq data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metatranscriptomics/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Pratik Jagtap, Subina Mehta, Saskia Hiltemann, Paul Zierep\n\n**Tutorial Author(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [Bérénice 
Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Tutorial Contributor(s)**: [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Christine Oger](https://training.galaxyproject.org/training-material/hall-of-fame/ogerdfx/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1466?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1466?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1467","url":"https://workflowhub.eu/workflows/1467","name":"GTN Proteogemics3 Novel Peptide Analysis","description":"Proteogenomics 3: Novel peptide analysis\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Proteogenomics 3: Novel peptide 
analysis](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/proteogenomics-novel-peptide-analysis/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Ray Sajulga](https://training.galaxyproject.org/training-material/hall-of-fame/jraysajulga/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/), [Praveen Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/pravs3683/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1467?version=1","name":"7.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1468","url":"https://workflowhub.eu/workflows/1468","name":"WF4_Quantitation_Workflow","description":"Quantification using the MaxQuant tool\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 4: Quantitation](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/clinical-mp-4-quantitation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina 
Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1468?version=1","name":"0.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1469","url":"https://workflowhub.eu/workflows/1469","name":"Query a metaplasmidome database to identify and annotate plasmids in metagenomes","description":"This workflow query metagenomic raw data against a metaplasmidome database to identify plasmids and annotate them with genes, KO, PFAM\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Query an annotated mobile genetic element database to identify and annotate genetic elements (e.g. 
plasmids) in metagenomics data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metaplasmidome_query/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Nadia Goué\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Nadia Goué](https://training.galaxyproject.org/training-material/hall-of-fame/nagoue/), [Didier Debroas](https://training.galaxyproject.org/training-material/hall-of-fame/debroas/)\n\n**Tutorial Contributor(s)**: [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1469?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1470","url":"https://workflowhub.eu/workflows/1470","name":"Taxonomic Profiling and Visualization of Metagenomic Data","description":"This workflow performs taxonomic profiling of metagenomic data and visualizes microbial community composition using Kraken2 and Bracken as well as MetaPhlAn.\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Taxonomic Profiling and Visualization of Metagenomic 
Data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/taxonomic-profiling/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Tarnima Omara, Tristan Reynolds\n\n**Tutorial Author(s)**: [Sophia Hampe](https://training.galaxyproject.org/training-material/hall-of-fame/sophia120199/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Tarnima Omara](https://training.galaxyproject.org/training-material/hall-of-fame/Tarnima-Omara/), [Tristan Reynolds](https://training.galaxyproject.org/training-material/hall-of-fame/tflowers15/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/)\n\n**Funder(s)**: [The University of Melbourne](https://training.galaxyproject.org/training-material/hall-of-fame/unimelb/), [Melbourne Bioinformatics](https://training.galaxyproject.org/training-material/hall-of-fame/melbournebioinformatics/), [Australian 
BioCommons](https://training.galaxyproject.org/training-material/hall-of-fame/AustralianBioCommons/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1470?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1470?version=2","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1471","url":"https://workflowhub.eu/workflows/1471","name":"EncyclopeDIA-GTN","description":"encyclopedia- DIA Metaproteomics\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [EncyclopeDIA](https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/encyclopedia/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: GalaxyP\n\n**Tutorial Author(s)**: [Emma Leith](https://training.galaxyproject.org/training-material/hall-of-fame/emmaleith/), [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [James Johnson](https://training.galaxyproject.org/training-material/hall-of-fame/jj-umn/), [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1471?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1472","url":"https://workflowhub.eu/workflows/1472","name":"WGS Part In \"Analyses Of Metagenomics Data - The Global Picture\"","description":"Analyses of metagenomics data - The global picture\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Analyses of metagenomics data - The global picture](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/general-tutorial/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1472?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1473","url":"https://workflowhub.eu/workflows/1473","name":"Training: 16S rRNA Analysis with Nanopore Sequencing Reads","description":"16S rRNA analysis with Nanopore reads\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [16S 
Microbial analysis with Nanopore data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/nanopore-16S-metagenomics/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1473?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1474","url":"https://workflowhub.eu/workflows/1474","name":"WF1_Database_Generation_Workflow","description":"Generating a large database and then reducing it to a compact database using Metanovo\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 1: Database-Generation](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/clinical-mp-1-database-generation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1474?version=1","name":"0.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1475","url":"https://workflowhub.eu/workflows/1475","name":"Galaxy Intro Strands","description":"Introduction to Genomics and Galaxy\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Introduction to Genomics and Galaxy](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-intro-strands/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Yvan Le Bras](https://training.galaxyproject.org/training-material/hall-of-fame/yvanlebras/), [Sebastian Schaaf](https://training.galaxyproject.org/training-material/hall-of-fame/sebastian-schaaf/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Gildas Le Corguillé](https://training.galaxyproject.org/training-material/hall-of-fame/lecorguille/), [Peter van 
Heusden](https://training.galaxyproject.org/training-material/hall-of-fame/pvanheus/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Goodnews Sandy](https://training.galaxyproject.org/training-material/hall-of-fame/sandygudie/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Maria Doyle](https://training.galaxyproject.org/training-material/hall-of-fame/mblue9/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1475?version=1","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1476","url":"https://workflowhub.eu/workflows/1476","name":"Amplicon Tutorial","description":"Analyses of metagenomics data - The global picture\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Analyses of metagenomics data - The global picture](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/general-tutorial/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow 
Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1476?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1477","url":"https://workflowhub.eu/workflows/1477","name":"Copy Of GTN Training - Antibiotic Resistance Detection","description":"Antibiotic resistance detection\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Antibiotic resistance detection](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/plasmid-metagenomics-nanopore/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Willem de Koning](https://training.galaxyproject.org/training-material/hall-of-fame/willemdek11/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1477?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1478","url":"https://workflowhub.eu/workflows/1478","name":"Galaxy Introduction Peaks2Genes - Part 1","description":"Galaxy Introduction Peaks2Genes - Part 1\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [From peaks to genes](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-intro-peaks2genes/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Anne Pajon](https://training.galaxyproject.org/training-material/hall-of-fame/pajanne/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Dilmurat Yusuf](https://training.galaxyproject.org/training-material/hall-of-fame/dyusuf/), [Sarah Peter](https://training.galaxyproject.org/training-material/hall-of-fame/sarah-peter/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Tutorial Contributor(s)**: [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia 
Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Bert Droesbeke](https://training.galaxyproject.org/training-material/hall-of-fame/bedroesb/), [Gildas Le Corguillé](https://training.galaxyproject.org/training-material/hall-of-fame/lecorguille/), [Nate Coraor](https://training.galaxyproject.org/training-material/hall-of-fame/natefoo/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/), [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Clemens Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/), [William Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Cyril Monjeaud](https://training.galaxyproject.org/training-material/hall-of-fame/cmonjeau/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Maria Doyle](https://training.galaxyproject.org/training-material/hall-of-fame/mblue9/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1478?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1479","url":"https://workflowhub.eu/workflows/1479","name":"Allele-based Pathogen Identification","description":"Microbiome - Variant calling and Consensus Building\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pathogen detection from (direct Nanopore) sequencing data using Galaxy - Foodborne Edition](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/pathogen-detection-from-nanopore-foodborne-data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Engy Nasr, Bérénice Batut, Paul Zierep\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Bérénice 
Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EOSC-Life](https://training.galaxyproject.org/training-material/hall-of-fame/eosc-life/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1479?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1480","url":"https://workflowhub.eu/workflows/1480","name":"workflow-generate-dataset-for-assembly-tutorial","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Assembly of metagenomic sequencing data](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/metagenomics-assembly/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Polina Polunina](https://training.galaxyproject.org/training-material/hall-of-fame/plushz/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n**Tutorial Contributor(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Saskia 
Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Polina Polunina](https://training.galaxyproject.org/training-material/hall-of-fame/plushz/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1480?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1481","url":"https://workflowhub.eu/workflows/1481","name":"Galaxy Intro Short","description":"A short introduction to Galaxy\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [A short introduction to Galaxy](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-intro-short/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Anna Syme](https://training.galaxyproject.org/training-material/hall-of-fame/annasyme/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/)\n\n**Tutorial Contributor(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Ahmed Hamid Awan](https://training.galaxyproject.org/training-material/hall-of-fame/ahmedhamidawan/), [Phil Reed](https://training.galaxyproject.org/training-material/hall-of-fame/PhilReedData/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia 
Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Bert Droesbeke](https://training.galaxyproject.org/training-material/hall-of-fame/bedroesb/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/), [Maria Doyle](https://training.galaxyproject.org/training-material/hall-of-fame/mblue9/), [Morgan Howells](https://training.galaxyproject.org/training-material/hall-of-fame/hexhowells/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/)\n\n**Funder(s)**: [Australian BioCommons](https://training.galaxyproject.org/training-material/hall-of-fame/AustralianBioCommons/), [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1481?version=1","name":"5.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1482","url":"https://workflowhub.eu/workflows/1482","name":"WF5_Data_Interpretation_Worklow","description":"Interpreting MaxQuant data using MSstats involves applying a rigorous statistical framework to glean meaningful insights from quantitative proteomic datasets\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clinical Metaproteomics 5: Data Interpretation](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/clinical-mp-5-data-interpretation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Subina Mehta\n\n**Tutorial Author(s)**: [Subina Mehta](https://training.galaxyproject.org/training-material/hall-of-fame/subinamehta/), [Katherine Do](https://training.galaxyproject.org/training-material/hall-of-fame/katherine-d21/), [Dechen Bhuming](https://training.galaxyproject.org/training-material/hall-of-fame/dechendb/)\n\n**Tutorial Contributor(s)**: [Pratik Jagtap](https://training.galaxyproject.org/training-material/hall-of-fame/pratikdjagtap/), [Timothy J. 
Griffin](https://training.galaxyproject.org/training-material/hall-of-fame/timothygriffin/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1482?version=1","name":"0.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1483","url":"https://workflowhub.eu/workflows/1483","name":"Taxonomy Profiling and Visualization with Krona","description":"Microbiome - Taxonomy Profiling\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pathogen detection from (direct Nanopore) sequencing data using Galaxy - Foodborne Edition](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/pathogen-detection-from-nanopore-foodborne-data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Engy Nasr, Bérénice Batut, Paul Zierep\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Wolfgang 
Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EOSC-Life](https://training.galaxyproject.org/training-material/hall-of-fame/eosc-life/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1483?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1484","url":"https://workflowhub.eu/workflows/1484","name":"Long non-coding RNAs (lncRNAs) annotation with FEELnc","description":"Long non-coding RNAs (lncRNAs) annotation with FEELnc\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Long non-coding RNAs (lncRNAs) annotation with 
FEELnc](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/lncrna/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Stéphanie Robin\n\n**Tutorial Author(s)**: [Stéphanie Robin](https://training.galaxyproject.org/training-material/hall-of-fame/stephanierobin/)\n\n**Tutorial Contributor(s)**: [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Romane LIBOUBAN](https://training.galaxyproject.org/training-material/hall-of-fame/rlibouba/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1484?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1485","url":"https://workflowhub.eu/workflows/1485","name":"scRNA Plant Analysis","description":"Downstream Single-cell RNA Plant analysis with ScanPy\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Analysis of plant scRNA-Seq Data with Scanpy](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-plant/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## 
Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Mehmet Tekman , Beatriz Serrano-Solano, Cristóbal Gallardo, Pavankumar Videm\n\n**Tutorial Author(s)**: [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/)\n\n**Tutorial Contributor(s)**: [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1485?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1486","url":"https://workflowhub.eu/workflows/1486","name":"GTN Tutorial: Data manipulation Olympics - all steps and exercises","description":"Data Manipulation Olympics\n\n## Associated 
Tutorial\n\nThis workflows is part of the tutorial [Data Manipulation Olympics](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/data-manipulation-olympics/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Yongbin Li](https://training.galaxyproject.org/training-material/hall-of-fame/lybCNU/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Scott Cain](https://training.galaxyproject.org/training-material/hall-of-fame/scottcain/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Yongbin Li](https://training.galaxyproject.org/training-material/hall-of-fame/lybCNU/), [Donny Vrins](https://training.galaxyproject.org/training-material/hall-of-fame/dirowa/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Daniela Schneider](https://training.galaxyproject.org/training-material/hall-of-fame/Sch-Da/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1486?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1487","url":"https://workflowhub.eu/workflows/1487","name":"Pathogen Detection PathoGFAIR Samples Aggregation and Visualisation","description":"Pathogens of all 
samples report generation and visualization\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pathogen detection from (direct Nanopore) sequencing data using Galaxy - Foodborne Edition](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/pathogen-detection-from-nanopore-foodborne-data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Engy Nasr, Bérénice Batut, Paul Zierep\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Helena 
Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EOSC-Life](https://training.galaxyproject.org/training-material/hall-of-fame/eosc-life/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1487?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1487?version=2","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1488","url":"https://workflowhub.eu/workflows/1488","name":"mrsa AMR gene detection","description":"Identification of AMR genes in an assembled bacterial genome\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Identification of AMR genes in an assembled bacterial genome](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/amr-gene-detection/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bazante Sanders, Bérénice Batut, Tristan Reynolds\n\n**Tutorial Author(s)**: [Bazante Sanders](https://training.galaxyproject.org/training-material/hall-of-fame/bazante1/), [Bérénice 
Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Bazante Sanders](https://training.galaxyproject.org/training-material/hall-of-fame/bazante1/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Miaomiao Zhou](https://training.galaxyproject.org/training-material/hall-of-fame/miaomiaozhou88/), [Tristan Reynolds](https://training.galaxyproject.org/training-material/hall-of-fame/tflowers15/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [pimarin](https://training.galaxyproject.org/training-material/hall-of-fame/pimarin/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Funder(s)**: [Avans Hogeschool](https://training.galaxyproject.org/training-material/hall-of-fame/avans-atgm/), [ABRomics](https://training.galaxyproject.org/training-material/hall-of-fame/abromics/), [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [The University of Melbourne](https://training.galaxyproject.org/training-material/hall-of-fame/unimelb/), [Melbourne Bioinformatics](https://training.galaxyproject.org/training-material/hall-of-fame/melbournebioinformatics/), [Australian BioCommons](https://training.galaxyproject.org/training-material/hall-of-fame/AustralianBioCommons/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training 
Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1488?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1488?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1489","url":"https://workflowhub.eu/workflows/1489","name":"GTN Training: Galaxy 101 For Everyone","description":"introduction\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [How to reproduce published Galaxy analyses](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-reproduce/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Lucille Delisle](https://training.galaxyproject.org/training-material/hall-of-fame/lldelisle/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Melanie Föll](https://training.galaxyproject.org/training-material/hall-of-fame/foellmelanie/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Funder(s)**: [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training 
Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1489?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1490","url":"https://workflowhub.eu/workflows/1490","name":"CS3_Filter, Plot and Explore Single-cell RNA-seq Data","description":"Updated tool versions Aug 24 2022\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Filter, plot and explore single-cell RNA-seq data with Scanpy](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_basic-pipeline/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Wendi Bacon\n\n**Tutorial Author(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Amirhossein Naghsh Nilchi](https://training.galaxyproject.org/training-material/hall-of-fame/Nilchia/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet 
Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1490?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1491","url":"https://workflowhub.eu/workflows/1491","name":"CelSeq2: Multi Batch (mm10)","description":"Pre-processing of Single-Cell RNA Data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pre-processing of Single-Cell RNA Data](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-preprocessing/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Uses [subworkflows](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_subworkflows.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Anika Erxleben](https://training.galaxyproject.org/training-material/hall-of-fame/erxleben/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/)\n\n**Tutorial Contributor(s)**: [Helena 
Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Gildas Le Corguillé](https://training.galaxyproject.org/training-material/hall-of-fame/lecorguille/), [Stéphanie Robin](https://training.galaxyproject.org/training-material/hall-of-fame/stephanierobin/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Simon Bray](https://training.galaxyproject.org/training-material/hall-of-fame/simonbray/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1491?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1492","url":"https://workflowhub.eu/workflows/1492","name":"Nanopore Preprocessing","description":"Microbiome - QC and Contamination Filtering\n\n## Associated Tutorial\n\nThis workflows is 
part of the tutorial [Pathogen detection from (direct Nanopore) sequencing data using Galaxy - Foodborne Edition](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/pathogen-detection-from-nanopore-foodborne-data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Engy Nasr, Paul Zierep\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), 
[Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EOSC-Life](https://training.galaxyproject.org/training-material/hall-of-fame/eosc-life/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1492?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1492?version=2","name":"4.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1493","url":"https://workflowhub.eu/workflows/1493","name":"Find exons with the highest number of features","description":"Galaxy 101\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Galaxy Basics for genomics](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-intro-101/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Anton Nekrutenko, Helena Rasche, Armin Dadras\n\n**Tutorial Author(s)**: [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Clemens 
Blank](https://training.galaxyproject.org/training-material/hall-of-fame/blankclemens/), [Anton Nekrutenko](https://training.galaxyproject.org/training-material/hall-of-fame/nekrut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Anne Pajon](https://training.galaxyproject.org/training-material/hall-of-fame/pajanne/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Tutorial Contributor(s)**: [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Anton Nekrutenko](https://training.galaxyproject.org/training-material/hall-of-fame/nekrut/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Gildas Le Corguillé](https://training.galaxyproject.org/training-material/hall-of-fame/lecorguille/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Nate Coraor](https://training.galaxyproject.org/training-material/hall-of-fame/natefoo/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/), [William 
Durand](https://training.galaxyproject.org/training-material/hall-of-fame/willdurand/), [Niall Beard](https://training.galaxyproject.org/training-material/hall-of-fame/njall/), [Maria Doyle](https://training.galaxyproject.org/training-material/hall-of-fame/mblue9/)\n\n**Funder(s)**: [The Pennsylvania State University](https://training.galaxyproject.org/training-material/hall-of-fame/psu/), [Erasmus Medical Center](https://training.galaxyproject.org/training-material/hall-of-fame/erasmusmc/), [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1493?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1494","url":"https://workflowhub.eu/workflows/1494","name":"CelSeq2: Single Batch (mm10)","description":"Pre-processing of Single-Cell RNA Data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pre-processing of Single-Cell RNA Data](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-preprocessing/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Anika 
Erxleben](https://training.galaxyproject.org/training-material/hall-of-fame/erxleben/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Gildas Le Corguillé](https://training.galaxyproject.org/training-material/hall-of-fame/lecorguille/), [Stéphanie Robin](https://training.galaxyproject.org/training-material/hall-of-fame/stephanierobin/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Simon Bray](https://training.galaxyproject.org/training-material/hall-of-fame/simonbray/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1494?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1495","url":"https://workflowhub.eu/workflows/1495","name":"Gene-based Pathogen Identification","description":"Nanopore datasets analysis - Phylogenetic Identification - antibiotic resistance genes detection and contigs building\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pathogen detection from (direct Nanopore) sequencing data using Galaxy - Foodborne Edition](https://training.galaxyproject.org/training-material/topics/microbiome/tutorials/pathogen-detection-from-nanopore-foodborne-data/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Engy Nasr, Bérénice Batut, Paul Zierep\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/)\n\n**Tutorial Contributor(s)**: [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Wolfgang Maier](https://training.galaxyproject.org/training-material/hall-of-fame/wm75/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Paul 
Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Engy Nasr](https://training.galaxyproject.org/training-material/hall-of-fame/EngyNasr/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EOSC-Life](https://training.galaxyproject.org/training-material/hall-of-fame/eosc-life/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1495?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1496","url":"https://workflowhub.eu/workflows/1496","name":"Filter, Plot and Explore Single-cell RNA-seq Data updated","description":"Filter, Plot and Explore Single-cell RNA-seq Data\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Filter, plot and explore single-cell RNA-seq data with Scanpy](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_basic-pipeline/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Wendi Bacon, 
Julia Jakiela\n\n**Tutorial Author(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Amirhossein Naghsh Nilchi](https://training.galaxyproject.org/training-material/hall-of-fame/Nilchia/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1496?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1497","url":"https://workflowhub.eu/workflows/1497","name":"Genome Annotation with Prokka","description":"Genome 
annotation with Prokka\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Genome annotation with Prokka](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/annotation-with-prokka/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Anna Syme, Torsten Seemann, Simon Gladman\n\n**Tutorial Author(s)**: [Anna Syme](https://training.galaxyproject.org/training-material/hall-of-fame/annasyme/), [Torsten Seemann](https://training.galaxyproject.org/training-material/hall-of-fame/tseemann/), [Simon Gladman](https://training.galaxyproject.org/training-material/hall-of-fame/slugger70/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1497?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1498","url":"https://workflowhub.eu/workflows/1498","name":"MuSiC-Deconvolution: Compare","description":"This workflow runs 3 comparisons using MuSiC Deconvolution compare: where datasets cell compositions are inferred from a reference containing healthy and diseased cells; where diseased are inferred from disease and healthy from healthy; and where both diseased and healthy are inferred from a healthy reference.\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Comparing inferred cell compositions using MuSiC deconvolution](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/bulk-music-4-compare/tutorial.html), available in the 
[GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Wendi Bacon, Mehmet Tekman\n\n**Tutorial Author(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/)\n\n**Tutorial Contributor(s)**: [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Carlos Chee Mendonça](https://training.galaxyproject.org/training-material/hall-of-fame/carloscheemendonca/), [Morgan Howells](https://training.galaxyproject.org/training-material/hall-of-fame/hexhowells/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1498?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1499","url":"https://workflowhub.eu/workflows/1499","name":"Capturing mitoflashes","description":"This workflow is developed for automatic detection and measuring of mitoflashes in time-lapse microscopy images. 
\n\nIn addition, this workflow can be used for detection and tracking of other spot-like organelles with small motion.\n\nIf this workflow helped with the analysis of your data, please do not forget to cite: https://doi.org/10.1097/j.pain.0000000000002642\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Tracking of mitochondria and capturing mitoflashes](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/detection-of-mitoflashes/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: BMCV Group, Heidelberg University\n\n**Tutorial Author(s)**: [Diana Chiang Jurado](https://training.galaxyproject.org/training-material/hall-of-fame/dianichj/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/)\n\n**Tutorial Contributor(s)**: [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Diana Chiang Jurado](https://training.galaxyproject.org/training-material/hall-of-fame/dianichj/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1499?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1500","url":"https://workflowhub.eu/workflows/1500","name":"annotation_helixer","description":"This workflow allows you to annotate a genome with Helixer and evaluate the quality of the annotation using BUSCO and Genome Annotation statistics. GFFRead is also used to predict protein sequences derived from this annotation, and BUSCO and OMArk are used to assess proteome quality. \r\n\r\n## Associated Tutorial\r\n\r\nThis workflows is part of the tutorial [Genome annotation with Helixer](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/helixer/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n## Features\r\n\r\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\r\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\r\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Romane Libouban\r\n\r\n**Tutorial Author(s)**: [Romane LIBOUBAN](https://training.galaxyproject.org/training-material/hall-of-fame/rlibouba/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/)\r\n\r\n**Tutorial Contributor(s)**: [Felicitas Kindel](https://training.galaxyproject.org/training-material/hall-of-fame/felicitas215/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Romane 
LIBOUBAN](https://training.galaxyproject.org/training-material/hall-of-fame/rlibouba/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\r\n\r\n**Grants(s)**: [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1500?version=1","name":"2.0","author":["Romane Libouban","Anthony Bretaudeau"],"descriptor_type":["GALAXY"]}]},{"id":"1501","url":"https://workflowhub.eu/workflows/1501","name":"Filter plot and explore single-cell RNA-seq data with Scanpy (imported from uploaded file)","description":"Workflow for this training: https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_basic-pipeline/tutorial.html\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Filter, plot and explore single-cell RNA-seq data with Scanpy](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_basic-pipeline/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow 
Author(s)**: Wendi Bacon\n\n**Tutorial Author(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Amirhossein Naghsh Nilchi](https://training.galaxyproject.org/training-material/hall-of-fame/Nilchia/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1501?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1502","url":"https://workflowhub.eu/workflows/1502","name":"scATAC-seq Count Matrix 
Filtering","description":"Visualize and filter scATAC-seq anndata to produce a high quality count matrix  \n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pre-processing of 10X Single-Cell ATAC-seq Datasets](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scatac-preprocessing-tenx/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Pavankumar Videm\n\n**Tutorial Author(s)**: [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/)\n\n**Tutorial Contributor(s)**: [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Timon Schlegel](https://training.galaxyproject.org/training-material/hall-of-fame/timonschlegel/), [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1502?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1502?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1503","url":"https://workflowhub.eu/workflows/1503","name":"CP_object_tracking_example","description":"example cellprofiler pipeline\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Object tracking using CellProfiler](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/object-tracking-using-cell-profiler/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Yi Sun](https://training.galaxyproject.org/training-material/hall-of-fame/sunyi000/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Jean-Karim Hériché](https://training.galaxyproject.org/training-material/hall-of-fame/jkh1/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1503?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1504","url":"https://workflowhub.eu/workflows/1504","name":"Genome annotation with Maker","description":"Genome annotation with Maker\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Genome annotation with Maker](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/annotation-with-maker/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Anthony Bretaudeau, French National Institute for Agriculture, Food, and Environment (INRAE)\n\n**Tutorial Author(s)**: [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1504?version=1","name":"10.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1505","url":"https://workflowhub.eu/workflows/1505","name":"CP_pipeline_IDR_training","description":"Nucleoli segmentation and feature extraction using CellProfiler\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Nucleoli segmentation and feature extraction using CellProfiler](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/tutorial-CP/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Jean-Karim 
Hériché](https://training.galaxyproject.org/training-material/hall-of-fame/jkh1/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1505?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1506","url":"https://workflowhub.eu/workflows/1506","name":"scATAC-seq FASTQ to Count Matrix","description":"This workflow creates an count matrix anndata file given 10x scATAC-seq data.\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Pre-processing of 10X Single-Cell ATAC-seq Datasets](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scatac-preprocessing-tenx/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Pavankumar Videm\n\n**Tutorial Author(s)**: [Pavankumar 
Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/)\n\n**Tutorial Contributor(s)**: [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Timon Schlegel](https://training.galaxyproject.org/training-material/hall-of-fame/timonschlegel/), [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1506?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]},{"id":"2","url":"https://workflowhub.eu/workflows/1506?version=2","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1507","url":"https://workflowhub.eu/workflows/1507","name":"Understanding Barcodes","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Understanding Barcodes](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-umis/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy 
Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Morgan\n\n**Tutorial Author(s)**: [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/)\n\n**Tutorial Contributor(s)**: [Morgan Howells](https://training.galaxyproject.org/training-material/hall-of-fame/hexhowells/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Lucille Delisle](https://training.galaxyproject.org/training-material/hall-of-fame/lldelisle/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Mohua Das](https://training.galaxyproject.org/training-material/hall-of-fame/MD-Chem/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1507?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1508","url":"https://workflowhub.eu/workflows/1508","name":"feature_extraction","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Analyse HeLa fluorescence siRNA 
screen](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/hela-screen-analysis/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Till Korten, Leonid Kostrykin\n\n**Tutorial Author(s)**: [Thomas Wollmann](https://training.galaxyproject.org/training-material/hall-of-fame/thomaswollmann/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Thomas Wollmann](https://training.galaxyproject.org/training-material/hall-of-fame/thomaswollmann/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1508?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1509","url":"https://workflowhub.eu/workflows/1509","name":"Genome annotation with Braker3","description":"This workflow uses Braker3 to annotate a genome.\r\n\r\n## Associated Tutorial\r\n\r\nThis workflows is part of the tutorial [Genome annotation with 
Braker3](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/braker3/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n## Features\r\n\r\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\r\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\r\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Romane Libouban\r\n\r\n**Tutorial Author(s)**: [Romane LIBOUBAN](https://training.galaxyproject.org/training-material/hall-of-fame/rlibouba/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/)\r\n\r\n**Tutorial Contributor(s)**: [Deepti Varshney](https://training.galaxyproject.org/training-material/hall-of-fame/deeptivarshney/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\r\n\r\n**Grants(s)**: [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1509?version=1","name":"1.0","author":["Romane Libouban","Anthony Bretaudeau"],"descriptor_type":["GALAXY"]}]},{"id":"1510","url":"https://workflowhub.eu/workflows/1510","name":"Scanpy Parameter Iterator workflow full (imported from 
URL)","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Scanpy Parameter Iterator](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scanpy_parameter_iterator/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/)\n\n**Tutorial Contributor(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Grants(s)**: [EOSC-Life](https://training.galaxyproject.org/training-material/hall-of-fame/eosc-life/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1510?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1511","url":"https://workflowhub.eu/workflows/1511","name":"Single-cell QC with 
scater","description":"Single-cell quality control with scater\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Single-cell quality control with scater](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-scater-qc/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Graham Etherington, Nicola Soranzo, Pavankumar Videm\n\n**Tutorial Author(s)**: [Graham Etherington](https://training.galaxyproject.org/training-material/hall-of-fame/ethering/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/)\n\n**Tutorial Contributor(s)**: [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Graham Etherington](https://training.galaxyproject.org/training-material/hall-of-fame/ethering/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Gildas Le Corguillé](https://training.galaxyproject.org/training-material/hall-of-fame/lecorguille/), [Stéphanie Robin](https://training.galaxyproject.org/training-material/hall-of-fame/stephanierobin/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Simon 
Bray](https://training.galaxyproject.org/training-material/hall-of-fame/simonbray/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1511?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1512","url":"https://workflowhub.eu/workflows/1512","name":"analyze_screen","description":"Analyse HeLa fluorescence siRNA screen\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Analyse HeLa fluorescence siRNA screen](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/hela-screen-analysis/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Uses [subworkflows](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_subworkflows.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Till Korten, Leonid Kostrykin\n\n**Tutorial Author(s)**: [Thomas Wollmann](https://training.galaxyproject.org/training-material/hall-of-fame/thomaswollmann/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Thomas 
Wollmann](https://training.galaxyproject.org/training-material/hall-of-fame/thomaswollmann/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1512?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1513","url":"https://workflowhub.eu/workflows/1513","name":"Bacterial Genome Annotation","description":"Bacterial Genome Annotation\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Bacterial Genome Annotation](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/bacterial-genome-annotation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Pierre Marin\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Paul Zierep](https://training.galaxyproject.org/training-material/hall-of-fame/paulzierep/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Björn 
Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Funder(s)**: [ABRomics](https://training.galaxyproject.org/training-material/hall-of-fame/abromics/), [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [Institut Français de Bioinformatique](https://training.galaxyproject.org/training-material/hall-of-fame/ifb/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1513?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1514","url":"https://workflowhub.eu/workflows/1514","name":"GTN_Exemplar_002_TMA_workflow_Feb2025","description":"End-to-End Tissue Microarray Analysis with Galaxy-ME\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [End-to-End Tissue Microarray Image Analysis with Galaxy-ME](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Cameron Watson\n\n**Tutorial Author(s)**: [Cameron Watson](https://training.galaxyproject.org/training-material/hall-of-fame/CameronFRWatson/), [Allison Creason](https://training.galaxyproject.org/training-material/hall-of-fame/alliecreason/)\n\n**Tutorial Contributor(s)**: [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Beatriz 
Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Cameron Watson](https://training.galaxyproject.org/training-material/hall-of-fame/CameronFRWatson/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1514?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1515","url":"https://workflowhub.eu/workflows/1515","name":"GO Enrichment Workflow","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [GO Enrichment Analysis on Single-Cell RNA-Seq Data](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/GO-enrichment/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Menna Gamal\n\n**Tutorial Author(s)**: [Menna Gamal](https://training.galaxyproject.org/training-material/hall-of-fame/MennaGamal/)\n\n**Tutorial Contributor(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Pablo 
Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mennayousef](https://training.galaxyproject.org/training-material/hall-of-fame/Mennayousef/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Armin Dadras](https://training.galaxyproject.org/training-material/hall-of-fame/dadrasarmin/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [de.NBI](https://training.galaxyproject.org/training-material/hall-of-fame/deNBI/), [University of Freiburg](https://training.galaxyproject.org/training-material/hall-of-fame/uni-freiburg/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1515?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1516","url":"https://workflowhub.eu/workflows/1516","name":"Monocle3 workflow","description":"Trajectory analysis using Monocle3, starting from 3 input files: expression matrix, gene and cell annotations\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Inferring single cell trajectories with Monocle3](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_monocle3-trajectories/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Julia 
Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Grants(s)**: [EPSRC Training Grant DTP 2020-2021 Open University](https://training.galaxyproject.org/training-material/hall-of-fame/epsrc-training-grant/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1516?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1517","url":"https://workflowhub.eu/workflows/1517","name":"Segmentation_Values_Testing","description":"Workflow to assess nuclei segmentation by applying different Gaussian filtering values during image pre-processing.\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Parameter tuning and optimization - Evaluating nuclei segmentation with 
Galaxy](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/parameter-tuning/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Riccardo Massei\n\n**Tutorial Author(s)**: [Riccardo Massei](https://training.galaxyproject.org/training-material/hall-of-fame/rmassei/)\n\n**Tutorial Contributor(s)**: [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Riccardo Massei](https://training.galaxyproject.org/training-material/hall-of-fame/rmassei/)\n\n**Funder(s)**: [Deutsche Forschungsgemeinschaft](https://training.galaxyproject.org/training-material/hall-of-fame/dfg/)\n\n**Grants(s)**: [NFDI4Bioimage](https://training.galaxyproject.org/training-material/hall-of-fame/nfdi4bioimage/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1517?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1518","url":"https://workflowhub.eu/workflows/1518","name":"Comparative gene analysis","description":"Workflows for comparison of genes in annotated genomes\n\n## Associated Tutorial\n\nThis workflows is 
part of the tutorial [Comparative gene analysis in unannotated genomes](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/gene-centric/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Anton Nekrutenko\n\n**Tutorial Author(s)**: [Anton Nekrutenko](https://training.galaxyproject.org/training-material/hall-of-fame/nekrut/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1518?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1519","url":"https://workflowhub.eu/workflows/1519","name":"Clustering 3k PBMC with Scanpy","description":"Workflow based on clustering 3K PBMCs with Scanpy tutorial\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Clustering 3K PBMCs with Scanpy](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-scanpy-pbmc3k/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Bérénice Batut, Hans-Rudolf Hotz, Mehmet Tekman, Pavankumar Videm\n\n**Tutorial Author(s)**: [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Mehmet 
Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Diana Chiang Jurado](https://training.galaxyproject.org/training-material/hall-of-fame/dianichj/)\n\n**Tutorial Contributor(s)**: [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Amirhossein Naghsh Nilchi](https://training.galaxyproject.org/training-material/hall-of-fame/Nilchia/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Hans-Rudolf Hotz](https://training.galaxyproject.org/training-material/hall-of-fame/hrhotz/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Marius van den Beek](https://training.galaxyproject.org/training-material/hall-of-fame/mvdbeek/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Nate Coraor](https://training.galaxyproject.org/training-material/hall-of-fame/natefoo/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1519?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1520","url":"https://workflowhub.eu/workflows/1520","name":"AnnData object to Monocle input files","description":"Preparing and filtering gene and cell annotations files and expression matrix to be passed as input for Monocle\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Inferring single cell trajectories with Monocle3](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_monocle3-trajectories/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Grants(s)**: [EPSRC Training Grant DTP 2020-2021 Open University](https://training.galaxyproject.org/training-material/hall-of-fame/epsrc-training-grant/)\n\n[![gtn star logo followed by the word 
workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1520?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1521","url":"https://workflowhub.eu/workflows/1521","name":"Functional annotation","description":"Functional annotation of protein sequences\r\n\r\n## Associated Tutorial\r\n\r\nThis workflows is part of the tutorial [Functional annotation of protein sequences](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/functional/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n## Features\r\n\r\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Anthony Bretaudeau\r\n\r\n**Tutorial Author(s)**: [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/)\r\n\r\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\r\n\r\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/), [Institut Français de Bioinformatique](https://training.galaxyproject.org/training-material/hall-of-fame/ifb/)\r\n\r\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and 
Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1521?version=1","name":"1.0","author":["Anthony Bretaudeau"],"descriptor_type":["GALAXY"]}]},{"id":"1522","url":"https://workflowhub.eu/workflows/1522","name":"Voronoi segmentation","description":"Generic workflow to perform Voronoi segmentation.\nInput requirements: \n* Image: \n-- Preferably lighter objects on a darker background for the mask to work well.\n-- Format: .tiff, stored in planar RGB format, not interleaved (http://avitevet.com/uncategorized/when-to-use-it-interleaved-vs-planar-image-data-storage/). 
\n* Seeds: \n-- White seeds on a black background\n-- Format: .tiff\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Voronoi segmentation](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/voronoi-segmentation/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Even Moa Myklebust, Riccardo Massei\n\n**Tutorial Author(s)**: [Even Moa Myklebust](https://training.galaxyproject.org/training-material/hall-of-fame/evenmm/), [Riccardo Massei](https://training.galaxyproject.org/training-material/hall-of-fame/rmassei/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1522?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1523","url":"https://workflowhub.eu/workflows/1523","name":"AnnData to SingleCellExperiment (SCE) conversion","description":"AnnData to SCE format conversion (manually using Galaxy buttons)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Converting between common single cell data 
formats](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-data-ingest/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela, Morgan Howells\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Morgan Howells](https://training.galaxyproject.org/training-material/hall-of-fame/hexhowells/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Grants(s)**: [DASH UK](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-uk-dash/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1523?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1524","url":"https://workflowhub.eu/workflows/1524","name":"Trajectory analysis using Monocle3 - full tutorial workflow","description":"Trajectory analysis using Monocle3, starting from AnnData\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Inferring single cell trajectories with Monocle3](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_monocle3-trajectories/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Grants(s)**: [EPSRC Training Grant DTP 2020-2021 Open 
University](https://training.galaxyproject.org/training-material/hall-of-fame/epsrc-training-grant/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1524?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1525","url":"https://workflowhub.eu/workflows/1525","name":"Funannotate","description":"Structural and functional genome annotation with Funannotate\r\n\r\n## Associated Tutorial\r\n\r\nThis workflows is part of the tutorial [Genome annotation with Funannotate](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/funannotate/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\r\n\r\n## Features\r\n\r\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\r\n\r\n## Thanks to...\r\n\r\n**Workflow Author(s)**: Anthony Bretaudeau\r\n\r\n**Tutorial Author(s)**: [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/)\r\n\r\n**Tutorial Contributor(s)**: [Alexandre Cormier](https://training.galaxyproject.org/training-material/hall-of-fame/alexcorm/), [Laura Leroi](https://training.galaxyproject.org/training-material/hall-of-fame/lleroi/), [Erwan Corre](https://training.galaxyproject.org/training-material/hall-of-fame/r1corre/), [Stéphanie Robin](https://training.galaxyproject.org/training-material/hall-of-fame/stephanierobin/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Cristóbal Gallardo](https://training.galaxyproject.org/training-material/hall-of-fame/gallardoalba/), [Anthony 
Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Jonathan Kreplak](https://training.galaxyproject.org/training-material/hall-of-fame/jkreplak/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Simon Bray](https://training.galaxyproject.org/training-material/hall-of-fame/simonbray/)\r\n\r\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/)\r\n\r\n**Grants(s)**: [Gallantries: Bridging Training Communities in Life Science, Environment and Health](https://training.galaxyproject.org/training-material/hall-of-fame/gallantries/), [EuroScienceGateway](https://training.galaxyproject.org/training-material/hall-of-fame/eurosciencegateway/)\r\n\r\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"EuroScienceGateway, Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1525?version=1","name":"1.0","author":["Anthony Bretaudeau"],"descriptor_type":["GALAXY"]}]},{"id":"1526","url":"https://workflowhub.eu/workflows/1526","name":"Image prediction with BioImage.IO model (imported from uploaded file)","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Using BioImage.IO models for image analysis in Galaxy](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/process-image-bioimageio/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow 
Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Diana Chiang Jurado, Leonid Kostrykin\n\n**Tutorial Author(s)**: [Diana Chiang Jurado](https://training.galaxyproject.org/training-material/hall-of-fame/dianichj/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/)\n\n**Tutorial Contributor(s)**: [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Diana Chiang Jurado](https://training.galaxyproject.org/training-material/hall-of-fame/dianichj/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [Anup Kumar](https://training.galaxyproject.org/training-material/hall-of-fame/anuprulez/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1526?version=1","name":"3.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1527","url":"https://workflowhub.eu/workflows/1527","name":"AnnData to Seurat conversion","description":"AnnData to Seurat format conversion (manually using Galaxy buttons)\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Converting 
between common single cell data formats](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-data-ingest/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela, Morgan Howells\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Morgan Howells](https://training.galaxyproject.org/training-material/hall-of-fame/hexhowells/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Grants(s)**: [DASH UK](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-uk-dash/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1527?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1528","url":"https://workflowhub.eu/workflows/1528","name":"2025_2D_spot_detection","description":"The present workflow allows to perform 2D spots/blobs detection on specific channel and stack of an image.\nImages will be directly fetched from IDR by including the IDs. A pre-processing steps with histogram normalization is performed before the blobs detection.\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Quantification of single-molecule RNA fluorescence in situ hybridization (smFISH) in yeast cell lines](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/2D-spot-detection/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n* Uses [Galaxy Workflow Comments](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_comments.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Riccardo\n\n**Tutorial Author(s)**: [Riccardo Massei](https://training.galaxyproject.org/training-material/hall-of-fame/rmassei/)\n\n**Tutorial Contributor(s)**: [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Riccardo Massei](https://training.galaxyproject.org/training-material/hall-of-fame/rmassei/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/)\n\n**Funder(s)**: [Deutsche Forschungsgemeinschaft](https://training.galaxyproject.org/training-material/hall-of-fame/dfg/)\n\n**Grants(s)**: [NFDI4Bioimage](https://training.galaxyproject.org/training-material/hall-of-fame/nfdi4bioimage/)\n\n[![gtn 
star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1528?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1529","url":"https://workflowhub.eu/workflows/1529","name":"Inferring Trajectories with Scanpy Tutorial Workflow","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Inferring single cell trajectories with Scanpy](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_trajectories/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Marisa Loach\n\n**Tutorial Author(s)**: [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/)\n\n**Tutorial Contributor(s)**: [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Marisa Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Saskia 
Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1529?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1530","url":"https://workflowhub.eu/workflows/1530","name":"Essential genes detection with Transposon insertion sequencing","description":"Essential genes detection with Transposon insertion sequencing\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Essential genes detection with Transposon insertion sequencing](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/tnseq/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Delphine Lariviere](https://training.galaxyproject.org/training-material/hall-of-fame/delphine-l/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational 
workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1530?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1531","url":"https://workflowhub.eu/workflows/1531","name":"AnnData to Cell Data Set (CDS) conversion","description":"AnnData to CDS format conversion (manually using Galaxy buttons). This workflow does not include renaming the column containing gene symbols. \n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Converting between common single cell data formats](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-data-ingest/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Morgan Howells](https://training.galaxyproject.org/training-material/hall-of-fame/hexhowells/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Pablo Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Wendi 
Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Grants(s)**: [DASH UK](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-uk-dash/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1531?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1532","url":"https://workflowhub.eu/workflows/1532","name":"Workflow constructed from Tutorial 'Introduction to Image Analysis using Galaxy'","description":"## Associated Tutorial\n\nThis workflows is part of the tutorial [Introduction to Image Analysis using Galaxy](https://training.galaxyproject.org/training-material/topics/imaging/tutorials/imaging-introduction/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Till Korten, Leonid Kostrykin\n\n**Tutorial Author(s)**: [Thomas Wollmann](https://training.galaxyproject.org/training-material/hall-of-fame/thomaswollmann/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Leonid Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Leonid 
Kostrykin](https://training.galaxyproject.org/training-material/hall-of-fame/kostrykin/), [Thomas Wollmann](https://training.galaxyproject.org/training-material/hall-of-fame/thomaswollmann/)\n\n**Funder(s)**: [ELIXIR Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1532?version=1","name":"6.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1533","url":"https://workflowhub.eu/workflows/1533","name":"Generating a single cell matrix using Alevin","description":"This workflow generates a single cell matrix using Alevin. \n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Generating a single cell matrix using Alevin](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-case_alevin/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela, Wendi Bacon\n\n**Tutorial Author(s)**: [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Jonathan Manning](https://training.galaxyproject.org/training-material/hall-of-fame/pinin4fjords/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Marisa 
Loach](https://training.galaxyproject.org/training-material/hall-of-fame/MarisaJL/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Teresa Müller](https://training.galaxyproject.org/training-material/hall-of-fame/teresa-m/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Jonathan Manning](https://training.galaxyproject.org/training-material/hall-of-fame/pinin4fjords/), [Beatriz Serrano-Solano](https://training.galaxyproject.org/training-material/hall-of-fame/beatrizserrano/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1533?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1534","url":"https://workflowhub.eu/workflows/1534","name":"Workflow constructed from history 'CRISPR tutorial Kenji'","description":"CRISPR screen analysis\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [CRISPR screen analysis](https://training.galaxyproject.org/training-material/topics/genome-annotation/tutorials/crispr-screen/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n\n\n\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Maria Doyle](https://training.galaxyproject.org/training-material/hall-of-fame/mblue9/), [Kenji Fujihara](https://training.galaxyproject.org/training-material/hall-of-fame/kenjifujihara/), [Twishi 
Gulati](https://training.galaxyproject.org/training-material/hall-of-fame/twishigulati/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1534?version=1","name":"1.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1535","url":"https://workflowhub.eu/workflows/1535","name":"EBI SCXA to AnnData (Scanpy) or Seurat Object","description":"Creates input file for Filter, Plot, Explore tutorial\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Importing files from public atlases](https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/EBI-retrieval/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n* Includes a [Galaxy Workflow Report](https://training.galaxyproject.org/training-material/faqs/galaxy/workflows_report_view.html)\n\n## Thanks to...\n\n**Workflow Author(s)**: Julia Jakiela\n\n**Tutorial Author(s)**: [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Tutorial Contributor(s)**: [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Julia Jakiela](https://training.galaxyproject.org/training-material/hall-of-fame/wee-snufkin/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/), [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Pablo 
Moreno](https://training.galaxyproject.org/training-material/hall-of-fame/pcm32/), [Pavankumar Videm](https://training.galaxyproject.org/training-material/hall-of-fame/pavanvidem/), [Mehmet Tekman](https://training.galaxyproject.org/training-material/hall-of-fame/mtekman/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Grants(s)**: [DASH UK](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-uk-dash/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1535?version=1","name":"2.0","author":[],"descriptor_type":["GALAXY"]}]},{"id":"1536","url":"https://workflowhub.eu/workflows/1536","name":"GTN Training: Galaxy 101 For Everyone","description":"introduction\n\n## Associated Tutorial\n\nThis workflows is part of the tutorial [Galaxy Basics for everyone](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.html), available in the [GTN](https://training.galaxyproject.org)\n\n## Features\n\n* Includes [Galaxy Workflow Tests](https://training.galaxyproject.org/training-material/faqs/gtn/workflow_run_test.html)\n\n## Thanks to...\n\n**Tutorial Author(s)**: [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/), [Nadia Goué](https://training.galaxyproject.org/training-material/hall-of-fame/nagoue/), [Christopher Barnett](https://training.galaxyproject.org/training-material/hall-of-fame/chrisbarnettster/), [Michele Maroni](https://training.galaxyproject.org/training-material/hall-of-fame/michelemaroni/), [Olha 
Nahorna](https://training.galaxyproject.org/training-material/hall-of-fame/olanag1/), [Dave Clements](https://training.galaxyproject.org/training-material/hall-of-fame/tnabtaf/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/)\n\n**Tutorial Contributor(s)**: [Helena Rasche](https://training.galaxyproject.org/training-material/hall-of-fame/hexylena/), [Saskia Hiltemann](https://training.galaxyproject.org/training-material/hall-of-fame/shiltemann/), [Matthias Bernt](https://training.galaxyproject.org/training-material/hall-of-fame/bernt-matthias/), [Nicola Soranzo](https://training.galaxyproject.org/training-material/hall-of-fame/nsoranzo/), [Anne Fouilloux](https://training.galaxyproject.org/training-material/hall-of-fame/annefou/), [Michele Maroni](https://training.galaxyproject.org/training-material/hall-of-fame/michelemaroni/), [Martin Čech](https://training.galaxyproject.org/training-material/hall-of-fame/martenson/), [Jennifer Hillman-Jackson](https://training.galaxyproject.org/training-material/hall-of-fame/jennaj/), [Nadia Goué](https://training.galaxyproject.org/training-material/hall-of-fame/nagoue/), [Anthony Bretaudeau](https://training.galaxyproject.org/training-material/hall-of-fame/abretaud/), [Björn Grüning](https://training.galaxyproject.org/training-material/hall-of-fame/bgruening/), [Alex Ostrovsky](https://training.galaxyproject.org/training-material/hall-of-fame/astrovsky01/), [Mélanie Petera](https://training.galaxyproject.org/training-material/hall-of-fame/melpetera/), [Nate Coraor](https://training.galaxyproject.org/training-material/hall-of-fame/natefoo/), [David López](https://training.galaxyproject.org/training-material/hall-of-fame/davelopez/), [Bérénice Batut](https://training.galaxyproject.org/training-material/hall-of-fame/bebatut/), [Wendi Bacon](https://training.galaxyproject.org/training-material/hall-of-fame/nomadscientist/)\n\n**Funder(s)**: [ELIXIR 
Europe](https://training.galaxyproject.org/training-material/hall-of-fame/elixir-europe/)\n\n[![gtn star logo followed by the word workflows](https://training.galaxyproject.org/training-material/assets/branding/gtn-workflows.png)](https://training.galaxyproject.org/training-material/)","organization":"Galaxy Training Network","toolclass":{"id":"1","name":"Workflow","description":"A computational workflow"},"versions":[{"id":"1","url":"https://workflowhub.eu/workflows/1536?version=1","name":"8.0","author":[],"descriptor_type":["GALAXY"]}]}]