Data underlying the publication "An Efficient Triplex TaqMan Quantitative PCR to Detect a Blackleg-Causing Lineage of Pectobacterium brasiliense in Potato Based on a Pangenome Analysis" by van der Lee et al. (2023).

# Installing PanTools dependencies. We provide conda environment files for installing all dependencies, and we highly recommend using mamba to create the environment.

 mamba env create -n pantools -f conda_linux.yml

# Activate the environment

 conda activate pantools

# Obtaining the correct PanTools version. The pangenome analysis was performed with PanTools v3.4.0.
 
 git clone https://git.wur.nl/bioinformatics/pantools.git
 cd pantools
 git checkout pantools_v3
 mvn package
 
 cd addons/
 wget https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/interpro.xml.gz
 gzip -d interpro.xml.gz

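# Installing PanTools dependencies. This repeats the installation step at the top of this file and can be skipped if the 'pantools' environment already exists. We provide conda environment files for installing all dependencies, and we highly recommend using mamba to create the environment.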
 
 mamba env create -n pantools -f conda_linux.yml

# Reproduce the main steps of the pangenome analysis. Be aware that the path to the .jar file must be updated, as well as the paths to the input data listed in the provided text files (genomes.txt, annotations.txt, functions.txt).

 java -jar -Xms40G -Xmx120G pantools/target/pantools-3.4.0.jar build_pangenome -dp brasiliense_DB -gf input_data/genomes.txt
 java -jar pantools/target/pantools-3.4.0.jar add_annotations -dp brasiliense_DB -af input_data/annotations.txt
 java -jar pantools/target/pantools-3.4.0.jar add_phenotypes -dp brasiliense_DB --phenotype input_data/phenotypes.txt
 java -jar pantools/target/pantools-3.4.0.jar group -dp brasiliense_DB -tn 12 
 java -jar pantools/target/pantools-3.4.0.jar gene_classification -dp brasiliense_DB --phenotype virulence
 java -jar pantools/target/pantools-3.4.0.jar add_functions -dp brasiliense_DB -if input_data/functions.txt 
 
 Core phylogeny steps: 
  java -jar pantools/target/pantools-3.4.0.jar core_phylogeny -dp brasiliense_DB -tn 12
  iqtree -nt 24 -s brasiliense_DB/core_snp_tree/informative.fasta -redo -bb 10000
  java -jar pantools/target/pantools-3.4.0.jar rename_phylogeny -dp brasiliense_DB/core_snp_tree/informative.fasta --phenotype name -if brasiliense_DB/core_snp_tree/informative.fasta.treefile --mode no-numbers
  java -jar pantools/target/pantools-3.4.0.jar create_tree_templates -dp brasiliense_DB --phenotype name --mode no-numbers
 
 Visualize tree:
  Upload the Newick file (brasiliense_DB/core_snp_tree/informative.fasta_RENAMED.treefile) to iTOL (https://itol.embl.de).
  Add the coloring template (brasiliense_DB/tree_templates/label/name/virulence_including_assemblies.txt) to the tree in iTOL.

# Perform a local BLAST to confirm the absence/presence of LZI and TIR in the 116 original genome FASTA files. All genome FASTA files were combined into a single file from which a BLAST database can be constructed.

 makeblastdb -dbtype nucl -in blast/all_brasiliense.fasta
 blastn -db blast/all_brasiliense.fasta -query blast/LZI_sequence.fasta -outfmt 7
 blastn -db blast/all_brasiliense.fasta -query blast/TIR_sequence.fasta -outfmt 7

# Verify that the combined FASTA file holds the same sequences as the original files. The only difference is the genome number that precedes each contig header. Both commands below count the FASTA header lines and should report the same number.

 cat `ls input_data/brasiliense_genomes/*/prokka/*.fna | grep -v 'filtered' ` | grep '>'  | wc -l
 grep '>' blast/all_brasiliense.fasta | wc -l

# Perform a local BLAST to confirm the absence/presence of LZI and TIR in the genome of NAK 152 obtained from NCBI. The first two steps (download and decompression) can be skipped, as the genome is already included.
 
 wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/429/565/GCF_001429565.1_ASM142956v1/GCF_001429565.1_ASM142956v1_genomic.fna.gz
 gzip -d GCF_001429565.1_ASM142956v1_genomic.fna.gz
 
 makeblastdb -dbtype nucl -in blast/GCF_001429565.1_ASM142956v1_genomic.fna
 blastn -db blast/GCF_001429565.1_ASM142956v1_genomic.fna -query blast/LZI_sequence.fasta -outfmt 7
 blastn -db blast/GCF_001429565.1_ASM142956v1_genomic.fna -query blast/TIR_sequence.fasta -outfmt 7
