QUAST

Quality Assessment Tool for Genome Assemblies by CAB

{"subreports":[],"report":[["Genome statistics",[]],["Reads mapping",[]],["Misassemblies",[]],["Unaligned",[]],["Mismatches",[{"values":[0],"quality":"Less is better","isMain":false,"metricName":"# N's"},{"values":["0.00"],"quality":"Less is better","isMain":true,"metricName":"# N's per 100 kbp"}]],["Statistics without reference",[{"values":[7],"quality":"Equal","isMain":true,"metricName":"# contigs"},{"values":[13],"quality":"Equal","isMain":false,"metricName":"# contigs (>= 0 bp)"},{"values":[7],"quality":"Equal","isMain":false,"metricName":"# contigs (>= 1000 bp)"},{"values":[6],"quality":"Equal","isMain":false,"metricName":"# contigs (>= 5000 bp)"},{"values":[5],"quality":"Equal","isMain":false,"metricName":"# contigs (>= 10000 bp)"},{"values":[5],"quality":"Equal","isMain":false,"metricName":"# contigs (>= 25000 bp)"},{"values":[4],"quality":"Equal","isMain":false,"metricName":"# contigs (>= 50000 bp)"},{"values":[141900],"quality":"More is better","isMain":true,"metricName":"Largest contig"},{"values":[484585],"quality":"More is better","isMain":true,"metricName":"Total length"},{"values":[486016],"quality":"More is better","isMain":false,"metricName":"Total length (>= 0 bp)"},{"values":[484585],"quality":"More is better","isMain":true,"metricName":"Total length (>= 1000 bp)"},{"values":[480756],"quality":"More is better","isMain":false,"metricName":"Total length (>= 5000 bp)"},{"values":[475592],"quality":"More is better","isMain":true,"metricName":"Total length (>= 10000 bp)"},{"values":[475592],"quality":"More is better","isMain":false,"metricName":"Total length (>= 25000 bp)"},{"values":[439403],"quality":"More is better","isMain":true,"metricName":"Total length (>= 50000 bp)"},{"values":[125811],"quality":"More is better","isMain":false,"metricName":"N50"},{"values":[83198],"quality":"More is better","isMain":false,"metricName":"N75"},{"values":[2],"quality":"Less is better","isMain":false,"metricName":"L50"},{"values":[4],"quality":"Less is 
better","isMain":false,"metricName":"L75"},{"values":["51.40"],"quality":"Equal","isMain":false,"metricName":"GC (%)"}]],["K-mer-based statistics",[]],["Predicted genes",[]],["Similarity statistics",[]],["Reference statistics",[]]],"referenceName":"","date":"26 September 2019, Thursday, 17:16:34","subreferences":[],"minContig":500,"order":[0],"assembliesNames":["spades.fasta.txt"]}
{{ qualities }}
{{ mainMetrics }}
{"lists_of_lengths":[[141900,125811,88494,83198,36189,5164,3829]],"filenames":["spades.fasta.txt"]}
{{ assembliesLengths }}
{{ referenceLength }}
{"tickX":1}
{"coord_y":[[141900,141900,141900,125811,125811,88494,88494,83198,83198,36189,36189,5164,5164,3829,3829,0.0]],"coord_x":[[0.0,1e-10,29.28278836530227,29.28278836540227,55.245416180855784,55.245416180955786,73.50722783412611,73.50722783422611,90.67614556785703,90.67614556795704,98.1441852306613,98.1441852307613,99.20983934707017,99.20983934717017,100.0,100.0000000001]],"filenames":["spades.fasta.txt"]}
{{ coordNGx }}
{{ coordNAx }}
{{ coordNGAx }}
{{ coordmisassemblies }}
{{ featuresInContigs }}
{{ operonsInContigs }}
[{{ num_contigs }}, {{ Largest_alignment }}, {{ Total_aligned_length }}, {{ num_misassemblies }}, {{ Misassembled_contigs_length }}, {{ num_mismatches_per_100_kbp }}, {{ num_indels_per_100_kbp }}, {{ num_N's_per_100_kbp }}, {{ Genome_fraction }}, {{ Duplication_ratio }}, {{ NGA50 }}]
{{ allMisassemblies }}
{{ krona }}
{"list_of_GC_distributions":[[[0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0,75.0,76.0,77.0,78.0,79.0,80.0,81.0,82.0,83.0,84.0,85.0,86.0,87.0,88.0,89.0,90.0,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,2,5,3,5,14,15,9,16,16,15,19,20,32,23,36,46,45,59,55,74,82,110,130,135,205,189,228,240,306,330,312,312,333,274,283,228,190,133,91,76,65,29,29,5,10,2,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]],"reference_index":null,"list_of_GC_contigs_distributions":[[[0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100],[0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0]]],"lists_of_gc_info":null,"filenames":["spades.fasta.txt"]}
{"links_names":["View in Icarus contig browser"],"links":["icarus_viewers/contig_size_viewer.html"]}
{ "# contigs" : "is the total number of contigs in the assembly.", "Largest contig" : "is the length of the longest contig in the assembly.", "Total length" : "is the total number of bases in the assembly.", "Reference length" : "is the total number of bases in the reference.", "# contigs (>= 0 bp)" : "is the total number of contigs in the assembly that have size greater than or equal to 0 bp.", "Total length (>= 0 bp)" : "is the total number of bases in the contigs having size greater than or equal to 0 bp.", "N50" : "is the contig length such that using longer or equal length contigs produces half (50%) of the bases of the assembly. Usually there is no value that produces exactly 50%, so the technical definition is the maximum length x such that using contigs of length at least x accounts for at least 50% of the total assembly length.", "NG50" : "is the contig length such that using longer or equal length contigs produces half (50%) of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "N75" : "is the contig length such that using longer or equal length contigs produces 75% of the bases of the assembly. Usually there is no value that produces exactly 75%, so the technical definition is the maximum length x such that using contigs of length at least x accounts for at least 75% of the total assembly length.", "NG75" : "is the contig length such that using longer or equal length contigs produces 75% of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "L50" : "is the minimum number of contigs that produce half (50%) of the bases of the assembly. In other words, it's the number of contigs of length at least N50.", "LG50" : "is the minimum number of contigs that produce half (50%) of the bases of the reference genome. In other words, it's the number of contigs of length at least NG50. 
This metric is computed only if a reference genome is provided.", "L75" : "is the minimum number of contigs that produce 75% of the bases of the assembly. In other words, it's the number of contigs of length at least N75.", "LG75" : "is the minimum number of contigs that produce 75% of the bases of the reference genome. In other words, it's the number of contigs of length at least NG75. This metric is computed only if a reference genome is provided.", "NA50" : "is N50 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "NGA50" : "is NG50 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "NA75" : "is N75 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "NGA75" : "is NG75 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "LA50" : "is L50 where aligned blocks are counted instead of contigs. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "LGA50" : "is LG50 where aligned blocks are counted instead of contigs. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "LA75" : "is L75 where aligned blocks are counted instead of contigs. 
I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "LGA75" : "is LG75 where aligned blocks are counted instead of contigs. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "Average %IDY" : "is the average of alignment identity percent (Nucmer measure of alignment accuracy) among all contigs.", "# misassemblies" : "is the number of positions in the assembled contigs where the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference (relocation) or they overlap on more than 1 kbp (relocation) or flanking sequences align on different strands (inversion) or different chromosomes (translocation).", "# large block misassemblies" : "is the number of misassemblies between alignments with length greater than or equal to 3 kbp and with the misassembly threshold equal to 5 kbp (instead of default 1 kbp for regular misassemblies).", "# misassembled contigs" : "is the number of contigs that contain misassembly events.", "Misassembled contigs length" : "is the number of total bases contained in all contigs that have one or more misassemblies.", "# relocations" : "is the number of relocation events among all misassembly events. Relocation is a misassembly where the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference, or they overlap by more than 1 kbp and both flanking sequences align on the same chromosome.", "# translocations" : "is the number of translocation events among all misassembly events. Translocation is a misassembly where the flanking sequences align on different chromosomes.", "# interspecies translocations" : "is the number of interspecies translocation events among all misassembly events. 
Interspecies translocation is a misassembly where the flanking sequences align on different references (based on alignments to the combined reference).", "# inversions" : "is the number of inversion events among all misassembly events. Inversion is a misassembly where it is not a relocation and the flanking sequences align on opposite strands of the same chromosome.", "# large relocations" : "is the number of relocation events among all large block misassemblies. Relocation is a misassembly where the left flanking sequence aligns over 5 kbp away from the right flanking sequence on the reference, or they overlap by more than 5 kbp and both flanking sequences align on the same chromosome.", "# large translocations" : "is the number of translocation events among all large block misassemblies. Translocation is a misassembly where the flanking sequences align on different chromosomes.", "# large i/s translocations" : "is the number of interspecies translocation events among all large block misassemblies. Interspecies translocation is a misassembly where the flanking sequences align on different references (based on alignments to the combined reference).", "# large inversions" : "is the number of inversion events among all large block misassemblies. Inversion is a misassembly where it is not a relocation and the flanking sequences align on opposite strands of the same chromosome.", "# local misassemblies" : "is the number of local misassemblies. We define a local misassembly breakpoint as a breakpoint that satisfies these conditions:
  1. Two or more distinct alignments cover the breakpoint.
  2. The gap between left and right flanking sequences is less than the misassembly threshold (1 kbp by default).
  3. The left and right flanking sequences both are on the same strand of the same chromosome of the reference genome.
", "# scaffold gap ext. mis." : "is the number of scaffold gap size extensive misassemblies. We define a scaffold gap size misassembly as a breakpoint where the flanking sequences are combined in a scaffold at the wrong distance. These misassemblies are not included in the total number of misassemblies. ", "# scaffold gap loc. mis." : "is the number of scaffold gap size local misassemblies. Such breakpoints satisfy the local misassembly conditions but they occur inside scaffolds. These misassemblies are not included in the total number of local misassemblies. ", "# possibly misassembled contigs": "is the number of contigs that contain a large unaligned fragment (default min length is 500 bp) and thus could possibly contain an interspecies translocation with an unknown reference.", "# possible misassemblies" : "is the number of putative interspecies translocations in possibly misassembled contigs if each large unaligned fragment is assumed to be a fragment of an unknown reference.", "# intergenomic misassemblies" : "is the number of all found and putative (possible) interspecies translocations.", "# structural variations" : "is the number of misassemblies matched with structural variations.", "# possible TEs" : "is the number of misassemblies possibly caused by transposable elements (TEs). We define a possible TE as an event that satisfies these conditions:
  1. There are two misassembly breakpoints of the same type around a short alignment (less than 7 kbp by default)
  2. The gap between two long flanking sequences on the sides of the short alignment is less than 7 kbp.
  3. The long flanking sequences both are on the same strand of the same chromosome of the reference genome.
", "# unaligned mis. contigs" : "is the number of contigs that have the number of unaligned bases more than 50% of contig length and a misassembly event in their aligned fragment. Note that such misassemblies are not counted in # misassemblies and other misassemblies statistics.", "# fully unaligned contigs" : "is the number of contigs that have no alignment to the reference sequence.", "Fully unaligned length" : "is the total number of bases contained in all fully unaligned contigs. Uncalled bases (N's) are not counted.", "# partially unaligned contigs" : "is the number of contigs that have at least one alignment to the reference sequence but also have at least one unaligned fragment of length ≥ unaligned-part-size threshold.", "Partially unaligned length" : "is the total number of unaligned bases in all partially unaligned contigs. Uncalled bases (N's) are not counted.", "# ambiguous contigs" : "is the number of contigs that have reference alignments of equal quality in multiple locations on the reference.", "Ambiguous contigs length" : "is the total number of bases contained in all ambiguous contigs.", "Genome fraction (%)" : "is the total number of aligned bases in the reference, divided by the genome size. A base in the reference genome is counted as aligned if there is at least one contig with at least one alignment to this base. 
Contigs from repeat regions may map to multiple places, and thus may be counted multiple times in this quantity.", "GC (%)" : "is the total number of G and C nucleotides in the assembly, divided by the total length of the assembly.", "Reference GC (%)" : "is the total number of G and C nucleotides in the reference, divided by the total length of the reference.", "# mismatches per 100 kbp" : "is the average number of mismatches per 100000 aligned bases.", "# mismatches" : "is the number of mismatches in all aligned bases.", "# indels per 100 kbp" : "is the average number of indels per 100000 aligned bases.", "# indels" : "is the number of indels in all aligned bases", "# indels (<= 5 bp)" : "is the number of indels of length less than or equal to 5 bp", "# indels (> 5 bp)" : "is the number of indels of length greater than 5 bp", "Indels length" : "is the number of total bases contained in all indels", "# genomic features" : "is the number of genomic features (genes, transcripts, CDS) in the assembly (complete and partial), based on a user-provided annotated list of gene positions in the reference genome. A feature counts as 'partially covered' if the assembly contains at least 100 bp of this feature but not the whole feature.", "# operons" : "is the number of operons in the assembly (complete and partial), based on a user-provided annotated list of operon positions in the reference genome. 
An operon counts as 'partially covered' if the assembly contains at least 100 bp of this operon but not the whole operon.", "# predicted genes (unique)" : "is the number of unique genes in the assembly found by a gene prediction tool.", "# predicted genes (>= 0 bp)" : "is the number of found genes having length greater than or equal to 0 bp.", "Complete BUSCO (%)" : "is the percent of BUSCO (Universal Single-Copy Ortholog) genes found in the assembly in a complete form.", "Partial BUSCO (%)" : "is the percent of BUSCO (Universal Single-Copy Ortholog) genes found in the assembly in a partial form.", "Cumulative length" : "plot shows the growth of assembly contig lengths. On the x-axis, contigs are ordered from largest (contig #1) to smallest. The y-axis gives the size of the x largest contigs in the assembly.", "Nx" : "plot shows the Nx metric value as x varies from 0 to 100. Nx is the maximum contig length y such that using contigs of length at least y accounts for at least x% of the total assembly length.", "NGx" : "plot shows the NGx metric value as x varies from 0 to 100. NGx is the maximum contig length y such that using contigs of length at least y accounts for at least x% of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "NAx" : "plot shows the NAx metric value as x varies from 0 to 100. NAx is computed similarly to Nx, but based on lengths of aligned blocks instead of contig lengths. Contigs are broken into aligned blocks at misassembly breakpoints. NAx is the maximum block length y such that using blocks of length at least y accounts for at least x% of the bases of the assembly. This metric is computed only if a reference genome is provided.", "NGAx" : "plot shows the NGAx metric value as x varies from 0 to 100. NGAx is computed similarly to NGx, but based on lengths of aligned blocks instead of contig lengths. Contigs are broken at misassembly breakpoints. 
NGAx is the maximum block length y such that using blocks of length at least y accounts for at least x% of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "GC content" : "plot shows the distribution of GC percentage among the contigs, i.e., the total number of bases in contigs with such GC content. Typically, the distribution is approximately Gaussian. However, for some genomes it is not Gaussian. For assembly projects with contaminants, the GC distribution of the contaminants often differs from the reference genome and may give a superposition of multiple curves with different peaks.", "Duplication ratio" : "is the total number of aligned bases in the assembly (i.e. Total length - Fully unaligned length - Partially unaligned length), divided by the total number of aligned bases in the reference (see the Genome fraction (%) metric). If the assembly contains many contigs that cover the same regions of the reference, its Duplication ratio may be much larger than 1. This may occur due to overestimating repeat multiplicities and due to small overlaps between contigs, among other reasons.", "Largest alignment" : "is the length of the largest continuous alignment in the assembly. This metric is usually equal to the Largest contig metric, but it can be smaller if the largest contig of the assembly contains a misassembly event.", "Total aligned length" : "is the total number of aligned bases in the assembly.", "Avg contig read support" : "is the average coverage of contigs that have large unique alignments to the reference. 
Read coverage is extracted from contig names (SPAdes/Velvet naming scheme only).", "# N's" : "is the total number of uncalled bases (N's) in the assembly.", "# N's per 100 kbp" : "is the average number of uncalled bases (N's) per 100000 assembly bases.", "# mapped" : "is the number of reads that mapped to the assembly.", "Mapped (%)" : "is the percent of reads that mapped to the assembly.", "# properly paired" : "is the number of reads that mapped within expected range and orientation of each other to the assembly.", "Properly paired (%)" : "is the percent of reads that mapped within expected range and orientation of each other to the assembly.", "# singletons" : "is the number of reads which do not overlap any other reads.", "Singletons (%)" : "is the percent of reads which do not overlap any other reads.", "# misjoint mates" : "is the number of reads with mate mapped to a different contig.", "Misjoint mates (%)" : "is the percent of reads with mate mapped to a different contig.", "Avg. coverage depth" : "is the average depth of coverage.", "Coverage >= 1x (%)" : "is the total number of bases with at least 1x coverage, divided by the total length of the assembly.", "# similar correct contigs" : "is the number of correct contigs similar among > 50% assemblies (see Icarus for visualization).", "# similar misassembled blocks" : "is the number of misassembled blocks similar among > 50% assemblies (see Icarus for visualization)." }