Quality of sequence reads
=========================

This script plots the sequencing quality scores from a FASTQ file
along with the sequence (base calls).

.. image-sg:: /examples/gallery/sequence/sequencing/images/sphx_glr_read_quality_001.png
   :alt: read quality
   :srcset: /examples/gallery/sequence/sequencing/images/sphx_glr_read_quality_001.png
   :class: sphx-glr-single-img

.. code-block:: Python

    # Code source: Patrick Kunzmann
    # License: BSD 3 clause

    from io import StringIO
    import matplotlib.pyplot as plt
    import numpy as np
    import biotite
    import biotite.sequence.io.fastq as fastq

    # Example record in FASTQ format, taken from
    # https://en.wikipedia.org/wiki/FASTQ_format
    fastq_text = """
    @SEQ_ID
    GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
    +
    !''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
    """

    # Parse the text from an in-memory buffer instead of a file on disk,
    # reading the quality characters with the "Sanger" score offset
    reads = fastq.FastqFile.read(StringIO(fastq_text), offset="Sanger")
    # Pick the record by its identifier: base calls plus per-base scores
    sequence, scores = fastq.get_sequence(reads, "SEQ_ID")

    # One X position per base of the read
    n_bases = len(sequence)
    positions = np.arange(n_bases)

    figure, ax = plt.subplots(figsize=(8.0, 2.0))
    # Draw one bar per position whose height is the score at that position;
    # the white edges visually separate neighboring bars
    ax.bar(
        x=positions,
        height=scores,
        width=1.0,
        color=biotite.colors["orange"],
        edgecolor="white",
        linewidth=1,
    )

    # Start at -1 to leave a gap between the Y-axis and the first bar
    ax.set_xlim(-1, n_bases)
    # Fixed Y-range used for the Phred scores
    ax.set_ylim(0, 40)
    ax.set_ylabel("Phred score")

    # Hide every spine except the left one
    for side in ("right", "top", "bottom"):
        ax.spines[side].set_visible(False)

    # Use the sequence symbols as X tick labels, without drawing tick marks
    ax.set_xticks(positions)
    ax.set_xticklabels(sequence.symbols)
    ax.xaxis.set_ticks_position("none")

    figure.tight_layout()
    plt.show()