diff --git a/src/designer_dna/__init__.py b/src/designer_dna/__init__.py
index 9e43c03..fb460af 100644
--- a/src/designer_dna/__init__.py
+++ b/src/designer_dna/__init__.py
@@ -29,4 +29,4 @@
 
 """DesignerDNA Project."""
 
-__version__: str = "v0.0.1"
+__version__: str = "v0.0.2"
diff --git a/src/designer_dna/_oligos.pyi b/src/designer_dna/_oligos.pyi
index 428a8ce..f5c86dc 100644
--- a/src/designer_dna/_oligos.pyi
+++ b/src/designer_dna/_oligos.pyi
@@ -57,6 +57,23 @@ def reverse(sequence: str) -> str:
 
     """
 
+def gc(sequence: str) -> float:
+    """Compute the gc content of a nucleotide sequence.
+
+    Args:
+        sequence (str): Nucleotide sequence string.
+
+    Returns:
+        (float) Fraction of uppercase "G"/"C" characters; 0.0 if empty.
+
+    Examples:
+        .. code-block:: python
+
+            gc("ATATAT") == 0.0
+            gc("GGAA") == 0.5
+
+    """
+
 def m_complement(sequence: array[int], dna: bool = ...) -> None:
     """Complement a nucleotide sequence.
 
diff --git a/src/designer_dna/_oligos.pyx b/src/designer_dna/_oligos.pyx
index 96c9427..cee7759 100644
--- a/src/designer_dna/_oligos.pyx
+++ b/src/designer_dna/_oligos.pyx
@@ -33,11 +33,11 @@
 from libc.string cimport memcpy
 from libc.stdlib cimport free, malloc
 
-from common cimport (
-    StringView,
-    str_to_view,
-    to_str
-)
+from common cimport StringView, str_to_view, to_str
+
+cdef extern from "Python.h":
+    Py_ssize_t PyUnicode_GET_LENGTH(object)
+    Py_ssize_t PyUnicode_Count(object, object, Py_ssize_t, Py_ssize_t)
 
 cdef extern from "oligos.h":
     const unsigned char DNA[0x100]
@@ -109,6 +109,34 @@ cpdef str reverse(str sequence):
     return sequence[::-1]
 
 
+cpdef double gc(str sequence) noexcept:
+    """Compute the gc content of a nucleotide sequence.
+
+    Args:
+        sequence (str): Nucleotide sequence string.
+
+    Returns:
+        (float) Fraction of uppercase "G"/"C" characters; 0.0 if empty.
+
+    Examples:
+        .. code-block:: python
+
+            gc("ATATAT") == 0.0
+            gc("GGAA") == 0.5
+
+    """
+    cdef Py_ssize_t length = PyUnicode_GET_LENGTH(sequence)
+
+    if length == 0:
+        return 0.0
+
+    cdef:
+        Py_ssize_t g = PyUnicode_Count(sequence, "G", 0, length)
+        Py_ssize_t c = PyUnicode_Count(sequence, "C", 0, length)
+
+    return <double>(g + c) / length
+
+
 cdef inline void _c_complement(
     unsigned char* sequence,
     Py_ssize_t length,
@@ -153,10 +181,7 @@ cdef void c_complement(
         (void) Complement sequence in place.
 
     """
-    if dna:
-        _c_complement(sequence, length, &DNA[0])
-    else:
-        _c_complement(sequence, length, &RNA[0])
+    _c_complement(sequence, length, &DNA[0] if dna else &RNA[0])
 
 
 cdef inline void v_complement(StringView* view, bint dna) noexcept:
@@ -259,10 +284,7 @@ cdef void c_reverse_complement(
         (void) Complement sequence in place.
 
     """
-    if dna:
-        _c_reverse_complement(sequence, length, &DNA[0])
-    else:
-        _c_reverse_complement(sequence, length, &RNA[0])
+    _c_reverse_complement(sequence, length, &DNA[0] if dna else &RNA[0])
 
 
 cdef inline void v_reverse_complement(StringView* view, bint dna) noexcept:
diff --git a/src/designer_dna/oligos.py b/src/designer_dna/oligos.py
index 2e5b07b..6ba7671 100644
--- a/src/designer_dna/oligos.py
+++ b/src/designer_dna/oligos.py
@@ -32,6 +32,7 @@
 from ._oligonucleotides import manacher
 from ._oligos import (
     complement,
+    gc,
     nrepeats,
     palindrome,
     reverse,
@@ -43,6 +44,8 @@
 __all__ = [
     "complement",
     "complement_py",
+    "gc",
+    "gc_py",
     "manacher",
     "nrepeats",
     "nrepeats_py",
@@ -300,3 +303,20 @@ def nrepeats_py(sequence: str, n: int) -> int:
         previous = phase
 
     return max_val
+
+
+def gc_py(sequence: str) -> float:
+    """Calculate gc content of a nucleotide sequence.
+
+    Args:
+        sequence (str): Nucleotide sequence string.
+
+    Returns:
+        (float) Fraction of uppercase "G"/"C" characters; 0.0 if empty.
+
+    """
+    length: int = len(sequence)
+    if not length:
+        return 0.0
+
+    return (sequence.count("G") + sequence.count("C")) / length
diff --git a/tests/unit/test_oligos.py b/tests/unit/test_oligos.py
index cf8414f..1766e67 100644
--- a/tests/unit/test_oligos.py
+++ b/tests/unit/test_oligos.py
@@ -308,3 +308,27 @@ def test_palindromes(
     assert result == expected, f"Unexpected palindrome: {result}"
     if result:
         assert result == oligos.reverse_complement(result)
+
+
+@pytest.mark.parametrize(
+    "function",
+    [
+        _oligos.gc,
+        oligos.gc_py,
+    ],
+)
+@pytest.mark.parametrize(
+    ["seq", "expected"],
+    [
+        ("", 0.0),
+        ("A", 0.0),
+        ("G", 1.0),
+        ("GC", 1.0),
+        ("ATGC", 0.5),
+        ("AGT", 1 / 3),
+    ],
+)
+def test_gc(seq: str, expected: float, function: Callable[[str], float]) -> None:
+    """Confirm gc content is calculated correctly."""
+    result = function(seq)
+    assert result == expected, "Unexpected GC content calculation result."