Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/designer_dna/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@

"""DesignerDNA Project."""

__version__: str = "v0.0.1"
__version__: str = "v0.0.2"
17 changes: 17 additions & 0 deletions src/designer_dna/_oligos.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,23 @@ def reverse(sequence: str) -> str:

"""

def gc(sequence: str) -> float:
    """Compute the gc content of a nucleotide sequence.

    Only uppercase "G" and "C" characters are counted. An empty sequence
    yields 0.0.

    Args:
        sequence (str): Nucleotide sequence string.

    Returns:
        (float) gc sequence fraction.

    Examples:
        .. code-block:: python

            gc("ATATAT") == 0.0
            gc("GGAA") == 0.5

    """

def m_complement(sequence: array[int], dna: bool = ...) -> None:
"""Complement a nucleotide sequence.

Expand Down
48 changes: 35 additions & 13 deletions src/designer_dna/_oligos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@
from libc.string cimport memcpy
from libc.stdlib cimport free, malloc

from common cimport (
StringView,
str_to_view,
to_str
)
from common cimport StringView, str_to_view, to_str

cdef extern from "Python.h":
    # Length of a str object, in code points.
    Py_ssize_t PyUnicode_GET_LENGTH(object)
    # Number of non-overlapping occurrences of the second argument inside the
    # [start, end) slice of the first. The C API returns Py_ssize_t (-1 on
    # error), so declaring it as int would truncate counts on long strings.
    Py_ssize_t PyUnicode_Count(object, object, Py_ssize_t, Py_ssize_t)

cdef extern from "oligos.h":
const unsigned char DNA[0x100]
Expand Down Expand Up @@ -109,6 +109,34 @@ cpdef str reverse(str sequence):
return sequence[::-1]


cpdef double gc(str sequence) noexcept:
    """Compute the gc content of a nucleotide sequence.

    Only uppercase "G" and "C" characters are counted.

    Args:
        sequence (str): Nucleotide sequence string.

    Returns:
        (float) gc sequence fraction, 0.0 for an empty sequence.

    Examples:
        .. code-block:: python

            gc("ATATAT") == 0.0
            gc("GGAA") == 0.5

    """
    # Py_ssize_t matches what the C API returns; narrowing to int would
    # truncate the length and the counts for very long sequences.
    cdef:
        Py_ssize_t length = PyUnicode_GET_LENGTH(sequence)
        Py_ssize_t g
        Py_ssize_t c

    # Avoid dividing by zero on an empty sequence.
    if length == 0:
        return 0.0

    g = PyUnicode_Count(sequence, "G", 0, length)
    c = PyUnicode_Count(sequence, "C", 0, length)

    # Divide as doubles explicitly so the result does not depend on the
    # module's integer-division directives, and return a double so the
    # fraction is not rounded to single precision on the way out.
    return (<double> (g + c)) / (<double> length)


cdef inline void _c_complement(
unsigned char* sequence,
Py_ssize_t length,
Expand Down Expand Up @@ -153,10 +181,7 @@ cdef void c_complement(
(void) Complement sequence in place.

"""
if dna:
_c_complement(sequence, length, &DNA[0])
else:
_c_complement(sequence, length, &RNA[0])
_c_complement(sequence, length, &DNA[0] if dna else &RNA[0])


cdef inline void v_complement(StringView* view, bint dna) noexcept:
Expand Down Expand Up @@ -259,10 +284,7 @@ cdef void c_reverse_complement(
(void) Complement sequence in place.

"""
if dna:
_c_reverse_complement(sequence, length, &DNA[0])
else:
_c_reverse_complement(sequence, length, &RNA[0])
_c_reverse_complement(sequence, length, &DNA[0] if dna else &RNA[0])


cdef inline void v_reverse_complement(StringView* view, bint dna) noexcept:
Expand Down
9 changes: 9 additions & 0 deletions src/designer_dna/oligos.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from ._oligonucleotides import manacher
from ._oligos import (
complement,
gc,
nrepeats,
palindrome,
reverse,
Expand All @@ -43,6 +44,7 @@
__all__ = [
"complement",
"complement_py",
"gc",
"manacher",
"nrepeats",
"nrepeats_py",
Expand Down Expand Up @@ -300,3 +302,10 @@ def nrepeats_py(sequence: str, n: int) -> int:
previous = phase

return max_val


def gc_py(sequence: str) -> float:
    """Calculate gc content of a nucleotide sequence.

    Only uppercase "G" and "C" characters are counted.

    Args:
        sequence (str): Nucleotide sequence string.

    Returns:
        (float) gc sequence fraction, 0.0 for an empty sequence.

    Examples:
        .. code-block:: python

            gc_py("ATATAT") == 0.0
            gc_py("GGAA") == 0.5

    """
    length = len(sequence)

    # Guard the empty sequence explicitly instead of relying on
    # short-circuit `and` to dodge the division by zero.
    if not length:
        return 0.0

    return (sequence.count("G") + sequence.count("C")) / length
23 changes: 23 additions & 0 deletions tests/unit/test_oligos.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,26 @@ def test_palindromes(
assert result == expected, f"Unexpected palindrome: {result}"
if result:
assert result == oligos.reverse_complement(result)


@pytest.mark.parametrize(
    "function",
    [
        _oligos.gc,
        oligos.gc_py,
    ],
)
@pytest.mark.parametrize(
    ["seq", "expected"],
    [
        ("", 0.0),
        ("A", 0.0),
        ("G", 1.0),
        ("GC", 1.0),
        ("ATGC", 0.5),
    ],
)
def test_gc(seq: str, expected: float, function: Callable[[str], float]) -> None:
    """Confirm gc content is calculated correctly.

    Each sequence is run through both the compiled and the pure-Python
    implementation and compared against the expected fraction.
    """
    result = function(seq)
    # Compare with a tolerance: the result is a floating-point fraction, and
    # exact equality would be brittle for values that are not exactly
    # representable.
    assert result == pytest.approx(expected), "Unexpected GC content calculation result."
Loading