Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ classifiers = [
"Programming Language :: Python :: 3",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Bio-Informatics"
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
dynamic = ["version", "readme", "dependencies"]

Expand All @@ -28,7 +28,7 @@ readme = { file = ["README.md"], content-type = "text/markdown" }
dependencies = { file = ["requirements.txt"] }

[tool.setuptools]
package-dir = { "" = "src"}
package-dir = { "" = "src" }

[tool.setuptools.packages.find]
where = ["src"]
Expand Down Expand Up @@ -95,7 +95,14 @@ max-line-length = 88

[tool.pylint."messages control"]
disable = [
"R1731" # consider-using-max-builtin
"R1731", # consider-using-max-builtin
"E0401", # import-error
]

[tool.pylint."*.pyi"]
disable = [
"W0613", # unused-argument
"W0231", # super-init-not-called
]

[tool.ruff]
Expand Down
8 changes: 8 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@
# extra_link_args=[openmp],
)
)
# Register the Cython module behind ``designer_dna._oligonucleotides``. It is built
# as C++ and is given the package's header directory on its include path.
extensions.append(
    Extension(
        name="designer_dna._oligonucleotides",
        sources=["src/designer_dna/_oligonucleotides.pyx"],
        language="c++",
        include_dirs=["src/designer_dna/headers"],
    )
)


# Add cython directive to specify python version target
Expand Down
48 changes: 48 additions & 0 deletions src/designer_dna/_oligonucleotides.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# BSD 3-Clause License
#
# Copyright (c) 2025, Spill-Tea
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Oligonucleotide functions with the help of C++."""

def manacher(sequence: str, dna: bool = ...) -> str:
    """Find the longest palindromic substring within a nucleotide sequence.

    Args:
        sequence (str): Nucleotide sequence string.
        dna (bool): Sequence is DNA, else RNA. The compiled implementation
            defaults this to True.

    Returns:
        (str): Longest palindromic substring within sequence.

    Notes:
        * This is a Cython/C++ implementation of the O(n) Manacher's algorithm.
        * Candidate palindromes are matched against the complemented sequence,
          not against the sequence itself.
        * This algorithm is typically slower than the O(n log n) palindrome
          function for strings up to 2^23 characters (not benchmarked beyond this
          limit).
        * This function is provided primarily for demonstration purposes.

    """
127 changes: 127 additions & 0 deletions src/designer_dna/_oligonucleotides.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# BSD 3-Clause License
#
# Copyright (c) 2025, Spill-Tea
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# distutils: language = c++

"""Oligonucleotide functions with the help of C++."""
from narray cimport NumericArray
from designer_dna._oligos cimport v_complement
from common cimport StringView, str_to_view
from libc.stdlib cimport free


cdef extern from "Python.h":
str PyUnicode_Join(str, str)


cdef inline void _compute(
    char* s,
    char* c,
    NumericArray[int]* arr,
    ssize_t n,
):
    """Fill ``arr`` with the palindrome radius found around every center of ``s``.

    Args:
        s (char*): Padded reference buffer: the sequence with ``#`` between every
            character, bracketed by ``^`` and ``$`` (see the layout sketch below).
        c (char*): Buffer of the same length holding the complemented copy of ``s``.
        arr (NumericArray[int]*): Output array; ``arr[0][i]`` receives the radius
            (in padded-buffer positions) of the palindrome centered at ``i``.
        n (ssize_t): Number of characters in ``s`` and ``c``.

    Notes:
        * Positions 0 and ``n - 1`` (the sentinels) are never used as centers and
          are not written to.
        * A pair of positions matches when ``s[a] == c[b]`` and ``s[b] == c[a]``,
          i.e. the characters are compared against the complemented buffer.

    """
    cdef:
        # ``radius`` stores the right edge (center + its radius) of the palindrome
        # that currently reaches furthest to the right, not a radius by itself.
        ssize_t mirror, a, b, i, stemp, center = 0, radius = 0
        # Typed temporaries so that values handed to ``arr`` / ``min`` are C ints.
        int temp, zero = 0

    for i in range(1, n - 1):
        # parity: 0 1 0 1 0 1 0 1 0
        # string: ^ # A # N # T # $
        # index:  0 1 2 3 4 5 6 7 8

        # Even positions hold sequence characters, so a palindrome centered there
        # has odd length. The expansion below never inspects the center itself,
        # so such a center is rejected here unless the character equals its own
        # complement.
        if i % 2 == 0 and s[i] != c[i]:
            arr[0][i] = zero
            continue

        # Reuse the radius already known for the mirrored position when ``i`` lies
        # inside the furthest-reaching palindrome found so far.
        mirror = 2 * center - i
        if i < radius:
            temp = <int> (radius - i)
            arr[0][i] = min(temp, arr[0][mirror])
        else:
            arr[0][i] = zero

        # Center expansion method
        stemp = <ssize_t> arr[0][i]
        a = i + 1 + stemp
        b = i - 1 - stemp
        # Bounds are checked explicitly rather than relying on the ``^`` / ``$``
        # sentinels alone: when those characters also occur inside the caller's
        # sequence the sentinels can compare equal, and the scan would otherwise
        # read outside both buffers.
        # NOTE(review): assumes the complement step leaves ``^`` / ``$`` unchanged;
        # confirm against v_complement.
        while b >= 0 and a < n and s[a] == c[b] and s[b] == c[a]:
            arr[0][i] += 1
            a += 1
            b -= 1

        # Track the palindrome whose right edge extends furthest.
        stemp = <ssize_t> arr[0][i]
        if i + stemp > radius:
            center = i
            radius = i + stemp


cpdef str manacher(str sequence, bint dna = True):
    """Return the longest palindromic stretch found in a nucleotide sequence.

    Args:
        sequence (str): Nucleotide sequence string.
        dna (bool): Sequence is DNA, else RNA.

    Returns:
        (str): Longest palindromic substring within sequence.

    Notes:
        * Cython/C++ implementation of the O(n) Manacher's algorithm, in which
          the padded sequence is matched against its complemented copy.
        * Typically slower than the O(n log n) palindrome function for strings up
          to 2^23 characters (not benchmarked beyond this limit).
        * Provided primarily for demonstration purposes.

    """
    cdef:
        # "^" + sequence + "$" with "#" inserted between every character.
        str padded = PyUnicode_Join("#", f"^{sequence}$")
        StringView forward = str_to_view(padded)
        StringView complemented = str_to_view(padded)
        NumericArray[int]* radii
        ssize_t idx, start, stop, best_center = 0
        int best_radius = 0

    radii = new NumericArray[int](forward.size)
    radii.fill(best_radius)
    v_complement(&complemented, dna)

    _compute(forward.ptr, complemented.ptr, radii, <ssize_t> forward.size)
    # The character buffers are only needed while the radii are computed.
    free(forward.ptr)
    free(complemented.ptr)

    # Keep the first center that carries the largest radius (sentinels excluded).
    for idx in range(1, forward.size - 1):
        if radii[0][idx] <= best_radius:
            continue
        best_center = idx
        best_radius = radii[0][idx]
    del radii

    # A step of 2 skips the "#" separators of the padded string.
    start = best_center - best_radius + 1
    stop = best_center + best_radius
    return padded[start:stop:2]
40 changes: 40 additions & 0 deletions src/designer_dna/_oligos.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# BSD 3-Clause License
#
# Copyright (c) 2025, Spill-Tea
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from common cimport StringView

# Declarations shared with other Cython modules through ``cimport``.
# ``c_*`` functions take a raw character buffer plus a Py_ssize_t, while ``v_*``
# functions take a pointer to a StringView.
# NOTE(review): the implementations are not visible here; the ``unsigned char*``
# argument is presumably a complement lookup table and the ``bint`` presumably
# selects DNA versus RNA - confirm against the implementation.

# Reverse
cdef void c_reverse(char*, Py_ssize_t)
cdef void v_reverse(StringView*)

# Complement
cdef void c_complement(char*, Py_ssize_t, unsigned char*)
cdef void v_complement(StringView*, bint)

# Reverse complement
cdef void c_reverse_complement(char*, Py_ssize_t, unsigned char*)
cdef void v_reverse_complement(StringView*, bint)
4 changes: 1 addition & 3 deletions src/designer_dna/_oligos.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# pylint: disable=W0613

"""Cythonized oligonucleotide functions."""

def reverse(sequence: str) -> str:
Expand Down Expand Up @@ -70,7 +68,7 @@ def reverse_complement(sequence: str, dna: bool = ...) -> str:
"""Reverse complement a nucleotide sequence.

Args:
sequence (str): Nucelotide sequence string.
sequence (str): Nucleotide sequence string.
dna (bool): Sequence is DNA, else RNA.

Returns:
Expand Down
Loading
Loading