diff --git a/pyproject.toml b/pyproject.toml index 195ff8d..3e82dcb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Bio-Informatics" + "Topic :: Scientific/Engineering :: Bio-Informatics", ] dynamic = ["version", "readme", "dependencies"] @@ -28,7 +28,7 @@ readme = { file = ["README.md"], content-type = "text/markdown" } dependencies = { file = ["requirements.txt"] } [tool.setuptools] -package-dir = { "" = "src"} +package-dir = { "" = "src" } [tool.setuptools.packages.find] where = ["src"] @@ -95,7 +95,14 @@ max-line-length = 88 [tool.pylint."messages control"] disable = [ - "R1731" # consider-using-max-builtin + "R1731", # consider-using-max-builtin + "E0401", # import-error +] + +[tool.pylint."*.pyi"] +disable = [ + "W0613", # unused-argument + "W0231", # super-init-not-called ] [tool.ruff] diff --git a/setup.py b/setup.py index 9149af7..19b604e 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,14 @@ # extra_link_args=[openmp], ) ) +extensions.append( + Extension( + "designer_dna._oligonucleotides", + ["src/designer_dna/_oligonucleotides.pyx"], + include_dirs=["src/designer_dna/headers"], + language="c++", + ) +) # Add cython directive to specify python version target diff --git a/src/designer_dna/_oligonucleotides.pyi b/src/designer_dna/_oligonucleotides.pyi new file mode 100644 index 0000000..6bb0a89 --- /dev/null +++ b/src/designer_dna/_oligonucleotides.pyi @@ -0,0 +1,48 @@ +# BSD 3-Clause License +# +# Copyright (c) 2025, Spill-Tea +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Oligonucleotide functions with the help of C++.""" + +def manacher(sequence: str, dna: bool = ...) -> str: + """Find the longest palindromic substring within a nucleotide sequence. + + Args: + sequence (str): Nucleotide sequence string. + dna (bool): Sequence is DNA, else RNA. + + Returns: + (str): longest palindromic substring within sequence. + + Notes: + * This is a cython/c++ implementation of the O(n) Manacher's algorithm. + * This algorithm is typically slower than the O(nlogn) palindrome function for + strings up to 2^23 characters (not benchmarked beyond this limit). + * This function is included primarily for demonstration purposes.
+ + """ diff --git a/src/designer_dna/_oligonucleotides.pyx b/src/designer_dna/_oligonucleotides.pyx new file mode 100644 index 0000000..f9bf877 --- /dev/null +++ b/src/designer_dna/_oligonucleotides.pyx @@ -0,0 +1,127 @@ +# BSD 3-Clause License +# +# Copyright (c) 2025, Spill-Tea +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# distutils: language = c++ + +"""Oligonucleotide functions with the help of C++.""" +from narray cimport NumericArray +from designer_dna._oligos cimport v_complement +from common cimport StringView, str_to_view +from libc.stdlib cimport free + + +cdef extern from "Python.h": + str PyUnicode_Join(str, str) + + +cdef inline void _compute( # fill arr[i] with the match count found when expanding around index i + char* s, # reference buffer, laid out as in the parity/string/index table below + char* c, # complemented copy of s, same length + NumericArray[int]* arr, # output, indexed like s + ssize_t n, # number of chars in s and c +): + cdef: + ssize_t mirror, a, b, i, stemp, center = 0, radius = 0 # radius holds a right-edge index (center + match count), not a length + int temp, zero = 0 + + for i in range(1, n - 1): + # parity: 0 1 0 1 0 1 0 1 0 + # string: ^ # A # N # T # $ + # index: 0 1 2 3 4 5 6 7 8 + + # skip odd length palindromes (when index is centered on an even position) + # if character from ref seq and complement do not match + if i % 2 == 0 and s[i] != c[i]: + arr[0][i] = zero + continue + + # Look ahead at mirror position (i reflected across the current center) + mirror = 2 * center - i + if i < radius: + temp = (radius - i) + arr[0][i] = min(temp, arr[0][mirror]) + else: + arr[0][i] = zero + + # Center expansion method. No explicit bounds check: presumably the ^ and $ sentinels stop the loop -- TODO confirm + stemp = arr[0][i] + a = i + 1 + stemp + b = i - 1 - stemp + while s[a] == c[b] and s[b] == c[a]: + arr[0][i] += 1 + a += 1 + b -= 1 + + stemp = arr[0][i] + if i + stemp > radius: # this palindrome reaches further right: adopt it as the reference + center = i + radius = i + stemp + + +cpdef str manacher(str sequence, bint dna = True): + """Find the longest palindromic substring within a nucleotide sequence. + + Args: + sequence (str): Nucleotide sequence string. + dna (bool): Sequence is DNA, else RNA. + + Returns: + (str): longest palindromic (reverse-complement) substring within sequence. + + Notes: + * This is a cython/c++ implementation of the O(n) Manacher's algorithm. + * This algorithm is typically slower than the O(nlogn) palindrome function for + strings up to 2^23 characters (not benchmarked beyond this limit). + * This function is included primarily for demonstration purposes.
+ + """ + cdef: + str k = PyUnicode_Join("#", f"^{sequence}$") # '#' between every character, ^ and $ as end markers + StringView ref = str_to_view(k) + StringView com = str_to_view(k) # second copy, complemented in place below + NumericArray[int]* arr + ssize_t i, center = 0 + int radius = 0 + + arr = new NumericArray[int](ref.size) + arr.fill(radius) # zero-initialise every slot (radius is 0 here) + v_complement(&com, dna) + + _compute(ref.ptr, com.ptr, arr, ref.size) + free(ref.ptr) # only ref.size and k are used from here on + free(com.ptr) + + # Enumerate, capturing index (center) and value of max (radius) + for i in range(1, ref.size - 1): + if arr[0][i] > radius: + radius = arr[0][i] + center = i + del arr # release the C++ array; the return slice steps by 2 to drop the '#' separators + + return k[center - radius + 1: center + radius: 2] diff --git a/src/designer_dna/_oligos.pxd b/src/designer_dna/_oligos.pxd new file mode 100644 index 0000000..0eff996 --- /dev/null +++ b/src/designer_dna/_oligos.pxd @@ -0,0 +1,40 @@ +# BSD 3-Clause License +# +# Copyright (c) 2025, Spill-Tea +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from common cimport StringView + +cdef: # NOTE(review): the .pyx definitions are marked noexcept; confirm these declarations should carry the same exception spec + void c_reverse(char*, Py_ssize_t) # (buffer, length): reverse in place + void v_reverse(StringView*) # StringView wrapper around c_reverse + + void c_complement(char*, Py_ssize_t, unsigned char*) # (buffer, length, translation table) + void v_complement(StringView*, bint) # (view, dna): uses the DNA table when dna is true, else RNA + + void c_reverse_complement(char*, Py_ssize_t, unsigned char*) # (buffer, length, translation table) + void v_reverse_complement(StringView*, bint) diff --git a/src/designer_dna/_oligos.pyi b/src/designer_dna/_oligos.pyi index 9d5f5e8..4710980 100644 --- a/src/designer_dna/_oligos.pyi +++ b/src/designer_dna/_oligos.pyi @@ -27,8 +27,6 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# pylint: disable=W0613 - """Cythonized oligonucleotide functions.""" def reverse(sequence: str) -> str: @@ -70,7 +68,7 @@ def reverse_complement(sequence: str, dna: bool = ...) -> str: """Reverse complement a nucleotide sequence. Args: - sequence (str): Nucelotide sequence string. + sequence (str): Nucleotide sequence string. dna (bool): Sequence is DNA, else RNA.
Returns: diff --git a/src/designer_dna/_oligos.pyx b/src/designer_dna/_oligos.pyx index d222d2d..0ab3deb 100644 --- a/src/designer_dna/_oligos.pyx +++ b/src/designer_dna/_oligos.pyx @@ -32,11 +32,6 @@ from libc.stdlib cimport free, malloc -cdef extern from "Python.h": - Py_ssize_t PyUnicode_GET_LENGTH(object) - bytes PyUnicode_AsUTF8String(object) - Py_ssize_t PyBytes_GET_SIZE(object) - from common cimport ( StringView, str_to_view, @@ -63,6 +58,11 @@ cdef inline void c_reverse(char* seq, Py_ssize_t length) noexcept: seq[start], seq[end] = seq[end], seq[start] +cdef inline void v_reverse(StringView* view) noexcept: + """Handle reverse in place on StringView directly.""" + c_reverse(view[0].ptr, view[0].size) # view[0] dereferences the StringView pointer + + cpdef str reverse(str sequence): """Reverse a nucleotide sequence. @@ -82,13 +82,17 @@ cpdef str reverse(str sequence): return sequence[::-1] -cdef void c_complement(char* sequence, Py_ssize_t length, unsigned char[] table): +cdef void c_complement( + char* sequence, + Py_ssize_t length, + unsigned char* table # plain pointer; the v_* wrappers pass &DNA[0] or &RNA[0] +) noexcept: """Complement sequence C string in place. Args: seq (char*): buffer sequence. length (Py_ssize_t): length of seq. - table (char[]): translation table. + table (unsigned char*): translation table.
""" cdef: @@ -103,12 +107,12 @@ cdef void c_complement(char* sequence, Py_ssize_t length, unsigned char[] table) sequence[idx] = table[ sequence[idx]] -cdef void v_complement(StringView view, bint dna): - """Handle complement on StringView directly, in place.""" +cdef inline void v_complement(StringView* view, bint dna) noexcept: # dna selects the DNA table, otherwise RNA + """Handle complement in place on StringView directly.""" if dna: - c_complement(view.ptr, view.size, DNA) + c_complement(view[0].ptr, view[0].size, &DNA[0]) else: - c_complement(view.ptr, view.size, RNA) + c_complement(view[0].ptr, view[0].size, &RNA[0]) cpdef str complement(str sequence, bint dna = True): @@ -129,7 +133,7 @@ cpdef str complement(str sequence, bint dna = True): """ cdef StringView view = str_to_view(sequence) - v_complement(view, dna) + v_complement(&view, dna) return to_str(view) @@ -137,14 +141,14 @@ cpdef str complement(str sequence, bint dna = True): cdef void c_reverse_complement( char* sequence, Py_ssize_t length, - unsigned char[] table -): + unsigned char* table +) noexcept: """Reverse complement sequence C string in place. Args: sequence (char*): buffer pointer to nucleotide char sequence. length (Py_ssize_t): length of seq. - table (char[]): translation table. + table (unsigned char*): translation table. """ cdef: @@ -162,11 +166,19 @@ cdef void c_reverse_complement( sequence[0] = table[ sequence[0]] +cdef inline void v_reverse_complement(StringView* view, bint dna) noexcept: + """Handle reverse complement in place on StringView directly.""" + if dna: # select the DNA or RNA translation table + c_reverse_complement(view[0].ptr, view[0].size, &DNA[0]) + else: + c_reverse_complement(view[0].ptr, view[0].size, &RNA[0]) + + cpdef str reverse_complement(str sequence, bint dna = True): """Reverse complement a nucleotide sequence. Args: - sequence (str): Nucelotide sequence string. + sequence (str): Nucleotide sequence string. dna (bool): Sequence is DNA, else RNA.
Returns: @@ -180,16 +192,12 @@ cpdef str reverse_complement(str sequence, bint dna = True): """ cdef StringView view = str_to_view(sequence) - - if dna: - c_reverse_complement(view.ptr, view.size, DNA) - else: - c_reverse_complement(view.ptr, view.size, RNA) + v_reverse_complement(&view, dna) return to_str(view) -cdef void _center( +cdef inline void _center( char* seq, char* comp, Py_ssize_t* left, @@ -204,6 +212,21 @@ cdef void _center( left[0] += 1 +cdef inline void _update_bounds( # keep (left, right) as the best span when right - left beats the stored length + Py_ssize_t left, # candidate left bound + Py_ssize_t right, # candidate right bound + Py_ssize_t* current, # out: right - left for this candidate + Py_ssize_t* length, # in/out: longest span length recorded so far + Py_ssize_t* start, # out: left bound of the longest span + Py_ssize_t* end # out: right bound of the longest span +) noexcept: + current[0] = right - left + if current[0] > length[0]: # strictly longer only, so the earliest longest span is kept + length[0] = current[0] + start[0] = left + end[0] = right + + cpdef str palindrome(str sequence, bint dna = True): """Find the longest palindromic substring within a nucleotide sequence. @@ -233,18 +256,14 @@ cpdef str palindrome(str sequence, bint dna = True): StringView com = str_to_view(sequence) Py_ssize_t i, left, right, current, length = 0, start = 0, end = 0 - v_complement(com, dna) + v_complement(&com, dna) for i in range(seq.size - 1): # Check even length palindromes first (more common for ATGC based sequences) left = i right = i + 1 _center(seq.ptr, com.ptr, &left, &right, seq.size) - current = right - left - if current > length: - length = current - start = left - end = right + _update_bounds(left, right, &current, &length, &start, &end) # Only check odd length palindromes in case of (center) degenerate bases if seq.ptr[i] != com.ptr[i]: @@ -253,11 +272,7 @@ cpdef str palindrome(str sequence, bint dna = True): left = i - 1 right = i + 1 _center(seq.ptr, com.ptr, &left, &right, seq.size) - current = right - left - if current > length: - length = current - start = left - end = right + _update_bounds(left, right, &current, &length, &start, &end) free(seq.ptr) free(com.ptr) @@ -300,7 +315,12 @@ cpdef int stretch(str sequence): return longest -cdef inline bint _compare(char* p, char* q, Py_ssize_t
start, Py_ssize_t end): +cdef inline bint _compare( + char* p, + char* q, + Py_ssize_t start, + Py_ssize_t end +) noexcept: """Awkward slice comparison between two different size chars.""" cdef: Py_ssize_t j, count = 0 @@ -314,7 +334,7 @@ cdef inline bint _compare(char* p, char* q, Py_ssize_t start, Py_ssize_t end): cdef inline void _assign(char* src, char* dest, Py_ssize_t start, Py_ssize_t end): - """Overcome assigning a substring slice to another char variable.""" + """Overcome slice assignment between two char variables of different sizes.""" cdef: Py_ssize_t j, count = 0 diff --git a/src/designer_dna/headers/common.pxd b/src/designer_dna/headers/common.pxd index 6f4003c..38b68cf 100644 --- a/src/designer_dna/headers/common.pxd +++ b/src/designer_dna/headers/common.pxd @@ -41,7 +41,7 @@ cdef extern from "Python.h": bytes PyBytes_FromStringAndSize(char*, Py_ssize_t) -cdef struct StringView: +ctypedef struct StringView: char* ptr Py_ssize_t size bint origin diff --git a/src/designer_dna/headers/narray.pxd b/src/designer_dna/headers/narray.pxd new file mode 100644 index 0000000..cfbecaa --- /dev/null +++ b/src/designer_dna/headers/narray.pxd @@ -0,0 +1,41 @@ +# BSD 3-Clause License +# +# Copyright (c) 2025, Spill-Tea +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# distutils: language = c++ +"""Common data structures.""" + +cdef extern from "numeric_array.hpp": + cdef cppclass NumericArray[T]: + NumericArray(size_t) except + # std::invalid_argument when size is 0, std::bad_alloc on allocation failure + T& operator[](size_t) # bounds-checked in C++ (throws std::out_of_range); callers must keep indices in range + T get(size_t) except + # std::out_of_range + void set(size_t, T) except + # std::out_of_range + void fill(T) + NumericArray[T] slice(size_t, size_t) except + # std::out_of_range or std::invalid_argument + size_t length() const diff --git a/src/designer_dna/headers/numeric_array.hpp b/src/designer_dna/headers/numeric_array.hpp new file mode 100644 index 0000000..a69237e --- /dev/null +++ b/src/designer_dna/headers/numeric_array.hpp @@ -0,0 +1,142 @@ +/* + * BSD 3-Clause License + * + * Copyright (c) 2025, Spill-Tea + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3.
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NUMERIC_ARRAY_HPP +#define NUMERIC_ARRAY_HPP + +#include <algorithm> +#include <cstddef> +#include <stdexcept> + +/* Fixed-size heap array with bounds-checked element access. */ +template <typename T> +class NumericArray { +public: + NumericArray(std::size_t size); + NumericArray(const NumericArray& other); + NumericArray& operator=(const NumericArray& other); + ~NumericArray(); + + T& operator[](std::size_t index); + const T& operator[](std::size_t index) const; + T get(std::size_t index) const; + void set(std::size_t index, const T& value); + void fill(const T& value); + NumericArray slice(std::size_t start, std::size_t end) const; + std::size_t length() const; + +private: + T* data; + std::size_t size_; +}; + +/* Allocate size elements (not value-initialised; call fill() before reading). Throws std::invalid_argument when size is 0. */ +template <typename T> +NumericArray<T>::NumericArray(std::size_t size) + : size_(size) { + if (size_ == 0) throw std::invalid_argument("Size must be > 0"); + data = new T[size_]; +} + +/* Deep copy, so two arrays never share (and later double-free) one buffer. */ +template <typename T> +NumericArray<T>::NumericArray(const NumericArray& other) + : data(new T[other.size_]), size_(other.size_) { + std::copy(other.data, other.data + size_, data); +} + +/* Copy-assign through a fresh buffer; the old buffer is released only after the copy succeeded. */ +template <typename T> +NumericArray<T>& NumericArray<T>::operator=(const NumericArray& other) { + if (this != &other) { + T* fresh = new T[other.size_]; + std::copy(other.data, other.data + other.size_, fresh); + delete[] data; + data = fresh; + size_ = other.size_; + } + return *this; +} + +template <typename T> +NumericArray<T>::~NumericArray() { + delete[] data; +} + +/* Bounds-checked mutable access; throws std::out_of_range. */ +template <typename T> +T& NumericArray<T>::operator[](std::size_t index) { + if (index >= size_) throw std::out_of_range("Index out of range"); + return data[index]; +} + +/* Bounds-checked read-only access; throws std::out_of_range. */ +template <typename T> +const T& NumericArray<T>::operator[](std::size_t index) const { + if (index >= size_) throw std::out_of_range("Index out of range"); + return data[index]; +} + +template <typename T> +T NumericArray<T>::get(std::size_t index) const { + if (index >= size_) throw std::out_of_range("Index out of range"); + return data[index]; +} +
+/* value is taken by const reference so literals and temporaries are accepted. */ +template <typename T> +void NumericArray<T>::set(std::size_t index, const T& value) { + if (index >= size_) throw std::out_of_range("Index out of range"); + data[index] = value; +} + +template <typename T> +void NumericArray<T>::fill(const T& value) { + std::fill(data, data + size_, value); +} + +/* Copy of the elements in [start, end). An empty range (start == end) passes the + * range check but is rejected by the constructor with std::invalid_argument. */ +template <typename T> +NumericArray<T> NumericArray<T>::slice(std::size_t start, std::size_t end) const { + if (start > end || end > size_) + throw std::out_of_range("Invalid slice range"); + NumericArray result(end - start); + std::copy(data + start, data + end, result.data); + return result; +} + +template <typename T> +std::size_t NumericArray<T>::length() const { + return size_; +} + +#endif // NUMERIC_ARRAY_HPP diff --git a/src/designer_dna/oligos.py b/src/designer_dna/oligos.py index 8cfb130..f97cb98 100644 --- a/src/designer_dna/oligos.py +++ b/src/designer_dna/oligos.py @@ -29,6 +29,7 @@ """Common utility functions to work with and analyze oligonucleotide sequences.""" +from ._oligonucleotides import manacher from ._oligos import ( complement, nrepeats, @@ -42,6 +43,7 @@ __all__ = [ "complement", "complement_py", + "manacher", "nrepeats", "nrepeats_py", "palindrome", diff --git a/tests/unit/test_oligos.py b/tests/unit/test_oligos.py index a3a31f5..cf588eb 100644 --- a/tests/unit/test_oligos.py +++ b/tests/unit/test_oligos.py @@ -190,6 +190,7 @@ def test_nrepeats( [ oligos.palindrome, oligos.palindrome_py, + oligos.manacher, ], ) @pytest.mark.parametrize( diff --git a/tox.ini b/tox.ini index e3b49d9..8280b6d 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = description = Run Unit Tests commands_pre = {envpython} --version - python -c 'import pathlib; pathlib.Path("{env_site_packages_dir}/cov.pth").write_text("import coverage; coverage.process_startup()")' + {envpython} -c 'import pathlib; pathlib.Path("{env_site_packages_dir}/cov.pth").write_text("import coverage; coverage.process_startup()")' [testenv:coverage] description = Report Code Coverage