Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions singlem/diamond_spkg_searcher.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import os
import logging
import itertools
import sys
import threading
import time
import extern

from subprocess import Popen, PIPE
Expand Down Expand Up @@ -49,6 +53,14 @@ def run_diamond(self, hmms, forward_read_files, reverse_read_files, performance_

return (fwds, revs)

def _animation_thread(self, filename, stop_event):
    '''Display a spinner on stderr until stop_event is set.

    Intended to be run in a background thread while DIAMOND is running.

    Parameters
    ----------
    filename: str
        name shown after "Filtering" on the spinner line
    stop_event: threading.Event
        set by the caller to end the animation

    Returns
    -------
    None. Nothing is written if stop_event is already set on entry.
    '''
    spinner = itertools.cycle(['|', '/', '-', '\\'])
    while not stop_event.is_set():
        # '\r' moves back to the start of the line so that each frame
        # overwrites the previous one.
        sys.stderr.write(f"\rFiltering {filename} {next(spinner)}")
        sys.stderr.flush()
        # Wait on the event instead of sleeping: the frame interval is still
        # 0.1s, but the thread wakes immediately once the caller sets the
        # event, rather than sleeping out the remainder of the interval.
        stop_event.wait(0.1)

def _prefilter(self, diamond_database, read_files, is_reverse_reads, performance_parameters, sample_names, min_orf_length, context_window):
'''Find all reads that match the DIAMOND database in the
singlem_package database.
Expand Down Expand Up @@ -105,8 +117,18 @@ def _prefilter(self, diamond_database, read_files, is_reverse_reads, performance
cmd.extend(performance_parameters.split())
logging.debug(' '.join(cmd))

logging.info(f"Filtering {os.path.basename(file)}")

best_hits = {}
query_sequence_lengths = {}
# Start animation thread
stop_animation = threading.Event()
animation_thread = threading.Thread(
target=self._animation_thread,
args=(os.path.basename(file), stop_animation),
daemon=True
)
animation_thread.start()
# using Popen to stream the output
with Popen(cmd, stdout=PIPE, stderr=PIPE, text=True) as proc:
seen_full_qseqs = set()
Expand Down Expand Up @@ -171,6 +193,11 @@ def _prefilter(self, diamond_database, read_files, is_reverse_reads, performance
full_qseq_f.write(f'>{qseqid}\n{qseq_full_seq}\n')
seen_full_qseqs.add(qseqid)

# Stop animation and clear the line
stop_animation.set()
animation_thread.join(timeout=1)
sys.stderr.write('\r' + ' ' * 80 + '\r')
sys.stderr.flush()

# check for DIAMOND errors
stderr_output = proc.stderr.read()
Expand All @@ -184,6 +211,7 @@ def _prefilter(self, diamond_database, read_files, is_reverse_reads, performance
raise extern.ExternCalledProcessError(proc, cmd)

diamond_results.append(DiamondSearchResult(fasta_path, full_qseq_fasta_path, best_hits, query_sequence_lengths))
logging.info(f"Found {len(best_hits)} hits for {os.path.basename(file)}")

return diamond_results

Expand Down
2 changes: 1 addition & 1 deletion singlem/metagenome_otu_finder.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def find_windowed_sequences(self,

def _find_lower_case_columns(self, protein_alignment):
lower_cases = [False]*len(protein_alignment[0].seq)
lower_case_chars = re.compile(r'[a-z]')
lower_case_chars = re.compile(r'[a-z\*]')
for pro in protein_alignment:
for i, aa in enumerate(pro.seq):
if lower_case_chars.match(aa):
Expand Down
Loading
Loading