From ce8fcd84cc743e524ce739c12d03d6a8b8e66485 Mon Sep 17 00:00:00 2001 From: Kevin Lenzo Date: Tue, 14 Apr 2026 10:41:58 -0400 Subject: [PATCH 1/4] feat: add align_* beams and align_use_main_beams for forced-alignment FSG Register align_beam, align_pbeam, align_wbeam (defaults 1e-48) and align_use_main_beams (default no). fsg_search_init uses them for the _align search only; other FSG and LVCSR paths unchanged. Document CLI flags in usage_align. --- programs/pocketsphinx_main.c | 2 ++ src/config_macro.h | 28 ++++++++++++++++++++++++ src/fsg_search.c | 42 +++++++++++++++++++++++++++--------- 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/programs/pocketsphinx_main.c b/programs/pocketsphinx_main.c index 1c7a6c0cc..e53b8ccb2 100644 --- a/programs/pocketsphinx_main.c +++ b/programs/pocketsphinx_main.c @@ -753,6 +753,8 @@ usage_align(char *name) fprintf(stderr, " INPUT Audio file to align (or '-' for stdin)\n"); fprintf(stderr, " WORDS... Words to align to (will be concatenated)\n"); fprintf(stderr, "\nAlignment-specific options:\n"); + fprintf(stderr, " -align_beam FLOAT Beam for forced-alignment FSG (see also -align_pbeam, -align_wbeam)\n"); + fprintf(stderr, " -align_use_main_beams yes/no Use -beam/-pbeam/-wbeam for alignment FSG (default: no)\n"); fprintf(stderr, " -phone_align yes/no Run a second pass to align phones and print their durations\n"); fprintf(stderr, " (default: no)\n"); fprintf(stderr, " -state_align yes/no Run a second pass to align phones and states and print their\n"); diff --git a/src/config_macro.h b/src/config_macro.h index 769412ed1..9b3d0b284 100644 --- a/src/config_macro.h +++ b/src/config_macro.h @@ -61,6 +61,8 @@ POCKETSPHINX_FEAT_OPTIONS, \ POCKETSPHINX_ACMOD_OPTIONS, \ POCKETSPHINX_BEAM_OPTIONS, \ + POCKETSPHINX_ALIGN_BEAM_OPTIONS, \ + POCKETSPHINX_ALIGN_FSG_OPTIONS, \ POCKETSPHINX_SEARCH_OPTIONS, \ POCKETSPHINX_DICT_OPTIONS, \ POCKETSPHINX_NGRAM_OPTIONS, \ @@ -144,6 +146,32 @@ "3.0", \ "Weight for phoneme lookahead 
penalties" } \ +/** Beam widths for forced-alignment FSG (ps_set_align_text, align subcommand). */ +#define POCKETSPHINX_ALIGN_BEAM_OPTIONS \ +{ "align_beam", \ + ARG_FLOATING, \ + "1e-48", \ + "Beam width for each frame in forced-alignment FSG search " \ + "(independent of beam)" }, \ + { "align_pbeam", \ + ARG_FLOATING, \ + "1e-48", \ + "Phone-transition beam for forced-alignment FSG search " \ + "(independent of pbeam)" }, \ + { "align_wbeam", \ + ARG_FLOATING, \ + "1e-48", \ + "Word-exit beam for forced-alignment FSG search " \ + "(independent of wbeam)" } \ + +/** Toggle: use main decoder beams for forced-alignment FSG instead of align_*. */ +#define POCKETSPHINX_ALIGN_FSG_OPTIONS \ +{ "align_use_main_beams", \ + ARG_BOOLEAN, \ + "no", \ + "If yes, forced-alignment FSG uses beam, pbeam, wbeam instead of " \ + "align_beam, align_pbeam, align_wbeam" } \ + /** Options defining other parameters for tuning the search. */ #define POCKETSPHINX_SEARCH_OPTIONS \ { "compallsen", \ diff --git a/src/fsg_search.c b/src/fsg_search.c index d4db17c5f..578b90e7e 100644 --- a/src/fsg_search.c +++ b/src/fsg_search.c @@ -171,6 +171,23 @@ fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg) return n_alt; } +static void /* Read the align_* beams for the align search (unless align_use_main_beams is set), else beam/pbeam/wbeam. */ +fsg_search_beam_config(ps_config_t *config, char const *name, + double *beam, double *pbeam, double *wbeam) +{ + if (name != NULL && strcmp(name, PS_DEFAULT_ALIGN_SEARCH) == 0 + && !ps_config_bool(config, "align_use_main_beams")) { + *beam = ps_config_float(config, "align_beam"); + *pbeam = ps_config_float(config, "align_pbeam"); + *wbeam = ps_config_float(config, "align_wbeam"); + } + else { + *beam = ps_config_float(config, "beam"); + *pbeam = ps_config_float(config, "pbeam"); + *wbeam = ps_config_float(config, "wbeam"); + } +} + ps_search_t * fsg_search_init(const char *name, fsg_model_t *fsg, @@ -196,16 +213,21 @@ fsg_search_init(const char *name, fsgs->frame = -1; /* Get search pruning parameters */ - fsgs->beam_factor = 1.0f; - fsgs->beam = 
fsgs->beam_orig - = (int32) logmath_log(acmod->lmath, ps_config_float(config, "beam")) - >> SENSCR_SHIFT; - fsgs->pbeam = fsgs->pbeam_orig - = (int32) logmath_log(acmod->lmath, ps_config_float(config, "pbeam")) - >> SENSCR_SHIFT; - fsgs->wbeam = fsgs->wbeam_orig - = (int32) logmath_log(acmod->lmath, ps_config_float(config, "wbeam")) - >> SENSCR_SHIFT; + { + double fl_beam, fl_pbeam, fl_wbeam; /* double, not float: the 1e-48 defaults are below float's smallest subnormal and would become 0 */ + + fsgs->beam_factor = 1.0f; + fsg_search_beam_config(config, name, &fl_beam, &fl_pbeam, &fl_wbeam); + fsgs->beam = fsgs->beam_orig + = (int32) logmath_log(acmod->lmath, fl_beam) + >> SENSCR_SHIFT; + fsgs->pbeam = fsgs->pbeam_orig + = (int32) logmath_log(acmod->lmath, fl_pbeam) + >> SENSCR_SHIFT; + fsgs->wbeam = fsgs->wbeam_orig + = (int32) logmath_log(acmod->lmath, fl_wbeam) + >> SENSCR_SHIFT; + } /* LM related weights/penalties */ fsgs->lw = ps_config_float(config, "lw"); From 12ffd21d06a1018f7d007bcd1f1ef331255704b1 Mon Sep 17 00:00:00 2001 From: Kevin Lenzo Date: Tue, 14 Apr 2026 10:42:00 -0400 Subject: [PATCH 2/4] docs: document forced-alignment beam parameters Describe align_* vs main beams and align_use_main_beams in API header, README, man page, and Python set_align_text. --- README.md | 6 ++++-- cython/_pocketsphinx.pyx | 4 ++++ doxygen/pocketsphinx.1 | 7 ++++++- doxygen/pocketsphinx.1.in | 7 ++++++- include/pocketsphinx/search.h | 5 +++++ 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9ce84cd10..8a7da7b8a 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,10 @@ The commands are as follows: all subsequent ones are concatenated to make the text, to avoid surprises if you forget to quote it. You are responsible for normalizing the text to remove punctuation, uppercase, centipedes, - etc. For example: - + etc. Forced alignment uses `align_beam`, `align_pbeam`, and + `align_wbeam` instead of `beam`, `pbeam`, and `wbeam`; see + `pocketsphinx help-config`. 
For example: + pocketsphinx align goforward.wav "go forward ten meters" By default, only word-level alignment is done. To get phone diff --git a/cython/_pocketsphinx.pyx b/cython/_pocketsphinx.pyx index d5ef2da64..83c898eb7 100644 --- a/cython/_pocketsphinx.pyx +++ b/cython/_pocketsphinx.pyx @@ -1668,6 +1668,10 @@ cdef class Decoder: segmentation in the usual manner. For phone-level alignment, see `set_alignment` and `get_alignment`. + Pruning for this pass uses ``align_beam``, ``align_pbeam``, and + ``align_wbeam`` by default; set ``align_use_main_beams`` to use the + main decoder ``beam``/``pbeam``/``wbeam`` instead. + Args: text(str): Sentence to align, as whitespace-separated words. All words must be present in the diff --git a/doxygen/pocketsphinx.1 b/doxygen/pocketsphinx.1 index 689e36d93..2fe7d6bbd 100644 --- a/doxygen/pocketsphinx.1 +++ b/doxygen/pocketsphinx.1 @@ -59,7 +59,12 @@ sequence, and write a JSON object in the same format described above. The first positional argument is the input, and all subsequent ones are concatenated to make the text, to avoid surprises if you forget to quote it. You are responsible for normalizing the text to remove -punctuation, uppercase, centipedes, etc. For example: +punctuation, uppercase, centipedes, etc. +Forced alignment uses +\fIalign_beam\fP, \fIalign_pbeam\fP, and \fIalign_wbeam\fP +instead of \fIbeam\fP, \fIpbeam\fP, and \fIwbeam\fP; defaults are +independent of LVCSR tuning (see \fBpocketsphinx help-config\fP). +For example: .EX pocketsphinx align goforward.wav "go forward ten meters" diff --git a/doxygen/pocketsphinx.1.in b/doxygen/pocketsphinx.1.in index 4ad4021dc..0f5b97f41 100644 --- a/doxygen/pocketsphinx.1.in +++ b/doxygen/pocketsphinx.1.in @@ -59,7 +59,12 @@ sequence, and write a JSON object in the same format described above. The first positional argument is the input, and all subsequent ones are concatenated to make the text, to avoid surprises if you forget to quote it. 
You are responsible for normalizing the text to remove -punctuation, uppercase, centipedes, etc. For example: +punctuation, uppercase, centipedes, etc. +Forced alignment uses +\fIalign_beam\fP, \fIalign_pbeam\fP, and \fIalign_wbeam\fP +instead of \fIbeam\fP, \fIpbeam\fP, and \fIwbeam\fP; defaults are +independent of LVCSR tuning (see \fBpocketsphinx help-config\fP). +For example: .EX pocketsphinx align goforward.wav "go forward ten meters" diff --git a/include/pocketsphinx/search.h b/include/pocketsphinx/search.h index 35cbff87b..62d85f078 100644 --- a/include/pocketsphinx/search.h +++ b/include/pocketsphinx/search.h @@ -336,6 +336,11 @@ int ps_add_allphone_file(ps_decoder_t *ps, const char *name, const char *path); * phoneme or state segmentations, you must subsequently call * ps_set_alignment() and re-run decoding. It's tough son, but it's life. * + * By default, pruning uses align_beam, align_pbeam, and align_wbeam + * rather than beam, pbeam, and wbeam. If align_use_main_beams is enabled, + * the main decoder beams are used instead. Defaults for align_* are tuned + * for forced alignment and are independent of LVCSR beam defaults. + * * @memberof ps_decoder_t * @param ps Decoder * @param words String containing whitespace-separated words for alignment. From 90f282686f7da455e7afce7fcb156724d63cd9a1 Mon Sep 17 00:00:00 2001 From: Kevin Lenzo Date: Tue, 14 Apr 2026 10:42:02 -0400 Subject: [PATCH 3/4] test: add test_align_fsg_beam Verify _align FSG uses align_* under asymmetric wbeam and main beams when align_use_main_beams is enabled. 
--- test/unit/CMakeLists.txt | 1 + test/unit/test_align_fsg_beam.c | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 test/unit/test_align_fsg_beam.c diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 7833c19c4..59d227b60 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -40,6 +40,7 @@ set(TESTS test_vad test_vad_alloc test_word_align + test_align_fsg_beam test_endpointer test_endpointer_timestamp test_thread_local_compile diff --git a/test/unit/test_align_fsg_beam.c b/test/unit/test_align_fsg_beam.c new file mode 100644 index 000000000..5f352fe9a --- /dev/null +++ b/test/unit/test_align_fsg_beam.c @@ -0,0 +1,60 @@ +/* -*- c-basic-offset: 4 -*- */ +#include <pocketsphinx.h> + +#include "pocketsphinx_internal.h" +#include "fsg_search_internal.h" +#include "util/hash_table.h" +#include "test_macros.h" + +int +main(int argc, char *argv[]) +{ + ps_decoder_t *ps; + ps_config_t *config; + void *search_p; + fsg_search_t *fsgs; + + (void)argc; + (void)argv; + err_set_loglevel(ERR_INFO); + /* Stock asymmetry: wbeam differs from beam; forced-align FSG must use align_* */ + TEST_ASSERT(config = + ps_config_parse_json( + NULL, + "loglevel: INFO, bestpath: false," + "hmm: \"" MODELDIR "/en-us/en-us\"," + "dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\"," + "samprate: 16000," + "wbeam: 7e-29")); + TEST_ASSERT(ps = ps_init(config)); + TEST_EQUAL(0, ps_set_align_text(ps, "go forward ten meters")); + TEST_EQUAL(0, hash_table_lookup(ps->searches, PS_DEFAULT_ALIGN_SEARCH, + &search_p)); + fsgs = (fsg_search_t *)search_p; + TEST_EQUAL(fsgs->wbeam_orig, fsgs->beam_orig); + TEST_EQUAL(fsgs->pbeam_orig, fsgs->beam_orig); + + ps_free(ps); + ps_config_free(config); + + /* With align_use_main_beams, FSG uses global wbeam (asymmetric from beam). 
*/ + TEST_ASSERT(config = + ps_config_parse_json( + NULL, + "loglevel: ERROR, bestpath: false," + "hmm: \"" MODELDIR "/en-us/en-us\"," + "dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\"," + "samprate: 16000," + "wbeam: 7e-29," + "align_use_main_beams: yes")); + TEST_ASSERT(ps = ps_init(config)); + TEST_EQUAL(0, ps_set_align_text(ps, "go forward ten meters")); + TEST_EQUAL(0, hash_table_lookup(ps->searches, PS_DEFAULT_ALIGN_SEARCH, + &search_p)); + fsgs = (fsg_search_t *)search_p; + TEST_ASSERT(fsgs->wbeam_orig != fsgs->beam_orig); + + ps_free(ps); + ps_config_free(config); + return 0; +} From 33df974bc997d2b789d38e0b7353ce6797b772b2 Mon Sep 17 00:00:00 2001 From: Kevin Lenzo Date: Tue, 14 Apr 2026 10:48:25 -0400 Subject: [PATCH 4/4] fix: disable lattice bestpath for forced-alignment FSG hypothesis When global bestpath is enabled, fsg_search_hyp() can return the lattice bestpath string, which may be shorter than the forced transcript. The align CLI already turns bestpath off; Decoder() does not. Force fsgs->bestpath false for _align so ps_get_hyp matches the full path and cython/test/alignment_test.py::test_default_lm passes. --- src/fsg_search.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/fsg_search.c b/src/fsg_search.c index 578b90e7e..4ee5a17d6 100644 --- a/src/fsg_search.c +++ b/src/fsg_search.c @@ -264,6 +264,11 @@ fsg_search_init(const char *name, if (ps_config_bool(config, "bestpath")) fsgs->bestpath = TRUE; #endif + /* Forced-alignment FSG: hyp() must list the full transcript. Lattice + * bestpath can return a shorter string than the Viterbi backtrace (see + * pocketsphinx_main align(), which disables bestpath). */ + if (name != NULL && strcmp(name, PS_DEFAULT_ALIGN_SEARCH) == 0) + fsgs->bestpath = FALSE; if (fsg_search_reinit(ps_search_base(fsgs), ps_search_dict(fsgs),