From 29cb0fd04a82da261a233cb3f0004bd693b10db1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Jun 2025 01:53:59 +0100 Subject: [PATCH 01/16] Update ovos-plugin-manager requirement (#31) Updates the requirements on [ovos-plugin-manager](https://github.com/OpenVoiceOS/OVOS-plugin-manager) to permit the latest version. - [Release notes](https://github.com/OpenVoiceOS/OVOS-plugin-manager/releases) - [Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/blob/dev/CHANGELOG.md) - [Commits](https://github.com/OpenVoiceOS/OVOS-plugin-manager/compare/0.5.0...1.0.3) --- updated-dependencies: - dependency-name: ovos-plugin-manager dependency-version: 1.0.3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- extras.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 extras.txt diff --git a/extras.txt b/extras.txt old mode 100755 new mode 100644 index 857e1ca..69032e5 --- a/extras.txt +++ b/extras.txt @@ -1,3 +1,3 @@ -ovos-plugin-manager>=0.5.0,<1.0.0 +ovos-plugin-manager>=0.5.0,<2.0.0 ovos-utils>=0.3.5,<1.0.0 langcodes \ No newline at end of file From 67cb84239dee2d2c04c0ee7a66fd70702c1b385c Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Mon, 16 Jun 2025 00:54:10 +0000 Subject: [PATCH 02/16] Increment Version to 1.0.1a1 --- padacioso/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/padacioso/version.py b/padacioso/version.py index a3d5a94..4aed667 100644 --- a/padacioso/version.py +++ b/padacioso/version.py @@ -1,6 +1,6 @@ # START_VERSION_BLOCK VERSION_MAJOR = 1 VERSION_MINOR = 0 -VERSION_BUILD = 0 -VERSION_ALPHA = 0 +VERSION_BUILD = 1 +VERSION_ALPHA = 1 # END_VERSION_BLOCK From f2d1daf667099ca894df4c2ebc532e0052dd3098 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Mon, 16 Jun 2025 00:54:26 +0000 Subject: [PATCH 03/16] Update Changelog 
--- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ae4280..fe3b137 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,12 @@ # Changelog -## [1.0.0a1](https://github.com/OpenVoiceOS/padacioso/tree/1.0.0a1) (2024-10-16) +## [1.0.1a1](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a1) (2025-06-16) -[Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/0.2.4...1.0.0a1) +[Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.0...1.0.1a1) -**Breaking changes:** +**Merged pull requests:** -- feat!:pipeline factory [\#29](https://github.com/OpenVoiceOS/padacioso/pull/29) ([JarbasAl](https://github.com/JarbasAl)) +- Update ovos-plugin-manager requirement from \<1.0.0,\>=0.5.0 to \>=0.5.0,\<2.0.0 [\#31](https://github.com/OpenVoiceOS/padacioso/pull/31) ([dependabot[bot]](https://github.com/apps/dependabot)) From 2b49785007cee074f7dba9326064a4af55686327 Mon Sep 17 00:00:00 2001 From: Mike Date: Thu, 30 Oct 2025 13:00:22 -0500 Subject: [PATCH 04/16] fix: padacioso speed (#33) * fix: move to sequential instead of parallel intent calculation * feat: python gitignore * fix(unit-tests): drop EOL Python, add newer ones * fix: mark cache as dirty on all methods * Update .gitignore Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .github/workflows/unit_tests.yml | 6 +- .gitignore | 216 +++++++++++++++++++++++++++++++ padacioso/__init__.py | 102 ++++++++------- 3 files changed, 275 insertions(+), 49 deletions(-) create mode 100644 .gitignore diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 10a0954..9cc5972 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -13,12 +13,12 @@ jobs: strategy: max-parallel: 2 matrix: - python-version: [ 3.7, 3.8, 3.9, '3.10' ] + 
python-version: [3.9, "3.10", "3.11", "3.12", "3.13"] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install System Dependencies diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..091f922 --- /dev/null +++ b/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml diff --git a/padacioso/__init__.py b/padacioso/__init__.py index 82dbd6a..f0f0a46 100644 --- a/padacioso/__init__.py +++ b/padacioso/__init__.py @@ -1,4 +1,3 @@ -import concurrent.futures from typing import List, Iterator, Optional import simplematch @@ -11,11 +10,10 @@ except ImportError: import logging - LOG = logging.getLogger('padacioso') + LOG = logging.getLogger("padacioso") from difflib import SequenceMatcher - def fuzzy_match(x, against): """Perform a 'fuzzy' comparison between two strings. Returns: @@ -38,6 +36,10 @@ def __init__(self, fuzz=False, n_workers=4): self.excluded_keywords = {} self.excluded_contexts = {} + # Cache for optimization - pre-built list for fast iteration + self._intent_list = [] # Pre-built list of (intent_name, regexes) + self._cache_dirty = True # Flag to rebuild cache on next query + if "word" not in simplematch.types: LOG.debug(f"Registering `word` type") _init_sm_word_type() @@ -67,8 +69,7 @@ def add_intent(self, name: str, lines: List[str]): @param lines: list of intent regexes """ if name in self.intent_samples: - raise RuntimeError(f"Attempted to re-register existing intent: " - f"{name}") + raise RuntimeError(f"Attempted to re-register existing intent: {name}") expanded = [] for l in lines: expanded += expand_parentheses(normalize_example(l)) @@ -76,10 +77,9 @@ def add_intent(self, name: str, lines: List[str]): regexes.sort(key=len, reverse=True) self.intent_samples[name] = regexes for r in regexes: - self._cased_matchers[r] = \ - simplematch.Matcher(r, case_sensitive=True) - self._uncased_matchers[r] = \ - simplematch.Matcher(r, case_sensitive=False) + self._cased_matchers[r] = simplematch.Matcher(r, case_sensitive=True) + self._uncased_matchers[r] = 
simplematch.Matcher(r, case_sensitive=False) + self._cache_dirty = True # Mark cache as needing rebuild def remove_intent(self, name: str): """ @@ -93,6 +93,7 @@ def remove_intent(self, name: str): self._cased_matchers.pop(rx) if rx in self._uncased_matchers: self._uncased_matchers.pop(rx) + self._cache_dirty = True # Mark cache as needing rebuild def add_entity(self, name: str, lines: List[str]): """ @@ -101,13 +102,13 @@ def add_entity(self, name: str, lines: List[str]): @param lines: list of entity examples """ if name in self.entity_samples: - raise RuntimeError(f"Attempted to re-register existing entity: " - f"{name}") + raise RuntimeError(f"Attempted to re-register existing entity: {name}") name = name.lower() expanded = [] for l in lines: expanded += expand_parentheses(l) self.entity_samples[name] = expanded + self._cache_dirty = True # Mark cache as needing rebuild def remove_entity(self, name: str): """ @@ -118,6 +119,15 @@ def remove_entity(self, name: str): if name in self.entity_samples: del self.entity_samples[name] + def _rebuild_cache(self): + """ + Rebuild cached intent metadata for fast filtering. + Called lazily on first query after registration to avoid O(n²) during bulk registration. 
+ """ + # Pre-build the intent list to avoid reconstructing it every query + self._intent_list = list(self.intent_samples.items()) + self._cache_dirty = False + def _filter(self, query: str): # filter intents based on context/excluded keywords excluded_intents = [] @@ -127,14 +137,12 @@ def _filter(self, query: str): for intent_name, contexts in self.required_contexts.items(): if intent_name not in self.available_contexts: excluded_intents.append(intent_name) - elif any(context not in self.available_contexts[intent_name] - for context in contexts): + elif any(context not in self.available_contexts[intent_name] for context in contexts): excluded_intents.append(intent_name) for intent_name, contexts in self.excluded_contexts.items(): if intent_name not in self.available_contexts: continue - if any(context in self.available_contexts[intent_name] - for context in contexts): + if any(context in self.available_contexts[intent_name] for context in contexts): excluded_intents.append(intent_name) return excluded_intents @@ -146,8 +154,7 @@ def _match(self, query, intent_name, regexes): penalty = 0.15 if r not in self._cased_matchers: LOG.warning(f"{r} not initialized") - self._cased_matchers[r] = \ - simplematch.Matcher(r, case_sensitive=True) + self._cased_matchers[r] = simplematch.Matcher(r, case_sensitive=True) entities = self._cased_matchers[r].match(query) if entities is not None: for k, v in entities.items(): @@ -157,14 +164,11 @@ def _match(self, query, intent_name, regexes): elif str(v) not in self.entity_samples[k]: # penalize parsed entity value not in samples penalty += 0.1 - return {"entities": entities or {}, - "conf": 1 - penalty, - "name": intent_name} + return {"entities": entities or {}, "conf": 1 - penalty, "name": intent_name} if r not in self._uncased_matchers: LOG.warning(f"{r} not initialized") - self._uncased_matchers[r] = \ - simplematch.Matcher(r, case_sensitive=False) + self._uncased_matchers[r] = simplematch.Matcher(r, case_sensitive=False) 
entities = self._uncased_matchers[r].match(query) if entities is not None: # penalize case mismatch @@ -176,9 +180,7 @@ def _match(self, query, intent_name, regexes): elif str(v) not in self.entity_samples[k]: # penalize parsed entity value not in samples penalty += 0.1 - return {"entities": entities or {}, - "conf": 1 - penalty, - "name": intent_name} + return {"entities": entities or {}, "conf": 1 - penalty, "name": intent_name} if self.fuzz: for r in regexes: @@ -205,8 +207,7 @@ def _fuzzy_score(self, query, s, penalty=0.25): score = (fuzzy_score + base_score) / 2 if entities is not None: - return {"entities": entities or {}, - "conf": (fuzzy_score + base_score) / 2} + return {"entities": entities or {}, "conf": score} def calc_intents(self, query: str) -> Iterator[dict]: """ @@ -214,19 +215,25 @@ def calc_intents(self, query: str) -> Iterator[dict]: @param query: input to evaluate for an intent match @return: yields dict intent matches """ - # filter intents based on context/excluded keywords + # Lazy cache rebuild - only rebuild once after bulk registration + # This avoids O(n²) scaling during registration (rebuild on every add) + if self._cache_dirty: + self._rebuild_cache() + + # Filter based on runtime context/keywords (query and session dependent) excluded_intents = self._filter(query) - # do the work in parallel instead of sequentially - with concurrent.futures.ProcessPoolExecutor(max_workers=self.workers) as executor: - future_to_source = { - executor.submit(self._match, query, intent_name, regexes): intent_name - for intent_name, regexes in self.intent_samples.items() if intent_name not in excluded_intents - } - for future in concurrent.futures.as_completed(future_to_source): - res = future.result() - if res is not None: - yield res + # Sequential processing - threading overhead > actual work for regex matching + for intent_name, regexes in self._intent_list: + if intent_name in excluded_intents: + continue + res = self._match(query, intent_name, 
regexes) + if res is not None: + yield res + # Early exit optimization: perfect match found + # TODO: Some validation that we don't have duplicates, and warning if we do + if res.get("conf", 0) == 1.0: + return def calc_intent(self, query: str) -> Optional[dict]: """ @@ -234,7 +241,7 @@ def calc_intent(self, query: str) -> Optional[dict]: @param query: input to evaluate for an intent @return: dict matched intent (or None) """ - match = {'name': None, 'entities': {}} + match = {"name": None, "entities": {}} intents = [i for i in self.calc_intents(query) if i is not None and i.get("name")] if len(intents) == 0: LOG.info("No match") @@ -249,9 +256,9 @@ def calc_intent(self, query: str) -> Optional[dict]: match = ties[0] - for entity in set(match['entities'].keys()): - entities = match['entities'].pop(entity) - match['entities'][entity.lower()] = entities + for entity in set(match["entities"].keys()): + entities = match["entities"].pop(entity) + match["entities"][entity.lower()] = entities LOG.debug(match) return match @@ -260,6 +267,7 @@ def exclude_keywords(self, intent_name, samples): self.excluded_keywords[intent_name] = samples else: self.excluded_keywords[intent_name] += samples + self._cache_dirty = True # Mark cache as needing rebuild def set_context(self, intent_name, context_name, context_val=None): if intent_name not in self.available_contexts: @@ -271,11 +279,12 @@ def exclude_context(self, intent_name, context_name): self.excluded_contexts[intent_name] = [context_name] else: self.excluded_contexts[intent_name].append(context_name) + self._cache_dirty = True # Mark cache as needing rebuild def unexclude_context(self, intent_name, context_name): if intent_name in self.excluded_contexts: - self.excluded_contexts[intent_name] = [c for c in self.excluded_contexts[intent_name] - if context_name != c] + self.excluded_contexts[intent_name] = [c for c in self.excluded_contexts[intent_name] if context_name != c] + self._cache_dirty = True # Mark cache as needing 
rebuild def unset_context(self, intent_name, context_name): if intent_name in self.available_contexts: @@ -287,11 +296,12 @@ def require_context(self, intent_name, context_name): self.required_contexts[intent_name] = [context_name] else: self.required_contexts[intent_name].append(context_name) + self._cache_dirty = True # Mark cache as needing rebuild def unrequire_context(self, intent_name, context_name): if intent_name in self.required_contexts: - self.required_contexts[intent_name] = [c for c in self.required_contexts[intent_name] - if context_name != c] + self.required_contexts[intent_name] = [c for c in self.required_contexts[intent_name] if context_name != c] + self._cache_dirty = True # Mark cache as needing rebuild def _init_sm_word_type(): From afb5a120ccc6528bca9b76f2529c84a7a5319939 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 00:52:30 +0000 Subject: [PATCH 05/16] Update ovos-plugin-manager requirement (#34) Updates the requirements on [ovos-plugin-manager](https://github.com/OpenVoiceOS/OVOS-plugin-manager) to permit the latest version. - [Release notes](https://github.com/OpenVoiceOS/OVOS-plugin-manager/releases) - [Changelog](https://github.com/OpenVoiceOS/ovos-plugin-manager/blob/dev/CHANGELOG.md) - [Commits](https://github.com/OpenVoiceOS/OVOS-plugin-manager/compare/0.5.0...2.1.0) --- updated-dependencies: - dependency-name: ovos-plugin-manager dependency-version: 2.1.0 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- extras.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extras.txt b/extras.txt index 69032e5..2c44600 100644 --- a/extras.txt +++ b/extras.txt @@ -1,3 +1,3 @@ -ovos-plugin-manager>=0.5.0,<2.0.0 +ovos-plugin-manager>=0.5.0,<3.0.0 ovos-utils>=0.3.5,<1.0.0 langcodes \ No newline at end of file From 08dd4640851a2343d92bfee1da4b4a7dc9bd6c27 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Mon, 10 Nov 2025 00:52:41 +0000 Subject: [PATCH 06/16] Increment Version to 1.0.1a2 --- padacioso/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/padacioso/version.py b/padacioso/version.py index 4aed667..b649644 100644 --- a/padacioso/version.py +++ b/padacioso/version.py @@ -2,5 +2,5 @@ VERSION_MAJOR = 1 VERSION_MINOR = 0 VERSION_BUILD = 1 -VERSION_ALPHA = 1 +VERSION_ALPHA = 2 # END_VERSION_BLOCK From 417e71204cd51edd055a0c7e20a336a252ef1c0d Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Mon, 10 Nov 2025 00:53:01 +0000 Subject: [PATCH 07/16] Update Changelog --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe3b137..5d061a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [1.0.1a2](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a2) (2025-11-10) + +[Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a1...1.0.1a2) + +**Merged pull requests:** + +- Update ovos-plugin-manager requirement from \<2.0.0,\>=0.5.0 to \>=0.5.0,\<3.0.0 [\#34](https://github.com/OpenVoiceOS/padacioso/pull/34) ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix: padacioso speed [\#33](https://github.com/OpenVoiceOS/padacioso/pull/33) ([mikejgray](https://github.com/mikejgray)) + ## [1.0.1a1](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a1) (2025-06-16) [Full 
Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.0...1.0.1a1) From 71931c99937111394110942497a8863f06b4589d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 18 Dec 2025 03:56:33 +0000 Subject: [PATCH 08/16] Add renovate.json (#36) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- renovate.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 renovate.json diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..5db72dd --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended" + ] +} From 46a7bf00e8f87f349c79f11d93e7094d25508e02 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Thu, 18 Dec 2025 03:57:44 +0000 Subject: [PATCH 09/16] Increment Version to 1.0.1a3 --- padacioso/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/padacioso/version.py b/padacioso/version.py index b649644..4f493d9 100644 --- a/padacioso/version.py +++ b/padacioso/version.py @@ -2,5 +2,5 @@ VERSION_MAJOR = 1 VERSION_MINOR = 0 VERSION_BUILD = 1 -VERSION_ALPHA = 2 +VERSION_ALPHA = 3 # END_VERSION_BLOCK From 00cad09f337fd953c48bb632ebc30d983b9605bf Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Thu, 18 Dec 2025 04:02:06 +0000 Subject: [PATCH 10/16] Update Changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d061a8..2fe0ac7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [1.0.1a3](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a3) (2025-12-18) + +[Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a2...1.0.1a3) + +**Merged pull requests:** + +- chore: Configure Renovate [\#36](https://github.com/OpenVoiceOS/padacioso/pull/36) ([renovate[bot]](https://github.com/apps/renovate)) + ## 
[1.0.1a2](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a2) (2025-11-10) [Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a1...1.0.1a2) From 923d9bb0da1fcf4455d4daad9614b77ae04430df Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 19 Dec 2025 20:32:30 +0000 Subject: [PATCH 11/16] chore(deps): update dependency python to 3.14 (#37) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/build_tests.yml | 2 +- .github/workflows/publish_stable.yml | 2 +- .github/workflows/release_workflow.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index f204bb7..f05b0f4 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -13,7 +13,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: 3.14 - name: Install Build Tools run: | python -m pip install build wheel diff --git a/.github/workflows/publish_stable.yml b/.github/workflows/publish_stable.yml index 4e6128f..f2b7e7e 100644 --- a/.github/workflows/publish_stable.yml +++ b/.github/workflows/publish_stable.yml @@ -26,7 +26,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: 3.14 - name: Install Build Tools run: | python -m pip install build wheel diff --git a/.github/workflows/release_workflow.yml b/.github/workflows/release_workflow.yml index 0ff4764..0da4795 100644 --- a/.github/workflows/release_workflow.yml +++ b/.github/workflows/release_workflow.yml @@ -46,7 +46,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: 3.14 - name: Install Build Tools run: | python -m pip install build wheel @@ -75,7 +75,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: '3.10' + 
python-version: '3.14' - name: Get version from setup.py id: get_version From 96ffd27c0fdfffd80bd46b183f1c79c349cb2ac9 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Fri, 19 Dec 2025 20:33:21 +0000 Subject: [PATCH 12/16] Increment Version to 1.0.1a4 --- padacioso/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/padacioso/version.py b/padacioso/version.py index 4f493d9..b1c8ade 100644 --- a/padacioso/version.py +++ b/padacioso/version.py @@ -2,5 +2,5 @@ VERSION_MAJOR = 1 VERSION_MINOR = 0 VERSION_BUILD = 1 -VERSION_ALPHA = 3 +VERSION_ALPHA = 4 # END_VERSION_BLOCK From dca0e864a7330697b24881c1dd3ea5040b46bd61 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Fri, 19 Dec 2025 20:34:20 +0000 Subject: [PATCH 13/16] Update Changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fe0ac7..8023469 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [1.0.1a4](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a4) (2025-12-19) + +[Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a3...1.0.1a4) + +**Merged pull requests:** + +- chore\(deps\): update dependency python to 3.14 [\#37](https://github.com/OpenVoiceOS/padacioso/pull/37) ([renovate[bot]](https://github.com/apps/renovate)) + ## [1.0.1a3](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a3) (2025-12-18) [Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a2...1.0.1a3) From e7e0772c50ed153081170481dab15a9ce2f87fdd Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Tue, 21 Apr 2026 18:58:43 +0100 Subject: [PATCH 14/16] fix: normalize whitespace and apostrophes for training data and inference queries (#44) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: normalize whitespace and apostrophes in training data and queries - Add normalize_whitespace() to collapse runs of whitespace to a 
single space - Add normalize_apostrophes() to map curly/fancy quote variants to ASCII ' - Add normalize_utterance() combining both for use on plain text - Update normalize_example() to apply both normalizations at registration time - Apply normalize_utterance() to every query at the top of calc_intents() - Add 11 new tests: 3 utility unit tests + 8 integration tests covering double whitespace, apostrophe variants, entity interactions, and mixed cases Prevents STT output with curly apostrophes or extra spaces from failing to match intents that were trained with canonical punctuation. Co-Authored-By: Claude Sonnet 4.6 * feat: drop apostrophes + modernize CI workflows Normalization: - Switch from apostrophe normalization to dropping — "what's" and "whats" both reduce to "whats", covering all STT apostrophe variants without maintaining an exhaustive list of what to map to - Rename normalize_apostrophes() to drop_apostrophes() accordingly - Applied at both training time (normalize_example) and inference time (normalize_utterance called in calc_intents) - Update tests to reflect drop semantics; use unicode escapes for curly quote test data to keep source files ASCII-safe CI/packaging: - Replace legacy build_tests.yml/install_tests.yml/unit_tests.yml with modern shared workflow callers (build-tests, coverage, lint, opm-check, pip_audit, license_check, release-preview, repo-health) - Remove setup.py and requirements.txt — pyproject.toml covers all deps, entry points, and build config - Modernize release_workflow.yml and publish_stable.yml to thin callers Co-Authored-By: Claude Sonnet 4.6 * ci: remove unnecessary secrets from non-release workflows PYPI_TOKEN and MATRIX_TOKEN are only needed by release_workflow.yml and publish_stable.yml. All other workflow callers (build-tests, lint, coverage, opm-check, pip_audit, license_check, release-preview, repo-health) had them passed unnecessarily. 
Co-Authored-By: Claude Sonnet 4.6 * fix: add test extra and opm install_extras to resolve CI failures - Add [test] optional-dependencies group to pyproject.toml with ovos-utils, ovos-bus-client, ovos-plugin-manager so build-tests CI can install the deps required by test/test_pipeline.py - Add install_extras: 'extras' to opm-check.yml so ovos-plugin-manager is present when OPM scans for the entry point Co-Authored-By: Claude Sonnet 4.6 * fix: replace literal apostrophe glyphs with unicode escapes (RUF001) apostrophe_variants list in drop_apostrophes() used literal non-ASCII characters that trigger ruff RUF001. Replaced each with its \uXXXX escape sequence; runtime behavior is identical. Co-Authored-By: Claude Sonnet 4.6 * ci: install test extras in coverage workflow test/test_pipeline.py imports ovos_utils which is not a base dependency. Add install_extras: test so the coverage run has the same environment as the build-tests workflow. Co-Authored-By: Claude Sonnet 4.6 * feat: replace apostrophes with space, add entity suffix spacing - drop_apostrophes() now replaces with ' ' instead of '' so "it's" -> "it s" preserving word boundaries rather than merging tokens - _space_entities() inserts spaces around {placeholder} after parenthesis expansion so agglutinative suffixes like {keyword}ren become {keyword} ren and the capture group is not contaminated by the suffix - Add test_entity_suffix_spacing covering Basque-style suffix patterns Co-Authored-By: Claude Sonnet 4.6 * refactor: replace hand-rolled tree parser with regex+itertools expansion The old SentenceTreeParser/TreeFragment class hierarchy is replaced with a clean regex-based approach (ported from ovos-utils bracket_expansion): - [optional] expanded via re.sub before alternatives pass - (a|b) alternatives split via re.split and combined with itertools.product - Fixed-point loop handles nested expansions Same public API (expand_parentheses), 190 fewer lines, no external dep. 
Co-Authored-By: Claude Sonnet 4.6 * test: add TestExpandParentheses suite + fix double-space in optional expansion - Add 24 direct unit tests for expand_parentheses() covering: plain strings, empty input, entity placeholders, two/three alternatives, multiple independent groups, empty-alternative optional form, [optional] syntax, combined alternatives+optional, entity placeholders with alternatives and optional, whitespace handling, and deduplication - Fix double-space bug: when the empty branch of [optional] is taken the join left "word next"; add re.sub(r' +', ' ', ...) inside _fully_expand so internal runs of spaces are collapsed at expansion time Co-Authored-By: Claude Sonnet 4.6 * ci: fix coverage install_extras format to '.[test]' The coverage workflow does 'pip install ' verbatim, so the value must be a valid pip install target. 'test' was interpreted as a package name; '.[test]' correctly installs the local package with the test extra. (build-tests appends extras to the wheel path differently and is unaffected.) 
Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- .github/workflows/build-tests.yml | 14 + .github/workflows/build_tests.yml | 36 --- ...onal-label.yaml => conventional-label.yml} | 2 +- .github/workflows/coverage.yml | 16 + .github/workflows/install_tests.yml | 34 --- .github/workflows/license_check.yml | 10 + .github/workflows/license_tests.yml | 10 - .github/workflows/lint.yml | 13 + .github/workflows/opm-check.yml | 19 ++ .github/workflows/pip_audit.yml | 10 + .github/workflows/publish_stable.yml | 65 +---- .github/workflows/release-preview.yml | 13 + .github/workflows/release_workflow.yml | 108 +------ .github/workflows/repo-health.yml | 12 + .github/workflows/unit_tests.yml | 42 --- padacioso/__init__.py | 7 +- padacioso/bracket_expansion.py | 273 ++++++------------ padacioso/version.py | 2 + pyproject.toml | 41 +++ requirements.txt | 1 - setup.py | 68 ----- test/test_padacioso.py | 249 ++++++++++++++++ 22 files changed, 526 insertions(+), 519 deletions(-) create mode 100644 .github/workflows/build-tests.yml delete mode 100644 .github/workflows/build_tests.yml rename .github/workflows/{conventional-label.yaml => conventional-label.yml} (77%) create mode 100644 .github/workflows/coverage.yml delete mode 100644 .github/workflows/install_tests.yml create mode 100644 .github/workflows/license_check.yml delete mode 100644 .github/workflows/license_tests.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/opm-check.yml create mode 100644 .github/workflows/pip_audit.yml create mode 100644 .github/workflows/release-preview.yml create mode 100644 .github/workflows/repo-health.yml delete mode 100644 .github/workflows/unit_tests.yml create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml new file mode 100644 index 0000000..dff64be --- /dev/null +++ 
b/.github/workflows/build-tests.yml @@ -0,0 +1,14 @@ +name: Build Tests + +on: + pull_request: + branches: [dev, master, main] + workflow_dispatch: + +jobs: + build: + uses: OpenVoiceOS/gh-automations/.github/workflows/build-tests.yml@dev + with: + python_versions: '["3.10", "3.11", "3.12", "3.13", "3.14"]' + install_extras: 'test' + test_path: 'test' diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml deleted file mode 100644 index f05b0f4..0000000 --- a/.github/workflows/build_tests.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: Run Build Tests -on: - push: - workflow_dispatch: - -jobs: - build_tests: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - ref: ${{ github.head_ref }} - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: 3.14 - - name: Install Build Tools - run: | - python -m pip install build wheel - - name: Install System Dependencies - run: | - sudo apt-get update - sudo apt install python3-dev swig libssl-dev libfann-dev portaudio19-dev libpulse-dev - - name: Build Source Packages - run: | - python setup.py sdist - - name: Build Distribution Packages - run: | - python setup.py bdist_wheel - - name: Install tflite_runtime workaround tflit bug - run: | - pip3 install numpy - pip3 install --extra-index-url https://google-coral.github.io/py-repo/ tflite_runtime - - name: Install core repo - run: | - pip install .[audio-backend,mark1,stt,tts,skills_minimal,skills,gui,bus,all] diff --git a/.github/workflows/conventional-label.yaml b/.github/workflows/conventional-label.yml similarity index 77% rename from .github/workflows/conventional-label.yaml rename to .github/workflows/conventional-label.yml index 0a449cb..9894c1b 100644 --- a/.github/workflows/conventional-label.yaml +++ b/.github/workflows/conventional-label.yml @@ -7,4 +7,4 @@ jobs: label: runs-on: ubuntu-latest steps: - - uses: bcoe/conventional-release-labels@v1 \ No newline at end of file + - uses: 
bcoe/conventional-release-labels@v1 diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000..4528f7e --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,16 @@ +name: Code Coverage + +on: + pull_request: + branches: [dev] + workflow_dispatch: + +jobs: + coverage: + uses: OpenVoiceOS/gh-automations/.github/workflows/coverage.yml@dev + with: + python_version: '3.11' + coverage_source: 'padacioso' + test_path: 'test/' + install_extras: '.[test]' + min_coverage: 0 diff --git a/.github/workflows/install_tests.yml b/.github/workflows/install_tests.yml deleted file mode 100644 index 4aaabea..0000000 --- a/.github/workflows/install_tests.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Run Install Tests -on: - push: - branches: - - master - - dev - workflow_dispatch: - -jobs: - install: - strategy: - max-parallel: 2 - matrix: - python-version: [ 3.7, 3.8, 3.9, "3.10" ] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install Build Tools - run: | - python -m pip install build wheel - - name: Install System Dependencies - run: | - sudo apt-get update - sudo apt install python3-dev swig libssl-dev - - name: Build Distribution Packages - run: | - python setup.py bdist_wheel - - name: Install package - run: | - pip install .[all] \ No newline at end of file diff --git a/.github/workflows/license_check.yml b/.github/workflows/license_check.yml new file mode 100644 index 0000000..214edaa --- /dev/null +++ b/.github/workflows/license_check.yml @@ -0,0 +1,10 @@ +name: License Check + +on: + pull_request: + branches: [dev] + workflow_dispatch: + +jobs: + license_check: + uses: OpenVoiceOS/gh-automations/.github/workflows/license-check.yml@dev diff --git a/.github/workflows/license_tests.yml b/.github/workflows/license_tests.yml deleted file mode 100644 index 7d0c4f6..0000000 --- 
a/.github/workflows/license_tests.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: Run License Tests -on: - push: - workflow_dispatch: - pull_request: - branches: - - master -jobs: - license_tests: - uses: neongeckocom/.github/.github/workflows/license_tests.yml@master diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..0cb9564 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,13 @@ +name: Lint + +on: + pull_request: + branches: [dev, master, main] + workflow_dispatch: + +jobs: + lint: + uses: OpenVoiceOS/gh-automations/.github/workflows/lint.yml@dev + with: + ruff: true + pre_commit: false # set true if .pre-commit-config.yaml exists diff --git a/.github/workflows/opm-check.yml b/.github/workflows/opm-check.yml new file mode 100644 index 0000000..842e21d --- /dev/null +++ b/.github/workflows/opm-check.yml @@ -0,0 +1,19 @@ +name: OPM Plugin Check + +on: + pull_request: + branches: [dev, master, main] + workflow_dispatch: + +jobs: + opm_check: + uses: OpenVoiceOS/gh-automations/.github/workflows/opm-check.yml@dev + with: + python_version: '3.11' + install_extras: 'extras' + plugin_type: 'auto' + entry_point: '"ovos-padacioso-pipeline-plugin"' + opm_require_found: true + opm_validate_interface: true + opm_test_import: true + opm_perf_threshold_ms: 500 diff --git a/.github/workflows/pip_audit.yml b/.github/workflows/pip_audit.yml new file mode 100644 index 0000000..131320d --- /dev/null +++ b/.github/workflows/pip_audit.yml @@ -0,0 +1,10 @@ +name: PIP Audit + +on: + pull_request: + branches: [dev] + workflow_dispatch: + +jobs: + pip_audit: + uses: OpenVoiceOS/gh-automations/.github/workflows/pip-audit.yml@dev diff --git a/.github/workflows/publish_stable.yml b/.github/workflows/publish_stable.yml index f2b7e7e..f9aee05 100644 --- a/.github/workflows/publish_stable.yml +++ b/.github/workflows/publish_stable.yml @@ -1,58 +1,23 @@ -name: Stable Release +name: Publish Stable Release + on: - push: - branches: [master] 
workflow_dispatch: + push: + branches: [master, main] + +permissions: + contents: write # required for version bump commit and release tag jobs: publish_stable: - uses: TigreGotico/gh-automations/.github/workflows/publish-stable.yml@master - secrets: inherit + if: github.actor != 'github-actions[bot]' + uses: OpenVoiceOS/gh-automations/.github/workflows/publish-stable.yml@dev + secrets: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + MATRIX_TOKEN: ${{ secrets.MATRIX_TOKEN }} with: - branch: 'master' version_file: 'padacioso/version.py' - setup_py: 'setup.py' + publish_pypi: true publish_release: true - - publish_pypi: - needs: publish_stable - if: success() # Ensure this job only runs if the previous job succeeds - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - ref: dev - fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: 3.14 - - name: Install Build Tools - run: | - python -m pip install build wheel - - name: version - run: echo "::set-output name=version::$(python setup.py --version)" - id: version - - name: Build Distribution Packages - run: | - python setup.py sdist bdist_wheel - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@master - with: - password: ${{secrets.PYPI_TOKEN}} - - - sync_dev: - needs: publish_stable - if: success() # Ensure this job only runs if the previous job succeeds - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. 
- ref: master - - name: Push master -> dev - uses: ad-m/github-push-action@master - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - branch: dev \ No newline at end of file + sync_dev: true + notify_matrix: true diff --git a/.github/workflows/release-preview.yml b/.github/workflows/release-preview.yml new file mode 100644 index 0000000..fdcb46e --- /dev/null +++ b/.github/workflows/release-preview.yml @@ -0,0 +1,13 @@ +name: Release Preview + +on: + pull_request: + branches: [dev] + workflow_dispatch: + +jobs: + release_preview: + uses: OpenVoiceOS/gh-automations/.github/workflows/release-preview.yml@dev + with: + package_name: 'padacioso' + version_file: 'padacioso/version.py' diff --git a/.github/workflows/release_workflow.yml b/.github/workflows/release_workflow.yml index 0da4795..82ec027 100644 --- a/.github/workflows/release_workflow.yml +++ b/.github/workflows/release_workflow.yml @@ -1,108 +1,28 @@ name: Release Alpha and Propose Stable on: + workflow_dispatch: pull_request: types: [closed] branches: [dev] +permissions: + contents: write + pull-requests: write + jobs: publish_alpha: - if: github.event.pull_request.merged == true - uses: TigreGotico/gh-automations/.github/workflows/publish-alpha.yml@master - secrets: inherit + if: github.event.pull_request.merged == true || github.event_name == 'workflow_dispatch' + uses: OpenVoiceOS/gh-automations/.github/workflows/publish-alpha.yml@dev + secrets: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + MATRIX_TOKEN: ${{ secrets.MATRIX_TOKEN }} with: branch: 'dev' version_file: 'padacioso/version.py' - setup_py: 'setup.py' update_changelog: true publish_prerelease: true - changelog_max_issues: 100 - - notify: - if: github.event.pull_request.merged == true - needs: publish_alpha - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Send message to Matrix bots channel - id: matrix-chat-message - uses: fadenb/matrix-chat-message@v0.0.6 - with: - homeserver: 'matrix.org' - token: ${{ secrets.MATRIX_TOKEN 
}} - channel: '!WjxEKjjINpyBRPFgxl:krbel.duckdns.org' - message: | - new ${{ github.event.repository.name }} PR merged! https://github.com/${{ github.repository }}/pull/${{ github.event.number }} - - publish_pypi: - needs: publish_alpha - if: success() # Ensure this job only runs if the previous job succeeds - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - ref: dev - fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: 3.14 - - name: Install Build Tools - run: | - python -m pip install build wheel - - name: version - run: echo "::set-output name=version::$(python setup.py --version)" - id: version - - name: Build Distribution Packages - run: | - python setup.py sdist bdist_wheel - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@master - with: - password: ${{secrets.PYPI_TOKEN}} - - - propose_release: - needs: publish_alpha - if: success() # Ensure this job only runs if the previous job succeeds - runs-on: ubuntu-latest - steps: - - name: Checkout dev branch - uses: actions/checkout@v3 - with: - ref: dev - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: '3.14' - - - name: Get version from setup.py - id: get_version - run: | - VERSION=$(python setup.py --version) - echo "VERSION=$VERSION" >> $GITHUB_ENV - - - name: Create and push new branch - run: | - git checkout -b release-${{ env.VERSION }} - git push origin release-${{ env.VERSION }} - - - name: Open Pull Request from dev to master - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - # Variables - BRANCH_NAME="release-${{ env.VERSION }}" - BASE_BRANCH="master" - HEAD_BRANCH="release-${{ env.VERSION }}" - PR_TITLE="Release ${{ env.VERSION }}" - PR_BODY="Human review requested!" 
- - # Create a PR using GitHub API - curl -X POST \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - -d "{\"title\":\"$PR_TITLE\",\"body\":\"$PR_BODY\",\"head\":\"$HEAD_BRANCH\",\"base\":\"$BASE_BRANCH\"}" \ - https://api.github.com/repos/${{ github.repository }}/pulls - + propose_release: true + changelog_max_issues: 50 + publish_pypi: true + notify_matrix: true diff --git a/.github/workflows/repo-health.yml b/.github/workflows/repo-health.yml new file mode 100644 index 0000000..b538624 --- /dev/null +++ b/.github/workflows/repo-health.yml @@ -0,0 +1,12 @@ +name: Repo Health + +on: + pull_request: + branches: [dev, master, main] + workflow_dispatch: + +jobs: + repo_health: + uses: OpenVoiceOS/gh-automations/.github/workflows/repo-health.yml@dev + with: + version_file: 'padacioso/version.py' diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml deleted file mode 100644 index 9cc5972..0000000 --- a/.github/workflows/unit_tests.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Run UnitTests -on: - pull_request: - branches: - - dev - push: - workflow_dispatch: - -jobs: - py_build_tests: - uses: neongeckocom/.github/.github/workflows/python_build_tests.yml@master - unit_tests: - strategy: - max-parallel: 2 - matrix: - python-version: [3.9, "3.10", "3.11", "3.12", "3.13"] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install System Dependencies - run: | - python -m pip install build wheel - - name: Install repo - run: | - pip install . 
- - name: Install test dependencies - run: | - pip install pytest pytest-timeout pytest-cov - - name: Run unittests - run: | - pytest --cov=padacioso --cov-report xml test/test_padacioso.py - # NOTE: additional pytest invocations should also add the --cov-append flag - # or they will overwrite previous invocations' coverage reports - # (for an example, see OVOS Skill Manager's workflow) - - name: Upload coverage - env: - CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}} - uses: codecov/codecov-action@v3 diff --git a/padacioso/__init__.py b/padacioso/__init__.py index f0f0a46..86fcf00 100644 --- a/padacioso/__init__.py +++ b/padacioso/__init__.py @@ -2,7 +2,7 @@ import simplematch -from padacioso.bracket_expansion import expand_parentheses, normalize_example +from padacioso.bracket_expansion import expand_parentheses, normalize_example, normalize_utterance, _space_entities try: from ovos_utils.log import LOG @@ -72,7 +72,8 @@ def add_intent(self, name: str, lines: List[str]): raise RuntimeError(f"Attempted to re-register existing intent: {name}") expanded = [] for l in lines: - expanded += expand_parentheses(normalize_example(l)) + for e in expand_parentheses(normalize_example(l)): + expanded.append(normalize_utterance(_space_entities(e))) regexes = list(set(expanded)) regexes.sort(key=len, reverse=True) self.intent_samples[name] = regexes @@ -215,6 +216,8 @@ def calc_intents(self, query: str) -> Iterator[dict]: @param query: input to evaluate for an intent match @return: yields dict intent matches """ + query = normalize_utterance(query) + # Lazy cache rebuild - only rebuild once after bulk registration # This avoids O(n²) scaling during registration (rebuild on every add) if self._cache_dirty: diff --git a/padacioso/bracket_expansion.py b/padacioso/bracket_expansion.py index e49b268..6d6d519 100644 --- a/padacioso/bracket_expansion.py +++ b/padacioso/bracket_expansion.py @@ -1,190 +1,44 @@ -class TreeFragment: - """(Abstract) empty sentence fragment""" +import itertools 
+import re - def __init__(self, tree): - """ - Construct a sentence tree fragment which is merely a wrapper for - a list of Strings - Args: - tree (?): Base tree for the sentence fragment, type depends on - subclass, refer to those subclasses - """ - self._tree = tree - - def tree(self): - """Return the represented sentence tree as raw data.""" - return self._tree - - def expand(self): - """ - Expanded version of the fragment. In this case an empty sentence. - - Returns: - List>: A list with an empty sentence (= token/string list) - """ - return [[]] - - def __str__(self): - return self._tree.__str__() - - def __repr__(self): - return self._tree.__repr__() - - -class Word(TreeFragment): - """ - Single word in the sentence tree. - - Construct with a string as argument. +def expand_parentheses(sent: str) -> list: """ + Expand a template string with (a|b) alternatives and [optional] syntax + into all possible combinations. - def expand(self): - """ - Creates one sentence that contains exactly that word. - - Returns: - List>: A list with the given string as sentence - (= token/string list) - """ - return [[self._tree]] - - -class Sentence(TreeFragment): - """ - A Sentence made of several concatenations/words. - - Construct with a List as argument. - """ - - def expand(self): - """ - Creates a combination of all sub-sentences. - - Returns: - List>: A list with all subsentence expansions combined in - every possible way - """ - old_expanded = [[]] - for sub in self._tree: - sub_expanded = sub.expand() - new_expanded = [] - while len(old_expanded) > 0: - sentence = old_expanded.pop() - for new in sub_expanded: - new_expanded.append(sentence + new) - old_expanded = new_expanded - return old_expanded - - -class SentenceTree(TreeFragment): - """ - A Combination of possible sub-sentences. - - Construct with List as argument. - """ - - def expand(self): - """ - Returns all of its options as seperated sub-sentences. 
- - Returns: - List>: A list containing the sentences created by all - expansions of its sub-sentences - """ - options = [] - for option in self._tree: - options.extend(option.expand()) - return options - - -class SentenceTreeParser: + Examples: + "Will it (rain|pour) [today]?" -> + ["Will it rain today?", "Will it rain?", + "Will it pour today?", "Will it pour?"] """ - Generate sentence token trees from a list of sentence - ['1', '(', '2', '|', '3, ')'] -> [['1', '2'], ['1', '3']] - """ - - def __init__(self, sentence): - # the syntax for .optionally is square brackets - # "hello [world]" - # this is equivalent to using .one_of - # "hello (world|) - sentence = sentence.replace("[", "(").replace("]", "|)") - self.sentence = sentence - - def _parse(self): - """ - Generate sentence token trees - ['1', '(', '2', '|', '3, ')'] -> ['1', ['2', '3']] - """ - self._current_position = 0 - return self._parse_expr() - - def _parse_expr(self): - """ - Generate sentence token trees from the current position to - the next closing parentheses / end of the list and return it - ['1', '(', '2', '|', '3, ')'] -> ['1', [['2'], ['3']]] - ['2', '|', '3'] -> [['2'], ['3']] - """ - # List of all generated sentences - sentence_list = [] - # Currently active sentence - cur_sentence = [] - sentence_list.append(Sentence(cur_sentence)) - # Determine which form the current expression has - while self._current_position < len(self.sentence): - cur = self.sentence[self._current_position] - self._current_position += 1 - if cur == '(': - # Parse the subexpression - subexpr = self._parse_expr() - # Check if the subexpression only has one branch - # -> If so, append "(" and ")" and add it as is - normal_brackets = False - if len(subexpr.tree()) == 1: - normal_brackets = True - cur_sentence.append(Word('(')) - # add it to the sentence - cur_sentence.append(subexpr) - if normal_brackets: - cur_sentence.append(Word(')')) - elif cur == '|': - # Begin parsing a new sentence - cur_sentence = [] - 
sentence_list.append(Sentence(cur_sentence)) - elif cur == ')': - # End parsing the current subexpression - break - # TODO anything special about {sth}? + def _expand_optional(text): + return re.sub(r"\[([^\[\]]+)\]", lambda m: f"({m.group(1)}|)", text) + + def _expand_alternatives(text): + parts = [] + for segment in re.split(r"(\([^\(\)]+\))", text): + if segment.startswith("(") and segment.endswith(")"): + parts.append(segment[1:-1].split("|")) else: - cur_sentence.append(Word(cur)) - return SentenceTree(sentence_list) + parts.append([segment]) + return itertools.product(*parts) + + def _fully_expand(texts): + result = set(texts) + while True: + expanded = set() + for text in result: + for combo in _expand_alternatives(text): + # collapse internal whitespace so the empty branch of + # [optional] doesn't leave a double space + expanded.add(re.sub(r' +', ' ', "".join(combo)).strip()) + if expanded == result: + break + result = expanded + return sorted(result) - def expand_parentheses(self): - tree = self._parse() - return tree.expand() - - -def expand_parentheses(sent): - """ - ['1', '(', '2', '|', '3, ')'] -> [['1', '2'], ['1', '3']] - For example: - Will it (rain|pour) (today|tomorrow|)? - ----> - Will it rain today? - Will it rain tomorrow? - Will it rain? - Will it pour today? - Will it pour tomorrow? - Will it pour? - Args: - sent (list): List of sentence in sentence - Returns: - list>: Multiple possible sentences from original - """ - expanded = SentenceTreeParser(sent).expand_parentheses() - return ["".join(_).strip() for _ in expanded] + return _fully_expand([_expand_optional(sent)]) def clean_braces(example: str) -> str: @@ -214,5 +68,62 @@ def translate_padatious(example: str) -> str: return " ".join(tokens) +def normalize_whitespace(text: str) -> str: + """ + Collapse multiple consecutive whitespace characters into a single space + and strip leading/trailing whitespace. 
+ @param text: input text + @return: whitespace-normalized text + """ + return re.sub(r'\s+', ' ', text).strip() + + +def drop_apostrophes(text: str) -> str: + """ + Replace apostrophes and common apostrophe-like unicode variants with a space. + Using a space rather than empty string preserves word boundaries so that + "it's" -> "it s" and both sides of a match reduce the same way. + @param text: input text + @return: text with all apostrophe variants replaced by a space + """ + apostrophe_variants = [ + "'", # U+0027 ASCII apostrophe + "’", # U+2019 RIGHT SINGLE QUOTATION MARK + "‘", # U+2018 LEFT SINGLE QUOTATION MARK + "ʼ", # U+02BC MODIFIER LETTER APOSTROPHE + "ʹ", # U+02B9 MODIFIER LETTER PRIME + "`", # U+0060 GRAVE ACCENT (backtick) + "´", # U+00B4 ACUTE ACCENT + "'", # U+FF07 FULLWIDTH APOSTROPHE + ] + for variant in apostrophe_variants: + text = text.replace(variant, " ") + return text + + +def _space_entities(text: str) -> str: + """ + Ensure a space exists on both sides of every {entity} placeholder. + Handles agglutinative suffixes like {keyword}ren so the suffix becomes + a separate token and the capture group is not contaminated. + """ + return re.sub(r'(\{[^}]+\})', r' \1 ', text) + + +def normalize_utterance(text: str) -> str: + """ + Normalize a plain utterance (inference query) for consistent matching. + Does NOT touch entity placeholder syntax. 
+ @param text: input utterance + @return: normalized text + """ + text = drop_apostrophes(text) + text = normalize_whitespace(text) + return text + + def normalize_example(example: str) -> str: - return clean_braces(translate_padatious(example)) + text = clean_braces(translate_padatious(example)) + text = drop_apostrophes(text) + text = normalize_whitespace(text) + return text diff --git a/padacioso/version.py b/padacioso/version.py index b1c8ade..e89635f 100644 --- a/padacioso/version.py +++ b/padacioso/version.py @@ -4,3 +4,5 @@ VERSION_BUILD = 1 VERSION_ALPHA = 4 # END_VERSION_BLOCK + +__version__ = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}" + (f"a{VERSION_ALPHA}" if VERSION_ALPHA else "") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7bebb24 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,41 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "padacioso" +dynamic = ["version"] +description = "dead simple intent parser" +readme = "readme.md" +license = { text = "Apache-2.0" } +authors = [{ name = "jarbasai", email = "jarbasai@mailfence.com" }] +requires-python = ">=3.8" +dependencies = [ + "simplematch", +] + +[project.optional-dependencies] +extras = [ + "ovos-plugin-manager>=0.5.0,<3.0.0", + "ovos-utils>=0.3.5,<1.0.0", + "langcodes", +] +test = [ + "ovos-plugin-manager>=0.5.0,<3.0.0", + "ovos-utils>=0.3.5,<1.0.0", + "ovos-bus-client>=0.0.8,<1.0.0", + "langcodes", +] + +[project.urls] +Homepage = "https://github.com/OpenVoiceOS/padacioso" + +[project.entry-points."opm.pipeline"] +"ovos-padacioso-pipeline-plugin" = "padacioso.opm:PadaciosoPipeline" + +[tool.setuptools.dynamic] +version = { attr = "padacioso.version.__version__" } + +[tool.setuptools.packages.find] +where = ["."] +include = ["padacioso*"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index fb508d3..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ 
-simplematch \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 79391c0..0000000 --- a/setup.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -from setuptools import setup - -BASEDIR = os.path.abspath(os.path.dirname(__file__)) - - -def get_version(): - """ Find the version of the package""" - version_file = os.path.join(BASEDIR, 'padacioso', 'version.py') - major, minor, build, alpha = (None, None, None, None) - with open(version_file) as f: - for line in f: - if 'VERSION_MAJOR' in line: - major = line.split('=')[1].strip() - elif 'VERSION_MINOR' in line: - minor = line.split('=')[1].strip() - elif 'VERSION_BUILD' in line: - build = line.split('=')[1].strip() - elif 'VERSION_ALPHA' in line: - alpha = line.split('=')[1].strip() - - if ((major and minor and build and alpha) or - '# END_VERSION_BLOCK' in line): - break - version = f"{major}.{minor}.{build}" - if alpha and int(alpha) > 0: - version += f"a{alpha}" - return version - - -def package_files(directory): - paths = [] - for (path, directories, filenames) in os.walk(directory): - for filename in filenames: - paths.append(os.path.join('..', path, filename)) - return paths - - -def required(requirements_file): - """ Read requirements file and remove comments and empty lines. 
""" - with open(os.path.join(BASEDIR, requirements_file), 'r') as f: - requirements = f.read().splitlines() - if 'MYCROFT_LOOSE_REQUIREMENTS' in os.environ: - print('USING LOOSE REQUIREMENTS!') - requirements = [r.replace('==', '>=').replace('~=', '>=') for r in requirements] - return [pkg for pkg in requirements - if pkg.strip() and not pkg.startswith("#")] - - -PLUGIN_ENTRY_POINT = 'ovos-padacioso-pipeline-plugin=padacioso.opm:PadaciosoPipeline' - - -setup( - name='padacioso', - version=get_version(), - packages=['padacioso'], - package_data={'': package_files('padacioso')}, - url='https://github.com/OpenVoiceOS/padacioso', - license='apache-2.0', - author='jarbasai', - author_email='jarbasai@mailfence.com', - install_requires=required("requirements.txt"), - extras_require={ - 'extras': required('extras.txt') - }, - entry_points={'opm.pipeline': PLUGIN_ENTRY_POINT}, - description='dead simple intent parser' -) diff --git a/test/test_padacioso.py b/test/test_padacioso.py index d4dfd5a..6122169 100644 --- a/test/test_padacioso.py +++ b/test/test_padacioso.py @@ -1,4 +1,5 @@ from padacioso import IntentContainer +from padacioso.bracket_expansion import expand_parentheses import unittest @@ -229,3 +230,251 @@ def test_add_padatious_wildcard_intent(self): self.assertEqual(match['entities']['word0'], 'neon') self.assertEqual(match['entities']['word1'], 'neon') + # normalization unit tests + def test_normalize_whitespace_util(self): + from padacioso.bracket_expansion import normalize_whitespace + self.assertEqual(normalize_whitespace("hello world"), "hello world") + self.assertEqual(normalize_whitespace(" hello world "), "hello world") + self.assertEqual(normalize_whitespace("one\ttwo\nthree"), "one two three") + self.assertEqual(normalize_whitespace("already fine"), "already fine") + self.assertEqual(normalize_whitespace(""), "") + + def test_drop_apostrophes_util(self): + from padacioso.bracket_expansion import drop_apostrophes + # apostrophes replaced with space to 
preserve word boundaries + self.assertEqual(drop_apostrophes("what's up"), "what s up") + # U+2019 RIGHT SINGLE QUOTATION MARK + self.assertEqual(drop_apostrophes("what's up"), "what s up") + # U+2018 LEFT SINGLE QUOTATION MARK + self.assertEqual(drop_apostrophes("what's up"), "what s up") + # backtick + self.assertEqual(drop_apostrophes("what`s up"), "what s up") + # U+02BC MODIFIER LETTER APOSTROPHE + self.assertEqual(drop_apostrophes("whatʼs up"), "what s up") + # no apostrophe — unchanged + self.assertEqual(drop_apostrophes("what s up"), "what s up") + + def test_normalize_example_util(self): + from padacioso.bracket_expansion import normalize_example + self.assertEqual(normalize_example(" hello world "), "hello world") + # apostrophe replaced with space, then whitespace collapsed + self.assertEqual(normalize_example("what's up"), "what s up") + self.assertEqual(normalize_example("{{entity}}"), "{entity}") + # combined: curly apostrophe + whitespace + braces cleaned + self.assertEqual(normalize_example(" what's {{place}} "), "what s {place}") + + # normalization integration tests + def test_double_whitespace_in_query(self): + """Extra whitespace in the spoken query should not prevent matching.""" + container = IntentContainer() + container.add_intent('hello', ['hello world']) + self.assertEqual(container.calc_intent('hello world')['name'], 'hello') + self.assertEqual(container.calc_intent(' hello world ')['name'], 'hello') + self.assertEqual(container.calc_intent('hello world')['name'], 'hello') + + def test_double_whitespace_in_training(self): + """Extra whitespace in training data should be collapsed at registration time.""" + container = IntentContainer() + container.add_intent('hello', ['hello world']) + self.assertIn('hello world', container.intent_samples['hello']) + self.assertNotIn('hello world', container.intent_samples['hello']) + self.assertEqual(container.calc_intent('hello world')['name'], 'hello') + + def test_apostrophe_variants_in_query(self): + 
"""All apostrophe variants in a query should match — both sides normalize the same way.""" + container = IntentContainer() + container.add_intent('whats_up', ["what's up"]) + # stored as "what s up"; query variants also reduce to "what s up" + self.assertEqual(container.calc_intent("what s up")['name'], 'whats_up') + self.assertEqual(container.calc_intent("what's up")['name'], 'whats_up') + # U+2019 RIGHT SINGLE QUOTATION MARK — common from voice STT + self.assertEqual(container.calc_intent("what's up")['name'], 'whats_up') + # backtick + self.assertEqual(container.calc_intent('what`s up')['name'], 'whats_up') + # U+02BC MODIFIER LETTER APOSTROPHE + self.assertEqual(container.calc_intent("whatʼs up")['name'], 'whats_up') + + def test_apostrophe_variants_in_training(self): + """Apostrophes in training examples should be replaced with spaces at registration time.""" + container = IntentContainer() + container.add_intent('whats_up', ["what's up"]) + self.assertIn("what s up", container.intent_samples['whats_up']) + self.assertNotIn("what's up", container.intent_samples['whats_up']) + # curly apostrophe (U+2018) normalizes the same way + container.add_intent('curly_test', ["what's new"]) + self.assertIn("what s new", container.intent_samples['curly_test']) + + def test_apostrophe_with_entity(self): + """Apostrophe normalization should work alongside entity extraction.""" + container = IntentContainer() + container.add_intent('navigate', ["navigate to {place}"]) + match = container.calc_intent("navigate to the store") + self.assertEqual(match['name'], 'navigate') + self.assertEqual(match['entities']['place'], 'the store') + + def test_whitespace_with_entity(self): + """Whitespace normalization should not corrupt extracted entity values.""" + container = IntentContainer() + container.add_intent('buy', ['buy {item}']) + match = container.calc_intent('buy milk') + self.assertEqual(match['name'], 'buy') + self.assertEqual(match['entities']['item'], 'milk') + + def 
test_leading_trailing_whitespace_query(self): + """Leading/trailing whitespace on the query should be stripped.""" + container = IntentContainer() + container.add_intent('hello', ['hello']) + self.assertEqual(container.calc_intent(' hello ')['name'], 'hello') + + def test_mixed_normalization(self): + """Combined apostrophe and whitespace issues should both be handled.""" + container = IntentContainer() + container.add_intent('whats_up', ["what's up"]) + # curly apostrophe + double space → "what s up" on both sides + self.assertEqual(container.calc_intent("what’s  up")['name'], 'whats_up') + self.assertEqual(container.calc_intent("what's up")['name'], 'whats_up') + + def test_entity_suffix_spacing(self): + """Agglutinative suffixes attached to {entity} placeholders should still match.""" + container = IntentContainer() + # Basque-style patterns where suffix is glued to the placeholder + container.add_intent('doktore', [ + 'zeintzuk ziren {keyword}ren doktore-ikasleak', + 'nork egin zuen doktoretza {keyword}rekin', + ]) + # the suffix is separated at training time so the entity captures just the keyword + match = container.calc_intent('zeintzuk ziren Einstein ren doktore-ikasleak') + self.assertEqual(match['name'], 'doktore') + self.assertEqual(match['entities']['keyword'], 'Einstein') + + match = container.calc_intent('nork egin zuen doktoretza Curie rekin') + self.assertEqual(match['name'], 'doktore') + self.assertEqual(match['entities']['keyword'], 'Curie') + + +class TestExpandParentheses(unittest.TestCase): + + # --- no-op cases --- + + def test_plain_string(self): + self.assertEqual(expand_parentheses("hello world"), ["hello world"]) + + def test_empty_string(self): + self.assertEqual(expand_parentheses(""), [""]) + + def test_entity_placeholder_untouched(self): + # {entity} must survive expansion unchanged + self.assertEqual(expand_parentheses("buy {item}"), ["buy {item}"]) + + def test_typed_entity_untouched(self): + self.assertEqual(expand_parentheses("set
volume {level:int}"), ["set volume {level:int}"]) + + # --- (a|b) alternatives --- + + def test_two_alternatives(self): + self.assertEqual(expand_parentheses("(hello|hi)"), + sorted(["hello", "hi"])) + + def test_three_alternatives(self): + self.assertEqual(expand_parentheses("(hello|hi|hey) world"), + sorted(["hello world", "hey world", "hi world"])) + + def test_alternatives_at_end(self): + self.assertEqual(expand_parentheses("turn (on|off)"), + sorted(["turn off", "turn on"])) + + def test_alternatives_in_middle(self): + self.assertEqual(expand_parentheses("I (want|need) coffee"), + sorted(["I need coffee", "I want coffee"])) + + def test_two_independent_groups(self): + self.assertEqual( + expand_parentheses("(a|b) (c|d)"), + sorted(["a c", "a d", "b c", "b d"]) + ) + + def test_three_independent_groups(self): + self.assertEqual( + expand_parentheses("(a|b) (c|d) (e|f)"), + sorted(["a c e", "a c f", "a d e", "a d f", + "b c e", "b c f", "b d e", "b d f"]) + ) + + def test_empty_alternative_makes_optional(self): + # (word|) is the canonical optional form + self.assertEqual(expand_parentheses("hello (world|)"), + sorted(["hello", "hello world"])) + + def test_single_item_group(self): + # (word) with no pipe — parens stripped, single result + result = expand_parentheses("hello (world)") + self.assertEqual(result, ["hello world"]) + + # --- [optional] syntax --- + + def test_optional_word(self): + self.assertEqual(expand_parentheses("hey [world]"), + sorted(["hey", "hey world"])) + + def test_optional_at_start(self): + self.assertEqual(expand_parentheses("[please] turn on"), + sorted(["please turn on", "turn on"])) + + def test_optional_at_end(self): + self.assertEqual(expand_parentheses("turn on [the light]"), + sorted(["turn on", "turn on the light"])) + + def test_two_optional_groups(self): + self.assertEqual( + expand_parentheses("[please] turn [on]"), + sorted(["please turn", "please turn on", "turn", "turn on"]) + ) + + def 
test_optional_entity_placeholder(self): + self.assertEqual(expand_parentheses("hi [{person}|people]"), + sorted(["hi", "hi {person}", "hi people"])) + + # --- nested / combined --- + + def test_alternatives_inside_optional(self): + self.assertEqual( + expand_parentheses("set [the] (light|fan)"), + sorted(["set light", "set fan", "set the light", "set the fan"]) + ) + + def test_optional_and_alternatives_combined(self): + result = expand_parentheses("(turn|switch) [the] (light|fan) (on|off)") + self.assertEqual(len(result), 16) # 2 * 2 * 2 * 2 + self.assertIn("turn the light on", result) + self.assertIn("switch fan off", result) + + def test_entity_with_alternatives(self): + self.assertEqual( + expand_parentheses("(buy|purchase) {item}"), + sorted(["buy {item}", "purchase {item}"]) + ) + + def test_entity_with_optional(self): + self.assertEqual( + expand_parentheses("eat [some] {fruit}"), + sorted(["eat {fruit}", "eat some {fruit}"]) + ) + + # --- whitespace handling --- + + def test_leading_trailing_spaces_stripped(self): + for result in expand_parentheses(" hello "): + self.assertEqual(result, result.strip()) + + def test_internal_spaces_preserved(self): + results = expand_parentheses("(good morning|hi) there") + self.assertIn("good morning there", results) + self.assertIn("hi there", results) + + # --- deduplication --- + + def test_duplicate_alternatives_deduplicated(self): + # (a|a) should produce one "a", not two + result = expand_parentheses("(hello|hello)") + self.assertEqual(result, ["hello"]) + From 143c21eae1711cd518071a6ba8e14a061f411d64 Mon Sep 17 00:00:00 2001 From: JarbasAl <33701864+JarbasAl@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:58:56 +0000 Subject: [PATCH 15/16] Increment Version to 1.0.2a1 --- padacioso/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/padacioso/version.py b/padacioso/version.py index e89635f..d04a515 100644 --- a/padacioso/version.py +++ b/padacioso/version.py @@ -1,8 +1,8 @@ # 
START_VERSION_BLOCK VERSION_MAJOR = 1 VERSION_MINOR = 0 -VERSION_BUILD = 1 -VERSION_ALPHA = 4 +VERSION_BUILD = 2 +VERSION_ALPHA = 1 # END_VERSION_BLOCK __version__ = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}" + (f"a{VERSION_ALPHA}" if VERSION_ALPHA else "") From 6f1b585bd2fb0199b3a098e88c627436ca74bdac Mon Sep 17 00:00:00 2001 From: JarbasAl <33701864+JarbasAl@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:59:25 +0000 Subject: [PATCH 16/16] Update Changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8023469..e313c50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [1.0.2a1](https://github.com/OpenVoiceOS/padacioso/tree/1.0.2a1) (2026-04-21) + +[Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a4...1.0.2a1) + +**Merged pull requests:** + +- fix: normalize whitespace and apostrophes for training data and inference queries [\#44](https://github.com/OpenVoiceOS/padacioso/pull/44) ([JarbasAl](https://github.com/JarbasAl)) + ## [1.0.1a4](https://github.com/OpenVoiceOS/padacioso/tree/1.0.1a4) (2025-12-19) [Full Changelog](https://github.com/OpenVoiceOS/padacioso/compare/1.0.1a3...1.0.1a4)