diff --git a/README.md b/README.md
index 45f488c..b1d337f 100644
--- a/README.md
+++ b/README.md
@@ -290,6 +290,7 @@ This category also contains `ascii85`, `adobe`, `[x]btoa`, `zeromq` with the `ba
 - [X] `rotN`: aka Caesar cipher (*N* belongs to [1,25])
 - [X] `scytaleN`: encrypts using the number of letters on the rod (*N* belongs to [1,[)
 - [X] `shiftN`: shift ordinals (*N* belongs to [1,255])
+- [X] `vigenere`: aka Vigenere Cipher
 - [X] `xorN`: XOR with a single byte (*N* belongs to [1,255])
 
 > :warning: Crypto functions are of course definitely **NOT** encoding functions ; they are implemented for leveraging the `.encode(...)` API from `codecs`.
diff --git a/docs/pages/enc/crypto.md b/docs/pages/enc/crypto.md
index 71d89e9..432ac3d 100644
--- a/docs/pages/enc/crypto.md
+++ b/docs/pages/enc/crypto.md
@@ -202,6 +202,25 @@ This is a dynamic encoding, that is, it can be called with an integer to define
 
 -----
 
+### Vigenere Cipher
+
+This is a dynamic encoding, that is, the key is supplied as part of the encoding name. There is no default key, meaning that `vigenere` as the encoding scheme throws a `LookupError` indicating that the _key must be a non-empty alphabetic string_.
+
+**Codec** | **Conversions** | **Aliases** | **Comment**
+:---: | :---: | --- | ---
+`vigenere` | text <-> Vigenere ciphertext | `vigenere-abcdef`, `vigenere_MySuperSecret` | key only consists of letters, not digits
+
+```python
+>>> codext.encode("This is a test !", "vigenere-abababa")
+'Tiit it a tfsu !'
+>>> codext.encode("This is a test !", "vigenere_MySuperSecret")
+'Ffam xw r liuk !'
+>>> codext.decode("Tiit it a tfsu !", "vigenere-abababa")
+'This is a test !'
+```
+
+-----
+
 ### XOR with 1 byte
 
 This is a dynamic encoding, that is, it can be called with an integer to define the ordinal of the byte to XOR with the input text.
diff --git a/src/codext/crypto/__init__.py b/src/codext/crypto/__init__.py
index 0854db2..8dc4480 100644
--- a/src/codext/crypto/__init__.py
+++ b/src/codext/crypto/__init__.py
@@ -1,6 +1,7 @@
 # -*- coding: UTF-8 -*-
 from .affine import *
 from .atbash import *
 from .bacon import *
 from .barbie import *
+from .bazeries import *
 from .citrix import *
@@ -9,5 +10,6 @@ from .rot import *
 from .scytale import *
 from .shift import *
+from .vigenere import *
 from .xor import *
 
 
diff --git a/src/codext/crypto/bazeries.py b/src/codext/crypto/bazeries.py
new file mode 100644
index 0000000..b096102
--- /dev/null
+++ b/src/codext/crypto/bazeries.py
@@ -0,0 +1,159 @@
+# -*- coding: UTF-8 -*-
+"""Bazeries Cipher Codec - bazeries content encoding.
+
+The Bazeries cipher is an encryption system created by Étienne Bazeries that combines
+two Polybius grids (5×5 square arrays of letters) and a transposition based on a
+numeric key. The plaintext is split into groups whose sizes are the digits of the key,
+each group is reversed, and then a substitution is applied by mapping each letter's
+position in the first (standard) Polybius square to the same position in the second
+(key-based) Polybius square. When the key is a keyword instead of a number, the
+lengths of the words in the keyword are used as group sizes.
+
+This codec:
+- en/decodes strings from str to str
+- en/decodes strings from bytes to bytes
+- decodes file content to str (read)
+- encodes file content from str to bytes (write)
+
+Reference: https://www.dcode.fr/bazeries-cipher
+"""
+from ..__common__ import *
+
+
+__examples__ = {
+    'enc(bazeries-137)': {'HELLO': 'TSSUB', 'ATTACK': 'OOLLYE'},
+    'dec(bazeries-137)': {'TSSUB': 'HELLO', 'OOLLYE': 'ATTACK'},
+}
+__guess__ = ["bazeries-137"]
+
+
+_DEFAULT_KEY = "137"
+# Standard 5×5 Polybius square alphabet (I and J share the same cell)
+_DEFAULT_ALPHABET = "ABCDEFGHIKLMNOPQRSTUVWXYZ"
+
+_ONES = ["", "ONE", "TWO", "THREE", "FOUR", "FIVE", "SIX", "SEVEN", "EIGHT", "NINE",
+         "TEN", "ELEVEN", "TWELVE", "THIRTEEN", "FOURTEEN", "FIFTEEN", "SIXTEEN",
+         "SEVENTEEN", "EIGHTEEN", "NINETEEN"]
+_TENS = ["", "", "TWENTY", "THIRTY", "FORTY", "FIFTY", "SIXTY", "SEVENTY", "EIGHTY", "NINETY"]
+# Scale words, largest first ; values beyond the largest scale are handled by recursing on the leading part
+_SCALES = ((1_000_000_000, "BILLION"), (1_000_000, "MILLION"), (1000, "THOUSAND"), (100, "HUNDRED"))
+
+
+def _num_to_words(n):
+    """ Convert a non-negative integer to its English word representation (uppercase). """
+    if n == 0:
+        return "ZERO"
+    if n < 20:
+        return _ONES[n]
+    if n < 100:
+        tens, rest = divmod(n, 10)
+        return _TENS[tens] + (" " + _ONES[rest] if rest else "")
+    for scale, name in _SCALES:
+        if n >= scale:
+            head, rest = divmod(n, scale)
+            return _num_to_words(head) + " " + name + (" " + _num_to_words(rest) if rest else "")
+
+
+def _parse_key(key):
+    """ Parse the key into (phrase, group_sizes).
+
+    For a numeric key, it is written in English words to build the phrase, and its
+    individual non-zero digits form the group sizes for transposition.
+    For a keyword, the key itself is the phrase and word lengths are the group sizes.
+    An empty key falls back to the default key ; the returned group sizes are never empty.
+    """
+    key_str = str(key or _DEFAULT_KEY).upper().replace("-", " ").replace("_", " ").strip()
+    compact = key_str.replace(" ", "")
+    # isdecimal() rather than isdigit(): the latter accepts characters such as '²' that int() rejects
+    if compact.isdecimal():
+        n = int(compact)
+        phrase = _num_to_words(n)
+        sizes = [int(d) for d in str(n) if d != '0']
+    else:
+        phrase = key_str
+        sizes = [len(w) for w in key_str.split()]
+    # a key made only of zeros or separators yields no size ; fall back to single-letter groups
+    return phrase, sizes or [1]
+
+
+def _build_key_alphabet(phrase):
+    """ Build a 25-character cipher alphabet from the key phrase for the second Polybius square.
+
+    Letters appear in the order they first occur in the phrase (with J merged into I),
+    followed by the remaining letters of the standard alphabet. Any character that is not
+    part of the standard alphabet (digits, spaces, accented letters, ...) is discarded so
+    that the result is always a permutation of the 25 letters of the first square.
+    """
+    candidates = phrase.upper().replace("J", "I") + _DEFAULT_ALPHABET
+    # dict.fromkeys() removes duplicates while keeping the order of first occurrence
+    return "".join(dict.fromkeys(c for c in candidates if c in _DEFAULT_ALPHABET))
+
+
+def _build_squares(key_alphabet):
+    """ Build position maps and lookup maps for the two 5×5 Polybius squares.
+
+    Returns (sq1_pos, sq2_pos, sq1_lkp, sq2_lkp) where:
+    - sq1_pos / sq2_pos map a letter to its (row, col) 1-indexed coordinate
+    - sq1_lkp / sq2_lkp map a (row, col) coordinate to its letter
+    """
+    coords = [(row, col) for row in range(1, 6) for col in range(1, 6)]
+    sq1_pos, sq1_lkp = dict(zip(_DEFAULT_ALPHABET, coords)), dict(zip(coords, _DEFAULT_ALPHABET))
+    sq2_pos, sq2_lkp = dict(zip(key_alphabet, coords)), dict(zip(coords, key_alphabet))
+    # J shares the cell with I in both squares
+    sq1_pos['J'] = sq1_pos['I']
+    sq2_pos['J'] = sq2_pos['I']
+    return sq1_pos, sq2_pos, sq1_lkp, sq2_lkp
+
+
+def _transpose(chars, digits):
+    """ Split chars into consecutive groups of sizes given by digits (cycling) and reverse each group. """
+    result, i, grp_idx = [], 0, 0
+    while i < len(chars):
+        size = digits[grp_idx % len(digits)]
+        grp_idx += 1
+        result.extend(reversed(chars[i:i + size]))
+        i += size
+    return result
+
+
+def bazeries_encode(key=""):
+    """ Build the encoding function for the given key (number or keyword ; defaults to _DEFAULT_KEY). """
+    phrase, digits = _parse_key(key)
+    sq1_pos, _, _, sq2_lkp = _build_squares(_build_key_alphabet(phrase))
+
+    def encode(text, errors="strict"):
+        _h = handle_error("bazeries", errors)
+        # only letters are kept (J is merged into I) ; they are transposed first, then substituted
+        alpha = [('I' if c == 'J' else c) for c in ensure_str(text).upper() if c.isalpha()]
+        result = []
+        for pos, c in enumerate(_transpose(alpha, digits)):
+            if c in sq1_pos:
+                result.append(sq2_lkp[sq1_pos[c]])
+            else:
+                result.append(_h(c, pos, "".join(result)))
+        return "".join(result), len(text)
+    return encode
+
+
+def bazeries_decode(key=""):
+    """ Build the decoding function for the given key (number or keyword ; defaults to _DEFAULT_KEY). """
+    phrase, digits = _parse_key(key)
+    _, sq2_pos, sq1_lkp, _ = _build_squares(_build_key_alphabet(phrase))
+
+    def decode(text, errors="strict"):
+        _h = handle_error("bazeries", errors, decode=True)
+        # the substitution is undone first ; reversing each group a second time then undoes the transposition
+        alpha = [c for c in ensure_str(text).upper() if c.isalpha()]
+        sub = []
+        for pos, c in enumerate(alpha):
+            if c in sq2_pos:
+                sub.append(sq1_lkp[sq2_pos[c]])
+            else:
+                sub.append(_h(c, pos, "".join(sub)))
+        return "".join(_transpose(sub, digits)), len(text)
+    return decode
+
+
+add("bazeries", bazeries_encode, bazeries_decode,
+    r"^bazeries(?:[-_](.+))?$",
+    printables_rate=1., expansion_factor=1.)
diff --git a/src/codext/crypto/vigenere.py b/src/codext/crypto/vigenere.py
new file mode 100644
index 0000000..39ac4ce
--- /dev/null
+++ b/src/codext/crypto/vigenere.py
@@ -0,0 +1,67 @@
+# -*- coding: UTF-8 -*-
+"""Vigenere Cipher Codec - vigenere content encoding.
+
+This codec:
+- en/decodes strings from str to str
+- en/decodes strings from bytes to bytes
+- decodes file content to str (read)
+- encodes file content from str to bytes (write)
+"""
+from string import ascii_lowercase as LC, ascii_uppercase as UC
+
+from ..__common__ import *
+
+
+__examples__ = {
+    'enc(vigenere)': None,
+    'enc(vigenere-lemon)': {'ATTACKATDAWN': 'LXFOPVEFRNHR'},
+    'enc(vigenere-key)': {'hello': 'rijvs'},
+    'enc(vigenère_key)': {'Hello World': 'Rijvs Uyvjn'},
+    'enc-dec(vigenere-secret)': ['hello world', 'ATTACK AT DAWN', 'Test 1234!'],
+}
+__guess__ = ["vigenere-key", "vigenere-secret", "vigenere-password"]
+
+
+def __char(c, k, i, d=False):
+    """ Shift letter c by the i-th letter of key k (cycling), backwards if d is set ; case is preserved. """
+    alphabet = LC if c in LC else UC
+    shift = ord(k[i % len(k)]) - ord('a')
+    return alphabet[(alphabet.index(c) + (-shift if d else shift)) % 26]
+
+
+def __check(key):
+    """ Return the lowercased key, raising LookupError if it is missing, empty or not purely alphabetic. """
+    # guard against a missing key (None or empty) so that the error is always a LookupError
+    key = (key or "").lower()
+    if not key.isalpha():
+        raise LookupError("Bad parameter for encoding 'vigenere': key must be a non-empty alphabetic string")
+    return key
+
+
+def __convert(text, key, decode=False):
+    """ Apply the Vigenere cipher to text ; only ASCII letters are shifted and consume a key letter. """
+    result, i, k = [], 0, __check(key)
+    for c in ensure_str(text):
+        if c in LC or c in UC:
+            result.append(__char(c, k, i, decode))
+            i += 1
+        else:
+            result.append(c)
+    return "".join(result), len(text)
+
+
+def vigenere_encode(key):
+    """ Build the encoding function for the given key. """
+    def encode(text, errors="strict"):
+        return __convert(text, key)
+    return encode
+
+
+def vigenere_decode(key):
+    """ Build the decoding function for the given key. """
+    def decode(text, errors="strict"):
+        return __convert(text, key, True)
+    return decode
+
+
+add("vigenere", vigenere_encode, vigenere_decode, r"^vigen[eè]re(?:[-_]cipher)?(?:[-_]([a-zA-Z]+))?$", penalty=.1)