diff --git a/twinkle-screen/src/main/java/org/codejive/twinkle/screen/io/BufferWriter.java b/twinkle-screen/src/main/java/org/codejive/twinkle/screen/io/BufferWriter.java index cc7cc03..35daacb 100644 --- a/twinkle-screen/src/main/java/org/codejive/twinkle/screen/io/BufferWriter.java +++ b/twinkle-screen/src/main/java/org/codejive/twinkle/screen/io/BufferWriter.java @@ -7,13 +7,13 @@ import org.codejive.twinkle.ansi.util.AnsiOutputParser.AnsiSequenceHandler; import org.codejive.twinkle.screen.Buffer; import org.codejive.twinkle.screen.Buffer.LinkPrintOption; -import org.codejive.twinkle.text.SequenceDecoder; import org.codejive.twinkle.text.Size; +import org.codejive.twinkle.text.UnicodeDecoder; import org.jspecify.annotations.NonNull; public class BufferWriter extends Writer { protected Buffer buffer; - protected SequenceDecoder decoder; + protected UnicodeDecoder decoder; int cursorX; int cursorY; private int savedCursorX; @@ -26,7 +26,7 @@ public class BufferWriter extends Writer { public BufferWriter(@NonNull Buffer buffer) { this.buffer = buffer; - this.decoder = new SequenceDecoder(); + this.decoder = new UnicodeDecoder(); this.cursorX = 0; this.cursorY = 0; this.savedCursorX = 0; @@ -73,7 +73,7 @@ public void flush() { } decoder.finish(); if (decoder.isReady()) { - if (decoder.state() == SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE) { + if (decoder.state() == UnicodeDecoder.ANSI) { handleAnsiSequence(decoder.toString()); } else if (decoder.codepoint() == '\n') { cursorX = 0; diff --git a/twinkle-text/src/main/java/org/codejive/twinkle/ansi/util/AnsiDecoder.java b/twinkle-text/src/main/java/org/codejive/twinkle/ansi/util/AnsiDecoder.java new file mode 100644 index 0000000..d0f0925 --- /dev/null +++ b/twinkle-text/src/main/java/org/codejive/twinkle/ansi/util/AnsiDecoder.java @@ -0,0 +1,246 @@ +package org.codejive.twinkle.ansi.util; + +import org.codejive.twinkle.ansi.Constants; + +/** + * A base decoder for handling ANSI escape sequences. 
This class provides the foundation for + * decoding character sequences, with a focus on identifying and parsing ANSI escape sequences. + * Subclasses can extend this to add additional sequence handling. + * + *
Characters are pushed into the decoder while its state is INCOMPLETE. Once enough information + * is available to determine the sequence type, the state changes accordingly. Use {@code reset()} + * to clear the decoder and start a new sequence. + */ +public class AnsiDecoder { + // State constants + public static final int INCOMPLETE = 0; + public static final int ANSI = 1; + public static final int ERROR = 2; + + protected enum AnsiMode { + NONE, + PREFIX, + CSI, + OSC + } + + protected final StringBuilder buffer = new StringBuilder(); + protected int state = INCOMPLETE; + protected AnsiMode ansiMode = AnsiMode.NONE; + protected boolean oscSeenEsc = false; + + /** + * Pushes a character value (as an int) into the decoder. + * + *
Accepts int values to support full Unicode range including supplementary characters. This + * base implementation handles ANSI escape sequences. Subclasses should override {@link + * #handleNonAnsi(int)} to provide additional handling for non-ANSI characters. + * + * @param c the character value to push + */ + public void push(int c) { + if (!canPush(c)) { + state = ERROR; + return; + } + + if (Character.isSupplementaryCodePoint(c)) { + buffer.append(Character.toChars(c)); + } else { + buffer.append((char) c); + } + + if (ansiMode != AnsiMode.NONE) { + char[] chars = Character.toChars(c); + for (int i = 0; i < chars.length; i++) { + pushAnsi(chars[i]); + if (state == ERROR || state == ANSI) { + break; + } + } + return; + } + + if (c == Constants.ESC) { + pushAnsi((char) c); + return; + } + + handleNonAnsi(c); + } + + /** + * Returns true if the given character value can be consumed as part of the currently decoded + * sequence. + * + *
This is a non-mutating probe. Callers can use it to detect sequence boundaries without + * relying on completion heuristics. + * + * @param c the character value to check + * @return true if the character can be pushed + */ + public boolean canPush(int c) { + if (state == ERROR || state == ANSI) { + return false; + } + + if (!Character.isValidCodePoint(c)) { + return false; + } + + if (ansiMode != AnsiMode.NONE) { + return true; + } + + if (buffer.length() == 0) { + return true; + } + + if (c == Constants.ESC) { + return false; + } + + return canPushNonAnsi(c); + } + + /** + * Finalizes pending state when no more input is available. + * + *
This base implementation resolves unterminated ANSI escapes as ANSI sequences. Subclasses
+ * should override {@link #finishNonAnsi()} to handle additional finalization logic.
+ */
+    public void finish() {
+        if (state != ERROR) {
+            if (ansiMode == AnsiMode.NONE) {
+                finishNonAnsi();
+            } else {
+                // Input ended mid-escape: report the buffered text as an ANSI sequence.
+                ansiMode = AnsiMode.NONE;
+                state = ANSI;
+            }
+        }
+    }
+
+    /** Clears all buffered data and returns the decoder to the INCOMPLETE state. */
+    public void reset() {
+        oscSeenEsc = false;
+        ansiMode = AnsiMode.NONE;
+        state = INCOMPLETE;
+        buffer.setLength(0);
+        resetNonAnsi();
+    }
+
+    /** Returns true once the state is anything other than INCOMPLETE, which includes ERROR. */
+    public boolean isComplete() {
+        return !(state() == INCOMPLETE);
+    }
+
+    /**
+     * Reports where the decoder currently stands.
+     *
+     * @return one of the int state constants, e.g. INCOMPLETE, ANSI or ERROR
+     */
+    public int state() {
+        return this.state;
+    }
+
+    /** Returns the current contents of the internal buffer as a string. */
+    @Override
+    public String toString() {
+        return this.buffer.toString();
+    }
+
+    /**
+     * Feeds one UTF-16 code unit to the ANSI escape state machine.
+     *
+     * <p>Three shapes are recognised: ESC plus one other character, CSI (ESC [ up to a unit in
+     * 0x40-0x7E) and OSC (ESC ] up to BEL, or up to ESC followed by a backslash). The state
+     * becomes ANSI when a sequence ends and stays INCOMPLETE while more input is needed. It
+     * becomes ERROR if the sequence had already ended, or if a non-ESC unit starts a sequence.
+     *
+     * @param ch the code unit to consume
+     */
+    protected void pushAnsi(char ch) {
+        if (state == ANSI) {
+            state = ERROR;
+            return;
+        }
+
+        final AnsiMode mode = ansiMode;
+        if (mode == AnsiMode.NONE) {
+            // Nothing is in flight yet, so only ESC may open a sequence.
+            if (ch != Constants.ESC) {
+                state = ERROR;
+                return;
+            }
+            ansiMode = AnsiMode.PREFIX;
+            state = INCOMPLETE;
+        } else if (mode == AnsiMode.PREFIX) {
+            // The character right after ESC decides which kind of sequence this is.
+            switch (ch) {
+                case '[':
+                    ansiMode = AnsiMode.CSI;
+                    state = INCOMPLETE;
+                    break;
+                case ']':
+                    ansiMode = AnsiMode.OSC;
+                    state = INCOMPLETE;
+                    oscSeenEsc = false;
+                    break;
+                default:
+                    // Any other character completes a two-character escape.
+                    state = ANSI;
+                    ansiMode = AnsiMode.NONE;
+                    break;
+            }
+        } else if (mode == AnsiMode.CSI) {
+            // Keep collecting until a unit in the final-byte range arrives.
+            final boolean finalByte = ch >= 0x40 && ch <= 0x7E;
+            state = finalByte ? ANSI : INCOMPLETE;
+            if (finalByte) {
+                ansiMode = AnsiMode.NONE;
+            }
+        } else if (mode == AnsiMode.OSC) {
+            // Terminated by BEL, or by a backslash directly after ESC.
+            final boolean terminator = oscSeenEsc ? ch == '\\' : ch == 0x07;
+            if (terminator) {
+                state = ANSI;
+                ansiMode = AnsiMode.NONE;
+                oscSeenEsc = false;
+            } else {
+                oscSeenEsc = (ch == Constants.ESC);
+                state = INCOMPLETE;
+            }
+        }
+    }
+
+    /**
+     * Extension point reached by push() for a non-ESC value when no escape is in progress.
+     *
+     * @param c the character value that was pushed; this base version rejects it with ERROR
+     */
+    protected void handleNonAnsi(int c) {
+        this.state = ERROR;
+    }
+
+ /**
+ * Hook consulted by canPush() for a non-ESC value when the buffer already holds data and no
+ * escape sequence is in progress. This base implementation always refuses.
+ *
+ * @param c the character value to check
+ * @return true if the value may extend the current sequence; always false here
+ */
+ protected boolean canPushNonAnsi(int c) {
+ return false;
+ }
+
+ /** Hook called by finish() when no ANSI escape is pending; a no-op in this base class. */
+ protected void finishNonAnsi() {
+ // Intentionally empty: subclasses override this to finalize their own sequences.
+ }
+
+ /** Hook called at the end of reset() so subclasses can clear their own state; a no-op here. */
+ protected void resetNonAnsi() {
+ // Intentionally empty: subclasses override this to clear their own fields.
+ }
+}
diff --git a/twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceIterator.java b/twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceIterator.java
index 6c495c8..6b4f103 100644
--- a/twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceIterator.java
+++ b/twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceIterator.java
@@ -39,12 +39,6 @@ public interface SequenceIterator {
/** Returns the full sequence of the last returned codepoint from {@link #next()}. */
String sequence();
- /** Returns the start index of the current sequence in characters. */
- int begin();
-
- /** Returns the end index of the current sequence in characters. */
- int end();
-
static SequenceIterator of(CharSequence text) {
return new CharSequenceSequenceIterator(text);
}
@@ -104,7 +98,7 @@ protected int calculateWidth(int cp) {
class CharSequenceSequenceIterator extends BaseSequenceIterator {
private final CharSequence text;
private final int length;
- private final SequenceDecoder decoder = new SequenceDecoder();
+ private final UnicodeDecoder decoder = new UnicodeDecoder();
private int cursor = 0;
private int sequenceStart = 0;
@@ -144,16 +138,6 @@ public String sequence() {
return text.subSequence(sequenceStart, sequenceEnd).toString();
}
- @Override
- public int begin() {
- return sequenceStart;
- }
-
- @Override
- public int end() {
- return sequenceEnd;
- }
-
private void primeNext() {
if (cursor >= length) {
nextLeadCodePoint = -1;
@@ -184,12 +168,12 @@ private void primeNext() {
cursor += cpChars;
}
- if (cursor >= length || decoder.state() == SequenceDecoder.State.INCOMPLETE) {
+ if (cursor >= length || decoder.state() == UnicodeDecoder.INCOMPLETE) {
decoder.finish();
}
sequenceEnd = cursor;
- if (decoder.state() == SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE) {
+ if (decoder.state() == UnicodeDecoder.ANSI) {
nextLeadCodePoint = Constants.ESC;
currentWidth = 0;
} else {
@@ -212,12 +196,10 @@ private void primeNext() {
class ReaderSequenceIterator extends BaseSequenceIterator {
private final PushbackReader reader;
private final StringBuilder currentSequence = new StringBuilder();
- private final SequenceDecoder decoder = new SequenceDecoder();
+ private final UnicodeDecoder decoder = new UnicodeDecoder();
private int nextLeadCodePoint = -1;
private boolean primed = false;
private boolean exhausted = false;
- private int position = 0;
- private int sequenceStart = 0;
/** Creates a SequenceIterator that reads from the given Reader. */
ReaderSequenceIterator(Reader reader) {
@@ -251,21 +233,8 @@ public String sequence() {
return currentSequence.toString();
}
- /** Returns the start index of the current sequence in characters. */
- @Override
- public int begin() {
- return sequenceStart;
- }
-
- /** Returns the end index of the current sequence in characters. */
- @Override
- public int end() {
- return sequenceStart + currentSequence.length();
- }
-
private void primeNext() {
currentSequence.setLength(0);
- sequenceStart = position;
nextLeadCodePoint = -1;
try {
@@ -299,12 +268,12 @@ private void primeNext() {
}
if (currentSequence.length() > 0
- && (cp == -1 || decoder.state() == SequenceDecoder.State.INCOMPLETE)) {
+ && (cp == -1 || decoder.state() == UnicodeDecoder.INCOMPLETE)) {
decoder.finish();
}
if (currentSequence.length() > 0 && nextLeadCodePoint == -1) {
- if (decoder.state() == SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE) {
+ if (decoder.state() == UnicodeDecoder.ANSI) {
nextLeadCodePoint = Constants.ESC;
currentWidth = 0;
} else {
@@ -340,15 +309,10 @@ private void unreadCodePoint(int cp) throws IOException {
}
private int read() throws IOException {
- int c = reader.read();
- if (c != -1) {
- position++;
- }
- return c;
+ return reader.read();
}
private void unread(int c) throws IOException {
reader.unread(c);
- position--;
}
}
diff --git a/twinkle-text/src/main/java/org/codejive/twinkle/text/StyledIterator.java b/twinkle-text/src/main/java/org/codejive/twinkle/text/StyledIterator.java
index a69f333..d5d3198 100644
--- a/twinkle-text/src/main/java/org/codejive/twinkle/text/StyledIterator.java
+++ b/twinkle-text/src/main/java/org/codejive/twinkle/text/StyledIterator.java
@@ -71,16 +71,6 @@ public int width() {
return delegate.width();
}
- @Override
- public int begin() {
- return delegate.begin();
- }
-
- @Override
- public int end() {
- return delegate.end();
- }
-
@Override
public String sequence() {
return delegate.sequence();
diff --git a/twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceDecoder.java b/twinkle-text/src/main/java/org/codejive/twinkle/text/UnicodeDecoder.java
similarity index 54%
rename from twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceDecoder.java
rename to twinkle-text/src/main/java/org/codejive/twinkle/text/UnicodeDecoder.java
index 34851dc..c26fcdf 100644
--- a/twinkle-text/src/main/java/org/codejive/twinkle/text/SequenceDecoder.java
+++ b/twinkle-text/src/main/java/org/codejive/twinkle/text/UnicodeDecoder.java
@@ -1,40 +1,28 @@
package org.codejive.twinkle.text;
import org.codejive.twinkle.ansi.Constants;
+import org.codejive.twinkle.ansi.util.AnsiDecoder;
/**
- * A utility class for decoding sequences of characters. Sequences can be built up by pushing
- * characters into the decoder while it's state is INCOMPLETE. Once the decoder has enough
- * information to determine the type of sequence (e.g. a codepoint, a grapheme cluster, or an ANSI
- * escape sequence), the state will change to the corresponding type. The decoder can then be
- * queried for the result. reset() can be used to clear the decoder and start building
- * a new sequence.
+ * A decoder for character sequences including Unicode codepoints, grapheme clusters, and ANSI
+ * escape sequences. Extends {@link AnsiDecoder} to add support for proper Unicode handling
+ * including surrogate pairs, grapheme cluster boundaries, and extended grapheme cluster rules.
+ *
+ * <p>Sequences can be built up by pushing characters into the decoder while its state is
+ * INCOMPLETE. Once the decoder has enough information to determine the type of sequence (e.g. a
+ * codepoint, a grapheme cluster, or an ANSI escape sequence), the state will change to the
+ * corresponding type. The decoder can then be queried for the result. reset() can be
+ * used to clear the decoder and start building a new sequence.
*/
-public class SequenceDecoder {
- public enum State {
- INCOMPLETE,
- CODEPOINT,
- GRAPHEME_CLUSTER,
- ANSI_ESCAPE_SEQUENCE,
- ERROR
- }
-
- private enum AnsiMode {
- NONE,
- PREFIX,
- CSI,
- OSC
- }
+public class UnicodeDecoder extends AnsiDecoder {
+ // Additional state constants for Unicode handling
+ public static final int CODEPOINT = 10;
+ public static final int GRAPHEME_CLUSTER = 11;
private static final int NEWLINE = '\n';
private static final int CARRIAGE_RETURN = '\r';
- private final StringBuilder buffer = new StringBuilder();
-
- private State state = State.INCOMPLETE;
- private AnsiMode ansiMode = AnsiMode.NONE;
private char pendingHighSurrogate = 0;
- private boolean oscSeenEsc = false;
private int riCount = 0;
private boolean pendingCarriageReturn = false;
private int firstCodepoint = -1;
@@ -44,196 +32,119 @@ private enum AnsiMode {
/**
* Pushes either a Unicode code point or a UTF-16 code unit encoded as an int.
*
- *
Values in the surrogate range are treated as UTF-16 code units and paired using internal - * pending-surrogate state. + *
Extends the base implementation to handle UTF-16 surrogate pairs and Unicode codepoint + * sequences properly. */ + @Override public void push(int cp) { + // Handle UTF-16 surrogate pairs if (pendingHighSurrogate != 0) { if (!canPush(cp)) { - state = State.ERROR; + state = ERROR; return; } char low = (char) cp; buffer.append(low); int codepoint = Character.toCodePoint(pendingHighSurrogate, low); pendingHighSurrogate = 0; - pushCodepoint(codepoint); + handleNonAnsi(codepoint); return; } if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) { if (!canPush(cp)) { - state = State.ERROR; + state = ERROR; return; } buffer.append((char) cp); pendingHighSurrogate = (char) cp; - state = State.INCOMPLETE; - return; - } - - if (!canPush(cp)) { - state = State.ERROR; - return; - } - - if (Character.isSupplementaryCodePoint(cp)) { - buffer.append(Character.toChars(cp)); - } else { - buffer.append((char) cp); - } - - if (pendingCarriageReturn) { - pendingCarriageReturn = false; - state = State.CODEPOINT; - return; - } - - if (ansiMode != AnsiMode.NONE) { - char[] chars = Character.toChars(cp); - for (int i = 0; i < chars.length; i++) { - pushAnsi(chars[i]); - if (state == State.ERROR || state == State.ANSI_ESCAPE_SEQUENCE) { - break; - } - } - return; - } - - if (cp == Constants.ESC) { - pushAnsi((char) cp); + state = INCOMPLETE; return; } - if (cp == CARRIAGE_RETURN) { - firstCodepoint = NEWLINE; - lastCodepoint = NEWLINE; - codepointCount = 1; - state = State.INCOMPLETE; - pendingCarriageReturn = true; - return; - } - - if (cp == NEWLINE) { - firstCodepoint = NEWLINE; - lastCodepoint = NEWLINE; - codepointCount = 1; - state = State.CODEPOINT; - return; - } - - pushCodepoint(cp); + // Delegate to base class for ANSI handling and standard processing + super.push(cp); } /** * Returns true if {@code cp} can be consumed as part of the currently decoded sequence. * - *
This is a non-mutating probe. Callers can use it to detect sequence boundaries without - * relying on completion heuristics. - * - *
Like {@link #push(int)}, this accepts either Unicode code points or UTF-16 code units - * encoded as ints. + *
Extends the base implementation to handle UTF-16 surrogate pairs and grapheme cluster + * boundaries. */ + @Override public boolean canPush(int cp) { - if (state == State.ERROR || state == State.ANSI_ESCAPE_SEQUENCE) { + if (state == ERROR || state == ANSI) { return false; } + // Handle UTF-16 surrogate pairs if (pendingHighSurrogate != 0) { if (cp < Character.MIN_LOW_SURROGATE || cp > Character.MAX_LOW_SURROGATE) { return false; } int codepoint = Character.toCodePoint(pendingHighSurrogate, (char) cp); - return canPushCodepoint(codepoint); + return canPushNonAnsi(codepoint); } if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) { if (ansiMode != AnsiMode.NONE || buffer.length() == 0) { return true; } - return canPushCodepoint(0x10000); + return canPushNonAnsi(0x10000); } if (cp >= Character.MIN_LOW_SURROGATE && cp <= Character.MAX_LOW_SURROGATE) { return false; } - if (!Character.isValidCodePoint(cp)) { - return false; - } - - if (pendingCarriageReturn) { - return cp == NEWLINE; - } - - if (ansiMode != AnsiMode.NONE) { - return true; - } - - if (buffer.length() == 0) { - return true; - } - - if (cp == Constants.ESC) { - return false; - } - - return canPushCodepoint(cp); + // Delegate to base class + return super.canPush(cp); } /** * Finalizes pending state when no more input is available. * - *
This resolves incomplete CR line endings as newline sequences and resolves unterminated - * ANSI escapes as ANSI sequences, matching iterator semantics at end of input. + *
Extends the base implementation to resolve incomplete CR line endings and validate + * surrogate pairs. */ + @Override public void finish() { - if (state == State.ERROR) { + if (state == ERROR) { return; } if (pendingHighSurrogate != 0) { - state = State.ERROR; + state = ERROR; return; } if (pendingCarriageReturn) { pendingCarriageReturn = false; - state = State.CODEPOINT; + state = CODEPOINT; return; } - if (ansiMode != AnsiMode.NONE) { - ansiMode = AnsiMode.NONE; - state = State.ANSI_ESCAPE_SEQUENCE; - } + // Delegate to base class for ANSI finalization + super.finish(); } + /** Resets the decoder to its initial state, clearing all accumulated data. */ + @Override public void reset() { - buffer.setLength(0); - state = State.INCOMPLETE; - ansiMode = AnsiMode.NONE; + super.reset(); pendingHighSurrogate = 0; pendingCarriageReturn = false; - oscSeenEsc = false; - riCount = 0; - firstCodepoint = -1; - lastCodepoint = -1; - codepointCount = 0; - } - - public boolean isComplete() { - return state() != State.INCOMPLETE; } /** - * Returns true when the current sequence can be emitted as-is. + * Returns true if the decoder has completed a sequence and can be emitted as-is. * *
Unlike {@link #isComplete()}, this reports false for tails that are syntactically * extendable and usually require continuation (for example trailing ZWJ/virama/prepend). */ public boolean isReady() { - if (state == State.ERROR || state == State.INCOMPLETE) { + if (state == ERROR || state == INCOMPLETE) { return false; } - if (state == State.ANSI_ESCAPE_SEQUENCE) { + if (state == ANSI) { return true; } if (pendingHighSurrogate != 0 || pendingCarriageReturn || codepointCount == 0) { @@ -245,10 +156,6 @@ public boolean isReady() { || Unicode.isPrepend(lastCodepoint)); } - public State state() { - return state; - } - /** * Returns the lead code point for the decoded sequence. * @@ -256,10 +163,10 @@ public State state() { * "\r"} or {@code "\r\n"}. Returns -1 while incomplete or in error. */ public int codepoint() { - if (!isComplete() || state == State.ERROR) { + if (!isComplete() || state == ERROR) { return -1; } - if (state == State.ANSI_ESCAPE_SEQUENCE) { + if (state == ANSI) { return Constants.ESC; } return codepointCount == 0 ? -1 : firstCodepoint; @@ -274,86 +181,70 @@ public int codepoint() { * @return the visual column width, or -1 while incomplete or in error state */ public int width() { - if (!isComplete() || state == State.ERROR) { + if (!isComplete() || state == ERROR) { return -1; } - if (state == State.ANSI_ESCAPE_SEQUENCE) { + if (state == ANSI) { return 0; } return codepointCount == 0 ? -1 : calculateWidth(firstCodepoint); } + /** + * Implements the hook for handling non-ANSI characters. This method handles Unicode codepoints + * including newlines and grapheme cluster formation. 
+ */ @Override - public String toString() { - return buffer.toString(); - } - - private void pushAnsi(char ch) { - if (state == State.ANSI_ESCAPE_SEQUENCE) { - state = State.ERROR; + protected void handleNonAnsi(int cp) { + if (pendingCarriageReturn) { + pendingCarriageReturn = false; + state = CODEPOINT; return; } - if (ansiMode == AnsiMode.NONE) { - if (ch == Constants.ESC) { - ansiMode = AnsiMode.PREFIX; - state = State.INCOMPLETE; - return; - } - state = State.ERROR; + if (cp == CARRIAGE_RETURN) { + firstCodepoint = NEWLINE; + lastCodepoint = NEWLINE; + codepointCount = 1; + state = INCOMPLETE; + pendingCarriageReturn = true; return; } - if (ansiMode == AnsiMode.PREFIX) { - if (ch == '[') { - ansiMode = AnsiMode.CSI; - state = State.INCOMPLETE; - } else if (ch == ']') { - ansiMode = AnsiMode.OSC; - state = State.INCOMPLETE; - oscSeenEsc = false; - } else { - state = State.ANSI_ESCAPE_SEQUENCE; - ansiMode = AnsiMode.NONE; - } + if (cp == NEWLINE) { + firstCodepoint = NEWLINE; + lastCodepoint = NEWLINE; + codepointCount = 1; + state = CODEPOINT; return; } - if (ansiMode == AnsiMode.CSI) { - if (ch >= 0x40 && ch <= 0x7E) { - state = State.ANSI_ESCAPE_SEQUENCE; - ansiMode = AnsiMode.NONE; - } else { - state = State.INCOMPLETE; - } - return; - } + pushCodepoint(cp); + } - if (ansiMode == AnsiMode.OSC) { - if (oscSeenEsc) { - if (ch == '\\') { - state = State.ANSI_ESCAPE_SEQUENCE; - ansiMode = AnsiMode.NONE; - oscSeenEsc = false; - return; - } - oscSeenEsc = (ch == Constants.ESC); - state = State.INCOMPLETE; - return; - } - if (ch == 0x07) { - state = State.ANSI_ESCAPE_SEQUENCE; - ansiMode = AnsiMode.NONE; - return; - } - oscSeenEsc = (ch == Constants.ESC); - state = State.INCOMPLETE; + /** Implements the hook for checking if a non-ANSI character can be pushed. 
*/ + @Override + protected boolean canPushNonAnsi(int cp) { + if (pendingCarriageReturn) { + return cp == NEWLINE; } + return canPushCodepoint(cp); + } + + /** Implements the hook for resetting Unicode-specific state. */ + @Override + protected void resetNonAnsi() { + pendingHighSurrogate = 0; + pendingCarriageReturn = false; + riCount = 0; + firstCodepoint = -1; + lastCodepoint = -1; + codepointCount = 0; } private void pushCodepoint(int cp) { - if (state == State.ANSI_ESCAPE_SEQUENCE) { - state = State.ERROR; + if (state == ANSI) { + state = ERROR; return; } @@ -362,19 +253,19 @@ private void pushCodepoint(int cp) { lastCodepoint = cp; codepointCount = 1; riCount = Unicode.isRegionalIndicator(cp) ? 1 : 0; - state = State.CODEPOINT; + state = CODEPOINT; return; } if (shouldBreak(lastCodepoint, cp, riCount)) { - state = State.ERROR; + state = ERROR; return; } lastCodepoint = cp; codepointCount++; riCount = Unicode.isRegionalIndicator(cp) ? riCount + 1 : 0; - state = State.GRAPHEME_CLUSTER; + state = GRAPHEME_CLUSTER; } private boolean canPushCodepoint(int cp) { diff --git a/twinkle-text/src/test/java/org/codejive/twinkle/text/TestSequenceDecoder.java b/twinkle-text/src/test/java/org/codejive/twinkle/text/TestSequenceDecoder.java index aee184c..bdb3792 100644 --- a/twinkle-text/src/test/java/org/codejive/twinkle/text/TestSequenceDecoder.java +++ b/twinkle-text/src/test/java/org/codejive/twinkle/text/TestSequenceDecoder.java @@ -8,109 +8,109 @@ public class TestSequenceDecoder { @Test public void testSimpleCodepoint() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); assertThat(decoder.isComplete()).isFalse(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.push('A'); assertThat(decoder.isComplete()).isTrue(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + 
assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.width()).isEqualTo(1); assertThat(decoder.toString()).isEqualTo("A"); } @Test public void testSurrogatePair() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); String clef = "\uD834\uDD1E"; decoder.push(clef.charAt(0)); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.push(clef.charAt(1)); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.toString()).isEqualTo(clef); } @Test public void testCombiningMarkUpgradesToGraphemeCluster() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('a'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.canPush('\u0301')).isTrue(); assertThat(decoder.canPush('b')).isFalse(); decoder.push('\u0301'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.width()).isEqualTo(1); assertThat(decoder.toString()).isEqualTo("a\u0301"); } @Test public void testAnsiCsiSequence() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); String csi = "\u001B[31m"; for (int i = 0; i < csi.length() - 1; i++) { decoder.push(csi.charAt(i)); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); } decoder.push(csi.charAt(csi.length() - 1)); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ANSI); 
assertThat(decoder.width()).isEqualTo(0); assertThat(decoder.toString()).isEqualTo(csi); } @Test public void testAnsiOscSequenceWithStTerminator() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); String osc = "\u001B]8;;http://example.com\u001B\\"; for (int i = 0; i < osc.length() - 1; i++) { decoder.push(osc.charAt(i)); } - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.push(osc.charAt(osc.length() - 1)); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ANSI); assertThat(decoder.toString()).isEqualTo(osc); } @Test public void testInvalidSurrogateGoesToError() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('\uD834'); decoder.push('x'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ERROR); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ERROR); assertThat(decoder.width()).isEqualTo(-1); } @Test public void testResetClearsState() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('A'); assertThat(decoder.isComplete()).isTrue(); decoder.reset(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); assertThat(decoder.width()).isEqualTo(-1); assertThat(decoder.toString()).isEmpty(); } @Test public void testCanPushDetectsBoundaryCharacters() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); assertThat(decoder.canPush('x')).isTrue(); decoder.push('x'); @@ -121,12 +121,12 @@ public void testCanPushDetectsBoundaryCharacters() { assertThat(decoder.canPush('\u001B')).isFalse(); // Probe does not mutate state. 
- assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); } @Test public void testCanPushDuringAnsiAndAfterCompletion() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); assertThat(decoder.canPush('\u001B')).isTrue(); decoder.push('\u001B'); @@ -139,13 +139,13 @@ public void testCanPushDuringAnsiAndAfterCompletion() { assertThat(decoder.canPush('m')).isTrue(); decoder.push('m'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ANSI); assertThat(decoder.canPush('x')).isFalse(); } @Test public void testFamilySequenceReadinessAndCanPushBoundaries() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); String man = "\uD83D\uDC68"; String woman = "\uD83D\uDC69"; @@ -181,27 +181,27 @@ public void testFamilySequenceReadinessAndCanPushBoundaries() { pushStringAssertingCanPush(decoder, boy); assertThat(decoder.isReady()).isTrue(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.toString()) .isEqualTo(man + joiner + woman + joiner + girl + joiner + boy); } @Test public void testPushCodepointOverloadWithSupplementary() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); int man = 0x1F468; assertThat(decoder.canPush(man)).isTrue(); decoder.push(man); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.toString()).isEqualTo(new String(Character.toChars(man))); assertThat(decoder.isReady()).isTrue(); } @Test public void testCanPushCodepointOverloadForFamilyJoin() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder 
decoder = new UnicodeDecoder(); decoder.push(0x1F468); assertThat(decoder.canPush(0x1F469)).isFalse(); @@ -212,12 +212,12 @@ public void testCanPushCodepointOverloadForFamilyJoin() { assertThat(decoder.canPush(0x1F469)).isTrue(); decoder.push(0x1F469); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); } @Test public void testInvalidCodepointOverloadInput() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); assertThat(decoder.canPush(-1)).isFalse(); assertThat(decoder.canPush(0x110000)).isFalse(); @@ -225,10 +225,10 @@ public void testInvalidCodepointOverloadInput() { assertThat(decoder.canPush(0xDC00)).isFalse(); decoder.push(0x110000); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ERROR); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ERROR); } - private static void pushStringAssertingCanPush(SequenceDecoder decoder, String value) { + private static void pushStringAssertingCanPush(UnicodeDecoder decoder, String value) { for (int i = 0; i < value.length(); i++) { char ch = value.charAt(i); assertThat(decoder.canPush(ch)).isTrue(); @@ -238,15 +238,15 @@ private static void pushStringAssertingCanPush(SequenceDecoder decoder, String v @Test public void testFinishLoneCrAtEofYieldsNewline() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('\r'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); assertThat(decoder.isReady()).isFalse(); decoder.finish(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.toString()).isEqualTo("\r"); assertThat(decoder.codepoint()).isEqualTo('\n'); assertThat(decoder.width()).isEqualTo(0); @@ 
-254,104 +254,104 @@ public void testFinishLoneCrAtEofYieldsNewline() { @Test public void testFinishAfterCrLfHasNoEffect() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); // CR+LF is already complete — finish() should leave state unchanged. decoder.push('\r'); decoder.push('\n'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); decoder.finish(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.toString()).isEqualTo("\r\n"); assertThat(decoder.codepoint()).isEqualTo('\n'); } @Test public void testFinishUnterminatedCsiYieldsAnsiSequence() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); // Push ESC [ 3 1 — missing final byte 'm'. decoder.push('\u001B'); decoder.push('['); decoder.push('3'); decoder.push('1'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.finish(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ANSI); assertThat(decoder.toString()).isEqualTo("\u001B[31"); assertThat(decoder.width()).isEqualTo(0); } @Test public void testFinishUnterminatedOscYieldsAnsiSequence() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); // Push ESC ] 0 ; T i t l e — no BEL or ST terminator. 
String osc = "\u001B]0;Title"; for (int i = 0; i < osc.length(); i++) { decoder.push(osc.charAt(i)); } - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.finish(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ANSI); assertThat(decoder.toString()).isEqualTo(osc); assertThat(decoder.width()).isEqualTo(0); } @Test public void testFinishPendingHighSurrogateYieldsError() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('\uD834'); // high surrogate, low surrogate never arrives - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.finish(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ERROR); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ERROR); } @Test public void testFinishOnCompleteStateIsNoOp() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('A'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); decoder.finish(); // Already complete — finish() must not change state or codepoints. 
- assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.toString()).isEqualTo("A"); } @Test public void testFinishOnErrorStateIsNoOp() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push(0x110000); // invalid code point → ERROR - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ERROR); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ERROR); decoder.finish(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.ERROR); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.ERROR); } @Test public void testFinishOnEmptyDecoderIsNoOp() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); decoder.finish(); // Nothing was pushed — finish() on an empty decoder should leave it INCOMPLETE. - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.INCOMPLETE); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.INCOMPLETE); } // ------------------------------------------------------------------------- @@ -363,14 +363,14 @@ public void testFinishOnEmptyDecoderIsNoOp() { public void testVS16StaysAttachedToBase() { // ☎ + VS16: the variation selector must NOT cause a break — the decoder // should yield a single GRAPHEME_CLUSTER, not split into two CODEPOINTs. 
- SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('\u260E'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.CODEPOINT); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.CODEPOINT); assertThat(decoder.canPush('\uFE0F')).isTrue(); decoder.push('\uFE0F'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.toString()).isEqualTo("\u260E\uFE0F"); // Width should be wide because of VS16 assertThat(Unicode.isWide(decoder.toString())).isTrue(); @@ -379,13 +379,13 @@ public void testVS16StaysAttachedToBase() { @Test public void testVS15StaysAttachedToBase() { // ☎ + VS15: variation selector must not cause a break either - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('\u260E'); assertThat(decoder.canPush('\uFE0E')).isTrue(); decoder.push('\uFE0E'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.toString()).isEqualTo("\u260E\uFE0E"); // Width should be narrow because of VS15 assertThat(Unicode.isWide(decoder.toString())).isFalse(); @@ -394,7 +394,7 @@ public void testVS15StaysAttachedToBase() { @Test public void testVS16DoesNotAttachToSecondCodepoint() { // After a complete codepoint, a VS on a *different* base must not attach to the first - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('A'); assertThat(decoder.canPush('\uFE0F')).isFalse(); } @@ -405,14 +405,14 @@ public void testVS16DoesNotAttachToSecondCodepoint() { @Test public void testTwoRegionalIndicatorsFormSingleCluster() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); pushString(decoder, new 
String(Character.toChars(0x1F1FA))); // 🇺 assertThat(decoder.isReady()).isTrue(); assertThat(decoder.canPush(0x1F1F8)).isTrue(); // 🇸 can attach decoder.push(0x1F1F8); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.isReady()).isTrue(); // A third regional indicator must NOT attach (would start a new flag) assertThat(decoder.canPush(0x1F1FA)).isFalse(); @@ -422,7 +422,7 @@ public void testTwoRegionalIndicatorsFormSingleCluster() { @Test public void testThirdRegionalIndicatorDoesNotAttach() { // Verify riCount logic: after 2 RI, a 3rd must break - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push(0x1F1FA); decoder.push(0x1F1F8); assertThat(decoder.canPush(0x1F1FA)).isFalse(); @@ -434,7 +434,7 @@ public void testThirdRegionalIndicatorDoesNotAttach() { @Test public void testZwjPreventsBreak() { - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push(0x1F468); // 👨 assertThat(decoder.canPush(Unicode.ZWJ)).isTrue(); decoder.push(Unicode.ZWJ); @@ -443,7 +443,7 @@ public void testZwjPreventsBreak() { assertThat(decoder.canPush(0x1F469)).isTrue(); // 👩 can follow decoder.push(0x1F469); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.isReady()).isTrue(); } @@ -454,12 +454,12 @@ public void testZwjPreventsBreak() { @Test public void testNonSpacingMarkStaysAttached() { // Combining grave accent (U+0300) is NON_SPACING_MARK — must not break - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('a'); assertThat(decoder.canPush('\u0300')).isTrue(); decoder.push('\u0300'); - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + 
assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.toString()).isEqualTo("a\u0300"); assertThat(Unicode.isWide(decoder.toString())).isFalse(); } @@ -467,12 +467,12 @@ public void testNonSpacingMarkStaysAttached() { @Test public void testCombiningSpacingMarkStaysAttached() { // Devanagari vowel sign AA (U+093E) is COMBINING_SPACING_MARK - SequenceDecoder decoder = new SequenceDecoder(); + UnicodeDecoder decoder = new UnicodeDecoder(); decoder.push('\u0915'); // क assertThat(decoder.canPush('\u093E')).isTrue(); decoder.push('\u093E'); // ा - assertThat(decoder.state()).isEqualTo(SequenceDecoder.State.GRAPHEME_CLUSTER); + assertThat(decoder.state()).isEqualTo(UnicodeDecoder.GRAPHEME_CLUSTER); assertThat(decoder.toString()).isEqualTo("\u0915\u093E"); } @@ -480,7 +480,7 @@ public void testCombiningSpacingMarkStaysAttached() { // Helper // ------------------------------------------------------------------------- - private static void pushString(SequenceDecoder decoder, String s) { + private static void pushString(UnicodeDecoder decoder, String s) { for (int i = 0; i < s.length(); i++) { decoder.push(s.charAt(i)); }