diff --git a/internal/archdocs/pssg/schema/jsonld.go b/internal/archdocs/pssg/schema/jsonld.go
index a50f139..eb7f76e 100644
--- a/internal/archdocs/pssg/schema/jsonld.go
+++ b/internal/archdocs/pssg/schema/jsonld.go
@@ -284,15 +284,21 @@ func MarshalSchemas(schemas ...map[string]interface{}) string {
 
 // stepName extracts a short name from an instruction step.
 func stepName(step string) string {
-	// Take first sentence
+	// Find the earliest sentence break; idx is a byte offset into step.
+	idx := -1
 	for _, sep := range []string{". ", ".\n"} {
-		if idx := strings.Index(step, sep); idx > 0 && idx < 80 {
-			return step[:idx+1]
+		if i := strings.Index(step, sep); i > 0 && (idx < 0 || i < idx) {
+			idx = i
 		}
 	}
-	// Truncate if too long
-	if len(step) > 80 {
-		return step[:77] + "..."
+	// Use the first sentence when it fits in 80 runes. The period is a single
+	// byte, so step[:idx+1] always ends on a rune boundary.
+	if idx > 0 && len([]rune(step[:idx+1])) <= 80 {
+		return step[:idx+1]
+	}
+	// Truncate if too long (rune-aware to avoid splitting multi-byte chars).
+	if runes := []rune(step); len(runes) > 80 {
+		return string(runes[:77]) + "..."
 	}
 	return step
 }
diff --git a/internal/archdocs/pssg/schema/jsonld_test.go b/internal/archdocs/pssg/schema/jsonld_test.go
index 19cf02c..69ede7a 100644
--- a/internal/archdocs/pssg/schema/jsonld_test.go
+++ b/internal/archdocs/pssg/schema/jsonld_test.go
@@ -2,8 +2,37 @@ package schema
 
 import (
+	"strings"
 	"testing"
+	"unicode/utf8"
 )
 
+func TestStepName(t *testing.T) {
+	cases := []struct {
+		name, step, want string
+	}{
+		{"short step", "Stir well", "Stir well"},
+		{"first sentence", "Mix ingredients. Then bake for 30 minutes.", "Mix ingredients."},
+		{"earliest break wins", "Mix.\nThen bake. Serve.", "Mix."},
+		// A sentence of exactly 80 runes (79 + period) still fits.
+		{"ASCII sentence at limit", strings.Repeat("a", 79) + ". rest", strings.Repeat("a", 79) + "."},
+		{"multi-byte sentence at limit", strings.Repeat("é", 79) + ". rest of step", strings.Repeat("é", 79) + "."},
+		// An 81-rune sentence is too long, so the 86-rune step is truncated.
+		{"sentence over limit", strings.Repeat("é", 80) + ". rest", strings.Repeat("é", 77) + "..."},
+		// Truncation keeps 77 runes and appends "...", never splitting a rune.
+		{"ASCII truncation", strings.Repeat("a", 85), strings.Repeat("a", 77) + "..."},
+		{"multi-byte truncation", strings.Repeat("é", 85), strings.Repeat("é", 77) + "..."},
+	}
+	for _, tc := range cases {
+		got := stepName(tc.step)
+		if !utf8.ValidString(got) {
+			t.Errorf("%s: invalid UTF-8: %q", tc.name, got)
+		}
+		if got != tc.want {
+			t.Errorf("%s: got %q, want %q", tc.name, got, tc.want)
+		}
+	}
+}
+
 func TestParseDurationMinutes(t *testing.T) {
 	cases := []struct {
 		input string