From c2a6ee22208a01e354495b13a603904c6d02552c Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Fri, 10 Apr 2026 22:49:30 -0400 Subject: [PATCH] playground: add semantic syntax highlighting it's not pretty, but it works --- .oxfmtrc.json | 1 - .vscode/settings.json | 3 + crates/squawk_server/src/lsp_utils.rs | 1 + crates/squawk_wasm/src/lib.rs | 147 +++++++++++++++++++++++++- playground/src/App.tsx | 20 +++- playground/src/providers.tsx | 23 ++++ playground/src/squawk.tsx | 13 +++ 7 files changed, 205 insertions(+), 3 deletions(-) diff --git a/.oxfmtrc.json b/.oxfmtrc.json index d1ce246e..beae7566 100644 --- a/.oxfmtrc.json +++ b/.oxfmtrc.json @@ -6,7 +6,6 @@ "ignorePatterns": [ "build/", "node_modules/", - "playground/", "coverage/", ".venv/", ".mypy_cache/", diff --git a/.vscode/settings.json b/.vscode/settings.json index aed205dc..4991578c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,5 +11,8 @@ }, "[sql]": { "editor.tabSize": 2 + }, + "[typescriptreact]": { + "editor.defaultFormatter": "oxc.oxc-vscode" } } diff --git a/crates/squawk_server/src/lsp_utils.rs b/crates/squawk_server/src/lsp_utils.rs index 88226300..e681cc35 100644 --- a/crates/squawk_server/src/lsp_utils.rs +++ b/crates/squawk_server/src/lsp_utils.rs @@ -242,6 +242,7 @@ pub(crate) fn to_semantic_tokens( prev_start: 0, }; + // Duplicated in squawk-wasm, fyi for token in &*semantic_tokens { // Taken from rust-analyzer, this solves the case where we have a multi // line semantic token which isn't supported by the LSP spec. 
diff --git a/crates/squawk_wasm/src/lib.rs b/crates/squawk_wasm/src/lib.rs index 468f66ac..05b1a614 100644 --- a/crates/squawk_wasm/src/lib.rs +++ b/crates/squawk_wasm/src/lib.rs @@ -1,16 +1,104 @@ use line_index::LineIndex; use log::info; -use rowan::TextRange; +use rowan::{TextRange, TextSize}; use salsa::Setter; use serde::{Deserialize, Serialize}; use squawk_ide::builtins::builtins_line_index; use squawk_ide::db::{self, Database, File}; use squawk_ide::folding_ranges::{FoldKind, folding_ranges}; use squawk_ide::goto_definition::FileId; +use squawk_ide::semantic_tokens::{SemanticTokenType, semantic_tokens}; use squawk_syntax::ast::AstNode; use wasm_bindgen::prelude::*; use web_sys::js_sys::Error; +const SEMANTIC_TOKEN_TYPES: &[&str] = &[ + "comment", + "function", + "keyword", + "namespace", + "number", + "operator", + "parameter", + "property", + "string", + "struct", + "type", + "variable", +]; + +const SEMANTIC_TOKEN_MODIFIERS: &[&str] = &["declaration", "definition", "readonly"]; + +fn semantic_token_type_name(ty: SemanticTokenType) -> &'static str { + match ty { + SemanticTokenType::Bool | SemanticTokenType::Keyword => "keyword", + SemanticTokenType::Comment => "comment", + SemanticTokenType::Function => "function", + SemanticTokenType::Name | SemanticTokenType::NameRef => "variable", + SemanticTokenType::Number => "number", + SemanticTokenType::Operator | SemanticTokenType::Punctuation => "operator", + SemanticTokenType::Parameter | SemanticTokenType::PositionalParam => "parameter", + SemanticTokenType::String => "string", + SemanticTokenType::Type => "type", + } +} + +fn semantic_token_type_index(ty: SemanticTokenType) -> u32 { + let name = semantic_token_type_name(ty); + SEMANTIC_TOKEN_TYPES + .iter() + .position(|it| *it == name) + .unwrap() as u32 +} + +struct EncodedSemanticToken { + line: u32, + start: u32, + length: u32, + token_type: SemanticTokenType, + modifiers: u32, +} + +struct SemanticTokenEncoder { + data: Vec<u32>, + prev_line: u32, + 
prev_start: u32, +} + +impl SemanticTokenEncoder { + fn with_capacity(token_count: usize) -> Self { + Self { + data: Vec::with_capacity(token_count * 5), + prev_line: 0, + prev_start: 0, + } + } + + fn push(&mut self, token: EncodedSemanticToken) { + let delta_line = token.line - self.prev_line; + let delta_start = if delta_line == 0 { + token.start - self.prev_start + } else { + token.start + }; + + self.data.extend_from_slice(&[ + delta_line, + delta_start, + token.length, + semantic_token_type_index(token.token_type), + token.modifiers, + ]); + + self.prev_line = token.line; + self.prev_start = token.start; + } + + fn finish(self) -> Vec<u32> { + self.data + } +} + #[wasm_bindgen(start)] pub fn run() { use log::Level; @@ -429,6 +517,55 @@ impl SquawkDatabase { serde_wasm_bindgen::to_value(&results).map_err(into_error) } + pub fn semantic_tokens(&self) -> Result<Vec<u32>, Error> { + let file = self.file()?; + let line_index = db::line_index(&self.db, file); + let content = file.content(&self.db); + let tokens = semantic_tokens(&self.db, file, None); + + let mut encoder = SemanticTokenEncoder::with_capacity(tokens.len()); + + // Duplicated from squawk-server, fyi + for token in &tokens { + // Taken from rust-analyzer, this solves the case where we have a + // multi line semantic token which isn't supported by the LSP spec. 
+ // see: https://github.com/rust-lang/rust-analyzer/blob/2efc80078029894eec0699f62ec8d5c1a56af763/crates/rust-analyzer/src/lsp/to_proto.rs#L781C28-L781C28 + for mut text_range in line_index.lines(token.range) { + if content[text_range].ends_with('\n') { + text_range = + TextRange::new(text_range.start(), text_range.end() - TextSize::of('\n')); + } + let start_lc = line_index.line_col(text_range.start()); + let end_lc = line_index.line_col(text_range.end()); + let start_wide = line_index + .to_wide(line_index::WideEncoding::Utf16, start_lc) + .unwrap(); + let end_wide = line_index + .to_wide(line_index::WideEncoding::Utf16, end_lc) + .unwrap(); + + encoder.push(EncodedSemanticToken { + line: start_wide.line, + start: start_wide.col, + length: end_wide.col - start_wide.col, + token_type: token.token_type, + // TODO: once we get modifiers going, we'll need to update this + modifiers: 0, + }); + } + } + + Ok(encoder.finish()) + } + + pub fn semantic_tokens_legend() -> Result<JsValue, Error> { + let legend = SemanticTokensLegend { + token_types: SEMANTIC_TOKEN_TYPES.to_vec(), + token_modifiers: SEMANTIC_TOKEN_MODIFIERS.to_vec(), + }; + serde_wasm_bindgen::to_value(&legend).map_err(into_error) + } + pub fn completion(&self, line: u32, col: u32) -> Result<JsValue, Error> { let file = self.file()?; let line_index = db::line_index(&self.db, file); @@ -656,6 +793,14 @@ struct WasmSelectionRange { end_column: u32, } +#[derive(Serialize)] +struct SemanticTokensLegend { + #[serde(rename = "tokenTypes")] + token_types: Vec<&'static str>, + #[serde(rename = "tokenModifiers")] + token_modifiers: Vec<&'static str>, +} + #[derive(Serialize)] struct WasmCompletionItem { label: String, diff --git a/playground/src/App.tsx b/playground/src/App.tsx index 75fb77dc..103bc1a7 100644 --- a/playground/src/App.tsx +++ b/playground/src/App.tsx @@ -22,6 +22,7 @@ import { provideFoldingRanges, provideSelectionRanges, provideCompletionItems, + semanticTokensProvider, } from "./providers" import BUILTINS_SQL from 
"./builtins.sql?raw" @@ -48,7 +49,7 @@ const SETTINGS = { value: DEFAULT_CONTENT, language: "pgsql", tabSize: 2, - theme: "vs-dark", + theme: "squawk-dark", minimap: { enabled: false }, automaticLayout: true, scrollBeyondLastLine: false, @@ -64,6 +65,7 @@ const SETTINGS = { renderWhitespace: "boundary", guides: { indentation: false }, lineNumbersMinChars: 3, + "semanticHighlighting.enabled": true, } satisfies monaco.editor.IStandaloneEditorConstructionOptions function clx(...args: (string | undefined | number | false)[]): string { @@ -288,6 +290,15 @@ function registerMonacoProvidersOnce() { return } monacoGlobalProvidersRegistered = true + // vs-dark maps variable to a blue color which makes everything look like a + // keyword. So we use white instead which was what the `foo` in `select 1 foo` + // was before semantic syntax highlighting. + monaco.editor.defineTheme("squawk-dark", { + base: "vs-dark", + inherit: true, + rules: [{ token: "variable", foreground: "D4D4D4" }], + colors: {}, + }) const languageConfig = monaco.languages.setLanguageConfiguration("pgsql", { comments: { lineComment: "--", @@ -473,6 +484,12 @@ function registerMonacoProvidersOnce() { }, ) + const documentSemanticTokensProvider = + monaco.languages.registerDocumentSemanticTokensProvider( + "pgsql", + semanticTokensProvider, + ) + return () => { languageConfig.dispose() codeActionProvider.dispose() @@ -484,6 +501,7 @@ function registerMonacoProvidersOnce() { inlayHintsProvider.dispose() selectionRangeProvider.dispose() completionProvider.dispose() + documentSemanticTokensProvider.dispose() tokenProvider.dispose() } } diff --git a/playground/src/providers.tsx b/playground/src/providers.tsx index 8c8ff07b..81f20ae5 100644 --- a/playground/src/providers.tsx +++ b/playground/src/providers.tsx @@ -9,6 +9,8 @@ import { hover, inlay_hints, selection_ranges, + semantic_tokens, + semantic_tokens_legend, DocumentSymbol, } from "./squawk" @@ -315,6 +317,27 @@ function convertCompletionKind( } } +export 
const semanticTokensProvider: monaco.languages.DocumentSemanticTokensProvider = + { + getLegend() { + return semantic_tokens_legend() + }, + provideDocumentSemanticTokens(model) { + const content = model.getValue() + const version = model.getVersionId() + if (!content) return null + + try { + const data = semantic_tokens(content, version) + return { data, resultId: undefined } + } catch (e) { + console.error("Error in provideDocumentSemanticTokens:", e) + return null + } + }, + releaseDocumentSemanticTokens() {}, + } + export async function provideCompletionItems( model: monaco.editor.ITextModel, position: monaco.Position, diff --git a/playground/src/squawk.tsx b/playground/src/squawk.tsx index 50269bb9..cde9909f 100644 --- a/playground/src/squawk.tsx +++ b/playground/src/squawk.tsx @@ -121,6 +121,19 @@ export function completion( return getDb(content, version).completion(line, column) } +export interface SemanticTokensLegend { + tokenTypes: string[] + tokenModifiers: string[] +} + +export function semantic_tokens(content: string, version: number): Uint32Array { + return getDb(content, version).semantic_tokens() +} + +export function semantic_tokens_legend(): SemanticTokensLegend { + return SquawkDatabase.semantic_tokens_legend() +} + export function dump_cst(content: string, version: number): string { return getDb(content, version).dump_cst() }