Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions PLAN.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ sql for benchmarks maybe?

https://github.com/tanelpoder/tpt-postgres/blob/main/demos/pqrsafe.sql

- Absurd

https://github.com/earendil-works/absurd/blob/56500e5a23beca5e976f329475063f24692d99cc/sql/absurd.sql

### CLI

from `deno`
Expand Down
2 changes: 1 addition & 1 deletion crates/squawk_parser/tests/data/ok/select_operators.sql
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ select 'null' is not null;
select 1 isnull;

-- notnull (non-standard syntax)
select 'foo' isnull;
select 'foo' notnull;

-- is true
select true is true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2661,7 +2661,7 @@ SOURCE_FILE
LITERAL
STRING "'foo'"
WHITESPACE " "
ISNULL_KW "isnull"
NOTNULL_KW "notnull"
SEMICOLON ";"
WHITESPACE "\n\n"
COMMENT "-- is true"
Expand Down
12 changes: 12 additions & 0 deletions crates/squawk_syntax/src/ast/generated/nodes.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 49 additions & 1 deletion crates/squawk_syntax/src/postgresql.ungram
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,55 @@ IsNot =
'is' 'not'

Op =
'or' | Gteq | '<' | '>' | FatArrow | '=' | 'in' | Neqb | Lteq | '+' | 'overlaps' | 'like' | 'ilike' | NotLike | NotIlike | NotIn | CustomOp | IsDistinctFrom | IsNotDistinctFrom | OperatorCall | 'is' | '^' | '%' | 'and' | '/' | Neq | 'collate' | '-' | ColonEq | ColonColon | 'value' | ':' | IsNot | SimilarTo | NotSimilarTo | AtTimeZone | IsJson | IsJsonValue | IsNotJson | IsJsonObject | IsJsonArray |IsJsonScalar | IsNotJsonValue | IsNotJsonObject | IsNotJsonArray | IsNotJsonScalar
'or'
| '-'
| ':'
| '/'
| '%'
| '^'
| '+'
| '<'
| '='
| '>'
| 'and'
| 'collate'
| 'ilike'
| 'in'
| 'is'
| 'isnull'
| 'like'
| 'overlaps'
| 'value'
| AtTimeZone
| ColonColon
| ColonEq
| CustomOp
| FatArrow
| Gteq
| IsDistinctFrom
| IsJson
| IsJsonArray
| IsJsonObject
| IsJsonScalar
| IsJsonValue
| IsNormalized
| IsNot
| IsNotDistinctFrom
| IsNotJson
| IsNotJsonArray
| IsNotJsonObject
| IsNotJsonScalar
| IsNotJsonValue
| IsNotNormalized
| Lteq
| Neq
| Neqb
| NotIlike
| NotIn
| NotLike
| NotSimilarTo
| OperatorCall
| SimilarTo

IsJson =
'is' 'json' JsonKeysUniqueClause?
Expand Down
99 changes: 88 additions & 11 deletions crates/xtask/src/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -900,22 +900,70 @@ fn generate_nodes(nodes: &[AstNodeSrc], enums: &[AstEnumSrc]) -> String {
format!("{PRELUDE}{output}")
}

fn update_textmate_keywords(all_keywords: &[String]) -> Result<()> {
let tmlanguage_path = project_root().join("squawk-vscode/syntaxes/pgsql.tmLanguage.json");
let content = std::fs::read_to_string(&tmlanguage_path)?;
let mut json: serde_json::Value = serde_json::from_str(&content)?;
// Multi-word keyword phrases that should be highlighted as keywords, not
// operators.
//
// `keyword_phrases_match` replaces each space with `\s+` and joins the
// entries with `|`. The unit tests in this file require the entries to be
// ordered by descending length.
const KEYWORD_PHRASES: &[&str] = &["if not exists", "if exists"];

// Word-like operators that are highlighted as operators rather than as plain
// keywords.
//
// Multi-word entries must come before their single-word components so the
// regex engine matches the longest form first. `operator_match` replaces each
// space with `\s+` and joins the entries with `|` in the order listed here.
// The unit tests in this file enforce the ordering by requiring the entries
// to be sorted by descending length, so add new entries at the position that
// keeps that order.
const KEYWORD_OPERATORS: &[&str] = &[
"not between symmetric",
"is not distinct from",
"between symmetric",
"is distinct from",
"not similar to",
"at time zone",
"not between",
"similar to",
"not ilike",
"not like",
"overlaps",
"between",
"collate",
"notnull",
"is not",
"not in",
"isnull",
"ilike",
"like",
"and",
"not",
"in",
"is",
"or",
];

let mut keywords = all_keywords
fn keyword_phrases_match() -> String {
let patterns: Vec<String> = KEYWORD_PHRASES
.iter()
.map(|k| k.to_lowercase())
.collect::<Vec<_>>();
keywords.sort();
.map(|p| p.replace(' ', "\\s+"))
.collect();
format!("(?i)\\b({})\\b", patterns.join("|"))
}

/// Builds the regex that matches the word-like operators.
///
/// Every space in a `KEYWORD_OPERATORS` entry becomes `\s+`. The entries keep
/// their declaration order in the alternation, which is wrapped in a
/// case-insensitive, word-bounded group.
fn operator_match() -> String {
    let alternation = KEYWORD_OPERATORS
        .iter()
        .map(|op| op.replace(' ', "\\s+"))
        .collect::<Vec<_>>()
        .join("|");
    format!("(?i)\\b({alternation})\\b")
}

/// Builds the regex that matches every generated keyword.
///
/// Each keyword is lowercased, the list is sorted lexicographically, and the
/// entries are joined with `|` inside a `(?xi)\b(...)\b` group.
fn keywords_match(all_keywords: &[String]) -> String {
    let mut keywords: Vec<String> = all_keywords.iter().map(|k| k.to_lowercase()).collect();
    keywords.sort();
    let keywords_joined = keywords.join("|");
    // The pattern is built once and returned directly; no intermediate copy.
    format!("(?xi)\\b({keywords_joined})\\b")
}

json["repository"]["keywords"]["patterns"][0]["match"] =
serde_json::Value::String(match_pattern);
fn update_textmate_keywords(all_keywords: &[String]) -> Result<()> {
let tmlanguage_path = project_root().join("squawk-vscode/syntaxes/pgsql.tmLanguage.json");
let content = std::fs::read_to_string(&tmlanguage_path)?;
let mut json: serde_json::Value = serde_json::from_str(&content)?;

json["repository"]["keywords"]["patterns"][0]["match"] = keyword_phrases_match().into();
json["repository"]["keywords"]["patterns"][1]["match"] = operator_match().into();
json["repository"]["keywords"]["patterns"][2]["match"] = keywords_match(all_keywords).into();

let output = serde_json::to_string_pretty(&json)?;
std::fs::write(&tmlanguage_path, format!("{output}\n"))?;
Expand Down Expand Up @@ -964,3 +1012,32 @@ fn generate_tokens(tokens: &[(&'static str, &'static str)]) -> String {
let output = reformat(file.to_string()).replace("#[derive", "\n#[derive");
format!("{PRELUDE}{output}")
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that no entry in `list` is shorter than the entry after it.
    ///
    /// `name` identifies the list in the failure message. The panic is
    /// attributed to the calling test through `#[track_caller]`.
    #[track_caller]
    fn assert_sorted(list: &[&str], name: &str) {
        // Pair every entry with its successor and compare their lengths.
        for (&a, &b) in list.iter().zip(list.iter().skip(1)) {
            let (a_len, b_len) = (a.len(), b.len());
            assert!(
                a_len >= b_len,
                "{name} not sorted by length descending: \
                 {a:?} (len {}) comes before {b:?} (len {})",
                a_len,
                b_len,
            );
        }
    }

    #[test]
    fn keyword_operators_sorted_by_length_desc() {
        assert_sorted(KEYWORD_OPERATORS, "KEYWORD_OPERATORS");
    }

    #[test]
    fn keyword_phrases_sorted_by_length_desc() {
        assert_sorted(KEYWORD_PHRASES, "KEYWORD_PHRASES");
    }
}
76 changes: 38 additions & 38 deletions squawk-vscode/syntaxes/pgsql.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,32 @@
}
],
"repository": {
"comments": {
"patterns": [
{
"captures": {
"1": {
"name": "punctuation.definition.comment.pgsql"
}
},
"match": "(--).*$\\n?",
"name": "comment.line.double-dash.pgsql"
},
{
"begin": "/\\*",
"captures": {
"0": {
"name": "punctuation.definition.comment.pgsql"
}
},
"end": "\\*/",
"name": "comment.block.c"
}
]
},
"create_entities": {
"patterns": [
{
"match": "(?i)\\b(create)\\s+(or\\s+replace\\s+)?(function|view)\\s+((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))",
"captures": {
"1": {
"name": "keyword.other.create.pgsql"
Expand All @@ -69,10 +91,10 @@
"4": {
"name": "entity.name.function.pgsql"
}
}
},
"match": "(?i)\\b(create)\\s+(or\\s+replace\\s+)?(function|view)\\s+((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))"
},
{
"match": "(?i)\\b(create)\\s+(aggregate|collation|conversion|database|domain|event\\s+trigger|group|(?:unique\\s+)?index|language|operator\\s+class|operator|rule|schema|sequence|(?:(?:global|local)\\s+)?(?:(?:temp|temporary|unlogged)\\s+)?table|tablespace|trigger|type|user|(?:materialized\\s+)?view)\\s+(if\\s+not\\s+exists\\s+)?((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))",
"captures": {
"1": {
"name": "keyword.other.create.pgsql"
Expand All @@ -86,30 +108,8 @@
"4": {
"name": "entity.name.function.pgsql"
}
}
}
]
},
"comments": {
"patterns": [
{
"captures": {
"1": {
"name": "punctuation.definition.comment.pgsql"
}
},
"match": "(--).*$\\n?",
"name": "comment.line.double-dash.pgsql"
},
{
"begin": "/\\*",
"captures": {
"0": {
"name": "punctuation.definition.comment.pgsql"
}
},
"end": "\\*/",
"name": "comment.block.c"
"match": "(?i)\\b(create)\\s+(aggregate|collation|conversion|database|domain|event\\s+trigger|group|(?:unique\\s+)?index|language|operator\\s+class|operator|rule|schema|sequence|(?:(?:global|local)\\s+)?(?:(?:temp|temporary|unlogged)\\s+)?table|tablespace|trigger|type|user|(?:materialized\\s+)?view)\\s+(if\\s+not\\s+exists\\s+)?((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))"
}
]
},
Expand Down Expand Up @@ -145,6 +145,14 @@
},
"keywords": {
"patterns": [
{
"match": "(?i)\\b(if\\s+not\\s+exists|if\\s+exists)\\b",
"name": "keyword.other.pgsql"
},
{
"match": "(?i)\\b(not\\s+between\\s+symmetric|is\\s+not\\s+distinct\\s+from|between\\s+symmetric|is\\s+distinct\\s+from|not\\s+similar\\s+to|at\\s+time\\s+zone|not\\s+between|similar\\s+to|not\\s+ilike|not\\s+like|overlaps|between|collate|notnull|is\\s+not|not\\s+in|isnull|ilike|like|and|not|in|is|or)\\b",
"name": "keyword.operator.pgsql"
},
{
"captures": {
"1": {
Expand All @@ -162,20 +170,12 @@
"name": "constant.numeric.pgsql"
},
{
"match": "\\*",
"name": "keyword.operator.star.pgsql"
},
{
"match": "[!<>]?=|<>|<|>",
"name": "keyword.operator.comparison.pgsql"
},
{
"match": "-|\\+|/",
"name": "keyword.operator.math.pgsql"
"match": "::",
"name": "keyword.operator.cast.pgsql"
},
{
"match": "\\|\\|",
"name": "keyword.operator.concatenator.pgsql"
"match": "[+\\-*/<>=~!@#%\\^&|`?]",
"name": "keyword.operator.pgsql"
}
]
},
Expand Down
Loading