package workflow

import (
	"math"
	"slices"
	"strconv"
	"strings"
	"unicode"
)

// TokenKind represents the type of token returned by the lexer.
// The values mirror the C# TokenKind enum.
//
// Note: the names are kept identical to the C# implementation for
// easier mapping when porting the parser.
//
// The lexer is intentionally simple: it only tokenises the subset of
// expressions used in GitHub Actions workflow `if:` expressions.
// It does not evaluate the expression; that is left to the parser.
type TokenKind int

const (
	TokenKindStartGroup TokenKind = iota
	TokenKindStartIndex
	TokenKindEndGroup
	TokenKindEndIndex
	TokenKindSeparator
	TokenKindDereference
	TokenKindWildcard
	TokenKindLogicalOperator
	TokenKindNumber
	TokenKindString
	TokenKindBoolean
	TokenKindNull
	TokenKindPropertyName
	TokenKindFunction
	TokenKindNamedValue
	TokenKindStartParameters
	TokenKindEndParameters
	TokenKindUnexpected
)

// Token represents a single lexical token.
// Raw holds the original text, Value holds the parsed value when
// applicable, and Index is the start position of the token in the
// source string.
//
// The struct is intentionally minimal: it only contains what the parser
// needs. If you need more information (e.g. token length) you can add it.
type Token struct {
	Kind  TokenKind
	Raw   string
	Value interface{}
	Index int
}

// Lexer holds the state while tokenising an expression.
// It is a direct port of the C# LexicalAnalyzer.
//
// Flags can be used to enable or disable features; for now only a single
// flag is supported, mirroring ExpressionFlags.DTExpressionsV1.
//
// The lexer is not thread-safe; use one instance per expression.
type Lexer struct {
	expr  string
	flags int
	index int
	last  *Token
	stack []TokenKind // unclosed start tokens
}

// NewLexer creates a new lexer for the given expression.
func NewLexer(expr string, flags int) *Lexer {
	return &Lexer{expr: expr, flags: flags}
}

// testTokenBoundary reports whether c terminates the token being read.
func testTokenBoundary(c rune) bool {
	switch c {
	case '(', '[', ')', ']', ',', '.', '!', '>', '<', '=', '&', '|':
		return true
	default:
		return unicode.IsSpace(c)
	}
}

// Next returns the next token, or nil once the end of the expression is
// reached. The expression is scanned byte-wise; only ASCII whitespace and
// punctuation are significant.
func (l *Lexer) Next() *Token {
	// Skip whitespace.
	for l.index < len(l.expr) && unicode.IsSpace(rune(l.expr[l.index])) {
		l.index++
	}
	if l.index >= len(l.expr) {
		return nil
	}
	c := l.expr[l.index]
	switch c {
	case '(':
		l.index++
		// Function call or logical grouping.
		if l.last != nil && l.last.Kind == TokenKindFunction {
			return l.createToken(TokenKindStartParameters, "(")
		}
		if l.flags&FlagV1 != 0 {
			// V1 does not support grouping; treat as unexpected.
			return l.createToken(TokenKindUnexpected, "(")
		}
		return l.createToken(TokenKindStartGroup, "(")
	case '[':
		l.index++
		return l.createToken(TokenKindStartIndex, "[")
	case ')':
		l.index++
		if len(l.stack) > 0 && l.stack[len(l.stack)-1] == TokenKindStartParameters {
			return l.createToken(TokenKindEndParameters, ")")
		}
		return l.createToken(TokenKindEndGroup, ")")
	case ']':
		l.index++
		return l.createToken(TokenKindEndIndex, "]")
	case ',':
		l.index++
		return l.createToken(TokenKindSeparator, ",")
	case '*':
		l.index++
		return l.createToken(TokenKindWildcard, "*")
	case '\'':
		return l.readString()
	case '!', '>', '<', '=', '&', '|':
		if l.flags&FlagV1 != 0 {
			l.index++
			return l.createToken(TokenKindUnexpected, string(c))
		}
		return l.readOperator()
	default:
		return l.defaultNext(c)
	}
}
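
// Tokenize is a convenience sketch, not part of the original C# surface:
// it drains the lexer with Next until the input is exhausted or an
// unexpected token is produced. The name and the early cut-off are
// assumptions of this port; callers that want to keep scanning past an
// error can simply keep calling Next.
func Tokenize(expr string, flags int) []*Token {
	lex := NewLexer(expr, flags)
	var toks []*Token
	for tok := lex.Next(); tok != nil; tok = lex.Next() {
		toks = append(toks, tok)
		if tok.Kind == TokenKindUnexpected {
			// Stop early; the lexer has no error recovery.
			break
		}
	}
	return toks
}

// For example, the expression below (an illustrative input, not one used
// elsewhere in this package) lexes to the following stream:
//
//	Tokenize("success() && github.ref == 'refs/heads/main'", 0)
//	// Function "success", StartParameters "(", EndParameters ")",
//	// LogicalOperator "&&", NamedValue "github", Dereference ".",
//	// PropertyName "ref", LogicalOperator "==", String "'refs/heads/main'"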
func (l *Lexer) defaultNext(c byte) *Token {
	if c == '.' {
		// Could be a number or a dereference, depending on what preceded it.
		if l.last == nil ||
			l.last.Kind == TokenKindSeparator ||
			l.last.Kind == TokenKindStartGroup ||
			l.last.Kind == TokenKindStartIndex ||
			l.last.Kind == TokenKindStartParameters ||
			l.last.Kind == TokenKindLogicalOperator {
			return l.readNumber()
		}
		l.index++
		return l.createToken(TokenKindDereference, ".")
	}
	if c == '-' || c == '+' || unicode.IsDigit(rune(c)) {
		return l.readNumber()
	}
	return l.readKeyword()
}

// createToken builds a token, validates the token order, and updates the
// lexer state. By the time it is called, l.index has already been advanced
// past the raw text, so the start position promised by the Token doc is
// recovered by subtracting len(raw).
func (l *Lexer) createToken(kind TokenKind, raw string) *Token {
	index := l.index - len(raw)
	if !l.checkLastToken(kind, raw) {
		// Illegal token sequence.
		return &Token{Kind: TokenKindUnexpected, Raw: raw, Index: index}
	}
	tok := &Token{Kind: kind, Raw: raw, Index: index}
	l.last = tok
	// Track unclosed start tokens so ')' can be classified as EndParameters
	// or EndGroup, and so callers can detect unbalanced input.
	switch kind {
	case TokenKindStartGroup, TokenKindStartIndex, TokenKindStartParameters:
		l.stack = append(l.stack, kind)
	case TokenKindEndGroup, TokenKindEndIndex, TokenKindEndParameters:
		if len(l.stack) > 0 {
			l.stack = l.stack[:len(l.stack)-1]
		}
	}
	return tok
}

// getLastKind returns the kind of the previous token, or nil when no token
// has been produced yet.
func (l *Lexer) getLastKind() *TokenKind {
	if l.last != nil {
		return &l.last.Kind
	}
	return nil
}

// checkLastToken verifies that the token sequence is legal based on the
// last token.
func (l *Lexer) checkLastToken(kind TokenKind, raw string) bool {
	lastKind := l.getLastKind()
	// allowed reports whether the previous token kind is in the given list;
	// a nil lastKind (no previous token) never matches.
	allowed := func(allowedKinds ...TokenKind) bool {
		return lastKind != nil && slices.Contains(allowedKinds, *lastKind)
	}
	switch kind {
	case TokenKindStartGroup:
		return lastKind == nil || allowed(TokenKindSeparator, TokenKindStartGroup, TokenKindStartParameters, TokenKindStartIndex, TokenKindLogicalOperator)
	case TokenKindStartIndex:
		return allowed(TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindStartParameters:
		return allowed(TokenKindFunction)
	case TokenKindEndGroup:
		return allowed(TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindEndIndex:
		return allowed(TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindEndParameters:
		return allowed(TokenKindStartParameters, TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindSeparator:
		return allowed(TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindWildcard:
		return allowed(TokenKindStartIndex, TokenKindDereference)
	case TokenKindDereference:
		return allowed(TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindLogicalOperator:
		if raw == "!" {
			// Unary "!" may begin an expression or follow another operator.
			return lastKind == nil || allowed(TokenKindSeparator, TokenKindStartGroup, TokenKindStartParameters, TokenKindStartIndex, TokenKindLogicalOperator)
		}
		return allowed(TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex, TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString, TokenKindPropertyName, TokenKindNamedValue)
	case TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString:
		return lastKind == nil || allowed(TokenKindSeparator, TokenKindStartIndex, TokenKindStartGroup, TokenKindStartParameters, TokenKindLogicalOperator)
	case TokenKindPropertyName:
		return allowed(TokenKindDereference)
	case TokenKindFunction, TokenKindNamedValue:
		return lastKind == nil || allowed(TokenKindSeparator, TokenKindStartIndex, TokenKindStartGroup, TokenKindStartParameters, TokenKindLogicalOperator)
	default:
		return true
	}
}
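
// As an illustration of the sequence check: two adjacent literals are not
// a legal expression, so the second one comes back as Unexpected. The
// input here is only an example.
//
//	toks := Tokenize("1 2", 0)
//	// toks[0].Kind == TokenKindNumber
//	// toks[1].Kind == TokenKindUnexpected (a Number may not follow a Number)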
// readNumber parses a numeric literal. The '.' boundary character is
// permitted inside the token so that decimals such as "1.5" stay whole.
func (l *Lexer) readNumber() *Token {
	start := l.index
	for l.index < len(l.expr) {
		ch := l.expr[l.index]
		if ch != '.' && testTokenBoundary(rune(ch)) {
			break
		}
		l.index++
	}
	raw := l.expr[start:l.index]
	tok := l.createToken(TokenKindNumber, raw)
	// Hexadecimal and octal literals: base 0 lets ParseInt honour the
	// "0x"/"0o" prefix.
	if len(raw) > 2 {
		switch raw[:2] {
		case "0x", "0o":
			if i, err := strconv.ParseInt(raw, 0, 32); err == nil {
				tok.Value = float64(i)
				return tok
			}
		}
	}
	// Otherwise try float64; if that fails, keep the raw text as the value.
	if f, err := strconv.ParseFloat(raw, 64); err == nil {
		tok.Value = f
	} else {
		tok.Value = raw
	}
	return tok
}

// readString parses a single-quoted string literal. A doubled quote ('')
// escapes a literal quote character.
func (l *Lexer) readString() *Token {
	start := l.index
	l.index++ // skip the opening quote
	var sb strings.Builder
	closed := false
	for l.index < len(l.expr) {
		ch := l.expr[l.index]
		l.index++
		if ch == '\'' {
			if l.index < len(l.expr) && l.expr[l.index] == '\'' {
				// Escaped quote.
				sb.WriteByte('\'')
				l.index++
				continue
			}
			closed = true
			break
		}
		sb.WriteByte(ch)
	}
	raw := l.expr[start:l.index]
	tok := l.createToken(TokenKindString, raw)
	if closed {
		tok.Value = sb.String()
	} else {
		// Unterminated string literal.
		tok.Kind = TokenKindUnexpected
	}
	return tok
}
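
// A sketch of the literal forms these readers accept; every input below is
// illustrative, not something referenced elsewhere in this package:
//
//	Tokenize("0xff", 0)[0].Value    // float64(255), via the "0x" branch
//	Tokenize("0o17", 0)[0].Value    // float64(15), via the "0o" branch
//	Tokenize("'it''s'", 0)[0].Value // "it's": the doubled quote is the escape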
// readOperator parses logical and comparison operators (!, ==, !=, >, >=,
// <, <=, && and ||). A lone '=', '&' or '|' is not a valid operator.
func (l *Lexer) readOperator() *Token {
	start := l.index
	l.index++
	if l.index < len(l.expr) {
		two := l.expr[start : l.index+1]
		switch two {
		case "!=", ">=", "<=", "==", "&&", "||":
			l.index++
			return l.createToken(TokenKindLogicalOperator, two)
		}
	}
	ch := l.expr[start]
	switch ch {
	case '!', '>', '<':
		return l.createToken(TokenKindLogicalOperator, string(ch))
	}
	return l.createToken(TokenKindUnexpected, string(ch))
}

// readKeyword parses identifiers: booleans, null, the special numeric
// keywords NaN and Infinity, property names, function names, and named
// values.
func (l *Lexer) readKeyword() *Token {
	start := l.index
	for l.index < len(l.expr) &&
		!unicode.IsSpace(rune(l.expr[l.index])) &&
		!strings.ContainsRune("()[],.!<>=&|*", rune(l.expr[l.index])) {
		l.index++
	}
	raw := l.expr[start:l.index]
	// After a dereference, any identifier is a property name.
	if l.last != nil && l.last.Kind == TokenKindDereference {
		return l.createToken(TokenKindPropertyName, raw)
	}
	switch raw {
	case "true":
		tok := l.createToken(TokenKindBoolean, raw)
		tok.Value = true
		return tok
	case "false":
		tok := l.createToken(TokenKindBoolean, raw)
		tok.Value = false
		return tok
	case "null":
		return l.createToken(TokenKindNull, raw)
	case "NaN":
		tok := l.createToken(TokenKindNumber, raw)
		tok.Value = math.NaN()
		return tok
	case "Infinity":
		tok := l.createToken(TokenKindNumber, raw)
		tok.Value = math.Inf(1)
		return tok
	}
	// A keyword directly followed by '(' is a function call.
	if l.index < len(l.expr) && l.expr[l.index] == '(' {
		return l.createToken(TokenKindFunction, raw)
	}
	return l.createToken(TokenKindNamedValue, raw)
}

// Flag constants; only V1 is used for now.
const FlagV1 = 1

// UnclosedTokens returns the stack of unclosed start tokens, in the order
// they were opened. A non-empty result after the lexer is drained means
// the expression has unbalanced groups, indices, or parameter lists.
func (l *Lexer) UnclosedTokens() []TokenKind {
	return l.stack
}
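
// balanced is a minimal sketch of how a caller might use UnclosedTokens to
// reject unbalanced input once Next returns nil. The helper name and the
// yes/no interface are assumptions of this port, not part of the C# API.
func balanced(expr string, flags int) bool {
	lex := NewLexer(expr, flags)
	for tok := lex.Next(); tok != nil; tok = lex.Next() {
		if tok.Kind == TokenKindUnexpected {
			return false
		}
	}
	return len(lex.UnclosedTokens()) == 0
}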