|
Luma 0.1.0
A low-level compiled alternative to C, C++, and more!
|
Recursive descent and Pratt parser for the Luma language. More...
#include <stddef.h>
#include "../ast/ast.h"
#include "../c_libs/memory/memory.h"
#include "../helper/help.h"
#include "../lexer/lexer.h"

Go to the source code of this file.
Classes | |
| struct | Parser |
| Parser state holding token stream and current position. More... | |
Macros | |
| #define | CURRENT_TOKEN_LENGTH(parser) ((int)p_current(parser).length) |
| Get the length of the current token's lexeme. | |
| #define | CURRENT_TOKEN_VALUE(parser) (p_current(parser).value) |
| Get the value string of the current token. | |
| #define | MAX_STMT 1024 |
| Maximum allowed size for statements, expressions, and types. | |
| #define | MAX_EXPR 1024 |
| #define | MAX_TYPE 1024 |
Enumerations | |
| enum | BindingPower { BP_NONE = 0 , BP_LOWEST , BP_ASSIGN , BP_TERNARY , BP_LOGICAL_OR , BP_LOGICAL_AND , BP_BITWISE_OR , BP_BITWISE_XOR , BP_BITWISE_AND , BP_EQUALITY , BP_RELATIONAL , BP_RANGE , BP_SHIFT , BP_SUM , BP_PRODUCT , BP_EXPONENT , BP_UNARY , BP_POSTFIX , BP_CALL , BP_PRIMARY } |
| Binding power (precedence) levels for expression parsing. More... | |
Functions | |
| void | parser_error (Parser *psr, const char *error_type, const char *file, const char *msg, int line, int col, int tk_length) |
| Report a parser error with detailed location info. | |
| char * | collect_doc_comments (Parser *parser) |
| Collects consecutive documentation comments before a declaration. | |
| void | consume_doc_comments (Parser *parser) |
| bool | p_has_tokens (Parser *psr) |
| Checks if there are more tokens available for parsing. | |
| Token | p_peek (Parser *psr, size_t offset) |
| Peeks at a token at the specified offset from current position. | |
| Token | p_current (Parser *psr) |
| Gets the current token without advancing the parser position. | |
| Token | p_advance (Parser *psr) |
| Advances to the next token and returns the current token. | |
| Token | p_consume (Parser *psr, LumaTokenType type, const char *error_msg) |
| Consumes a token of the expected type or reports an error. | |
| char * | get_name (Parser *psr) |
| Extracts and duplicates the current token's string value. | |
| Stmt * | parse (GrowableArray *tks, ArenaAllocator *arena, BuildConfig *config) |
| Parses a full program from tokens into an AST of statements. | |
| Expr * | parse_expr (Parser *parser, BindingPower bp) |
| Parses an expression using the Pratt parsing algorithm. | |
| Stmt * | parse_stmt (Parser *parser) |
| Parses a single statement. | |
| Type * | parse_type (Parser *parser) |
| Parses a type annotation. | |
| bool | init_parser_arrays (Parser *parser, GrowableArray *stmts, GrowableArray *modules) |
| const char * | parse_module_declaration (Parser *parser, char **out_module_doc) |
| Expr * | nud (Parser *parser) |
| Null Denotation - handles prefix expressions and primary expressions. | |
| Expr * | led (Parser *parser, Expr *left, BindingPower bp) |
| Pratt parser function for left denotation (infix/postfix parsing). | |
| BindingPower | get_bp (LumaTokenType kind) |
| Gets the binding power (precedence) for a given token type. | |
| Expr * | primary (Parser *parser) |
| Expr * | unary (Parser *parser) |
| Expr * | grouping (Parser *parser) |
| Expr * | binary (Parser *parser, Expr *left, BindingPower bp) |
| Expr * | call_expr (Parser *parser, Expr *left, BindingPower bp) |
| Expr * | assign_expr (Parser *parser, Expr *left, BindingPower bp) |
| Expr * | prefix_expr (Parser *parser, Expr *left, BindingPower bp) |
| Expr * | array_expr (Parser *parser) |
| Expr * | index_expr (Parser *parser) |
| Expr * | deref_expr (Parser *parser) |
| Expr * | addr_expr (Parser *parser) |
| Expr * | alloc_expr (Parser *parser) |
| Expr * | free_expr (Parser *parser) |
| Expr * | cast_expr (Parser *parser) |
| Expr * | input_expr (Parser *parser) |
| Expr * | system_expr (Parser *parser) |
| Expr * | syscall_expr (Parser *parser) |
| Expr * | sizeof_expr (Parser *parser) |
| Expr * | struct_expr (Parser *parser) |
| Expr * | named_struct_expr (Parser *parser, Expr *left, BindingPower bp) |
| Type * | tnud (Parser *parser) |
| Type * | tled (Parser *parser, Type *left, BindingPower bp) |
| BindingPower | tget_bp (Parser *parser, LumaTokenType kind) |
| Type * | pointer (Parser *parser) |
| Type * | array_type (Parser *parser) |
| Type * | function_type (Parser *parser, Type *return_type) |
| Stmt * | use_stmt (Parser *parser) |
| Stmt * | os_stmt (Parser *parser) |
| Stmt * | link_stmt (Parser *parser) |
| Stmt * | expr_stmt (Parser *parser) |
| Parses an expression statement. | |
| Stmt * | var_stmt (Parser *parser, bool is_public) |
| Parses a variable declaration statement. | |
| Stmt * | const_stmt (Parser *parser, bool is_public, bool returns_ownership, bool takes_ownership) |
| Parses a constant declaration statement. | |
| Stmt * | fn_stmt (Parser *parser, const char *name, bool is_public, bool is_static, bool returns_ownership, bool takes_ownership) |
| Parses a function declaration statement. | |
| Stmt * | enum_stmt (Parser *parser, const char *name, bool is_public) |
| Parses an enumeration declaration statement. | |
| Stmt * | struct_stmt (Parser *parser, const char *name, bool is_public) |
| Parses a structure declaration statement. | |
| Stmt * | print_stmt (Parser *parser, bool ln) |
| Parses print/println statements. | |
| Stmt * | return_stmt (Parser *parser) |
| Parses a return statement. | |
| Stmt * | block_stmt (Parser *parser) |
| Parses a block statement. | |
| Stmt * | infinite_loop_stmt (Parser *parser, int line, int col) |
| Parses an infinite loop statement. | |
| Stmt * | for_loop_stmt (Parser *parser, int line, int col) |
| Parses a for loop statement. | |
| Stmt * | loop_stmt (Parser *parser) |
| Parses loop statements (infinite, while, or for loops) | |
| Stmt * | if_stmt (Parser *parser) |
| Parses if/elif/else conditional statements. | |
| Stmt * | break_continue_stmt (Parser *parser, bool is_continue) |
| Parses break and continue statements. | |
| Stmt * | defer_stmt (Parser *parser) |
| Stmt * | switch_stmt (Parser *parser) |
| Stmt * | impl_stmt (Parser *parser) |
Recursive descent and Pratt parser for the Luma language.
This module implements parsing of tokens into an Abstract Syntax Tree (AST), handling expressions, statements, types, and error reporting. The parser uses binding power (precedence) to correctly parse expressions.
The parser supports:
| #define CURRENT_TOKEN_LENGTH | ( | parser | ) | ((int)p_current(parser).length) |
Get the length of the current token's lexeme.
| #define CURRENT_TOKEN_VALUE | ( | parser | ) | (p_current(parser).value) |
Get the value string of the current token.
| #define MAX_EXPR 1024 |
| #define MAX_STMT 1024 |
Maximum allowed size for statements, expressions, and types.
| #define MAX_TYPE 1024 |
| enum BindingPower |
Binding power (precedence) levels for expression parsing.
Used to control operator precedence and associativity in Pratt parsing.
| Expr * assign_expr | ( | Parser * | parser, |
| Expr * | left, | ||
| BindingPower | bp | ||
| ) |
| Expr * binary | ( | Parser * | parser, |
| Expr * | left, | ||
| BindingPower | bp | ||
| ) |
Parses a block statement.
Handles block statements with the syntax: { statement1; statement2; ... }
| parser | Pointer to the parser instance |
Parses break and continue statements.
Handles loop control statements with the syntax:
break; - Exit the current loop
continue; - Skip to the next iteration of the current loop
| parser | Pointer to the parser instance |
| is_continue | Whether this is a continue (true) or break (false) statement |
| Expr * call_expr | ( | Parser * | parser, |
| Expr * | left, | ||
| BindingPower | bp | ||
| ) |
| char * collect_doc_comments | ( | Parser * | parser | ) |
Collects consecutive documentation comments before a declaration.
Accumulates all doc comments (/// or //!) that appear immediately before the current token. Returns them as a single string with newlines preserved.
| parser | Pointer to the parser instance |
Parses a constant declaration statement.
Handles multiple forms of constant declarations:
const name: Type = value; - Explicit type annotation
const name = fn ... - Function declaration
const name = struct ... - Struct declaration
const name = enum ... - Enum declaration
| parser | Pointer to the parser instance |
| is_public | Whether this declaration has public visibility |
| void consume_doc_comments | ( | Parser * | parser | ) |
Parses an enumeration declaration statement.
Handles enum declarations with the syntax: enum { member1, member2, member3, ... };
| parser | Pointer to the parser instance |
| name | Enum name (already parsed by caller) |
| is_public | Whether this enum has public visibility |
Parses an expression statement.
An expression statement consists of any expression followed by a semicolon. This is used for statements that evaluate an expression for its side effects, such as function calls or assignment expressions.
| parser | Pointer to the parser instance |
| Stmt * fn_stmt | ( | Parser * | parser, |
| const char * | name, | ||
| bool | is_public, | ||
| bool | is_static, | ||
| bool | returns_ownership, | ||
| bool | takes_ownership | ||
| ) |
Parses a function declaration statement.
Handles function declarations with the syntax: fn(param1: Type1, param2: Type2, ...) ReturnType { body }
| parser | Pointer to the parser instance |
| name | Function name (already parsed by caller) |
| is_public | Whether this function has public visibility |
Parses a for loop statement.
Handles for loops with the syntax:
| parser | Pointer to the parser instance |
| line | Line number where the loop statement starts |
| col | Column number where the loop statement starts |
| BindingPower get_bp | ( | LumaTokenType | kind | ) |
Gets the binding power (precedence) for a given token type.
This function is crucial for the Pratt parser implementation. It returns the binding power (precedence level) for different operators, which determines the order of operations during expression parsing.
Higher binding power values indicate higher precedence operators.
| kind | The token type to get binding power for |
| char * get_name | ( | Parser * | psr | ) |
Extracts and duplicates the current token's string value.
This function creates a null-terminated string copy of the current token's value using the arena allocator. It's primarily used for extracting identifier names, string literals, and other textual token content that needs to be preserved in the AST.
| psr | Pointer to the parser instance |
Parses if/elif/else conditional statements.
Handles complex conditional statements with multiple branches:
| parser | Pointer to the parser instance |
Parses an infinite loop statement.
Handles infinite loops with the syntax: loop { ... }
| parser | Pointer to the parser instance |
| line | Line number where the loop statement starts |
| col | Column number where the loop statement starts |
| bool init_parser_arrays | ( | Parser * | parser, |
| GrowableArray * | stmts, | ||
| GrowableArray * | modules | ||
| ) |
| Expr * led | ( | Parser * | parser, |
| Expr * | left, | ||
| BindingPower | bp | ||
| ) |
Pratt parser function for left denotation (infix/postfix parsing).
| parser | Parser pointer. |
| left | Left expression. |
| bp | Binding power. |
Pratt parser function for left denotation (infix/postfix parsing).
This is part of the Pratt parser implementation. The "led" function handles tokens that can appear after an expression has been parsed (binary operators and postfix operators).
| parser | Pointer to the parser instance |
| left | The left operand expression (already parsed) |
| bp | The current binding power context |
Parses loop statements (infinite, while, or for loops).
Dispatcher function that determines the type of loop based on the following tokens and delegates to the appropriate specialized parser:
loop { ... } → infinite loop
loop [initializers](...) { ... } → for loop
loop (condition) { ... } → while loop
loop (condition) : (optional_condition) { ... } → while loop with secondary condition
| parser | Pointer to the parser instance |
| Expr * named_struct_expr | ( | Parser * | parser, |
| Expr * | left, | ||
| BindingPower | bp | ||
| ) |
Null Denotation - handles prefix expressions and primary expressions.
This is part of the Pratt parser implementation. The "nud" function handles tokens that can appear at the beginning of an expression (prefix operators and primary expressions like literals and identifiers).
| parser | Pointer to the parser instance |
Advances to the next token and returns the current token.
This function moves the parser position forward by one token and returns the token that was current before advancing. This is the primary mechanism for consuming tokens during parsing.
| psr | Pointer to the parser instance |
| Token p_consume | ( | Parser * | psr, |
| LumaTokenType | type, | ||
| const char * | error_msg | ||
| ) |
Consumes a token of the expected type or reports an error.
This function is used when the parser expects a specific token type at the current position. If the current token matches the expected type, it advances and returns the token. If not, it reports a syntax error with the provided error message.
| psr | Pointer to the parser instance |
| type | The expected token type that should be at the current position |
| error_msg | Error message to display if the token doesn't match |
Gets the current token without advancing the parser position.
This function returns the token at the current parser position without modifying the parser state. It's the most frequently used function for examining the current token during parsing.
| psr | Pointer to the parser instance |
| bool p_has_tokens | ( | Parser * | psr | ) |
Checks if there are more tokens available for parsing.
This function determines whether the parser has reached the end of the token stream. It's used throughout the parser to control parsing loops and prevent buffer overruns when accessing tokens.
| psr | Pointer to the parser instance |
Peeks at a token at the specified offset from current position.
This function allows looking ahead in the token stream without advancing the current position. It's useful for making parsing decisions based on upcoming tokens (lookahead parsing).
| psr | Pointer to the parser instance |
| offset | Number of positions ahead to look (0 = current, 1 = next, etc.) |
p_peek(parser, 0) is equivalent to p_current(parser)
p_peek(parser, 1) looks at the next token
| Stmt * parse | ( | GrowableArray * | tks, |
| ArenaAllocator * | arena, | ||
| BuildConfig * | config | ||
| ) |
Parses a full program from tokens into an AST of statements.
| tks | GrowableArray containing tokens. |
| arena | Memory arena for allocations. |
Parses a full program from tokens into an AST of statements.
This is the entry point for the parser. It takes a growable array of tokens and converts them into a complete program AST node containing all parsed statements.
| tks | Growable array containing all tokens from the lexer |
| arena | Arena allocator for memory management during parsing |
Main parsing function that converts tokens into an AST
This is the entry point for the parser. It takes a growable array of tokens and converts them into a complete program AST node containing all parsed statements.
| Expr * parse_expr | ( | Parser * | parser, |
| BindingPower | bp | ||
| ) |
Parses an expression using the Pratt parsing algorithm.
This is the core expression parsing function that implements the Pratt parser algorithm. It handles operator precedence and associativity automatically through the binding power mechanism.
| parser | Pointer to the parser instance |
| bp | Minimum binding power - only operators with higher binding power will be consumed by this call |
| const char * parse_module_declaration | ( | Parser * | parser, |
| char ** | out_module_doc | ||
| ) |
Parses a single statement.
This function dispatches to the appropriate statement parsing function based on the current token. It also handles visibility modifiers (public/private) that can appear before certain statement types.
| parser | Pointer to the parser instance |
Parses a type annotation.
This function parses type expressions used in variable declarations, function parameters, return types, etc. It handles primitive types, pointer types, array types, and user-defined types.
| parser | Pointer to the parser instance |
| void parser_error | ( | Parser * | psr, |
| const char * | error_type, | ||
| const char * | file, | ||
| const char * | msg, | ||
| int | line, | ||
| int | col, | ||
| int | tk_length | ||
| ) |
Report a parser error with detailed location info.
| psr | Pointer to the parser. |
| error_type | Type/category of the error. |
| file | Source file where error occurred. |
| msg | Error message. |
| line | Line number of error. |
| col | Column number of error. |
| tk_length | Length of the token causing the error. |
Report a parser error with detailed location info.
Creates and adds an error to the global error system with information about where the error occurred in the source code, including line and column information.
| psr | Pointer to the parser instance |
| error_type | String describing the type of error (e.g., "SyntaxError") |
| file | Path to the source file where the error occurred |
| msg | Detailed error message describing what went wrong |
| line | Line number where the error occurred (1-based) |
| col | Column number where the error occurred (1-based) |
| tk_length | Length of the token that caused the error |
| Expr * prefix_expr | ( | Parser * | parser, |
| Expr * | left, | ||
| BindingPower | bp | ||
| ) |
Parses print/println statements.
Handles output statements with the syntax:
print(expr1, expr2, ...);
println(expr1, expr2, ...);
| parser | Pointer to the parser instance |
| ln | Whether this is a println (true) or print (false) statement |
print();
Parses a return statement.
Handles return statements with optional return values:
return; - Return with no value (void return)
return expression; - Return with a value
| parser | Pointer to the parser instance |
Parses a structure declaration statement.
Handles struct declarations with public/private member visibility:
| parser | Pointer to the parser instance |
| name | Struct name (already parsed by caller) |
| is_public | Whether this struct has public visibility |
| BindingPower tget_bp | ( | Parser * | parser, |
| LumaTokenType | kind | ||
| ) |
| Type * tled | ( | Parser * | parser, |
| Type * | left, | ||
| BindingPower | bp | ||
| ) |
Parses a variable declaration statement.
Handles variable declarations with the syntax: var name: Type = value;
| parser | Pointer to the parser instance |
| is_public | Whether this variable has public visibility |