From ee7735b61065d2edf90206d2694893b47fc07d64 Mon Sep 17 00:00:00 2001
From: kotofyt
Date: Sat, 17 Jan 2026 11:24:00 +0200
Subject: [PATCH] simple B compiler

---
 asmrigs/brb/lexer.cpp    |  13 ---
 asmrigs/brb/main.cpp     | 207 +++++++++++++++++++++++++++++++++++++++
 asmrigs/cbld/gen.cpp     |   0
 asmrigs/cbld/gen.h       |   0
 asmrigs/tests/003.b      |   2 +-
 public/tier2/tokenizer.h |  12 ++-
 tier2/tokenizer.cpp      |  18 ++--
 7 files changed, 230 insertions(+), 22 deletions(-)
 create mode 100644 asmrigs/cbld/gen.cpp
 create mode 100644 asmrigs/cbld/gen.h

diff --git a/asmrigs/brb/lexer.cpp b/asmrigs/brb/lexer.cpp
index d125233..3f8619f 100644
--- a/asmrigs/brb/lexer.cpp
+++ b/asmrigs/brb/lexer.cpp
@@ -5,7 +5,6 @@ abstract_class CBLexerWord: public IBLexerWord
 public:
 	virtual int GetType() override;
 
-	CUtlVector m_children;
 	EBWordType m_eType;
 };
 
@@ -14,18 +13,6 @@ int CBLexerWord::GetType()
 	return m_eType;
 }
 
-uint32_t CBLexerWord::GetNumChildren()
-{
-	return m_children.GetSize();
-}
-
-ILexerWord **CBLexerWord::GetChildren()
-{
-	return m_children.GetData();
-}
-
-
-
 class CBLexer: public ILexer
 {
 public:
diff --git a/asmrigs/brb/main.cpp b/asmrigs/brb/main.cpp
index e82ec5d..89e1a22 100644
--- a/asmrigs/brb/main.cpp
+++ b/asmrigs/brb/main.cpp
@@ -3,6 +3,204 @@
 #include "tier2/ifilesystem.h"
 
 
+// Cursor over the token array produced by Tokenize(). The parser does not
+// own the tokens; it only walks the range [m_pTokens, m_pTokensEnd).
+class CTokenParser
+{
+public:
+	bool IsAtEnd();
+	const char *PeekToken();
+	bool IsToken( const char *szString );
+	const char *PeekStringLiteral();
+	bool Continue();
+
+	Token_t *m_pTokens;
+	Token_t *m_pTokensEnd;
+	Token_t *m_pCurrentToken;
+};
+
+// True once every token has been consumed. m_pCurrentToken must not be
+// dereferenced while this returns true.
+bool CTokenParser::IsAtEnd()
+{
+	return m_pCurrentToken >= m_pTokensEnd;
+}
+
+// Text of the current token, or NULL when the input is exhausted or the
+// current token is a string literal.
+const char *CTokenParser::PeekToken()
+{
+	if ( IsAtEnd() || m_pCurrentToken->m_bIsQuoted )
+		return NULL;
+	return m_pCurrentToken->m_szValue;
+}
+
+// True when the current token is the unquoted word or symbol szString.
+// String literals never match, so a quoted "return" is not a keyword.
+bool CTokenParser::IsToken( const char *szString )
+{
+	const char *szToken = PeekToken();
+	if ( szToken == NULL )
+		return false;
+	return !V_strcmp(szString, szToken);
+}
+
+// Contents of the current string literal, or NULL when the input is
+// exhausted or the current token is not a string literal.
+const char *CTokenParser::PeekStringLiteral()
+{
+	if ( IsAtEnd() || !m_pCurrentToken->m_bIsQuoted )
+		return NULL;
+	return m_pCurrentToken->m_szValue;
+}
+
+// Moves to the next token. Returns false when no token is left; the cursor
+// never advances past m_pTokensEnd.
+bool CTokenParser::Continue()
+{
+	if ( IsAtEnd() )
+		return false;
+	m_pCurrentToken++;
+	return !IsAtEnd();
+}
+
+// Reports a token mismatch at pToken's position and exits.
+// pToken must point at a real token, never at the end of the input.
+void CompileErrorExpectedToken( Token_t *pToken, const char *szToken )
+{
+	if (pToken->m_bIsQuoted)
+		V_printf("%d:%d: expected %s but got string literal\n", pToken->m_iLine, pToken->m_iCharacter, szToken);
+	else
+		V_printf("%d:%d: expected %s but got %s\n", pToken->m_iLine, pToken->m_iCharacter, szToken, pToken->m_szValue.GetString());
+	exit(1);
+}
+
+// Reports that the input ended while szToken was still required and exits.
+void CompileErrorUnexpectedEnd( const char *szToken )
+{
+	V_printf("expected %s but reached end of file\n", szToken);
+	exit(1);
+}
+
+// Consumes the current token when it is szToken; any other token, or the
+// end of the input, is a compile error.
+void ExpectToken( CTokenParser *pParser, const char *szToken )
+{
+	if ( pParser->IsAtEnd() )
+		CompileErrorUnexpectedEnd(szToken);
+	if ( !pParser->IsToken(szToken) )
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, szToken);
+	pParser->Continue();
+}
+
+struct BExpression_t
+{
+	enum BExpressionType
+	{
+		BEXPRESSION_TYPE_NEW,
+		BEXPRESSION_TYPE_ADD,
+		BEXPRESSION_TYPE_SUBTRACT,
+	} m_eType;
+	CUtlVector m_children;
+};
+
+// True when c can begin a name or a number.
+static bool IsOperandStart( char c )
+{
+	return ( c >= '0' && c <= '9' ) || ( c >= 'a' && c <= 'z' )
+		|| ( c >= 'A' && c <= 'Z' ) || c == '_';
+}
+
+// True when the current token is a binary arithmetic operator.
+static bool IsBinaryOperator( CTokenParser *pParser )
+{
+	return pParser->IsToken("+") || pParser->IsToken("-")
+		|| pParser->IsToken("*") || pParser->IsToken("/");
+}
+
+// Consumes one operand: a name, a number or a string literal.
+// The returned expression is value-initialised; operand data is not stored yet.
+BExpression_t ParseVar( CTokenParser *pParser )
+{
+	if ( pParser->IsAtEnd() )
+		CompileErrorUnexpectedEnd("value");
+
+	// PeekToken() is NULL for string literals, which are valid operands.
+	const char *szToken = pParser->PeekToken();
+	if ( szToken != NULL && !IsOperandStart(szToken[0]) )
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "value");
+
+	BExpression_t expression = {};
+	pParser->Continue();
+	return expression;
+}
+
+// Parses "operand { operator operand }", where an operand is a parenthesised
+// r-value or a single value. Operator precedence is not tracked and no code
+// is generated for the expression yet.
+void ParseRValue( CTokenParser *pParser )
+{
+	while ( true )
+	{
+		if ( pParser->IsToken("(") )
+		{
+			pParser->Continue();
+			ParseRValue(pParser);
+			ExpectToken(pParser, ")");
+		}
+		else
+		{
+			ParseVar(pParser);
+		}
+
+		if ( !IsBinaryOperator(pParser) )
+			return;
+		pParser->Continue();
+	}
+}
+
+// Parses one statement. Only "return [rvalue];" is supported; anything else
+// is a compile error, so the caller's statement loop always makes progress.
+void ParseStatement( CTokenParser *pParser )
+{
+	if ( pParser->IsAtEnd() )
+		CompileErrorUnexpectedEnd("statement");
+	if ( !pParser->IsToken("return") )
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "statement");
+
+	pParser->Continue();
+	if ( !pParser->IsToken(";") )
+		ParseRValue( pParser );
+	ExpectToken(pParser, ";");
+	V_printf(" ret\n");
+}
+
+// Parses one global definition. Only parameterless functions of the form
+// "name() { statements }" are supported; the name is printed as a label.
+void ParseGlobal( CTokenParser *pParser )
+{
+	if ( pParser->IsAtEnd() )
+		CompileErrorUnexpectedEnd("name");
+
+	const char *szObjectName = pParser->PeekToken();
+	if ( szObjectName == NULL || !IsOperandStart(szObjectName[0]) )
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "name");
+	pParser->Continue();
+	V_printf("%s:\n", szObjectName);
+
+	ExpectToken(pParser, "(");
+	ExpectToken(pParser, ")");
+	ExpectToken(pParser, "{");
+	while ( !pParser->IsToken("}") )
+	{
+		if ( pParser->IsAtEnd() )
+			CompileErrorUnexpectedEnd("}");
+		ParseStatement( pParser );
+	}
+	pParser->Continue();
+}
+
+
 int main( int argc, char **argv )
 {
 	CommandLine()->CreateCommandLine(argc, argv);
@@ -15,5 +213,14 @@ int main( int argc, char **argv )
 
 	filesystem->Close(pFile);
 
+	CUtlVector tokens = Tokenize(szFileContents);
+	CTokenParser parser;
+	parser.m_pTokens = tokens.GetData();
+	parser.m_pTokensEnd = tokens.GetData()+tokens.GetSize();
+	// Start on the first token; an empty file simply produces no output.
+	parser.m_pCurrentToken = parser.m_pTokens;
+	while ( !parser.IsAtEnd() )
+		ParseGlobal(&parser);
+
 	V_free((void*)szFileContents);
 }
diff --git a/asmrigs/cbld/gen.cpp b/asmrigs/cbld/gen.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/asmrigs/cbld/gen.h b/asmrigs/cbld/gen.h
new file mode 100644
index 0000000..e69de29
diff --git a/asmrigs/tests/003.b b/asmrigs/tests/003.b
index 37e8a38..c93610b 100644
--- a/asmrigs/tests/003.b
+++ b/asmrigs/tests/003.b
@@ -1,4 +1,4 @@
 main()
 {
-	return 1 + 2 * 3
+	return 1 + 2 * 3;
 }
diff --git a/public/tier2/tokenizer.h b/public/tier2/tokenizer.h
index 6fadea2..2da7ca3 100644
--- a/public/tier2/tokenizer.h
+++ b/public/tier2/tokenizer.h
@@ -13,9 +13,17 @@ struct Token_t
 	uint32_t m_iCharacter;
 };
 
-typedef bool( *fnIsAlphabetSymbol )( char c );
+typedef bool( *IsAlphabetSymbolFn )( char c );
+
+struct TokenizeProperties_t
+{
+	IsAlphabetSymbolFn m_pfnIsAlphabetSymbol;
+	IsAlphabetSymbolFn m_pfnIsFirstAlphabetSymbol;
+	bool m_bAllowSlashToContinueString;
+};
+
 
 CUtlVector Tokenize( const char *szString );
-CUtlVector Tokenize( const char *szString, fnIsAlphabetSymbol pfnIsAlphabetSymbol );
+CUtlVector Tokenize( const char *szString, IsAlphabetSymbolFn pfnIsAlphabetSymbol );
 
 #endif
diff --git a/tier2/tokenizer.cpp b/tier2/tokenizer.cpp
index 009a504..07ddd91 100644
--- a/tier2/tokenizer.cpp
+++ b/tier2/tokenizer.cpp
@@ -18,13 +18,14 @@ CUtlVector Tokenize( const char *psz )
 	return Tokenize(psz, IsWordSymbol);
 }
 
-CUtlVector Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSymbol )
+CUtlVector Tokenize( const char *psz, IsAlphabetSymbolFn fnIsAlphabetSymbol )
 {
 	CUtlVector tokens = {};
 	size_t i = 0;
 	char c;
-	uint32_t nCurrentLine = 0;
+	uint32_t nCurrentLine = 1;
 	uint32_t nCurrentCharacter = 0;
+	uint32_t nStartingCharacter = 0;
 	bool bIsQuoted = false;
 	bool bIsSlash = false;
 	CUtlString szStringValue;
@@ -32,14 +33,17 @@ CUtlVector Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSy
 	while (true)
 	{
 		c = psz[i];
-		i++;
 		if (c == '\0')
 			break;
 		if (c == '\n')
 		{
 			nCurrentCharacter = 0;
+			nStartingCharacter = 0;
 			nCurrentLine++;
 		}
+		else
+			nCurrentCharacter++;
+		i++;
 
 
 		if (bIsQuoted)
@@ -91,7 +95,7 @@ CUtlVector Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSy
 				bIsQuoted = false;
 				if (szStringValue == 0)
 					continue;
-				tokens.AppendTail({szStringValue, true, nCurrentLine});
+				tokens.AppendTail({szStringValue, true, nCurrentLine, nStartingCharacter});
 				szStringValue = 0;
 				continue;
 			default:
@@ -113,10 +117,12 @@ CUtlVector Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSy
 			continue;
 		} else {
 			if (szStringValue != 0)
-				tokens.AppendTail({szStringValue, false, nCurrentLine});
+				tokens.AppendTail({szStringValue, false, nCurrentLine, nStartingCharacter});
+
+			nStartingCharacter = nCurrentCharacter;
 			szStringValue = 0;
 			if (V_isgraph(c) && c != '"')
-				tokens.AppendTail({CUtlString("%c", c), false, nCurrentLine});
+				tokens.AppendTail({CUtlString("%c", c), false, nCurrentLine, nStartingCharacter});
 		}
 	}
 };