From ee7735b61065d2edf90206d2694893b47fc07d64 Mon Sep 17 00:00:00 2001
From: kotofyt <bratelllo@gmail.com>
Date: Sat, 17 Jan 2026 11:24:00 +0200
Subject: [PATCH] simple B compiler

---
 asmrigs/brb/lexer.cpp    |  13 ----
 asmrigs/brb/main.cpp     | 129 +++++++++++++++++++++++++++++++++++++++
 asmrigs/cbld/gen.cpp     |   0
 asmrigs/cbld/gen.h       |   0
 asmrigs/tests/003.b      |   2 +-
 public/tier2/tokenizer.h |  12 +++-
 tier2/tokenizer.cpp      |  18 ++++--
 7 files changed, 152 insertions(+), 22 deletions(-)
 create mode 100644 asmrigs/cbld/gen.cpp
 create mode 100644 asmrigs/cbld/gen.h

diff --git a/asmrigs/brb/lexer.cpp b/asmrigs/brb/lexer.cpp
index d125233..3f8619f 100644
--- a/asmrigs/brb/lexer.cpp
+++ b/asmrigs/brb/lexer.cpp
@@ -5,7 +5,6 @@ abstract_class CBLexerWord: public IBLexerWord
 public:
 	virtual int GetType() override;
 
-	CUtlVector<ILexerWord*> m_children;
 	EBWordType m_eType;
 };
 
@@ -14,18 +13,6 @@ int CBLexerWord::GetType()
 	return m_eType;
 }
 
-uint32_t CBLexerWord::GetNumChildren()
-{
-	return m_children.GetSize();
-}
-
-ILexerWord **CBLexerWord::GetChildren()
-{
-	return m_children.GetData();
-}
-
-
-
 class CBLexer: public ILexer
 {
 public:
diff --git a/asmrigs/brb/main.cpp b/asmrigs/brb/main.cpp
index e82ec5d..89e1a22 100644
--- a/asmrigs/brb/main.cpp
+++ b/asmrigs/brb/main.cpp
@@ -3,6 +3,127 @@
 #include "tier2/ifilesystem.h"
 
 
+class CTokenParser	// cursor over an array of Token_t with peek / compare / advance helpers
+{
+public:
+	const char *PeekToken();	// text of the current token; NULL if that token is quoted
+	bool IsToken( const char *szString );	// compares szString with the current token's text
+	const char *PeekStringLiteral();	// text of the current token; NULL unless that token is quoted
+	bool Continue();	// steps to the next token; false when the end is reached
+	
+	Token_t *m_pTokens;	// first token of the array
+	Token_t *m_pTokensEnd;	// one past the last token
+	Token_t *m_pCurrentToken;	// token inspected by PeekToken / IsToken / PeekStringLiteral
+};
+
+// Gives the text of the current token, or NULL when that token is a quoted string literal.
+const char *CTokenParser::PeekToken()
+{
+	const bool bIsLiteral = m_pCurrentToken->m_bIsQuoted;
+	return bIsLiteral ? NULL : static_cast<const char *>(m_pCurrentToken->m_szValue);
+}
+
+// True when the current token is an unquoted token whose text equals szString.
+bool CTokenParser::IsToken( const char *szString )
+{
+	// A quoted literal such as "}" must never pass for the punctuation or keyword itself.
+	return !m_pCurrentToken->m_bIsQuoted && !V_strcmp(szString, m_pCurrentToken->m_szValue);
+}
+
+// Gives the text of the current token when it is a quoted string literal, NULL otherwise.
+const char *CTokenParser::PeekStringLiteral()
+{
+	const bool bIsLiteral = m_pCurrentToken->m_bIsQuoted;
+	return bIsLiteral ? static_cast<const char *>(m_pCurrentToken->m_szValue) : NULL;
+}
+
+bool CTokenParser::Continue()	// advances to the next token; false once the end is reached
+{
+	// Stay parked on the end marker so repeated calls cannot walk beyond the array.
+	if ( m_pCurrentToken != m_pTokensEnd )
+		m_pCurrentToken++;
+	return m_pCurrentToken != m_pTokensEnd;
+}
+
+// Prints "<line>:<column>: expected <szToken> but got <token text or 'string literal'>", then exits with status 1.
+void CompileErrorExpectedToken( Token_t *pToken, const char *szToken )
+{
+	const char *szGot = pToken->m_bIsQuoted ? "string literal" : pToken->m_szValue.GetString();
+	// The position fields are unsigned, so they are printed with %u rather than %d.
+	V_printf("%u:%u: expected %s but got %s\n", static_cast<unsigned>(pToken->m_iLine), static_cast<unsigned>(pToken->m_iCharacter), szToken, szGot);
+	exit(1);
+}
+
+struct BExpression_t	// expression node: a type tag plus nested nodes of the same type
+{
+	enum BExpressionType
+	{
+		BEXPRESSION_TYPE_NEW,	// first enumerator (value 0); NOTE(review): meaning not visible here
+		BEXPRESSION_TYPE_ADD,	// presumably binary "+" — TODO confirm
+		BEXPRESSION_TYPE_SUBTRACT,	// presumably binary "-" — TODO confirm
+	} m_eType;
+	CUtlVector<BExpression_t> m_children;	// child expressions, stored by value
+};
+
+BExpression_t ParseVar( CTokenParser *pParser )	// builds the expression node for the current token
+{
+	const char *szToken = pParser->PeekToken();	// NULL when the token is a string literal
+	(void)szToken;	// NOTE(review): BExpression_t has no field to hold the name yet
+	return BExpression_t{ BExpression_t::BEXPRESSION_TYPE_NEW };	// a const char * cannot convert to BExpression_t
+}
+
+void ParseRValue( CTokenParser *pParser )	// parses one rvalue; recurses after an opening "("
+{
+	if (pParser->IsToken("("))	// compare the text: == on two char pointers compares addresses
+	{
+		pParser->Continue();	// step past "(" before parsing what it encloses
+		ParseRValue(pParser);
+	}
+	BExpression_t var = ParseVar( pParser );	// ParseVar returns BExpression_t; result is not used yet
+}
+
+void ParseStatement( CTokenParser *pParser )	// parses one statement and prints its assembly
+{
+	// Only "return <rvalue>;" exists so far; reject the rest, or the caller's loop would never advance.
+	if (!pParser->IsToken("return"))
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "return");
+	pParser->Continue();
+	ParseRValue( pParser );
+	if (!pParser->IsToken(";"))
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, ";");
+	V_printf("  ret\n");
+	pParser->Continue();
+}
+
+// Parses one global definition of the form `name ( ) { statements }` and prints `name:` as its label.
+// Every mismatch goes through CompileErrorExpectedToken, which exits the process.
+void ParseGlobal( CTokenParser *pParser )
+{
+	const char *szObjectName = pParser->PeekToken();
+	// PeekToken() yields NULL for a string literal, and NULL must not reach "%s".
+	if (szObjectName == NULL)
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "name");
+	pParser->Continue();
+	V_printf("%s:\n", szObjectName);
+
+	// Only parameterless functions are handled: the name must be followed by "(", ")" and "{".
+	if (!pParser->IsToken("("))
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "(");
+	pParser->Continue();
+	if (!pParser->IsToken(")"))
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, ")");
+	pParser->Continue();
+	if (!pParser->IsToken("{"))
+		CompileErrorExpectedToken(pParser->m_pCurrentToken, "{");
+	pParser->Continue();
+
+	// Body: statements up to the closing brace, which is then consumed.
+	while (!pParser->IsToken("}"))
+		ParseStatement( pParser );
+	pParser->Continue();
+}
+
+
 int main( int argc, char **argv )
 {
 	CommandLine()->CreateCommandLine(argc, argv);
@@ -15,5 +136,13 @@ int main( int argc, char **argv )
 	filesystem->Close(pFile);
 
 
+	CUtlVector<Token_t> tokens = Tokenize(szFileContents);
+	CTokenParser parser;
+	parser.m_pTokens = tokens.GetData();
+	parser.m_pTokensEnd = tokens.GetData()+tokens.GetSize();
+	parser.m_pCurrentToken = tokens.GetData()-1;
+	parser.Continue();
+	ParseGlobal(&parser);
+
 	V_free((void*)szFileContents);
 }
diff --git a/asmrigs/cbld/gen.cpp b/asmrigs/cbld/gen.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/asmrigs/cbld/gen.h b/asmrigs/cbld/gen.h
new file mode 100644
index 0000000..e69de29
diff --git a/asmrigs/tests/003.b b/asmrigs/tests/003.b
index 37e8a38..c93610b 100644
--- a/asmrigs/tests/003.b
+++ b/asmrigs/tests/003.b
@@ -1,4 +1,4 @@
 main()
 {
-	return 1 + 2 * 3
+	return 1 + 2 * 3;
 }
diff --git a/public/tier2/tokenizer.h b/public/tier2/tokenizer.h
index 6fadea2..2da7ca3 100644
--- a/public/tier2/tokenizer.h
+++ b/public/tier2/tokenizer.h
@@ -13,9 +13,17 @@ struct Token_t
 	uint32_t m_iCharacter;
 };
 
-typedef bool( *fnIsAlphabetSymbol )( char c );
+typedef bool( *IsAlphabetSymbolFn )( char c );
+
+struct TokenizeProperties_t	// bundle of tokenizer settings; NOTE(review): neither Tokenize() overload declared below takes it yet
+{
+	IsAlphabetSymbolFn m_pfnIsAlphabetSymbol;	// predicate over one char; presumably tests symbols inside a word — TODO confirm
+	IsAlphabetSymbolFn m_pfnIsFirstAlphabetSymbol;	// same signature; presumably tests a word's first symbol — TODO confirm
+	bool m_bAllowSlashToContinueString;	// NOTE(review): semantics not visible here; the name suggests a slash continuing a string — confirm
+};
+
 CUtlVector<Token_t> Tokenize( const char *szString );
-CUtlVector<Token_t> Tokenize( const char *szString, fnIsAlphabetSymbol pfnIsAlphabetSymbol );
+CUtlVector<Token_t> Tokenize( const char *szString, IsAlphabetSymbolFn pfnIsAlphabetSymbol );
 
 
 #endif
diff --git a/tier2/tokenizer.cpp b/tier2/tokenizer.cpp
index 009a504..07ddd91 100644
--- a/tier2/tokenizer.cpp
+++ b/tier2/tokenizer.cpp
@@ -18,13 +18,14 @@ CUtlVector<Token_t> Tokenize( const char *psz )
 	return Tokenize(psz, IsWordSymbol);
 }
 
-CUtlVector<Token_t> Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSymbol )
+CUtlVector<Token_t> Tokenize( const char *psz, IsAlphabetSymbolFn fnIsAlphabetSymbol )
 {
 	CUtlVector<Token_t> tokens = {};
 	size_t i = 0;
 	char c;
-	uint32_t nCurrentLine = 0;
+	uint32_t nCurrentLine = 1;
 	uint32_t nCurrentCharacter = 0;
+	uint32_t nStartingCharacter = 0;
 	bool bIsQuoted = false;
 	bool bIsSlash = false;
 	CUtlString szStringValue;
@@ -32,14 +33,17 @@ CUtlVector<Token_t> Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSy
 	while (true)
 	{
 		c = psz[i];
-		i++;
 		if (c == '\0')
 			break;
 		if (c == '\n')
 		{
 			nCurrentCharacter = 0;
+			nStartingCharacter = 0;
 			nCurrentLine++;
 		}
+		else
+			nCurrentCharacter++;
+		i++;
 		
 
 		if (bIsQuoted)
@@ -91,7 +95,7 @@ CUtlVector<Token_t> Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSy
 					bIsQuoted = false;
 					if (szStringValue == 0)
 						continue;
-					tokens.AppendTail({szStringValue, true, nCurrentLine});
+					tokens.AppendTail({szStringValue, true, nCurrentLine, nStartingCharacter});
 					szStringValue = 0;
 					continue;
 				default:
@@ -113,10 +117,12 @@ CUtlVector<Token_t> Tokenize( const char *psz, fnIsAlphabetSymbol fnIsAlphabetSy
 				continue;
 			} else {
 				if (szStringValue != 0)
-					tokens.AppendTail({szStringValue, false, nCurrentLine});
+					tokens.AppendTail({szStringValue, false, nCurrentLine, nStartingCharacter});
+
+				nStartingCharacter = nCurrentCharacter;
 				szStringValue = 0;
 				if (V_isgraph(c) && c != '"')
-					tokens.AppendTail({CUtlString("%c", c), false, nCurrentLine});
+					tokens.AppendTail({CUtlString("%c", c), false, nCurrentLine, nStartingCharacter});
 			}
 		}
 	};