diff --git a/src/main/java/org/jcnc/snow/compiler/Main.java b/src/main/java/org/jcnc/snow/compiler/Main.java index ec0a0a1..4f89567 100644 --- a/src/main/java/org/jcnc/snow/compiler/Main.java +++ b/src/main/java/org/jcnc/snow/compiler/Main.java @@ -3,14 +3,17 @@ package org.jcnc.snow.compiler; import org.jcnc.snow.compiler.lexer.LexerEngine; import org.jcnc.snow.compiler.lexer.token.Token; import org.jcnc.snow.compiler.parser.ParserEngine; +import org.jcnc.snow.compiler.parser.ast.ModuleNode; import org.jcnc.snow.compiler.parser.context.ParserContext; import org.jcnc.snow.compiler.parser.ast.base.Node; -import org.jcnc.snow.compiler.parser.function.ASTPrinter; +import org.jcnc.snow.compiler.semantic.SemanticAnalyzer; +import org.jcnc.snow.compiler.semantic.SemanticError; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.List; public class Main { @@ -29,11 +32,34 @@ public class Main { // 3. 语义分析 + // 3. 语义分析 + // parse() 返回的是顶层 Node 列表,我们只关心 ModuleNode + List modules = new ArrayList<>(); + for (Node n : ast) { + if (n instanceof ModuleNode m) { + modules.add(m); + } + } + SemanticAnalyzer analyzer = new SemanticAnalyzer(true); + List errors = analyzer.analyze(modules); + + if (!errors.isEmpty()) { + System.err.println("语义分析发现错误:"); + for (SemanticError e : errors) { + System.err.println(" " + e); + } + // 遇到错误直接退出 + System.exit(1); + } else { + // 无错误时也打印一条成功信息 + System.out.println("语义分析通过,没有发现错误。"); + } // 打印 +// System.out.println(source); // TokenPrinter.print(tokens); // 打印 Token 列表 - ASTPrinter.print(ast); // 打印 AST +// ASTPrinter.print(ast); // 打印 AST // ASTPrinter.printJson(ast); // 打印JSON AST } diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/BuiltinType.java b/src/main/java/org/jcnc/snow/compiler/semantic/BuiltinType.java new file mode 100644 index 0000000..50dee7f --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/BuiltinType.java @@ -0,0 +1,20 @@ +package org.jcnc.snow.compiler.semantic; + +/** + * 内置基础类型枚举:int, string, void。 + * 其中 int 也可用于表示真假值。 + */ +public enum BuiltinType implements Type { + INT, STRING, VOID; + + @Override + public boolean isCompatible(Type other) { + // 完全相同类型才兼容 + return this == other; + } + + @Override + public String toString() { + return name().toLowerCase(); + } +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/FunctionType.java b/src/main/java/org/jcnc/snow/compiler/semantic/FunctionType.java new file mode 100644 index 0000000..70be709 --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/FunctionType.java @@ -0,0 +1,36 @@ +package org.jcnc.snow.compiler.semantic; + +import java.util.List; + +/** + * 表示函数类型,包含参数类型列表和返回类型。 + */ +public record FunctionType(List paramTypes, Type returnType) implements Type { + public FunctionType(List paramTypes, Type returnType) { + this.paramTypes = List.copyOf(paramTypes); + this.returnType = returnType; + } + + @Override + public boolean isCompatible(Type other) { + if (!(other instanceof FunctionType)) return false; + FunctionType o = (FunctionType) other; + return returnType.isCompatible(o.returnType) + && paramTypes.equals(o.paramTypes); + } + + @Override + public String toString() { + return "(" + paramTypes + ") -> " + returnType; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof FunctionType)) return false; + FunctionType o = (FunctionType) obj; + return returnType.equals(o.returnType) + && paramTypes.equals(o.paramTypes); + } + +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/ModuleInfo.java b/src/main/java/org/jcnc/snow/compiler/semantic/ModuleInfo.java new file mode 100644 index 0000000..0fe4c8e --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/ModuleInfo.java @@ -0,0 +1,40 @@ +package org.jcnc.snow.compiler.semantic; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * 保存模块级别信息,包括模块名、导入列表和函数签名。 + */ +public class ModuleInfo { + private final String name; + private final Set imports = new HashSet<>(); + private final Map functions = new HashMap<>(); + + public ModuleInfo(String name) { + this.name = name; + } + + /** + * 获取模块名称。 + */ + public String getName() { + return name; + } + + /** + * 获取导入的模块名称集合。 + */ + public Set getImports() { + return imports; + } + + /** + * 获取模块中函数签名映射:函数名 -> 函数类型。 + */ + public Map getFunctions() { + return functions; + } +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/SemanticAnalyzer.java b/src/main/java/org/jcnc/snow/compiler/semantic/SemanticAnalyzer.java new file mode 100644 index 0000000..3640051 --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/SemanticAnalyzer.java @@ -0,0 +1,347 @@ +package org.jcnc.snow.compiler.semantic; + +import org.jcnc.snow.compiler.parser.ast.*; +import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode; +import org.jcnc.snow.compiler.parser.ast.base.StatementNode; + +import java.util.*; + +public class SemanticAnalyzer { + private final Map modules = new HashMap<>(); + private final List errors = new ArrayList<>(); + private final boolean verbose; + private static final Map BUILTIN_TYPES = Map.of( + "int", BuiltinType.INT, + "string", BuiltinType.STRING, + "void", BuiltinType.VOID + ); + + // 默认关闭详细日志 + public SemanticAnalyzer() { + this(false); + } + + /** + * @param verbose 是否开启详细日志打印 + */ + public SemanticAnalyzer(boolean verbose) { + this.verbose = verbose; + } + + private void log(String msg) { + if (verbose) { + System.out.println("[SemanticAnalyzer] " + msg); + } + } + + public List analyze(List moduleNodes) { + log("开始语义分析"); + initBuiltinModule(); + log("内置模块初始化完成"); + registerUserModules(moduleNodes); + log("用户模块注册完成: " + modules.keySet()); + registerSignaturesAndImports(moduleNodes); + log("函数签名与导入检查完成"); + checkAllFunctions(moduleNodes); + log("所有函数检查完成,错误总数: " + errors.size()); + return errors; + } + + private void initBuiltinModule() { + ModuleInfo builtin = new ModuleInfo("BuiltinUtils"); + builtin.getFunctions().put("to_int", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.INT)); + builtin.getFunctions().put("to_string", new FunctionType(List.of(BuiltinType.INT), BuiltinType.STRING)); + builtin.getFunctions().put("print", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.VOID)); + modules.put(builtin.getName(), builtin); + log("内置函数 to_int, to_string, print 注册到模块 " + builtin.getName()); + } + + private void registerUserModules(List mods) { + for (var mod : mods) { + modules.put(mod.name(), new ModuleInfo(mod.name())); + log("注册用户模块: " + mod.name()); + } + } + + private void registerSignaturesAndImports(List mods) { + for (var mod : mods) { + ModuleInfo mi = modules.get(mod.name()); + for (var imp : mod.imports()) { + if (!modules.containsKey(imp.moduleName())) { + errors.add(new SemanticError(imp, "未知模块: " + imp.moduleName())); + log("错误: 未知模块导入 " + imp.moduleName()); + } else { + mi.getImports().add(imp.moduleName()); + log("模块 " + mod.name() + " 导入模块: " + imp.moduleName()); + } + } + for (var fn : mod.functions()) { + List params = new ArrayList<>(); + for (var p : fn.parameters()) { + Type t = parseType(p.type()); + if (t == null) { + errors.add(new SemanticError(p, "未知类型: " + p.type())); + log("错误: 参数未知类型 " + p.type() + " 在函数 " + fn.name()); + t = BuiltinType.INT; + } + params.add(t); + } + Type ret = parseType(fn.returnType()); + if (ret == null) { + errors.add(new SemanticError(fn, "未知返回类型: " + fn.returnType())); + log("错误: 函数返回类型未知 " + fn.returnType() + " 在函数 " + fn.name()); + ret = BuiltinType.VOID; + } + mi.getFunctions().put(fn.name(), new FunctionType(params, ret)); + log("注册函数: " + mod.name() + "." + fn.name() + " 参数类型: " + params + " 返回类型: " + ret); + } + } + } + + private void checkAllFunctions(List mods) { + for (var mod : mods) { + ModuleInfo mi = modules.get(mod.name()); + for (var fn : mod.functions()) { + log("开始检查函数: " + mod.name() + "." + fn.name()); + SymbolTable locals = new SymbolTable(null); + for (var p : fn.parameters()) { + locals.define(new Symbol(p.name(), parseType(p.type()), SymbolKind.VARIABLE)); + log("定义参数: " + p.name() + " 类型: " + p.type()); + } + fn.body().forEach(stmt -> checkStatement(mi, fn, locals, stmt)); + log("完成函数检查: " + mod.name() + "." + fn.name()); + } + } + } + + private void checkStatement(ModuleInfo mi, FunctionNode fn, SymbolTable locals, StatementNode stmt) { + log("检查语句: " + stmt); + switch (stmt) { + case DeclarationNode decl -> checkDeclaration(mi, decl, locals); + case AssignmentNode asg -> checkAssignment(mi, asg, locals); + case ExpressionStatementNode es -> checkExpression(mi, fn, locals, es.expression()); + case IfNode ifn -> checkIf(mi, fn, locals, ifn); + case LoopNode ln -> checkLoop(mi, fn, locals, ln); + case ReturnNode ret -> checkReturn(mi, fn, locals, ret); + default -> { + errors.add(new SemanticError(stmt, "不支持的语句类型: " + stmt)); + log("错误: 不支持的语句类型 " + stmt); + } + } + } + + private void checkDeclaration(ModuleInfo mi, DeclarationNode decl, SymbolTable locals) { + Type varType = Optional.ofNullable(parseType(decl.getType())).orElse(BuiltinType.INT); + log("声明变量: " + decl.getName() + " 类型: " + decl.getType()); + if (!locals.define(new Symbol(decl.getName(), varType, SymbolKind.VARIABLE))) { + errors.add(new SemanticError(decl, "变量重复声明: " + decl.getName())); + log("错误: 变量重复声明 " + decl.getName()); + } + decl.getInitializer().ifPresent(init -> { + Type initType = checkExpression(mi, null, locals, init); + if (!varType.isCompatible(initType)) { + errors.add(new SemanticError(decl, "初始化类型不匹配: 期望 " + varType + ", 实际 " + initType)); + log("错误: 初始化类型不匹配 for " + decl.getName() + ": 期望 " + varType + ", 实际 " + initType); + } else { + log("初始化成功: " + decl.getName() + " 类型: " + initType); + } + }); + } + + private void checkAssignment(ModuleInfo mi, AssignmentNode asg, SymbolTable locals) { + log("赋值检查: " + asg.variable()); + Symbol sym = locals.resolve(asg.variable()); + if (sym == null || sym.kind() != SymbolKind.VARIABLE) { + errors.add(new SemanticError(asg, "未声明的变量: " + asg.variable())); + log("错误: 未声明的变量 " + asg.variable()); + return; + } + Type valType = checkExpression(mi, null, locals, asg.value()); + if (!sym.type().isCompatible(valType)) { + errors.add(new SemanticError(asg, "赋值类型不匹配: 期望 " + sym.type() + ", 实际 " + valType)); + log("错误: 赋值类型不匹配 for " + asg.variable() + ": 期望 " + sym.type() + ", 实际 " + valType); + } else { + log("赋值成功: " + asg.variable() + " = " + valType); + } + } + + private void checkIf(ModuleInfo mi, FunctionNode fn, SymbolTable locals, IfNode ifn) { + log("检查 if 条件"); + Type cond = checkExpression(mi, fn, locals, ifn.condition()); + if (cond != BuiltinType.INT) { + errors.add(new SemanticError(ifn, "if 条件必须为 int 类型(表示真假)")); + log("错误: if 条件类型不为 int: " + cond); + } else { + log("if 条件类型合法: " + cond); + } + ifn.thenBranch().forEach(s -> checkStatement(mi, fn, locals, s)); + ifn.elseBranch().forEach(s -> checkStatement(mi, fn, locals, s)); + } + + private void checkLoop(ModuleInfo mi, FunctionNode fn, SymbolTable locals, LoopNode ln) { + log("检查 loop 循环"); + checkStatement(mi, fn, locals, ln.initializer()); + Type cond = checkExpression(mi, fn, locals, ln.condition()); + if (cond != BuiltinType.INT) { + errors.add(new SemanticError(ln, "loop 条件必须为 int 类型(表示真假)")); + log("错误: loop 条件类型不为 int: " + cond); + } else { + log("loop 条件类型合法: " + cond); + } + checkStatement(mi, fn, locals, ln.update()); + ln.body().forEach(s -> checkStatement(mi, fn, locals, s)); + } + + private void checkReturn(ModuleInfo mi, FunctionNode fn, SymbolTable locals, ReturnNode ret) { + log("检查 return"); + FunctionType expected = modules.get(mi.getName()).getFunctions().get(fn.name()); + Optional exp = ret.getExpression(); + if (exp.isPresent()) { + Type actual = checkExpression(mi, fn, locals, exp.get()); + if (!expected.returnType().isCompatible(actual)) { + errors.add(new SemanticError(ret, "return 类型不匹配: 期望 " + expected.returnType() + ", 实际 " + actual)); + log("错误: return 类型不匹配: 期望 " + expected.returnType() + ", 实际 " + actual); + } else { + log("return 类型合法: " + actual); + } + } else if (expected.returnType() != BuiltinType.VOID) { + errors.add(new SemanticError(ret, "非 void 函数必须返回值: " + expected.returnType())); + log("错误: 非 void 函数缺少返回值: 期望 " + expected.returnType()); + } else { + log("void 函数 return 合法"); + } + } + + private Type checkExpression(ModuleInfo mi, FunctionNode fn, SymbolTable locals, ExpressionNode expr) { + log("检查表达式: " + expr); + Type result; + switch (expr) { + case NumberLiteralNode ignored -> result = BuiltinType.INT; + case StringLiteralNode ignored -> result = BuiltinType.STRING; + case IdentifierNode id -> result = checkIdentifier(id, locals); + case CallExpressionNode call -> result = checkCall(mi, fn, locals, call); + case BinaryExpressionNode bin -> result = checkBinary(bin, mi, fn, locals); + default -> { + errors.add(new SemanticError(expr, "不支持的表达式类型: " + expr)); + log("错误: 不支持的表达式类型 " + expr); + result = BuiltinType.INT; + } + } + log("表达式类型: " + result); + return result; + } + + private Type checkIdentifier(IdentifierNode id, SymbolTable locals) { + log("检查标识符: " + id.name()); + Symbol sym = locals.resolve(id.name()); + if (sym == null) { + errors.add(new SemanticError(id, "未声明的标识符: " + id.name())); + log("错误: 未声明的标识符 " + id.name()); + return BuiltinType.INT; + } + log("标识符类型: " + sym.type()); + return sym.type(); + } + + private Type checkCall(ModuleInfo mi, FunctionNode fn, SymbolTable locals, CallExpressionNode call) { + log("检查函数调用: " + call.callee()); + ModuleInfo target = mi; + String name; + ExpressionNode callee = call.callee(); + + if (callee instanceof MemberExpressionNode(var object, var member)) { + if (object instanceof IdentifierNode(var modName)) { + if (!modules.containsKey(modName) || (!mi.getImports().contains(modName) && !mi.getName().equals(modName))) { + errors.add(new SemanticError(callee, "未知或未导入模块: " + modName)); + log("错误: 未导入模块 " + modName); + return BuiltinType.INT; + } + target = modules.get(modName); + name = member; + log("调用模块函数: " + modName + "." + member); + } else { + errors.add(new SemanticError(callee, "不支持的调用方式: " + callee)); + log("错误: 不支持的调用方式 " + callee); + return BuiltinType.INT; + } + } else if (callee instanceof IdentifierNode(var idName)) { + name = idName; + log("调用当前模块函数: " + idName); + } else { + errors.add(new SemanticError(callee, "不支持的调用方式: " + callee)); + log("错误: 不支持的调用方式 " + callee); + return BuiltinType.INT; + } + + FunctionType ft = target.getFunctions().get(name); + if (ft == null) { + errors.add(new SemanticError(callee, "函数未定义: " + name)); + log("错误: 函数未定义 " + name); + return BuiltinType.INT; + } + + var args = new ArrayList(); + for (var a : call.arguments()) { + Type at = checkExpression(mi, fn, locals, a); + args.add(at); + } + + if (args.size() != ft.paramTypes().size()) { + errors.add(new SemanticError(call, "参数数量不匹配: 期望 " + ft.paramTypes().size() + " 个, 实际 " + args.size())); + log("错误: 参数数量不匹配: 期望 " + ft.paramTypes().size() + " 个, 实际 " + args.size()); + } else { + for (int i = 0; i < args.size(); i++) { + if (!ft.paramTypes().get(i).isCompatible(args.get(i))) { + errors.add(new SemanticError(call, String.format( + "参数类型不匹配 (位置 %d): 期望 %s, 实际 %s", i, ft.paramTypes().get(i), args.get(i) + ))); + log("错误: 参数类型不匹配 (位置 " + i + "): 期望 " + ft.paramTypes().get(i) + ", 实际 " + args.get(i)); + } + } + if (verbose) log("参数检查完成,所有参数类型符合期望"); + } + + log("函数调用类型: 返回 " + ft.returnType()); + return ft.returnType(); + } + + private Type checkBinary(BinaryExpressionNode bin, ModuleInfo mi, FunctionNode fn, SymbolTable locals) { + log("检查二元表达式: " + bin.operator()); + Type l = checkExpression(mi, fn, locals, bin.left()); + Type r = checkExpression(mi, fn, locals, bin.right()); + String op = bin.operator(); + Type result; + switch (op) { + case "+" -> { + if (l == BuiltinType.STRING || r == BuiltinType.STRING) result = BuiltinType.STRING; + else if (l == BuiltinType.INT && r == BuiltinType.INT) result = BuiltinType.INT; + else result = null; + } + case "-", "*", "/", "%" -> { + if (l == BuiltinType.INT && r == BuiltinType.INT) result = BuiltinType.INT; + else result = null; + } + case "<", "<=", ">", ">=", "==", "!=" -> { + if (l == BuiltinType.INT && r == BuiltinType.INT) result = BuiltinType.INT; + else result = null; + } + default -> { + errors.add(new SemanticError(bin, "未知运算符: " + op)); + log("错误: 未知运算符 " + op); + return BuiltinType.INT; + } + } + if (result == null) { + errors.add(new SemanticError(bin, String.format("运算符 '%s' 不支持类型: %s, %s", op, l, r))); + log("错误: 运算符 '" + op + "' 不支持类型: " + l + ", " + r); + result = BuiltinType.INT; + } else { + log("二元表达式计算类型: " + result); + } + return result; + } + + private Type parseType(String name) { + return BUILTIN_TYPES.get(name); + } +} \ No newline at end of file diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/SemanticError.java b/src/main/java/org/jcnc/snow/compiler/semantic/SemanticError.java new file mode 100644 index 0000000..e4bf44c --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/SemanticError.java @@ -0,0 +1,14 @@ +package org.jcnc.snow.compiler.semantic; + +import org.jcnc.snow.compiler.parser.ast.base.Node; + +/** + * 语义错误类,保存 AST 节点和对应的错误信息。 + */ +public record SemanticError(Node node, String message) { + + @Override + public String toString() { + return "Semantic error at " + node + ": " + message; + } +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/Symbol.java b/src/main/java/org/jcnc/snow/compiler/semantic/Symbol.java new file mode 100644 index 0000000..24287f5 --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/Symbol.java @@ -0,0 +1,7 @@ +package org.jcnc.snow.compiler.semantic; + +/** + * 符号表中的一条记录,包含名称、类型和种类信息。 + */ +public record Symbol(String name, Type type, SymbolKind kind) { +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/SymbolKind.java b/src/main/java/org/jcnc/snow/compiler/semantic/SymbolKind.java new file mode 100644 index 0000000..260aaa4 --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/SymbolKind.java @@ -0,0 +1,10 @@ +package org.jcnc.snow.compiler.semantic; + +/** + * 符号的种类枚举,用于区分变量、函数和模块等不同符号类型。 + */ +public enum SymbolKind { + VARIABLE, + FUNCTION, + MODULE +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/SymbolTable.java b/src/main/java/org/jcnc/snow/compiler/semantic/SymbolTable.java new file mode 100644 index 0000000..80bf154 --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/SymbolTable.java @@ -0,0 +1,41 @@ +package org.jcnc.snow.compiler.semantic; + +import java.util.HashMap; +import java.util.Map; + +/** + * 符号表,支持链式作用域查找,通过 parent 引用形成作用域嵌套。 + */ +public class SymbolTable { + private final SymbolTable parent; + private final Map symbols = new HashMap<>(); + + public SymbolTable(SymbolTable parent) { + this.parent = parent; + } + + /** + * 在当前作用域中定义一个符号。如已存在同名符号,则返回 false。 + */ + public boolean define(Symbol symbol) { + if (symbols.containsKey(symbol.name())) { + return false; + } + symbols.put(symbol.name(), symbol); + return true; + } + + /** + * 查找一个符号,先从当前作用域开始,若未找到则向上查找父作用域。 + */ + public Symbol resolve(String name) { + Symbol sym = symbols.get(name); + if (sym != null) { + return sym; + } + if (parent != null) { + return parent.resolve(name); + } + return null; + } +} diff --git a/src/main/java/org/jcnc/snow/compiler/semantic/Type.java b/src/main/java/org/jcnc/snow/compiler/semantic/Type.java new file mode 100644 index 0000000..d7dc725 --- /dev/null +++ b/src/main/java/org/jcnc/snow/compiler/semantic/Type.java @@ -0,0 +1,11 @@ +package org.jcnc.snow.compiler.semantic; + +/** + * 类型接口。所有具体类型(内置类型、函数类型等)都应实现此接口。 + */ +public interface Type { + /** + * 判断此类型是否可接受赋值或兼容另一个类型。 + */ + boolean isCompatible(Type other); +}