增加语义分析功能

This commit is contained in:
Luke 2025-04-27 16:07:37 +08:00
parent b3da8b70ff
commit a596f4d03e
10 changed files with 554 additions and 2 deletions

View File

@ -3,14 +3,17 @@ package org.jcnc.snow.compiler;
import org.jcnc.snow.compiler.lexer.LexerEngine;
import org.jcnc.snow.compiler.lexer.token.Token;
import org.jcnc.snow.compiler.parser.ParserEngine;
import org.jcnc.snow.compiler.parser.ast.ModuleNode;
import org.jcnc.snow.compiler.parser.context.ParserContext;
import org.jcnc.snow.compiler.parser.ast.base.Node;
import org.jcnc.snow.compiler.parser.function.ASTPrinter;
import org.jcnc.snow.compiler.semantic.SemanticAnalyzer;
import org.jcnc.snow.compiler.semantic.SemanticError;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public class Main {
@ -29,11 +32,34 @@ public class Main {
// 3. 语义分析
// 3. 语义分析
// parse() 返回的是顶层 Node 列表我们只关心 ModuleNode
List<ModuleNode> modules = new ArrayList<>();
for (Node n : ast) {
if (n instanceof ModuleNode m) {
modules.add(m);
}
}
SemanticAnalyzer analyzer = new SemanticAnalyzer(true);
List<SemanticError> errors = analyzer.analyze(modules);
if (!errors.isEmpty()) {
System.err.println("语义分析发现错误:");
for (SemanticError e : errors) {
System.err.println(" " + e);
}
// 遇到错误直接退出
System.exit(1);
} else {
// 无错误时也打印一条成功信息
System.out.println("语义分析通过,没有发现错误。");
}
// 打印
// System.out.println(source);
// TokenPrinter.print(tokens); // 打印 Token 列表
ASTPrinter.print(ast); // 打印 AST
// ASTPrinter.print(ast); // 打印 AST
// ASTPrinter.printJson(ast); // 打印JSON AST
}

View File

@ -0,0 +1,20 @@
package org.jcnc.snow.compiler.semantic;
/**
 * Built-in primitive types of the language: {@code int}, {@code string} and {@code void}.
 *
 * <p>{@code int} is also the type used to represent truth values.
 */
public enum BuiltinType implements Type {
    INT, STRING, VOID;

    /**
     * Reports whether {@code other} is the very same built-in type.
     *
     * @param other the type to compare against; {@code null} is never compatible
     * @return {@code true} only when {@code other} is this exact enum constant
     */
    @Override
    public boolean isCompatible(Type other) {
        // Built-in types have no implicit conversions: identity is the only match.
        return this == other;
    }

    /**
     * Returns the lower-case spelling of the constant name
     * ({@code "int"}, {@code "string"}, {@code "void"}).
     *
     * @return the lower-cased constant name
     */
    @Override
    public String toString() {
        // Locale.ROOT keeps the result independent of the JVM's default locale;
        // with a Turkish default locale "INT".toLowerCase() would yield a dotless i.
        return name().toLowerCase(java.util.Locale.ROOT);
    }
}

View File

@ -0,0 +1,36 @@
package org.jcnc.snow.compiler.semantic;
import java.util.List;
/**
 * A function type: the ordered parameter types together with the return type.
 *
 * @param paramTypes parameter types in declaration order (stored as an unmodifiable copy)
 * @param returnType type of the value the function returns
 */
public record FunctionType(List<Type> paramTypes, Type returnType) implements Type {

    /**
     * Canonical constructor; snapshots {@code paramTypes} so that later changes to the
     * caller's list cannot affect this instance.
     */
    public FunctionType {
        paramTypes = List.copyOf(paramTypes);
    }

    /**
     * Two function types are compatible when the other one is also a function type,
     * the return types are compatible, and the parameter lists are equal.
     *
     * @param other the type to compare against
     * @return {@code true} if {@code other} is a compatible function type
     */
    @Override
    public boolean isCompatible(Type other) {
        return other instanceof FunctionType that
                && returnType.isCompatible(that.returnType)
                && paramTypes.equals(that.paramTypes);
    }

    /**
     * Renders the type as {@code (<paramTypes>) -> <returnType>}.
     */
    @Override
    public String toString() {
        return new StringBuilder()
                .append("(")
                .append(paramTypes)
                .append(") -> ")
                .append(returnType)
                .toString();
    }

    /**
     * Structural equality: same return type and equal parameter lists.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        return obj instanceof FunctionType that
                && returnType.equals(that.returnType)
                && paramTypes.equals(that.paramTypes);
    }
}

View File

@ -0,0 +1,40 @@
package org.jcnc.snow.compiler.semantic;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Module-level information: the module's name, the names of the modules it imports,
 * and its function signatures.
 */
public class ModuleInfo {
    /** Function signatures keyed by function name. */
    private final Map<String, FunctionType> functions = new HashMap<>();
    /** Names of the modules this module imports. */
    private final Set<String> imports = new HashSet<>();
    /** Name of this module. */
    private final String name;

    /**
     * Creates an entry with no imports and no functions.
     *
     * @param moduleName name of the module
     */
    public ModuleInfo(String moduleName) {
        name = moduleName;
    }

    /**
     * @return the module name
     */
    public String getName() {
        return this.name;
    }

    /**
     * @return the live, mutable set of imported module names (not a copy)
     */
    public Set<String> getImports() {
        return this.imports;
    }

    /**
     * @return the live, mutable map from function name to function type (not a copy)
     */
    public Map<String, FunctionType> getFunctions() {
        return this.functions;
    }
}

View File

@ -0,0 +1,347 @@
package org.jcnc.snow.compiler.semantic;
import org.jcnc.snow.compiler.parser.ast.*;
import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode;
import org.jcnc.snow.compiler.parser.ast.base.StatementNode;
import java.util.*;
/**
 * Semantic analyzer for a list of parsed modules.
 *
 * <p>The analysis runs in four passes: register the built-in module, register the user
 * modules, record imports and function signatures, then type-check every function body.
 * Problems are collected as {@link SemanticError}s rather than thrown.
 */
public class SemanticAnalyzer {
    /** Source-level type names mapped to the built-in types they denote. */
    private static final Map<String, Type> BUILTIN_TYPES = Map.of(
            "int", BuiltinType.INT,
            "string", BuiltinType.STRING,
            "void", BuiltinType.VOID
    );

    /** All known modules (built-in and user-defined), keyed by module name. */
    private final Map<String, ModuleInfo> modules = new HashMap<>();
    /** Errors collected during the current analysis run. */
    private final List<SemanticError> errors = new ArrayList<>();
    /** Whether progress messages are printed to standard output. */
    private final boolean verbose;

    /** Creates an analyzer with verbose logging switched off. */
    public SemanticAnalyzer() {
        this(false);
    }

    /**
     * @param verbose whether to print detailed progress messages
     */
    public SemanticAnalyzer(boolean verbose) {
        this.verbose = verbose;
    }

    /** Prints {@code msg} to standard output when verbose logging is enabled. */
    private void log(String msg) {
        if (verbose) {
            System.out.println("[SemanticAnalyzer] " + msg);
        }
    }

    /**
     * Runs the full semantic analysis over the given modules.
     *
     * @param moduleNodes the module AST nodes to analyze
     * @return the errors found, in discovery order; empty when the input is valid.
     *         The list is a fresh copy owned by the caller.
     */
    public List<SemanticError> analyze(List<ModuleNode> moduleNodes) {
        // Start from a clean slate so that reusing one analyzer for several runs does not
        // re-report earlier errors or resolve against modules from a previous input.
        modules.clear();
        errors.clear();

        log("开始语义分析");
        initBuiltinModule();
        log("内置模块初始化完成");
        registerUserModules(moduleNodes);
        log("用户模块注册完成: " + modules.keySet());
        registerSignaturesAndImports(moduleNodes);
        log("函数签名与导入检查完成");
        checkAllFunctions(moduleNodes);
        log("所有函数检查完成,错误总数: " + errors.size());
        // Hand out a copy so a later run cannot mutate a result the caller still holds.
        return new ArrayList<>(errors);
    }

    /** Registers the {@code BuiltinUtils} module with {@code to_int}, {@code to_string} and {@code print}. */
    private void initBuiltinModule() {
        ModuleInfo builtin = new ModuleInfo("BuiltinUtils");
        builtin.getFunctions().put("to_int", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.INT));
        builtin.getFunctions().put("to_string", new FunctionType(List.of(BuiltinType.INT), BuiltinType.STRING));
        builtin.getFunctions().put("print", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.VOID));
        modules.put(builtin.getName(), builtin);
        log("内置函数 to_int, to_string, print 注册到模块 " + builtin.getName());
    }

    /** Creates an empty {@link ModuleInfo} for every user module so imports can be validated. */
    private void registerUserModules(List<ModuleNode> mods) {
        for (var mod : mods) {
            modules.put(mod.name(), new ModuleInfo(mod.name()));
            log("注册用户模块: " + mod.name());
        }
    }

    /**
     * Records each module's imports and function signatures.
     *
     * <p>Unknown imports and unknown type names are reported. An unknown parameter type is
     * replaced by {@code int} and an unknown return type by {@code void} so that later
     * passes always see a usable signature.
     */
    private void registerSignaturesAndImports(List<ModuleNode> mods) {
        for (var mod : mods) {
            ModuleInfo mi = modules.get(mod.name());
            for (var imp : mod.imports()) {
                if (!modules.containsKey(imp.moduleName())) {
                    errors.add(new SemanticError(imp, "未知模块: " + imp.moduleName()));
                    log("错误: 未知模块导入 " + imp.moduleName());
                } else {
                    mi.getImports().add(imp.moduleName());
                    log("模块 " + mod.name() + " 导入模块: " + imp.moduleName());
                }
            }
            for (var fn : mod.functions()) {
                List<Type> params = new ArrayList<>();
                for (var p : fn.parameters()) {
                    Type t = parseType(p.type());
                    if (t == null) {
                        errors.add(new SemanticError(p, "未知类型: " + p.type()));
                        log("错误: 参数未知类型 " + p.type() + " 在函数 " + fn.name());
                        t = BuiltinType.INT;
                    }
                    params.add(t);
                }
                Type ret = parseType(fn.returnType());
                if (ret == null) {
                    errors.add(new SemanticError(fn, "未知返回类型: " + fn.returnType()));
                    log("错误: 函数返回类型未知 " + fn.returnType() + " 在函数 " + fn.name());
                    ret = BuiltinType.VOID;
                }
                mi.getFunctions().put(fn.name(), new FunctionType(params, ret));
                log("注册函数: " + mod.name() + "." + fn.name() + " 参数类型: " + params + " 返回类型: " + ret);
            }
        }
    }

    /** Type-checks the body of every function, each with its own fresh symbol table. */
    private void checkAllFunctions(List<ModuleNode> mods) {
        for (var mod : mods) {
            ModuleInfo mi = modules.get(mod.name());
            for (var fn : mod.functions()) {
                log("开始检查函数: " + mod.name() + "." + fn.name());
                SymbolTable locals = new SymbolTable(null);
                for (var p : fn.parameters()) {
                    // An unknown parameter type was already reported during signature
                    // registration. Use the same int fallback here so the symbol never
                    // carries a null type, which would otherwise cause a
                    // NullPointerException in later compatibility checks.
                    Type paramType = parseTypeOrDefault(p.type(), BuiltinType.INT);
                    locals.define(new Symbol(p.name(), paramType, SymbolKind.VARIABLE));
                    log("定义参数: " + p.name() + " 类型: " + p.type());
                }
                fn.body().forEach(stmt -> checkStatement(mi, fn, locals, stmt));
                log("完成函数检查: " + mod.name() + "." + fn.name());
            }
        }
    }

    /** Dispatches a statement to the matching check; unsupported statement kinds are reported. */
    private void checkStatement(ModuleInfo mi, FunctionNode fn, SymbolTable locals, StatementNode stmt) {
        log("检查语句: " + stmt);
        switch (stmt) {
            case DeclarationNode decl -> checkDeclaration(mi, fn, decl, locals);
            case AssignmentNode asg -> checkAssignment(mi, fn, asg, locals);
            case ExpressionStatementNode es -> checkExpression(mi, fn, locals, es.expression());
            case IfNode ifn -> checkIf(mi, fn, locals, ifn);
            case LoopNode ln -> checkLoop(mi, fn, locals, ln);
            case ReturnNode ret -> checkReturn(mi, fn, locals, ret);
            default -> {
                errors.add(new SemanticError(stmt, "不支持的语句类型: " + stmt));
                log("错误: 不支持的语句类型 " + stmt);
            }
        }
    }

    /**
     * Checks a variable declaration: the declared type must be known, the name must not
     * already be defined in {@code locals}, and an initializer (if any) must be compatible
     * with the declared type.
     */
    private void checkDeclaration(ModuleInfo mi, FunctionNode fn, DeclarationNode decl, SymbolTable locals) {
        Type declared = parseType(decl.getType());
        if (declared == null) {
            // Report the unknown type instead of silently treating the variable as int.
            errors.add(new SemanticError(decl, "未知类型: " + decl.getType()));
            log("错误: 未知类型 " + decl.getType());
        }
        // Fall back to int so the remaining checks can continue after the error.
        final Type varType = declared != null ? declared : BuiltinType.INT;
        log("声明变量: " + decl.getName() + " 类型: " + decl.getType());
        if (!locals.define(new Symbol(decl.getName(), varType, SymbolKind.VARIABLE))) {
            errors.add(new SemanticError(decl, "变量重复声明: " + decl.getName()));
            log("错误: 变量重复声明 " + decl.getName());
        }
        decl.getInitializer().ifPresent(init -> {
            Type initType = checkExpression(mi, fn, locals, init);
            if (!varType.isCompatible(initType)) {
                errors.add(new SemanticError(decl, "初始化类型不匹配: 期望 " + varType + ", 实际 " + initType));
                log("错误: 初始化类型不匹配 for " + decl.getName() + ": 期望 " + varType + ", 实际 " + initType);
            } else {
                log("初始化成功: " + decl.getName() + " 类型: " + initType);
            }
        });
    }

    /**
     * Checks an assignment: the target must be a declared variable and the assigned value
     * must be compatible with the variable's type.
     */
    private void checkAssignment(ModuleInfo mi, FunctionNode fn, AssignmentNode asg, SymbolTable locals) {
        log("赋值检查: " + asg.variable());
        Symbol sym = locals.resolve(asg.variable());
        if (sym == null || sym.kind() != SymbolKind.VARIABLE) {
            errors.add(new SemanticError(asg, "未声明的变量: " + asg.variable()));
            log("错误: 未声明的变量 " + asg.variable());
            return;
        }
        Type valType = checkExpression(mi, fn, locals, asg.value());
        if (!sym.type().isCompatible(valType)) {
            errors.add(new SemanticError(asg, "赋值类型不匹配: 期望 " + sym.type() + ", 实际 " + valType));
            log("错误: 赋值类型不匹配 for " + asg.variable() + ": 期望 " + sym.type() + ", 实际 " + valType);
        } else {
            log("赋值成功: " + asg.variable() + " = " + valType);
        }
    }

    /**
     * Checks an {@code if}: the condition must be {@code int}; both branches are then
     * checked against the same symbol table as the enclosing statement list.
     */
    private void checkIf(ModuleInfo mi, FunctionNode fn, SymbolTable locals, IfNode ifn) {
        log("检查 if 条件");
        Type cond = checkExpression(mi, fn, locals, ifn.condition());
        if (cond != BuiltinType.INT) {
            errors.add(new SemanticError(ifn, "if 条件必须为 int 类型(表示真假)"));
            log("错误: if 条件类型不为 int: " + cond);
        } else {
            log("if 条件类型合法: " + cond);
        }
        ifn.thenBranch().forEach(s -> checkStatement(mi, fn, locals, s));
        ifn.elseBranch().forEach(s -> checkStatement(mi, fn, locals, s));
    }

    /**
     * Checks a loop: initializer, condition (must be {@code int}), update statement, and
     * finally the body, all against the same symbol table.
     */
    private void checkLoop(ModuleInfo mi, FunctionNode fn, SymbolTable locals, LoopNode ln) {
        log("检查 loop 循环");
        checkStatement(mi, fn, locals, ln.initializer());
        Type cond = checkExpression(mi, fn, locals, ln.condition());
        if (cond != BuiltinType.INT) {
            errors.add(new SemanticError(ln, "loop 条件必须为 int 类型(表示真假)"));
            log("错误: loop 条件类型不为 int: " + cond);
        } else {
            log("loop 条件类型合法: " + cond);
        }
        checkStatement(mi, fn, locals, ln.update());
        ln.body().forEach(s -> checkStatement(mi, fn, locals, s));
    }

    /**
     * Checks a {@code return} against the enclosing function's registered return type:
     * a returned value must be compatible with it, and a bare {@code return} is only
     * allowed in a {@code void} function.
     */
    private void checkReturn(ModuleInfo mi, FunctionNode fn, SymbolTable locals, ReturnNode ret) {
        log("检查 return");
        FunctionType expected = mi.getFunctions().get(fn.name());
        Optional<ExpressionNode> exp = ret.getExpression();
        if (exp.isPresent()) {
            Type actual = checkExpression(mi, fn, locals, exp.get());
            if (!expected.returnType().isCompatible(actual)) {
                errors.add(new SemanticError(ret, "return 类型不匹配: 期望 " + expected.returnType() + ", 实际 " + actual));
                log("错误: return 类型不匹配: 期望 " + expected.returnType() + ", 实际 " + actual);
            } else {
                log("return 类型合法: " + actual);
            }
        } else if (expected.returnType() != BuiltinType.VOID) {
            errors.add(new SemanticError(ret, "非 void 函数必须返回值: " + expected.returnType()));
            log("错误: 非 void 函数缺少返回值: 期望 " + expected.returnType());
        } else {
            log("void 函数 return 合法");
        }
    }

    /**
     * Computes the type of an expression, reporting any problems found on the way.
     *
     * @return the expression's type; {@code int} is used as a recovery type when the
     *         expression kind is not supported
     */
    private Type checkExpression(ModuleInfo mi, FunctionNode fn, SymbolTable locals, ExpressionNode expr) {
        log("检查表达式: " + expr);
        Type result;
        switch (expr) {
            case NumberLiteralNode ignored -> result = BuiltinType.INT;
            case StringLiteralNode ignored -> result = BuiltinType.STRING;
            case IdentifierNode id -> result = checkIdentifier(id, locals);
            case CallExpressionNode call -> result = checkCall(mi, fn, locals, call);
            case BinaryExpressionNode bin -> result = checkBinary(bin, mi, fn, locals);
            default -> {
                errors.add(new SemanticError(expr, "不支持的表达式类型: " + expr));
                log("错误: 不支持的表达式类型 " + expr);
                result = BuiltinType.INT;
            }
        }
        log("表达式类型: " + result);
        return result;
    }

    /**
     * Resolves an identifier in {@code locals}.
     *
     * @return the symbol's type, or {@code int} as a recovery type when it is undeclared
     */
    private Type checkIdentifier(IdentifierNode id, SymbolTable locals) {
        log("检查标识符: " + id.name());
        Symbol sym = locals.resolve(id.name());
        if (sym == null) {
            errors.add(new SemanticError(id, "未声明的标识符: " + id.name()));
            log("错误: 未声明的标识符 " + id.name());
            return BuiltinType.INT;
        }
        log("标识符类型: " + sym.type());
        return sym.type();
    }

    /**
     * Checks a call expression.
     *
     * <p>{@code Module.func(...)} is resolved in the named module, which must be known and
     * either imported by the current module or be the current module itself;
     * {@code func(...)} is resolved in the current module. Argument count and argument
     * types are then compared against the callee's signature.
     *
     * @return the callee's return type, or {@code int} as a recovery type when the callee
     *         cannot be resolved
     */
    private Type checkCall(ModuleInfo mi, FunctionNode fn, SymbolTable locals, CallExpressionNode call) {
        log("检查函数调用: " + call.callee());
        ModuleInfo target = mi;
        String name;
        ExpressionNode callee = call.callee();
        if (callee instanceof MemberExpressionNode(var object, var member)) {
            if (object instanceof IdentifierNode(var modName)) {
                if (!modules.containsKey(modName) || (!mi.getImports().contains(modName) && !mi.getName().equals(modName))) {
                    errors.add(new SemanticError(callee, "未知或未导入模块: " + modName));
                    log("错误: 未导入模块 " + modName);
                    return BuiltinType.INT;
                }
                target = modules.get(modName);
                name = member;
                log("调用模块函数: " + modName + "." + member);
            } else {
                errors.add(new SemanticError(callee, "不支持的调用方式: " + callee));
                log("错误: 不支持的调用方式 " + callee);
                return BuiltinType.INT;
            }
        } else if (callee instanceof IdentifierNode(var idName)) {
            name = idName;
            log("调用当前模块函数: " + idName);
        } else {
            errors.add(new SemanticError(callee, "不支持的调用方式: " + callee));
            log("错误: 不支持的调用方式 " + callee);
            return BuiltinType.INT;
        }
        FunctionType ft = target.getFunctions().get(name);
        if (ft == null) {
            errors.add(new SemanticError(callee, "函数未定义: " + name));
            log("错误: 函数未定义 " + name);
            return BuiltinType.INT;
        }
        List<Type> args = new ArrayList<>();
        for (var a : call.arguments()) {
            args.add(checkExpression(mi, fn, locals, a));
        }
        if (args.size() != ft.paramTypes().size()) {
            errors.add(new SemanticError(call, "参数数量不匹配: 期望 " + ft.paramTypes().size() + " 个, 实际 " + args.size()));
            log("错误: 参数数量不匹配: 期望 " + ft.paramTypes().size() + " 个, 实际 " + args.size());
        } else {
            boolean allMatch = true;
            for (int i = 0; i < args.size(); i++) {
                if (!ft.paramTypes().get(i).isCompatible(args.get(i))) {
                    errors.add(new SemanticError(call, String.format(
                            "参数类型不匹配 (位置 %d): 期望 %s, 实际 %s", i, ft.paramTypes().get(i), args.get(i)
                    )));
                    log("错误: 参数类型不匹配 (位置 " + i + "): 期望 " + ft.paramTypes().get(i) + ", 实际 " + args.get(i));
                    allMatch = false;
                }
            }
            // Only claim success when no mismatch was reported above.
            if (allMatch) {
                log("参数检查完成,所有参数类型符合期望");
            }
        }
        log("函数调用类型: 返回 " + ft.returnType());
        return ft.returnType();
    }

    /**
     * Computes the type of a binary expression.
     *
     * <p>{@code +} yields {@code string} when either operand is a string and {@code int}
     * for two ints; the remaining arithmetic operators and all comparison operators
     * require two ints and yield {@code int}.
     *
     * @return the result type, or {@code int} as a recovery type after an error
     */
    private Type checkBinary(BinaryExpressionNode bin, ModuleInfo mi, FunctionNode fn, SymbolTable locals) {
        log("检查二元表达式: " + bin.operator());
        Type l = checkExpression(mi, fn, locals, bin.left());
        Type r = checkExpression(mi, fn, locals, bin.right());
        String op = bin.operator();
        Type result;
        switch (op) {
            case "+" -> {
                if (l == BuiltinType.STRING || r == BuiltinType.STRING) result = BuiltinType.STRING;
                else if (l == BuiltinType.INT && r == BuiltinType.INT) result = BuiltinType.INT;
                else result = null;
            }
            case "-", "*", "/", "%" -> {
                if (l == BuiltinType.INT && r == BuiltinType.INT) result = BuiltinType.INT;
                else result = null;
            }
            case "<", "<=", ">", ">=", "==", "!=" -> {
                // Comparisons produce int because int serves as the truth-value type.
                if (l == BuiltinType.INT && r == BuiltinType.INT) result = BuiltinType.INT;
                else result = null;
            }
            default -> {
                errors.add(new SemanticError(bin, "未知运算符: " + op));
                log("错误: 未知运算符 " + op);
                return BuiltinType.INT;
            }
        }
        if (result == null) {
            errors.add(new SemanticError(bin, String.format("运算符 '%s' 不支持类型: %s, %s", op, l, r)));
            log("错误: 运算符 '" + op + "' 不支持类型: " + l + ", " + r);
            result = BuiltinType.INT;
        } else {
            log("二元表达式计算类型: " + result);
        }
        return result;
    }

    /**
     * Looks up a built-in type by its source-level name.
     *
     * @param name the type name as written in source; may be {@code null}
     * @return the matching type, or {@code null} when the name is {@code null} or unknown
     */
    private Type parseType(String name) {
        // Maps created by Map.of reject null keys on lookup, so guard explicitly and
        // treat a missing name like any other unknown type.
        return name == null ? null : BUILTIN_TYPES.get(name);
    }

    /** Resolves {@code name} like {@link #parseType}, substituting {@code fallback} when unknown. */
    private Type parseTypeOrDefault(String name, Type fallback) {
        Type resolved = parseType(name);
        return resolved != null ? resolved : fallback;
    }
}

View File

@ -0,0 +1,14 @@
package org.jcnc.snow.compiler.semantic;
import org.jcnc.snow.compiler.parser.ast.base.Node;
/**
 * A semantic error: the AST node it refers to plus a human-readable message.
 *
 * @param node    the AST node the error is attached to
 * @param message description of the problem
 */
public record SemanticError(Node node, String message) {

    /**
     * Formats the error as {@code Semantic error at <node>: <message>}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Semantic error at ");
        text.append(node);
        text.append(": ");
        text.append(message);
        return text.toString();
    }
}

View File

@ -0,0 +1,7 @@
package org.jcnc.snow.compiler.semantic;
/**
 * A single symbol-table entry holding a name, a type and a kind.
 *
 * @param name the identifier under which the symbol is stored and looked up
 * @param type the symbol's type
 * @param kind what sort of entity the symbol denotes (see {@link SymbolKind})
 */
public record Symbol(String name, Type type, SymbolKind kind) {
}

View File

@ -0,0 +1,10 @@
package org.jcnc.snow.compiler.semantic;
/**
 * Kinds of symbols, used to tell variables, functions and modules apart.
 */
public enum SymbolKind {
// A variable; function parameters are also registered with this kind.
VARIABLE,
// A function.
FUNCTION,
// A module.
MODULE
}

View File

@ -0,0 +1,41 @@
package org.jcnc.snow.compiler.semantic;
import java.util.HashMap;
import java.util.Map;
/**
 * Symbol table with chained scopes: each table may point at a parent table, and lookups
 * that miss locally continue outward through the parents.
 */
public class SymbolTable {
    /** Enclosing scope, or {@code null} for the outermost table. */
    private final SymbolTable parent;
    /** Symbols defined directly in this scope, keyed by name. */
    private final Map<String, Symbol> symbols = new HashMap<>();

    /**
     * @param parent the enclosing scope, or {@code null} if there is none
     */
    public SymbolTable(SymbolTable parent) {
        this.parent = parent;
    }

    /**
     * Defines a symbol in this scope only (parent scopes are not consulted).
     *
     * @param symbol the symbol to add
     * @return {@code true} if it was added; {@code false} if this scope already holds a
     *         symbol with the same name, in which case the existing entry is kept
     */
    public boolean define(Symbol symbol) {
        Symbol previous = symbols.putIfAbsent(symbol.name(), symbol);
        return previous == null;
    }

    /**
     * Looks a name up, starting in this scope and walking outward through the parents.
     *
     * @param name the symbol name
     * @return the innermost matching symbol, or {@code null} if no scope defines it
     */
    public Symbol resolve(String name) {
        for (SymbolTable scope = this; scope != null; scope = scope.parent) {
            Symbol found = scope.symbols.get(name);
            if (found != null) {
                return found;
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,11 @@
package org.jcnc.snow.compiler.semantic;
/**
 * Type interface; every concrete type (built-in types, function types, ...) implements it.
 */
public interface Type {
/**
 * Determines whether this type can accept a value of, or is otherwise compatible with,
 * another type.
 *
 * @param other the type to test against this one
 * @return {@code true} if {@code other} is compatible with this type
 */
boolean isCompatible(Type other);
}