优化语法分析器

This commit is contained in:
Luke 2025-05-11 22:27:42 +08:00
parent 9ff16dd3a9
commit 089e629187
8 changed files with 196 additions and 278 deletions

View File

@ -0,0 +1,35 @@
package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.parser.ast.*;
import org.jcnc.snow.compiler.semantic.analyzers.AnalyzerRegistry;
import org.jcnc.snow.compiler.semantic.analyzers.expression.*;
import org.jcnc.snow.compiler.semantic.analyzers.statement.*;
/**
 * Registers every statement and expression analyzer with an
 * {@link AnalyzerRegistry} in one call.
 */
public final class AnalyzerRegistrar {

    /** Static utility holder; never instantiated. */
    private AnalyzerRegistrar() {
    }

    /**
     * Populates {@code registry} with one analyzer per supported AST node type:
     * statement analyzers first, then expression analyzers.
     *
     * @param registry the registry that receives the analyzers; it is also captured by
     *                 the expression-statement analyzer, which uses it to look up the
     *                 analyzer for the wrapped expression
     */
    public static void registerAll(AnalyzerRegistry registry) {
        registerStatementAnalyzers(registry);
        registerExpressionAnalyzers(registry);
    }

    /** Registers the analyzers for statement nodes. */
    private static void registerStatementAnalyzers(AnalyzerRegistry registry) {
        registry.registerStatementAnalyzer(DeclarationNode.class, new DeclarationAnalyzer());
        registry.registerStatementAnalyzer(AssignmentNode.class, new AssignmentAnalyzer());
        registry.registerStatementAnalyzer(IfNode.class, new IfAnalyzer());
        registry.registerStatementAnalyzer(LoopNode.class, new LoopAnalyzer());
        registry.registerStatementAnalyzer(ReturnNode.class, new ReturnAnalyzer());

        // An expression statement has no analysis of its own: it forwards to whichever
        // expression analyzer the registry returns for the wrapped expression.
        registry.registerStatementAnalyzer(ExpressionStatementNode.class,
                (context, module, function, scope, statement) -> {
                    var expression = statement.expression();
                    registry.getExpressionAnalyzer(expression)
                            .analyze(context, module, function, scope, expression);
                });
    }

    /** Registers the analyzers for expression nodes. */
    private static void registerExpressionAnalyzers(AnalyzerRegistry registry) {
        registry.registerExpressionAnalyzer(NumberLiteralNode.class, new NumberLiteralAnalyzer());
        registry.registerExpressionAnalyzer(StringLiteralNode.class, new StringLiteralAnalyzer());
        registry.registerExpressionAnalyzer(IdentifierNode.class, new IdentifierAnalyzer());
        registry.registerExpressionAnalyzer(CallExpressionNode.class, new CallExpressionAnalyzer());
        registry.registerExpressionAnalyzer(BinaryExpressionNode.class, new BinaryExpressionAnalyzer());
        // Member expressions are routed to the "unsupported" analyzer.
        registry.registerExpressionAnalyzer(MemberExpressionNode.class, new UnsupportedExpressionAnalyzer<>());
    }
}

View File

@ -0,0 +1,27 @@
package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.semantic.type.*;
import java.util.Map;
/**
 * Central home for the built-in type table and for installing the built-in
 * {@code BuiltinUtils} module into a {@link Context}.
 */
public final class BuiltinTypeRegistry {

    /** Key under which the built-in module is stored in the context's module map. */
    private static final String BUILTIN_MODULE_NAME = "BuiltinUtils";

    /** Immutable lookup from a source-level type name to its built-in {@link Type}. */
    public static final Map<String, Type> BUILTIN_TYPES = Map.ofEntries(
            Map.entry("int", BuiltinType.INT),
            Map.entry("long", BuiltinType.LONG),
            Map.entry("short", BuiltinType.SHORT),
            Map.entry("byte", BuiltinType.BYTE),
            Map.entry("float", BuiltinType.FLOAT),
            Map.entry("double", BuiltinType.DOUBLE),
            Map.entry("string", BuiltinType.STRING),
            Map.entry("void", BuiltinType.VOID)
    );

    /** Static utility holder; never instantiated. */
    private BuiltinTypeRegistry() {
    }

    /**
     * Stores the module produced by {@link ModuleInfo#builtin()} in the context's
     * module map under the name {@code "BuiltinUtils"}, replacing any entry already
     * stored under that name.
     *
     * @param ctx the analysis context whose module map is updated
     */
    public static void init(Context ctx) {
        ModuleInfo builtinModule = ModuleInfo.builtin();
        ctx.modules().put(BUILTIN_MODULE_NAME, builtinModule);
    }
}

View File

@ -47,6 +47,9 @@ public class Context {
return modules; return modules;
} }
/**
 * Returns the module map held by this context (the same instance, not a copy).
 * NOTE(review): the method shown just above also returns {@code modules}; this looks
 * like a short-named alias of it — confirm before removing either.
 */
public Map<String, ModuleInfo> modules() { return modules; }
/** Returns the list of semantic errors held by this context (the same instance, not a copy). */
public List<SemanticError> errors() { return errors; }
/** Returns the analyzer registry held by this context. */
public AnalyzerRegistry registry() { return registry; }
/** /**
* 获取语义错误列表 * 获取语义错误列表
* 分析过程中产生的 {@link SemanticError} 会被收集到此列表 * 分析过程中产生的 {@link SemanticError} 会被收集到此列表
@ -81,13 +84,13 @@ public class Context {
/** /**
* 将类型名解析为内置类型 {@link Type} 实例 * 将类型名解析为内置类型 {@link Type} 实例
* <p> * <p>
* 若名称在 {@link SemanticAnalyzer#BUILTIN_TYPES} 中存在则返回对应类型 * 若名称在 {@link BuiltinTypeRegistry#BUILTIN_TYPES} 中存在则返回对应类型
* 否则返回 {@code null}调用方可据此决定降级为默认类型并记录错误 * 否则返回 {@code null}调用方可据此决定降级为默认类型并记录错误
* *
* @param name 类型名称 "int", "string", "void" * @param name 类型名称 "int", "string", "void"
* @return 对应的 {@link Type} {@code null} 表示未知类型 * @return 对应的 {@link Type} {@code null} 表示未知类型
*/ */
public Type parseType(String name) { public Type parseType(String name) {
return org.jcnc.snow.compiler.semantic.core.SemanticAnalyzer.BUILTIN_TYPES.get(name); return BuiltinTypeRegistry.BUILTIN_TYPES.get(name);
} }
} }

View File

@ -0,0 +1,33 @@
package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.parser.ast.*;
import org.jcnc.snow.compiler.semantic.error.SemanticError;
import org.jcnc.snow.compiler.semantic.symbol.*;
import org.jcnc.snow.compiler.semantic.type.BuiltinType;
import java.util.*;
/**
 * Walks every function body and dispatches each statement to the statement
 * analyzer registered for it.
 */
public class FunctionChecker {
    private final Context ctx;

    /**
     * @param ctx the analysis context supplying the module map, the analyzer registry
     *            and the error list
     */
    public FunctionChecker(Context ctx) {
        this.ctx = ctx;
    }

    /**
     * Checks all functions of the given modules.
     * <p>
     * For each function a fresh local scope is created and seeded with the function's
     * parameters; every statement of the body is then handed to its registered analyzer.
     * A statement with no registered analyzer is recorded as a {@link SemanticError}.
     *
     * @param mods the modules whose functions are checked
     */
    public void check(Iterable<ModuleNode> mods) {
        for (ModuleNode mod : mods) {
            ModuleInfo mi = ctx.modules().get(mod.name());
            for (FunctionNode fn : mod.functions()) {
                SymbolTable locals = parameterScope(fn);
                for (var stmt : fn.body()) {
                    var analyzer = ctx.getRegistry().getStatementAnalyzer(stmt);
                    if (analyzer != null) {
                        analyzer.analyze(ctx, mi, fn, locals, stmt);
                    } else {
                        ctx.errors().add(new SemanticError(stmt, "不支持的语句类型: " + stmt));
                    }
                }
            }
        }
    }

    /** Builds the function's local scope with each parameter defined as a variable. */
    private SymbolTable parameterScope(FunctionNode fn) {
        SymbolTable locals = new SymbolTable(null);
        for (var p : fn.parameters()) {
            var type = ctx.parseType(p.type());
            if (type == null) {
                // parseType returns null for an unknown type name. Fall back to INT, the
                // same default SignatureRegistrar uses for the function signature, so the
                // symbol never carries a null type. No error is added here because
                // SignatureRegistrar already reports the unknown parameter type.
                type = BuiltinType.INT;
            }
            locals.define(new Symbol(p.name(), type, SymbolKind.VARIABLE));
        }
        return locals;
    }
}

View File

@ -1,11 +1,9 @@
package org.jcnc.snow.compiler.semantic.core; package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.semantic.type.BuiltinType;
import org.jcnc.snow.compiler.semantic.type.FunctionType; import org.jcnc.snow.compiler.semantic.type.FunctionType;
import java.util.HashMap; import java.util.*;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/** /**
* 模块信息保存单个模块在语义分析阶段的元数据 * 模块信息保存单个模块在语义分析阶段的元数据
@ -66,4 +64,12 @@ public class ModuleInfo {
public Map<String, FunctionType> getFunctions() { public Map<String, FunctionType> getFunctions() {
return functions; return functions;
} }
/**
 * Creates the module info for the built-in {@code BuiltinUtils} module, with the
 * signatures of {@code to_int} (string to int), {@code to_string} (int to string)
 * and {@code print} (string to void) already registered.
 *
 * @return a new {@code ModuleInfo} instance on every call
 */
public static ModuleInfo builtin() {
    ModuleInfo builtinModule = new ModuleInfo("BuiltinUtils");
    var signatures = builtinModule.getFunctions();
    signatures.put("to_int", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.INT));
    signatures.put("to_string", new FunctionType(List.of(BuiltinType.INT), BuiltinType.STRING));
    signatures.put("print", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.VOID));
    return builtinModule;
}
} }

View File

@ -0,0 +1,17 @@
package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.parser.ast.ModuleNode;
/**
 * Adds an entry for each user module to the context's module map.
 */
public class ModuleRegistry {
    private final Context ctx;

    /**
     * @param ctx the analysis context whose module map receives the entries
     */
    public ModuleRegistry(Context ctx) {
        this.ctx = ctx;
    }

    /**
     * Registers an empty {@link ModuleInfo} under each module's name. A name that
     * already has an entry in the map keeps its existing entry.
     *
     * @param mods the user modules to register
     */
    public void registerUserModules(Iterable<ModuleNode> mods) {
        for (ModuleNode module : mods) {
            String moduleName = module.name();
            ctx.modules().putIfAbsent(moduleName, new ModuleInfo(moduleName));
        }
    }
}

View File

@ -1,295 +1,47 @@
package org.jcnc.snow.compiler.semantic.core; package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.parser.ast.*; import org.jcnc.snow.compiler.parser.ast.ModuleNode;
import org.jcnc.snow.compiler.parser.ast.base.StatementNode;
import org.jcnc.snow.compiler.semantic.analyzers.AnalyzerRegistry; import org.jcnc.snow.compiler.semantic.analyzers.AnalyzerRegistry;
import org.jcnc.snow.compiler.semantic.error.SemanticError; import org.jcnc.snow.compiler.semantic.error.SemanticError;
import org.jcnc.snow.compiler.semantic.analyzers.expression.*;
import org.jcnc.snow.compiler.semantic.analyzers.statement.*;
import org.jcnc.snow.compiler.semantic.symbol.Symbol;
import org.jcnc.snow.compiler.semantic.symbol.SymbolKind;
import org.jcnc.snow.compiler.semantic.symbol.SymbolTable;
import org.jcnc.snow.compiler.semantic.type.BuiltinType;
import org.jcnc.snow.compiler.semantic.type.FunctionType;
import org.jcnc.snow.compiler.semantic.type.Type;
import java.util.*; import java.util.*;
/** /**
* 注册表 + 分析器组合的分而治之设计 * 语义分析总控仅负责调用各独立组件完成完整流程
* <p>
* 主要职责
* <ul>
* <li>初始化内置模块与类型映射</li>
* <li>注册用户模块及其函数签名与导入关系</li>
* <li>使用 StatementAnalyzer ExpressionAnalyzer 进行递归语义检查</li>
* <li>收集并返回所有发现的语义错误</li>
* </ul>
*/ */
public class SemanticAnalyzer { public class SemanticAnalyzer {
/**
* 内置类型映射表将类型名称映射到对应的 {@link Type} 实例
*/
public static final Map<String, Type> BUILTIN_TYPES = Map.of(
"int", BuiltinType.INT,
"long", BuiltinType.LONG,
"short", BuiltinType.SHORT,
"byte", BuiltinType.BYTE,
"float", BuiltinType.FLOAT,
"double", BuiltinType.DOUBLE,
"string", BuiltinType.STRING,
"void", BuiltinType.VOID
);
/** 所有已注册模块名称 -> {@link ModuleInfo} 的映射 */
private final Map<String, ModuleInfo> modules = new HashMap<>();
/** 收集语义分析过程中产生的所有错误 */
private final List<SemanticError> errors = new ArrayList<>();
/** 分发语句与表达式分析器的注册表 */
private final AnalyzerRegistry registry = new AnalyzerRegistry();
/** 全局上下文对象,封装 modules、errors、verbose 与 registry */
private final Context ctx; private final Context ctx;
private final AnalyzerRegistry registry = new AnalyzerRegistry();
// 组件
private final ModuleRegistry moduleRegistry;
private final SignatureRegistrar signatureRegistrar;
private final FunctionChecker functionChecker;
/**
* 构造语义分析器
*
* @param verbose 是否启用详细日志输出
*/
public SemanticAnalyzer(boolean verbose) { public SemanticAnalyzer(boolean verbose) {
/* 是否启用详细日志输出 */ this.ctx = new Context(new HashMap<>(), new ArrayList<>(), verbose, registry);
this.ctx = new Context(modules, errors, verbose, registry);
registerAnalyzers(); // 初始化内置模块与分析器注册表
BuiltinTypeRegistry.init(ctx);
AnalyzerRegistrar.registerAll(registry);
// 其余组件
this.moduleRegistry = new ModuleRegistry(ctx);
this.signatureRegistrar = new SignatureRegistrar(ctx);
this.functionChecker = new FunctionChecker(ctx);
} }
/**
* 注册所有的语句分析器 (StatementAnalyzer) 和表达式分析器 (ExpressionAnalyzer)
* {@link AnalyzerRegistry} 以便框架在遍历 AST 时能够根据节点类型分发到对应的分析器
* <p>
* <strong>语句分析器</strong>StatementAnalyzer
* <ul>
* <li>{@link DeclarationNode} {@link DeclarationAnalyzer}处理变量声明</li>
* <li>{@link AssignmentNode} {@link AssignmentAnalyzer}处理赋值语句</li>
* <li>{@link IfNode} {@link IfAnalyzer}处理条件分支</li>
* <li>{@link LoopNode} {@link LoopAnalyzer}处理循环语句</li>
* <li>{@link ReturnNode} {@link ReturnAnalyzer}处理返回语句</li>
* <li>{@link ExpressionStatementNode} 直接委托给相应的表达式分析器</li>
* </ul>
* <p>
* <strong>表达式分析器</strong>ExpressionAnalyzer
* <ul>
* <li>{@link NumberLiteralNode} {@link NumberLiteralAnalyzer}数字字面量</li>
* <li>{@link StringLiteralNode} {@link StringLiteralAnalyzer}字符串字面量</li>
* <li>{@link IdentifierNode} {@link IdentifierAnalyzer}标识符</li>
* <li>{@link CallExpressionNode} {@link CallExpressionAnalyzer}函数调用</li>
* <li>{@link BinaryExpressionNode} {@link BinaryExpressionAnalyzer}二元运算</li>
* </ul>
* <p>
* <strong>默认兜底</strong>
* <ul>
* <li>未注册的表达式节点会由 {@link UnsupportedExpressionAnalyzer} 处理记录错误并降级类型</li>
* </ul>
*/
private void registerAnalyzers() {
// -----------------------------------------------------------------
// 语句分析器注册StatementAnalyzer
// -----------------------------------------------------------------
// 变量声明语句DeclarationNode
registry.registerStatementAnalyzer(
DeclarationNode.class,
new DeclarationAnalyzer()
);
// 赋值语句AssignmentNode
registry.registerStatementAnalyzer(
AssignmentNode.class,
new AssignmentAnalyzer()
);
// if 分支语句IfNode
registry.registerStatementAnalyzer(
IfNode.class,
new IfAnalyzer()
);
// loop 循环语句LoopNode
registry.registerStatementAnalyzer(
LoopNode.class,
new LoopAnalyzer()
);
// return 语句ReturnNode
registry.registerStatementAnalyzer(
ReturnNode.class,
new ReturnAnalyzer()
);
// 表达式语句仅执行表达式不产生新的值ExpressionStatementNode
registry.registerStatementAnalyzer(
ExpressionStatementNode.class,
(ctx, mi, fn, locals, stmt) -> {
// 将表达式语句委托给对应的表达式分析器
var exprAnalyzer = ctx.getRegistry().getExpressionAnalyzer(stmt.expression());
exprAnalyzer.analyze(ctx, mi, fn, locals, stmt.expression());
}
);
// -----------------------------------------------------------------
// 表达式分析器注册ExpressionAnalyzer
// -----------------------------------------------------------------
// 数字字面量NumberLiteralNode INT
registry.registerExpressionAnalyzer(
NumberLiteralNode.class,
new NumberLiteralAnalyzer()
);
// 字符串字面量StringLiteralNode STRING
registry.registerExpressionAnalyzer(
StringLiteralNode.class,
new StringLiteralAnalyzer()
);
// 标识符IdentifierNode 查符号表
registry.registerExpressionAnalyzer(
IdentifierNode.class,
new IdentifierAnalyzer()
);
// 函数调用CallExpressionNode 查函数签名并校验参数
registry.registerExpressionAnalyzer(
CallExpressionNode.class,
new CallExpressionAnalyzer()
);
// 二元运算BinaryExpressionNode 根据运算符和操作数推导类型
registry.registerExpressionAnalyzer(
BinaryExpressionNode.class,
new BinaryExpressionAnalyzer()
);
// -----------------------------------------------------------------
// 默认兜底所有未显式注册的 MemberExpressionNode 及其它表达式
// -----------------------------------------------------------------
// 如果遇到不支持的表达式节点类型将由此分析器记录错误并降级为 INT
registry.registerExpressionAnalyzer(
MemberExpressionNode.class,
new UnsupportedExpressionAnalyzer<>()
);
}
/** /**
* 对给定的模块 AST 列表执行完整的语义分析 * 执行完整语义分析流程
*
* @param moduleNodes 所有待分析的 {@link ModuleNode} 列表
* @return 收集到的所有 {@link SemanticError}如果无错误则返回空列表
*/ */
public List<SemanticError> analyze(List<ModuleNode> moduleNodes) { public List<SemanticError> analyze(List<ModuleNode> modules) {
ctx.log("开始语义分析"); ctx.log("开始语义分析");
initBuiltinModule();
ctx.log("内置模块初始化完成");
registerUserModules(moduleNodes); moduleRegistry.registerUserModules(modules);
ctx.log("用户模块注册完成: " + modules.keySet()); signatureRegistrar.register(modules);
functionChecker.check(modules);
registerSignaturesAndImports(moduleNodes); ctx.log("分析完成,错误总数: " + ctx.errors().size());
ctx.log("函数签名与导入检查完成"); return ctx.errors();
checkAllFunctions(moduleNodes);
ctx.log("所有函数检查完成,错误总数: " + errors.size());
return errors;
}
/**
* 初始化内置工具模块及其函数签名
* <p>
* 包括to_intto_stringprint
*/
private void initBuiltinModule() {
ModuleInfo builtin = new ModuleInfo("BuiltinUtils");
builtin.getFunctions().put("to_int", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.INT));
builtin.getFunctions().put("to_string", new FunctionType(List.of(BuiltinType.INT), BuiltinType.STRING));
builtin.getFunctions().put("print", new FunctionType(List.of(BuiltinType.STRING), BuiltinType.VOID));
modules.put(builtin.getName(), builtin);
}
/**
* 注册所有用户模块到模块映射中只保留名称用于后续签名与导入检查
*
* @param mods AST 中的模块节点列表
*/
private void registerUserModules(List<ModuleNode> mods) {
for (ModuleNode mod : mods) {
modules.put(mod.name(), new ModuleInfo(mod.name()));
}
}
/**
* 为每个模块注册其导入关系与函数签名
* <p>
* 对未知模块或类型均会记录 {@link SemanticError} 并进行默认降级处理
*
* @param mods AST 中的模块节点列表
*/
private void registerSignaturesAndImports(List<ModuleNode> mods) {
for (ModuleNode mod : mods) {
ModuleInfo mi = modules.get(mod.name());
// 模块导入检查
for (ImportNode imp : mod.imports()) {
if (!modules.containsKey(imp.moduleName())) {
errors.add(new SemanticError(imp, "未知模块: " + imp.moduleName()));
} else {
mi.getImports().add(imp.moduleName());
}
}
// 函数签名注册
for (FunctionNode fn : mod.functions()) {
List<Type> params = new ArrayList<>();
for (ParameterNode p : fn.parameters()) {
Type t = ctx.parseType(p.type());
if (t == null) {
errors.add(new SemanticError(p, "未知类型: " + p.type()));
t = BuiltinType.INT;
}
params.add(t);
}
Type retType = Optional.ofNullable(ctx.parseType(fn.returnType()))
.orElse(BuiltinType.VOID);
mi.getFunctions().put(fn.name(), new FunctionType(params, retType));
}
}
}
/**
* 对所有模块中的每个函数体执行逐语句分析收集语义错误
*
* @param mods AST 中的模块节点列表
*/
private void checkAllFunctions(List<ModuleNode> mods) {
for (ModuleNode mod : mods) {
ModuleInfo mi = modules.get(mod.name());
for (FunctionNode fn : mod.functions()) {
SymbolTable locals = new SymbolTable(null);
// 参数预定义为局部变量
for (ParameterNode p : fn.parameters()) {
locals.define(new Symbol(p.name(),
ctx.parseType(p.type()),
SymbolKind.VARIABLE));
}
// 逐语句分发给对应的 StatementAnalyzer
for (StatementNode stmt : fn.body()) {
var analyzer = registry.getStatementAnalyzer(stmt);
if (analyzer != null) {
analyzer.analyze(ctx, mi, fn, locals, stmt);
} else {
errors.add(new SemanticError(stmt, "不支持的语句类型: " + stmt));
}
}
}
}
} }
} }

View File

@ -0,0 +1,45 @@
package org.jcnc.snow.compiler.semantic.core;
import org.jcnc.snow.compiler.parser.ast.*;
import org.jcnc.snow.compiler.semantic.error.SemanticError;
import org.jcnc.snow.compiler.semantic.type.*;
import java.util.*;
/**
 * Records function signatures for each module and validates its imports.
 */
public class SignatureRegistrar {
    private final Context ctx;

    /**
     * @param ctx the analysis context supplying the module map, type lookup and error list
     */
    public SignatureRegistrar(Context ctx) {
        this.ctx = ctx;
    }

    /**
     * Processes each module in turn: first its imports, then its function signatures.
     *
     * @param mods the modules to process; each is looked up by name in the context's
     *             module map
     */
    public void register(Iterable<ModuleNode> mods) {
        for (ModuleNode module : mods) {
            ModuleInfo info = ctx.modules().get(module.name());
            recordImports(module, info);
            recordSignatures(module, info);
        }
    }

    /** Adds each import that names a known module to {@code info}; reports the others. */
    private void recordImports(ModuleNode module, ModuleInfo info) {
        for (ImportNode imported : module.imports()) {
            var target = imported.moduleName();
            if (ctx.modules().containsKey(target)) {
                info.getImports().add(target);
            } else {
                ctx.errors().add(new SemanticError(imported, "未知模块: " + target));
            }
        }
    }

    /** Stores a {@link FunctionType} in {@code info} for every function of the module. */
    private void recordSignatures(ModuleNode module, ModuleInfo info) {
        for (FunctionNode fn : module.functions()) {
            List<Type> paramTypes = new ArrayList<>();
            for (ParameterNode param : fn.parameters()) {
                paramTypes.add(resolveParameterType(param));
            }

            // An unresolvable return type silently becomes VOID (no error is recorded).
            Type returnType = ctx.parseType(fn.returnType());
            if (returnType == null) {
                returnType = BuiltinType.VOID;
            }
            info.getFunctions().put(fn.name(), new FunctionType(paramTypes, returnType));
        }
    }

    /** Resolves a parameter's type; an unknown type is reported and replaced by INT. */
    private Type resolveParameterType(ParameterNode param) {
        Type resolved = ctx.parseType(param.type());
        if (resolved != null) {
            return resolved;
        }
        ctx.errors().add(new SemanticError(param, "未知类型: " + param.type()));
        return BuiltinType.INT;
    }
}