From d0e8cee6bd884196b13a1b100d12b7c916e2782b Mon Sep 17 00:00:00 2001 From: Luke Date: Thu, 17 Jul 2025 11:47:19 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E6=95=B0=E5=AD=97?= =?UTF-8?q?=E5=AD=97=E9=9D=A2=E9=87=8F=E4=B8=AD=E7=9A=84=E4=B8=8B=E5=88=92?= =?UTF-8?q?=E7=BA=BF=E5=88=86=E9=9A=94=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 增加了对数字中下划线的处理逻辑 - 添加了防止下划线连续出现、以下划线开头或结尾的校验 - 优化了小数部分的下划线检查 - 修复了数字后紧跟下划线的错误处理 - 最后返回的数字字面量中将移除所有的下划线 --- .../lexer/scanners/NumberTokenScanner.java | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/jcnc/snow/compiler/lexer/scanners/NumberTokenScanner.java b/src/main/java/org/jcnc/snow/compiler/lexer/scanners/NumberTokenScanner.java index 40575c9..c655595 100644 --- a/src/main/java/org/jcnc/snow/compiler/lexer/scanners/NumberTokenScanner.java +++ b/src/main/java/org/jcnc/snow/compiler/lexer/scanners/NumberTokenScanner.java @@ -80,6 +80,9 @@ public class NumberTokenScanner extends AbstractTokenScanner { StringBuilder literal = new StringBuilder(); State state = State.INT_PART; + boolean lastWasUnderscore = false; // 记录前一个是否是下划线 + boolean sawDigit = false; // 当前段落是否有数字(防止以下划线开头) + /* ───── 1. 主体扫描 —— 整数 / 小数 ───── */ mainLoop: while (!ctx.isAtEnd() && state != State.END) { @@ -89,10 +92,25 @@ public class NumberTokenScanner extends AbstractTokenScanner { case INT_PART: if (Character.isDigit(ch)) { literal.append(ctx.advance()); + lastWasUnderscore = false; + sawDigit = true; + } else if (ch == '_') { + if (!sawDigit) + throw new LexicalException("数字不能以下划线开头", line, col); + if (lastWasUnderscore) + throw new LexicalException("数字中下划线不能连续出现", line, col); + literal.append(ctx.advance()); + lastWasUnderscore = true; } else if (ch == '.') { + if (lastWasUnderscore) + throw new LexicalException("下划线不能出现在小数点前", line, col); state = State.DEC_POINT; literal.append(ctx.advance()); + // 不要重置sawDigit! + // sawDigit = false; // 移除此句 } else { + if (lastWasUnderscore) + throw new LexicalException("数字不能以下划线结尾", line, col); state = State.END; } break; @@ -100,18 +118,30 @@ public class NumberTokenScanner extends AbstractTokenScanner { /* 已读到小数点,下一字符必须是数字 */ case DEC_POINT: if (Character.isDigit(ch)) { - state = State.FRAC_PART; literal.append(ctx.advance()); + state = State.FRAC_PART; + sawDigit = true; + } else if (ch == '_') { // 防止小数点后直接跟下划线 + throw new LexicalException("小数点后不能直接跟下划线", line, col); } else { throw new LexicalException("小数点后必须跟数字", line, col); } break; /* 小数部分 */ + case FRAC_PART: if (Character.isDigit(ch)) { literal.append(ctx.advance()); + lastWasUnderscore = false; + } else if (ch == '_') { // 小数部分下划线检查 + if (lastWasUnderscore) + throw new LexicalException("数字中下划线不能连续出现", line, col); + literal.append(ctx.advance()); + lastWasUnderscore = true; } else { + if (lastWasUnderscore) + throw new LexicalException("数字不能以下划线结尾", line, col); state = State.END; } break; @@ -121,6 +151,10 @@ public class NumberTokenScanner extends AbstractTokenScanner { } } + // 主体结束后,下划线不能在末尾 + if (lastWasUnderscore) + throw new LexicalException("数字不能以下划线结尾", line, col); + /* ───── 2. 后缀及非法尾随字符检查 ───── */ if (!ctx.isAtEnd()) { char next = ctx.peek(); @@ -141,11 +175,11 @@ public class NumberTokenScanner extends AbstractTokenScanner { } } - /* 2-B. **非法字母**(既不是后缀,也没有空白隔开) */ + /* 2-B. **非法字母**(既不是后缀,也没有空白隔开) */ } else if (Character.isLetter(next)) { throw new LexicalException( "数字后不能紧跟未知标识符 '" + next + "'", line, col); - /* 2-C. **非法下划线** */ + /* 2-C. **非法下划线** */ } else if (next == '_') { throw new LexicalException( "数字后不能紧跟下划线 '_'", line, col); @@ -154,7 +188,7 @@ public class NumberTokenScanner extends AbstractTokenScanner { } /* ───── 3. 生成并返回 Token ───── */ - return new Token(TokenType.NUMBER_LITERAL, literal.toString(), line, col); + return new Token(TokenType.NUMBER_LITERAL, literal.toString().replace("_", ""), line, col); } /**