fix: 修复常量类型在 VM 中的表示问题
- 为浮点数添加 f 后缀,避免在 VM 中被错误解释为双精度浮点数
- 为长整数添加 L 后缀,保留类型信息
- 双精度浮点数和整数使用默认格式输出
- 支持数组常量的递归序列化,保留类型信息
- 优化了常量生成逻辑,提高了代码可读性和维护性
This commit is contained in:
parent
f6c877a7a1
commit
f61cfa5638
@ -7,21 +7,93 @@ import org.jcnc.snow.compiler.ir.instruction.LoadConstInstruction;
|
||||
import org.jcnc.snow.compiler.ir.value.IRConstant;
|
||||
import org.jcnc.snow.compiler.ir.value.IRVirtualRegister;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * <b>LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction}</b>
 *
 * <p>
 * This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions.
 * If the constant is a {@code String}, it will also be registered in the
 * {@link CallGenerator} string constant pool to support syscall downgrade scenarios.
 * </p>
 *
 * <p>
 * When the constant is an array ({@code List}), type information is preserved in the R_PUSH payload:
 * </p>
 * <ul>
 * <li>Float is output with an <code>f</code> suffix (e.g., 0.1f);</li>
 * <li>Long is output with an <code>L</code> suffix (e.g., 123L);</li>
 * <li>Double/Integer are output in their default format (e.g., 1.0, 42);</li>
 * <li>Nested arrays are serialized recursively.</li>
 * </ul>
 * <p>
 * This prevents float values from being misinterpreted as double on the VM side,
 * and avoids Double→Float cast exceptions in later F_STORE operations.
 * </p>
 */
|
||||
public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> {
|
||||
|
||||
/**
|
||||
* 指定本生成器支持的 IR 指令类型(LoadConstInstruction)
|
||||
* Formats a constant value as a string for use as a VM payload.
|
||||
* Lists are recursively serialized, and Float/Long types include suffixes to preserve type information.
|
||||
*
|
||||
* @param v The constant value to format.
|
||||
* @return The formatted string for use in VM code.
|
||||
*/
|
||||
private static String formatConst(Object v) {
|
||||
return formatConst(v, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper for recursively formatting constant values (including nested arrays)
|
||||
* with appropriate type suffixes for array payloads.
|
||||
*
|
||||
* @param v The constant value to format.
|
||||
* @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied.
|
||||
* @return The formatted string for use in VM code.
|
||||
*/
|
||||
private static String formatConst(Object v, boolean insideArray) {
|
||||
if (v instanceof List<?> list) {
|
||||
// Recursively process each element in the list
|
||||
return "[" + list.stream()
|
||||
.map(x -> formatConst(x, true))
|
||||
.collect(Collectors.joining(", ")) + "]";
|
||||
}
|
||||
if (v instanceof String s) {
|
||||
return s;
|
||||
}
|
||||
if (v instanceof Float f) {
|
||||
// Always keep .0 for integer values
|
||||
float fv = f;
|
||||
String s = (fv == (long) fv) ? String.format("%.1f", fv) : f.toString();
|
||||
return insideArray ? (s + "f") : s;
|
||||
}
|
||||
if (v instanceof Long l) {
|
||||
return insideArray ? (l + "L") : l.toString();
|
||||
}
|
||||
if (v instanceof Double d) {
|
||||
double dv = d;
|
||||
// Always keep .0 for integer values
|
||||
return (dv == (long) dv) ? String.format("%.1f", dv) : Double.toString(dv);
|
||||
}
|
||||
if (v instanceof Short s) {
|
||||
return insideArray ? (s + "s") : Short.toString(s);
|
||||
}
|
||||
if (v instanceof Byte b) {
|
||||
return insideArray ? (b + "b") : Byte.toString(b);
|
||||
}
|
||||
if (v instanceof Boolean b) {
|
||||
return b ? "1" : "0";
|
||||
}
|
||||
return String.valueOf(v);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Specifies the type of IR instruction supported by this generator.
|
||||
*
|
||||
* @return The class object representing {@link LoadConstInstruction}.
|
||||
*/
|
||||
@Override
|
||||
public Class<LoadConstInstruction> supportedClass() {
|
||||
@ -29,12 +101,16 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
}
|
||||
|
||||
/**
|
||||
* 生成 VM 指令主流程
|
||||
* Generates the VM instructions for a given {@link LoadConstInstruction}.
|
||||
* <p>
|
||||
* This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions,
|
||||
* marking the local slot type for later operations, and registering string constants if necessary.
|
||||
* </p>
|
||||
*
|
||||
* @param ins 当前常量加载指令
|
||||
* @param out 指令输出构建器
|
||||
* @param slotMap 虚拟寄存器与物理槽位映射
|
||||
* @param currentFn 当前函数名
|
||||
* @param ins The {@link LoadConstInstruction} to generate code for.
|
||||
* @param out The {@link VMProgramBuilder} used to collect the generated instructions.
|
||||
* @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices.
|
||||
* @param currentFn The name of the current function.
|
||||
*/
|
||||
@Override
|
||||
public void generate(LoadConstInstruction ins,
|
||||
@ -42,35 +118,35 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
Map<IRVirtualRegister, Integer> slotMap,
|
||||
String currentFn) {
|
||||
|
||||
/* 1. 获取常量值 */
|
||||
// 1. Get the constant value
|
||||
IRConstant constant = (IRConstant) ins.operands().getFirst();
|
||||
Object value = constant.value();
|
||||
|
||||
/* 2. 生成 PUSH 指令,将常量值入栈 */
|
||||
out.emit(OpHelper.pushOpcodeFor(value) + " " + value);
|
||||
// 2. Generate PUSH instruction (array constants use type-aware formatting)
|
||||
String payload = formatConst(value);
|
||||
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
|
||||
|
||||
/* 3. STORE 到目标槽位 */
|
||||
// 3. STORE the result to the destination slot
|
||||
int slot = slotMap.get(ins.dest());
|
||||
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
|
||||
|
||||
/* 4. 标记槽位数据类型(用于后续类型推断和 LOAD/STORE 指令选择) */
|
||||
// 4. Mark the slot's data type for later inference and instruction selection
|
||||
char prefix = switch (value) {
|
||||
case Integer _ -> 'I'; // 整型
|
||||
case Long _ -> 'L'; // 长整型
|
||||
case Short _ -> 'S'; // 短整型
|
||||
case Byte _ -> 'B'; // 字节型
|
||||
case Double _ -> 'D'; // 双精度
|
||||
case Float _ -> 'F'; // 单精度
|
||||
case Boolean _ -> 'I'; // 布尔类型用 I 处理
|
||||
case String _ -> 'R'; // 字符串常量
|
||||
case java.util.List<?> _ -> 'R'; // 引用类型(如数组等)
|
||||
case null, default ->
|
||||
throw new IllegalStateException("未知的常量类型: "
|
||||
case Integer _ -> 'I'; // Integer
|
||||
case Long _ -> 'L'; // Long
|
||||
case Short _ -> 'S'; // Short
|
||||
case Byte _ -> 'B'; // Byte
|
||||
case Double _ -> 'D'; // Double
|
||||
case Float _ -> 'F'; // Float
|
||||
case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0)
|
||||
case String _ -> 'R'; // String constant
|
||||
case java.util.List<?> _ -> 'R'; // Reference type (arrays, etc.)
|
||||
case null, default -> throw new IllegalStateException("Unknown constant type: "
|
||||
+ (value != null ? value.getClass() : null));
|
||||
};
|
||||
out.setSlotType(slot, prefix);
|
||||
|
||||
/* 5. 如果是字符串常量,则登记到 CallGenerator 的常量池,便于 syscall 字符串降级使用 */
|
||||
// 5. If the constant is a string, register it for the CallGenerator string pool
|
||||
if (value instanceof String s) {
|
||||
CallGenerator.registerStringConst(ins.dest().id(), s);
|
||||
}
|
||||
|
||||
@ -38,48 +38,12 @@ public final class RPushCommand implements Command {
|
||||
|
||||
// ======== Parsing helpers ========
|
||||
|
||||
private static final class Cursor {
|
||||
final String s;
|
||||
int i;
|
||||
Cursor(String s) { this.s = s; this.i = 0; }
|
||||
boolean end() { return i >= s.length(); }
|
||||
char ch() { return s.charAt(i); }
|
||||
}
|
||||
|
||||
@Override
|
||||
public int execute(String[] parts, int pc,
|
||||
OperandStack stack,
|
||||
LocalVariableStore lvs,
|
||||
CallStack cs) {
|
||||
|
||||
if (parts.length < 2)
|
||||
throw new IllegalStateException("R_PUSH missing parameter");
|
||||
|
||||
// Join all arguments after R_PUSH into a single string, separated by spaces.
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 1; i < parts.length; i++) {
|
||||
if (i > 1) sb.append(' ');
|
||||
sb.append(parts[i]);
|
||||
}
|
||||
String literal = sb.toString().trim();
|
||||
|
||||
// If the literal is an array (e.g., [1, 2, "foo"]), parse elements and push as an unmodifiable list.
|
||||
if (literal.startsWith("[") && literal.endsWith("]")) {
|
||||
Object parsed = parseValue(new Cursor(literal));
|
||||
if (!(parsed instanceof List<?> list)) {
|
||||
// Should never happen for a bracketed value, but keep a guard.
|
||||
stack.push(parsed);
|
||||
} else {
|
||||
stack.push(deepUnmodifiable(list));
|
||||
}
|
||||
} else {
|
||||
// Otherwise, push the string literal as-is.
|
||||
stack.push(literal);
|
||||
}
|
||||
return pc + 1;
|
||||
}
|
||||
|
||||
/** Deeply wrap lists as unmodifiable; leave scalars intact. */
|
||||
/**
|
||||
* Deeply wraps lists as unmodifiable; leaves scalars unchanged.
|
||||
*
|
||||
* @param v input object
|
||||
* @return deeply unmodifiable version of the object
|
||||
*/
|
||||
private static Object deepUnmodifiableObject(Object v) {
|
||||
if (v instanceof List<?> l) {
|
||||
return deepUnmodifiable(l);
|
||||
@ -87,14 +51,26 @@ public final class RPushCommand implements Command {
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively wraps all nested lists as unmodifiable.
|
||||
*
|
||||
* @param l input list
|
||||
* @return deeply unmodifiable list
|
||||
*/
|
||||
private static List<?> deepUnmodifiable(List<?> l) {
|
||||
List<Object> out = new ArrayList<>(l.size());
|
||||
for (Object v : l) out.add(deepUnmodifiableObject(v));
|
||||
return Collections.unmodifiableList(out);
|
||||
}
|
||||
|
||||
// ======== Recursive-descent parser for array literals ========
|
||||
|
||||
/**
|
||||
* Parses a value starting from the cursor.
|
||||
* Skips whitespace, and delegates to the appropriate sub-parser depending on the character:
|
||||
* array, quoted string, or atomic value.
|
||||
*
|
||||
* @param c cursor
|
||||
* @return parsed value (Object)
|
||||
*/
|
||||
private static Object parseValue(Cursor c) {
|
||||
skipWs(c);
|
||||
if (c.end()) return "";
|
||||
@ -104,6 +80,13 @@ public final class RPushCommand implements Command {
|
||||
return parseAtom(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an array literal from the cursor, supporting nested structures.
|
||||
* Assumes the current character is '['.
|
||||
*
|
||||
* @param c cursor
|
||||
* @return List of parsed objects
|
||||
*/
|
||||
private static List<Object> parseArray(Cursor c) {
|
||||
// assumes current char is '['
|
||||
expect(c, '[');
|
||||
@ -126,74 +109,254 @@ public final class RPushCommand implements Command {
|
||||
return values;
|
||||
}
|
||||
|
||||
// ======== Recursive-descent parser for array literals ========
|
||||
|
||||
/**
|
||||
* Parses a string literal wrapped in double quotes (supports common escape sequences).
|
||||
* <p>
|
||||
* Assumes the cursor currently points to the starting quote character ("), and consumes the opening quote.
|
||||
* Parses the string content from the cursor, stopping at the closing quote (").
|
||||
* Supported escape sequences:
|
||||
* <ul>
|
||||
* <li>\n newline</li>
|
||||
* <li>\r carriage return</li>
|
||||
* <li>\t tab</li>
|
||||
* <li>\" double quote itself</li>
|
||||
* <li>\\ backslash</li>
|
||||
* <li>Other characters are output as-is</li>
|
||||
* </ul>
|
||||
* If the string is not closed properly (i.e., no closing quote is found before the end), returns the currently parsed content.
|
||||
*
|
||||
* @param c cursor object (must support ch() for current char, i for index, end() for boundary check)
|
||||
* @return parsed string content
|
||||
*/
|
||||
private static String parseQuoted(Cursor c) {
|
||||
// assumes current char is '"'
|
||||
// Assume current position is the opening quote; consume it
|
||||
expect(c, '\"');
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
// Traverse until the end or an unclosed string
|
||||
while (!c.end()) {
|
||||
char ch = c.ch();
|
||||
c.i++;
|
||||
if (ch == '\\') { // escape
|
||||
if (c.end()) break;
|
||||
if (ch == '\\') { // handle escape sequences
|
||||
if (c.end()) break; // nothing after escape char
|
||||
char nxt = c.ch();
|
||||
c.i++;
|
||||
// Common escapes
|
||||
switch (nxt) {
|
||||
case 'n' -> sb.append('\n');
|
||||
case 'r' -> sb.append('\r');
|
||||
case 't' -> sb.append('\t');
|
||||
case '\"' -> sb.append('\"');
|
||||
case '\\' -> sb.append('\\');
|
||||
default -> sb.append(nxt);
|
||||
case 'n' -> sb.append('\n'); // newline
|
||||
case 'r' -> sb.append('\r'); // carriage return
|
||||
case 't' -> sb.append('\t'); // tab
|
||||
case '\"' -> sb.append('\"'); // double quote
|
||||
case '\\' -> sb.append('\\'); // backslash
|
||||
default -> sb.append(nxt); // any other char as-is
|
||||
}
|
||||
} else if (ch == '\"') {
|
||||
// end quote
|
||||
// Found closing quote; end of string
|
||||
return sb.toString();
|
||||
} else {
|
||||
// Regular character
|
||||
sb.append(ch);
|
||||
}
|
||||
}
|
||||
// Unclosed string: return what we have
|
||||
// Unclosed string, return parsed content
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an atomic constant ("atom"), supporting type-suffixed numbers and booleans.
|
||||
* <p>
|
||||
* Examples: 0.1f, 123L, 3.14d, 100, true, false<br>
|
||||
* Parsing rules:
|
||||
* <ul>
|
||||
* <li>Supports float(f/F), long(l/L), double(d/D), short(s/S), byte(b/B) type suffixes</li>
|
||||
* <li>Supports boolean true/false (case-insensitive, converted to 1/0)</li>
|
||||
* <li>Decimals without suffix parsed as double; integers without suffix as int</li>
|
||||
* <li>If parsing fails, returns the original string</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param c cursor, must support ch() for current char, i for index, end() for boundary check, s for the original string
|
||||
* @return parsed Object
|
||||
*/
|
||||
private static Object parseAtom(Cursor c) {
|
||||
int start = c.i;
|
||||
// Read until a comma, ']' or whitespace
|
||||
while (!c.end()) {
|
||||
char ch = c.ch();
|
||||
if (ch == ',' || ch == ']' ) break;
|
||||
if (ch == ',' || ch == ']') break;
|
||||
if (Character.isWhitespace(ch)) break;
|
||||
c.i++;
|
||||
}
|
||||
// Extract current token
|
||||
String token = c.s.substring(start, c.i).trim();
|
||||
if (token.isEmpty()) return "";
|
||||
// booleans
|
||||
// Boolean parsing (case-insensitive, convert to 1/0)
|
||||
if ("true".equalsIgnoreCase(token)) return 1;
|
||||
if ("false".equalsIgnoreCase(token)) return 0;
|
||||
// number (int or double)
|
||||
// Handle numeric type suffixes
|
||||
try {
|
||||
char last = token.charAt(token.length() - 1);
|
||||
switch (last) {
|
||||
case 'f':
|
||||
case 'F':
|
||||
// float suffix
|
||||
return Float.parseFloat(token.substring(0, token.length() - 1));
|
||||
case 'l':
|
||||
case 'L':
|
||||
// long suffix
|
||||
return Long.parseLong(token.substring(0, token.length() - 1));
|
||||
case 'd':
|
||||
case 'D':
|
||||
// double suffix
|
||||
return Double.parseDouble(token.substring(0, token.length() - 1));
|
||||
case 's':
|
||||
case 'S':
|
||||
// short suffix
|
||||
return Short.parseShort(token.substring(0, token.length() - 1));
|
||||
case 'b':
|
||||
case 'B':
|
||||
// byte suffix
|
||||
return Byte.parseByte(token.substring(0, token.length() - 1));
|
||||
default:
|
||||
// No suffix, check for floating point or integer
|
||||
if (token.contains(".") || token.contains("e") || token.contains("E")) {
|
||||
return Double.parseDouble(token);
|
||||
} else {
|
||||
return Integer.parseInt(token);
|
||||
}
|
||||
}
|
||||
} catch (NumberFormatException ex) {
|
||||
// fallback: raw string
|
||||
// Parsing failed, fall back to original string (e.g. identifiers)
|
||||
return token;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Skips all whitespace characters at the current cursor position until a non-whitespace or end of text is reached.
|
||||
* <p>
|
||||
* The cursor index is automatically incremented, so it will point to the next non-whitespace character (or end of text).
|
||||
*
|
||||
* @param c cursor object (must support ch() for current char, i for index, end() for boundary check)
|
||||
*/
|
||||
private static void skipWs(Cursor c) {
|
||||
while (!c.end() && Character.isWhitespace(c.ch())) c.i++;
|
||||
// Increment cursor while not at end and is whitespace
|
||||
while (!c.end() && Character.isWhitespace(c.ch())) {
|
||||
c.i++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the current cursor position matches the specified character.
|
||||
*
|
||||
* @param c cursor object
|
||||
* @param ch expected character
|
||||
* @return true if not at end and character matches ch, otherwise false
|
||||
*/
|
||||
private static boolean peek(Cursor c, char ch) {
|
||||
return !c.end() && c.ch() == ch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts that the current cursor position is the specified character; throws if not.
|
||||
* If it matches, skips the character and any following whitespace.
|
||||
*
|
||||
* @param c cursor object
|
||||
* @param ch expected character
|
||||
* @throws IllegalArgumentException if current position is not the expected character
|
||||
*/
|
||||
private static void expect(Cursor c, char ch) {
|
||||
if (c.end() || c.ch() != ch)
|
||||
throw new IllegalArgumentException("R_PUSH array literal parse error: expected '" + ch + "' at position " + c.i);
|
||||
c.i++; // consume
|
||||
skipWs(c);
|
||||
c.i++; // Consume current character
|
||||
skipWs(c); // Skip any subsequent whitespace
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Executes the R_PUSH instruction: pushes a constant or array constant onto the operand stack.
|
||||
* <p>
|
||||
* Processing steps:
|
||||
* <ul>
|
||||
* <li>1. Checks parameter count, throws if insufficient.</li>
|
||||
* <li>2. Concatenates all arguments (except opcode) into a raw literal string.</li>
|
||||
* <li>3. Checks if the literal is an array (starts with [ and ends with ]).</li>
|
||||
* <li>4. If array, recursively parses and pushes as a read-only List onto the operand stack.</li>
|
||||
* <li>5. Otherwise, pushes the literal string as-is.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param parts instruction and parameter strings (parts[0] is the opcode, others are params)
|
||||
* @param pc current instruction index
|
||||
* @param stack operand stack
|
||||
* @param lvs local variable store
|
||||
* @param cs call stack
|
||||
* @return next instruction index
|
||||
*/
|
||||
@Override
|
||||
public int execute(String[] parts, int pc,
|
||||
OperandStack stack,
|
||||
LocalVariableStore lvs,
|
||||
CallStack cs) {
|
||||
|
||||
// Check parameter count
|
||||
if (parts.length < 2)
|
||||
throw new IllegalStateException("R_PUSH missing parameter");
|
||||
|
||||
// Join all arguments into a complete literal string
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 1; i < parts.length; i++) {
|
||||
if (i > 1) sb.append(' ');
|
||||
sb.append(parts[i]);
|
||||
}
|
||||
String literal = sb.toString().trim();
|
||||
|
||||
// Check if this is an array literal
|
||||
if (literal.startsWith("[") && literal.endsWith("]")) {
|
||||
Object parsed = parseValue(new Cursor(literal));
|
||||
if (!(parsed instanceof List<?> list)) {
|
||||
// Should not happen in theory; safety fallback
|
||||
stack.push(parsed);
|
||||
} else {
|
||||
// Convert to read-only List before pushing to prevent modification
|
||||
stack.push(deepUnmodifiable(list));
|
||||
}
|
||||
} else {
|
||||
// Regular string, push as-is
|
||||
stack.push(literal);
|
||||
}
|
||||
return pc + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple string cursor, supporting index increment and character reading, for use by the parser.
|
||||
*/
|
||||
private static final class Cursor {
|
||||
final String s; // Original string
|
||||
int i; // Current index
|
||||
|
||||
Cursor(String s) {
|
||||
this.s = s;
|
||||
this.i = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the cursor is at the end of the string.
|
||||
*
|
||||
* @return true if at end
|
||||
*/
|
||||
boolean end() {
|
||||
return i >= s.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character at the current cursor position.
|
||||
*
|
||||
* @return current character
|
||||
*/
|
||||
char ch() {
|
||||
return s.charAt(i);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user