fix: 修复常量类型在 VM 中的表示问题

- 为浮点数添加 f 后缀,避免在 VM 中被错误解释为双精度浮点数
- 为长整数添加 L 后缀,保留类型信息
- 双精度浮点数和整数使用默认格式输出
- 支持数组常量的递归序列化,保留类型信息
- 优化了常量生成逻辑,提高了代码可读性和维护性
This commit is contained in:
Luke 2025-08-02 12:32:28 +08:00
parent f6c877a7a1
commit f61cfa5638
2 changed files with 333 additions and 94 deletions

View File

@ -7,21 +7,93 @@ import org.jcnc.snow.compiler.ir.instruction.LoadConstInstruction;
import org.jcnc.snow.compiler.ir.value.IRConstant; import org.jcnc.snow.compiler.ir.value.IRConstant;
import org.jcnc.snow.compiler.ir.value.IRVirtualRegister; import org.jcnc.snow.compiler.ir.value.IRVirtualRegister;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
/** /**
* <b>LoadConstGenerator - IR {@code LoadConstInstruction} 生成 VM 指令</b> * <b>LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction}</b>
* *
* <p> * <p>
* 本类负责将 IR 层的常量加载指令 {@link LoadConstInstruction} 转换为对应的虚拟机指令 * This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions.
* 额外支持如果常量类型为 {@code String}会同步登记到 * If the constant is a {@code String}, it will also be registered in the
* {@link CallGenerator} 的字符串常量池方便 syscall 降级场景使用 * {@link CallGenerator} string constant pool to support syscall downgrade scenarios.
* </p>
*
* <p>
* Fix: When the constant is an array (List), type information is preserved in R_PUSH payload:
* <ul>
* <li>Float is output with <code>f</code> suffix (e.g., 0.1f);</li>
* <li>Long is output with <code>L</code> suffix (e.g., 123L);</li>
* <li>Double/Integer are output in their default format (e.g., 1.0, 42);</li>
* <li>Supports recursive serialization of nested arrays.</li>
* </ul>
* This prevents float values from being misinterpreted as double on the VM side,
* and avoids Double-to-Float cast exceptions in later F_STORE operations.
* </p> * </p>
*/ */
public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> { public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> {
/** /**
* 指定本生成器支持的 IR 指令类型LoadConstInstruction * Formats a constant value as a string for use as a VM payload.
* Lists are recursively serialized; Float, Long, Short and Byte elements inside a list carry a type suffix (f, L, s, b) to preserve type information.
*
* @param v The constant value to format.
* @return The formatted string for use in VM code.
*/
private static String formatConst(Object v) {
    // A top-level constant starts outside any array context; the two-argument
    // overload switches the flag on by itself when it descends into list elements.
    final boolean insideArray = false;
    return formatConst(v, insideArray);
}
/**
 * Renders a constant value as VM payload text, recursing into nested lists.
 * <ul>
 * <li>{@code List}: elements are rendered with {@code insideArray = true}, joined with
 *     {@code ", "} and wrapped in {@code [ ]};</li>
 * <li>{@code String}: returned verbatim (not quoted or escaped);</li>
 * <li>{@code Float}/{@code Double}: integral values always keep one decimal digit
 *     (e.g. {@code 1.0}); other values use {@code toString()};</li>
 * <li>{@code Float}, {@code Long}, {@code Short}, {@code Byte}: receive the suffix
 *     {@code f}, {@code L}, {@code s}, {@code b} only when {@code insideArray} is true;</li>
 * <li>{@code Boolean}: rendered as {@code 1} or {@code 0};</li>
 * <li>anything else (including {@code null}): {@code String.valueOf(v)}.</li>
 * </ul>
 * Number formatting is locale-independent: the decimal separator is always {@code '.'}.
 *
 * @param v           The constant value to format.
 * @param insideArray True if {@code v} is an element of a list; enables type suffixes.
 * @return The formatted string for use in VM code.
 */
private static String formatConst(Object v, boolean insideArray) {
    if (v instanceof List<?> list) {
        // Every element (and nested list) is formatted in array context.
        return "[" + list.stream()
                .map(x -> formatConst(x, true))
                .collect(Collectors.joining(", ")) + "]";
    }
    if (v instanceof String s) {
        // NOTE(review): emitted unquoted even inside a list; an element containing ',' or ']'
        // would presumably be ambiguous for whatever parses this text - confirm.
        return s;
    }
    if (v instanceof Float f) {
        float fv = f;
        // Locale.ROOT pins the decimal separator to '.'; with the default locale a
        // comma-decimal environment would emit "1,0", which also collides with the
        // ", " element separator used for lists above.
        String s = (fv == (long) fv)
                ? String.format(java.util.Locale.ROOT, "%.1f", fv)
                : f.toString();
        return insideArray ? (s + "f") : s;
    }
    if (v instanceof Long l) {
        return insideArray ? (l + "L") : l.toString();
    }
    if (v instanceof Double d) {
        double dv = d;
        // Same locale pinning as for Float; doubles never carry a suffix.
        return (dv == (long) dv)
                ? String.format(java.util.Locale.ROOT, "%.1f", dv)
                : Double.toString(dv);
    }
    if (v instanceof Short s) {
        return insideArray ? (s + "s") : Short.toString(s);
    }
    if (v instanceof Byte b) {
        return insideArray ? (b + "b") : Byte.toString(b);
    }
    if (v instanceof Boolean b) {
        return b ? "1" : "0";
    }
    return String.valueOf(v);
}
/**
* Specifies the type of IR instruction supported by this generator.
*
* @return The class object representing {@link LoadConstInstruction}.
*/ */
@Override @Override
public Class<LoadConstInstruction> supportedClass() { public Class<LoadConstInstruction> supportedClass() {
@ -29,12 +101,16 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
} }
/** /**
* 生成 VM 指令主流程 * Generates the VM instructions for a given {@link LoadConstInstruction}.
* <p>
* This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions,
* marking the local slot type for later operations, and registering string constants if necessary.
* </p>
* *
* @param ins 当前常量加载指令 * @param ins The {@link LoadConstInstruction} to generate code for.
* @param out 指令输出构建器 * @param out The {@link VMProgramBuilder} used to collect the generated instructions.
* @param slotMap 虚拟寄存器与物理槽位映射 * @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices.
* @param currentFn 当前函数名 * @param currentFn The name of the current function.
*/ */
@Override @Override
public void generate(LoadConstInstruction ins, public void generate(LoadConstInstruction ins,
@ -42,35 +118,35 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
Map<IRVirtualRegister, Integer> slotMap, Map<IRVirtualRegister, Integer> slotMap,
String currentFn) { String currentFn) {
/* 1. 获取常量值 */ // 1. Get the constant value
IRConstant constant = (IRConstant) ins.operands().getFirst(); IRConstant constant = (IRConstant) ins.operands().getFirst();
Object value = constant.value(); Object value = constant.value();
/* 2. 生成 PUSH 指令,将常量值入栈 */ // 2. Generate PUSH instruction (array constants use type-aware formatting)
out.emit(OpHelper.pushOpcodeFor(value) + " " + value); String payload = formatConst(value);
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
/* 3. STORE 到目标槽位 */ // 3. STORE the result to the destination slot
int slot = slotMap.get(ins.dest()); int slot = slotMap.get(ins.dest());
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot); out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
/* 4. 标记槽位数据类型(用于后续类型推断和 LOAD/STORE 指令选择) */ // 4. Mark the slot's data type for later inference and instruction selection
char prefix = switch (value) { char prefix = switch (value) {
case Integer _ -> 'I'; // 整型 case Integer _ -> 'I'; // Integer
case Long _ -> 'L'; // 长整型 case Long _ -> 'L'; // Long
case Short _ -> 'S'; // 短整型 case Short _ -> 'S'; // Short
case Byte _ -> 'B'; // 字节型 case Byte _ -> 'B'; // Byte
case Double _ -> 'D'; // 双精度 case Double _ -> 'D'; // Double
case Float _ -> 'F'; // 单精度 case Float _ -> 'F'; // Float
case Boolean _ -> 'I'; // 布尔类型用 I 处理 case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0)
case String _ -> 'R'; // 字符串常量 case String _ -> 'R'; // String constant
case java.util.List<?> _ -> 'R'; // 引用类型如数组等 case java.util.List<?> _ -> 'R'; // Reference type (arrays, etc.)
case null, default -> case null, default -> throw new IllegalStateException("Unknown constant type: "
throw new IllegalStateException("未知的常量类型: "
+ (value != null ? value.getClass() : null)); + (value != null ? value.getClass() : null));
}; };
out.setSlotType(slot, prefix); out.setSlotType(slot, prefix);
/* 5. 如果是字符串常量,则登记到 CallGenerator 的常量池,便于 syscall 字符串降级使用 */ // 5. If the constant is a string, register it for the CallGenerator string pool
if (value instanceof String s) { if (value instanceof String s) {
CallGenerator.registerStringConst(ins.dest().id(), s); CallGenerator.registerStringConst(ins.dest().id(), s);
} }

View File

@ -38,48 +38,12 @@ public final class RPushCommand implements Command {
// ======== Parsing helpers ======== // ======== Parsing helpers ========
private static final class Cursor { /**
final String s; * Deeply wraps lists as unmodifiable; leaves scalars unchanged.
int i; *
Cursor(String s) { this.s = s; this.i = 0; } * @param v input object
boolean end() { return i >= s.length(); } * @return deeply unmodifiable version of the object
char ch() { return s.charAt(i); } */
}
@Override
public int execute(String[] parts, int pc,
                   OperandStack stack,
                   LocalVariableStore lvs,
                   CallStack cs) {
    if (parts.length < 2) {
        throw new IllegalStateException("R_PUSH missing parameter");
    }

    // Rebuild the operand text: every token after the opcode, separated by one space.
    java.util.StringJoiner joined = new java.util.StringJoiner(" ");
    for (int idx = 1; idx < parts.length; idx++) {
        joined.add(parts[idx]);
    }
    String literal = joined.toString().trim();

    boolean bracketed = literal.startsWith("[") && literal.endsWith("]");
    if (bracketed) {
        // Bracketed text is parsed as an array literal.
        Object parsed = parseValue(new Cursor(literal));
        if (parsed instanceof List<?> list) {
            // Lists are pushed as deeply unmodifiable copies.
            stack.push(deepUnmodifiable(list));
        } else {
            // Guard for a non-list parse result: push it unchanged.
            stack.push(parsed);
        }
    } else {
        // Anything else is pushed as the raw string.
        stack.push(literal);
    }
    return pc + 1;
}
/** Deeply wrap lists as unmodifiable; leave scalars intact. */
private static Object deepUnmodifiableObject(Object v) { private static Object deepUnmodifiableObject(Object v) {
if (v instanceof List<?> l) { if (v instanceof List<?> l) {
return deepUnmodifiable(l); return deepUnmodifiable(l);
@ -87,14 +51,26 @@ public final class RPushCommand implements Command {
return v; return v;
} }
/**
* Recursively wraps all nested lists as unmodifiable.
*
* @param l input list
* @return deeply unmodifiable list
*/
private static List<?> deepUnmodifiable(List<?> l) { private static List<?> deepUnmodifiable(List<?> l) {
List<Object> out = new ArrayList<>(l.size()); List<Object> out = new ArrayList<>(l.size());
for (Object v : l) out.add(deepUnmodifiableObject(v)); for (Object v : l) out.add(deepUnmodifiableObject(v));
return Collections.unmodifiableList(out); return Collections.unmodifiableList(out);
} }
// ======== Recursive-descent parser for array literals ======== /**
* Parses a value starting from the cursor.
* Skips whitespace, and delegates to the appropriate sub-parser depending on the character:
* array, quoted string, or atomic value.
*
* @param c cursor
* @return parsed value (Object)
*/
private static Object parseValue(Cursor c) { private static Object parseValue(Cursor c) {
skipWs(c); skipWs(c);
if (c.end()) return ""; if (c.end()) return "";
@ -104,6 +80,13 @@ public final class RPushCommand implements Command {
return parseAtom(c); return parseAtom(c);
} }
/**
* Parses an array literal from the cursor, supporting nested structures.
* Assumes the current character is '['.
*
* @param c cursor
* @return List of parsed objects
*/
private static List<Object> parseArray(Cursor c) { private static List<Object> parseArray(Cursor c) {
// assumes current char is '[' // assumes current char is '['
expect(c, '['); expect(c, '[');
@ -126,74 +109,254 @@ public final class RPushCommand implements Command {
return values; return values;
} }
// ======== Recursive-descent parser for array literals ========
/**
* Parses a string literal wrapped in double quotes (supports common escape sequences).
* <p>
* Assumes the cursor currently points to the starting quote character ("), and consumes the opening quote.
* Parses the string content from the cursor, stopping at the closing quote (").
* Supported escape sequences:
* <ul>
* <li>\n newline</li>
* <li>\r carriage return</li>
* <li>\t tab</li>
* <li>\" double quote itself</li>
* <li>\\ backslash</li>
* <li>Other characters are output as-is</li>
* </ul>
* If the string is not closed properly (i.e., no closing quote is found before the end), returns the currently parsed content.
*
* @param c cursor object (must support ch() for current char, i for index, end() for boundary check)
* @return parsed string content
*/
private static String parseQuoted(Cursor c) { private static String parseQuoted(Cursor c) {
// assumes current char is '"' // Assume current position is the opening quote; consume it
expect(c, '\"'); expect(c, '\"');
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
// Traverse until the end or an unclosed string
while (!c.end()) { while (!c.end()) {
char ch = c.ch(); char ch = c.ch();
c.i++; c.i++;
if (ch == '\\') { // escape if (ch == '\\') { // handle escape sequences
if (c.end()) break; if (c.end()) break; // nothing after escape char
char nxt = c.ch(); char nxt = c.ch();
c.i++; c.i++;
// Common escapes
switch (nxt) { switch (nxt) {
case 'n' -> sb.append('\n'); case 'n' -> sb.append('\n'); // newline
case 'r' -> sb.append('\r'); case 'r' -> sb.append('\r'); // carriage return
case 't' -> sb.append('\t'); case 't' -> sb.append('\t'); // tab
case '\"' -> sb.append('\"'); case '\"' -> sb.append('\"'); // double quote
case '\\' -> sb.append('\\'); case '\\' -> sb.append('\\'); // backslash
default -> sb.append(nxt); default -> sb.append(nxt); // any other char as-is
} }
} else if (ch == '\"') { } else if (ch == '\"') {
// end quote // Found closing quote; end of string
return sb.toString(); return sb.toString();
} else { } else {
// Regular character
sb.append(ch); sb.append(ch);
} }
} }
// Unclosed string: return what we have // Unclosed string, return parsed content
return sb.toString(); return sb.toString();
} }
/**
* Parses an atomic constant ("atom"), supporting type-suffixed numbers and booleans.
* <p>
* Examples: 0.1f, 123L, 3.14d, 100, true, false<br>
* Parsing rules:
* <ul>
* <li>Supports float(f/F), long(l/L), double(d/D), short(s/S), byte(b/B) type suffixes</li>
* <li>Supports boolean true/false (case-insensitive, converted to 1/0)</li>
* <li>Decimals without suffix parsed as double; integers without suffix as int</li>
* <li>If parsing fails, returns the original string</li>
* </ul>
*
* @param c cursor, must support ch() for current char, i for index, end() for boundary check, s for the original string
* @return parsed Object
*/
private static Object parseAtom(Cursor c) { private static Object parseAtom(Cursor c) {
int start = c.i; int start = c.i;
// Read until a comma, ']' or whitespace
while (!c.end()) { while (!c.end()) {
char ch = c.ch(); char ch = c.ch();
if (ch == ',' || ch == ']') break; if (ch == ',' || ch == ']') break;
if (Character.isWhitespace(ch)) break; if (Character.isWhitespace(ch)) break;
c.i++; c.i++;
} }
// Extract current token
String token = c.s.substring(start, c.i).trim(); String token = c.s.substring(start, c.i).trim();
if (token.isEmpty()) return ""; if (token.isEmpty()) return "";
// booleans // Boolean parsing (case-insensitive, convert to 1/0)
if ("true".equalsIgnoreCase(token)) return 1; if ("true".equalsIgnoreCase(token)) return 1;
if ("false".equalsIgnoreCase(token)) return 0; if ("false".equalsIgnoreCase(token)) return 0;
// number (int or double) // Handle numeric type suffixes
try { try {
char last = token.charAt(token.length() - 1);
switch (last) {
case 'f':
case 'F':
// float suffix
return Float.parseFloat(token.substring(0, token.length() - 1));
case 'l':
case 'L':
// long suffix
return Long.parseLong(token.substring(0, token.length() - 1));
case 'd':
case 'D':
// double suffix
return Double.parseDouble(token.substring(0, token.length() - 1));
case 's':
case 'S':
// short suffix
return Short.parseShort(token.substring(0, token.length() - 1));
case 'b':
case 'B':
// byte suffix
return Byte.parseByte(token.substring(0, token.length() - 1));
default:
// No suffix, check for floating point or integer
if (token.contains(".") || token.contains("e") || token.contains("E")) { if (token.contains(".") || token.contains("e") || token.contains("E")) {
return Double.parseDouble(token); return Double.parseDouble(token);
} else { } else {
return Integer.parseInt(token); return Integer.parseInt(token);
} }
}
} catch (NumberFormatException ex) { } catch (NumberFormatException ex) {
// fallback: raw string // Parsing failed, fall back to original string (e.g. identifiers)
return token; return token;
} }
} }
/**
* Skips all whitespace characters at the current cursor position until a non-whitespace or end of text is reached.
* <p>
* The cursor index is automatically incremented, so it will point to the next non-whitespace character (or end of text).
*
* @param c cursor object (must support ch() for current char, i for index, end() for boundary check)
*/
private static void skipWs(Cursor c) { private static void skipWs(Cursor c) {
while (!c.end() && Character.isWhitespace(c.ch())) c.i++; // Increment cursor while not at end and is whitespace
while (!c.end() && Character.isWhitespace(c.ch())) {
c.i++;
}
} }
/**
* Checks if the current cursor position matches the specified character.
*
* @param c cursor object
* @param ch expected character
* @return true if not at end and character matches ch, otherwise false
*/
private static boolean peek(Cursor c, char ch) { private static boolean peek(Cursor c, char ch) {
return !c.end() && c.ch() == ch; return !c.end() && c.ch() == ch;
} }
/**
* Asserts that the current cursor position is the specified character; throws if not.
* If it matches, skips the character and any following whitespace.
*
* @param c cursor object
* @param ch expected character
* @throws IllegalArgumentException if current position is not the expected character
*/
private static void expect(Cursor c, char ch) { private static void expect(Cursor c, char ch) {
if (c.end() || c.ch() != ch) if (c.end() || c.ch() != ch)
throw new IllegalArgumentException("R_PUSH array literal parse error: expected '" + ch + "' at position " + c.i); throw new IllegalArgumentException("R_PUSH array literal parse error: expected '" + ch + "' at position " + c.i);
c.i++; // consume c.i++; // Consume current character
skipWs(c); skipWs(c); // Skip any subsequent whitespace
}
/**
 * Executes {@code R_PUSH}: pushes a literal, or a parsed array literal, onto the operand stack.
 * <p>
 * All tokens after the opcode are re-joined with single spaces and trimmed. If the resulting
 * text starts with {@code [} and ends with {@code ]}, it is parsed as an array literal and the
 * resulting list is pushed as a deeply unmodifiable copy. Any other text is pushed unchanged
 * as a {@code String}.
 * </p>
 *
 * @param parts instruction tokens; {@code parts[0]} is the opcode, the rest form the operand
 * @param pc    current instruction index
 * @param stack operand stack that receives the value
 * @param lvs   local variable store (not used by this method)
 * @param cs    call stack (not used by this method)
 * @return {@code pc + 1}
 * @throws IllegalStateException if no operand follows the opcode
 */
@Override
public int execute(String[] parts, int pc,
                   OperandStack stack,
                   LocalVariableStore lvs,
                   CallStack cs) {
    if (parts.length < 2) {
        throw new IllegalStateException("R_PUSH missing parameter");
    }

    // Everything after the opcode, separated by single spaces.
    String[] operandTokens = java.util.Arrays.copyOfRange(parts, 1, parts.length);
    String literal = String.join(" ", operandTokens).trim();

    boolean looksLikeArray = literal.startsWith("[") && literal.endsWith("]");
    if (!looksLikeArray) {
        // Plain literal: push the text as-is.
        stack.push(literal);
        return pc + 1;
    }

    Object parsed = parseValue(new Cursor(literal));
    if (parsed instanceof List<?> list) {
        // Push a read-only copy so the constant cannot be modified later.
        stack.push(deepUnmodifiable(list));
    } else {
        // Safety fallback for a non-list parse result.
        stack.push(parsed);
    }
    return pc + 1;
}
/**
* A simple string cursor, supporting index increment and character reading, for use by the parser.
*/
private static final class Cursor {
final String s; // Original string
int i; // Current index
Cursor(String s) {
    // Reading always begins at the first character of the given text.
    this.i = 0;
    this.s = s;
}
/**
 * Reports whether the cursor has moved past the last character.
 *
 * @return {@code true} when the index is at or beyond the string length
 */
boolean end() {
    return !(i < s.length());
}
/**
* Gets the character at the current cursor position.
*
* @return current character
*/
char ch() {
return s.charAt(i);
} }
} }
}