From 9bdc6245bf1ef9fb08f6bc452c4179b49660675c Mon Sep 17 00:00:00 2001
From: Luke
Date: Wed, 27 Aug 2025 10:35:23 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=AD=97=E7=AC=A6?=
=?UTF-8?q?=E4=B8=B2=E5=B8=B8=E9=87=8F=E5=9C=A8=20VM=20=E4=BB=A3=E7=A0=81?=
=?UTF-8?q?=E4=B8=AD=E7=9A=84=E8=A1=A8=E7=A4=BA=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- 在 LoadConstGenerator 中添加字符串转义功能,确保字符串常量在 VM 代码中正确表示
- 优化 formatConst 方法,对字符串常量进行转义处理
- 新增 escape 方法,用于字符串转义,包括控制字符和非 ASCII 字符的处理
---
.../backend/generator/LoadConstGenerator.java | 110 +++++++++++-------
1 file changed, 68 insertions(+), 42 deletions(-)
diff --git a/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java b/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java
index 84d2e7e..e9218ad 100644
--- a/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java
+++ b/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java
@@ -12,46 +12,47 @@ import java.util.Map;
import java.util.stream.Collectors;
/**
- * LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction}
+ * LoadConstGenerator
*
*
- * This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions.
+ * This generator converts an IR-level {@link LoadConstInstruction} into corresponding VM instructions.
* If the constant is a {@code String}, it will also be registered in the
- * {@link CallGenerator} string constant pool to support syscall downgrade scenarios.
+ * {@link CallGenerator} string constant pool for later use.
*
*
*
- * Fix: When the constant is an array (List), type information is preserved in R_PUSH payload:
+ * Key implementation notes:
*
- * - Float is output with <code>f</code> suffix (e.g., 0.1f);
- * - Long is output with <code>L</code> suffix (e.g., 123L);
- * - Double/Integer are output in their default format (e.g., 1.0, 42);
- * - Supports recursive serialization of nested arrays.
+ * - When the constant is an array (List), type information is preserved in the R_PUSH payload:
+ * - Float values get an <code>f</code> suffix (e.g., 0.1f)
+ * - Long values get an <code>L</code> suffix (e.g., 123L)
+ * - Double and Integer values use their default string format (e.g., 1.0, 42)
+ * - Nested arrays are recursively serialized with correct type suffixes.
*
- * This prevents float values from being misinterpreted as double on the VM side,
- * and avoids Double→Float cast exceptions in later F_STORE operations.
+ * This prevents type confusion on the VM side (e.g., float being misread as double)
+ * and avoids cast exceptions during store operations.
*
*/
public class LoadConstGenerator implements InstructionGenerator {
/**
- * Formats a constant value as a string for use as a VM payload.
- * Lists are recursively serialized, and Float/Long types include suffixes to preserve type information.
+ * Formats a constant value for use as a VM instruction payload.
+ * For lists, recursively formats each element with type suffixes where appropriate.
*
- * @param v The constant value to format.
- * @return The formatted string for use in VM code.
+ * @param v The constant value.
+ * @return The formatted string payload for VM code.
*/
private static String formatConst(Object v) {
return formatConst(v, false);
}
/**
- * Internal helper for recursively formatting constant values (including nested arrays)
- * with appropriate type suffixes for array payloads.
+ * Recursively formats constant values (including nested arrays), preserving
+ * type suffixes and escaping strings. Used internally for array/list handling.
*
- * @param v The constant value to format.
- * @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied.
- * @return The formatted string for use in VM code.
+ * @param v The constant value.
+ * @param insideArray Whether this value is inside an array context (controls type suffixing).
+ * @return The formatted string for VM code.
*/
private static String formatConst(Object v, boolean insideArray) {
if (v instanceof List> list) {
@@ -61,10 +62,10 @@ public class LoadConstGenerator implements InstructionGenerator sb.append("\\n");
+ case '\r' -> sb.append("\\r");
+ case '\t' -> sb.append("\\t");
+ case '\f' -> sb.append("\\f");
+ case '\b' -> sb.append("\\b");
+ case '\"' -> sb.append("\\\"");
+ case '\'' -> sb.append("\\'");
+ case '\\' -> sb.append("\\\\");
+ default -> {
+ // Escape non-ASCII and control characters using uXXXX
+ if (ch < 0x20 || ch > 0x7E) {
+ sb.append(String.format("\\u%04X", (int) ch));
+ } else {
+ sb.append(ch);
+ }
+ }
+ }
+ }
+ return sb.toString();
+ }
+
@Override
public Class supportedClass() {
return LoadConstInstruction.class;
}
/**
- * Generates the VM instructions for a given {@link LoadConstInstruction}.
- *
- * This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions,
- * marking the local slot type for later operations, and registering string constants if necessary.
- *
+ * Generates VM code for a LoadConstInstruction.
+ * Produces PUSH and STORE instructions, sets the slot type,
+ * and registers string constants if necessary.
*
- * @param ins The {@link LoadConstInstruction} to generate code for.
- * @param out The {@link VMProgramBuilder} used to collect the generated instructions.
- * @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices.
- * @param currentFn The name of the current function.
+ * @param ins The IR instruction to generate.
+ * @param out The output program builder.
+ * @param slotMap The mapping from IR virtual register to physical slot.
+ * @param currentFn The current function name.
*/
@Override
public void generate(LoadConstInstruction ins,
@@ -118,19 +144,19 @@ public class LoadConstGenerator implements InstructionGenerator slotMap,
String currentFn) {
- // 1. Get the constant value
+ // 1. Retrieve the constant value from the instruction
IRConstant constant = (IRConstant) ins.operands().getFirst();
Object value = constant.value();
- // 2. Generate PUSH instruction (array constants use type-aware formatting)
+ // 2. Format and emit the PUSH instruction (arrays will use type-aware formatting)
String payload = formatConst(value);
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
- // 3. STORE the result to the destination slot
+ // 3. Emit STORE to the destination slot
int slot = slotMap.get(ins.dest());
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
- // 4. Mark the slot's data type for later inference and instruction selection
+ // 4. Mark the slot's data type for later use (type inference, instruction selection, etc.)
char prefix = switch (value) {
case Integer _ -> 'I'; // Integer
case Long _ -> 'L'; // Long
@@ -138,15 +164,15 @@ public class LoadConstGenerator implements InstructionGenerator 'B'; // Byte
case Double _ -> 'D'; // Double
case Float _ -> 'F'; // Float
- case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0)
- case String _ -> 'R'; // String constant
- case java.util.List> _ -> 'R'; // Reference type (arrays, etc.)
+ case Boolean _ -> 'I'; // Booleans are treated as integers (1/0)
+ case String _ -> 'R'; // Reference type for strings
+ case java.util.List> _ -> 'R'; // Reference type for arrays/lists
case null, default -> throw new IllegalStateException("Unknown constant type: "
+ (value != null ? value.getClass() : null));
};
out.setSlotType(slot, prefix);
- // 5. If the constant is a string, register it for the CallGenerator string pool
+ // 5. Register the string constant for the string constant pool if needed
if (value instanceof String s) {
CallGenerator.registerStringConst(ins.dest().id(), s);
}