fix: 修复字符串常量在 VM 代码中的表示问题
- 在 LoadConstGenerator 中添加字符串转义功能,确保字符串常量在 VM 代码中正确表示
- 优化 formatConst 方法,对字符串常量进行转义处理
- 新增 escape 方法,用于字符串转义,包括控制字符和非 ASCII 字符的处理
This commit is contained in:
parent
16ba11c391
commit
9bdc6245bf
@ -12,46 +12,47 @@ import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* <b>LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction}</b>
|
||||
* <b>LoadConstGenerator</b>
|
||||
*
|
||||
* <p>
|
||||
* This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions.
|
||||
* This generator converts an IR-level {@link LoadConstInstruction} into corresponding VM instructions.
|
||||
* If the constant is a {@code String}, it will also be registered in the
|
||||
* {@link CallGenerator} string constant pool to support syscall downgrade scenarios.
|
||||
* {@link CallGenerator} string constant pool for later use.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Fix: When the constant is an array (List), type information is preserved in R_PUSH payload:
|
||||
* Key implementation notes:
|
||||
* <ul>
|
||||
* <li>Float is output with <code>f</code> suffix (e.g., 0.1f);</li>
|
||||
* <li>Long is output with <code>L</code> suffix (e.g., 123L);</li>
|
||||
* <li>Double/Integer are output in their default format (e.g., 1.0, 42);</li>
|
||||
* <li>Supports recursive serialization of nested arrays.</li>
|
||||
* <li>When the constant is an array (List), type information is preserved in the R_PUSH payload:</li>
|
||||
* <li>Float values get an <code>f</code> suffix (e.g., 0.1f)</li>
|
||||
* <li>Long values get an <code>L</code> suffix (e.g., 123L)</li>
|
||||
* <li>Double and Integer values use their default string format (e.g., 1.0, 42)</li>
|
||||
* <li>Nested arrays are recursively serialized with correct type suffixes.</li>
|
||||
* </ul>
|
||||
* This prevents float values from being misinterpreted as double on the VM side,
|
||||
* and avoids Double→Float cast exceptions in later F_STORE operations.
|
||||
* This prevents type confusion on the VM side (e.g., float being misread as double)
|
||||
* and avoids cast exceptions during store operations.
|
||||
* </p>
|
||||
*/
|
||||
public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> {
|
||||
|
||||
/**
|
||||
* Formats a constant value as a string for use as a VM payload.
|
||||
* Lists are recursively serialized, and Float/Long types include suffixes to preserve type information.
|
||||
* Formats a constant value for use as a VM instruction payload.
|
||||
* For lists, recursively formats each element with type suffixes where appropriate.
|
||||
*
|
||||
* @param v The constant value to format.
|
||||
* @return The formatted string for use in VM code.
|
||||
* @param v The constant value.
|
||||
* @return The formatted string payload for VM code.
|
||||
*/
|
||||
private static String formatConst(Object v) {
|
||||
return formatConst(v, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper for recursively formatting constant values (including nested arrays)
|
||||
* with appropriate type suffixes for array payloads.
|
||||
* Recursively formats constant values (including nested arrays), preserving
|
||||
* type suffixes and escaping strings. Used internally for array/list handling.
|
||||
*
|
||||
* @param v The constant value to format.
|
||||
* @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied.
|
||||
* @return The formatted string for use in VM code.
|
||||
* @param v The constant value.
|
||||
* @param insideArray Whether this value is inside an array context (controls type suffixing).
|
||||
* @return The formatted string for VM code.
|
||||
*/
|
||||
private static String formatConst(Object v, boolean insideArray) {
|
||||
if (v instanceof List<?> list) {
|
||||
@ -61,10 +62,10 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
.collect(Collectors.joining(", ")) + "]";
|
||||
}
|
||||
if (v instanceof String s) {
|
||||
return s;
|
||||
// Escape and wrap the string in double quotes, to avoid line breaks or control chars breaking VM code
|
||||
return "\"" + escape(s) + "\"";
|
||||
}
|
||||
if (v instanceof Float f) {
|
||||
// Always keep .0 for integer values
|
||||
float fv = f;
|
||||
String s = (fv == (long) fv) ? String.format("%.1f", fv) : f.toString();
|
||||
return insideArray ? (s + "f") : s;
|
||||
@ -74,7 +75,6 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
}
|
||||
if (v instanceof Double d) {
|
||||
double dv = d;
|
||||
// Always keep .0 for integer values
|
||||
return (dv == (long) dv) ? String.format("%.1f", dv) : Double.toString(dv);
|
||||
}
|
||||
if (v instanceof Short s) {
|
||||
@ -89,28 +89,54 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
return String.valueOf(v);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Specifies the type of IR instruction supported by this generator.
|
||||
* Escapes a string for use in VM code: replaces control characters and all non-ASCII characters
|
||||
* with their corresponding escape sequences, so the .water file remains single-line and parseable.
|
||||
* Supported escapes: \n, \r, \t, \f, \b, \", \', \\, and Unicode escapes like "uXXXX" for non-ASCII.
|
||||
*
|
||||
* @return The class object representing {@link LoadConstInstruction}.
|
||||
* @param s The input string.
|
||||
* @return The escaped string.
|
||||
*/
|
||||
private static String escape(String s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
char ch = s.charAt(i);
|
||||
switch (ch) {
|
||||
case '\n' -> sb.append("\\n");
|
||||
case '\r' -> sb.append("\\r");
|
||||
case '\t' -> sb.append("\\t");
|
||||
case '\f' -> sb.append("\\f");
|
||||
case '\b' -> sb.append("\\b");
|
||||
case '\"' -> sb.append("\\\"");
|
||||
case '\'' -> sb.append("\\'");
|
||||
case '\\' -> sb.append("\\\\");
|
||||
default -> {
|
||||
// Escape non-ASCII and control characters using uXXXX
|
||||
if (ch < 0x20 || ch > 0x7E) {
|
||||
sb.append(String.format("\\u%04X", (int) ch));
|
||||
} else {
|
||||
sb.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Class<LoadConstInstruction> supportedClass() {
|
||||
return LoadConstInstruction.class;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the VM instructions for a given {@link LoadConstInstruction}.
|
||||
* <p>
|
||||
* This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions,
|
||||
* marking the local slot type for later operations, and registering string constants if necessary.
|
||||
* </p>
|
||||
* Generates VM code for a LoadConstInstruction.
|
||||
* Produces PUSH and STORE instructions, sets the slot type,
|
||||
* and registers string constants if necessary.
|
||||
*
|
||||
* @param ins The {@link LoadConstInstruction} to generate code for.
|
||||
* @param out The {@link VMProgramBuilder} used to collect the generated instructions.
|
||||
* @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices.
|
||||
* @param currentFn The name of the current function.
|
||||
* @param ins The IR instruction to generate.
|
||||
* @param out The output program builder.
|
||||
* @param slotMap The mapping from IR virtual register to physical slot.
|
||||
* @param currentFn The current function name.
|
||||
*/
|
||||
@Override
|
||||
public void generate(LoadConstInstruction ins,
|
||||
@ -118,19 +144,19 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
Map<IRVirtualRegister, Integer> slotMap,
|
||||
String currentFn) {
|
||||
|
||||
// 1. Get the constant value
|
||||
// 1. Retrieve the constant value from the instruction
|
||||
IRConstant constant = (IRConstant) ins.operands().getFirst();
|
||||
Object value = constant.value();
|
||||
|
||||
// 2. Generate PUSH instruction (array constants use type-aware formatting)
|
||||
// 2. Format and emit the PUSH instruction (arrays will use type-aware formatting)
|
||||
String payload = formatConst(value);
|
||||
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
|
||||
|
||||
// 3. STORE the result to the destination slot
|
||||
// 3. Emit STORE to the destination slot
|
||||
int slot = slotMap.get(ins.dest());
|
||||
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
|
||||
|
||||
// 4. Mark the slot's data type for later inference and instruction selection
|
||||
// 4. Mark the slot's data type for later use (type inference, instruction selection, etc.)
|
||||
char prefix = switch (value) {
|
||||
case Integer _ -> 'I'; // Integer
|
||||
case Long _ -> 'L'; // Long
|
||||
@ -138,15 +164,15 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
case Byte _ -> 'B'; // Byte
|
||||
case Double _ -> 'D'; // Double
|
||||
case Float _ -> 'F'; // Float
|
||||
case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0)
|
||||
case String _ -> 'R'; // String constant
|
||||
case java.util.List<?> _ -> 'R'; // Reference type (arrays, etc.)
|
||||
case Boolean _ -> 'I'; // Booleans are treated as integers (1/0)
|
||||
case String _ -> 'R'; // Reference type for strings
|
||||
case java.util.List<?> _ -> 'R'; // Reference type for arrays/lists
|
||||
case null, default -> throw new IllegalStateException("Unknown constant type: "
|
||||
+ (value != null ? value.getClass() : null));
|
||||
};
|
||||
out.setSlotType(slot, prefix);
|
||||
|
||||
// 5. If the constant is a string, register it for the CallGenerator string pool
|
||||
// 5. Register the string constant for the string constant pool if needed
|
||||
if (value instanceof String s) {
|
||||
CallGenerator.registerStringConst(ins.dest().id(), s);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user