!65 enhancement: 支持字符串转义与 Unicode

Merge pull request !65 from Luke/feature/add-escape-characters
This commit is contained in:
Luke 2025-08-27 03:17:00 +00:00 committed by Gitee
commit 77eddb83f4
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
7 changed files with 303 additions and 133 deletions

View File

@ -1,7 +0,0 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Run" type="CompoundRunConfigurationType">
<toRun name="build_project2tar.ps1" type="PowerShellRunType" />
<toRun name="Demo1" type="Application" />
<method v="2" />
</configuration>
</component>

View File

@ -13,6 +13,11 @@
<toRun name="Demo19" type="Application" /> <toRun name="Demo19" type="Application" />
<toRun name="Demo2" type="Application" /> <toRun name="Demo2" type="Application" />
<toRun name="Demo20" type="Application" /> <toRun name="Demo20" type="Application" />
<toRun name="Demo21" type="Application" />
<toRun name="Demo22" type="Application" />
<toRun name="Demo23" type="Application" />
<toRun name="Demo24" type="Application" />
<toRun name="Demo25" type="Application" />
<toRun name="Demo3" type="Application" /> <toRun name="Demo3" type="Application" />
<toRun name="Demo4" type="Application" /> <toRun name="Demo4" type="Application" />
<toRun name="Demo6" type="Application" /> <toRun name="Demo6" type="Application" />

View File

@ -0,0 +1,20 @@
module: Main
function: main
returns: void
body:
// Demo: declares one string constant per supported escape sequence.
// 1. Common escape sequences
declare sNewline : string = "换行示例:\n第二行"
declare sTab : string = "制表符示例:\t列二"
declare sBackslash: string = "反斜杠示例: C:\\Snow"
declare sDQuote : string = "双引号示例: \"Snow\""
declare sSQuote : string = "单引号示例: \'Snow\'"
declare sCarriage : string = "回车示例:\rCarriage"
declare sBackspace: string = "退格示例: ABC\bD"
declare sFormFeed : string = "换页示例:\fPage-2"
// 2. Unicode escapes (literal supplementary characters followed by backslash-u escapes)
declare sUnicode : string = "𪚥𠮷: \u4F60\u597D, Snow!"
end body
end function
end module

View File

@ -0,0 +1,82 @@
package org.jcnc.snow.common;
/**
 * Utility for decoding escape sequences in string literals.
 *
 * <p>The escape rules follow Java string escapes: the common control-character
 * escapes (newline, tab, carriage return, backspace, form feed), the quote and
 * backslash escapes, and the four-digit Unicode form <code>&#92;uXXXX</code>.
 * (The Unicode form is written with an HTML entity here because a literal
 * backslash-u followed by non-hex text is rejected by the Java compiler even
 * inside comments.)</p>
 *
 * <p>This class cannot be instantiated; all members are static.</p>
 */
public final class StringEscape {

    /** Number of hexadecimal digits that follow the {@code u} in a Unicode escape. */
    private static final int UNICODE_HEX_DIGITS = 4;

    /**
     * Private constructor: utility class, not meant to be instantiated.
     */
    private StringEscape() {
    }

    /**
     * Replaces escape sequences in {@code src} with the characters they denote.
     *
     * <ul>
     * <li>{@code \n}, {@code \t}, {@code \r}, {@code \b}, {@code \f}, {@code \\},
     * {@code \"} and {@code \'} are decoded to the corresponding character.</li>
     * <li><code>&#92;uXXXX</code> is decoded to the UTF-16 code unit {@code XXXX}
     * when it is followed by exactly four hexadecimal digits. A sign character
     * is not a hexadecimal digit, so inputs such as <code>&#92;u-041</code> are
     * treated as invalid rather than being parsed as a signed number.</li>
     * <li>An invalid or truncated Unicode escape is copied to the output unchanged.</li>
     * <li>Any other escaped character is emitted without its backslash.</li>
     * <li>A lone backslash at the very end of the input is emitted as-is.</li>
     * </ul>
     *
     * @param src the string containing escape sequences; must not be {@code null}
     * @return the decoded string
     * @throws NullPointerException if {@code src} is {@code null}
     */
    public static String unescape(String src) {
        final int length = src.length();
        StringBuilder out = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char c = src.charAt(i);
            if (c != '\\') {
                // Ordinary character: copy through.
                out.append(c);
                continue;
            }
            if (i == length - 1) {
                // Trailing backslash with nothing to escape: keep it verbatim.
                out.append('\\');
                break;
            }
            char n = src.charAt(++i); // the character being escaped
            switch (n) {
                case 'n' -> out.append('\n');
                case 't' -> out.append('\t');
                case 'r' -> out.append('\r');
                case 'b' -> out.append('\b');
                case 'f' -> out.append('\f');
                case '\\' -> out.append('\\');
                case '"' -> out.append('"');
                case '\'' -> out.append('\'');
                case 'u' -> {
                    if (i + UNICODE_HEX_DIGITS < length) {
                        int codeUnit = parseHexQuad(src, i + 1);
                        if (codeUnit >= 0) {
                            out.append((char) codeUnit);
                        } else {
                            // Not four hex digits: reproduce the escape text unchanged.
                            out.append("\\u").append(src, i + 1, i + 1 + UNICODE_HEX_DIGITS);
                        }
                        i += UNICODE_HEX_DIGITS;
                    } else {
                        // Fewer than four characters remain: emit the prefix and let
                        // the main loop copy whatever follows as ordinary characters.
                        out.append("\\u");
                    }
                }
                default -> out.append(n); // unknown escape: drop the backslash, keep the char
            }
        }
        return out.toString();
    }

    /**
     * Parses the four characters starting at {@code start} as a hexadecimal number.
     *
     * <p>Unlike {@link Integer#parseInt(String, int)}, a leading {@code +} or
     * {@code -} is rejected, so the result is never negative for valid input.</p>
     *
     * @param src   the source string; must contain at least four characters from {@code start}
     * @param start index of the first hexadecimal digit
     * @return the value in the range 0..0xFFFF, or {@code -1} if any character is not a hex digit
     */
    private static int parseHexQuad(String src, int start) {
        int value = 0;
        for (int k = 0; k < UNICODE_HEX_DIGITS; k++) {
            int digit = Character.digit(src.charAt(start + k), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}

View File

@ -12,46 +12,47 @@ import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** /**
* <b>LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction}</b> * <b>LoadConstGenerator</b>
* *
* <p> * <p>
* This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions. * This generator converts an IR-level {@link LoadConstInstruction} into corresponding VM instructions.
* If the constant is a {@code String}, it will also be registered in the * If the constant is a {@code String}, it will also be registered in the
* {@link CallGenerator} string constant pool to support syscall downgrade scenarios. * {@link CallGenerator} string constant pool for later use.
* </p> * </p>
* *
* <p> * <p>
* Fix: When the constant is an array (List), type information is preserved in R_PUSH payload: * Key implementation notes:
* <ul> * <ul>
* <li>Float is output with <code>f</code> suffix (e.g., 0.1f);</li> * <li>When the constant is an array (List), type information is preserved in the R_PUSH payload:</li>
* <li>Long is output with <code>L</code> suffix (e.g., 123L);</li> * <li>Float values get an <code>f</code> suffix (e.g., 0.1f)</li>
* <li>Double/Integer are output in their default format (e.g., 1.0, 42);</li> * <li>Long values get an <code>L</code> suffix (e.g., 123L)</li>
* <li>Supports recursive serialization of nested arrays.</li> * <li>Double and Integer values use their default string format (e.g., 1.0, 42)</li>
* <li>Nested arrays are recursively serialized with correct type suffixes.</li>
* </ul> * </ul>
* This prevents float values from being misinterpreted as double on the VM side, * This prevents type confusion on the VM side (e.g., float being misread as double)
* and avoids DoubleFloat cast exceptions in later F_STORE operations. * and avoids cast exceptions during store operations.
* </p> * </p>
*/ */
public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> { public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> {
/** /**
* Formats a constant value as a string for use as a VM payload. * Formats a constant value for use as a VM instruction payload.
* Lists are recursively serialized, and Float/Long types include suffixes to preserve type information. * For lists, recursively formats each element with type suffixes where appropriate.
* *
* @param v The constant value to format. * @param v The constant value.
* @return The formatted string for use in VM code. * @return The formatted string payload for VM code.
*/ */
private static String formatConst(Object v) { private static String formatConst(Object v) {
return formatConst(v, false); return formatConst(v, false);
} }
/** /**
* Internal helper for recursively formatting constant values (including nested arrays) * Recursively formats constant values (including nested arrays), preserving
* with appropriate type suffixes for array payloads. * type suffixes and escaping strings. Used internally for array/list handling.
* *
* @param v The constant value to format. * @param v The constant value.
* @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied. * @param insideArray Whether this value is inside an array context (controls type suffixing).
* @return The formatted string for use in VM code. * @return The formatted string for VM code.
*/ */
private static String formatConst(Object v, boolean insideArray) { private static String formatConst(Object v, boolean insideArray) {
if (v instanceof List<?> list) { if (v instanceof List<?> list) {
@ -61,10 +62,10 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
.collect(Collectors.joining(", ")) + "]"; .collect(Collectors.joining(", ")) + "]";
} }
if (v instanceof String s) { if (v instanceof String s) {
return s; // Escape and wrap the string in double quotes, to avoid line breaks or control chars breaking VM code
return "\"" + escape(s) + "\"";
} }
if (v instanceof Float f) { if (v instanceof Float f) {
// Always keep .0 for integer values
float fv = f; float fv = f;
String s = (fv == (long) fv) ? String.format("%.1f", fv) : f.toString(); String s = (fv == (long) fv) ? String.format("%.1f", fv) : f.toString();
return insideArray ? (s + "f") : s; return insideArray ? (s + "f") : s;
@ -74,7 +75,6 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
} }
if (v instanceof Double d) { if (v instanceof Double d) {
double dv = d; double dv = d;
// Always keep .0 for integer values
return (dv == (long) dv) ? String.format("%.1f", dv) : Double.toString(dv); return (dv == (long) dv) ? String.format("%.1f", dv) : Double.toString(dv);
} }
if (v instanceof Short s) { if (v instanceof Short s) {
@ -89,28 +89,54 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
return String.valueOf(v); return String.valueOf(v);
} }
/** /**
* Specifies the type of IR instruction supported by this generator. * Escapes a string for use in VM code: replaces control characters and all non-ASCII characters
* with their corresponding escape sequences, so the .water file remains single-line and parseable.
* Supported escapes: \n, \r, \t, \f, \b, \", \', \\, and Unicode escapes like "uXXXX" for non-ASCII.
* *
* @return The class object representing {@link LoadConstInstruction}. * @param s The input string.
* @return The escaped string.
*/ */
private static String escape(String s) {
    final int len = s.length();
    StringBuilder escaped = new StringBuilder();
    int pos = 0;
    while (pos < len) {
        char c = s.charAt(pos++);
        // Characters with a dedicated two-character escape.
        String named = switch (c) {
            case '\n' -> "\\n";
            case '\r' -> "\\r";
            case '\t' -> "\\t";
            case '\f' -> "\\f";
            case '\b' -> "\\b";
            case '\"' -> "\\\"";
            case '\'' -> "\\'";
            case '\\' -> "\\\\";
            default -> null;
        };
        if (named != null) {
            escaped.append(named);
        } else if (c >= 0x20 && c <= 0x7E) {
            // Printable ASCII is written through unchanged.
            escaped.append(c);
        } else {
            // Remaining control characters and everything above 0x7E become a
            // backslash-u escape with four upper-case hex digits.
            escaped.append(String.format("\\u%04X", (int) c));
        }
    }
    return escaped.toString();
}
@Override @Override
public Class<LoadConstInstruction> supportedClass() { public Class<LoadConstInstruction> supportedClass() {
return LoadConstInstruction.class; return LoadConstInstruction.class;
} }
/** /**
* Generates the VM instructions for a given {@link LoadConstInstruction}. * Generates VM code for a LoadConstInstruction.
* <p> * Produces PUSH and STORE instructions, sets the slot type,
* This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions, * and registers string constants if necessary.
* marking the local slot type for later operations, and registering string constants if necessary.
* </p>
* *
* @param ins The {@link LoadConstInstruction} to generate code for. * @param ins The IR instruction to generate.
* @param out The {@link VMProgramBuilder} used to collect the generated instructions. * @param out The output program builder.
* @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices. * @param slotMap The mapping from IR virtual register to physical slot.
* @param currentFn The name of the current function. * @param currentFn The current function name.
*/ */
@Override @Override
public void generate(LoadConstInstruction ins, public void generate(LoadConstInstruction ins,
@ -118,19 +144,19 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
Map<IRVirtualRegister, Integer> slotMap, Map<IRVirtualRegister, Integer> slotMap,
String currentFn) { String currentFn) {
// 1. Get the constant value // 1. Retrieve the constant value from the instruction
IRConstant constant = (IRConstant) ins.operands().getFirst(); IRConstant constant = (IRConstant) ins.operands().getFirst();
Object value = constant.value(); Object value = constant.value();
// 2. Generate PUSH instruction (array constants use type-aware formatting) // 2. Format and emit the PUSH instruction (arrays will use type-aware formatting)
String payload = formatConst(value); String payload = formatConst(value);
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload); out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
// 3. STORE the result to the destination slot // 3. Emit STORE to the destination slot
int slot = slotMap.get(ins.dest()); int slot = slotMap.get(ins.dest());
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot); out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
// 4. Mark the slot's data type for later inference and instruction selection // 4. Mark the slot's data type for later use (type inference, instruction selection, etc.)
char prefix = switch (value) { char prefix = switch (value) {
case Integer _ -> 'I'; // Integer case Integer _ -> 'I'; // Integer
case Long _ -> 'L'; // Long case Long _ -> 'L'; // Long
@ -138,15 +164,15 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
case Byte _ -> 'B'; // Byte case Byte _ -> 'B'; // Byte
case Double _ -> 'D'; // Double case Double _ -> 'D'; // Double
case Float _ -> 'F'; // Float case Float _ -> 'F'; // Float
case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0) case Boolean _ -> 'I'; // Booleans are treated as integers (1/0)
case String _ -> 'R'; // String constant case String _ -> 'R'; // Reference type for strings
case java.util.List<?> _ -> 'R'; // Reference type (arrays, etc.) case java.util.List<?> _ -> 'R'; // Reference type for arrays/lists
case null, default -> throw new IllegalStateException("Unknown constant type: " case null, default -> throw new IllegalStateException("Unknown constant type: "
+ (value != null ? value.getClass() : null)); + (value != null ? value.getClass() : null));
}; };
out.setSlotType(slot, prefix); out.setSlotType(slot, prefix);
// 5. If the constant is a string, register it for the CallGenerator string pool // 5. Register the string constant for the string constant pool if needed
if (value instanceof String s) { if (value instanceof String s) {
CallGenerator.registerStringConst(ins.dest().id(), s); CallGenerator.registerStringConst(ins.dest().id(), s);
} }

View File

@ -1,5 +1,6 @@
package org.jcnc.snow.compiler.parser.expression; package org.jcnc.snow.compiler.parser.expression;
import org.jcnc.snow.common.StringEscape;
import org.jcnc.snow.compiler.lexer.token.Token; import org.jcnc.snow.compiler.lexer.token.Token;
import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode; import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode;
import org.jcnc.snow.compiler.parser.ast.StringLiteralNode; import org.jcnc.snow.compiler.parser.ast.StringLiteralNode;
@ -26,8 +27,14 @@ public class StringLiteralParselet implements PrefixParselet {
*/ */
@Override @Override
public ExpressionNode parse(ParserContext ctx, Token token) { public ExpressionNode parse(ParserContext ctx, Token token) {
// 去除首尾引号
String raw = token.getRaw(); String raw = token.getRaw();
String content = raw.substring(1, raw.length() - 1); String inner = raw.substring(1, raw.length() - 1);
return new StringLiteralNode(content, new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName())); // 解析转义符与 Unicode 转义
String value = StringEscape.unescape(inner);
return new StringLiteralNode(
value,
new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName())
);
} }
} }

View File

@ -10,36 +10,54 @@ import java.util.Collections;
import java.util.List; import java.util.List;
/** /**
* The {@code RPushCommand} class implements the {@link Command} interface * The {@code RPushCommand} class implements the {@link Command} interface and provides
* and represents the "reference push" instruction ({@code R_PUSH}) in the virtual machine. * the "reference push" instruction ({@code R_PUSH}) for the virtual machine.
*
* <p> * <p>
* This instruction pushes a reference-type value onto the operand stack. * <b>Function:</b> Pushes a reference-type value (String literal or array literal) onto the operand stack.
* The input is parsed from the textual instruction form, which can represent: * </p>
*
* <h2>Supported Literals</h2>
* <ul> * <ul>
* <li>String literals</li> * <li><b>String Literals:</b> Quoted strings (e.g., {@code "hello\nworld"}) with escape sequence support.</li>
* <li>Array literals (e.g., {@code [1, 2, 3]}), including nested arrays</li> * <li><b>Array Literals:</b> Bracketed array forms (e.g., {@code [1, 2, [3, 4]]}), including nested arrays.</li>
* </ul> * </ul>
* </p>
* *
* <p> * <h2>Implementation Details</h2>
* For array literals, a nested list structure is constructed. In this implementation, * <ul>
* array literals are pushed as <b>mutable</b> {@link java.util.ArrayList} structures, * <li>Array literals are parsed into <b>mutable</b> {@link java.util.ArrayList} objects, to support in-place modification (e.g., by {@code ARR_SET}).</li>
* so that subsequent system calls such as {@code ARR_SET} can modify elements in-place. * <li>String literals wrapped in quotes are automatically unescaped according to Java string escape rules.</li>
* </p> * <li>Handles atomic values: numbers (including hex, binary, float, long, short, byte), booleans, and fallback to string.</li>
* </ul>
*
* <h2>Example Usage</h2>
* <pre>
* R_PUSH "hello\nworld" // pushes String "hello\nworld" (with actual newline)
* R_PUSH [1, 2, 3] // pushes ArrayList {1, 2, 3}
* R_PUSH [1, [2, 3], 4] // pushes nested arrays as mutable lists
* </pre>
*
* @author (your name or org)
* @since 1.0
*/ */
public class RPushCommand implements Command { public class RPushCommand implements Command {
/** /**
* Executes the R_PUSH command. * Executes the {@code R_PUSH} instruction. Parses the given literal parameter and pushes it onto the operand stack.
* <p>
* Handles:
* <ul>
* <li>Array literals (e.g., {@code [1, 2, "a"]}), parsed recursively as mutable ArrayLists</li>
* <li>Quoted string literals (e.g., {@code "abc\n"}), parsed with escape sequence support</li>
* <li>Unquoted raw strings, numbers, and atoms</li>
* </ul>
* *
* @param parts The parts of the instruction, where {@code parts[1..n]} are concatenated as the literal. * @param parts The instruction split into parts (opcode and arguments)
* @param pc The current program counter. * @param pc The current program counter
* @param stack The operand stack where the result will be pushed. * @param stack The operand stack to push the value onto
* @param local The local variable store (unused in this instruction). * @param local The local variable store (unused)
* @param callStack The call stack (unused in this instruction). * @param callStack The call stack (unused)
* @return The new program counter (typically {@code pc+1}). * @return The next program counter (pc + 1)
* @throws IllegalStateException if no literal parameter is provided. * @throws IllegalStateException if the R_PUSH parameter is missing or parsing fails
*/ */
@Override @Override
public int execute(String[] parts, int pc, OperandStack stack, LocalVariableStore local, CallStack callStack) { public int execute(String[] parts, int pc, OperandStack stack, LocalVariableStore local, CallStack callStack) {
@ -54,71 +72,64 @@ public class RPushCommand implements Command {
} }
String literal = sb.toString().trim(); String literal = sb.toString().trim();
// Check if this is an array literal // Handle array literal
if (literal.startsWith("[") && literal.endsWith("]")) { if (literal.startsWith("[") && literal.endsWith("]")) {
Object parsed = parseValue(new Cursor(literal)); Object parsed = parseValue(new Cursor(literal));
if (!(parsed instanceof List<?> list)) { if (!(parsed instanceof List<?> list)) {
// Should not happen in theory; safety fallback
stack.push(parsed); stack.push(parsed);
} else { } else {
// Push a deep-mutable copy so ARR_SET can modify elements in-place
stack.push(deepMutable(list)); stack.push(deepMutable(list));
} }
} else { }
// Regular string, push as-is // String literal with quotes and escapes
else if (literal.length() >= 2 && literal.startsWith("\"") && literal.endsWith("\"")) {
String decoded = parseQuoted(new Cursor(literal));
stack.push(decoded);
}
// Raw atom or string
else {
stack.push(literal); stack.push(literal);
} }
return pc + 1; return pc + 1;
} }
/** /**
* A simple string cursor, supporting index increment and character reading, for use by the parser. * Utility class for string parsing, used by the array and string literal parsers.
*/ */
static class Cursor { static class Cursor {
final String s; final String s;
int i; int i;
/** /**
* Constructs a new {@code Cursor} for the given string. * Constructs a cursor over the provided string.
* * @param s the input string to parse
* @param s The string to parse.
*/ */
Cursor(String s) { Cursor(String s) { this.s = s; this.i = 0; }
this.s = s;
this.i = 0;
}
/** /**
* Advances the cursor by one character. * Advances the cursor by one character.
*/ */
void skip() { void skip() { i++; }
i++;
}
/** /**
* @return {@code true} if the cursor has reached the end of the string. * Returns true if the cursor has reached the end of the string.
* @return true if end of string
*/ */
boolean end() { boolean end() { return i >= s.length(); }
return i >= s.length();
}
/** /**
* Gets the character at the current cursor position. * Returns the current character at the cursor position.
* * @return the current character
* @return current character
* @throws StringIndexOutOfBoundsException if at end of string
*/ */
char ch() { char ch() { return s.charAt(i); }
return s.charAt(i);
}
} }
/** /**
* Parses a value from the input string at the current cursor position. * Parses a value from the current cursor position.
* This can be an array literal, a quoted string, or a simple atom (number, word). * Supports arrays, quoted strings, or atoms.
* *
* @param c The cursor for parsing. * @param c the parsing cursor
* @return The parsed value (could be List, String, Number). * @return the parsed object (List, String, Number, Boolean, or String fallback)
*/ */
Object parseValue(Cursor c) { Object parseValue(Cursor c) {
skipWs(c); skipWs(c);
@ -130,9 +141,8 @@ public class RPushCommand implements Command {
} }
/** /**
* Skips whitespace characters in the input string. * Skips whitespace characters at the cursor.
* * @param c the parsing cursor
* @param c The cursor to advance.
*/ */
private static void skipWs(Cursor c) { private static void skipWs(Cursor c) {
while (!c.end()) { while (!c.end()) {
@ -143,13 +153,13 @@ public class RPushCommand implements Command {
} }
/** /**
* Parses an array literal from the input, including nested arrays. * Parses an array literal of the form [elem1, elem2, ...] (may be nested).
* Recursively parses elements using {@link #parseValue(Cursor)}.
* *
* @param c The cursor (positioned at '[' at entry). * @param c the parsing cursor
* @return A List representing the parsed array. * @return a List of parsed elements
*/ */
private Object parseArray(Cursor c) { private Object parseArray(Cursor c) {
// assumes current char is '['
c.skip(); // skip '[' c.skip(); // skip '['
List<Object> out = new ArrayList<>(); List<Object> out = new ArrayList<>();
skipWs(c); skipWs(c);
@ -170,13 +180,12 @@ public class RPushCommand implements Command {
} }
/** /**
* Parses a quoted string literal, handling escape characters. * Parses a quoted string, handling standard Java escape sequences (e.g. \n, \t, uXXXX).
* *
* @param c The cursor (positioned at '"' at entry). * @param c the parsing cursor
* @return The parsed string value. * @return the decoded string
*/ */
private static String parseQuoted(Cursor c) { private static String parseQuoted(Cursor c) {
// assumes current char is '"'
c.skip(); // skip opening quote c.skip(); // skip opening quote
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
while (!c.end()) { while (!c.end()) {
@ -190,8 +199,25 @@ public class RPushCommand implements Command {
case 'n' -> sb.append('\n'); case 'n' -> sb.append('\n');
case 'r' -> sb.append('\r'); case 'r' -> sb.append('\r');
case 't' -> sb.append('\t'); case 't' -> sb.append('\t');
case 'f' -> sb.append('\f');
case 'b' -> sb.append('\b');
case '\"' -> sb.append('\"'); case '\"' -> sb.append('\"');
case '\'' -> sb.append('\'');
case '\\' -> sb.append('\\'); case '\\' -> sb.append('\\');
case 'u' -> { // Unicode escape: uXXXX
StringBuilder uni = new StringBuilder();
for (int k = 0; k < 4 && !c.end(); ++k) {
uni.append(c.ch());
c.skip();
}
try {
int code = Integer.parseInt(uni.toString(), 16);
sb.append((char) code);
} catch (Exception e) {
// Invalid unicode, append as is
sb.append("\\u").append(uni);
}
}
default -> sb.append(esc); default -> sb.append(esc);
} }
} else if (ch == '\"') { } else if (ch == '\"') {
@ -204,10 +230,10 @@ public class RPushCommand implements Command {
} }
/** /**
* Parses an atom (number, hexadecimal, binary, or plain string token). * Parses an atomic value (number, boolean, or fallback string) from the cursor.
* *
* @param c The cursor. * @param c the parsing cursor
* @return An Integer, Double, or String, depending on the content. * @return the parsed object (Integer, Double, Float, Long, Boolean, or String)
*/ */
private static Object parseAtom(Cursor c) { private static Object parseAtom(Cursor c) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -218,7 +244,7 @@ public class RPushCommand implements Command {
c.skip(); c.skip();
} }
String token = sb.toString(); String token = sb.toString();
// try number // Try number parsing with various notations and types
try { try {
if (token.startsWith("0x") || token.startsWith("0X")) { if (token.startsWith("0x") || token.startsWith("0X")) {
return Integer.parseInt(token.substring(2), 16); return Integer.parseInt(token.substring(2), 16);
@ -226,6 +252,20 @@ public class RPushCommand implements Command {
if (token.startsWith("0b") || token.startsWith("0B")) { if (token.startsWith("0b") || token.startsWith("0B")) {
return Integer.parseInt(token.substring(2), 2); return Integer.parseInt(token.substring(2), 2);
} }
if (token.endsWith("f")) {
return Float.parseFloat(token.substring(0, token.length() - 1));
}
if (token.endsWith("L")) {
return Long.parseLong(token.substring(0, token.length() - 1));
}
if (token.endsWith("s")) {
return Short.parseShort(token.substring(0, token.length() - 1));
}
if (token.endsWith("b")) {
return Byte.parseByte(token.substring(0, token.length() - 1));
}
if (token.equals("1")) return true;
if (token.equals("0")) return false;
if (token.contains(".")) { if (token.contains(".")) {
return Double.parseDouble(token); return Double.parseDouble(token);
} }
@ -236,13 +276,11 @@ public class RPushCommand implements Command {
} }
} }
// ---------------------- helpers for immutability/mutability ----------------------
/** /**
* Recursively creates an unmodifiable copy of a list, with all nested lists also unmodifiable. * Creates a deeply unmodifiable version of the provided list (and its nested lists).
* *
* @param l The list to make unmodifiable. * @param l the original list
* @return An unmodifiable deep copy of the list. * @return an unmodifiable view of the list and all nested lists
*/ */
List<?> deepUnmodifiable(List<?> l) { List<?> deepUnmodifiable(List<?> l) {
List<Object> out = new ArrayList<>(l.size()); List<Object> out = new ArrayList<>(l.size());
@ -251,10 +289,10 @@ public class RPushCommand implements Command {
} }
/** /**
* Helper method for {@link #deepUnmodifiable(List)}. Recursively processes each element. * Helper for {@link #deepUnmodifiable(List)}; handles nested lists recursively.
* *
* @param v The object to process. * @param v the object to process
* @return Unmodifiable list if input is a list, otherwise the value itself. * @return an unmodifiable list if input is a list; otherwise, the object itself
*/ */
Object deepUnmodifiableObject(Object v) { Object deepUnmodifiableObject(Object v) {
if (v instanceof List<?> l) { if (v instanceof List<?> l) {
@ -264,11 +302,10 @@ public class RPushCommand implements Command {
} }
/** /**
* Create a deep mutable copy of a nested List structure, preserving element values. * Creates a deeply mutable version of the provided list (and its nested lists).
* Nested lists are turned into {@link java.util.ArrayList} so they can be modified by ARR_SET.
* *
* @param l The source list. * @param l the original list
* @return Deep mutable copy of the list. * @return a new mutable list (ArrayList), with all nested lists mutable
*/ */
private static java.util.List<?> deepMutable(java.util.List<?> l) { private static java.util.List<?> deepMutable(java.util.List<?> l) {
java.util.List<Object> out = new java.util.ArrayList<>(l.size()); java.util.List<Object> out = new java.util.ArrayList<>(l.size());
@ -277,10 +314,10 @@ public class RPushCommand implements Command {
} }
/** /**
* Helper method for {@link #deepMutable(List)}. Recursively processes each element. * Helper for {@link #deepMutable(List)}; handles nested lists recursively.
* *
* @param v The object to process. * @param v the object to process
* @return Mutable list if input is a list, otherwise the value itself. * @return a mutable list if input is a list; otherwise, the object itself
*/ */
private static Object deepMutableObject(Object v) { private static Object deepMutableObject(Object v) {
if (v instanceof java.util.List<?> l) { if (v instanceof java.util.List<?> l) {