!65 enhancement: 支持字符串转义与 Unicode
Merge pull request !65 from Luke/feature/add-escape-characters
This commit is contained in:
commit
77eddb83f4
@ -1,7 +0,0 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="Run" type="CompoundRunConfigurationType">
|
||||
<toRun name="build_project2tar.ps1" type="PowerShellRunType" />
|
||||
<toRun name="Demo1" type="Application" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
</component>
|
||||
@ -13,6 +13,11 @@
|
||||
<toRun name="Demo19" type="Application" />
|
||||
<toRun name="Demo2" type="Application" />
|
||||
<toRun name="Demo20" type="Application" />
|
||||
<toRun name="Demo21" type="Application" />
|
||||
<toRun name="Demo22" type="Application" />
|
||||
<toRun name="Demo23" type="Application" />
|
||||
<toRun name="Demo24" type="Application" />
|
||||
<toRun name="Demo25" type="Application" />
|
||||
<toRun name="Demo3" type="Application" />
|
||||
<toRun name="Demo4" type="Application" />
|
||||
<toRun name="Demo6" type="Application" />
|
||||
|
||||
20
playground/Demo/Demo26/Main.snow
Normal file
20
playground/Demo/Demo26/Main.snow
Normal file
@ -0,0 +1,20 @@
|
||||
module: Main
|
||||
function: main
|
||||
returns: void
|
||||
body:
|
||||
// 1. Common escape sequences (one string literal per backslash escape)
|
||||
declare sNewline : string = "换行示例:\n第二行"
|
||||
declare sTab : string = "制表符示例:\t列二"
|
||||
declare sBackslash: string = "反斜杠示例: C:\\Snow"
|
||||
declare sDQuote : string = "双引号示例: \"Snow\""
|
||||
declare sSQuote : string = "单引号示例: \'Snow\'"
|
||||
declare sCarriage : string = "回车示例:\rCarriage"
|
||||
declare sBackspace: string = "退格示例: ABC\bD"
|
||||
declare sFormFeed : string = "换页示例:\fPage-2"
|
||||
|
||||
// 2. Unicode escapes (the literal also contains raw characters outside the Basic Multilingual Plane)
|
||||
declare sUnicode : string = "𪚥𠮷: \u4F60\u597D, Snow!"
|
||||
|
||||
end body
|
||||
end function
|
||||
end module
|
||||
82
src/main/java/org/jcnc/snow/common/StringEscape.java
Normal file
82
src/main/java/org/jcnc/snow/common/StringEscape.java
Normal file
@ -0,0 +1,82 @@
|
||||
package org.jcnc.snow.common;
|
||||
|
||||
/**
 * Utility for decoding backslash escape sequences in string text.
 *
 * <p>Recognized escapes are the single-character forms {@code \n}, {@code \t}, {@code \r},
 * {@code \b}, {@code \f}, {@code \\}, {@code \"} and {@code \'}, plus the Unicode form:
 * a backslash, the letter {@code u}, and exactly four hexadecimal digits.</p>
 *
 * <p>Only decoding ({@link #unescape(String)}) is implemented in this class.</p>
 */
public final class StringEscape {

    /** Number of hexadecimal digits that must follow the {@code u} of a Unicode escape. */
    private static final int UNICODE_HEX_DIGITS = 4;

    /**
     * Private constructor: this is a static utility class and must not be instantiated.
     */
    private StringEscape() {
    }

    /**
     * Replaces escape sequences in {@code src} with the characters they denote.
     *
     * <ul>
     *   <li>Known single-character escapes are replaced by their control or literal character.</li>
     *   <li>A Unicode escape with four valid hexadecimal digits becomes the corresponding
     *       UTF-16 code unit. Signs are <b>not</b> hexadecimal digits, so a sequence such as
     *       {@code u+041} or {@code u-001} after the backslash is treated as invalid.</li>
     *   <li>A Unicode escape whose four following characters are not all hexadecimal digits
     *       is copied to the output unchanged (backslash, {@code u}, and those four characters).</li>
     *   <li>A Unicode escape with fewer than four characters after it keeps its backslash and
     *       {@code u}; the remaining characters are then processed normally.</li>
     *   <li>A lone backslash at the very end of the input is kept.</li>
     *   <li>For any other escaped character the backslash is dropped and the character is kept.</li>
     * </ul>
     *
     * @param src text that may contain escape sequences; must not be {@code null}
     * @return the decoded text
     * @throws NullPointerException if {@code src} is {@code null}
     */
    public static String unescape(String src) {
        final int length = src.length();
        StringBuilder out = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char c = src.charAt(i);
            if (c != '\\') {
                // Ordinary character: copy through.
                out.append(c);
                continue;
            }

            // A trailing backslash has nothing to escape; keep it literally.
            if (i == length - 1) {
                out.append('\\');
                break;
            }

            char n = src.charAt(++i); // character following the backslash
            switch (n) {
                case 'n' -> out.append('\n');
                case 't' -> out.append('\t');
                case 'r' -> out.append('\r');
                case 'b' -> out.append('\b');
                case 'f' -> out.append('\f');
                case '\\' -> out.append('\\');
                case '"' -> out.append('"');
                case '\'' -> out.append('\'');
                case 'u' -> {
                    if (i + UNICODE_HEX_DIGITS >= length) {
                        // Not enough characters left for four hex digits: keep the prefix only.
                        out.append("\\u");
                    } else {
                        int code = parseHex4(src, i + 1);
                        if (code >= 0) {
                            out.append((char) code);
                        } else {
                            // Invalid digits: emit the whole six-character sequence untouched.
                            out.append("\\u").append(src, i + 1, i + 1 + UNICODE_HEX_DIGITS);
                        }
                        i += UNICODE_HEX_DIGITS;
                    }
                }
                // Unknown escape: the backslash is dropped, the character itself is kept.
                default -> out.append(n);
            }
        }
        return out.toString();
    }

    /**
     * Parses exactly four hexadecimal digits starting at {@code start}.
     *
     * <p>Unlike {@link Integer#parseInt(String, int)}, a leading {@code +} or {@code -}
     * is rejected, so the result can never be negative or come from only three digits.</p>
     *
     * @param src   text containing the digits; the caller guarantees four characters are available
     * @param start index of the first digit
     * @return the parsed value in the range 0..0xFFFF, or {@code -1} if any character is not a hex digit
     */
    private static int parseHex4(String src, int start) {
        int value = 0;
        for (int k = start; k < start + UNICODE_HEX_DIGITS; k++) {
            int digit = Character.digit(src.charAt(k), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}
|
||||
@ -12,46 +12,47 @@ import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * <b>LoadConstGenerator</b> - generates VM instructions from IR {@code LoadConstInstruction}.
 *
 * <p>
 * This generator converts an IR-level {@link LoadConstInstruction} into the corresponding VM instructions.
 * If the constant is a {@code String}, it is also registered in the {@link CallGenerator} string
 * constant pool for later use (for example, to support syscall downgrade scenarios).
 * </p>
 *
 * <p>
 * Key implementation notes. When the constant is an array (List), type information is preserved
 * in the R_PUSH payload:
 * <ul>
 *   <li>Float values get an <code>f</code> suffix (e.g., 0.1f).</li>
 *   <li>Long values get an <code>L</code> suffix (e.g., 123L).</li>
 *   <li>Double and Integer values use their default string format (e.g., 1.0, 42).</li>
 *   <li>Nested arrays are recursively serialized with correct type suffixes.</li>
 * </ul>
 * This prevents type confusion on the VM side (e.g., a float value being misread as a double)
 * and avoids Double-to-Float cast exceptions in later store operations such as F_STORE.
 * </p>
 */
|
||||
public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruction> {
|
||||
|
||||
/**
|
||||
* Formats a constant value as a string for use as a VM payload.
|
||||
* Lists are recursively serialized, and Float/Long types include suffixes to preserve type information.
|
||||
* Formats a constant value for use as a VM instruction payload.
|
||||
* For lists, recursively formats each element with type suffixes where appropriate.
|
||||
*
|
||||
* @param v The constant value to format.
|
||||
* @return The formatted string for use in VM code.
|
||||
* @param v The constant value.
|
||||
* @return The formatted string payload for VM code.
|
||||
*/
|
||||
private static String formatConst(Object v) {
|
||||
// Top-level entry point: delegate with insideArray = false (the value is not nested in an array yet).
return formatConst(v, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper for recursively formatting constant values (including nested arrays)
|
||||
* with appropriate type suffixes for array payloads.
|
||||
* Recursively formats constant values (including nested arrays), preserving
|
||||
* type suffixes and escaping strings. Used internally for array/list handling.
|
||||
*
|
||||
* @param v The constant value to format.
|
||||
* @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied.
|
||||
* @return The formatted string for use in VM code.
|
||||
* @param v The constant value.
|
||||
* @param insideArray Whether this value is inside an array context (controls type suffixing).
|
||||
* @return The formatted string for VM code.
|
||||
*/
|
||||
private static String formatConst(Object v, boolean insideArray) {
|
||||
if (v instanceof List<?> list) {
|
||||
@ -61,10 +62,10 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
.collect(Collectors.joining(", ")) + "]";
|
||||
}
|
||||
if (v instanceof String s) {
|
||||
return s;
|
||||
// Escape and wrap the string in double quotes, to avoid line breaks or control chars breaking VM code
|
||||
return "\"" + escape(s) + "\"";
|
||||
}
|
||||
if (v instanceof Float f) {
|
||||
// Always keep .0 for integer values
|
||||
float fv = f;
|
||||
String s = (fv == (long) fv) ? String.format("%.1f", fv) : f.toString();
|
||||
return insideArray ? (s + "f") : s;
|
||||
@ -74,7 +75,6 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
}
|
||||
if (v instanceof Double d) {
|
||||
double dv = d;
|
||||
// Always keep .0 for integer values
|
||||
return (dv == (long) dv) ? String.format("%.1f", dv) : Double.toString(dv);
|
||||
}
|
||||
if (v instanceof Short s) {
|
||||
@ -89,28 +89,54 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
return String.valueOf(v);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Specifies the type of IR instruction supported by this generator.
|
||||
* Escapes a string for use in VM code: replaces control characters and all non-ASCII characters
|
||||
* with their corresponding escape sequences, so the .water file remains single-line and parseable.
|
||||
* Supported escapes: \n, \r, \t, \f, \b, \", \', \\, and Unicode escapes like "uXXXX" for non-ASCII.
|
||||
*
|
||||
* @return The class object representing {@link LoadConstInstruction}.
|
||||
* @param s The input string.
|
||||
* @return The escaped string.
|
||||
*/
|
||||
private static String escape(String s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
char ch = s.charAt(i);
|
||||
switch (ch) {
|
||||
case '\n' -> sb.append("\\n");
|
||||
case '\r' -> sb.append("\\r");
|
||||
case '\t' -> sb.append("\\t");
|
||||
case '\f' -> sb.append("\\f");
|
||||
case '\b' -> sb.append("\\b");
|
||||
case '\"' -> sb.append("\\\"");
|
||||
case '\'' -> sb.append("\\'");
|
||||
case '\\' -> sb.append("\\\\");
|
||||
default -> {
|
||||
// Escape non-ASCII and control characters using uXXXX
|
||||
if (ch < 0x20 || ch > 0x7E) {
|
||||
sb.append(String.format("\\u%04X", (int) ch));
|
||||
} else {
|
||||
sb.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
 * Reports the IR instruction type handled by this generator.
 *
 * @return the {@link LoadConstInstruction} class object
 */
@Override
|
||||
public Class<LoadConstInstruction> supportedClass() {
|
||||
return LoadConstInstruction.class;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the VM instructions for a given {@link LoadConstInstruction}.
|
||||
* <p>
|
||||
* This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions,
|
||||
* marking the local slot type for later operations, and registering string constants if necessary.
|
||||
* </p>
|
||||
* Generates VM code for a LoadConstInstruction.
|
||||
* Produces PUSH and STORE instructions, sets the slot type,
|
||||
* and registers string constants if necessary.
|
||||
*
|
||||
* @param ins The {@link LoadConstInstruction} to generate code for.
|
||||
* @param out The {@link VMProgramBuilder} used to collect the generated instructions.
|
||||
* @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices.
|
||||
* @param currentFn The name of the current function.
|
||||
* @param ins The IR instruction to generate.
|
||||
* @param out The output program builder.
|
||||
* @param slotMap The mapping from IR virtual register to physical slot.
|
||||
* @param currentFn The current function name.
|
||||
*/
|
||||
@Override
|
||||
public void generate(LoadConstInstruction ins,
|
||||
@ -118,19 +144,19 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
Map<IRVirtualRegister, Integer> slotMap,
|
||||
String currentFn) {
|
||||
|
||||
// 1. Get the constant value
|
||||
// 1. Retrieve the constant value from the instruction
|
||||
IRConstant constant = (IRConstant) ins.operands().getFirst();
|
||||
Object value = constant.value();
|
||||
|
||||
// 2. Generate PUSH instruction (array constants use type-aware formatting)
|
||||
// 2. Format and emit the PUSH instruction (arrays will use type-aware formatting)
|
||||
String payload = formatConst(value);
|
||||
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
|
||||
|
||||
// 3. STORE the result to the destination slot
|
||||
// 3. Emit STORE to the destination slot
|
||||
int slot = slotMap.get(ins.dest());
|
||||
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
|
||||
|
||||
// 4. Mark the slot's data type for later inference and instruction selection
|
||||
// 4. Mark the slot's data type for later use (type inference, instruction selection, etc.)
|
||||
char prefix = switch (value) {
|
||||
case Integer _ -> 'I'; // Integer
|
||||
case Long _ -> 'L'; // Long
|
||||
@ -138,15 +164,15 @@ public class LoadConstGenerator implements InstructionGenerator<LoadConstInstruc
|
||||
case Byte _ -> 'B'; // Byte
|
||||
case Double _ -> 'D'; // Double
|
||||
case Float _ -> 'F'; // Float
|
||||
case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0)
|
||||
case String _ -> 'R'; // String constant
|
||||
case java.util.List<?> _ -> 'R'; // Reference type (arrays, etc.)
|
||||
case Boolean _ -> 'I'; // Booleans are treated as integers (1/0)
|
||||
case String _ -> 'R'; // Reference type for strings
|
||||
case java.util.List<?> _ -> 'R'; // Reference type for arrays/lists
|
||||
case null, default -> throw new IllegalStateException("Unknown constant type: "
|
||||
+ (value != null ? value.getClass() : null));
|
||||
};
|
||||
out.setSlotType(slot, prefix);
|
||||
|
||||
// 5. If the constant is a string, register it for the CallGenerator string pool
|
||||
// 5. Register the string constant for the string constant pool if needed
|
||||
if (value instanceof String s) {
|
||||
CallGenerator.registerStringConst(ins.dest().id(), s);
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package org.jcnc.snow.compiler.parser.expression;
|
||||
|
||||
import org.jcnc.snow.common.StringEscape;
|
||||
import org.jcnc.snow.compiler.lexer.token.Token;
|
||||
import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode;
|
||||
import org.jcnc.snow.compiler.parser.ast.StringLiteralNode;
|
||||
@ -26,8 +27,14 @@ public class StringLiteralParselet implements PrefixParselet {
|
||||
*/
|
||||
@Override
|
||||
public ExpressionNode parse(ParserContext ctx, Token token) {
|
||||
// 去除首尾引号
|
||||
String raw = token.getRaw();
|
||||
String content = raw.substring(1, raw.length() - 1);
|
||||
return new StringLiteralNode(content, new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName()));
|
||||
String inner = raw.substring(1, raw.length() - 1);
|
||||
// 解析转义符与 Unicode 转义
|
||||
String value = StringEscape.unescape(inner);
|
||||
return new StringLiteralNode(
|
||||
value,
|
||||
new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName())
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -10,36 +10,54 @@ import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The {@code RPushCommand} class implements the {@link Command} interface
|
||||
* and represents the "reference push" instruction ({@code R_PUSH}) in the virtual machine.
|
||||
*
|
||||
* The {@code RPushCommand} class implements the {@link Command} interface and provides
|
||||
* the "reference push" instruction ({@code R_PUSH}) for the virtual machine.
|
||||
* <p>
|
||||
* This instruction pushes a reference-type value onto the operand stack.
|
||||
* The input is parsed from the textual instruction form, which can represent:
|
||||
* <b>Function:</b> Pushes a reference-type value (String literal or array literal) onto the operand stack.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Supported Literals</h2>
|
||||
* <ul>
|
||||
* <li>String literals</li>
|
||||
* <li>Array literals (e.g., {@code [1, 2, 3]}), including nested arrays</li>
|
||||
* <li><b>String Literals:</b> Quoted strings (e.g., {@code "hello\nworld"}) with escape sequence support.</li>
|
||||
* <li><b>Array Literals:</b> Bracketed array forms (e.g., {@code [1, 2, [3, 4]]}), including nested arrays.</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For array literals, a nested list structure is constructed. In this implementation,
|
||||
* array literals are pushed as <b>mutable</b> {@link java.util.ArrayList} structures,
|
||||
* so that subsequent system calls such as {@code ARR_SET} can modify elements in-place.
|
||||
* </p>
|
||||
* <h2>Implementation Details</h2>
|
||||
* <ul>
|
||||
* <li>Array literals are parsed into <b>mutable</b> {@link java.util.ArrayList} objects, to support in-place modification (e.g., by {@code ARR_SET}).</li>
|
||||
* <li>String literals wrapped in quotes are automatically unescaped according to Java string escape rules.</li>
|
||||
* <li>Handles atomic values: numbers (including hex, binary, float, long, short, byte), booleans, and fallback to string.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Example Usage</h2>
|
||||
* <pre>
|
||||
* R_PUSH "hello\nworld" // pushes String "hello\nworld" (with actual newline)
|
||||
* R_PUSH [1, 2, 3] // pushes ArrayList {1, 2, 3}
|
||||
* R_PUSH [1, [2, 3], 4] // pushes nested arrays as mutable lists
|
||||
* </pre>
|
||||
*
|
||||
* @author (your name or org)
|
||||
* @since 1.0
|
||||
*/
|
||||
public class RPushCommand implements Command {
|
||||
|
||||
/**
|
||||
* Executes the R_PUSH command.
|
||||
* Executes the {@code R_PUSH} instruction. Parses the given literal parameter and pushes it onto the operand stack.
|
||||
* <p>
|
||||
* Handles:
|
||||
* <ul>
|
||||
* <li>Array literals (e.g., {@code [1, 2, "a"]}), parsed recursively as mutable ArrayLists</li>
|
||||
* <li>Quoted string literals (e.g., {@code "abc\n"}), parsed with escape sequence support</li>
|
||||
* <li>Unquoted raw strings, numbers, and atoms</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param parts The parts of the instruction, where {@code parts[1..n]} are concatenated as the literal.
|
||||
* @param pc The current program counter.
|
||||
* @param stack The operand stack where the result will be pushed.
|
||||
* @param local The local variable store (unused in this instruction).
|
||||
* @param callStack The call stack (unused in this instruction).
|
||||
* @return The new program counter (typically {@code pc+1}).
|
||||
* @throws IllegalStateException if no literal parameter is provided.
|
||||
* @param parts The instruction split into parts (opcode and arguments)
|
||||
* @param pc The current program counter
|
||||
* @param stack The operand stack to push the value onto
|
||||
* @param local The local variable store (unused)
|
||||
* @param callStack The call stack (unused)
|
||||
* @return The next program counter (pc + 1)
|
||||
* @throws IllegalStateException if the R_PUSH parameter is missing or parsing fails
|
||||
*/
|
||||
@Override
|
||||
public int execute(String[] parts, int pc, OperandStack stack, LocalVariableStore local, CallStack callStack) {
|
||||
@ -54,71 +72,64 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
String literal = sb.toString().trim();
|
||||
|
||||
// Check if this is an array literal
|
||||
// Handle array literal
|
||||
if (literal.startsWith("[") && literal.endsWith("]")) {
|
||||
Object parsed = parseValue(new Cursor(literal));
|
||||
if (!(parsed instanceof List<?> list)) {
|
||||
// Should not happen in theory; safety fallback
|
||||
stack.push(parsed);
|
||||
} else {
|
||||
// Push a deep-mutable copy so ARR_SET can modify elements in-place
|
||||
stack.push(deepMutable(list));
|
||||
}
|
||||
} else {
|
||||
// Regular string, push as-is
|
||||
}
|
||||
// String literal with quotes and escapes
|
||||
else if (literal.length() >= 2 && literal.startsWith("\"") && literal.endsWith("\"")) {
|
||||
String decoded = parseQuoted(new Cursor(literal));
|
||||
stack.push(decoded);
|
||||
}
|
||||
// Raw atom or string
|
||||
else {
|
||||
stack.push(literal);
|
||||
}
|
||||
return pc + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple string cursor, supporting index increment and character reading, for use by the parser.
|
||||
* Utility class for string parsing, used by the array and string literal parsers.
|
||||
*/
|
||||
static class Cursor {
|
||||
final String s;
|
||||
int i;
|
||||
|
||||
/**
|
||||
* Constructs a new {@code Cursor} for the given string.
|
||||
*
|
||||
* @param s The string to parse.
|
||||
* Constructs a cursor over the provided string.
|
||||
* @param s the input string to parse
|
||||
*/
|
||||
Cursor(String s) {
|
||||
this.s = s;
|
||||
this.i = 0;
|
||||
}
|
||||
Cursor(String s) { this.s = s; this.i = 0; }
|
||||
|
||||
/**
|
||||
* Advances the cursor by one character.
|
||||
*/
|
||||
void skip() {
|
||||
i++;
|
||||
}
|
||||
void skip() { i++; }
|
||||
|
||||
/**
|
||||
* @return {@code true} if the cursor has reached the end of the string.
|
||||
* Returns true if the cursor has reached the end of the string.
|
||||
* @return true if end of string
|
||||
*/
|
||||
boolean end() {
|
||||
return i >= s.length();
|
||||
}
|
||||
boolean end() { return i >= s.length(); }
|
||||
|
||||
/**
|
||||
* Gets the character at the current cursor position.
|
||||
*
|
||||
* @return current character
|
||||
* @throws StringIndexOutOfBoundsException if at end of string
|
||||
* Returns the current character at the cursor position.
|
||||
* @return the current character
|
||||
*/
|
||||
char ch() {
|
||||
return s.charAt(i);
|
||||
}
|
||||
char ch() { return s.charAt(i); }
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a value from the input string at the current cursor position.
|
||||
* This can be an array literal, a quoted string, or a simple atom (number, word).
|
||||
* Parses a value from the current cursor position.
|
||||
* Supports arrays, quoted strings, or atoms.
|
||||
*
|
||||
* @param c The cursor for parsing.
|
||||
* @return The parsed value (could be List, String, Number).
|
||||
* @param c the parsing cursor
|
||||
* @return the parsed object (List, String, Number, Boolean, or String fallback)
|
||||
*/
|
||||
Object parseValue(Cursor c) {
|
||||
skipWs(c);
|
||||
@ -130,9 +141,8 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips whitespace characters in the input string.
|
||||
*
|
||||
* @param c The cursor to advance.
|
||||
* Skips whitespace characters at the cursor.
|
||||
* @param c the parsing cursor
|
||||
*/
|
||||
private static void skipWs(Cursor c) {
|
||||
while (!c.end()) {
|
||||
@ -143,13 +153,13 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an array literal from the input, including nested arrays.
|
||||
* Parses an array literal of the form [elem1, elem2, ...] (may be nested).
|
||||
* Recursively parses elements using {@link #parseValue(Cursor)}.
|
||||
*
|
||||
* @param c The cursor (positioned at '[' at entry).
|
||||
* @return A List representing the parsed array.
|
||||
* @param c the parsing cursor
|
||||
* @return a List of parsed elements
|
||||
*/
|
||||
private Object parseArray(Cursor c) {
|
||||
// assumes current char is '['
|
||||
c.skip(); // skip '['
|
||||
List<Object> out = new ArrayList<>();
|
||||
skipWs(c);
|
||||
@ -170,13 +180,12 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a quoted string literal, handling escape characters.
|
||||
* Parses a quoted string, handling standard Java escape sequences (e.g. \n, \t, uXXXX).
|
||||
*
|
||||
* @param c The cursor (positioned at '"' at entry).
|
||||
* @return The parsed string value.
|
||||
* @param c the parsing cursor
|
||||
* @return the decoded string
|
||||
*/
|
||||
private static String parseQuoted(Cursor c) {
|
||||
// assumes current char is '"'
|
||||
c.skip(); // skip opening quote
|
||||
StringBuilder sb = new StringBuilder();
|
||||
while (!c.end()) {
|
||||
@ -190,8 +199,25 @@ public class RPushCommand implements Command {
|
||||
case 'n' -> sb.append('\n');
|
||||
case 'r' -> sb.append('\r');
|
||||
case 't' -> sb.append('\t');
|
||||
case 'f' -> sb.append('\f');
|
||||
case 'b' -> sb.append('\b');
|
||||
case '\"' -> sb.append('\"');
|
||||
case '\'' -> sb.append('\'');
|
||||
case '\\' -> sb.append('\\');
|
||||
case 'u' -> { // Unicode escape: uXXXX
|
||||
StringBuilder uni = new StringBuilder();
|
||||
for (int k = 0; k < 4 && !c.end(); ++k) {
|
||||
uni.append(c.ch());
|
||||
c.skip();
|
||||
}
|
||||
try {
|
||||
int code = Integer.parseInt(uni.toString(), 16);
|
||||
sb.append((char) code);
|
||||
} catch (Exception e) {
|
||||
// Invalid unicode, append as is
|
||||
sb.append("\\u").append(uni);
|
||||
}
|
||||
}
|
||||
default -> sb.append(esc);
|
||||
}
|
||||
} else if (ch == '\"') {
|
||||
@ -204,10 +230,10 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an atom (number, hexadecimal, binary, or plain string token).
|
||||
* Parses an atomic value (number, boolean, or fallback string) from the cursor.
|
||||
*
|
||||
* @param c The cursor.
|
||||
* @return An Integer, Double, or String, depending on the content.
|
||||
* @param c the parsing cursor
|
||||
* @return the parsed object (Integer, Double, Float, Long, Boolean, or String)
|
||||
*/
|
||||
private static Object parseAtom(Cursor c) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
@ -218,7 +244,7 @@ public class RPushCommand implements Command {
|
||||
c.skip();
|
||||
}
|
||||
String token = sb.toString();
|
||||
// try number
|
||||
// Try number parsing with various notations and types
|
||||
try {
|
||||
if (token.startsWith("0x") || token.startsWith("0X")) {
|
||||
return Integer.parseInt(token.substring(2), 16);
|
||||
@ -226,6 +252,20 @@ public class RPushCommand implements Command {
|
||||
if (token.startsWith("0b") || token.startsWith("0B")) {
|
||||
return Integer.parseInt(token.substring(2), 2);
|
||||
}
|
||||
if (token.endsWith("f")) {
|
||||
return Float.parseFloat(token.substring(0, token.length() - 1));
|
||||
}
|
||||
if (token.endsWith("L")) {
|
||||
return Long.parseLong(token.substring(0, token.length() - 1));
|
||||
}
|
||||
if (token.endsWith("s")) {
|
||||
return Short.parseShort(token.substring(0, token.length() - 1));
|
||||
}
|
||||
if (token.endsWith("b")) {
|
||||
return Byte.parseByte(token.substring(0, token.length() - 1));
|
||||
}
|
||||
if (token.equals("1")) return true;
|
||||
if (token.equals("0")) return false;
|
||||
if (token.contains(".")) {
|
||||
return Double.parseDouble(token);
|
||||
}
|
||||
@ -236,13 +276,11 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------- helpers for immutability/mutability ----------------------
|
||||
|
||||
/**
|
||||
* Recursively creates an unmodifiable copy of a list, with all nested lists also unmodifiable.
|
||||
* Creates a deeply unmodifiable version of the provided list (and its nested lists).
|
||||
*
|
||||
* @param l The list to make unmodifiable.
|
||||
* @return An unmodifiable deep copy of the list.
|
||||
* @param l the original list
|
||||
* @return an unmodifiable view of the list and all nested lists
|
||||
*/
|
||||
List<?> deepUnmodifiable(List<?> l) {
|
||||
List<Object> out = new ArrayList<>(l.size());
|
||||
@ -251,10 +289,10 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for {@link #deepUnmodifiable(List)}. Recursively processes each element.
|
||||
* Helper for {@link #deepUnmodifiable(List)}; handles nested lists recursively.
|
||||
*
|
||||
* @param v The object to process.
|
||||
* @return Unmodifiable list if input is a list, otherwise the value itself.
|
||||
* @param v the object to process
|
||||
* @return an unmodifiable list if input is a list; otherwise, the object itself
|
||||
*/
|
||||
Object deepUnmodifiableObject(Object v) {
|
||||
if (v instanceof List<?> l) {
|
||||
@ -264,11 +302,10 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a deep mutable copy of a nested List structure, preserving element values.
|
||||
* Nested lists are turned into {@link java.util.ArrayList} so they can be modified by ARR_SET.
|
||||
* Creates a deeply mutable version of the provided list (and its nested lists).
|
||||
*
|
||||
* @param l The source list.
|
||||
* @return Deep mutable copy of the list.
|
||||
* @param l the original list
|
||||
* @return a new mutable list (ArrayList), with all nested lists mutable
|
||||
*/
|
||||
private static java.util.List<?> deepMutable(java.util.List<?> l) {
|
||||
java.util.List<Object> out = new java.util.ArrayList<>(l.size());
|
||||
@ -277,10 +314,10 @@ public class RPushCommand implements Command {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for {@link #deepMutable(List)}. Recursively processes each element.
|
||||
* Helper for {@link #deepMutable(List)}; handles nested lists recursively.
|
||||
*
|
||||
* @param v The object to process.
|
||||
* @return Mutable list if input is a list, otherwise the value itself.
|
||||
* @param v the object to process
|
||||
* @return a mutable list if input is a list; otherwise, the object itself
|
||||
*/
|
||||
private static Object deepMutableObject(Object v) {
|
||||
if (v instanceof java.util.List<?> l) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user