diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index d14e585b8..619db8258 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -68,6 +68,8 @@ public class BytecodeCompiler implements Visitor { // Token index tracking for error reporting private final TreeMap pcToTokenIndex = new TreeMap<>(); int currentTokenIndex = -1; // Track current token for error reporting + // Callsite ID counter for /o modifier support (unique across all compilations) + private static int nextCallsiteId = 1; // Track last result register for expression chaining int lastResultReg = -1; // Target output register for ALIAS elimination (same save/restore pattern as currentCallContext). @@ -3605,6 +3607,14 @@ int allocateRegister() { return reg; } + /** + * Allocate a unique callsite ID for /o modifier support. + * Each callsite with /o gets a unique ID so the pattern is compiled only once per callsite. + */ + int allocateCallsiteId() { + return nextCallsiteId++; + } + int allocateOutputRegister() { if (targetOutputReg >= 0) { int reg = targetOutputReg; diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index dca2ce741..512307bd9 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -540,7 +540,7 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c // ================================================================= case Opcodes.DEFINED, Opcodes.REF, Opcodes.BLESS, Opcodes.ISA, Opcodes.PROTOTYPE, - Opcodes.QUOTE_REGEX -> { + Opcodes.QUOTE_REGEX, Opcodes.QUOTE_REGEX_O -> { pc = executeTypeOps(opcode, bytecode, pc, registers, code); } @@ -1858,6 +1858,14 @@ private static int executeTypeOps(int opcode, int[] bytecode, int pc, registers[rd] = RuntimeRegex.getQuotedRegex(registers[patternReg].scalar(), registers[flagsReg].scalar()); return pc; } + case Opcodes.QUOTE_REGEX_O -> { + int rd = bytecode[pc++]; + int patternReg = bytecode[pc++]; + int flagsReg = bytecode[pc++]; + int callsiteId = bytecode[pc++]; + registers[rd] = RuntimeRegex.getQuotedRegex(registers[patternReg].scalar(), registers[flagsReg].scalar(), callsiteId); + return pc; + } default -> throw new RuntimeException("Unknown type opcode: " + opcode); } } diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java index 395f99dff..1107fc742 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java @@ -297,21 +297,39 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode bytecodeCompiler.throwCompilerException("quoteRegex requires pattern and flags"); } + // Check if /o modifier is used (flags are typically a StringNode) + boolean hasOModifier = false; + Node flagsNode = operand.elements.get(1); + if (flagsNode instanceof StringNode) { + hasOModifier = ((StringNode) flagsNode).value.contains("o"); + } + // Compile pattern and flags operand.elements.get(0).accept(bytecodeCompiler); // Pattern int patternReg = bytecodeCompiler.lastResultReg; - operand.elements.get(1).accept(bytecodeCompiler); // Flags + flagsNode.accept(bytecodeCompiler); // Flags int flagsReg = bytecodeCompiler.lastResultReg; // Allocate result register int rd = bytecodeCompiler.allocateOutputRegister(); - // Emit QUOTE_REGEX opcode - bytecodeCompiler.emit(Opcodes.QUOTE_REGEX); - bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(patternReg); - bytecodeCompiler.emitReg(flagsReg); + // Emit appropriate opcode based on /o modifier + if (hasOModifier) { + // Use QUOTE_REGEX_O with callsite ID for /o modifier + int callsiteId = bytecodeCompiler.allocateCallsiteId(); + bytecodeCompiler.emit(Opcodes.QUOTE_REGEX_O); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(patternReg); + bytecodeCompiler.emitReg(flagsReg); + bytecodeCompiler.emitReg(callsiteId); + } else { + // Normal QUOTE_REGEX + bytecodeCompiler.emit(Opcodes.QUOTE_REGEX); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(patternReg); + bytecodeCompiler.emitReg(flagsReg); + } bytecodeCompiler.lastResultReg = rd; } else if (op.equals("++") || op.equals("--") || op.equals("++postfix") || op.equals("--postfix")) { @@ -1995,20 +2013,37 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode bytecodeCompiler.throwCompilerException("matchRegex requires pattern and flags"); } + // Check if /o modifier is used (flags are typically a StringNode) + boolean hasOModifier = false; + Node flagsNode = args.elements.get(1); + if (flagsNode instanceof StringNode) { + hasOModifier = ((StringNode) flagsNode).value.contains("o"); + } + // Compile pattern args.elements.get(0).accept(bytecodeCompiler); int patternReg = bytecodeCompiler.lastResultReg; // Compile flags - args.elements.get(1).accept(bytecodeCompiler); + flagsNode.accept(bytecodeCompiler); int flagsReg = bytecodeCompiler.lastResultReg; - // Create quoted regex using QUOTE_REGEX opcode + // Create quoted regex using appropriate opcode int regexReg = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emit(Opcodes.QUOTE_REGEX); - bytecodeCompiler.emitReg(regexReg); - bytecodeCompiler.emitReg(patternReg); - bytecodeCompiler.emitReg(flagsReg); + if (hasOModifier) { + // Use QUOTE_REGEX_O with callsite ID for /o modifier + int callsiteId = bytecodeCompiler.allocateCallsiteId(); + bytecodeCompiler.emit(Opcodes.QUOTE_REGEX_O); + bytecodeCompiler.emitReg(regexReg); + bytecodeCompiler.emitReg(patternReg); + bytecodeCompiler.emitReg(flagsReg); + bytecodeCompiler.emitReg(callsiteId); + } else { + bytecodeCompiler.emit(Opcodes.QUOTE_REGEX); + bytecodeCompiler.emitReg(regexReg); + bytecodeCompiler.emitReg(patternReg); + bytecodeCompiler.emitReg(flagsReg); + } // Check if a string was provided (from =~ binding) if (args.elements.size() > 2) { diff --git a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java index 1447d1a75..689030bd8 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java @@ -1273,6 +1273,14 @@ public String disassemble() { sb.append("QUOTE_REGEX r").append(rd).append(" = qr{r").append(patternReg) .append("}r").append(flagsReg).append("\n"); break; + case Opcodes.QUOTE_REGEX_O: + rd = bytecode[pc++]; + patternReg = bytecode[pc++]; + flagsReg = bytecode[pc++]; + int callsiteId = bytecode[pc++]; + sb.append("QUOTE_REGEX_O r").append(rd).append(" = qr{r").append(patternReg) + .append("}r").append(flagsReg).append(" callsite=").append(callsiteId).append("\n"); + break; case Opcodes.ITERATOR_CREATE: rd = bytecode[pc++]; rs = bytecode[pc++]; diff --git a/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java b/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java index eae31419c..ae7607844 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java +++ b/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java @@ -267,11 +267,14 @@ public static int executeStringConcatAssign(int[] bytecode, int pc, RuntimeBase[ if (BytecodeInterpreter.isImmutableProxy(registers[rd])) { registers[rd] = BytecodeInterpreter.ensureMutableScalar(registers[rd]); } + RuntimeScalar target = (RuntimeScalar) registers[rd]; RuntimeScalar result = StringOperators.stringConcat( - (RuntimeScalar) registers[rd], + target, (RuntimeScalar) registers[rs] ); - ((RuntimeScalar) registers[rd]).set(result); + target.set(result); + // Invalidate pos() - any string modification should reset pos to undef + RuntimePosLvalue.invalidatePos(target); return pc; } diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index 40e468685..073ef588f 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -1816,6 +1816,12 @@ public class Opcodes { */ public static final short TIMES = 373; + /** + * Quote regex with /o modifier support: rd = RuntimeRegex.getQuotedRegex(pattern_reg, flags_reg, callsite_id) + * Format: QUOTE_REGEX_O rd pattern_reg flags_reg callsite_id + */ + public static final short QUOTE_REGEX_O = 374; + private Opcodes() { } // Utility class - no instantiation } diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitBinaryOperator.java b/src/main/java/org/perlonjava/backend/jvm/EmitBinaryOperator.java index 978b8755a..563b5c5bf 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitBinaryOperator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitBinaryOperator.java @@ -291,6 +291,17 @@ static void handleCompoundAssignment(EmitterVisitor emitterVisitor, BinaryOperat } // assign to the Lvalue mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, "org/perlonjava/runtime/runtimetypes/RuntimeScalar", "set", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); + + // For string concat assign (.=), invalidate pos() since string was modified + if (node.operator.equals(".=")) { + mv.visitInsn(Opcodes.DUP); + mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/RuntimePosLvalue", + "invalidatePos", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)V", + false); + } + EmitOperator.handleVoidContext(emitterVisitor); } } diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitRegex.java b/src/main/java/org/perlonjava/backend/jvm/EmitRegex.java index c17e59bdc..9c386521e 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitRegex.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitRegex.java @@ -16,6 +16,8 @@ * transliteration and replacement. */ public class EmitRegex { + // Callsite ID counter for /o modifier support (unique across all JVM compilations) + private static int nextCallsiteId = 100000; // Start at 100000 to avoid collision with interpreter IDs /** * Handles the binding regex operation where a variable is bound to a regex operation. @@ -247,14 +249,29 @@ static void handleMatchRegex(EmitterVisitor emitterVisitor, OperatorNode node) { ListNode operand = (ListNode) node.operand; EmitterVisitor scalarVisitor = emitterVisitor.with(RuntimeContextType.SCALAR); + // Check if /o modifier is present + boolean hasOModifier = false; + Node flagsNode = operand.elements.get(1); + if (flagsNode instanceof StringNode) { + hasOModifier = ((StringNode) flagsNode).value.contains("o"); + } + // Process pattern and flags operand.elements.get(0).accept(scalarVisitor); // Pattern - operand.elements.get(1).accept(scalarVisitor); // Flags - - // Create the regex matcher - emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, - "org/perlonjava/runtime/regex/RuntimeRegex", "getQuotedRegex", - "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); + flagsNode.accept(scalarVisitor); // Flags + + // Create the regex matcher (use 3-argument version for /o) + if (hasOModifier) { + int callsiteId = nextCallsiteId++; + emitterVisitor.ctx.mv.visitLdcInsn(callsiteId); + emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/regex/RuntimeRegex", "getQuotedRegex", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;I)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); + } else { + emitterVisitor.ctx.mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/regex/RuntimeRegex", "getQuotedRegex", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); + } int regexSlot = emitterVisitor.ctx.javaClassInfo.acquireSpillSlot(); boolean pooledRegex = regexSlot >= 0; diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 01d23b18f..0e9188b25 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -42,6 +42,8 @@ protected boolean removeEldestEntry(Map.Entry eldest) { return size() > MAX_REGEX_CACHE_SIZE; } }; + // Cache for /o modifier - maps callsite ID to compiled regex (only first compilation is used) + private static final Map optimizedRegexCache = new LinkedHashMap<>(); // Global matcher used for regex operations public static Matcher globalMatcher; // Provides Perl regex variables like %+, %- public static String globalMatchString; // Provides Perl regex variables like $& @@ -314,6 +316,37 @@ public static RuntimeScalar getQuotedRegex(RuntimeScalar patternString, RuntimeS return new RuntimeScalar(compile(patternString.toString(), modifierStr)); } + /** + * Variant of getQuotedRegex that supports the /o modifier. + * When callsiteId is provided and modifiers contain 'o', the regex is compiled only once + * and cached for subsequent calls from the same callsite. + * + * @param patternString The regex pattern string. + * @param modifiers Modifiers for the regex pattern (may include 'o'). + * @param callsiteId Unique identifier for this callsite (used for /o caching). + * @return A RuntimeScalar representing the compiled regex. + */ + public static RuntimeScalar getQuotedRegex(RuntimeScalar patternString, RuntimeScalar modifiers, int callsiteId) { + String modifierStr = modifiers.toString(); + + // Check if /o modifier is present + if (modifierStr.contains("o")) { + // Check if we already have a cached regex for this callsite + RuntimeScalar cached = optimizedRegexCache.get(callsiteId); + if (cached != null) { + return cached; + } + + // Compile the regex and cache it + RuntimeScalar result = getQuotedRegex(patternString, modifiers); + optimizedRegexCache.put(callsiteId, result); + return result; + } + + // No /o modifier, use normal compilation + return getQuotedRegex(patternString, modifiers); + } + /** * Internal variant of qr// that includes a `replacement`. * This is the internal representation of the `s///` operation. @@ -409,6 +442,33 @@ public static RuntimeBase matchRegex(RuntimeScalar quotedRegex, RuntimeScalar st private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeScalar string, int ctx) { RuntimeRegex regex = resolveRegex(quotedRegex); regex = ensureCompiledForRuntime(regex); + + // Save original flags before potentially changing regex + RegexFlags originalFlags = regex.regexFlags; + + // Handle empty pattern - reuse last successful pattern or use empty pattern + if (regex.patternString == null || regex.patternString.isEmpty()) { + if (lastSuccessfulPattern != null) { + // Use the pattern from last successful match + // But keep the current flags (especially /g and /i) + Pattern pattern = lastSuccessfulPattern.pattern; + // Re-apply current flags if they differ + if (originalFlags != null && !originalFlags.equals(lastSuccessfulPattern.regexFlags)) { + // Need to recompile with current flags + int newFlags = originalFlags.toPatternFlags(); + pattern = Pattern.compile(lastSuccessfulPattern.patternString, newFlags); + } + // Create a temporary regex with the right pattern and current flags + RuntimeRegex tempRegex = new RuntimeRegex(); + tempRegex.pattern = pattern; + tempRegex.patternString = lastSuccessfulPattern.patternString; + tempRegex.hasPreservesMatch = lastSuccessfulPattern.hasPreservesMatch || (originalFlags != null && originalFlags.preservesMatch()); + tempRegex.regexFlags = originalFlags; + tempRegex.useGAssertion = originalFlags != null && originalFlags.useGAssertion(); + regex = tempRegex; + } + // If no previous pattern, the empty pattern matches empty string at start (default behavior) + } // Debug logging if (DEBUG_REGEX) { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimePosLvalue.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimePosLvalue.java index 8e6fb0807..9ac8697b9 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimePosLvalue.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimePosLvalue.java @@ -59,6 +59,21 @@ public static RuntimeScalar pos(RuntimeScalar perlVariable) { return position; } + /** + * Invalidate the pos() for a scalar when its string value is modified. + * This should be called on any string modification operation (.=, substr assignment, etc.) + * to ensure pos() returns undef after the modification. + * + * @param perlVariable the scalar whose pos should be invalidated + */ + public static void invalidatePos(RuntimeScalar perlVariable) { + if (perlVariable == null) { + return; + } + // Remove the cache entry entirely so pos() returns undef + positionCache.remove(perlVariable); + } + private static void clearZeroLengthMatchTracking(RuntimeScalar perlVariable) { CacheEntry cachedEntry = positionCache.get(perlVariable); if (cachedEntry != null) {