Perl5Matcher.java

Index Score
org.apache.oro.text.regex
Jakarta ORO

View: Reasons, Metrics, Source Code

These are the metrics that contribute to the Enerjy Score for this file, ranked by impact. So the metrics listed at the top influence the score to a greater extent that the metrics listed at the bottom.

MetricDescription
JAVA0034JAVA0034 Missing braces in if statement
CYCLOMATICCyclomatic complexity
COMPARISONSNumber of comparison operators
LOOPSNumber of loops
JAVA0039JAVA0039 Break statement with label
JAVA0177JAVA0177 Variable declaration missing initializer
ELOCEffective lines of code
LOGICAL_LINESNumber of statements
SIZESize of the file in bytes
LINESNumber of lines in the source file
LINE_COMMENTNumber of line comments
LOCLines of code
OPERATORSNumber of operators
PROGRAM_LENGTHHalstead program length
OPERANDSNumber of operands
JAVA0020JAVA0020 Field name does not have required form
DOC_COMMENTNumber of javadoc comment lines
COMMENTSComment lines
RETURNSNumber of return points from functions
JAVA0036JAVA0036 Missing braces in while statement
WHITESPACENumber of whitespace lines
INTERFACE_COMPLEXITYInterface complexity
JAVA0049JAVA0049 Nested block at depth N (maximum: M)
EXEC_COMMENTSComments in executable code
BLOCKSNumber of blocks
JAVA0031JAVA0031 Case statement not properly closed
JAVA0040JAVA0040 Switch statement contains N cases (maximum: M)
PARAMSNumber of formal parameter declarations
JAVA0018JAVA0018 Method name does not have required form
PROGRAM_VOCABHalstead program vocabulary
UNIQUE_OPERANDSNumber of unique operands
JAVA0174JAVA0174 Assigned local variable never used
JAVA0076JAVA0076 Use of magic number
JAVA0067JAVA0067 Array descriptor on identifier name
JAVA0035JAVA0035 Missing braces in for statement
JAVA0032JAVA0032 Switch statement missing default
UNIQUE_OPERATORSNumber of unique operators
NEST_DEPTHMaximum nesting depth
JAVA0256JAVA0256 Assignment of external collection/array to field
JAVA0117JAVA0117 Missing javadoc: method 'method'
JAVA0022JAVA0022 Static final field name does not have required form
PROGRAM_VOLUMEHalstead program volume
DECL_COMMENTSComments in declarations
JAVA0136JAVA0136 N methods defined in class (maximum: M)
JAVA0126JAVA0126 Method declares unchecked exception in throws
JAVA0110JAVA0110 Incorrect javadoc: no @return tag
JAVA0100JAVA0100 Class contains N non-final fields (maximum: M)
JAVA0108JAVA0108 Incorrect javadoc: no @param tag for 'parameter'
EXITSProcedure exits
JAVA0130JAVA0130 Non-static method does not use instance fields
JAVA0075JAVA0075 Method parameter hides field
/* * $Id: Perl5Matcher.java 124053 2005-01-04 01:24:35Z dfs $ * * Copyright 2000-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.oro.text.regex; import java.util.*; /** * The Perl5Matcher class is used to match regular expressions * (conforming to the Perl5 regular expression syntax) generated by * Perl5Compiler. * <p> * Perl5Compiler and Perl5Matcher are designed with the intent that * you use a separate instance of each per thread to avoid the overhead * of both synchronization and concurrent access (e.g., a match that takes * a long time in one thread will block the progress of another thread with * a shorter match). If you want to use a single instance of each * in a concurrent program, you must appropriately protect access to * the instances with critical sections. If you want to share Perl5Pattern * instances between concurrently executing instances of Perl5Matcher, you * must compile the patterns with {@link Perl5Compiler#READ_ONLY_MASK}. * * @version @version@ * @since 1.0 * @see PatternMatcher * @see Perl5Compiler */ public final class Perl5Matcher implements PatternMatcher { private static final char __EOS = Character.MAX_VALUE; private static final int __INITIAL_NUM_OFFSETS = 20; private boolean __multiline = false, __lastSuccess = false; private boolean __caseInsensitive = false; private char __previousChar, __input[], __originalInput[]; private Perl5Repetition __currentRep; private int __numParentheses, __bol, __eol, __currentOffset, __endOffset; private char[] __program; private int __expSize, __inputOffset, __lastParen; private int[] __beginMatchOffsets, __endMatchOffsets; private Stack __stack = new Stack(); private Perl5MatchResult __lastMatchResult = null; private static boolean __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n) { int cnt; for(cnt = 0; cnt < n; cnt++, s1Offs++, s2Offs++) { if(s1Offs >= s1.length) return false; if(s2Offs >= s2.length) return false; if(s1[s1Offs] != s2[s2Offs]) return false; } return true; } private static int __findFirst(char[] input, int current, int endOffset, char[] mustString) { int count, saveCurrent; char ch; if(input.length == 0) return endOffset; ch = mustString[0]; // Find the offset of the first character of the must string while(current < endOffset) { if(ch == input[current]){ saveCurrent = current; count = 0; while(current < endOffset && count < mustString.length) { if(mustString[count] != input[current]) break; ++count; ++current; } current = saveCurrent; if(count >= mustString.length) break; } ++current; } return current; } private void __pushState(int parenFloor) { int[] state; int stateEntries, paren; stateEntries = 3*(__expSize - parenFloor); if(stateEntries <= 0) state = new int[3]; else state = new int[stateEntries + 3]; state[0] = __expSize; state[1] = __lastParen; state[2] = __inputOffset; for(paren = __expSize; paren > parenFloor; --paren, stateEntries-=3) { state[stateEntries] = __endMatchOffsets[paren]; state[stateEntries + 1] = __beginMatchOffsets[paren]; state[stateEntries + 2] = paren; } __stack.push(state); } private void __popState() { int[] state; int entry, paren; state = (int[])__stack.pop(); __expSize = state[0]; __lastParen = state[1]; __inputOffset = state[2]; for(entry = 3; entry < state.length; entry+=3) { paren = state[entry + 2]; __beginMatchOffsets[paren] = state[entry + 1]; if(paren <= __lastParen) __endMatchOffsets[paren] = state[entry]; } for(paren = __lastParen + 1; paren <= __numParentheses; paren++) { if(paren > __expSize) __beginMatchOffsets[paren] = OpCode._NULL_OFFSET; __endMatchOffsets[paren] = OpCode._NULL_OFFSET; } } // Initialize globals needed before calling __tryExpression for first time private void __initInterpreterGlobals(Perl5Pattern expression, char[] input, int beginOffset, int endOffset, int currentOffset) { // Remove this hack after more efficient case-folding and unicode // character classes are implemented __caseInsensitive = expression._isCaseInsensitive; __input = input; __endOffset = endOffset; __currentRep = new Perl5Repetition(); __currentRep._numInstances = 0; __currentRep._lastRepetition = null; __program = expression._program; __stack.setSize(0); // currentOffset should always be >= beginOffset and should // always be equal to zero when beginOffset equals 0, but we // make a weak attempt to protect against a violation of this // precondition if(currentOffset == beginOffset || currentOffset <= 0) __previousChar = '\n'; else { __previousChar = input[currentOffset - 1]; if(!__multiline && __previousChar == '\n') __previousChar = '\0'; } __numParentheses = expression._numParentheses; __currentOffset = currentOffset; __bol = beginOffset; __eol = endOffset; // Ok, here we're using endOffset as a temporary variable. endOffset = __numParentheses + 1; if(__beginMatchOffsets == null || endOffset > __beginMatchOffsets.length) { if(endOffset < __INITIAL_NUM_OFFSETS) endOffset = __INITIAL_NUM_OFFSETS; __beginMatchOffsets = new int[endOffset]; __endMatchOffsets = new int[endOffset]; } } // Set the match result information. Only call this if we successfully // matched. private void __setLastMatchResult() { int offs, maxEndOffs = 0; //endOffset+=dontTry; __lastMatchResult = new Perl5MatchResult(__numParentheses + 1); // This can happen when using Perl5StreamInput if(__endMatchOffsets[0] > __originalInput.length) throw new ArrayIndexOutOfBoundsException(); __lastMatchResult._matchBeginOffset_ = __beginMatchOffsets[0]; while(__numParentheses >= 0) { offs = __beginMatchOffsets[__numParentheses]; if(offs >= 0) __lastMatchResult._beginGroupOffset_[__numParentheses] = offs - __lastMatchResult._matchBeginOffset_; else __lastMatchResult._beginGroupOffset_[__numParentheses] = OpCode._NULL_OFFSET; offs = __endMatchOffsets[__numParentheses]; if(offs >= 0) { __lastMatchResult._endGroupOffset_[__numParentheses] = offs - __lastMatchResult._matchBeginOffset_; if(offs > maxEndOffs && offs <= __originalInput.length) maxEndOffs = offs; } else __lastMatchResult._endGroupOffset_[__numParentheses] = OpCode._NULL_OFFSET; --__numParentheses; } __lastMatchResult._match_ = new String(__originalInput, __beginMatchOffsets[0], maxEndOffs - __beginMatchOffsets[0]); // Free up for garbage collection __originalInput = null; } // Expects to receive a valid regular expression program. No checking // is done to ensure validity. // __originalInput must be set before calling this method for // __lastMatchResult to be set correctly. // beginOffset marks the beginning of the string // currentOffset marks where to start the pattern search private boolean __interpret(Perl5Pattern expression, char[] input, int beginOffset, int endOffset, int currentOffset) { boolean success; int minLength = 0, dontTry = 0, offset; char ch, mustString[]; __initInterpreterGlobals(expression, input, beginOffset, endOffset, currentOffset); success = false; mustString = expression._mustString; _mainLoop: while(true) { if(mustString != null && ((expression._anchor & Perl5Pattern._OPT_ANCH) == 0 || ((__multiline || (expression._anchor & Perl5Pattern._OPT_ANCH_MBOL) != 0) && expression._back >= 0))) { __currentOffset = __findFirst(__input, __currentOffset, endOffset, mustString); if(__currentOffset >= endOffset) { if((expression._options & Perl5Compiler.READ_ONLY_MASK) == 0) expression._mustUtility++; success = false; break _mainLoop; } else if(expression._back >= 0) { __currentOffset-=expression._back; if(__currentOffset < currentOffset) __currentOffset = currentOffset; minLength = expression._back + mustString.length; } else if(!expression._isExpensive && (expression._options & Perl5Compiler.READ_ONLY_MASK) == 0 && (--expression._mustUtility < 0)) { // Be careful! The preceding logical expression is constructed // so that mustUtility is only decremented if the expression is // compiled without READ_ONLY_MASK. mustString = expression._mustString = null; __currentOffset = currentOffset; } else { __currentOffset = currentOffset; minLength = mustString.length; } } if((expression._anchor & Perl5Pattern._OPT_ANCH) != 0) { if(__currentOffset == beginOffset && __tryExpression(beginOffset)) { success = true; break _mainLoop; } else if(__multiline || (expression._anchor & Perl5Pattern._OPT_ANCH_MBOL) != 0 || (expression._anchor & Perl5Pattern._OPT_IMPLICIT) != 0) { if(minLength > 0) dontTry = minLength - 1; endOffset-=dontTry; if(__currentOffset > currentOffset) --__currentOffset; while(__currentOffset < endOffset) { if(__input[__currentOffset++] == '\n') { if(__currentOffset < endOffset && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } } } } break _mainLoop; } if(expression._startString != null) { mustString = expression._startString; if((expression._anchor & Perl5Pattern._OPT_SKIP) != 0) { ch = mustString[0]; while(__currentOffset < endOffset) { if(ch == __input[__currentOffset]) { if(__tryExpression(__currentOffset)){ success = true; break _mainLoop; } ++__currentOffset; while(__currentOffset < endOffset && __input[__currentOffset] == ch) ++__currentOffset; } ++__currentOffset; } } else { while((__currentOffset = __findFirst(__input, __currentOffset, endOffset, mustString)) < endOffset){ if(__tryExpression(__currentOffset)) { success = true; break _mainLoop; } ++__currentOffset; } } break _mainLoop; } if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) { boolean doEvery, tmp; char op; doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0); if(minLength > 0) dontTry = minLength - 1; endOffset -= dontTry; tmp = true; switch(op = __program[offset]) { case OpCode._ANYOF: offset = OpCode._getOperand(offset); while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(ch < 256 && (__program[offset + (ch >> 4)] & (1 << (ch & 0xf))) == 0) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._ANYOFUN: case OpCode._NANYOFUN: offset = OpCode._getOperand(offset); while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(__matchUnicodeClass(ch, __program, offset, op)) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._BOUND: if(minLength > 0) { ++dontTry; --endOffset; } if(__currentOffset != beginOffset) { ch = __input[__currentOffset - 1]; tmp = OpCode._isWordCharacter(ch); } else tmp = OpCode._isWordCharacter(__previousChar); while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(tmp != OpCode._isWordCharacter(ch)){ tmp = !tmp; if(__tryExpression(__currentOffset)) { success = true; break _mainLoop; } } ++__currentOffset; } if((minLength > 0 || tmp) && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } break; case OpCode._NBOUND: if(minLength > 0) { ++dontTry; --endOffset; } if(__currentOffset != beginOffset) { ch = __input[__currentOffset - 1]; tmp = OpCode._isWordCharacter(ch); } else tmp = OpCode._isWordCharacter(__previousChar); while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(tmp != OpCode._isWordCharacter(ch)) tmp = !tmp; else if(__tryExpression(__currentOffset)) { success = true; break _mainLoop; } ++__currentOffset; } if((minLength > 0 || !tmp) && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } break; case OpCode._ALNUM: while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(OpCode._isWordCharacter(ch)) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._NALNUM: while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(!OpCode._isWordCharacter(ch)) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._SPACE: while(__currentOffset < endOffset) { if(Character.isWhitespace(__input[__currentOffset])) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._NSPACE: while(__currentOffset < endOffset) { if(!Character.isWhitespace(__input[__currentOffset])) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._DIGIT: while(__currentOffset < endOffset) { if(Character.isDigit(__input[__currentOffset])) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._NDIGIT: while(__currentOffset < endOffset) { if(!Character.isDigit(__input[__currentOffset])) { if(tmp && __tryExpression(__currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; } // end switch } else { if(minLength > 0) dontTry = minLength - 1; endOffset-=dontTry; do { if(__tryExpression(__currentOffset)) { success = true; break _mainLoop; } } while(__currentOffset++ < endOffset); } break _mainLoop; } // end while __lastSuccess = success; __lastMatchResult = null; return success; } private boolean __matchUnicodeClass(char code, char __program[], int offset ,char opcode) { boolean isANYOF = ( opcode == OpCode._ANYOFUN ); while( __program[offset] != OpCode._END ){ if( __program[offset] == OpCode._RANGE ){ offset++; if((code >= __program[offset]) && (code <= __program[offset+1])){ return isANYOF; } else { offset+=2; } } else if(__program[offset] == OpCode._ONECHAR) { offset++; if(__program[offset++] == code) return isANYOF; } else { isANYOF = (__program[offset] == OpCode._OPCODE) ? isANYOF : !isANYOF; offset++; switch ( __program[offset++] ) { case OpCode._ALNUM: if(OpCode._isWordCharacter(code)) return isANYOF; break; case OpCode._NALNUM: if(!OpCode._isWordCharacter(code)) return isANYOF; break; case OpCode._SPACE: if(Character.isWhitespace(code)) return isANYOF; break; case OpCode._NSPACE: if(!Character.isWhitespace(code)) return isANYOF; break; case OpCode._DIGIT: if(Character.isDigit(code)) return isANYOF; break; case OpCode._NDIGIT: if(!Character.isDigit(code)) return isANYOF; break; case OpCode._ALNUMC: if(Character.isLetterOrDigit(code)) return isANYOF; break; case OpCode._ALPHA: if(Character.isLetter(code)) return isANYOF; break; case OpCode._BLANK: if(Character.isSpaceChar(code)) return isANYOF; break; case OpCode._CNTRL: if(Character.isISOControl(code)) return isANYOF; break; case OpCode._LOWER: if(Character.isLowerCase(code)) return isANYOF; // Remove this hack after more efficient case-folding and unicode // character classes are implemented if(__caseInsensitive && Character.isUpperCase(code)) return isANYOF; break; case OpCode._UPPER: if(Character.isUpperCase(code)) return isANYOF; // Remove this hack after more efficient case-folding and unicode // character classes are implemented if(__caseInsensitive && Character.isLowerCase(code)) return isANYOF; break; case OpCode._PRINT: if(Character.isSpaceChar(code)) return isANYOF; // Fall through to check if the character is alphanumeric, // or a punctuation mark. Printable characters are either // alphanumeric, punctuation marks, or spaces. case OpCode._GRAPH: if(Character.isLetterOrDigit(code)) return isANYOF; // Fall through to check if the character is a punctuation mark. // Graph characters are either alphanumeric or punctuation. case OpCode._PUNCT: switch ( Character.getType(code) ) { case Character.DASH_PUNCTUATION: case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: case Character.CONNECTOR_PUNCTUATION: case Character.OTHER_PUNCTUATION: case Character.MATH_SYMBOL: case Character.CURRENCY_SYMBOL: case Character.MODIFIER_SYMBOL: return isANYOF; default: break; } break; case OpCode._XDIGIT: if( (code >= '0' && code <= '9') || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F')) return isANYOF; break; case OpCode._ASCII: if(code < 0x80)return isANYOF; } } } return !isANYOF; } private boolean __tryExpression(int offset) { int count; __inputOffset = offset; __lastParen = 0; __expSize = 0; if(__numParentheses > 0) { for(count=0; count <= __numParentheses; count++) { __beginMatchOffsets[count] = OpCode._NULL_OFFSET; __endMatchOffsets[count] = OpCode._NULL_OFFSET; } } if(__match(1)){ __beginMatchOffsets[0] = offset; __endMatchOffsets[0] = __inputOffset; return true; } return false; } private int __repeat(int offset, int max) { int scan, eol, operand, ret; char ch; char op; scan = __inputOffset; eol = __eol; if(max != Character.MAX_VALUE && max < eol - scan) eol = scan + max; operand = OpCode._getOperand(offset); switch(op = __program[offset]) { case OpCode._ANY: while(scan < eol && __input[scan] != '\n') ++scan; break; case OpCode._SANY: scan = eol; break; case OpCode._EXACTLY: ++operand; while(scan < eol && __program[operand] == __input[scan]) ++scan; break; case OpCode._ANYOF: if(scan < eol && (ch = __input[scan]) < 256) { while((ch < 256 ) && (__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) { if(++scan < eol) ch = __input[scan]; else break; } } break; case OpCode._ANYOFUN: case OpCode._NANYOFUN: if(scan < eol) { ch = __input[scan]; while(__matchUnicodeClass(ch, __program, operand, op)){ if(++scan < eol) ch = __input[scan]; else break; } } break; case OpCode._ALNUM: while(scan < eol && OpCode._isWordCharacter(__input[scan])) ++scan; break; case OpCode._NALNUM: while(scan < eol && !OpCode._isWordCharacter(__input[scan])) ++scan; break; case OpCode._SPACE: while(scan < eol && Character.isWhitespace(__input[scan])) ++scan; break; case OpCode._NSPACE: while(scan < eol && !Character.isWhitespace(__input[scan])) ++scan; break; case OpCode._DIGIT: while(scan < eol && Character.isDigit(__input[scan])) ++scan; break; case OpCode._NDIGIT: while(scan < eol && !Character.isDigit(__input[scan])) ++scan; break; default: break; } ret = scan - __inputOffset; __inputOffset = scan; return ret; } private boolean __match(int offset) { char nextChar, op; int scan, next, input, maxScan, current, line, arg; boolean inputRemains = true, minMod = false; Perl5Repetition rep; input = __inputOffset; inputRemains = (input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); scan = offset; maxScan = __program.length; while(scan < maxScan /*&& scan > 0*/){ next = OpCode._getNext(__program, scan); switch(op = __program[scan]) { case OpCode._BOL: if(input == __bol ? __previousChar == '\n' : (__multiline && (inputRemains || input < __eol) && __input[input - 1] == '\n')) break; return false; case OpCode._MBOL: if(input == __bol ? __previousChar == '\n' : ((inputRemains || input < __eol) && __input[input - 1] == '\n')) break; return false; case OpCode._SBOL: if(input == __bol && __previousChar == '\n') break; return false; case OpCode._GBOL: if(input == __bol) break; return true; case OpCode._EOL : if((inputRemains || input < __eol) && nextChar != '\n') return false; if(!__multiline && __eol - input > 1) return false; break; case OpCode._MEOL: if((inputRemains || input < __eol) && nextChar != '\n') return false; break; case OpCode._SEOL: if((inputRemains || input < __eol) && nextChar != '\n') return false; if(__eol - input > 1) return false; break; case OpCode._SANY: if(!inputRemains && input >= __eol) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._ANY: if((!inputRemains && input >= __eol) || nextChar == '\n') return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._EXACTLY: current = OpCode._getOperand(scan); line = __program[current++]; if(__program[current] != nextChar) return false; if(__eol - input < line) return false; if(line > 1 && !__compare(__program, current, __input, input, line)) return false; input+=line; inputRemains = (input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._ANYOF: current = OpCode._getOperand(scan); if(nextChar == __EOS && inputRemains) nextChar = __input[input]; if(nextChar >= 256 || (__program[current + (nextChar >> 4)] & (1 << (nextChar & 0xf))) != 0) return false; if(!inputRemains && input >= __eol) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._ANYOFUN: case OpCode._NANYOFUN: current = OpCode._getOperand(scan); if(nextChar == __EOS && inputRemains) nextChar = __input[input]; if(!__matchUnicodeClass(nextChar, __program, current, op)) return false; if(!inputRemains && input >= __eol) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._ALNUM: if(!inputRemains) return false; if(!OpCode._isWordCharacter(nextChar)) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._NALNUM: if(!inputRemains && input >= __eol) return false; if(OpCode._isWordCharacter(nextChar)) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._NBOUND: case OpCode._BOUND: boolean a, b; if(input == __bol) a = OpCode._isWordCharacter(__previousChar); else a = OpCode._isWordCharacter(__input[input - 1]); b = OpCode._isWordCharacter(nextChar); if((a == b) == (__program[scan] == OpCode._BOUND)) return false; break; case OpCode._SPACE: if(!inputRemains && input >= __eol) return false; if(!Character.isWhitespace(nextChar)) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._NSPACE: if(!inputRemains) return false; if(Character.isWhitespace(nextChar)) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._DIGIT: if(!Character.isDigit(nextChar)) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._NDIGIT: if(!inputRemains && input >= __eol) return false; if(Character.isDigit(nextChar)) return false; inputRemains = (++input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._REF: arg = OpCode._getArg1(__program, scan); current = __beginMatchOffsets[arg]; if(current == OpCode._NULL_OFFSET) return false; if(__endMatchOffsets[arg] == OpCode._NULL_OFFSET) return false; if(current == __endMatchOffsets[arg]) break; if(__input[current] != nextChar) return false; line = __endMatchOffsets[arg] - current; if(input + line > __eol) return false; if(line > 1 && !__compare(__input, current, __input, input, line)) return false; input+=line; inputRemains = (input < __endOffset); nextChar = (inputRemains ? __input[input] : __EOS); break; case OpCode._NOTHING: break; case OpCode._BACK: break; case OpCode._OPEN: arg = OpCode._getArg1(__program, scan); __beginMatchOffsets[arg] = input; if(arg > __expSize) __expSize = arg; break; case OpCode._CLOSE: arg = OpCode._getArg1(__program, scan); __endMatchOffsets[arg] = input; if(arg > __lastParen) __lastParen = arg; break; case OpCode._CURLYX: rep = new Perl5Repetition(); rep._lastRepetition = __currentRep; __currentRep = rep; rep._parenFloor = __lastParen; rep._numInstances = -1; rep._min = OpCode._getArg1(__program, scan); rep._max = OpCode._getArg2(__program, scan); rep._scan = OpCode._getNextOperator(scan) + 2; rep._next = next; rep._minMod = minMod; // Must initialize to -1 because if we initialize to 0 and are // at the beginning of the input the OpCode._WHILEM case will // not work right. rep._lastLocation = -1; __inputOffset = input; // use minMod as temporary minMod = __match(OpCode._getPrevOperator(next)); // leave scope call not pertinent? __currentRep = rep._lastRepetition; return minMod; case OpCode._WHILEM: rep = __currentRep; arg = rep._numInstances + 1; __inputOffset = input; if(input == rep._lastLocation) { __currentRep = rep._lastRepetition; line = __currentRep._numInstances; if(__match(rep._next)) return true; __currentRep._numInstances = line; __currentRep = rep; return false; } if(arg < rep._min) { rep._numInstances = arg; rep._lastLocation = input; if(__match(rep._scan)) return true; rep._numInstances = arg - 1; return false; } if(rep._minMod) { __currentRep = rep._lastRepetition; line = __currentRep._numInstances; if(__match(rep._next)) return true; __currentRep._numInstances = line; __currentRep = rep; if(arg >= rep._max) return false; __inputOffset = input; rep._numInstances = arg; rep._lastLocation = input; if(__match(rep._scan)) return true; rep._numInstances = arg - 1; return false; } if(arg < rep._max) { __pushState(rep._parenFloor); rep._numInstances = arg; rep._lastLocation = input; if(__match(rep._scan)) return true; __popState(); __inputOffset = input; } __currentRep = rep._lastRepetition; line = __currentRep._numInstances; if(__match(rep._next)) return true; rep._numInstances = line; __currentRep = rep; rep._numInstances = arg - 1; return false; case OpCode._BRANCH: if(__program[next] != OpCode._BRANCH) next = OpCode._getNextOperator(scan); else { int lastParen; lastParen = __lastParen; do { __inputOffset = input; if(__match(OpCode._getNextOperator(scan))) return true; for(arg = __lastParen; arg > lastParen; --arg) //__endMatchOffsets[arg] = 0; __endMatchOffsets[arg] = OpCode._NULL_OFFSET; __lastParen = arg; scan = OpCode._getNext(__program, scan); } while(scan != OpCode._NULL_OFFSET && __program[scan] == OpCode._BRANCH); return false; } break; case OpCode._MINMOD: minMod = true; break; case OpCode._CURLY: case OpCode._STAR: case OpCode._PLUS: if(op == OpCode._CURLY) { line = OpCode._getArg1(__program, scan); arg = OpCode._getArg2(__program, scan); scan = OpCode._getNextOperator(scan) + 2; } else if(op == OpCode._STAR) { line = 0; arg = Character.MAX_VALUE; scan = OpCode._getNextOperator(scan); } else { line = 1; arg = Character.MAX_VALUE; scan = OpCode._getNextOperator(scan); } if(__program[next] == OpCode._EXACTLY) { nextChar = __program[OpCode._getOperand(next) + 1]; current = 0; } else { nextChar = __EOS; current = -1000; } __inputOffset = input; if(minMod) { minMod = false; if(line > 0 && __repeat(scan, line) < line) return false; while(arg >= line || (arg == Character.MAX_VALUE && line > 0)) { // there may be a bug here with respect to // __inputOffset >= __endOffset, but it seems to be right for // now. the issue is with __inputOffset being reset later. // is this test really supposed to happen here? if(current == -1000 || __inputOffset >= __endOffset || __input[__inputOffset] == nextChar) { if(__match(next)) return true; } __inputOffset = input + line; if(__repeat(scan, 1) != 0) { ++line; __inputOffset = input + line; } else return false; } } else { arg = __repeat(scan, arg); if(line < arg && OpCode._opType[__program[next]] == OpCode._EOL && ((!__multiline && __program[next] != OpCode._MEOL) || __program[next] == OpCode._SEOL)) line = arg; while(arg >= line) { // there may be a bug here with respect to // __inputOffset >= __endOffset, but it seems to be right for // now. the issue is with __inputOffset being reset later. // is this test really supposed to happen here? if(current == -1000 || __inputOffset >= __endOffset || __input[__inputOffset] == nextChar) { if(__match(next)) return true; } --arg; __inputOffset = input + arg; } } return false; case OpCode._SUCCEED: case OpCode._END: __inputOffset = input; // This enforces the rule that two consecutive matches cannot have // the same end offset. if(__inputOffset == __lastMatchInputEndOffset) return false; return true; case OpCode._IFMATCH: __inputOffset = input; scan = OpCode._getNextOperator(scan); if(!__match(scan)) return false; break; case OpCode._UNLESSM: __inputOffset = input; scan = OpCode._getNextOperator(scan); if(__match(scan)) return false; break; default: // todo: Need to throw an exception here. } // end switch //scan = (next > 0 ? next : 0); scan = next; } // end while scan return false; } /** * Set whether or not subsequent calls to {@link #matches matches()} * or {@link #contains contains()} should treat the input as * consisting of multiple lines. The default behavior is for * input to be treated as consisting of multiple lines. This method * should only be called if the Perl5Pattern used for a match was * compiled without either of the Perl5Compiler.MULTILINE_MASK or * Perl5Compiler.SINGLELINE_MASK flags, and you want to alter the * behavior of how the <b>^</b>, <b>$</b>, and <b>.</b> metacharacters are * interpreted on the fly. The compilation options used when compiling * a pattern ALWAYS override the behavior specified by setMultiline(). See * {@link Perl5Compiler} for more details. * <p> * @param multiline If set to true treats the input as consisting of * multiple lines with respect to the <b>^</b> and <b>$</b> * metacharacters. If set to false treats the input as consisting * of a single line with respect to the <b>^</b> and <b>$</b> * metacharacters. */ public void setMultiline(boolean multiline) { __multiline = multiline; } /** * @return True if the matcher is treating input as consisting of multiple * lines with respect to the <b>^</b> and <b>$</b> metacharacters, * false otherwise. */ public boolean isMultiline() { return __multiline; } char[] _toLower(char[] input) { int current; char[] inp; // todo: // Certainly not the best way to do case insensitive matching. // Must definitely change this in some way, but for now we // do what Perl does and make a copy of the input, converting // it all to lowercase. This is truly better handled in the // compilation phase. inp = new char[input.length]; System.arraycopy(input, 0, inp, 0, input.length); input = inp; // todo: Need to inline toLowerCase() for(current = 0; current < input.length; current++) if(Character.isUpperCase(input[current])) input[current] = Character.toLowerCase(input[current]); return input; } /** * Determines if a prefix of a string (represented as a char[]) * matches a given pattern, starting from a given offset into the string. * If a prefix of the string matches the pattern, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The char[] to test for a prefix match. * @param pattern The Pattern to be matched. * @param offset The offset at which to start searching for the prefix. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(char[] input, Pattern pattern, int offset) { Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input; if(expression._isCaseInsensitive) input = _toLower(input); __initInterpreterGlobals(expression, input, 0, input.length, offset); __lastSuccess = __tryExpression(offset); __lastMatchResult = null; return __lastSuccess; } /** * Determines if a prefix of a string (represented as a char[]) * matches a given pattern. * If a prefix of the string matches the pattern, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The char[] to test for a prefix match. * @param pattern The Pattern to be matched. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(char[] input, Pattern pattern) { return matchesPrefix(input, pattern, 0); } /** * Determines if a prefix of a string matches a given pattern. * If a prefix of the string matches the pattern, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The String to test for a prefix match. * @param pattern The Pattern to be matched. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(String input, Pattern pattern) { return matchesPrefix(input.toCharArray(), pattern, 0); } /** * Determines if a prefix of a PatternMatcherInput instance * matches a given pattern. If there is a match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. Unlike the * {@link #contains(PatternMatcherInput, Pattern)} * method, the current offset of the PatternMatcherInput argument * is not updated. However, unlike the * {@link #matches matches(PatternMatcherInput, Pattern)} method, * matchesPrefix() will start its search from the current offset * rather than the begin offset of the PatternMatcherInput. * <p> * This method is useful for certain common token identification tasks * that are made more difficult without this functionality. * <p> * @param input The PatternMatcherInput to test for a prefix match. * @param pattern The Pattern to be matched. * @return True if input matches pattern, false otherwise. */ public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern) { char[] inp; Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input._originalBuffer; if(expression._isCaseInsensitive) { if(input._toLowerBuffer == null) input._toLowerBuffer = _toLower(__originalInput); inp = input._toLowerBuffer; } else inp = __originalInput; __initInterpreterGlobals(expression, inp, input._beginOffset, input._endOffset, input._currentOffset); __lastSuccess = __tryExpression(input._currentOffset); __lastMatchResult = null; return __lastSuccess; } /** * Determines if a string (represented as a char[]) exactly * matches a given pattern. If * there is an exact match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. The pattern must be * a Perl5Pattern instance, otherwise a ClassCastException will * be thrown. You are not required to, and indeed should NOT try to * (for performance reasons), catch a ClassCastException because it * will never be thrown as long as you use a Perl5Pattern as the pattern * parameter. * <p> * <b>Note:</b> matches() is not the same as sticking a ^ in front of * your expression and a $ at the end of your expression in Perl5 * and using the =~ operator, even though in many cases it will be * equivalent. matches() literally looks for an exact match according * to the rules of Perl5 expression matching. Therefore, if you have * a pattern <em>foo|foot</em> and are matching the input <em>foot</em> * it will not produce an exact match. But <em>foot|foo</em> will * produce an exact match for either <em>foot</em> or <em>foo</em>. * Remember, Perl5 regular expressions do not match the longest * possible match. From the perlre manpage: * <blockquote> * Alternatives are tried from left to right, so the first * alternative found for which the entire expression matches, * is the one that is chosen. This means that alternatives * are not necessarily greedy. For example: when matching * foo|foot against "barefoot", only the "foo" part will * match, as that is the first alternative tried, and it * successfully matches the target string. * </blockquote> * <p> * @param input The char[] to test for an exact match. * @param pattern The Perl5Pattern to be matched. * @return True if input matches pattern, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean matches(char[] input, Pattern pattern) { Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input; if(expression._isCaseInsensitive) input = _toLower(input); __initInterpreterGlobals(expression, input, 0, input.length, 0); __lastSuccess = (__tryExpression(0) && __endMatchOffsets[0] == input.length); __lastMatchResult = null; return __lastSuccess; } /** * Determines if a string exactly matches a given pattern. If * there is an exact match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. The pattern must be * a Perl5Pattern instance, otherwise a ClassCastException will * be thrown. You are not required to, and indeed should NOT try to * (for performance reasons), catch a ClassCastException because it * will never be thrown as long as you use a Perl5Pattern as the pattern * parameter. * <p> * <b>Note:</b> matches() is not the same as sticking a ^ in front of * your expression and a $ at the end of your expression in Perl5 * and using the =~ operator, even though in many cases it will be * equivalent. matches() literally looks for an exact match according * to the rules of Perl5 expression matching. Therefore, if you have * a pattern <em>foo|foot</em> and are matching the input <em>foot</em> * it will not produce an exact match. But <em>foot|foo</em> will * produce an exact match for either <em>foot</em> or <em>foo</em>. * Remember, Perl5 regular expressions do not match the longest * possible match. From the perlre manpage: * <blockquote> * Alternatives are tried from left to right, so the first * alternative found for which the entire expression matches, * is the one that is chosen. This means that alternatives * are not necessarily greedy. For example: when matching * foo|foot against "barefoot", only the "foo" part will * match, as that is the first alternative tried, and it * successfully matches the target string. * </blockquote> * <p> * @param input The String to test for an exact match. * @param pattern The Perl5Pattern to be matched. * @return True if input matches pattern, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean matches(String input, Pattern pattern) { return matches(input.toCharArray(), pattern); } /** * Determines if the contents of a PatternMatcherInput instance * exactly matches a given pattern. If * there is an exact match, a MatchResult instance * representing the match is made accesible via * {@link #getMatch()}. Unlike the * {@link #contains(PatternMatcherInput, Pattern)} * method, the current offset of the PatternMatcherInput argument * is not updated. You should remember that the region between * the begin (NOT the current) and end offsets of the PatternMatcherInput * will be tested for an exact match. * <p> * The pattern must be a Perl5Pattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * a Perl5Pattern as the pattern parameter. * <p> * <b>Note:</b> matches() is not the same as sticking a ^ in front of * your expression and a $ at the end of your expression in Perl5 * and using the =~ operator, even though in many cases it will be * equivalent. matches() literally looks for an exact match according * to the rules of Perl5 expression matching. Therefore, if you have * a pattern <em>foo|foot</em> and are matching the input <em>foot</em> * it will not produce an exact match. But <em>foot|foo</em> will * produce an exact match for either <em>foot</em> or <em>foo</em>. * Remember, Perl5 regular expressions do not match the longest * possible match. From the perlre manpage: * <blockquote> * Alternatives are tried from left to right, so the first * alternative found for which the entire expression matches, * is the one that is chosen. This means that alternatives * are not necessarily greedy. For example: when matching * foo|foot against "barefoot", only the "foo" part will * match, as that is the first alternative tried, and it * successfully matches the target string. * </blockquote> * <p> * @param input The PatternMatcherInput to test for a match. * @param pattern The Perl5Pattern to be matched. * @return True if input matches pattern, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean matches(PatternMatcherInput input, Pattern pattern) { char[] inp; Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input._originalBuffer; if(expression._isCaseInsensitive) { if(input._toLowerBuffer == null) input._toLowerBuffer = _toLower(__originalInput); inp = input._toLowerBuffer; } else inp = __originalInput; __initInterpreterGlobals(expression, inp, input._beginOffset, input._endOffset, input._beginOffset); __lastMatchResult = null; if(__tryExpression(input._beginOffset)) { if(__endMatchOffsets[0] == input._endOffset || input.length() == 0 || input._beginOffset == input._endOffset) { __lastSuccess = true; return true; } } __lastSuccess = false; return false; } /** * Determines if a string contains a pattern. If the pattern is * matched by some substring of the input, a MatchResult instance * representing the <b> first </b> such match is made acessible via * {@link #getMatch()}. If you want to access * subsequent matches you should either use a PatternMatcherInput object * or use the offset information in the MatchResult to create a substring * representing the remaining input. Using the MatchResult offset * information is the recommended method of obtaining the parts of the * string preceeding the match and following the match. * <p> * The pattern must be a Perl5Pattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * a Perl5Pattern as the pattern parameter. * <p> * @param input The String to test for a match. * @param pattern The Perl5Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean contains(String input, Pattern pattern) { return contains(input.toCharArray(), pattern); } /** * Determines if a string (represented as a char[]) contains a pattern. * If the pattern is * matched by some substring of the input, a MatchResult instance * representing the <b> first </b> such match is made acessible via * {@link #getMatch()}. If you want to access * subsequent matches you should either use a PatternMatcherInput object * or use the offset information in the MatchResult to create a substring * representing the remaining input. Using the MatchResult offset * information is the recommended method of obtaining the parts of the * string preceeding the match and following the match. * <p> * The pattern must be a Perl5Pattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * a Perl5Pattern as the pattern parameter. * <p> * @param input The char[] to test for a match. * @param pattern The Perl5Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean contains(char[] input, Pattern pattern) { Perl5Pattern expression; expression = (Perl5Pattern)pattern; __originalInput = input; if(expression._isCaseInsensitive) input = _toLower(input); return __interpret(expression, input, 0, input.length, 0); } private static final int __DEFAULT_LAST_MATCH_END_OFFSET = -100; private int __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET; /** * Determines if the contents of a PatternMatcherInput, starting from the * current offset of the input contains a pattern. * If a pattern match is found, a MatchResult * instance representing the <b>first</b> such match is made acessible via * {@link #getMatch()}. The current offset of the * PatternMatcherInput is set to the offset corresponding to the end * of the match, so that a subsequent call to this method will continue * searching where the last call left off. You should remember that the * region between the begin and end offsets of the PatternMatcherInput are * considered the input to be searched, and that the current offset * of the PatternMatcherInput reflects where a search will start from. * Matches extending beyond the end offset of the PatternMatcherInput * will not be matched. In other words, a match must occur entirely * between the begin and end offsets of the input. See * {@link PatternMatcherInput} for more details. * <p> * As a side effect, if a match is found, the PatternMatcherInput match * offset information is updated. See the * {@link PatternMatcherInput#setMatchOffsets(int, int)} * method for more details. * <p> * The pattern must be a Perl5Pattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * a Perl5Pattern as the pattern parameter. * <p> * This method is usually used in a loop as follows: * <blockquote><pre> * PatternMatcher matcher; * PatternCompiler compiler; * Pattern pattern; * PatternMatcherInput input; * MatchResult result; * * compiler = new Perl5Compiler(); * matcher = new Perl5Matcher(); * * try { * pattern = compiler.compile(somePatternString); * } catch(MalformedPatternException e) { * System.err.println("Bad pattern."); * System.err.println(e.getMessage()); * return; * } * * input = new PatternMatcherInput(someStringInput); * * while(matcher.contains(input, pattern)) { * result = matcher.getMatch(); * // Perform whatever processing on the result you want. * } * * </pre></blockquote> * <p> * @param input The PatternMatcherInput to test for a match. * @param pattern The Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than a * Perl5Pattern is passed as the pattern parameter. */ public boolean contains(PatternMatcherInput input, Pattern pattern) { char[] inp; Perl5Pattern expression; boolean matchFound; //if(input.length() > 0) { // We want to allow a null string to match at the end of the input // which is why we don't check endOfInput. Not sure if this is a // safe thing to do or not. if(input._currentOffset > input._endOffset) return false; //} /* else if(input._endOfInput()) return false; */ expression = (Perl5Pattern)pattern; __originalInput = input._originalBuffer; // Todo: // Really should only reduce to lowercase that part of the // input that is necessary, instead of the whole thing. // Adjust MatchResult offsets accordingly. Actually, pass an adjustment // value to __interpret. __originalInput = input._originalBuffer; if(expression._isCaseInsensitive) { if(input._toLowerBuffer == null) input._toLowerBuffer = _toLower(__originalInput); inp = input._toLowerBuffer; } else inp = __originalInput; __lastMatchInputEndOffset = input.getMatchEndOffset(); matchFound = __interpret(expression, inp, input._beginOffset, input._endOffset, input._currentOffset); if(matchFound) { input.setCurrentOffset(__endMatchOffsets[0]); input.setMatchOffsets(__beginMatchOffsets[0], __endMatchOffsets[0]); } else { input.setCurrentOffset(input._endOffset + 1); } // Restore so it doesn't interfere with other unrelated matches. __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET; return matchFound; } /** * Fetches the last match found by a call to a matches() or contains() * method. If you plan on modifying the original search input, you * must call this method BEFORE you modify the original search input, * as a lazy evaluation technique is used to create the MatchResult. * This reduces the cost of pattern matching when you don't care about * the actual match and only care if the pattern occurs in the input. * Otherwise, a MatchResult would be created for every match found, * whether or not the MatchResult was later used by a call to getMatch(). * <p> * @return A MatchResult instance containing the pattern match found * by the last call to any one of the matches() or contains() * methods. If no match was found by the last call, returns * null. */ public MatchResult getMatch() { if(!__lastSuccess) return null; if(__lastMatchResult == null) __setLastMatchResult(); return __lastMatchResult; } }

The table below shows all metrics for Perl5Matcher.java.

MetricValueDescription
BLOCKS134.00Number of blocks
BLOCK_COMMENT21.00Number of block comment lines
COMMENTS437.00Comment lines
COMMENT_DENSITY 0.43Comment density
COMPARISONS289.00Number of comparison operators
CYCLOMATIC434.00Cyclomatic complexity
DECL_COMMENTS18.00Comments in declarations
DOC_COMMENT348.00Number of javadoc comment lines
ELOC1023.00Effective lines of code
EXEC_COMMENTS28.00Comments in executable code
EXITS41.00Procedure exits
FUNCTIONS25.00Number of function declarations
HALSTEAD_DIFFICULTY215.36Halstead difficulty
HALSTEAD_EFFORT 0.00Halstead effort
INTERFACE_COMPLEXITY164.00Interface complexity
JAVA0001 0.00JAVA0001 Package name does not contain only lower case letters
JAVA0002 0.00JAVA0002 Package name does not begin with a top level domain name or country code
JAVA0003 0.00JAVA0003 Minimize use of on-demand (.*) imports
JAVA0004 0.00JAVA0004 Unnecessary import from java.lang
JAVA0005 0.00JAVA0005 Imports not in specified order
JAVA0006 0.00JAVA0006 Empty finally block
JAVA0007 0.00JAVA0007 Should not declare public field
JAVA0008 0.00JAVA0008 Empty catch block
JAVA0009 0.00JAVA0009 Protected member in final class
JAVA0010 0.00JAVA0010 Non-instantiable class does not contain a non-private static member
JAVA0011 0.00JAVA0011 Abstract class does not contain an abstract method
JAVA0012 0.00JAVA0012 Non-constructor method with same name as declaring class
JAVA0013 0.00JAVA0013 Non-blank final field is not static
JAVA0014 0.00JAVA0014 Class with only static members has non-private constructor
JAVA0015 0.00JAVA0015 Package class contains public nested type
JAVA0016 0.00JAVA0016 Abstract class contains public constructor
JAVA0017 0.00JAVA0017 Class name does not have required form
JAVA001812.00JAVA0018 Method name does not have required form
JAVA0019 0.00JAVA0019 Interface name does not have required form
JAVA002021.00JAVA0020 Field name does not have required form
JAVA0021 0.00JAVA0021 Interface method name does not have required form
JAVA0022 3.00JAVA0022 Static final field name does not have required form
JAVA0023 0.00JAVA0023 Empty finalize method
JAVA0024 0.00JAVA0024 Empty class
JAVA0025 0.00JAVA0025 Method override is empty
JAVA0026 0.00JAVA0026 Finalize method with parameters
JAVA0029 0.00JAVA0029 Private method not used
JAVA0030 0.00JAVA0030 Private field not used
JAVA0031 4.00JAVA0031 Case statement not properly closed
JAVA0032 2.00JAVA0032 Switch statement missing default
JAVA0033 0.00JAVA0033 default: not last case in switch statement
JAVA0034139.00JAVA0034 Missing braces in if statement
JAVA0035 2.00JAVA0035 Missing braces in for statement
JAVA0036 9.00JAVA0036 Missing braces in while statement
JAVA0038 0.00JAVA0038 Non-case label in switch statement
JAVA003921.00JAVA0039 Break statement with label
JAVA0040 3.00JAVA0040 Switch statement contains N cases (maximum: M)
JAVA0041 0.00JAVA0041 Nested synchronized block
JAVA0042 0.00JAVA0042 Empty synchronized statement
JAVA0043 0.00JAVA0043 Inner class does not use outer class
JAVA0044 0.00JAVA0044 Serializable class with no instance variables
JAVA0045 0.00JAVA0045 Serializable class with only transient fields
JAVA0046 0.00JAVA0046 Name of class not derived from Exception ends with 'Exception'
JAVA0047 0.00JAVA0047 Serializable class derives from invalid base class
JAVA0048 0.00JAVA0048 Name of class derived from Exception does not end with 'Exception'
JAVA004912.00JAVA0049 Nested block at depth N (maximum: M)
JAVA0050 0.00JAVA0050 Class derives from java.lang.Error
JAVA0051 0.00JAVA0051 Class derives from java.lang.RuntimeException
JAVA0052 0.00JAVA0052 Class derives from java.lang.Throwable
JAVA0053 0.00JAVA0053 Unused label
JAVA0054 0.00JAVA0054 Inheritance depth N exceeds maximum M
JAVA0055 0.00JAVA0055 Class should be interface
JAVA0056 0.00JAVA0056 Unnecessary abstract modifier for interface or annotation
JAVA0057 0.00JAVA0057 Unnecessary default constructor
JAVA0058 0.00JAVA0058 Constructor calls super()
JAVA0059 0.00JAVA0059 Method override only calls super()
JAVA0061 0.00JAVA0061 Inaccessible member in anonymous class
JAVA0062 0.00JAVA0062 Public class missing public member or protected constructor
JAVA0063 0.00JAVA0063 Identifier name should not contain '$'
JAVA0064 0.00JAVA0064 N variations of identifier name (maximum: M)
JAVA0065 0.00JAVA0065 Unnecessary final modifier for method in final class
JAVA0066 0.00JAVA0066 Unnecessary modifier for interface nested type
JAVA0067 3.00JAVA0067 Array descriptor on identifier name
JAVA0068 0.00JAVA0068 Modifiers not declared in recommended order
JAVA0071 0.00JAVA0071 Strings compared with ==
JAVA0073 0.00JAVA0073 Integer division in floating-point context
JAVA0074 0.00JAVA0074 Use of Object.notify()
JAVA0075 1.00JAVA0075 Method parameter hides field
JAVA007610.00JAVA0076 Use of magic number
JAVA0077 0.00JAVA0077 Private field not used in declaring class
JAVA0078 0.00JAVA0078 Floating point values compared with ==
JAVA0079 0.00JAVA0079 Use of instance to reference static member
JAVA0080 0.00JAVA0080 Import declaration not used
JAVA0081 0.00JAVA0081 Boolean literal in comparison
JAVA0082 0.00JAVA0082 Unnecessary widening cast
JAVA0083 0.00JAVA0083 Unnecessary instanceof test
JAVA0084 0.00JAVA0084 Should use compound assignment operator
JAVA0085 0.00JAVA0085 Use of sun.* class
JAVA0087 0.00JAVA0087 Use of Thread.sleep()
JAVA0089 0.00JAVA0089 Use of restricted package
JAVA0092 0.00JAVA0092 Use of restricted type
JAVA0093 0.00JAVA0093 Redundant assignment
JAVA0094 0.00JAVA0094 Field hides a superclass field
JAVA0095 0.00JAVA0095 Uninitialized private field
JAVA0096 0.00JAVA0096 Field in nested class hides outer field
JAVA0098 0.00JAVA0098 Minimize use of implicit field initializers
JAVA0100 1.00JAVA0100 Class contains N non-final fields (maximum: M)
JAVA0101 0.00JAVA0101 Unnecessary modifier for field in interface
JAVA0102 0.00JAVA0102 Last statement in finalize() not super.finalize()
JAVA0103 0.00JAVA0103 Explicit call to finalize()
JAVA0104 0.00JAVA0104 finalize() only calls super.finalize()
JAVA0105 0.00JAVA0105 Duplicate import declaration
JAVA0106 0.00JAVA0106 Unnecessary import from current package
JAVA0108 0.00JAVA0108 Incorrect javadoc: no @param tag for 'parameter'
JAVA0109 0.00JAVA0109 Incorrect javadoc: no parameter 'parameter'
JAVA0110 0.00JAVA0110 Incorrect javadoc: no @return tag
JAVA0111 0.00JAVA0111 Incorrect javadoc: @return tag for void method
JAVA0112 0.00JAVA0112 Incorrect javadoc: no exception 'exception' in throws
JAVA0113 1.00JAVA0113 Incorrect javadoc: no @author tag
JAVA0114 0.00JAVA0114 Incorrect javadoc: no @version tag
JAVA0115 0.00JAVA0115 Incorrect javadoc: no @throws or @exception tag for 'exception'
JAVA0116 0.00JAVA0116 Missing javadoc: field 'field'
JAVA0117 0.00JAVA0117 Missing javadoc: method 'method'
JAVA0118 0.00JAVA0118 Missing javadoc: type 'type'
JAVA0119 0.00JAVA0119 Control variable changed within body of for loop
JAVA0123 0.00JAVA0123 Use all three components of for loop
JAVA0125 0.00JAVA0125 Continue statement with label
JAVA0126 0.00JAVA0126 Method declares unchecked exception in throws
JAVA0128 0.00JAVA0128 Public constructor in non-public class
JAVA0130 1.00JAVA0130 Non-static method does not use instance fields
JAVA0131 0.00JAVA0131 Compatible method does not override base
JAVA0132 0.00JAVA0132 Method overload with compatible signature
JAVA0133 0.00JAVA0133 Non-synchronized method overrides synchronized method
JAVA0135 0.00JAVA0135 Only one of Object.equals and Object.hashCode defined: missing 'method'
JAVA0136 1.00JAVA0136 N methods defined in class (maximum: M)
JAVA0137 1.00JAVA0137 Non-abstract class missing constructor
JAVA0138 0.00JAVA0138 N parameters defined for method (maximum: M)
JAVA0139 0.00JAVA0139 Definition of main other than public static void main(java.lang.String[])
JAVA0141 0.00JAVA0141 Unnecessary modifier for method in interface
JAVA0143 0.00JAVA0143 Synchronized method
JAVA0144 0.00JAVA0144 Line exceeds maximum M characters
JAVA0145813.00JAVA0145 Tab character used in source file
JAVA0150 0.00JAVA0150 java.lang.Error (or subclass) thrown
JAVA0153 0.00JAVA0153 Inefficient conversion of integer to string
JAVA0159 0.00JAVA0159 Inefficient conversion of string to integer
JAVA0160 0.00JAVA0160 Method does not throw specified exception
JAVA0161 0.00JAVA0161 Conditional wait() not in loop
JAVA0163 0.00JAVA0163 Empty statement
JAVA0165 0.00JAVA0165 Conflicting return statement in finally block
JAVA0166 0.00JAVA0166 Generic exception caught
JAVA0167 0.00JAVA0167 ThreadDeath not rethrown
JAVA0169 0.00JAVA0169 Unnecessary catch block: exception 'exception'
JAVA0170 0.00JAVA0170 Caught exception not derived from java.lang.Exception
JAVA0171 0.00JAVA0171 Unused local variable
JAVA0173 0.00JAVA0173 Unused method parameter
JAVA0174 3.00JAVA0174 Assigned local variable never used
JAVA0175 1.00JAVA0175 Successive assignment to variable
JAVA0176 0.00JAVA0176 Local variable name does not have required form
JAVA017750.00JAVA0177 Variable declaration missing initializer
JAVA0179 0.00JAVA0179 Local variable hides visible field
JAVA0233 0.00JAVA0233 Definition of serialVersionUID other than 'private static final long serialVersionUID'
JAVA0234 0.00JAVA0234 Class is Serializable but does not define serialVersionUID
JAVA0235 0.00JAVA0235 Class defines serialVersionUID but does not implement Serializable
JAVA0236 0.00JAVA0236 Attempt to clone an object which does not implement Cloneable
JAVA0237 0.00JAVA0237 Class implements Cloneable but does not have public clone method
JAVA0238 0.00JAVA0238 Clone method does not call super.clone()
JAVA0239 0.00JAVA0239 Class declares 'readObject' or 'writeObject' but does not implement Serializable
JAVA0240 0.00JAVA0240 Serializable class which declares readObject or writeObject but not both
JAVA0241 0.00JAVA0241 'readObject' or 'writeObject' should be declared private in Serializable class
JAVA0242 0.00JAVA0242 Transient field in non-Serializable class
JAVA0243 0.00JAVA0243 'readResolve' or 'writeReplace' should be declared private or protected
JAVA0244 0.00JAVA0244 Field or method name in subclass differs only by case from inherited field or method
JAVA0245 0.00JAVA0245 JUnit TestCase with non-trivial constructor
JAVA0246 0.00JAVA0246 JUnit assertXXX statement missing message parameter
JAVA0247 0.00JAVA0247 JUnit 'setUp()' and 'tearDown()' should call super method
JAVA0248 0.00JAVA0248 JUnit method 'setUp' or 'tearDown' with incorrect signature
JAVA0249 0.00JAVA0249 JUnit TestCase 'suite()' should be declared static
JAVA0250 0.00JAVA0250 JUnit TestCase declares testXXX method with incorrect signature
JAVA0251 0.00JAVA0251 Use '%n' for line breaks in printf/format for platform independence
JAVA0252 0.00JAVA0252 'enum' is a Java 1.5 reserved word
JAVA0253 0.00JAVA0253 Not all enum constants consumed in switch statement
JAVA0254 0.00JAVA0254 Use enhanced for loop construct instead of Iterator
JAVA0255 0.00JAVA0255 Result of method invocation not used
JAVA0256 4.00JAVA0256 Assignment of external collection/array to field
JAVA0257 0.00JAVA0257 Use of 'Constant Interface' anti-pattern
JAVA0258 0.00JAVA0258 Implement Iterable for foreach compatibility
JAVA0259 0.00JAVA0259 Return of collection/array field
JAVA0260 0.00JAVA0260 Use 'enum' instead of Enumerated Type pattern
JAVA0261 0.00JAVA0261 Use specialized Enum collection types
JAVA0262 0.00JAVA0262 Use of char in integer context
JAVA0263 0.00JAVA0263 Long literal ends with 'l' instead of 'L'
JAVA0264 0.00JAVA0264 Integer math in long context - check for overflow
JAVA0265 0.00JAVA0265 Use of Throwable.printStackTrace()
JAVA0266 0.00JAVA0266 Use of System.out
JAVA0267 0.00JAVA0267 Use of System.err
JAVA0269 0.00JAVA0269 Contents of StringBuffer never used
JAVA0270 0.00JAVA0270 Use Java 5.0 enhanced for loop construct to iterate over all elements in an array
JAVA0271 0.00JAVA0271 Minimize use of on-demand (.*) static imports
JAVA0272 0.00JAVA0272 Thread.run() called
JAVA0273 0.00JAVA0273 Non-final derivative of Thread calls start() in constructor
JAVA0274 0.00JAVA0274 Serializable class has a synchronized readObject()
JAVA0275 0.00JAVA0275 Serializable class has a synchronized writeObject() and no other synchronized methods
JAVA0276 0.00JAVA0276 Unnecessary use of String constructor
JAVA0277 0.00JAVA0277 Iterator.next() implementation does not throw NoSuchElementException
JAVA0278 0.00JAVA0278 Unnecessary use of Boolean constructor
JAVA0279 0.00JAVA0279 Serialization method readObject or readObjectNoData calls an overridable method
JAVA0280 0.00JAVA0280 IllegalMonitorStateException caught
JAVA0281 0.00JAVA0281 Iterator.next() not called in loop
JAVA0282 0.00JAVA0282 Call to Iterator.next() in loop which does not test Iterator.hasNext()
JAVA0283 0.00JAVA0283 Control variable not updated in loop body
JAVA0284 0.00JAVA0284 Explicit garbage collection
JAVA0285 0.00JAVA0285 Dereference of potentially null variable
JAVA0286 0.00JAVA0286 Dereference of null variable
JAVA0287 0.00JAVA0287 Unnecessary null check
JAVA0288 0.00JAVA0288 Inconsistent null check
LINES1860.00Number of lines in the source file
LINE_COMMENT68.00Number of line comments
LOC1125.00Lines of code
LOGICAL_LINES625.00Number of statements
LOOPS41.00Number of loops
NEST_DEPTH 7.00Maximum nesting depth
OPERANDS2317.00Number of operands
OPERATORS4468.00Number of operators
PARAMS51.00Number of formal parameter declarations
PROGRAM_LENGTH6785.00Halstead program length
PROGRAM_VOCAB370.00Halstead program vocabulary
PROGRAM_VOLUME 0.00Halstead program volume
RETURNS113.00Number of return points from functions