HTMLFilter.java

Index Score
freenet.clients.http.filter
Freenet

View: Reasons, Metrics, Source Code

These are the metrics that contribute to the Enerjy Score for this file, ranked by impact. The metrics listed at the top influence the score to a greater extent than the metrics listed at the bottom.

MetricDescription
ELOCEffective lines of code
JAVA0034JAVA0034 Missing braces in if statement
LOCLines of code
OPERANDSNumber of operands
CYCLOMATICCyclomatic complexity
PROGRAM_LENGTHHalstead program length
OPERATORSNumber of operators
PARAMSNumber of formal parameter declarations
LOGICAL_LINESNumber of statements
JAVA0145JAVA0145 Tab character used in source file
COMPARISONSNumber of comparison operators
INTERFACE_COMPLEXITYInterface complexity
EXEC_COMMENTSComments in executable code
LINESNumber of lines in the source file
UNIQUE_OPERANDSNumber of unique operands
LINE_COMMENTNumber of line comments
SIZESize of the file in bytes
PROGRAM_VOCABHalstead program vocabulary
LOOPSNumber of loops
RETURNSNumber of return points from functions
BLOCKSNumber of blocks
FUNCTIONSNumber of function declarations
JAVA0035JAVA0035 Missing braces in for statement
EXITSProcedure exits
JAVA0177JAVA0177 Variable declaration missing initializer
JAVA0270JAVA0270 Use Java 5.0 enhanced for loop construct to iterate over all elements in an array
JAVA0128JAVA0128 Public constructor in non-public class
JAVA0144JAVA0144 Line exceeds maximum M characters
JAVA0173JAVA0173 Unused method parameter
JAVA0076JAVA0076 Use of magic number
UNIQUE_OPERATORSNumber of unique operators
JAVA0117JAVA0117 Missing javadoc: method 'method'
JAVA0170JAVA0170 Caught exception not derived from java.lang.Exception
JAVA0166JAVA0166 Generic exception caught
JAVA0160JAVA0160 Method does not throw specified exception
JAVA0123JAVA0123 Use all three components of for loop
JAVA0031JAVA0031 Case statement not properly closed
JAVA0150JAVA0150 java.lang.Error (or subclass) thrown
JAVA0036JAVA0036 Missing braces in while statement
JAVA0077JAVA0077 Private field not used in declaring class
NEST_DEPTHMaximum nesting depth
PROGRAM_VOLUMEHalstead program volume
JAVA0032JAVA0032 Switch statement missing default
JAVA0138JAVA0138 N parameters defined for method (maximum: M)
JAVA0126JAVA0126 Method declares unchecked exception in throws
JAVA0110JAVA0110 Incorrect javadoc: no @return tag
JAVA0100JAVA0100 Class contains N non-final fields (maximum: M)
JAVA0108JAVA0108 Incorrect javadoc: no @param tag for 'parameter'
JAVA0254JAVA0254 Use enhanced for loop construct instead of Iterator
DOC_COMMENTNumber of javadoc comment lines
WHITESPACENumber of whitespace lines
JAVA0054JAVA0054 Inheritance depth N exceeds maximum M
DECL_COMMENTSComments in declarations
JAVA0068JAVA0068 Modifiers not declared in recommended order
/* -*- Mode: java; c-basic-indent: 4; tab-width: 4 -*- */ package freenet.clients.http.filter; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.nio.charset.MalformedInputException; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.StringTokenizer; import java.util.Vector; import freenet.l10n.L10n; import freenet.support.HTMLDecoder; import freenet.support.HTMLEncoder; import freenet.support.HTMLNode; import freenet.support.Logger; import freenet.support.api.Bucket; import freenet.support.api.BucketFactory; import freenet.support.io.Closer; import freenet.support.io.NullWriter; public class HTMLFilter implements ContentDataFilter, CharsetExtractor { private static boolean logMINOR; private static boolean logDEBUG; private static boolean deleteWierdStuff = true; private static boolean deleteErrors = true; public Bucket readFilter(Bucket bucket, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException { logMINOR = Logger.shouldLog(Logger.MINOR, this); logDEBUG = Logger.shouldLog(Logger.DEBUG, this); if(logMINOR) Logger.minor(this, "readFilter(): charset="+charset); InputStream strm = bucket.getInputStream(); BufferedInputStream bis = new BufferedInputStream(strm, 4096); Bucket temp = bf.makeBucket(bucket.size()); OutputStream os = temp.getOutputStream(); BufferedOutputStream bos = new BufferedOutputStream(os, 4096); Reader r = null; Writer w = null; InputStreamReader isr = null; OutputStreamWriter osw = null; try { 
try { isr = new InputStreamReader(bis, charset); osw = new OutputStreamWriter(bos, charset); r = new BufferedReader(isr, 4096); w = new BufferedWriter(osw, 4096); } catch(UnsupportedEncodingException e) { throw UnknownCharsetException.create(e, charset); } HTMLParseContext pc = new HTMLParseContext(r, w, charset, cb, false); pc.run(temp); w.close(); os = null; } finally { Closer.close(os); Closer.close(strm); } return temp; } public Bucket writeFilter(Bucket bucket, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException { throw new UnsupportedOperationException(); } public String getCharset(Bucket bucket, String parseCharset) throws DataFilterException, IOException { logMINOR = Logger.shouldLog(Logger.MINOR, this); if(logMINOR) Logger.minor(this, "getCharset(): default="+parseCharset); InputStream strm = bucket.getInputStream(); BufferedInputStream bis = new BufferedInputStream(strm, 4096); Writer w = new NullWriter(); Reader r; try { r = new BufferedReader(new InputStreamReader(bis, parseCharset), 4096); } catch (UnsupportedEncodingException e) { strm.close(); throw e; } HTMLParseContext pc = new HTMLParseContext(r, w, null, new NullFilterCallback(), true); try { pc.run(null); } catch (MalformedInputException e) { // Not this charset return null; } catch (IOException e) { throw e; } catch (Throwable t) { // Ignore ALL errors if(logMINOR) Logger.minor(this, "Caught "+t+" trying to detect MIME type with "+parseCharset); } try { r.close(); } catch (IOException e) { throw e; } catch (Throwable t) { if(logMINOR) Logger.minor(this, "Caught "+t+" closing stream after trying to detect MIME type with "+parseCharset); } if(logMINOR) Logger.minor(this, "Returning charset "+pc.detectedCharset); return pc.detectedCharset; } class HTMLParseContext { Reader r; Writer w; String charset; String detectedCharset; final FilterCallback cb; final boolean noOutput; HTMLParseContext(Reader r, Writer w, String charset, 
FilterCallback cb, boolean noOutput) { this.r = r; this.w = w; this.charset = charset; this.cb = cb; this.noOutput = noOutput; } Bucket run(Bucket temp) throws IOException, DataFilterException { /** * TOKENIZE Modes: * <p>0) in text transitions: '<' ->(1) 1) in tag, not in * quotes/comment/whitespace transitions: whitespace -> (4) (save * current element) '"' -> (2) '--' at beginning of tag -> (3) '>' -> * process whole tag 2) in tag, in quotes transitions: '"' -> (1) * '>' -> grumble about markup in quotes in tag might confuse older * user-agents (stay in current state) 3) in tag, in comment * transitions: '-->' -> save/ignore comment, go to (0) '<' or '>' -> * grumble about markup in comments 4) in tag, in whitespace * transitions: '"' -> (2) '>' -> save tag, (0) anything else not * whitespace -> (1) * </p> */ StringBuffer b = new StringBuffer(100); StringBuffer balt = new StringBuffer(4000); Vector splitTag = new Vector(); String currentTag = null; char pprevC = 0; char prevC = 0; char c = 0; mode = INTEXT; while (true) { int x; try { x = r.read(); } /** * libgcj up to at least 4.2.2 has a bug: InputStreamReader.refill() throws this exception when BufferedInputReader.refill() returns false for EOF. See: * line 299 at InputStreamReader.java (in refill()): http://www.koders.com/java/fidD8F7E2EB1E4C22DA90EBE0130306AE30F876AB00.aspx?s=refill#L279 * line 355 at BufferedInputStream.java (in refill()): http://www.koders.com/java/fid1949641524FAC0083432D79793F554CD85F46759.aspx?s=refill#L355 * TODO: remove this when the gcj bug is fixed and the affected gcj versions are outdated. 
*/ catch(java.io.CharConversionException cce) { if(freenet.node.Node.checkForGCJCharConversionBug()) /* only ignore the exception on affected libgcj */ x = -1; else throw cce; } if (x == -1) { switch (mode) { case INTEXT : saveText(b, currentTag, w, this); break; default : // Dump unfinished tag break; } break; } else { pprevC = prevC; prevC = c; c = (char) x; switch (mode) { case INTEXT : if (c == '<') { saveText(b, currentTag, w, this); b.setLength(0); balt.setLength(0); mode = INTAG; } else { b.append(c); } break; case INTAG : balt.append(c); if (HTMLDecoder.isWhitespace(c)) { splitTag.add(b.toString()); mode = INTAGWHITESPACE; b.setLength(0); } else if ((c == '<') && Character.isWhitespace(balt.charAt(0))) { // Previous was an un-escaped < in a script. saveText(b, currentTag, w, this); balt.setLength(0); b.setLength(0); splitTag.clear(); } else if (c == '>') { splitTag.add(b.toString()); b.setLength(0); processTag(splitTag, w, this); currentTag = (String)splitTag.get(0); splitTag.clear(); balt.setLength(0); mode = INTEXT; } else if ( (b.length() == 2) && (c == '-') && (prevC == '-') && (pprevC == '!')) { mode = INTAGCOMMENT; b.append(c); } else if (c == '"') { mode = INTAGQUOTES; b.append(c); } else if (c == '\'') { mode = INTAGSQUOTES; b.append(c); } else if (c == '/') { /* Probable end tag */ currentTag = null; /* We didn't remember what was the last tag, so ... */ b.append(c); } else { b.append(c); } break; case INTAGQUOTES : if (c == '"') { mode = INTAG; b.append(c); // Part of the element } else if (c == '>') { b.append("&gt;"); } else if (c == '<') { b.append("&lt;"); } else { b.append(c); } break; case INTAGSQUOTES : if (c == '\'') { mode = INTAG; b.append(c); // Part of the element } else if (c == '<') { b.append("&lt;"); } else if (c == '>') { b.append("&gt;"); } else { b.append(c); } break; /* * Comments are often used to temporarily disable * markup; I shall allow it. 
(avian) White space is * not permitted between the markup declaration * open delimiter (" * <!") and the comment open delimiter ("--"), but * is permitted between the comment close delimiter * ("--") and the markup declaration close * delimiter (">"). A common error is to include a * string of hyphens ("---") within a comment. * Authors should avoid putting two or more * adjacent hyphens inside comments. However, the * only browser that actually gets it right is IE * (others either don't allow it or allow other * chars as well). The only safe course of action * is to allow any and all chars, but eat them. * (avian) */ case INTAGCOMMENT : if ((b.length() >= 4) && (c == '-') && (prevC == '-')) { b.append(c); mode = INTAGCOMMENTCLOSING; } else b.append(c); break; case INTAGCOMMENTCLOSING : if (c == '>') { saveComment(b, w, this); b.setLength(0); mode = INTEXT; } else { b.append(c); if(c != '-') mode = INTAGCOMMENT; } break; case INTAGWHITESPACE : if (c == '"') { mode = INTAGQUOTES; b.append(c); } else if (c == '\'') { // e.g. <div align = 'center'> (avian) mode = INTAGSQUOTES; b.append(c); } else if (c == '>') { if (!killTag) processTag(splitTag, w, this); killTag = false; currentTag = (String)splitTag.get(0); splitTag.clear(); b.setLength(0); balt.setLength(0); mode = INTEXT; } else if ((c == '<') && Character.isWhitespace(balt.charAt(0))) { // Previous was an un-escaped < in a script. 
saveText(balt, currentTag, w, this); balt.setLength(0); b.setLength(0); splitTag.clear(); mode = INTAG; } else if (HTMLDecoder.isWhitespace(c)) { // More whitespace, what fun } else { mode = INTAG; b.append(c); } } } } return temp; } int mode; static final int INTEXT = 0; static final int INTAG = 1; static final int INTAGQUOTES = 2; static final int INTAGSQUOTES = 3; static final int INTAGCOMMENT = 4; static final int INTAGCOMMENTCLOSING = 5; static final int INTAGWHITESPACE = 6; boolean killTag = false; // just this one boolean writeStyleScriptWithTag = false; // just this one boolean expectingBadComment = false; // has to be set on or off explicitly by tags boolean inStyle = false; // has to be set on or off explicitly by tags boolean inScript = false; // has to be set on or off explicitly by tags boolean killText = false; // has to be set on or off explicitly by tags boolean killStyle = false; int styleScriptRecurseCount = 0; String currentStyleScriptChunk = ""; StringBuffer writeAfterTag = new StringBuffer(1024); } void saveText(StringBuffer s, String tagName, Writer w, HTMLParseContext pc) throws IOException { if(pc.noOutput) return; if(logDEBUG) Logger.debug(this, "Saving text: "+s.toString()); if (pc.killText) { return; } for(int i=0;i<s.length();i++) { char c = s.charAt(i); if((c < 32) && (c != '\t') && (c != '\n') && (c != '\r') ) { // Not a real character // STRONGLY suggests somebody is using a bogus charset. // This could be in order to break the filter. 
s.deleteCharAt(i); if(logDEBUG) Logger.debug(this, "Removing '"+c+"' from the output stream"); } } String style = s.toString(); if (pc.inStyle || pc.inScript) { pc.currentStyleScriptChunk += style; return; // is parsed and written elsewhere } StringBuffer out = new StringBuffer(s.length()*2); for(int i=0;i<s.length();i++) { char c = s.charAt(i); if(c == '<') { out.append("&lt;"); } else { out.append(c); } } String sout = out.toString(); if(pc.cb != null) pc.cb.onText(HTMLDecoder.decode(sout), tagName); /* Tag name is given as type for the text */ w.write(sout); } void processTag(Vector splitTag, Writer w, HTMLParseContext pc) throws IOException, DataFilterException { // First, check that it is a recognized tag if(logDEBUG) { for(int i=0;i<splitTag.size();i++) Logger.debug(this, "Tag["+i+"]="+splitTag.get(i)); } ParsedTag t = new ParsedTag(splitTag); if (!pc.killTag) { t = t.sanitize(pc); if(pc.noOutput) return; // sanitize has done all the work we are interested in if (t != null) { if (pc.writeStyleScriptWithTag) { pc.writeStyleScriptWithTag = false; String style = pc.currentStyleScriptChunk; if ((style == null) || (style.length() == 0)) pc.writeAfterTag.append("<!-- "+l10n("deletedUnknownStyle")+" -->"); else w.write(style); pc.currentStyleScriptChunk = ""; } t.write(w); if (pc.writeAfterTag.length() > 0) { w.write(pc.writeAfterTag.toString()); pc.writeAfterTag = new StringBuffer(1024); } } else pc.writeStyleScriptWithTag = false; } else { pc.killTag = false; pc.writeStyleScriptWithTag = false; } } void saveComment(StringBuffer s, Writer w, HTMLParseContext pc) throws IOException { if(pc.noOutput) return; if((s.length() > 3) && (s.charAt(0) == '!') && (s.charAt(1) == '-') && (s.charAt(2) == '-')) { s.delete(0, 3); if(s.charAt(s.length()-1) == '-') s.setLength(s.length()-1); if(s.charAt(s.length()-1) == '-') s.setLength(s.length()-1); } if(logDEBUG) Logger.debug(this, "Saving comment: "+s.toString()); if (pc.expectingBadComment) return; // ignore it if (pc.inStyle 
|| pc.inScript) { pc.currentStyleScriptChunk += s; return; // </style> handler should write } if (pc.killTag) { pc.killTag = false; return; } StringBuffer sb = new StringBuffer(); for(int i=0;i<s.length();i++) { char c = s.charAt(i); if(c == '<') { sb.append("&lt;"); } else if(c == '>') { sb.append("&gt;"); } else { sb.append(c); } } s = sb; w.write("<!-- "); w.write(s.toString()); w.write(" -->"); } static void throwFilterException(String msg) throws DataFilterException { // FIXME String longer = l10n("failedToParseLabel"); throw new DataFilterException(longer, longer, msg, new HTMLNode("div", msg)); } static class ParsedTag { final String element; final String[] unparsedAttrs; final boolean startSlash; final boolean endSlash; /* * public ParsedTag(ParsedTag t) { this.element = t.element; * this.unparsedAttrs = (String[]) t.unparsedAttrs.clone(); * this.startSlash = t.startSlash; this.endSlash = t.endSlash; } */ public ParsedTag(ParsedTag t, String[] outAttrs) { this.element = t.element; this.unparsedAttrs = outAttrs; this.startSlash = t.startSlash; this.endSlash = t.endSlash; } public ParsedTag(Vector v) { int len = v.size(); if (len == 0) { element = null; unparsedAttrs = new String[0]; startSlash = endSlash = false; return; } String s = (String) v.elementAt(len - 1); if (((len - 1 != 0) || (s.length() > 1)) && s.endsWith("/")) { s = s.substring(0, s.length() - 1); v.setElementAt(s, len - 1); if (s.length() == 0) len--; endSlash = true; // Don't need to set it back because everything is an I-value } else endSlash = false; s = (String) v.elementAt(0); if ((s.length() > 1) && s.startsWith("/")) { s = s.substring(1); v.setElementAt(s, 0); startSlash = true; } else startSlash = false; element = (String) v.elementAt(0); if (len > 1) { unparsedAttrs = new String[len - 1]; for (int x = 1; x < len; x++) unparsedAttrs[x - 1] = (String) v.elementAt(x); } else unparsedAttrs = new String[0]; if(logDEBUG) Logger.debug(this, "Element = "+element); } public ParsedTag 
sanitize(HTMLParseContext pc) throws DataFilterException { TagVerifier tv = (TagVerifier) allowedTagsVerifiers.get(element.toLowerCase()); if(logDEBUG) Logger.debug(this, "Got verifier: "+tv+" for "+element); if (tv == null) { if (deleteWierdStuff) { return null; } else { String err = "<!-- "+HTMLEncoder.encode(l10n("unknownTag", "tag", element))+ " -->"; if (!deleteErrors) throwFilterException(l10n("unknownTagLabel") + ' ' + err); return null; } } return tv.sanitize(this, pc); } public String toString() { if (element == null) return ""; StringBuffer sb = new StringBuffer("<"); if (startSlash) sb.append('/'); sb.append(element); if (unparsedAttrs != null) { int n = unparsedAttrs.length; for (int i = 0; i < n; i++) { sb.append(' ').append(unparsedAttrs[i]); } } if (endSlash) sb.append(" /"); sb.append('>'); return sb.toString(); } public void write(Writer w) throws IOException { String s = toString(); if (s != null) w.write(s); } } static final Hashtable allowedTagsVerifiers = new Hashtable(); static final String[] emptyStringArray = new String[0]; static { allowedTagsVerifiers.put("?xml", new XmlTagVerifier()); allowedTagsVerifiers.put( "!doctype", new DocTypeTagVerifier("!doctype")); allowedTagsVerifiers.put("html", new HtmlTagVerifier()); allowedTagsVerifiers.put( "head", new TagVerifier( "head", new String[] { "id" }, new String[] { "profile" }, null)); allowedTagsVerifiers.put( "title", new TagVerifier("title", new String[] { "id" })); allowedTagsVerifiers.put("meta", new MetaTagVerifier()); allowedTagsVerifiers.put( "body", new CoreTagVerifier( "body", new String[] { "bgcolor", "text", "link", "vlink", "alink" }, null, new String[] { "background" }, new String[] { "onload", "onunload" })); String[] group = { "div", "h1", "h2", "h3", "h4", "h5", "h6", "p", "caption" }; for (int x = 0; x < group.length; x++) allowedTagsVerifiers.put( group[x], new CoreTagVerifier( group[x], new String[] { "align" }, emptyStringArray, emptyStringArray, emptyStringArray)); 
String[] group2 = { "span", "address", "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym", "sub", "sup", "dt", "dd", "tt", "i", "b", "big", "small", "strike", "s", "u", "noframes", "fieldset", // Delete <noscript> / </noscript>. So we can at least see the non-scripting code. // "noscript", "xmp", "listing", "plaintext", "center", "bdo" }; for (int x = 0; x < group2.length; x++) allowedTagsVerifiers.put( group2[x], new CoreTagVerifier( group2[x], emptyStringArray, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "blockquote", new CoreTagVerifier( "blockquote", emptyStringArray, new String[] { "cite" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "q", new CoreTagVerifier( "q", emptyStringArray, new String[] { "cite" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "br", new BaseCoreTagVerifier( "br", new String[] { "clear" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "pre", new CoreTagVerifier( "pre", new String[] { "width", "xml:space" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "ins", new CoreTagVerifier( "ins", new String[] { "datetime" }, new String[] { "cite" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "del", new CoreTagVerifier( "del", new String[] { "datetime" }, new String[] { "cite" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "ul", new CoreTagVerifier( "ul", new String[] { "type", "compact" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "ol", new CoreTagVerifier( "ol", new String[] { "type", "compact", "start" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "li", new CoreTagVerifier( "li", new String[] { "type", "value" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "dl", new CoreTagVerifier( "dl", new String[] { "compact" }, emptyStringArray, 
emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "dir", new CoreTagVerifier( "dir", new String[] { "compact" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "menu", new CoreTagVerifier( "menu", new String[] { "compact" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "table", new CoreTagVerifier( "table", new String[] { "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "align", "bgcolor" }, emptyStringArray, new String[] { "background" }, emptyStringArray)); allowedTagsVerifiers.put( "thead", new CoreTagVerifier( "thead", new String[] { "align", "char", "charoff", "valign" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "tfoot", new CoreTagVerifier( "tfoot", new String[] { "align", "char", "charoff", "valign" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "tbody", new CoreTagVerifier( "tbody", new String[] { "align", "char", "charoff", "valign" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "colgroup", new CoreTagVerifier( "colgroup", new String[] { "span", "width", "align", "char", "charoff", "valign" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "col", new CoreTagVerifier( "col", new String[] { "span", "width", "align", "char", "charoff", "valign" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "tr", new CoreTagVerifier( "tr", new String[] { "align", "char", "charoff", "valign", "bgcolor" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "th", new CoreTagVerifier( "th", new String[] { "abbr", "axis", "headers", "scope", "rowspan", "colspan", "align", "char", "charoff", "valign", "nowrap", "bgcolor", "width", "height" }, emptyStringArray, new String[] { "background" }, emptyStringArray)); allowedTagsVerifiers.put( "td", new 
CoreTagVerifier( "td", new String[] { "abbr", "axis", "headers", "scope", "rowspan", "colspan", "align", "char", "charoff", "valign", "nowrap", "bgcolor", "width", "height" }, emptyStringArray, new String[] { "background" }, emptyStringArray)); allowedTagsVerifiers.put( "a", new LinkTagVerifier( "a", new String[] { "accesskey", "tabindex", "name", "shape", "coords", "target" }, emptyStringArray, emptyStringArray, new String[] { "onfocus", "onblur" })); allowedTagsVerifiers.put( "link", new LinkTagVerifier( "link", new String[] { "media", "target" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "base", new BaseHrefTagVerifier( "base", new String[] { "id", "target" }, new String[] { /* explicitly sanitized by class */ })); allowedTagsVerifiers.put( "img", new CoreTagVerifier( "img", new String[] { "alt", "name", "height", "width", "ismap", "align", "border", "hspace", "vspace" }, new String[] { "longdesc", "usemap" }, new String[] { "src" }, emptyStringArray)); // FIXME: object tag - // http://www.w3.org/TR/html4/struct/objects.html#h-13.3 // FIXME: param tag - // http://www.w3.org/TR/html4/struct/objects.html#h-13.3.2 // applet tag PROHIBITED - we do not support applets (FIXME?) 
allowedTagsVerifiers.put( "map", new CoreTagVerifier( "map", new String[] { "name" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "area", new CoreTagVerifier( "area", new String[] { "accesskey", "tabindex", "shape", "coords", "nohref", "alt", "target" }, new String[] { "href" }, emptyStringArray, new String[] { "onfocus", "onblur" })); allowedTagsVerifiers.put("style", new StyleTagVerifier()); allowedTagsVerifiers.put( "font", new BaseCoreTagVerifier( "font", new String[] { "size", "color", "face" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "basefont", new BaseCoreTagVerifier( "basefont", new String[] { "size", "color", "face" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "hr", new CoreTagVerifier( "hr", new String[] { "align", "noshade", "size", "width" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "frameset", new CoreTagVerifier( "frameset", new String[] { "rows", "cols" }, emptyStringArray, emptyStringArray, new String[] { "onload", "onunload" }, false)); allowedTagsVerifiers.put( "frame", new BaseCoreTagVerifier( "frame", new String[] { "name", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" }, new String[] { "longdesc" }, new String[] { "src" })); allowedTagsVerifiers.put( "iframe", new BaseCoreTagVerifier( "iframe", new String[] { "name", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width" }, new String[] { "longdesc"}, new String[] { "src" })); allowedTagsVerifiers.put( "form", new FormTagVerifier( "form", new String[] { "name" }, // FIXME add a whitelist filter for accept // All other attributes are handled by FormTagVerifier. 
new String[] { }, new String[] { "onsubmit", "onreset" })); allowedTagsVerifiers.put( "input", new InputTagVerifier( "input", new String[] { "accesskey", "tabindex", "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "alt", "ismap", "accept", "align" }, new String[] { "usemap" }, new String[] { "src" }, new String[] { "onfocus", "onblur", "onselect", "onchange" })); allowedTagsVerifiers.put( "button", new CoreTagVerifier( "button", new String[] { "accesskey", "tabindex", "name", "value", "type", "disabled" }, emptyStringArray, emptyStringArray, new String[] { "onfocus", "onblur" })); allowedTagsVerifiers.put( "select", new CoreTagVerifier( "select", new String[] { "name", "size", "multiple", "disabled", "tabindex" }, emptyStringArray, emptyStringArray, new String[] { "onfocus", "onblur", "onchange" })); allowedTagsVerifiers.put( "optgroup", new CoreTagVerifier( "optgroup", new String[] { "disabled", "label" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "option", new CoreTagVerifier( "option", new String[] { "selected", "disabled", "label", "value" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "textarea", new CoreTagVerifier( "textarea", new String[] { "accesskey", "tabindex", "name", "rows", "cols", "disabled", "readonly" }, emptyStringArray, emptyStringArray, new String[] { "onfocus", "onblur", "onselect", "onchange" })); allowedTagsVerifiers.put( "isindex", new BaseCoreTagVerifier( "isindex", new String[] { "prompt" }, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put( "label", new CoreTagVerifier( "label", new String[] { "for", "accesskey" }, emptyStringArray, emptyStringArray, new String[] { "onfocus", "onblur" })); allowedTagsVerifiers.put( "legend", new CoreTagVerifier( "legend", new String[] { "accesskey", "align" }, emptyStringArray, emptyStringArray, emptyStringArray)); allowedTagsVerifiers.put("script", new ScriptTagVerifier()); } 
static class TagVerifier { final String tag; final HashSet allowedAttrs; final HashSet uriAttrs; final HashSet inlineURIAttrs; TagVerifier(String tag, String[] allowedAttrs) { this(tag, allowedAttrs, null, null); } TagVerifier(String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs) { this.tag = tag; this.allowedAttrs = new HashSet(); if (allowedAttrs != null) { for (int x = 0; x < allowedAttrs.length; x++) this.allowedAttrs.add(allowedAttrs[x]); } this.uriAttrs = new HashSet(); if (uriAttrs != null) { for (int x = 0; x < uriAttrs.length; x++) this.uriAttrs.add(uriAttrs[x]); } this.inlineURIAttrs = new HashSet(); if (inlineURIAttrs != null) { for (int x = 0; x < inlineURIAttrs.length; x++) this.inlineURIAttrs.add(inlineURIAttrs[x]); } } ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) throws DataFilterException { Hashtable h = new Hashtable(); boolean equals = false; String prevX = ""; if (t.unparsedAttrs != null) for (int i = 0; i < t.unparsedAttrs.length; i++) { String s = t.unparsedAttrs[i]; if (equals) { equals = false; s = stripQuotes(s); h.remove(prevX); h.put(prevX, s); prevX = ""; } else { int idx = s.indexOf('='); if (idx == s.length() - 1) { equals = true; if (idx == 0) { // prevX already set } else { prevX = s.substring(0, s.length() - 1); prevX = prevX.toLowerCase(); } } else if (idx > -1) { String x = s.substring(0, idx); if (x.length() == 0) x = prevX; x = x.toLowerCase(); String y; if (idx == s.length() - 1) y = ""; else y = s.substring(idx + 1, s.length()); y = stripQuotes(y); h.remove(x); h.put(x, y); prevX = x; } else { h.remove(s); h.put(s, new Object()); prevX = s; } } } h = sanitizeHash(h, t, pc); if (h == null) return null; if (t.startSlash) return new ParsedTag(t, null); String[] outAttrs = new String[h.size()]; int i = 0; for (Enumeration e = h.keys(); e.hasMoreElements();) { String x = (String) e.nextElement(); Object o = h.get(x); String y; if (o instanceof String) y = (String) o; else y = null; StringBuffer out = 
new StringBuffer(x); if (y != null) out.append( "=\"" ).append( y ).append( '"' ); outAttrs[i++] = out.toString(); } return new ParsedTag(t, outAttrs); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = new Hashtable(); for (Enumeration e = h.keys(); e.hasMoreElements();) { String x = (String) e.nextElement(); Object o = h.get(x); // Straight attribs if (allowedAttrs.contains(x)) { hn.put(x, o); continue; } if (uriAttrs.contains(x)) { if(logMINOR) Logger.minor(this, "Non-inline URI attribute: "+x); // URI if (o instanceof String) { // Java's URL handling doesn't seem suitable String uri = (String) o; uri = HTMLDecoder.decode(uri); uri = htmlSanitizeURI(uri, null, null, pc.cb, pc, false); if (uri != null) { uri = HTMLEncoder.encode(uri); hn.put(x, uri); } } // FIXME: rewrite absolute URLs, handle ?date= etc } if (inlineURIAttrs.contains(x)) { if(logMINOR) Logger.minor(this, "Inline URI attribute: "+x); // URI if (o instanceof String) { // Java's URL handling doesn't seem suitable String uri = (String) o; uri = HTMLDecoder.decode(uri); uri = htmlSanitizeURI(uri, null, null, pc.cb, pc, true); if (uri != null) { uri = HTMLEncoder.encode(uri); hn.put(x, uri); } } // FIXME: rewrite absolute URLs, handle ?date= etc } } // lang, xml:lang and dir can go on anything // lang or xml:lang = language [ "-" country [ "-" variant ] ] // The variant can be just about anything; no way to test (avian) String s = getHashString(h, "lang"); if (s != null) hn.put("lang", s); s = getHashString(h, "xml:lang"); if (s != null) hn.put("xml:lang", s); s = getHashString(h, "dir"); if ((s != null) && (s.equalsIgnoreCase("ltr") || s.equalsIgnoreCase("rtl"))) hn.put("dir", s); return hn; } } static String stripQuotes(String s) { final String quotes = "\"'"; if (s.length() >= 2) { int n = quotes.length(); for (int x = 0; x < n; x++) { char cc = quotes.charAt(x); if ((s.charAt(0) == cc) && (s.charAt(s.length() - 1) == cc)) { if 
(s.length() > 2) s = s.substring(1, s.length() - 1); else s = ""; break; } } } return s; } // static String[] titleString = new String[] {"title"}; static abstract class ScriptStyleTagVerifier extends TagVerifier { ScriptStyleTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs) { super(tag, allowedAttrs, uriAttrs, null); } abstract void setStyle(boolean b, HTMLParseContext pc); abstract boolean getStyle(HTMLParseContext pc); abstract void processStyle(HTMLParseContext pc); Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); if (p.startSlash) { return finish(h, hn, pc); } else { return start(h, hn, pc); } } Hashtable finish( Hashtable h, Hashtable hn, HTMLParseContext pc) throws DataFilterException { if(logDEBUG) Logger.debug(this, "Finishing script/style"); // Finishing setStyle(false, pc); pc.styleScriptRecurseCount--; if (pc.styleScriptRecurseCount < 0) { if (deleteErrors) pc.writeAfterTag.append( "<!-- " + l10n("tooManyNestedStyleOrScriptTags") + " -->"); else throwFilterException(l10n("tooManyNestedStyleOrScriptTagsLong")); return null; } if(!pc.killStyle) { processStyle(pc); pc.writeStyleScriptWithTag = true; } else { pc.killStyle = false; pc.currentStyleScriptChunk = ""; } pc.expectingBadComment = false; // Pass it on, no params for </style> return hn; } Hashtable start(Hashtable h, Hashtable hn, HTMLParseContext pc) throws DataFilterException { if(logDEBUG) Logger.debug(this, "Starting script/style"); pc.styleScriptRecurseCount++; if (pc.styleScriptRecurseCount > 1) { if (deleteErrors) pc.writeAfterTag.append("<!-- " + l10n("tooManyNestedStyleOrScriptTags") + " -->"); else throwFilterException(l10n("tooManyNestedStyleOrScriptTagsLong")); return null; } setStyle(true, pc); String type = getHashString(h, "type"); if (type != null) { if (!type.equalsIgnoreCase("text/css") /* FIXME */ ) { pc.killStyle = true; pc.expectingBadComment = true; return null; // 
kill the tag } hn.put("type", "text/css"); } return hn; } } static class StyleTagVerifier extends ScriptStyleTagVerifier { StyleTagVerifier() { super( "style", new String[] { "id", "media", "title", "xml:space" }, emptyStringArray); } void setStyle(boolean b, HTMLParseContext pc) { pc.inStyle = b; } boolean getStyle(HTMLParseContext pc) { return pc.inStyle; } void processStyle(HTMLParseContext pc) { try { pc.currentStyleScriptChunk = sanitizeStyle(pc.currentStyleScriptChunk, pc.cb, pc); } catch (DataFilterException e) { Logger.error(this, "Error parsing style: "+e, e); pc.currentStyleScriptChunk = ""; } } } static class ScriptTagVerifier extends ScriptStyleTagVerifier { ScriptTagVerifier() { super( "script", new String[] { "id", "charset", "type", "language", "defer", "xml:space" }, new String[] { "src" }); /* * FIXME: src not supported type ignored (we will need to check * this when if/when we support scripts charset ignored */ } Hashtable sanitizeHash( Hashtable hn, ParsedTag p, HTMLParseContext pc) throws DataFilterException { // Call parent so we swallow the scripting super.sanitizeHash(hn, p, pc); return null; // Lose the tags } void setStyle(boolean b, HTMLParseContext pc) { pc.inScript = b; } boolean getStyle(HTMLParseContext pc) { return pc.inScript; } void processStyle(HTMLParseContext pc) { pc.currentStyleScriptChunk = sanitizeScripting(pc.currentStyleScriptChunk); } } static class BaseCoreTagVerifier extends TagVerifier { BaseCoreTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs) { super(tag, allowedAttrs, uriAttrs, inlineURIAttrs); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); // %i18n dealt with by TagVerifier // %coreattrs String id = getHashString(h, "id"); if (id != null) { hn.put("id", id); // hopefully nobody will be stupid enough to encode URLs into // the unique ID... 
:) } String classNames = getHashString(h, "class"); if (classNames != null) { hn.put("class", classNames); // ditto } String style = getHashString(h, "style"); if (style != null) { style = sanitizeStyle(style, pc.cb, pc); if (style != null) style = escapeQuotes(style); if (style != null) hn.put("style", style); } String title = getHashString(h, "title"); if (title != null) { // PARANOIA: title is PLAIN TEXT, right? In all user agents? :) hn.put("title", title); } return hn; } } static class CoreTagVerifier extends BaseCoreTagVerifier { final HashSet eventAttrs; static final String[] stdEvents = new String[] { "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmousemove", "onmouseout", "onkeypress", "onkeydown", "onkeyup", "onload", "onfocus", "onblur", "oncontextmenu", "onresize", "onscroll", "onunload", "onmouseenter", "onchange", "onreset", "onselect", "onsubmit", "onerror", }; CoreTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs, String[] eventAttrs) { this(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs, true); } CoreTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs, String[] eventAttrs, boolean addStdEvents) { super(tag, allowedAttrs, uriAttrs, inlineURIAttrs); this.eventAttrs = new HashSet(); if (eventAttrs != null) { for (int x = 0; x < eventAttrs.length; x++) this.eventAttrs.add(eventAttrs[x]); } if (addStdEvents) { for (int x = 0; x < stdEvents.length; x++) this.eventAttrs.add(stdEvents[x]); } } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); // events (default and added) for (Iterator e = eventAttrs.iterator(); e.hasNext();) { String name = (String) e.next(); String arg = getHashString(h, name); if (arg != null) { arg = sanitizeScripting(arg); if (arg != null) hn.put(name, arg); } } return hn; } } static class LinkTagVerifier extends CoreTagVerifier { 
LinkTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs, String[] eventAttrs) { super(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); String hreflang = getHashString(h, "hreflang"); String charset = null; String type = getHashString(h, "type"); if (type != null) { String[] typesplit = splitType(type); type = typesplit[0]; if ((typesplit[1] != null) && (typesplit[1].length() > 0)) charset = typesplit[1]; if(logDEBUG) Logger.debug( this, "Processing link tag, type=" + type + ", charset=" + charset); } String c = getHashString(h, "charset"); if (c != null) charset = c; String href = getHashString(h, "href"); if (href != null) { final String[] rels = new String[] { "rel", "rev" }; for (int x = 0; x < rels.length; x++) { String reltype = rels[x]; String rel = getHashString(h, reltype); if (rel != null) { StringTokenizer tok = new StringTokenizer(rel, " "); while (tok.hasMoreTokens()) { String t = tok.nextToken(); if (t.equalsIgnoreCase("alternate") || t.equalsIgnoreCase("stylesheet")) { // FIXME: hardcoding text/css type = "text/css"; } // FIXME: do we want to do anything with the // other possible rel's? 
} hn.put(reltype, rel); } } // Core.logger.log(this, "Sanitizing URI: "+href+" with type "+ // type+" and charset "+charset, // Logger.DEBUG); href = HTMLDecoder.decode(href); href = htmlSanitizeURI(href, type, charset, pc.cb, pc, false); if (href != null) { href = HTMLEncoder.encode(href); hn.put("href", href); if (type != null) hn.put("type", type); if (charset != null) hn.put("charset", charset); if ((charset != null) && (hreflang != null)) hn.put("hreflang", hreflang); } } // FIXME: allow these if the charset and encoding are encoded into // the URL // FIXME: link types - // http://www.w3.org/TR/html4/types.html#type-links - the // stylesheet stuff, primarily - rel and rev properties - parse // these, use same fix as above (browser may assume text/css for // anything linked as a stylesheet) return hn; } } // We do not allow forms to act anywhere else than on / static class FormTagVerifier extends CoreTagVerifier{ FormTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs, String[] eventAttrs) { super(tag, allowedAttrs, uriAttrs, null, eventAttrs); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); if(p.startSlash) { // Allow, but only with standard elements return hn; } String method = (String) h.get("method"); String action = (String) h.get("action"); String finalAction; try { finalAction = pc.cb.processForm(method, action); } catch (CommentException e) { pc.writeAfterTag.append("<!-- ").append(HTMLEncoder.encode(e.toString())).append(" -->"); return null; } if(finalAction == null) return null; hn.put("method", method); hn.put("action", finalAction); // Force enctype and accept-charset to acceptable values. 
hn.put("enctype", "multipart/form-data"); hn.put("accept-charset", "UTF-8"); return hn; } } static class InputTagVerifier extends CoreTagVerifier{ final HashSet allowedTypes; String[] types = new String[]{ "text", "password", "checkbox", "radio", "submit", "reset,", // no ! file "hidden", "image", "button" }; InputTagVerifier( String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs, String[] eventAttrs) { super(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs); this.allowedTypes = new HashSet(); if (types != null) { for (int x = 0; x < types.length; x++) this.allowedTypes.add(types[x]); } } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); // We drop the whole <input> if type isn't allowed if(!allowedTypes.contains(hn.get("type"))){ return null; } return hn; } } static class MetaTagVerifier extends TagVerifier { MetaTagVerifier() { super("meta", new String[] { "id" }); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); /* * Several possibilities: a) meta http-equiv=X content=Y b) meta * name=X content=Y */ String http_equiv = getHashString(h, "http-equiv"); String name = getHashString(h, "name"); String content = getHashString(h, "content"); String scheme = getHashString(h, "scheme"); if(logMINOR) Logger.minor(this, "meta: name="+name+", content="+content+", http-equiv="+http_equiv+", scheme="+scheme); if (content != null) { if ((name != null) && (http_equiv == null)) { if (name.equalsIgnoreCase("Author")) { hn.put("name", name); hn.put("content", content); } else if (name.equalsIgnoreCase("Keywords")) { hn.put("name", name); hn.put("content", content); } else if (name.equalsIgnoreCase("Description")) { hn.put("name", name); hn.put("content", content); } } else if ((http_equiv != null) && (name == null)) { if (http_equiv.equalsIgnoreCase("Expires")) 
{ hn.put("http-equiv", http_equiv); hn.put("content", content); } else if ( http_equiv.equalsIgnoreCase("Content-Script-Type")) { // We don't support script at this time. } else if ( http_equiv.equalsIgnoreCase("Content-Style-Type")) { // FIXME: charsets if (content.equalsIgnoreCase("text/css")) { // FIXME: selectable style languages - only matters // when we have implemented more than one // FIXME: if we ever do allow it... the spec // http://www.w3.org/TR/html4/present/styles.html#h-14.2.1 // says only the last definition counts... // but it only counts if it's in the HEAD section, // so we DONT need to parse the whole doc hn.put("http-equiv", http_equiv); hn.put("content", content); } // FIXME: add some more headers - Dublin Core? } else if (http_equiv.equalsIgnoreCase("Content-Type")) { if(logMINOR) Logger.minor(this, "Found http-equiv content-type="+content); String[] typesplit = splitType(content); if(logDEBUG) { for(int i=0;i<typesplit.length;i++) Logger.debug(this, "["+i+"] = "+typesplit[i]); } if (typesplit[0].equalsIgnoreCase("text/html") && ((typesplit[1] == null) || typesplit[1].equalsIgnoreCase(pc.charset))) { hn.put("http-equiv", http_equiv); hn.put( "content", typesplit[0] + (typesplit[1] != null ? 
"; charset=" + typesplit[1] : "")); } if(typesplit[1] != null) pc.detectedCharset = typesplit[1].trim(); } else if ( http_equiv.equalsIgnoreCase("Content-Language")) { hn.put("http-equiv", "Content-Language"); hn.put("content", content); } } } return hn; } } static class DocTypeTagVerifier extends TagVerifier { DocTypeTagVerifier(String tag) { super(tag, null); } static final Hashtable DTDs = new Hashtable(); static { DTDs.put( "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"); DTDs.put( "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"); DTDs.put( "-//W3C//DTD XHTML 1.0 Frameset//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"); DTDs.put( "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"); DTDs.put( "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd"); DTDs.put( "-//W3C//DTD HTML 4.01 Frameset//EN", "http://www.w3.org/TR/html4/frameset.dtd"); DTDs.put("-//W3C//DTD HTML 3.2 Final//EN", new Object()); } ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) { if (!((t.unparsedAttrs.length == 3) || (t.unparsedAttrs.length == 4))) return null; if (!t.unparsedAttrs[0].equalsIgnoreCase("html")) return null; if (!t.unparsedAttrs[1].equalsIgnoreCase("public")) return null; String s = stripQuotes(t.unparsedAttrs[2]); if (!DTDs.containsKey(s)) return null; if (t.unparsedAttrs.length == 4) { String ss = stripQuotes(t.unparsedAttrs[3]); String spec = getHashString(DTDs, s); if ((spec != null) && !spec.equals(ss)) return null; } return t; } } static class XmlTagVerifier extends TagVerifier { XmlTagVerifier() { super("?xml", null); } ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) { if (t.unparsedAttrs.length != 2) return null; if (!t.unparsedAttrs[0].equals("version=\"1.0\"")) return null; if (!t.unparsedAttrs[1].startsWith("encoding=\"") && !t.unparsedAttrs[1].endsWith("\"?")) return null; if (!t .unparsedAttrs[1] 
.substring(10, t.unparsedAttrs[1].length() - 2) .equalsIgnoreCase(pc.charset)) return null; return t; } } static class HtmlTagVerifier extends TagVerifier { HtmlTagVerifier() { super("html", new String[] { "id", "version" }); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); String xmlns = getHashString(h, "xmlns"); if ((xmlns != null) && xmlns.equals("http://www.w3.org/1999/xhtml")) hn.put("xmlns", xmlns); return hn; } } static class BaseHrefTagVerifier extends TagVerifier { BaseHrefTagVerifier(String string, String[] strings, String[] strings2) { super(string, strings, strings2, null); } Hashtable sanitizeHash( Hashtable h, ParsedTag p, HTMLParseContext pc) throws DataFilterException { Hashtable hn = super.sanitizeHash(h, p, pc); // Get the already-sanitized version. String baseHref = getHashString(hn, "href"); if(baseHref != null) { String ref = pc.cb.onBaseHref(baseHref); if(ref != null) hn.put("href", ref); } return hn; } } static String sanitizeStyle(String style, FilterCallback cb, HTMLParseContext hpc) throws DataFilterException { if(style == null) return null; if(hpc.noOutput) return null; Reader r = new StringReader(style); Writer w = new StringWriter(); style = style.trim(); if(logMINOR) Logger.minor(HTMLFilter.class, "Sanitizing style: " + style); CSSParser pc = new CSSParser(r, w, false, cb); try { pc.parse(); } catch (IOException e) { Logger.error( HTMLFilter.class, "IOException parsing inline CSS!"); } catch (Error e) { if (e.getMessage().equals("Error: could not match input")) { // this sucks, it should be a proper exception Logger.normal( HTMLFilter.class, "CSS Parse Error!", e); return "/* "+l10n("couldNotParseStyle")+" */"; } else throw e; } String s = w.toString(); if ((s == null) || (s.length() == 0)) return null; // Core.logger.log(SaferFilter.class, "Style now: " + s, Logger.DEBUG); if(logMINOR) Logger.minor(HTMLFilter.class, "Style finally: " + 
s); return s; } static String escapeQuotes(String s) { StringBuffer buf = new StringBuffer(s.length()); for (int x = 0; x < s.length(); x++) { char c = s.charAt(x); if (c == '\"') { buf.append("&quot;"); } else { buf.append(c); } } return buf.toString(); } static String sanitizeScripting(String script) { // Kill it. At some point we may want to allow certain recipes - FIXME return null; } static String sanitizeURI(String uri, FilterCallback cb, boolean inline) throws CommentException { return sanitizeURI(uri, null, null, cb, inline); } /* * While we're only interested in the type and the charset, the format is a * lot more flexible than that. (avian) TEXT/PLAIN; format=flowed; * charset=US-ASCII IMAGE/JPEG; name=test.jpeg; x-unix-mode=0644 */ static String[] splitType(String type) { StringFieldParser sfp; String charset = null, param, name, value; int x; sfp = new StringFieldParser(type, ';'); type = sfp.nextField().trim(); while (sfp.hasMoreFields()) { param = sfp.nextField(); x = param.indexOf('='); if (x != -1) { name = param.substring(0, x).trim(); value = param.substring(x + 1).trim(); if (name.equals("charset")) charset = value; } } return new String[] { type, charset }; } // A simple string splitter // StringTokenizer doesn't work well for our purpose. 
// (avian)
/**
 * Splits a string into fields on a single separator character. Unlike
 * StringTokenizer it reports empty fields, including a trailing one after a
 * final separator.
 */
static class StringFieldParser {
    private String str;
    private int maxPos, curPos;
    private char c;

    /** Splits on the tab character. */
    public StringFieldParser(String str) {
        this(str, '\t');
    }

    /**
     * @param str text to split
     * @param c separator character
     */
    public StringFieldParser(String str, char c) {
        this.str = str;
        this.maxPos = str.length();
        this.curPos = 0;
        this.c = c;
    }

    /** @return true while nextField() would return a field rather than null */
    public boolean hasMoreFields() {
        return curPos <= maxPos;
    }

    /** @return the next field (possibly empty), or null when exhausted */
    public String nextField() {
        if (curPos > maxPos) {
            return null;
        }
        int start = curPos;
        while ((curPos < maxPos) && (str.charAt(curPos) != c)) {
            curPos++;
        }
        int end = curPos;
        // Step over the separator; after the last field this moves curPos
        // past maxPos, which is what marks the parser as exhausted.
        curPos++;
        return str.substring(start, end);
    }
}

/**
 * Like sanitizeURI, but a CommentException is turned into an HTML comment
 * appended to pc.writeAfterTag and null is returned.
 */
static String htmlSanitizeURI(
    String suri,
    String overrideType,
    String overrideCharset,
    FilterCallback cb,
    HTMLParseContext pc,
    boolean inline) {
    try {
        return sanitizeURI(suri, overrideType, overrideCharset, cb, inline);
    } catch (CommentException e) {
        pc.writeAfterTag.append("<!-- ").append(HTMLEncoder.encode(e.toString())).append(" -->");
        return null;
    }
}

/**
 * Hands a URI to the filter callback's processURI.
 *
 * @param suri the URI text
 * @param overrideType type to pass to the callback (may be null)
 * @param overrideCharset charset appended to overrideType as a
 *        "; charset=" parameter when both are present
 * @param cb the filter callback
 * @param inline passed through to processURI
 * @return whatever cb.processURI returns
 */
static String sanitizeURI(
    String suri,
    String overrideType,
    String overrideCharset,
    FilterCallback cb,
    boolean inline) throws CommentException {
    if(logMINOR)
        Logger.minor(HTMLFilter.class, "Sanitizing URI: "+suri+" ( override type "+overrideType +" override charset "+overrideCharset+" ) inline="+inline, new Exception("debug"));
    String type = overrideType;
    // Only extend a real type: appending to a null type would produce the
    // literal string "null; charset=...".
    if ((type != null) && (overrideCharset != null) && (overrideCharset.length() > 0)) {
        type += "; charset="+overrideCharset;
    }
    return cb.processURI(suri, type, false, inline);
}

/** @return the value stored under key if it is a String, otherwise null */
static String getHashString(Hashtable h, String key) {
    Object o = h.get(key);
    return (o instanceof String) ? (String) o : null;
}

/** Looks up the localized string "HTMLFilter." + key. */
private static String l10n(String key) {
    return L10n.getString("HTMLFilter."+key);
}

/** Looks up "HTMLFilter." + key, passing pattern and value on to L10n.getString. */
private static String l10n(String key, String pattern, String value) {
    return L10n.getString("HTMLFilter."+key, pattern, value);
}
} // closes the enclosing top-level class

The table below shows all metrics for HTMLFilter.java.

Metric Value Description
BLOCKS 223.00 Number of blocks
BLOCK_COMMENT 37.00 Number of block comment lines
COMMENTS 134.00 Comment lines
COMMENT_DENSITY 0.08 Comment density
COMPARISONS 210.00 Number of comparison operators
CYCLOMATIC 323.00 Cyclomatic complexity
DECL_COMMENTS 7.00 Comments in declarations
DOC_COMMENT 20.00 Number of javadoc comment lines
ELOC 1668.00 Effective lines of code
EXEC_COMMENTS 53.00 Comments in executable code
EXITS 100.00 Procedure exits
FUNCTIONS 70.00 Number of function declarations
HALSTEAD_DIFFICULTY 132.50 Halstead difficulty
HALSTEAD_EFFORT 0.00 Halstead effort
INTERFACE_COMPLEXITY 308.00 Interface complexity
JAVA0001 0.00JAVA0001 Package name does not contain only lower case letters
JAVA0002 1.00JAVA0002 Package name does not begin with a top level domain name or country code
JAVA0003 0.00JAVA0003 Minimize use of on-demand (.*) imports
JAVA0004 0.00JAVA0004 Unnecessary import from java.lang
JAVA0005 0.00JAVA0005 Imports not in specified order
JAVA0006 0.00JAVA0006 Empty finally block
JAVA0007 0.00JAVA0007 Should not declare public field
JAVA0008 0.00JAVA0008 Empty catch block
JAVA0009 0.00JAVA0009 Protected member in final class
JAVA0010 0.00JAVA0010 Non-instantiable class does not contain a non-private static member
JAVA0011 0.00JAVA0011 Abstract class does not contain an abstract method
JAVA0012 0.00JAVA0012 Non-constructor method with same name as declaring class
JAVA0013 0.00JAVA0013 Non-blank final field is not static
JAVA0014 0.00JAVA0014 Class with only static members has non-private constructor
JAVA0015 0.00JAVA0015 Package class contains public nested type
JAVA0016 0.00JAVA0016 Abstract class contains public constructor
JAVA0017 0.00JAVA0017 Class name does not have required form
JAVA0018 0.00JAVA0018 Method name does not have required form
JAVA0019 0.00JAVA0019 Interface name does not have required form
JAVA0020 0.00JAVA0020 Field name does not have required form
JAVA0021 0.00JAVA0021 Interface method name does not have required form
JAVA0022 0.00JAVA0022 Static final field name does not have required form
JAVA0023 0.00JAVA0023 Empty finalize method
JAVA0024 0.00JAVA0024 Empty class
JAVA0025 0.00JAVA0025 Method override is empty
JAVA0026 0.00JAVA0026 Finalize method with parameters
JAVA0029 0.00JAVA0029 Private method not used
JAVA0030 0.00JAVA0030 Private field not used
JAVA0031 1.00JAVA0031 Case statement not properly closed
JAVA0032 1.00JAVA0032 Switch statement missing default
JAVA0033 0.00JAVA0033 default: not last case in switch statement
JAVA0034 93.00JAVA0034 Missing braces in if statement
JAVA0035 11.00JAVA0035 Missing braces in for statement
JAVA0036 1.00JAVA0036 Missing braces in while statement
JAVA0038 0.00JAVA0038 Non-case label in switch statement
JAVA0039 0.00JAVA0039 Break statement with label
JAVA0040 0.00JAVA0040 Switch statement contains N cases (maximum: M)
JAVA0041 0.00JAVA0041 Nested synchronized block
JAVA0042 0.00JAVA0042 Empty synchronized statement
JAVA0043 0.00JAVA0043 Inner class does not use outer class
JAVA0044 0.00JAVA0044 Serializable class with no instance variables
JAVA0045 0.00JAVA0045 Serializable class with only transient fields
JAVA0046 0.00JAVA0046 Name of class not derived from Exception ends with 'Exception'
JAVA0047 0.00JAVA0047 Serializable class derives from invalid base class
JAVA0048 0.00JAVA0048 Name of class derived from Exception does not end with 'Exception'
JAVA0049 1.00JAVA0049 Nested block at depth N (maximum: M)
JAVA0050 0.00JAVA0050 Class derives from java.lang.Error
JAVA0051 0.00JAVA0051 Class derives from java.lang.RuntimeException
JAVA0052 0.00JAVA0052 Class derives from java.lang.Throwable
JAVA0053 0.00JAVA0053 Unused label
JAVA0054 3.00JAVA0054 Inheritance depth N exceeds maximum M
JAVA0055 0.00JAVA0055 Class should be interface
JAVA0056 0.00JAVA0056 Unnecessary abstract modifier for interface or annotation
JAVA0057 0.00JAVA0057 Unnecessary default constructor
JAVA0058 0.00JAVA0058 Constructor calls super()
JAVA0059 0.00JAVA0059 Method override only calls super()
JAVA0061 0.00JAVA0061 Inaccessible member in anonymous class
JAVA0062 0.00JAVA0062 Public class missing public member or protected constructor
JAVA0063 0.00JAVA0063 Identifier name should not contain '$'
JAVA0064 0.00JAVA0064 N variations of identifier name (maximum: M)
JAVA0065 0.00JAVA0065 Unnecessary final modifier for method in final class
JAVA0066 0.00JAVA0066 Unnecessary modifier for interface nested type
JAVA0067 0.00JAVA0067 Array descriptor on identifier name
JAVA0068 1.00JAVA0068 Modifiers not declared in recommended order
JAVA0071 0.00JAVA0071 Strings compared with ==
JAVA0073 0.00JAVA0073 Integer division in floating-point context
JAVA0074 0.00JAVA0074 Use of Object.notify()
JAVA0075 0.00JAVA0075 Method parameter hides field
JAVA0076 8.00JAVA0076 Use of magic number
JAVA0077 1.00JAVA0077 Private field not used in declaring class
JAVA0078 0.00JAVA0078 Floating point values compared with ==
JAVA0079 0.00JAVA0079 Use of instance to reference static member
JAVA0080 0.00JAVA0080 Import declaration not used
JAVA0081 0.00JAVA0081 Boolean literal in comparison
JAVA0082 0.00JAVA0082 Unnecessary widening cast
JAVA0083 0.00JAVA0083 Unnecessary instanceof test
JAVA0084 0.00JAVA0084 Should use compound assignment operator
JAVA0085 0.00JAVA0085 Use of sun.* class
JAVA0087 0.00JAVA0087 Use of Thread.sleep()
JAVA0089 0.00JAVA0089 Use of restricted package
JAVA0092 0.00JAVA0092 Use of restricted type
JAVA0093 0.00JAVA0093 Redundant assignment
JAVA0094 0.00JAVA0094 Field hides a superclass field
JAVA0095 0.00JAVA0095 Uninitialized private field
JAVA0096 0.00JAVA0096 Field in nested class hides outer field
JAVA0098 0.00JAVA0098 Minimize use of implicit field initializers
JAVA0100 1.00JAVA0100 Class contains N non-final fields (maximum: M)
JAVA0101 0.00JAVA0101 Unnecessary modifier for field in interface
JAVA0102 0.00JAVA0102 Last statement in finalize() not super.finalize()
JAVA0103 0.00JAVA0103 Explicit call to finalize()
JAVA0104 0.00JAVA0104 finalize() only calls super.finalize()
JAVA0105 0.00JAVA0105 Duplicate import declaration
JAVA0106 0.00JAVA0106 Unnecessary import from current package
JAVA0108 0.00JAVA0108 Incorrect javadoc: no @param tag for 'parameter'
JAVA0109 0.00JAVA0109 Incorrect javadoc: no parameter 'parameter'
JAVA0110 0.00JAVA0110 Incorrect javadoc: no @return tag
JAVA0111 0.00JAVA0111 Incorrect javadoc: @return tag for void method
JAVA0112 0.00JAVA0112 Incorrect javadoc: no exception 'exception' in throws
JAVA0113 0.00JAVA0113 Incorrect javadoc: no @author tag
JAVA0114 0.00JAVA0114 Incorrect javadoc: no @version tag
JAVA0115 0.00JAVA0115 Incorrect javadoc: no @throws or @exception tag for 'exception'
JAVA0116 0.00JAVA0116 Missing javadoc: field 'field'
JAVA0117 8.00JAVA0117 Missing javadoc: method 'method'
JAVA0118 1.00JAVA0118 Missing javadoc: type 'type'
JAVA0119 0.00JAVA0119 Control variable changed within body of for loop
JAVA0123 2.00JAVA0123 Use all three components of for loop
JAVA0125 0.00JAVA0125 Continue statement with label
JAVA0126 0.00JAVA0126 Method declares unchecked exception in throws
JAVA0128 4.00JAVA0128 Public constructor in non-public class
JAVA0130 0.00JAVA0130 Non-static method does not use instance fields
JAVA0131 0.00JAVA0131 Compatible method does not override base
JAVA0132 0.00JAVA0132 Method overload with compatible signature
JAVA0133 0.00JAVA0133 Non-synchronized method overrides synchronized method
JAVA0135 0.00JAVA0135 Only one of Object.equals and Object.hashCode defined: missing 'method'
JAVA0136 0.00JAVA0136 N methods defined in class (maximum: M)
JAVA0137 0.00JAVA0137 Non-abstract class missing constructor
JAVA0138 2.00JAVA0138 N parameters defined for method (maximum: M)
JAVA0139 0.00JAVA0139 Definition of main other than public static void main(java.lang.String[])
JAVA0141 0.00JAVA0141 Unnecessary modifier for method in interface
JAVA0143 0.00JAVA0143 Synchronized method
JAVA0144 6.00JAVA0144 Line exceeds maximum M characters
JAVA0145 7433.00JAVA0145 Tab character used in source file
JAVA0150 1.00JAVA0150 java.lang.Error (or subclass) thrown
JAVA0153 0.00JAVA0153 Inefficient conversion of integer to string
JAVA0159 0.00JAVA0159 Inefficient conversion of string to integer
JAVA0160 2.00JAVA0160 Method does not throw specified exception
JAVA0161 0.00JAVA0161 Conditional wait() not in loop
JAVA0163 0.00JAVA0163 Empty statement
JAVA0165 0.00JAVA0165 Conflicting return statement in finally block
JAVA0166 3.00JAVA0166 Generic exception caught
JAVA0167 0.00JAVA0167 ThreadDeath not rethrown
JAVA0169 0.00JAVA0169 Unnecessary catch block: exception 'exception'
JAVA0170 3.00JAVA0170 Caught exception not derived from java.lang.Exception
JAVA0171 0.00JAVA0171 Unused local variable
JAVA0173 3.00JAVA0173 Unused method parameter
JAVA0174 0.00JAVA0174 Assigned local variable never used
JAVA0175 0.00JAVA0175 Successive assignment to variable
JAVA0176 0.00JAVA0176 Local variable name does not have required form
JAVA0177 12.00 JAVA0177 Variable declaration missing initializer
JAVA0179 0.00JAVA0179 Local variable hides visible field
JAVA0233 0.00JAVA0233 Definition of serialVersionUID other than 'private static final long serialVersionUID'
JAVA0234 0.00JAVA0234 Class is Serializable but does not define serialVersionUID
JAVA0235 0.00JAVA0235 Class defines serialVersionUID but does not implement Serializable
JAVA0236 0.00JAVA0236 Attempt to clone an object which does not implement Cloneable
JAVA0237 0.00JAVA0237 Class implements Cloneable but does not have public clone method
JAVA0238 0.00JAVA0238 Clone method does not call super.clone()
JAVA0239 0.00JAVA0239 Class declares 'readObject' or 'writeObject' but does not implement Serializable
JAVA0240 0.00JAVA0240 Serializable class which declares readObject or writeObject but not both
JAVA0241 0.00JAVA0241 'readObject' or 'writeObject' should be declared private in Serializable class
JAVA0242 0.00JAVA0242 Transient field in non-Serializable class
JAVA0243 0.00JAVA0243 'readResolve' or 'writeReplace' should be declared private or protected
JAVA0244 0.00JAVA0244 Field or method name in subclass differs only by case from inherited field or method
JAVA0245 0.00JAVA0245 JUnit TestCase with non-trivial constructor
JAVA0246 0.00JAVA0246 JUnit assertXXX statement missing message parameter
JAVA0247 0.00JAVA0247 JUnit 'setUp()' and 'tearDown()' should call super method
JAVA0248 0.00JAVA0248 JUnit method 'setUp' or 'tearDown' with incorrect signature
JAVA0249 0.00JAVA0249 JUnit TestCase 'suite()' should be declared static
JAVA0250 0.00JAVA0250 JUnit TestCase declares testXXX method with incorrect signature
JAVA0251 0.00JAVA0251 Use '%n' for line breaks in printf/format for platform independence
JAVA0252 0.00JAVA0252 'enum' is a Java 1.5 reserved word
JAVA0253 0.00JAVA0253 Not all enum constants consumed in switch statement
JAVA0254 1.00JAVA0254 Use enhanced for loop construct instead of Iterator
JAVA0255 0.00JAVA0255 Result of method invocation not used
JAVA0256 1.00JAVA0256 Assignment of external collection/array to field
JAVA0257 0.00JAVA0257 Use of 'Constant Interface' anti-pattern
JAVA0258 0.00JAVA0258 Implement Iterable for foreach compatibility
JAVA0259 0.00JAVA0259 Return of collection/array field
JAVA0260 0.00JAVA0260 Use 'enum' instead of Enumerated Type pattern
JAVA0261 0.00JAVA0261 Use specialized Enum collection types
JAVA0262 0.00JAVA0262 Use of char in integer context
JAVA0263 0.00JAVA0263 Long literal ends with 'l' instead of 'L'
JAVA0264 0.00JAVA0264 Integer math in long context - check for overflow
JAVA0265 0.00JAVA0265 Use of Throwable.printStackTrace()
JAVA0266 0.00JAVA0266 Use of System.out
JAVA0267 0.00JAVA0267 Use of System.err
JAVA0269 0.00JAVA0269 Contents of StringBuffer never used
JAVA0270 9.00JAVA0270 Use Java 5.0 enhanced for loop construct to iterate over all elements in an array
JAVA0271 0.00JAVA0271 Minimize use of on-demand (.*) static imports
JAVA0272 0.00JAVA0272 Thread.run() called
JAVA0273 0.00JAVA0273 Non-final derivative of Thread calls start() in constructor
JAVA0274 0.00JAVA0274 Serializable class has a synchronized readObject()
JAVA0275 0.00JAVA0275 Serializable class has a synchronized writeObject() and no other synchronized methods
JAVA0276 0.00JAVA0276 Unnecessary use of String constructor
JAVA0277 0.00JAVA0277 Iterator.next() implementation does not throw NoSuchElementException
JAVA0278 0.00JAVA0278 Unnecessary use of Boolean constructor
JAVA0279 0.00JAVA0279 Serialization method readObject or readObjectNoData calls an overridable method
JAVA0280 0.00JAVA0280 IllegalMonitorStateException caught
JAVA0281 0.00JAVA0281 Iterator.next() not called in loop
JAVA0282 0.00JAVA0282 Call to Iterator.next() in loop which does not test Iterator.hasNext()
JAVA0283 0.00JAVA0283 Control variable not updated in loop body
JAVA0284 0.00JAVA0284 Explicit garbage collection
JAVA0285 0.00JAVA0285 Dereference of potentially null variable
JAVA0286 0.00JAVA0286 Dereference of null variable
JAVA0287 0.00JAVA0287 Unnecessary null check
JAVA0288 0.00JAVA0288 Inconsistent null check
LINES 2094.00 Number of lines in the source file
LINE_COMMENT 77.00 Number of line comments
LOC 1852.00 Lines of code
LOGICAL_LINES 726.00 Number of statements
LOOPS 26.00 Number of loops
NEST_DEPTH 6.00 Maximum nesting depth
OPERANDS 3853.00 Number of operands
OPERATORS 6787.00 Number of operators
PARAMS 163.00 Number of formal parameter declarations
PROGRAM_LENGTH 10640.00 Halstead program length
PROGRAM_VOCAB 979.00 Halstead program vocabulary
PROGRAM_VOLUME 0.00 Halstead program volume
RETURNS 145.00 Number of return points from functions
SIZE 56288.00 Size of the file in bytes
UNIQUE_OPERANDS 916.00 Number of unique operands
UNIQUE_OPERATORS 63.00 Number of unique operators
WHITESPACE 108.00 Number of whitespace lines