Java's regular expression facilities are wide-ranging and powerful. When a regular expression string is built from untrusted input, an attacker can modify the original expression to form a pattern that matches too widely, possibly resulting in far too much information being matched. The primary means of preventing this vulnerability is to sanitize any regular expression string that comes from untrusted input. Additionally, the programmer should look for ways to avoid using regular expressions from untrusted input altogether, or provide only a very limited subset of regular expression functionality to the user.
Noncompliant Code Example
Code Block |
---|
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/* Usage: Test1 <regex>
 * The regex is used directly without sanitization, causing sensitive data to be exposed.
 *
 * Imagine this program searches a database of users for usernames that match a regex.
 * Non-malicious usage: Test1 John.*
 * Malicious usage: Test1 (?s)John.*
 */
/**
 * Noncompliant demonstration of regex injection: a pattern supplied on the
 * command line is compiled as-is and matched against sensitive records.
 */
public class Test1 {

    /**
     * Matches the user-supplied regex against a hard-coded set of records and
     * prints every match to standard output.
     *
     * @param args {@code args[0]} is the regular expression to search with; if
     *             it is missing, an error is written to standard error and the
     *             method returns without searching
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Failed to specify a regex");
            return;
        }

        // Stand-in for sensitive data loaded from elsewhere: CSV-style
        // records, one "user,password" pair per line.
        final String records = "JohnPaul,HearsGodsVoice\nJohnJackson,OlympicBobsleder\nJohnMayer,MakesBadMusic\n";

        // NONCOMPLIANT: the untrusted argument is used without sanitization.
        // The appended comma is meant to confine matches to the user name
        // field, but input such as "(?s)John.*" lets '.' cross line
        // boundaries, so the match also swallows password fields.
        final String userPattern = args[0] + ",";
        System.out.println("Pattern: \'" + userPattern + "\'");

        // No compile-time flags are set; any flags come from the input itself.
        final Matcher hits = Pattern.compile(userPattern, 0).matcher(records);
        for (boolean found = hits.find(); found; found = hits.find()) {
            System.out.println("Found \'" + hits.group() + "\'");
        }
        System.err.println("DONE");
    }
}
|