Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: grammar and explanation

...

  • match flags used in non-capturing groups (These override matching options that may or may not have been passed into the compile() method.)
  • greediness
  • grouping

Since Java regular expressions are similar to Perl's, it is a good idea to apply the lessons learned from Perl regular expressions.

Noncompliant Code Example

This program searches a log file of previous searches for keywords that match a regular expression to present search suggestions to the user.

This class does not sanitize the incoming regular expression and, as a result, exposes too much information to the user. The program searches the log of previous searches for entries that match a regular expression in order to present search suggestions from the log file to the user.

No Format
 A non-malicious use would be to enter "C" to match Charles and Cecilia.
 A malicious use would be to enter "?:)(^C.*,[0-9]+?,[0-9]+?$)|(?:" which
 grabs the IPs that made the search.

The outer parentheses of the malicious search string defeat the grouping protection. Using the OR operator allows injection of any arbitrary regex. This input therefore reveals the time and IP address of every search for a keyword beginning with 'C'.

Code Block
bgColor#FFCCCC
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExploitableLog {
    /*
     * Search log in CSV form: search string, time (unix), ip (integer).
     * The in-memory stand-in used below holds these four lines:
     *
     *   Alice,1267773881,2147651408
     *   Bono,1267774881,2147351708
     *   Charles,1267775881,1175523058
     *   Cecilia,1267773222,291232332
     *
     * CSVLog.readLine() hands back one line per call and null once every
     * line has been returned.
     */
    private CSVLog logfile;

    public ExploitableLog() {
        logfile = new CSVLog();
    }

    /*
     * Returns autocompletion suggestions: the keyword of every remaining log
     * line whose keyword starts with the given search text.
     *
     * NONCOMPLIANT: the search text is pasted into the regular expression
     * unmodified, so regex metacharacters supplied by the caller become part
     * of the pattern and can change what capture group 1 contains.
     *
     * The log is read through a cursor that is never moved back, so lines
     * consumed by one call are not seen by later calls on the same object.
     */
    public Set<String> suggestSearches(String search) {
        // Group 1 is meant to isolate the keyword; the rest of the pattern
        // pins the match to a complete "keyword,digits,digits" line.
        Pattern linePattern = Pattern.compile("^(" + search + ".*),[0-9]+?,[0-9]+?$");

        Set<String> suggestions = new HashSet<String>();
        for (String line = logfile.readLine(); line != null; line = logfile.readLine()) {
            Matcher lineMatcher = linePattern.matcher(line);
            if (!lineMatcher.find()) {
                continue;
            }
            suggestions.add(lineMatcher.group(1));
        }
        return suggestions;
    }

    /*
     * Minimal log reader. A real implementation would read from a file; the
     * contents are held in a string here for illustrative purposes.
     */
    public class CSVLog {
        public static final String logString =
            "Alice,1267773881,2147651408\n" +
            "Bono,1267774881,2147351708\n" +
            "Charles,1267775881,1175523058\n" +
            "Cecilia,1267773222,291232332\n";

        // Lines of logString, in order.
        private String[] entries;

        // Position of the next line readLine() will return.
        private int cursor;

        public CSVLog() {
            entries = logString.split("\n");
            cursor = 0;
        }

        /* Returns the next unread line, or null when none are left. */
        public String readLine() {
            return (cursor >= entries.length) ? null : entries[cursor++];
        }
    }
}

...

Solutions include parsing the CSV into a class prior to matching or whitelisting only certain characters (such as letters and digits). Blacklisting might be difficult due to the variability of the regex language.

This solution filters out non-alphanumeric characters from the search string using Java's Character.isLetterOrDigit(). This removes the grouping parentheses and the OR operator, which trigger the injection.

Code Block
bgColor#ccccff
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FilteredLog
{
    /* Say this logfile contains:
     * CSV style: search string, time (unix), ip (integer)
     *
     * Alice,1267773881,2147651408
     * Bono,1267774881,2147351708
     * Charles,1267775881,1175523058
     * Cecilia,1267773222,291232332
     *
     * and the CSVLog class has a readLine() method which retrieves a single
     * line from the CSVLog and returns null when at EOF
     */
    private CSVLog logfile;

    /**
     * Searches the search log for autocompletion suggestions.
     *
     * Every character of {@code search} that is not a letter or digit is
     * discarded before the text is placed into the regular expression, so
     * the caller cannot inject grouping, alternation or other regex syntax.
     * The whole log is scanned on every call, so an application may call
     * this method repeatedly.
     *
     * @param search user-supplied prefix of the keyword to look for
     * @return the keywords of all log lines whose keyword starts with the
     *         filtered search text; empty if there are none
     */
    public Set<String> suggestSearches(String search)
    {
        Set<String> searches = new HashSet<String>();

        /* Filter user input */
        search = keepLettersAndDigits(search);

        /* Construct regex from user string */
        //Regex matches full valid log lines. The grouping characters will limit
        //the returned string to only the keyword.
        String regex = "^(" + search + ".*),[0-9]+?,[0-9]+?$";
        Pattern p = Pattern.compile(regex);

        /* Read from log and match regex */
        //Start from the first line again; otherwise only the first call would
        //ever see any log lines.
        logfile.rewind();
        String s;
        while ((s = logfile.readLine()) != null) {
            Matcher m = p.matcher(s);
            if (m.find()) {
                String found = m.group(1);
                searches.add(found);
            }
        }

        return searches;
    }

    /* Returns input with everything except letters and digits removed.
     *
     * Works on code points rather than chars so that letters encoded as a
     * surrogate pair are kept instead of being dropped half by half.
     */
    private static String keepLettersAndDigits(String input)
    {
        StringBuilder sb = new StringBuilder(input.length());
        for (int i = 0; i < input.length(); ) {
            int cp = input.codePointAt(i);
            if (Character.isLetterOrDigit(cp))
                sb.appendCodePoint(cp);
            i += Character.charCount(cp);
        }
        return sb.toString();
    }

    public FilteredLog()
    {
        logfile = new CSVLog();
    }

    public class CSVLog
    {
        //this is supposed to come from a file, but its here as a string for
        //illustrative purposes
        public static final String logString =
            "Alice,1267773881,2147651408\n" +
            "Bono,1267774881,2147351708\n" +
            "Charles,1267775881,1175523058\n" +
            "Cecilia,1267773222,291232332\n";

        //lines of logString, in order
        private String[] log;

        //position of the next line readLine() will return
        private int index;

        public CSVLog()
        {
            index = 0;
            log = logString.split("\n");
        }

        /* Returns the next unread line, or null when at EOF. */
        public String readLine()
        {
            if (index < log.length)
                return log[index++];
            return null;
        }

        /* Moves the read position back to the first line. */
        public void rewind()
        {
            index = 0;
        }
    }
}

...