routie dev init since i didn't adhere to any proper guidance up until now
This commit is contained in:
+21
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2020 Michael Schmidt
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
+54
@@ -0,0 +1,54 @@
|
||||
# Single-character super-linear RegExps
|
||||
|
||||
<sup><sub>what a name...</sub></sup>
|
||||
|
||||
[Build status](https://github.com/RunDevelopment/scslre/actions)
|
||||
[npm package](https://www.npmjs.com/package/scslre)
|
||||
|
||||
A library to find JS RegExp with super-linear worst-case time complexity for attack strings that repeat a single character.
|
||||
|
||||
The static analysis method implemented by this library focuses on finding attack string tuples where a single character is repeated. This major limitation allows the library to be fast while also offering decent support for backreferences and [assertions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Assertions).
|
||||
|
||||
This library is not intended as a full static analysis to guard against super-linear worst-case time complexity. It is meant to be used as a supplementary analysis on top of existing general analysis methods that don't (or don't fully) support advanced regex features, or as a lightweight analysis on top of existing full (but heavyweight) analysis methods. Libraries that provide such general or near-full analysis include [recheck](https://github.com/MakeNowJust-Labo/recheck) and [vuln-regex-detector](https://github.com/davisjam/vuln-regex-detector). You may consider using these libraries as well.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
This library exports only a single function, `analyse`, which takes a (parsed or unparsed) RegExp literal and returns an analysis result containing a list of reports that show the quantifiers causing super-linear worst-case time complexity.
|
||||
|
||||
### Documentation
|
||||
|
||||
For more information on the exact inputs and outputs of each function, see the full API documentation.
|
||||
|
||||
- [Latest release](https://rundevelopment.github.io/scslre/docs/latest/)
|
||||
- [Development](https://rundevelopment.github.io/scslre/docs/dev/)
|
||||
|
||||
|
||||
## Limitations
|
||||
|
||||
### Analysis
|
||||
|
||||
This library is implemented using a very limited static analysis method that can only find attack strings where a single character is repeated. Attack strings are generated from a tuple _(x,y,z)_ such that every string _s = xy<sup>n</sup>z_ (or `x + y.repeat(n) + z` for JS folks) takes _O(n<sup>p</sup>)_ or _O(2<sup>n</sup>)_ many steps to reject, p>1. This analysis method can only find tuples where _y_ is a single character. E.g. the polynomial backtracking in `/^(ab)*(ab)*$/` for _(x,y,z) = ("", "ab", "c")_ cannot be detected by this library because _y_ is not a single character.
|
||||
|
||||
However, this limitation allows the static analysis method to be quick and to provide good (but not perfect) support for backreferences and assertions (e.g. `\b`, `(?<!ba+)`).
|
||||
|
||||
### False negatives
|
||||
|
||||
The analysis method primarily searches for polynomial backtracking. Finds of exponential backtracking are only a byproduct. Because of this, not all causes of super-linear worst-case time complexity are found.
|
||||
|
||||
### False positives
|
||||
|
||||
This library doesn't actually search for the whole tuple _(x,y,z)_; it only searches for _y_ and assumes that adequate values for _x_ and _z_ can be found. A single-character approximation of the suffix _z_ will be computed and accounted for but false positives are still possible.
|
||||
|
||||
|
||||
## Reports
|
||||
|
||||
There are 3 different types of reports that each indicate a different type of cause for the super-linear worst-case time complexity. All are explained in the documentation of their types.
|
||||
|
||||
### Exponential backtracking
|
||||
|
||||
While most reports show polynomial backtracking, some report exponential backtracking. Exponential backtracking is a lot more dangerous and can easily be exploited for [ReDoS attacks](https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS).
|
||||
|
||||
While other reports may be dismissed, __all reports of exponential backtracking must be fixed__.
|
||||
|
||||
All reports with `exponential: true` report exponential backtracking.
|
||||
+247
@@ -0,0 +1,247 @@
|
||||
// Generated by dts-bundle v0.7.3
|
||||
// Dependencies for this module:
|
||||
// ../refa
|
||||
// ../@eslint-community/regexpp
|
||||
|
||||
declare module "scslre" {
|
||||
import { CharSet } from "refa";
|
||||
import { AST } from "@eslint-community/regexpp";
|
||||
export interface AnalysisResult {
|
||||
/**
|
||||
* The parse AST of the analysed literal.
|
||||
*/
|
||||
parsed: ParsedLiteral;
|
||||
/**
|
||||
* The analysed literal.
|
||||
*/
|
||||
literal: Literal;
|
||||
/**
|
||||
* A list of all reports found under the constraints of the given analysis options.
|
||||
*/
|
||||
reports: Report[];
|
||||
}
|
||||
export interface ReportBase {
|
||||
type: Report["type"];
|
||||
/**
|
||||
* The character to be repeated in order to create an input for which the analysed literal will have super-linear
|
||||
* runtime behavior.
|
||||
*/
|
||||
character: {
|
||||
/**
|
||||
* A non-empty set of characters that can be repeated to cause super-linear runtime.
|
||||
*
|
||||
* CharSet is a class from the [refa](https://github.com/RunDevelopment/refa) library.
|
||||
*/
|
||||
set: CharSet;
|
||||
/**
|
||||
* A single character that can be repeated to cause super-linear runtime.
|
||||
*
|
||||
* The implementation is allowed to pick any character in `set` but makes a best effort to pick a
|
||||
* "humanly readable" character.
|
||||
*/
|
||||
pick: string;
|
||||
/**
|
||||
* A literal that represents `set`.
|
||||
*
|
||||
* E.g. if `set` only contained the character "a" (lower case A), then the literal may be `/a/`.
|
||||
*/
|
||||
literal: Literal;
|
||||
};
|
||||
/**
|
||||
* Returns a new literal with this cause of super-linear runtime being fixed. If the cause of this report could not
|
||||
* be automatically fixed, `undefined` will be returned.
|
||||
*
|
||||
* A fixed literal is guaranteed to behave exactly the same as the analysed literal.
|
||||
*/
|
||||
fix(): Literal | undefined;
|
||||
/**
|
||||
* Whether the polynomial backtracking of this report causes exponential backtracking.
|
||||
*/
|
||||
exponential: boolean;
|
||||
}
|
||||
/**
|
||||
* This report indicates super-linear runtime caused by polynomial backtracking between two distinct quantifiers.
|
||||
*
|
||||
* ### Examples
|
||||
*
|
||||
* `/a+a+/`, `/\d*\w+/`, `/a*(?:a{2}d?|cd?)b?a+/`, `/(?:a+ba+){2}/`, `(?:a|ba+)+`
|
||||
*
|
||||
* ### Description
|
||||
*
|
||||
* This type of super-linear runtime is caused by the polynomial backtracking between two unbounded quantifiers.
|
||||
*
|
||||
* #### Start and end quantifiers
|
||||
*
|
||||
* While the start and end quantifiers are guaranteed to be distinct unbounded quantifiers, one may be parent
|
||||
* (or ancestor) of the other (e.g. `/(?:a|ba+)+/`). The matching direction of the quantifiers may also be different
|
||||
* (e.g. `/a+(?<!a*b)/`).
|
||||
*
|
||||
* ### Notes
|
||||
*
|
||||
* This type is called "trade" because polynomial backtracking between two quantifiers looks like the two quantifiers
|
||||
* are exchanging characters, a trade of sorts.
|
||||
*/
|
||||
export interface TradeReport extends ReportBase {
|
||||
/** Discriminant that identifies this report as a trade report. */
type: "Trade";
|
||||
/**
 * The start quantifier of the trade: one of the two distinct unbounded quantifiers that backtrack against each
 * other. It may be a parent (or ancestor) of `endQuant`, or nested within it.
 */
startQuant: AST.Quantifier;
|
||||
/**
 * The end quantifier of the trade: the other unbounded quantifier, guaranteed to be distinct from `startQuant`.
 */
endQuant: AST.Quantifier;
|
||||
}
|
||||
/**
|
||||
* This report indicates super-linear runtime caused by polynomial backtracking of a quantifier with itself.
|
||||
*
|
||||
* ### Examples
|
||||
*
|
||||
* `(?:a+){2}`, `(?:a+)+`
|
||||
*
|
||||
* ### Description
|
||||
*
|
||||
* This type of super-linear runtime is the special case of the trade type ([[`TradeReport`]]) where a quantifier trades characters with
|
||||
* itself. As this requires some form of repetition of the quantifier, the self quantifier is always nested within a
|
||||
* parent quantifier. The maximum of the parent quantifier determines the degree of polynomial backtracking (e.g.
|
||||
* `/(a+){0,3}/` backtracks in _O(n^3)_ and `/(a+)+/` backtracks in _O(2^n)_).
|
||||
*
|
||||
* ### Fixing
|
||||
*
|
||||
* To fix these reports, the quantifier must be prevented from reaching itself. This can be accomplished by e.g. removing the
|
||||
* quantifier (e.g. `/(?:a+)+/` => `/(?:a)+/`), using assertions (e.g. `/(a+|b){0,3}/` => `/(a+(?!a)|b){0,3}/`), or
|
||||
* rewriting the affected parts of the pattern. Reports of simple cases usually have a fix for you.
|
||||
*/
|
||||
export interface SelfReport extends ReportBase {
|
||||
type: "Self";
|
||||
/**
|
||||
* An unbounded quantifier that can reach itself.
|
||||
*/
|
||||
quant: AST.Quantifier;
|
||||
/**
|
||||
* A parent quantifier of [[`quant`]].
|
||||
*
|
||||
* The maximum of this quantifier is at least 2.
|
||||
*
|
||||
* This is guaranteed to be not the same quantifier as [[`quant`]].
|
||||
*/
|
||||
parentQuant: AST.Quantifier;
|
||||
}
|
||||
/**
|
||||
* This report indicates super-linear runtime caused by the matching algorithm moving the regexes across the input
|
||||
* string.
|
||||
*
|
||||
* ### Examples
|
||||
*
|
||||
* `/a+b/`
|
||||
*
|
||||
* ### Description
|
||||
*
|
||||
* This type of super-linear runtime is not caused by backtracking but by the matching algorithm itself. While the
|
||||
* regex engine will try to optimize as much as possible, in some cases, it will be forced to match a pattern against
|
||||
* every suffix of the given input string according to the
|
||||
* [ECMAScript specification](https://tc39.es/ecma262/#sec-regexpbuiltinexec). Because there are _n_ many suffixes for
|
||||
* a rejecting input string with length _n_, the total runtime will be the time it takes to reject every suffix times
|
||||
* _n_. For non-finite languages, even a DFA (that guarantees _O(n)_ __for every suffix__) might have a total worst-case
|
||||
* time complexity of _O(n^2)_.
|
||||
*
|
||||
* ### Fixing
|
||||
*
|
||||
* This type of super-linear runtime is the hardest to fix (if at all possible) because the fixed regex has to reject
|
||||
* all suffixes with an average worst-case time complexity of _O(1)_.
|
||||
*
|
||||
* ### Notes
|
||||
*
|
||||
* Literals with the
|
||||
* [sticky flag](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky)
|
||||
* (e.g. `/a+b/y`) and anchored literals (e.g. `/^a+b/` and `/\ba+b/` but not `/^\s+b/m` and `/\Ba+b/`) are immune to
|
||||
* this type of super-linear runtime.
|
||||
*
|
||||
* This type can never cause exponential backtracking.
|
||||
*/
|
||||
export interface MoveReport extends ReportBase {
|
||||
type: "Move";
|
||||
/**
|
||||
* The unbounded quantifier that caused this report.
|
||||
*/
|
||||
quant: AST.Quantifier;
|
||||
/**
|
||||
* This type can never cause exponential backtracking.
|
||||
*/
|
||||
exponential: false;
|
||||
}
|
||||
export type Report = TradeReport | MoveReport | SelfReport;
|
||||
/**
|
||||
* A light-weight representation of a JS RegExp literal.
|
||||
*
|
||||
* Only the `source` and `flags` properties have to be given. `source` and `flags` are required to be syntactically
|
||||
* valid.
|
||||
*
|
||||
* Literals are only guaranteed to be compatible with the `RegExp` constructor. The `source` may contain line breaks or
|
||||
* unescaped `/` characters. To convert a literal to a valid RegExp literal, use:
|
||||
*
|
||||
* ```js
|
||||
* RegExp(literal.source, literal.flags).toString()
|
||||
* ```
|
||||
*
|
||||
* _Note:_ A [bug](https://bugs.chromium.org/p/v8/issues/detail?id=9618) in v8's `RegExp.prototype.toString`
|
||||
* implementation caused some line breaks to not be escaped in older versions of NodeJS. You can use
|
||||
* [this workaround](https://github.com/terser/terser/pull/425/files#diff-9aa82f0ed674e050695a7422b1cd56d43ce47e6953688a16a003bf49c3481622R216)
|
||||
* to correct invalid RegExp literals.
|
||||
*/
|
||||
export interface Literal {
|
||||
/**
 * The pattern source, in the form accepted as the first argument of the `RegExp` constructor. It may contain
 * line breaks or unescaped `/` characters.
 */
source: string;
|
||||
/** The flags string, in the form accepted as the second argument of the `RegExp` constructor. */
flags: string;
|
||||
}
|
||||
/**
|
||||
* A representation of a parsed `RegExp` instance.
|
||||
*
|
||||
* This library uses [regexpp](https://github.com/mysticatea/regexpp) to parse JS RegExps. For more information on the
|
||||
* regexpp AST format, see [the definition](https://github.com/mysticatea/regexpp/blob/master/src/ast.ts) or see it live
|
||||
* in action on [astexplorer.net](https://astexplorer.net/#/gist/3b0c6dc514ab66df13b87c441a653a1a/latest).
|
||||
*/
|
||||
export interface ParsedLiteral {
|
||||
/** The parsed pattern, as a regexpp `AST.Pattern` node. */
pattern: AST.Pattern;
|
||||
/** The parsed flags, as a regexpp `AST.Flags` node. */
flags: AST.Flags;
|
||||
}
|
||||
export interface AnalysisOptions {
|
||||
/**
|
||||
* The maximum number of reports to be returned.
|
||||
*
|
||||
* @default Infinity
|
||||
*/
|
||||
maxReports?: number;
|
||||
/**
|
||||
* A record of allowed report types. All reports of a type that is mapped to `false` will be omitted.
|
||||
*
|
||||
* By default, all report types are allowed.
|
||||
*/
|
||||
reportTypes?: Partial<Record<Report["type"], boolean>>;
|
||||
/**
|
||||
* Whether the analyser is allowed to assume that a rejecting suffix can always be found.
|
||||
*
|
||||
* To exploit ambiguity in quantifiers, it is necessary to force the regex engine to go through all possible paths.
|
||||
* This can only be done by finding a suffix that causes the exploitable part of analysed regex to reject the input
|
||||
* string. If such a suffix cannot be found, the regex is not exploitable.
|
||||
*
|
||||
* If this option is set to `false`, a heuristic will be used to determine whether a rejecting suffix can be found.
|
||||
* This will prevent reporting false positives - non-exploitable quantifiers.
|
||||
*
|
||||
* The heuristic makes the assumption that the regex is used as is - that the regex is not modified or used to
|
||||
* construct other regexes. If this assumption is not met, the heuristic will prevent the reporting of potential
|
||||
* true positives.
|
||||
*
|
||||
* By setting this option to `true`, the heuristic will not be used and all reports are assumed to be true
|
||||
* positives.
|
||||
*
|
||||
* @default false
|
||||
*/
|
||||
assumeRejectingSuffix?: boolean;
|
||||
}
|
||||
/**
|
||||
* Analyses the given (parsed or unparsed) RegExp literal for causes of super-linear runtime complexity.
|
||||
*
|
||||
* If the given (unparsed) literal is not a syntactically valid JS RegExp, a `SyntaxError` will be thrown.
|
||||
*
|
||||
* @param input A literal or parsed literal.
|
||||
* @param options An optional record of options.
|
||||
*/
|
||||
export function analyse(
|
||||
input: Readonly<Literal> | Readonly<ParsedLiteral>,
|
||||
options?: Readonly<AnalysisOptions>
|
||||
): AnalysisResult;
|
||||
}
|
||||
+1
File diff suppressed because one or more lines are too long
+66
@@ -0,0 +1,66 @@
|
||||
{
|
||||
"name": "scslre",
|
||||
"version": "0.3.0",
|
||||
"description": "A library to find JS RegExp with super-linear worst-case time complexity for attack strings that repeat a single character.",
|
||||
"main": "index",
|
||||
"scripts": {
|
||||
"check": "npm run lint && npx tsc --noEmit && cd tests && npx tsc --noEmit",
|
||||
"lint": "npx eslint --ignore-path .gitignore **/*.ts",
|
||||
"test": "cd tests && mocha -r ts-node/register '**/*.ts'",
|
||||
"build": "npx rimraf ./index.* .out/** && npx tsc && rollup -c && npm run build:dts && npm run build:docs",
|
||||
"build:dts": "dts-bundle --main ./.out/index.d.ts --name scslre --out ../index.d.ts && prettier ./index.d.ts --write",
|
||||
"build:docs": "typedoc",
|
||||
"coverage": "npx nyc --reporter=html --reporter=text npm run test",
|
||||
"update-docs-branch": "git checkout docs && git merge master && npm run build:docs && git add * && git commit -m \"Task: Updated docs\" && git push && git checkout master",
|
||||
"prepublishOnly": "npm run build"
|
||||
},
|
||||
"keywords": [
|
||||
"regex",
|
||||
"regexp",
|
||||
"backtracking",
|
||||
"polynomial",
|
||||
"exponential"
|
||||
],
|
||||
"author": "Michael Schmidt",
|
||||
"homepage": "https://github.com/RunDevelopment/scslre#readme",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/RunDevelopment/scslre.git"
|
||||
},
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-node-resolve": "^9.0.0",
|
||||
"@types/chai": "^4.3.4",
|
||||
"@types/mocha": "^10.0.1",
|
||||
"@types/node": "^12.12.31",
|
||||
"@typescript-eslint/eslint-plugin": "^5.57.0",
|
||||
"@typescript-eslint/parser": "^5.57.0",
|
||||
"chai": "^4.3.7",
|
||||
"dts-bundle": "^0.7.3",
|
||||
"eslint": "^8.37.0",
|
||||
"eslint-config-prettier": "^8.8.0",
|
||||
"eslint-plugin-prettier": "^4.2.1",
|
||||
"mocha": "^10.2.0",
|
||||
"nyc": "^15.1.0",
|
||||
"prettier": "^2.8.7",
|
||||
"rimraf": "^3.0.2",
|
||||
"rollup": "^2.31.0",
|
||||
"rollup-plugin-terser": "^7.0.2",
|
||||
"ts-node": "^8.8.1",
|
||||
"typedoc": "^0.24.8",
|
||||
"typescript": "5.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@eslint-community/regexpp": "^4.8.0",
|
||||
"refa": "^0.12.0",
|
||||
"regexp-ast-analysis": "^0.7.0"
|
||||
},
|
||||
"files": [
|
||||
"index.js",
|
||||
"index.mjs",
|
||||
"index.d.ts"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^14.0.0 || >=16.0.0"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user