At the moment I'm using the JS API to generate the parser at runtime. This works fine.
I then tried to generate the parser using the CLI, to avoid generating it during runtime. When I use it though I get errors (~half of my tests for parsing the string throw errors).
grammar.pegjs
pegjs -o parser.js grammar.pegjs
peg.generate('...')
and replace it with the new parser:
const parser = require('./parser');
parser.parse('...');
Expected behavior:
I would expect that the generated parser from the CLI works the same as the generated parser from the JS API.
Actual behavior:
Using the JS API, when I pass this string ('foo = "bar"'
) to the parser I get the following AST:
{
kind: 'condition',
target: 'foo',
operator: '=',
value: 'bar',
valueType: 'string',
attributeType: undefined
}
However, when I use the "generated" parser using the CLI, and pass the same string ('foo = "bar"'
) I get the following error:
SyntaxError: Expected "(", boolean, date, datetime, number, string, or time but "\"" found.
at peg$buildStructuredError (/Users/emmenko/xxx/parser.js:446:12)
at Object.peg$parse [as parse] (/Users/emmenko/xxx/parser.js:2865:11)
at repl:1:7
at ContextifyScript.Script.runInThisContext (vm.js:50:33)
at REPLServer.defaultEval (repl.js:240:29)
at bound (domain.js:301:14)
at REPLServer.runBound [as eval] (domain.js:314:12)
at REPLServer.onLine (repl.js:441:10)
at emitOne (events.js:121:20)
at REPLServer.emit (events.js:211:7)
0.10.0
8.9.1
[email protected]
Nice, you filled it in correctly 👍, now we just need the grammar and I can help you 😄
Here you go:
// GRAMMAR
const parser = peg.generate(`
{
// Strips the type wrapper from a parsed value.
// Accepts a single value node (an object carrying type and value, as built
// by the value rule) or an array of such nodes, and returns the raw value
// or the array of raw values. Anything falsy yields undefined.
function getFlattenedValue (value) {
  if (!value) return undefined
  if (!Array.isArray(value)) return value.value
  return value.map(function (node) { return node.value })
}
// Reports the value type exposed on a condition node.
// For an array the type of its first element is used. Only 'number' and
// 'boolean' are passed through unchanged; every other type (including
// 'string' itself and the date/time kinds) is reported as 'string'.
// Anything falsy yields undefined.
function getValueType (value) {
  if (!value) return undefined
  var typedNode = Array.isArray(value) ? value[0] : value
  var kind = typedNode.type
  if (kind === 'number' || kind === 'boolean') return kind
  return 'string'
}
// Derives the attribute type for conditions whose target is a string
// starting with 'attributes.'; any other target yields undefined, as does
// a missing value.
//   'in' / 'not in'  -> type of the first list element
//   'contains'       -> 'set-' followed by the type of the single value
//   anything else    -> 'set-' followed by the first element type for a
//                       list, otherwise the type of the single value
function getAttributeType (target, op, val) {
  var isAttributeTarget = typeof target === 'string' && target.indexOf('attributes.') === 0
  if (!isAttributeTarget || !val) return undefined
  if (op === 'in' || op === 'not in') return val[0].type
  if (op === 'contains') return 'set-' + val.type
  if (Array.isArray(val)) return 'set-' + val[0].type
  return val.type
}
// Assembles the "condition" AST node for a comparison.
// target and op are stored as given; val (omitted by empty_comparison) is
// summarised through getFlattenedValue, getValueType and getAttributeType.
// Keys are assigned in the same order as the node is printed:
// kind, target, operator, value, valueType, attributeType.
function transformToCondition (target, op, val) {
  var condition = { kind: "condition" }
  condition.target = target
  condition.operator = op
  condition.value = getFlattenedValue(val)
  condition.valueType = getValueType(val)
  condition.attributeType = getAttributeType(target, op, val)
  return condition
}
// Rebuilds a dotted path from the groups matched by the identifier rule.
// Each group is an array of matched pieces; empty pieces and the literal
// '.' separators are dropped, groups left with nothing are skipped, and
// the remaining pieces are joined with '.' within and across groups.
function createIdentifier (body) {
  var segments = []
  body.forEach(function (group) {
    var pieces = group.filter(function (piece) { return piece && piece !== '.' })
    if (pieces.length > 0) segments.push(pieces.join('.'))
  })
  return segments.join('.')
}
}
// ----- DSL Grammar -----
predicate
= ws exp:expression ws { return exp; }
expression
= head:term tail:("or" term)*
{
if (tail.length === 0) {
return head;
}
return {
kind: "logical",
logical: "or",
conditions: [head].concat(tail.map(function(el){return el[1];})),
};
}
term
= head:factor tail:("and" factor)*
{
if (tail.length === 0) {
return head;
}
return {
kind: "logical",
logical: "and",
conditions: [head].concat(tail.map(function(el){return el[1];})),
};
}
factor
= ws negation:"not" ws primary:primary ws
{
return {
kind: "negation",
condition: primary,
};
}
/ ws primary:primary ws { return primary; }
primary
= basic_comparison
/ list_comparison
/ empty_comparison
/ parens
// ----- Comparators -----
basic_comparison
= target:val_expression ws op:single_operators ws val:value
{ return transformToCondition(target, op, val); }
list_comparison
= target:val_expression ws op:list_operators ws val:list_of_values
{ return transformToCondition(target, op, val); }
empty_comparison
= target:val_expression ws op:empty_operators
{ return transformToCondition(target, op); }
// ----- Operators -----
single_operators
= "!="
/ "="
/ "<>"
/ ">="
/ ">"
/ "<="
/ "<"
/ "contains"
list_operators
= "!="
/ "="
/ "<>"
/ "not in"
/ "in"
/ "contains all"
/ "contains any"
empty_operators
= "is not empty"
/ "is empty"
/ "is not defined"
/ "is defined"
list_of_values
= ws "(" ws head:value tail:(ws "," ws value)* ws ")" ws
{
if (tail.length === 0) {
return [head];
}
return [head].concat(tail.map(function(el){ return el[el.length -1];}));
}
// ----- Expressions -----
val_expression
= application_expression
/ constant_expression
/ field_expression
application_expression
= identifier ws "(" ws function_argument (ws "," ws function_argument)* ws ")"
constant_expression = ws val:value ws { return val; }
field_expression = ws i:identifier ws { return i; }
function_argument
= expression
/ constant_expression
/ field_expression
value
= v:boolean { return { type: 'boolean', value: v }; }
/ v:datetime { return { type: 'datetime', value: v }; }
/ v:date { return { type: 'date', value: v }; }
/ v:time { return { type: 'time', value: v }; }
/ v:number { return { type: 'number', value: v }; }
/ v:string { return { type: 'string', value: v }; }
// ----- Common rules -----
parens
= ws "(" ws ex:expression ws ")" ws { return ex; }
identifier
= body:((raw_identifier "." escaped_identifier)+ / (raw_identifier "." raw_identifier)+)
{
return createIdentifier(body)
}
/ i:raw_identifier { return i; }
escaped_identifier
= "\`" head:raw_identifier tail:("-" raw_identifier)* "\`"
{ return [head].concat(tail.map(function(el){return el.join('');})).join(''); }
raw_identifier = i:[a-zA-Z0-9_]* { return i.join(''); }
ws "whitespace" = [ \\t\\n\\r]*
// ----- Types: booleans -----
boolean "boolean"
= "false" { return false; }
/ "true" { return true; }
// ----- Types: datetime -----
datetime "datetime"
= quotation_mark datetime:datetime_format quotation_mark
{ return datetime.map(function(el){return Array.isArray(el) ? el.join('') : el;}).join(''); }
datetime_format = date_format time_mark time_format zulu_mark
time_mark = "T"
zulu_mark = "Z"
// ----- Types: date -----
date "date"
= quotation_mark date:date_format quotation_mark { return date.join("");}
date_format = [0-9][0-9][0-9][0-9] minus [0-9][0-9] minus [0-9][0-9]
// ----- Types: time -----
time "time"
= quotation_mark time:time_format quotation_mark { return time.join("");}
time_format = [0-2][0-9] colon [0-5][0-9] colon [0-5][0-9] decimal_point [0-9][0-9][0-9]
colon = ":"
// ----- Types: numbers -----
number "number"
= minus? int frac? exp? { return parseFloat(text()); }
decimal_point = "."
digit1_9 = [1-9]
e = [eE]
exp = e (minus / plus)? DIGIT+
frac = decimal_point DIGIT+
int = zero / (digit1_9 DIGIT*)
minus = "-"
plus = "+"
zero = "0"
// ----- Types: strings -----
string "string"
= quotation_mark chars:char* quotation_mark { return chars.join(""); }
char
= unescaped
/ escape
sequence:(
'"'
/ "\\\\"
/ "/"
/ "b" { return "\\b"; }
/ "f" { return "\\f"; }
/ "n" { return "\\n"; }
/ "r" { return "\\r"; }
/ "t" { return "\\t"; }
/ "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG)
{ return String.fromCharCode(parseInt(digits, 16)); }
)
{ return sequence; }
escape = "\\\\"
quotation_mark = '"'
unescaped = [^\\0-\\x1F\\x22\\x5C]
// See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234).
DIGIT = [0-9]
HEXDIG = [0-9a-f]i
A small related addition to the bug: I set up pegjs
via the pegjs-loader. It uses the JS API under the hood, calling parser.generate,
and it also leads to the same error.
Many thanks for the project by the way!
@emmenko I don't know why your grammar was working with the API (will continue to try to find out why), but your grammar was incorrect, the unescaped
rule should be:
unescaped = !'"' [^\\0-\\x1F\\x22\\x5C]
Tell me if this fixes the issue from your side
@tdeekens If it's the same error (e.g. Expected ... but "\"" found.
), then check if your grammar is correct, or post it here
@futagoza @tdeekens and I are on the same team, so it's the same issue 😅
We'll keep you posted! Thanks for your support so far 🙏
I don't know why your grammar was working with the API
We never had a problem with that to be honest. Thanks for pointing it out anyway!
Is it working now?
Unfortunately it didn't help ☹️
Using your grammar, PEG.js 0.10, Node 8.9.0 and the input foo = "bar"
, I tried this via 3 routes:
pegjs
CLI
All 3 showed the same error: Line 1, column 7: Expected "(", boolean, date, datetime, number, string, or time but "\"" found.
If I change your grammar, it fixes this error for all 3 routes:
// original
unescaped = [^\\0-\\x1F\\x22\\x5C]
// fixed
unescaped = !'"' [^\\0-\\x1F\\x22\\x5C]
After applying the fixed rule, can you check if:
Also, after tweaking the input a bit, I realised that your grammar doesn't account for newlines as whitespaces correctly, this is most likely because of your ws
rule.
EDIT: Here's my test script:
/* eslint node/no-unsupported-features: 0 */
"use strict";
const { exec } = require( "child_process" );
const { readFileSync } = require( "fs" );
const { join } = require( "path" );
const { generate } = require( "pegjs" );
/**
 * Feeds the fixed sample input `foo = "bar"` to a parser and prints the outcome.
 *
 * On success the parse result is logged. An error named "SyntaxError" is
 * reported as a one-line message built from its `location.start` and
 * `message`; any other error is rethrown unchanged.
 *
 * @param {{ parse: Function }} parser - Object exposing a `parse( input )` method.
 */
function test( parser ) {
    try {
        const ast = parser.parse( 'foo = "bar"' );
        console.log( ast );
    } catch ( failure ) {
        const isSyntaxError = failure.name === "SyntaxError";
        if ( ! isSyntaxError ) {
            throw failure;
        }
        const { line, column } = failure.location.start;
        console.log( `Line ${ line }, column ${ column }: ${ failure.message }` );
    }
}
// Route selected on the command line: "api" generates the parser in-process,
// "cli" generates parser.js through the pegjs command-line tool.
const COMMAND = process.argv[ 2 ];

switch ( COMMAND ) {

    case "api":
        // Generate from the grammar file that sits next to this script.
        test( generate( readFileSync( join( __dirname, "grammar.pegjs" ), "utf8" ) ) );
        break;

    case "cli":
        // Run the CLI from this script's directory so it reads the same
        // grammar.pegjs as the "api" route and writes parser.js where the
        // require() below resolves it, regardless of the caller's working
        // directory. Expects node_modules/pegjs in this script's directory.
        exec( "node node_modules/pegjs/bin/pegjs -o parser.js grammar.pegjs", { cwd: __dirname }, error => {
            if ( error ) {
                console.error( error );
                process.exit( 1 );
            }
            test( require( "./parser" ) );
        } );
        break;

    default:
        // Unknown or missing route: report it and exit with a failure status.
        console.error( `Invalid command "${ COMMAND }" passed to test script.` );
        process.exit( 1 );

}
Many thanks for the feedback! We'll try your suggestion tomorrow and let you know as soon as possible whether it helped. 🙏
Thanks for the feedback. Apologies first for the confusion. I just wanted to point out that the issue was also in the webpack-loader. Sorry if that caused confusion on this issue.
We tried out the improvement. It fixes the parser in general, but we now run into a new issue whose cause we have a hard time understanding.
An example from a test is (more below)
Object {
+ "attributeType": undefined,
"kind": "condition",
"operator": "=",
"target": "foo",
- "value": "bar",
+ "value": ",b,a,r",
"valueType": "string",
}
We think the error might likely be on our side we're just not sure where yet. This happens e.g. with the following input
categories.id != ("b33f8e3a-f8d1-476f-a595-2615c4b57556")
which becomes
categories.id != (",b,3,3,f,8,e,3,a,-,f,8,d,1,-,4,7,6,f,-,a,5,9,5,-,2,6,1,5,c,4,b,5,7,5,5,6")
when parsed.
We would obviously be super grateful for a clue but also understand if you can't support us there.
whoops, my mistake 😨, this should fix that
unescaped = !'"' value:[^\\0-\\x1F\\x22\\x5C] { return value; }
Thanks for the super quick response. It does help but not when using the CLI or webpack-loader which often return the initial error of SyntaxError: Expected "(", boolean, date, datetime, number, string, or time but "\"" found.
. Something which happens for instance with a not(sku = "123")
or a more complex example lineItemTotal(sku = "SKU1" or list contains all (1,2,3), field.name, "third arg") = "10 EUR"
. Might that still have something to do with the escaping?
Yep, turns out it's because of the double escaping. Here are the fixed rules:
ws "whitespace" = [ \t\n\r]*
char
= unescaped
/ escape
sequence:(
'"'
/ "\\"
/ "/"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG)
{ return String.fromCharCode(parseInt(digits, 16)); }
)
{ return sequence; }
escape = "\\"
unescaped = !'"' value:[^\0-\x1F\x22\x5C] { return value; }
EDIT: It seems you might want to work on the rules that parse the complex example: lineItemTotal(sku = "SKU1" or list contains all (1,2,3), field.name, "third arg") = "10 EUR"
, it is currently outputting a weird "kind":"condition"
node
Thanks a lot for the help and advice. It seems to solve the problems we had. We will look into the advice regarding the "condition" node.
You're welcome 😄