@@ -6,9 +6,31 @@ Each node has its own JavaScript class, so
661 . it's possible to distinguish them by checking ` instanceof `
772 . they can be extended in pure JavaScript
88
9- Here's how it looks like :
9+ Basic usage :
1010
1111``` js
12+ const inspect = require (' util' ).inspect
13+ const { parse , Send } = require (' lib-ruby-parser' );
14+
15+ function print_parse_result (parser_result ) {
16+ console .log (inspect (parser_result, { showHidden: false , depth: null }))
17+ }
18+
19+ // This function must be defined by you.
20+ // It takes a string and returns an array of bytes.
21+ // The following code is just an example:
22+ function bytes (str ) {
23+ const bytes = unescape (encodeURIComponent (str)).split (' ' ).map (c => c .charCodeAt (0 ))
24+ return new Uint8Array (bytes)
25+ }
26+
27+ const input = bytes (" 2 + 3 # x" );
28+ const options = { record_tokens: true };
29+ const result = parse (input, options);
30+ print_parse_result (result);
31+
32+ // prints:
33+
1234ParserResult {
1335 ast: Send {
1436 recv: Int {
@@ -34,37 +56,58 @@ ParserResult {
3456 tokens: [
3557 Token {
3658 name: ' tINTEGER' ,
37- value: ' 2 ' ,
59+ value: Uint8Array ( 1 ) [ 50 ] ,
3860 loc: Loc { begin: 0 , end: 1 }
3961 },
40- Token { name: ' tPLUS' , value: ' +' , loc: Loc { begin: 2 , end: 3 } },
62+ Token {
63+ name: ' tPLUS' ,
64+ value: Uint8Array (1 ) [ 43 ],
65+ loc: Loc { begin: 2 , end: 3 }
66+ },
4167 Token {
4268 name: ' tINTEGER' ,
43- value: ' 3 ' ,
69+ value: Uint8Array ( 1 ) [ 51 ] ,
4470 loc: Loc { begin: 4 , end: 5 }
4571 },
46- Token { name: ' EOF' , value: ' ' , loc: Loc { begin: 5 , end: 5 } }
72+ Token {
73+ name: ' tNL' ,
74+ value: Uint8Array (1 ) [ 10 ],
75+ loc: Loc { begin: 8 , end: 9 }
76+ },
77+ Token {
78+ name: ' EOF' ,
79+ value: Uint8Array (0 ) [],
80+ loc: Loc { begin: 8 , end: 8 }
81+ }
4782 ],
4883 diagnostics: [],
49- comments: [],
84+ comments: [
85+ Comment {
86+ kind: ' inline' ,
87+ location: Range { begin_pos: 6 , end_pos: 9 }
88+ }
89+ ],
5090 magic_comments: [],
51- input: ' 2 + 3'
91+ input: Uint8Array (9 ) [
92+ 50 , 32 , 43 , 32 , 51 ,
93+ 32 , 35 , 32 , 120
94+ ]
5295}
5396```
5497
5598## API
5699
57100TL;DR: all classes mirror the Rust implementation.
58101
59- TypeScript definition:
102+ Rough TypeScript definition:
60103
61104``` ts
62105interface Loc { begin: number , end: number }
63106interface Range { begin_pos: number , end_pos: number }
64107
65108interface Token {
66109 name: string ,
67- value: string ,
110+ value: Uint8Array ,
68111 loc: Loc
69112}
70113
@@ -93,3 +136,55 @@ type Node = Args | Class | ... /* other nodes */;
93136
94137function parse(code : String ): ParserResult
95138```
139+
140+ `String` and `Symbol` nodes are slightly exceptional: they contain a `StringValue` Rust structure, which is exposed as a `Uint8Array` here.
141+
142+ This structure can be converted into a JS `String` with the `bytes_to_utf8_lossy` function. Keep in mind that it replaces bytes that are not valid UTF-8 with the Unicode replacement character (`U+FFFD`); if you want a different strategy, you are free to define your own conversion function:
143+
144+ ` ` ` js
145+ const { parse, bytes_to_utf8_lossy } = require(path_to_require)
146+
147+ const result = parse(bytes('"a\\ xFFb"'), { record_tokens: true });
148+ console.log(result.ast)
149+ console.log(bytes_to_utf8_lossy(result.ast.value))
150+
151+ // prints
152+
153+ Str {
154+ value: Uint8Array(3) [ 97, 255, 98 ], // "a" = 97, "\xFF " = 255, "b" = 98
155+ begin_l: Range { begin_pos: 0, end_pos: 1 },
156+ end_l: Range { begin_pos: 7, end_pos: 8 },
157+ expression_l: Range { begin_pos: 0, end_pos: 8 }
158+ }
159+
160+ a�b
161+ ` ` `
162+
163+ ## Encodings
164+
165+ If you want to support encodings other than UTF-8 / ASCII-8BIT / BINARY, you need to provide a custom decoder:
166+
167+ ` ` ` js
168+ const custom_decoder = (encoding: String, input: Uint8Array) => {
169+ // Do some **real** decoding into UTF-8 here
170+ //
171+ // Here for simplicity we convert all "2" into "3"
172+ //
173+ assert(encoding === "US-ASCII");
174+ assert(input === bytes("# encoding: us-ascii\n 2 + 2"));
175+
176+ return bytes("# encoding: us-ascii\n 3 + 3");
177+ }
178+
179+ const result = parse(
180+ bytes("# encoding: us-ascii\n 2 + 2"),
181+ { custom_decoder }
182+ );
183+ assert(result.ast.recv.value === "3");
184+ ` ` `
185+
186+ ## Platform support
187+
188+ Currently, the NPM packages include pre-compiled `.node` files for macOS and Linux.
189+
190+ If you need Windows support, first go to the [C++ bindings](https://github.com/lib-ruby-parser/cpp-bindings) and create an issue there. This repo is just a wrapper around its builds.
0 commit comments