Skip to content

Commit ee33c3d

Browse files
committed
ref type converters, add spec about yajl opts, note about allow_partial_values
1 parent 9192050 commit ee33c3d

File tree

4 files changed

+154
-23
lines changed

4 files changed

+154
-23
lines changed

CHANGELOG.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10-
### Added
11-
12-
- Possibility to reuse config using `JsonScanner::Config`
13-
1410
### Fixed
1511

1612
- Potential problems with garbage collection of the `result` array and other `VALUE`s
@@ -19,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1915
### Added
2016

2117
- Report `bytes_consumed` in `ParseError`s
18+
- Possibility to reuse config using `JsonScanner::Config`
19+
- Warn about `yajl`'s bug when `yajl_allow_partial_values` is set
2220

2321
## [0.2.0] - 2024-12-27
2422

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,38 @@ JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
5656
# => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
5757
```
5858

59+
It supports several options:
60+
61+
```ruby
62+
JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
63+
# => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
64+
JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
65+
# JsonScanner::ParseError (parse error: premature EOF)
66+
# [0, 42,
67+
# (right here) ------^
68+
JsonScanner.scan('[0, /* answer */ 42, 0]', [[(1..-1)]], allow_comments: true)
69+
# => [[[17, 19, :number], [21, 22, :number]]]
70+
JsonScanner.scan("\"\x81\x83\"", [[]], dont_validate_strings: true)
71+
# => [[[0, 4, :string]]]
72+
JsonScanner.scan("{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]], dont_validate_strings: true, with_path: true)
73+
# => [[[["\x81\x83"], [7, 9, :number]]]]
74+
JsonScanner.scan('[0, 42, 0]garbage', [[(1..-1)]], allow_trailing_garbage: true)
75+
# => [[[4, 6, :number], [8, 9, :number]]]
76+
JsonScanner.scan('[0, 42, 0] [0, 34]', [[(1..-1)]], allow_multiple_values: true)
77+
# => [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]]
78+
JsonScanner.scan('[0, 42, 0,', [[(1..-1)]], allow_partial_values: true)
79+
# => [[[4, 6, :number], [8, 9, :number]]]
80+
81+
# This is a bug in yajl that affects only numbers: when the trailing partial value is a number, its reported offsets are wrong
82+
JsonScanner.scan('[0, 42, 0', [[(1..-1)]], allow_partial_values: true)
83+
# => [[[4, 6, :number], [-1, 0, :number]]]
84+
JsonScanner.scan('[0, 42, true', [[(1..-1)]], allow_partial_values: true)
85+
# => [[[4, 6, :number], [8, 12, :boolean]]]
86+
87+
JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symbolize_path_keys: true)
88+
# => [[[[:a], [6, 7, :number]]]]
89+
```
90+
5991
You can also create a config and reuse it
6092

6193
```ruby

ext/json_scanner/json_scanner.c

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ void scan_ctx_debug(scan_ctx *ctx)
140140
break;
141141
case PATH_INDEX:
142142
fprintf(stderr, "%ld", ctx->current_path[i].value.index);
143+
break;
143144
}
144145
if (i < ctx->current_path_len - 1)
145146
fprintf(stderr, ", ");
@@ -193,7 +194,7 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
193194
break;
194195
case T_FIXNUM:
195196
case T_BIGNUM:
196-
RB_NUM2LONG(entry);
197+
NUM2LONG(entry);
197198
break;
198199
default:
199200
{
@@ -204,9 +205,9 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
204205
return rb_exc_new_cstr(rb_eArgError, "path elements must be strings, integers, or ranges");
205206
if (range_beg != any_key_sym || range_end != any_key_sym)
206207
{
207-
if (RB_NUM2LONG(range_beg) < 0L)
208+
if (NUM2LONG(range_beg) < 0L)
208209
return rb_exc_new_cstr(rb_eArgError, "range start must be positive");
209-
end_val = RB_NUM2LONG(range_end);
210+
end_val = NUM2LONG(range_end);
210211
if (end_val < -1L)
211212
return rb_exc_new_cstr(rb_eArgError, "range end must be positive or -1");
212213
if (end_val == -1L && open_ended)
@@ -273,8 +274,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
273274
else
274275
{
275276
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
276-
paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
277-
paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
277+
paths[i].elems[j].value.range.start = NUM2LONG(range_beg);
278+
paths[i].elems[j].value.range.end = NUM2LONG(range_end);
278279
// (value..-1) works as expected, (value...-1) is forbidden above
279280
if (paths[i].elems[j].value.range.end == -1L)
280281
paths[i].elems[j].value.range.end = LONG_MAX;
@@ -347,37 +348,38 @@ typedef enum
347348
} value_type;
348349

349350
// noexcept
350-
VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
351+
VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
351352
{
352-
VALUE values[3];
353-
VALUE point = rb_ary_new_capa(3);
353+
VALUE values[3], point;
354+
size_t curr_pos = yajl_get_bytes_consumed(sctx->handle);
355+
point = rb_ary_new_capa(3);
354356
// noexcept
355-
values[1] = RB_ULONG2NUM(curr_pos);
357+
values[1] = ULL2NUM(curr_pos);
356358
switch (type)
357359
{
358360
// FIXME: size_t can be longer than ulong
359361
case null_value:
360-
values[0] = RB_ULONG2NUM(curr_pos - length);
362+
values[0] = LL2NUM(curr_pos - length);
361363
values[2] = null_sym;
362364
break;
363365
case boolean_value:
364-
values[0] = RB_ULONG2NUM(curr_pos - length);
366+
values[0] = LL2NUM(curr_pos - length);
365367
values[2] = boolean_sym;
366368
break;
367369
case number_value:
368-
values[0] = RB_ULONG2NUM(curr_pos - length);
370+
values[0] = LL2NUM(curr_pos - length);
369371
values[2] = number_sym;
370372
break;
371373
case string_value:
372-
values[0] = RB_ULONG2NUM(curr_pos - length);
374+
values[0] = LL2NUM(curr_pos - length);
373375
values[2] = string_sym;
374376
break;
375377
case object_value:
376-
values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
378+
values[0] = LL2NUM(sctx->starts[sctx->current_path_len]);
377379
values[2] = object_sym;
378380
break;
379381
case array_value:
380-
values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
382+
values[0] = LL2NUM(sctx->starts[sctx->current_path_len]);
381383
values[2] = array_sym;
382384
break;
383385
}
@@ -402,7 +404,7 @@ VALUE create_path(scan_ctx *sctx)
402404
entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
403405
break;
404406
case PATH_INDEX:
405-
entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
407+
entry = LONG2NUM(sctx->current_path[i].value.index);
406408
break;
407409
default:
408410
entry = Qnil;
@@ -459,7 +461,7 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
459461
{
460462
if (point == Qundef)
461463
{
462-
point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
464+
point = create_point(sctx, type, length);
463465
if (sctx->with_path)
464466
{
465467
path = create_path(sctx);
@@ -499,6 +501,7 @@ int scan_on_boolean(void *ctx, int bool_val)
499501
int scan_on_number(void *ctx, const char *val, size_t len)
500502
{
501503
scan_ctx *sctx = (scan_ctx *)ctx;
504+
// yajl_get_bytes_consumed returns an incorrect offset in yajl_allow_partial_values mode when the partial value is a number
502505
if (sctx->current_path_len > sctx->max_path_len)
503506
return true;
504507
increment_arr_index(sctx);
@@ -793,9 +796,27 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
793796
{
794797
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
795798
err_msg = rb_utf8_str_new_cstr(str);
796-
bytes_consumed = RB_ULONG2NUM(yajl_get_bytes_consumed(handle));
799+
bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
797800
yajl_free_error(handle, (unsigned char *)str);
798801
}
802+
// // Needed when yajl_allow_partial_values is set
803+
// if (ctx->current_path_len > 0)
804+
// {
805+
// if (ctx->current_path_len > ctx->max_path_len)
806+
// ctx->current_path_len = ctx->max_path_len;
807+
// for (int i = ctx->current_path_len - 1; i > 0; i--)
808+
// {
809+
// switch (ctx->current_path[i].type)
810+
// {
811+
// case PATH_KEY:
812+
// scan_on_end_object(ctx);
813+
// break;
814+
// case PATH_INDEX:
815+
// scan_on_end_array(ctx);
816+
// break;
817+
// }
818+
// }
819+
// }
799820
// callback_err = ctx->rb_err;
800821
if (free_ctx)
801822
{

spec/json_scanner_spec.rb

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
)
2323
end
2424

25-
it "supports symbols" do
25+
it "supports 'symbolize_path_keys'" do
2626
expect(
2727
described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true),
2828
).to eq([[[%w[a b], [12, 13, :number]]]])
@@ -118,6 +118,9 @@
118118
expect do
119119
described_class.scan "{1}", [], verbose_error: true
120120
end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
121+
expect do
122+
described_class.scan("[0, 42,", [[(1..-1)]], verbose_error: true)
123+
end.to raise_error described_class::ParseError, /parse error: premature EOF.*\[0, 42,.*\(right here\) ------\^/m
121124
end
122125

123126
it "includes bytes consumed in the exception" do
@@ -229,6 +232,83 @@
229232
end.to raise_error(described_class::ParseError)
230233
end
231234

235+
context "yajl params" do
236+
it "supports 'allow_comments'" do
237+
params = ["[0, /* answer */ 42, 0]", [[(1..-1)]]]
238+
expect(described_class.scan(*params, allow_comments: true)).to eq(
239+
[[[17, 19, :number], [21, 22, :number]]],
240+
)
241+
expect do
242+
described_class.scan(*params)
243+
end.to raise_error(described_class::ParseError)
244+
end
245+
246+
it "supports 'dont_validate_strings'" do
247+
params = ["\"\x81\x83\"", [[]]]
248+
expect(described_class.scan(*params, dont_validate_strings: true)).to eq(
249+
[[[0, 4, :string]]],
250+
)
251+
expect do
252+
described_class.scan(*params)
253+
end.to raise_error(described_class::ParseError)
254+
params = ["{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]]]
255+
expect(described_class.scan(*params, dont_validate_strings: true, with_path: true)).to eq(
256+
[[[["\x81\x83".dup.force_encoding(Encoding::BINARY)], [7, 9, :number]]]],
257+
)
258+
expect do
259+
described_class.scan(*params, with_path: true)
260+
end.to raise_error(described_class::ParseError)
261+
end
262+
263+
it "supports 'allow_trailing_garbage'" do
264+
params = ["[0, 42, 0]garbage", [[(1..-1)]]]
265+
expect(described_class.scan(*params, allow_trailing_garbage: true)).to eq(
266+
[[[4, 6, :number], [8, 9, :number]]],
267+
)
268+
expect do
269+
described_class.scan(*params)
270+
end.to raise_error(described_class::ParseError)
271+
end
272+
273+
it "supports 'allow_multiple_values'" do
274+
params = ["[0, 42, 0] [0, 34]", [[(1..-1)]]]
275+
expect(described_class.scan(*params, allow_multiple_values: true)).to eq(
276+
[[[4, 6, :number], [8, 9, :number], [16, 18, :number]]],
277+
)
278+
expect do
279+
described_class.scan(*params)
280+
end.to raise_error(described_class::ParseError)
281+
expect(described_class.scan("[0, 42, 0] [0, 34]", [[]], allow_multiple_values: true)).to eq(
282+
[[[0, 10, :array], [12, 19, :array]]],
283+
)
284+
expect(described_class.scan('{"42": 34} [0, 34]', [[]], allow_multiple_values: true)).to eq(
285+
[[[0, 10, :object], [12, 19, :array]]],
286+
)
287+
expect(described_class.scan('[0, 42, 0] {"42": 34}', [[]], allow_multiple_values: true)).to eq(
288+
[[[0, 10, :array], [12, 22, :object]]],
289+
)
290+
expect(described_class.scan('{"42": 34} {"0": 34}', [[]], allow_multiple_values: true)).to eq(
291+
[[[0, 10, :object], [12, 21, :object]]],
292+
)
293+
end
294+
295+
it "supports 'allow_partial_values'" do
296+
params = ["[0, 42, 0,", [[(1..-1)]]]
297+
expect(described_class.scan(*params, allow_partial_values: true)).to eq(
298+
[[[4, 6, :number], [8, 9, :number]]],
299+
)
300+
expect do
301+
described_class.scan(*params)
302+
end.to raise_error(described_class::ParseError)
303+
expect(described_class.scan("[0, 42, 0", [[(1..-1)]], allow_partial_values: true)).to eq(
304+
[[[4, 6, :number], [-1, 0, :number]]],
305+
)
306+
expect(described_class.scan("[0, 42, true", [[(1..-1)]], allow_partial_values: true)).to eq(
307+
[[[4, 6, :number], [8, 12, :boolean]]],
308+
)
309+
end
310+
end
311+
232312
describe described_class::Config do
233313
it "saves state" do
234314
key = "abracadabra".dup

0 commit comments

Comments
 (0)