Skip to content

Commit 8595cd7

Browse files
committed
Try out the dot-subscripting patch
There is a patch on the Postgres mailinglist that allows https://commitfest.postgresql.org/patch/5214/
1 parent 0ff0a5b commit 8595cd7

File tree

9 files changed

+120
-39
lines changed

9 files changed

+120
-39
lines changed

src/pg/pgduckdb_subscript.cpp

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,35 @@ AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_In
8989
}
9090
}
9191

92+
bool
93+
AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, Node *subscript, bool is_slice) {
94+
if (IsA(subscript, A_Indices)) {
95+
// If the subscript is an A_Indices node, we can add the expressions directly
96+
AddSubscriptExpressions(sbsref, pstate, castNode(A_Indices, subscript), is_slice);
97+
return true;
98+
}
99+
100+
if (IsA(subscript, String)) {
101+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript);
102+
return true;
103+
}
104+
105+
if (IsA(subscript, A_Star)) {
106+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, NULL);
107+
return true;
108+
}
109+
110+
return false;
111+
}
112+
92113
/*
93114
* DuckdbSubscriptTransform is called by the parser when a subscripting
94115
* operation is performed on a duckdb type that can be indexed by arbitrary
95116
* expressions. All this does is parse those expressions and make sure the
96117
* subscript returns an an duckdb.unresolved_type again.
97118
*/
98119
void
99-
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
120+
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
100121
bool is_assignment, const char *type_name) {
101122
/*
102123
* We need to populate our cache for some of the code below. Normally this
@@ -111,18 +132,22 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
111132
elog(ERROR, "Assignment to %s is not supported", type_name);
112133
}
113134

114-
if (indirection == NIL) {
135+
if (*indirection == NIL) {
115136
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
116137
}
117138

118139
// Transform each subscript expression
119-
foreach_node(A_Indices, subscript, indirection) {
120-
AddSubscriptExpressions(sbsref, pstate, subscript, is_slice);
140+
foreach_ptr(Node, subscript, *indirection) {
141+
if (!AddSubscriptExpressions(sbsref, pstate, subscript, is_slice)) {
142+
break;
143+
}
121144
}
122145

123146
// Set the result type of the subscripting operation
124147
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
125148
sbsref->reftypmod = -1;
149+
150+
*indirection = list_delete_first_n(*indirection, list_length(sbsref->refupperindexpr));
126151
}
127152

128153
/*
@@ -136,7 +161,7 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
136161
* Currently this is used for duckdb.row and duckdb.struct types.
137162
*/
138163
void
139-
DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
164+
DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
140165
bool is_assignment, const char *type_name) {
141166
/*
142167
* We need to populate our cache for some of the code below. Normally this
@@ -151,33 +176,40 @@ DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct
151176
elog(ERROR, "Assignment to %s is not supported", type_name);
152177
}
153178

154-
if (indirection == NIL) {
179+
if (*indirection == NIL) {
155180
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
156181
}
157182

158183
bool first = true;
159184

160185
// Transform each subscript expression
161-
foreach_node(A_Indices, subscript, indirection) {
162-
/* The first subscript needs to be a TEXT constant, since it should be
163-
* a column reference. But the subscripts after that can be anything,
164-
* DuckDB should interpret those. */
165-
if (first) {
166-
sbsref->refupperindexpr =
167-
lappend(sbsref->refupperindexpr, CoerceSubscriptToText(pstate, subscript, type_name));
186+
foreach_ptr(Node, subscript, *indirection) {
187+
/*
188+
* If the first subscript is an index expression then it needs to be
189+
* coerced to text, since it should be a column reference. But the
190+
* subscripts after that can be anything, DuckDB should interpret
191+
* those.
192+
*/
193+
if (first && IsA(subscript, A_Indices)) {
194+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr,
195+
CoerceSubscriptToText(pstate, castNode(A_Indices, subscript), type_name));
168196
if (is_slice) {
169197
sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, NULL);
170198
}
171199
first = false;
172200
continue;
173201
}
174202

175-
AddSubscriptExpressions(sbsref, pstate, subscript, is_slice);
203+
if (!AddSubscriptExpressions(sbsref, pstate, subscript, is_slice)) {
204+
break;
205+
}
176206
}
177207

178208
// Set the result type of the subscripting operation
179209
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
180210
sbsref->reftypmod = -1;
211+
212+
*indirection = list_delete_first_n(*indirection, list_length(sbsref->refupperindexpr));
181213
}
182214

183215
static bool
@@ -229,7 +261,7 @@ DuckdbSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefStat
229261
}
230262

231263
void
232-
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
264+
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
233265
bool is_assignment) {
234266
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.row");
235267
}
@@ -249,7 +281,7 @@ static SubscriptRoutines duckdb_row_subscript_routines = {
249281
};
250282

251283
void
252-
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate,
284+
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate,
253285
bool is_slice, bool is_assignment) {
254286
DuckdbSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.unresolved_type");
255287
}
@@ -269,7 +301,7 @@ static SubscriptRoutines duckdb_unresolved_type_subscript_routines = {
269301
};
270302

271303
void
272-
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
304+
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
273305
bool is_assignment) {
274306
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.struct");
275307
}
@@ -289,7 +321,7 @@ static SubscriptRoutines duckdb_struct_subscript_routines = {
289321
};
290322

291323
void
292-
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
324+
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
293325
bool is_assignment) {
294326
DuckdbSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.map");
295327
}

src/pgduckdb_ruleutils.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,14 @@ pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var
350350
int varno;
351351
int varattno;
352352

353+
if (strcmp(colname, "?column?") == 0) {
354+
/*
355+
* If the column name is "?column?", then it means that Postgres
356+
* couldn't figure out a decent alias.
357+
*/
358+
return false;
359+
}
360+
353361
/*
354362
* If we have a syntactic referent for the Var, and we're working from a
355363
* parse tree, prefer to use the syntactic referent. Otherwise, fall back
@@ -388,6 +396,15 @@ pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf) {
388396
}
389397

390398
Assert(sbsref->refupperindexpr);
399+
400+
if (linitial(sbsref->refupperindexpr) == NULL) {
401+
return sbsref;
402+
}
403+
404+
if (IsA(linitial(sbsref->refupperindexpr), String)) {
405+
return sbsref;
406+
}
407+
391408
Oid typoutput;
392409
bool typIsVarlena;
393410
Const *constval = castNode(Const, linitial(sbsref->refupperindexpr));

src/vendor/pg_ruleutils_18.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13082,17 +13082,33 @@ printSubscripts(SubscriptingRef *sbsref, deparse_context *context)
1308213082
lowlist_item = list_head(sbsref->reflowerindexpr); /* could be NULL */
1308313083
foreach(uplist_item, sbsref->refupperindexpr)
1308413084
{
13085-
appendStringInfoChar(buf, '[');
13086-
if (lowlist_item)
13085+
Node *up = (Node *) lfirst(uplist_item);
13086+
13087+
if (!up)
13088+
{
13089+
appendStringInfoString(buf, ".*");
13090+
}
13091+
else if (IsA(up, String))
13092+
{
13093+
appendStringInfoChar(buf, '.');
13094+
appendStringInfoString(buf, quote_identifier(strVal(up)));
13095+
}
13096+
else
1308713097
{
13098+
appendStringInfoChar(buf, '[');
13099+
if (lowlist_item)
13100+
{
13101+
/* If subexpression is NULL, get_rule_expr prints nothing */
13102+
get_rule_expr((Node *) lfirst(lowlist_item), context, false);
13103+
appendStringInfoChar(buf, ':');
13104+
}
1308813105
/* If subexpression is NULL, get_rule_expr prints nothing */
13089-
get_rule_expr((Node *) lfirst(lowlist_item), context, false);
13090-
appendStringInfoChar(buf, ':');
13091-
lowlist_item = lnext(sbsref->reflowerindexpr, lowlist_item);
13106+
get_rule_expr((Node *) lfirst(uplist_item), context, false);
13107+
appendStringInfoChar(buf, ']');
1309213108
}
13093-
/* If subexpression is NULL, get_rule_expr prints nothing */
13094-
get_rule_expr((Node *) lfirst(uplist_item), context, false);
13095-
appendStringInfoChar(buf, ']');
13109+
13110+
if (lowlist_item)
13111+
lowlist_item = lnext(sbsref->reflowerindexpr, lowlist_item);
1309613112
}
1309713113
}
1309813114

test/pycheck/motherduck_test.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,29 +280,36 @@ def test_md_duckdb_only_types(md_cur: Cursor, ddb: Duckdb):
280280
ddb.sql("""
281281
CREATE TABLE t1(
282282
m MAP(INT, VARCHAR),
283+
ms MAP(VARCHAR, INT),
283284
s STRUCT(v VARCHAR, i INTEGER),
284285
u UNION(t time, d date),
285286
)""")
286287
ddb.sql("""
287288
INSERT INTO t1 VALUES (
288289
MAP{1: 'abc'},
290+
MAP{'abc': 1},
289291
{'v': 'struct abc', 'i': 123},
290292
'12:00'::time,
291293
), (
292294
MAP{2: 'def'},
295+
MAP{'def': 2},
293296
{'v': 'struct def', 'i': 456},
294297
'2023-10-01'::date,
295298
)
296299
""")
297300
md_cur.wait_until_table_exists("t1")
298301
assert md_cur.sql("""select * from t1""") == [
299-
("{1=abc}", "{'v': struct abc, 'i': 123}", "12:00:00"),
300-
("{2=def}", "{'v': struct def, 'i': 456}", "2023-10-01"),
302+
("{1=abc}", "{abc=1}", "{'v': struct abc, 'i': 123}", "12:00:00"),
303+
("{2=def}", "{def=2}", "{'v': struct def, 'i': 456}", "2023-10-01"),
301304
]
302305

303306
assert md_cur.sql("""select m[1] from t1""") == ["abc", None]
307+
assert md_cur.sql("""select ms['abc'] from t1""") == [1, None]
308+
assert md_cur.sql("""select (ms).abc from t1""") == [1, None]
304309
assert md_cur.sql("""select s['v'] from t1""") == ["struct abc", "struct def"]
305310
assert md_cur.sql("""select s['i'] from t1""") == [123, 456]
311+
assert md_cur.sql("""select (s).v from t1""") == ["struct abc", "struct def"]
312+
assert md_cur.sql("""select (s).i from t1""") == [123, 456]
306313
assert md_cur.sql("""select union_extract(u,'t') from t1""") == [
307314
datetime.time(12, 0),
308315
None,

test/regression/expected/json_functions_duckdb.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,15 @@ SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') F
343343
{'family': canidae, 'coolness': NULL}
344344
(2 rows)
345345

346+
SELECT (transformed).* FROM (
347+
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') as transformed FROM example2
348+
) q;
349+
family | coolness
350+
----------+----------
351+
anatidae | 42.42
352+
canidae |
353+
(2 rows)
354+
346355
SELECT public.json_transform(j, '{"family": "TINYINT", "coolness": "DECIMAL(4, 2)"}') FROM example2;
347356
json_transform
348357
-------------------------------------

test/regression/expected/read_functions.out

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,20 +63,15 @@ SELECT arraycol[1:2] FROM (
6363
(1 row)
6464

6565
SELECT r['arraycol'][:] FROM read_parquet('../../data/indexable.parquet') r;
66-
r.arraycol[:]
67-
---------------
68-
{11,22,33}
69-
(1 row)
66+
ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Parser Error: syntax error at or near "*"
7067

68+
LINE 1: SELECT r.arraycol.* FROM system.main.read_parquet('../../data/indexable.parquet...
69+
^
7170
SELECT arraycol[:] FROM (
7271
SELECT r['arraycol'] arraycol
7372
FROM read_parquet('../../data/indexable.parquet') r
7473
) q;
75-
arraycol
76-
------------
77-
{11,22,33}
78-
(1 row)
79-
74+
ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Binder Error: Cannot extract field from expression "arraycol.*" because it is not a struct
8075
-- Subqueries correctly expand *, in case of multiple columns.
8176
SELECT * FROM (
8277
SELECT 'something' as prefix, *, 'something else' as postfix
@@ -506,7 +501,7 @@ SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r WHERE r['c'] > 50
506501
51
507502
(1 row)
508503

509-
SELECT r['a'], r['b'], r['c'] FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4 AND r['c'] < 51.2;
504+
SELECT (r).a, (r).b, (r).c FROM read_json('../../data/table.json') r WHERE (r).c > 50.4 AND (r).c < 51.2;
510505
a | b | c
511506
----+---------+------
512507
50 | json_50 | 50.5

test/regression/sql/json_functions_duckdb.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ SELECT public.json_group_structure(j) FROM example2;
212212
-- ('{"family": "canidae", "species": ["labrador", "bulldog"], "hair": true}');
213213
-- -- <JSON_TRANSFORM>
214214
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') FROM example2;
215+
SELECT (transformed).* FROM (
216+
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') as transformed FROM example2
217+
) q;
215218

216219
SELECT public.json_transform(j, '{"family": "TINYINT", "coolness": "DECIMAL(4, 2)"}') FROM example2;
217220

test/regression/sql/read_functions.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,4 +284,4 @@ SELECT * FROM iceberg_metadata('../../data/lineitem_iceberg', allow_moved_paths
284284

285285
SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r;
286286
SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4;
287-
SELECT r['a'], r['b'], r['c'] FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4 AND r['c'] < 51.2;
287+
SELECT (r).a, (r).b, (r).c FROM read_json('../../data/table.json') r WHERE (r).c > 50.4 AND (r).c < 51.2;

test/regression/sql/unresolved_type.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,5 @@ select make_timestamp(1686570000000000);
4848
select make_timestamp(r['microseconds']) from duckdb.query($$ SELECT 1686570000000000 AS microseconds $$) r;
4949
select make_timestamptz(1686570000000000);
5050
select make_timestamptz(r['microseconds']) from duckdb.query($$ SELECT 1686570000000000 AS microseconds $$) r;
51+
52+
SELECT s FROM (select (r).s FROM duckdb.query($$ SELECT {'key1': 'value1', 'key2': 42} AS s $$) r);

0 commit comments

Comments
 (0)