Skip to content

Commit 67baa89

Browse files
docs: Extend documentation tooling to support linking to Iceberg functions (facebookincubator#13207)
Summary: Context: facebookincubator#13980 Extend documentation tooling to provide support for linking to an Iceberg function using `:iceberg:func:<name>` syntax. Pull Request resolved: facebookincubator#13207 Reviewed By: mbasmanova Differential Revision: D77704517 Pulled By: kagamiori fbshipit-source-id: 941d801b2f5199ae28f50899bdcfc4d4214ad391
1 parent ab882b9 commit 67baa89

File tree

6 files changed

+1180
-342
lines changed

6 files changed

+1180
-342
lines changed

velox/docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"issue",
5151
"pr",
5252
"spark",
53+
"iceberg",
5354
"sphinx.ext.autodoc",
5455
"sphinx.ext.doctest",
5556
"sphinx.ext.mathjax",

velox/docs/ext/function.py

Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""The Function common functions."""
16+
17+
from __future__ import annotations
18+
19+
import ast
20+
import re
21+
from inspect import Parameter
22+
from typing import NamedTuple
23+
24+
from docutils import nodes
25+
from docutils.nodes import Element, Node
26+
from sphinx import addnodes
27+
from sphinx.addnodes import desc_signature, pending_xref
28+
29+
from sphinx.environment import BuildEnvironment
30+
from sphinx.locale import _
31+
from sphinx.util.inspect import signature_from_str
32+
33+
# REs for Function signatures
34+
# Matches a whole signature string of the form ``[prefix.]name[(args)[ -> ret]]``.
# Capture groups:
#   1. optional dotted prefix, including its trailing "." (None when absent)
#   2. the name itself
#   3. the text between the outermost parentheses (None when there are no parentheses)
#   4. the text after "->" (None when there is no return annotation)
# re.VERBOSE is set, so whitespace and the "#" comments inside the pattern are
# ignored by the regex engine.
function_sig_re = re.compile(
    r"""^ ([\w.]*\.)? # class name(s)
    (\w+) \s* # thing name
    (?: \(\s*(.*)\s*\) # optional: arguments
    (?:\s* -> \s* (.*))? # return annotation
    )? $ # and nothing more
    """,
    re.VERBOSE,
)
43+
44+
# Maps an index entry type key to its display label. Each label is passed
# through ``_`` (imported from sphinx.locale), presumably so it can be
# localized; the arguments are kept as string literals on purpose so message
# extraction tools can find them.
pairindextypes = {
    "module": _("module"),
    "keyword": _("keyword"),
    "operator": _("operator"),
    "object": _("object"),
    "exception": _("exception"),
    "statement": _("statement"),
    "builtin": _("built-in function"),
}
53+
54+
55+
class ObjectEntry(NamedTuple):
    """Immutable record describing one documented object.

    NOTE(review): the field meanings below are inferred from the field names
    only; confirm against the domain code that creates these entries.
    """

    # presumably the name of the document that contains the object
    docname: str
    # presumably the id of the node the object is anchored to
    node_id: str
    # presumably the kind of object (e.g. "function")
    objtype: str
    # presumably True when this entry is an alias of another entry
    aliased: bool
60+
61+
62+
class ModuleEntry(NamedTuple):
    """Immutable record describing one documented module.

    NOTE(review): the field meanings below are inferred from the field names
    only; confirm against the domain code that creates these entries.
    """

    # presumably the name of the document that contains the module
    docname: str
    # presumably the id of the node the module is anchored to
    node_id: str
    # presumably a short description of the module
    synopsis: str
    # presumably the platform(s) the module is available on
    platform: str
    # presumably True when the module is marked as deprecated
    deprecated: bool
68+
69+
70+
def parse_reftarget(
    reftarget: str, suppress_prefix: bool = False
) -> tuple[str, str, str, bool]:
    """Split a type reference into (reftype, reftarget, title, refspecific).

    A leading ``.`` is stripped and marks the reference as "specific"; the
    title is then the full remaining target. A leading ``~`` is stripped and
    the title is shortened to the last dotted component. Otherwise the title is
    the last dotted component when *suppress_prefix* is set, the target without
    its ``typing.`` prefix when it has one, or the target unchanged.

    The reftype is ``"obj"`` for ``None`` and for anything in ``typing.``,
    and ``"class"`` for everything else.
    """
    typing_prefix = "typing."

    refspecific = reftarget.startswith(".")
    if refspecific:
        reftarget = reftarget[1:]
        title = reftarget
    elif reftarget.startswith("~"):
        reftarget = reftarget[1:]
        title = reftarget.rpartition(".")[2]
    elif suppress_prefix:
        title = reftarget.rpartition(".")[2]
    elif reftarget.startswith(typing_prefix):
        title = reftarget[len(typing_prefix):]
    else:
        title = reftarget

    # The typing module provides non-class types, so those (and None) are
    # referenced as generic objects rather than classes.
    refers_to_object = reftarget == "None" or reftarget.startswith(typing_prefix)
    reftype = "obj" if refers_to_object else "class"

    return reftype, reftarget, title, refspecific
96+
97+
98+
def type_to_xref(
    function_module: str,
    target: str,
    env: BuildEnvironment | None = None,
    suppress_prefix: bool = False,
) -> addnodes.pending_xref:
    """Build a pending cross-reference node for a type string.

    :param function_module: Domain name; used as ``refdomain`` and as the
        prefix of the ``<domain>:module`` / ``<domain>:class`` context keys.
    :param target: The type string, interpreted by ``parse_reftarget``.
    :param env: Build environment whose ``ref_context`` supplies the current
        module/class; when falsy no context attributes are attached.
    :param suppress_prefix: Forwarded to ``parse_reftarget``.
    :returns: A ``pending_xref`` whose only child is the title text.
    """
    context = {}
    if env:
        # Copy the current module/class scope of this domain onto the xref.
        for scope in ("module", "class"):
            key = function_module + ":" + scope
            context[key] = env.ref_context.get(key)

    reftype, target, title, refspecific = parse_reftarget(target, suppress_prefix)

    return pending_xref(
        "",
        nodes.Text(title),
        refdomain=function_module,
        reftype=reftype,
        reftarget=target,
        refspecific=refspecific,
        **context,
    )
127+
128+
129+
def parse_annotation(
    function_module: str, annotation: str, env: BuildEnvironment | None
) -> list[Node]:
    """Parse a type annotation string into a list of signature nodes.

    The annotation is parsed with :mod:`ast` and flattened into punctuation,
    literal and text nodes. Every non-blank ``Text`` node is then replaced by
    a pending cross-reference in the *function_module* domain.

    :param function_module: Domain name used for the generated cross-references.
    :param annotation: Source text of the annotation.
    :param env: Build environment (or ``None``), forwarded to ``type_to_xref``.
    :returns: The nodes representing the annotation. If the annotation cannot
        be parsed, or uses syntax not handled here, a single cross-reference
        to the whole annotation string is returned instead.
    """

    def unparse(node: ast.AST) -> list[Node]:
        if isinstance(node, ast.Attribute):
            return [nodes.Text(f"{unparse(node.value)[0]}.{node.attr}")]
        elif isinstance(node, ast.BinOp):
            result: list[Node] = unparse(node.left)
            result.extend(unparse(node.op))
            result.extend(unparse(node.right))
            return result
        elif isinstance(node, ast.BitOr):
            return [
                addnodes.desc_sig_space(),
                addnodes.desc_sig_punctuation("", "|"),
                addnodes.desc_sig_space(),
            ]
        elif isinstance(node, ast.Constant):
            if node.value is Ellipsis:
                return [addnodes.desc_sig_punctuation("", "...")]
            elif isinstance(node.value, bool):
                # bool must be tested before int: bool is a subclass of int.
                return [addnodes.desc_sig_keyword("", repr(node.value))]
            elif isinstance(node.value, int):
                return [addnodes.desc_sig_literal_number("", repr(node.value))]
            elif isinstance(node.value, str):
                return [addnodes.desc_sig_literal_string("", repr(node.value))]
            else:
                # handles None, which is further handled by type_to_xref later
                # and fallback for other types that should be converted
                return [nodes.Text(repr(node.value))]
        elif isinstance(node, ast.Expr):
            return unparse(node.value)
        elif isinstance(node, ast.Index):
            # ast.Index only wraps subscripts in ASTs from Python < 3.9.
            return unparse(node.value)
        elif isinstance(node, ast.Invert):
            return [addnodes.desc_sig_punctuation("", "~")]
        elif isinstance(node, ast.List):
            result = [addnodes.desc_sig_punctuation("", "[")]
            if node.elts:
                # check if there are elements in node.elts to only pop the
                # last element of result if the for-loop was run at least
                # once
                for elem in node.elts:
                    result.extend(unparse(elem))
                    result.append(addnodes.desc_sig_punctuation("", ","))
                    result.append(addnodes.desc_sig_space())
                # drop the trailing ", " separator
                result.pop()
                result.pop()
            result.append(addnodes.desc_sig_punctuation("", "]"))
            return result
        elif isinstance(node, ast.Module):
            # Flatten the per-statement node lists into one list.
            return [child for stmt in node.body for child in unparse(stmt)]
        elif isinstance(node, ast.Name):
            return [nodes.Text(node.id)]
        elif isinstance(node, ast.Subscript):
            if getattr(node.value, "id", "") in {"Optional", "Union"}:
                return _unparse_pep_604_annotation(node)
            result = unparse(node.value)
            # Fixed: the opening bracket was previously emitted as "]",
            # rendering e.g. ``List[int]`` as ``List]int]``.
            result.append(addnodes.desc_sig_punctuation("", "["))
            result.extend(unparse(node.slice))
            result.append(addnodes.desc_sig_punctuation("", "]"))

            # Wrap the Text nodes inside brackets by literal node if the subscript is a Literal
            # (nodes.Text is a str subclass, so it compares equal to plain strings).
            if result[0] in ("Literal", "typing.Literal"):
                for i, subnode in enumerate(result[1:], start=1):
                    if isinstance(subnode, nodes.Text):
                        result[i] = nodes.literal("", "", subnode)
            return result
        elif isinstance(node, ast.UnaryOp):
            return unparse(node.op) + unparse(node.operand)
        elif isinstance(node, ast.Tuple):
            if node.elts:
                result = []
                for elem in node.elts:
                    result.extend(unparse(elem))
                    result.append(addnodes.desc_sig_punctuation("", ","))
                    result.append(addnodes.desc_sig_space())
                # drop the trailing ", " separator
                result.pop()
                result.pop()
            else:
                result = [
                    addnodes.desc_sig_punctuation("", "("),
                    addnodes.desc_sig_punctuation("", ")"),
                ]

            return result
        else:
            raise SyntaxError  # unsupported syntax

    def _unparse_pep_604_annotation(node: ast.Subscript) -> list[Node]:
        # Render Optional[X] / Union[X, Y] in the "X | Y" style.
        subscript = node.slice
        if isinstance(subscript, ast.Index):
            subscript = subscript.value  # type: ignore[assignment]

        flattened: list[Node] = []
        if isinstance(subscript, ast.Tuple):
            flattened.extend(unparse(subscript.elts[0]))
            for elt in subscript.elts[1:]:
                flattened.extend(unparse(ast.BitOr()))
                flattened.extend(unparse(elt))
        else:
            # e.g. a Union[] inside an Optional[]
            flattened.extend(unparse(subscript))

        if getattr(node.value, "id", "") == "Optional":
            flattened.extend(unparse(ast.BitOr()))
            flattened.append(nodes.Text("None"))

        return flattened

    try:
        tree = ast.parse(annotation)
        result: list[Node] = []
        for node in unparse(tree):
            if isinstance(node, nodes.literal):
                # Unwrap Literal[...] members so they stay plain text instead
                # of being turned into cross-references below.
                result.append(node[0])
            elif isinstance(node, nodes.Text) and node.strip():
                if (
                    result
                    and isinstance(result[-1], addnodes.desc_sig_punctuation)
                    and result[-1].astext() == "~"
                ):
                    # A "~" prefix is consumed here and asks for a shortened title.
                    result.pop()
                    result.append(
                        type_to_xref(
                            function_module, str(node), env, suppress_prefix=True
                        )
                    )
                else:
                    result.append(type_to_xref(function_module, str(node), env))
            else:
                result.append(node)
        return result
    except SyntaxError:
        # Raised by ast.parse for invalid input and by unparse() for
        # unsupported syntax: fall back to one xref for the whole string.
        return [type_to_xref(function_module, annotation, env)]
266+
267+
268+
def parse_arglist(
    function_module: str, arglist: str, env: BuildEnvironment | None = None
) -> addnodes.desc_parameterlist:
    """Build a parameter-list node from an argument list string.

    The string is wrapped in parentheses and handed to ``signature_from_str``;
    each resulting parameter becomes one ``desc_parameter`` child, with its
    annotation rendered through ``parse_annotation``. The ``/`` and ``*``
    separators are inserted as extra pseudo-parameters.

    :param function_module: Domain name, forwarded to ``parse_annotation``.
    :param arglist: Argument list text without the surrounding parentheses.
    :param env: Build environment (or ``None``), forwarded to ``parse_annotation``.
    :returns: The populated ``desc_parameterlist`` node.
    """

    def separator(symbol: str) -> addnodes.desc_parameter:
        # A pseudo-parameter that holds only a "/" or "*" marker.
        return addnodes.desc_parameter(
            "", "", addnodes.desc_sig_operator("", symbol)
        )

    params = addnodes.desc_parameterlist(arglist)
    signature = signature_from_str(f"({arglist})")
    previous_kind = None
    for param in signature.parameters.values():
        kind = param.kind
        has_annotation = param.annotation is not param.empty

        if previous_kind == param.POSITIONAL_ONLY and kind != param.POSITIONAL_ONLY:
            # PEP-570: Separator for Positional Only Parameter: /
            params += separator("/")
        if kind == param.KEYWORD_ONLY and previous_kind in (
            param.POSITIONAL_OR_KEYWORD,
            param.POSITIONAL_ONLY,
            None,
        ):
            # PEP-3102: Separator for Keyword Only Parameter: *
            params += separator("*")

        entry = addnodes.desc_parameter()
        # *args / **kwargs get their star prefix before the name.
        if kind == param.VAR_POSITIONAL:
            entry += addnodes.desc_sig_operator("", "*")
        elif kind == param.VAR_KEYWORD:
            entry += addnodes.desc_sig_operator("", "**")
        entry += addnodes.desc_sig_name("", param.name)

        if has_annotation:
            annotation_nodes = parse_annotation(function_module, param.annotation, env)
            entry += addnodes.desc_sig_punctuation("", ":")
            entry += addnodes.desc_sig_space()
            entry += addnodes.desc_sig_name("", "", *annotation_nodes)  # type: ignore

        if param.default is not param.empty:
            # "name: type = default" is spaced, "name=default" is not.
            if has_annotation:
                entry += addnodes.desc_sig_space()
                entry += addnodes.desc_sig_operator("", "=")
                entry += addnodes.desc_sig_space()
            else:
                entry += addnodes.desc_sig_operator("", "=")
            entry += nodes.inline(
                "", param.default, classes=["default_value"], support_smartquotes=False
            )

        params += entry
        previous_kind = kind

    if previous_kind == Parameter.POSITIONAL_ONLY:
        # PEP-570: Separator for Positional Only Parameter: /
        params += separator("/")

    return params
325+
326+
327+
def pseudo_parse_arglist(signode: desc_signature, arglist: str) -> None:
    """ "Parse" a list of arguments separated by commas.

    Arguments can have "optional" annotations given by enclosing them in
    brackets. Currently, this will split at any comma, even if it's inside a
    string literal (e.g. default argument value).

    :param signode: Signature node that receives the resulting
        ``desc_parameterlist`` as a new child.
    :param arglist: Raw argument list text.

    If the brackets are unbalanced, the whole *arglist* is added as a single
    parameter instead.
    """
    paramlist = addnodes.desc_parameterlist()
    # Stack of currently open containers: the parameter list at the bottom,
    # then one desc_optional node per unclosed "[".
    stack: list[Element] = [paramlist]
    try:
        for argument in arglist.split(","):
            argument = argument.strip()
            # Brackets found at the END of this argument take effect only
            # after the argument itself has been added, so they are counted
            # here and applied below.
            ends_open = ends_close = 0
            while argument.startswith("["):
                # Leading "[": open a new optional group nested in the current one.
                stack.append(addnodes.desc_optional())
                stack[-2] += stack[-1]
                argument = argument[1:].strip()
            while argument.startswith("]"):
                # Leading "]": close the innermost group (IndexError once the
                # stack is empty).
                stack.pop()
                argument = argument[1:].strip()
            # A trailing "[]" is kept as part of the argument text.
            while argument.endswith("]") and not argument.endswith("[]"):
                ends_close += 1
                argument = argument[:-1].strip()
            while argument.endswith("["):
                ends_open += 1
                argument = argument[:-1].strip()
            if argument:
                stack[-1] += addnodes.desc_parameter(
                    "", "", addnodes.desc_sig_name(argument, argument)
                )
            while ends_open:
                stack.append(addnodes.desc_optional())
                stack[-2] += stack[-1]
                ends_open -= 1
            while ends_close:
                stack.pop()
                ends_close -= 1
        if len(stack) != 1:
            # Some "[" was never closed (or the root was popped): reuse the
            # IndexError fallback below.
            raise IndexError
    except IndexError:
        # if there are too few or too many elements on the stack, just give up
        # and treat the whole argument list as one argument, discarding the
        # already partially populated paramlist node
        paramlist = addnodes.desc_parameterlist()
        paramlist += addnodes.desc_parameter(arglist, arglist)
        signode += paramlist
    else:
        signode += paramlist

0 commit comments

Comments
 (0)