Skip to content

Commit dba487b

Browse files
committed
Previous generator error fixes, code block fixes
1 parent cc9f8d9 commit dba487b

File tree

229 files changed

+7140
-65
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

229 files changed

+7140
-65
lines changed

scripts/builtin/confusionMatrix.dml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
# and actual labels. We return both the counts and relative frequency
2424
# (normalized by sum of true labels)
2525
#
26-
# .. code-block::
26+
# .. code-block:: text
2727
#
2828
# True Labels
2929
# 1 2

scripts/builtin/cooccurrenceMatrix.dml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
#
2020
#-------------------------------------------------------------
2121

22-
## Cleans and processes text data by removing punctuation, converting it to lowercase, and reformatting.
23-
## Adds an index column to the result. The implementation is based on
22+
# Cleans and processes text data by removing punctuation, converting it to lowercase, and reformatting.
23+
# Adds an index column to the result. The implementation is based on
2424
# https://github.com/stanfordnlp/GloVe/blob/master/src/cooccur.c
2525
#
2626
# INPUT:
@@ -32,6 +32,7 @@
3232
# ------------------------------------------------------------------------------
3333
# result (Frame[Unknown]): Processed text data with an index column.
3434
# ------------------------------------------------------------------------------
35+
3536
processText = function(Frame[Unknown] S) return (Frame[Unknown] result){
3637
print("processText");
3738
tmpStr = map(S[,1], "x -> x.replaceAll(\"[.]\", \"\")");

scripts/builtin/correctTypos.dml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@
3434
#
3535
# INPUT:
3636
# ----------------------------------------------------------------------------------------
37-
# strings The nx1 input frame of corrupted strings
38-
# frequency_threshold Strings that occur above this frequency level will not be corrected
39-
# distance_threshold Max distance at which strings are considered similar
40-
# is_verbose Print debug information
37+
# strings The nx1 input frame of corrupted strings
38+
# frequency_threshold Strings that occur above this frequency level will not be corrected
39+
# distance_threshold Max distance at which strings are considered similar
40+
# is_verbose Print debug information
4141
# ----------------------------------------------------------------------------------------
4242
#
4343
# OUTPUT:

scripts/builtin/decisionTree.dml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
# and the following trees, M would look as follows:
3131
#
3232
# (L1) |d<5|
33-
# / \
33+
# / \\
3434
# (L2) P1:2 |a<7|
35-
# / \
35+
# / \\
3636
# (L3) P2:2 P3:1
3737
#
3838
# --> M :=

scripts/builtin/dist.dml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@
3131
# Y Euclidean distance matrix
3232
# -----------------------------------------------------------------------------------------------
3333

34+
35+
# .. code-block:: python
36+
# import numpy as np
37+
# from systemds.context import SystemDSContext
# from systemds.operator.algorithm import dist
38+
# from numpy.testing import assert_allclose
39+
# with SystemDSContext() as sds:
40+
# X = sds.from_numpy(np.array([[0], [3], [4]]))
41+
# out = dist(X).compute()
42+
# expected = np.array([
43+
# [0, 3, 4],
44+
# [3, 0, 1],
45+
# [4, 1, 0]
46+
# ])
47+
# assert_allclose(out, expected)
48+
49+
3450
m_dist = function(Matrix[Double] X) return (Matrix[Double] Y) {
3551
n = nrow(X)
3652
s = rowSums(X^2)

scripts/builtin/imputeByKNN.dml

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,23 +25,16 @@
2525
# the missing values by column means. Currently, only the column with the most
2626
# missing values is actually imputed.
2727
#
28-
# ------------------------------------------------------------------------------
2928
# INPUT:
3029
# ------------------------------------------------------------------------------
31-
# X Matrix with missing values, which are represented as NaNs
32-
# method Method used for imputing missing values with different performance
33-
# and accuracy tradeoffs:
34-
# 'dist' (default): Compute all-pairs distances and impute the
35-
# missing values by closest. O(N^2 * #features)
36-
# 'dist_missing': Compute distances between data and records with
37-
# missing values. O(N*M * #features), assuming
38-
# that the number of records with MV is M<<N.
39-
# 'dist_sample': Compute distances between sample of data and
40-
# records with missing values. O(S*M * #features)
41-
# with M<<N and S<<N, but suboptimal imputation.
42-
# seed Root seed value for random/sample calls for deterministic behavior
43-
# -1 for true randomization
44-
# sample_frac Sample fraction for 'dist_sample' (value between 0 and 1)
30+
# X Matrix with missing values, which are represented as NaNs
31+
# method Method used for imputing missing values with different performance and accuracy tradeoffs:\n
32+
# - 'dist' (default): Compute all-pairs distances and impute the missing values by the closest record. O(N^2 * #features)
33+
# - 'dist_missing': Compute distances between data and records with missing values. O(N*M * #features), assuming that the number of records with missing values (MV) is M<<N.
34+
# - 'dist_sample': Compute distances between sample of data and records with missing values. O(S*M * #features) with M<<N and S<<N, but suboptimal imputation.
35+
#
36+
# seed Root seed value for random/sample calls for deterministic behavior. -1 for true randomization
37+
# sample_frac Sample fraction for 'dist_sample' (value between 0 and 1)
4538
# ------------------------------------------------------------------------------
4639
#
4740
# OUTPUT:

scripts/builtin/quantizeByCluster.dml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
# the product quantization. Only relevant when space_decomp = TRUE.
5959
# ------------------------------------------------------------------------------------------
6060

61-
m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, Integer runs = 10,
61+
m_quantizeByCluster = function(Matrix[Double] X, Integer M = 4, Integer k = 10, Integer runs = 10,
6262
Integer max_iter = 1000, Double eps = 1e-6, Integer avg_sample_size_per_centroid = 50, Boolean separate=TRUE, Boolean space_decomp=FALSE, Integer seed = -1)
6363
return(Matrix[Double] codebook, Matrix[Double] codes, Matrix[Double] R)
6464
{

scripts/builtin/randomForest.dml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,17 @@
2626
# and optionally subset of features (columns). During tree construction, split
2727
# candidates are additionally chosen on a sample of remaining features.
2828
#
29-
# .. code-block::
29+
# .. code-block:: text
3030
#
3131
# For example, given a feature matrix with features [a,b,c,d]
3232
# and the following two trees, M (the output) would look as follows:
3333
#
3434
# (L1) |a<7| |d<5|
35-
# / \ / \
35+
# / \\ / \\
3636
# (L2) |c<3| |b<4| |a<7| P3:2
37-
# / \ / \ / \
37+
# / \\ / \\ / \\
3838
# (L3) P1:2 P2:1 P3:1 P4:2 P1:2 P2:1
39+
#
3940
# --> M :=
4041
# [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2], (1st tree)
4142
# [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]] (2nd tree)

scripts/builtin/shapExplainer.dml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
# S Matrix holding the Shapley values along the columns, one row per instance.
5252
# expected Double holding the average prediction of all instances.
5353
# -----------------------------------------------------------------------------
54+
5455
s_shapExplainer = function(String model_function, list[unknown] model_args, Matrix[Double] x_instances,
5556
Matrix[Double] X_bg, Integer n_permutations = 10, Integer n_samples = 100, Integer remove_non_var=0,
5657
Matrix[Double] partitions=as.matrix(-1), Integer seed = -1, Integer verbose = 0)

scripts/builtin/topk_cleaning.dml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@
2121

2222
# This function cleans the top-K items (where K is given as input) for a given list of users.
2323
# metaData[3, ncol(X)] : metaData[1] stores mask, metaData[2] stores schema, metaData[3] stores FD mask
24+
#
25+
# INPUT:
26+
#-------------------------------------------------------------------------------
27+
# TODO TODO
28+
#-------------------------------------------------------------------------------
29+
#
30+
# OUTPUT:
31+
#-------------------------------------------------------------------------------
32+
# TODO TODO
33+
#-------------------------------------------------------------------------------
34+
2435

2536
source("scripts/pipelines/scripts/utils.dml") as utils;
2637
source("scripts/pipelines/scripts/enumerateLogical.dml") as lg;

0 commit comments

Comments
 (0)