gVarCluster

gvarcluster

Table of Contents

Description

Provides an action for performing variable clustering and for producing an undirected network for mining relationships among variables.

gVarCluster.gvarcluster <result=results> <status=rc> / attributes={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, collection={{details=TRUE | FALSE, *name="string", *vars={"variable-name-1" <, "variable-name-2", ...>}}, {...}}, diagnostics={eyecatcher="string"}, display={caseSensitive=TRUE | FALSE, exclude=TRUE | FALSE, excludeAll=TRUE | FALSE, keyIsPath=TRUE | FALSE, names={"string-1" <, "string-2", ...>}, pathType="LABEL" | "NAME", traceNames=TRUE | FALSE}, exact=TRUE | FALSE, freq="variable-name", inputs={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, maxIter=64-bit-integer, maxMember=64-bit-integer, maxSteps=64-bit-integer, minCluster=64-bit-integer, multimember={{details=TRUE | FALSE, *name="string", noEffect=TRUE | FALSE, stdize=TRUE | FALSE, *vars={"variable-name-1" <, "variable-name-2", ...>}, weight={"variable-name-1" <, "variable-name-2", ...>}}, {...}}, nominals={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, outCP={casOut={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, eps=double, list=TRUE | FALSE}, outEdge={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", 
threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, outputTables={groupByVarsRaw=TRUE | FALSE, includeAll=TRUE | FALSE, names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>}, repeated=TRUE | FALSE, replace=TRUE | FALSE}, outTree={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, outVert={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, polynomial={{degree=integer, details=TRUE | FALSE, labelStyle={expand=TRUE | FALSE, exponent="string", includeName=TRUE | FALSE, productSymbol="NONE" | "string"}, mDegree=integer, *name="string", noSeparate=TRUE | FALSE, standardize={method="MOMENTS" | "MRANGE" | "WMOMENTS", options="CENTER" | "CENTERSCALE" | "NONE" | "SCALE", prefix="NONE" | "string"}, *vars={"variable-name-1" <, "variable-name-2", ...>}}, {...}}, rho=double, select="ADJBIC" | "CV" | "NONE" | "PENALIZED", stop=64-bit-integer, *table={caslib="string", computedOnDemand=TRUE | FALSE, computedVars={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, computedVarsProgram="string", dataSourceOptions={key-1=any-list-or-data-type-1 <, 
key-2=any-list-or-data-type-2, ...>}, groupBy={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, groupByMode="NOSORT" | "REDISTRIBUTE", importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}, *name="table-name", orderBy={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, singlePass=TRUE | FALSE, vars={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, where="where-expression", whereTable={casLib="string", dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}, importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}, *name="table-name", vars={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, where="where-expression"}}, target="string", weight="variable-name", xTol=double ;
Settings
Parameter Description
attributes changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored. For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).
collection defines a set of variables that are treated as a single effect that has multiple degrees of freedom. The collection value can be one or more of the following: details=TRUE | FALSE: when set to True, requests a table that shows additional details that are related to this effect. * name="string": specifies the name of the effect. * vars={"variable-name-1" <, "variable-name-2", ...>}: specifies a set of variables that are treated as a single effect that has multiple degrees of freedom. The columns in the design matrix that are contributed by a collection effect are the design columns of its constituent variables in the order in which they appear in the definition of the collection effect.
diagnostics eyecatcher="string": specifies a quoted string that will be prefixed to any messages that are associated with this action invocation.
display specifies a list of results tables to send to the client for display. For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).
exact when set to True, performs graphical variable clustering without the preprocessing step that thresholds the sample covariance matrix into connected components. By default, the preprocessing step is performed.
freq names the numeric variable that contains the frequency of occurrence for each observation.
inputs specifies variables to use for analysis. For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).
maxIter specifies the maximum number of iterations for estimating the sparse precision (inverse covariance) matrix by using coordinate descent.
maxMember stops the action when the number of members within any cluster is greater than or equal to the specified value.
maxSteps specifies the maximum number of clustering steps.
minCluster stops the action when the number of clusters is less than or equal to the specified value.
multimember uses one or more classification variables specified in the vars parameter in such a way that each observation can be associated with one or more levels of the union of the levels of the classification variables. For more information about specifying the multimember parameter, see the common multimember parameter (Appendix A: Common Parameters).
nominals specifies nominal variables to use for analysis. For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).
outCP creates a data set that contains a symmetric matrix that depicts the covariances among variables and also creates a set of statistics about the input data set and variables. casOut: specifies the output table. For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters). eps: specifies an epsilon value such that matrix entries that have an absolute value smaller than epsilon are ignored in the output. You must specify the list parameter when you specify the eps parameter. list: when set to True, outputs the symmetric matrix in the list-of-lists (LIL) format.
outEdge creates a data set for use with the Hypergroup action in the tkhypgrp action library. This table contains the information that defines the edges in the network: _FROM_, _TO_ and _WEIGHT_. For more information about specifying the outEdge parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).
outputTables lists the names of results tables to save as CAS tables on the server. For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).
outTree creates a data set that depicts a tree diagram to display the hierarchical clustering results. The tree diagram can be plotted using the DENDROGRAM statement in the Graph Template Language. For more information about specifying the outTree parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).
outVert creates a data set for use with the Hypergroup action in the tkhypgrp action library. This table contains the vertices in the network and their size. For more information about specifying the outVert parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).
polynomial specifies a polynomial effect. All specified variables must be numeric. A design matrix column is generated for each term of the specified polynomial. By default, each of these terms is treated as a separate effect for the purpose of model building. For more information about specifying the polynomial parameter, see the common polynomial parameter (Appendix A: Common Parameters).
rho specifies the value of rho that determines the sequence of regularization parameters (the first power of rho, the second power of rho, and so on) that are used on sequential clustering steps.
select specifies the selection method to use. The value must be one of "ADJBIC", "CV", "NONE", or "PENALIZED". (This parameter is an enumeration, not a Boolean; the preprocessing behavior that is controlled by a True/False setting belongs to the exact parameter.)
stop requests that the action stop if the clustering results do not change for the number of consecutive steps that is specified in this parameter.
table specifies the settings for an input table. For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).
target specifies the target variable to use for analysis.
weight names the numeric variable to use to perform a weighted analysis of the data.
xTol specifies the minimal absolute tolerance at which an iteration stops.

Examples

FAQ

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}
collection={{collection-1} <, {collection-2}, ...>}
diagnostics={_diagnostics}
display={displayTables}
exact=TRUE | FALSE
freq="variable-name"
inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}
maxIter=64-bit-integer
maxMember=64-bit-integer
maxSteps=64-bit-integer
minCluster=64-bit-integer
multimember={{multimember-1} <, {multimember-2}, ...>}
nominals={{casinvardesc-1} <, {casinvardesc-2}, ...>}
outCP={OutputCPStatement}
outEdge={casouttable}
outputTables={outputTables}
outTree={casouttable}
outVert={casouttable}
polynomial={{polynomial-1} <, {polynomial-2}, ...>}
rho=double
select="ADJBIC" | "CV" | "NONE" | "PENALIZED"
stop=64-bit-integer
* table={castable}
target="string"
weight="variable-name"
xTol=double
What is the Graphical Variable Clustering Action Set?
What is the purpose of the attributes parameter?
What is the name subparameter in attributes?
What is the purpose of the collection parameter?
What is the details subparameter in collection?
What is the name subparameter in collection?
What is the vars subparameter in collection?
What is the purpose of the diagnostics parameter?
What is the eyecatcher subparameter in diagnostics?
What is the purpose of the display parameter?
What is the exact parameter?
What is the freq parameter?
What is the purpose of the inputs parameter?
What is the maxIter parameter?
What is the maxMember parameter?
What is the maxSteps parameter?
What is the minCluster parameter?
What is the purpose of the multimember parameter?
What is the nominals parameter?
What is the outCP parameter?
What is the casOut subparameter in outCP?
What is the eps subparameter in outCP?
What is the list subparameter in outCP?
What is the outEdge parameter?
What is the outputTables parameter?
What is the outTree parameter?
What is the outVert parameter?
What is the polynomial parameter?
What is the rho parameter?
What is the select parameter?
What is the stop parameter?
What is the table parameter?
What is the target parameter?
What is the weight parameter?
What is the xTol parameter?