gVarCluster

gvarcluster

Description

Provides an action for performing variable clustering and for producing an undirected network that can be used to mine relationships among variables.

gVarCluster.gvarcluster <result=results> <status=rc> / attributes={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, collection={{details=TRUE | FALSE, *name="string", *vars={"variable-name-1" <, "variable-name-2", ...>}}, {...}}, diagnostics={eyecatcher="string"}, display={caseSensitive=TRUE | FALSE, exclude=TRUE | FALSE, excludeAll=TRUE | FALSE, keyIsPath=TRUE | FALSE, names={"string-1" <, "string-2", ...>}, pathType="LABEL" | "NAME", traceNames=TRUE | FALSE}, exact=TRUE | FALSE, freq="variable-name", inputs={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, maxIter=64-bit-integer, maxMember=64-bit-integer, maxSteps=64-bit-integer, minCluster=64-bit-integer, multimember={{details=TRUE | FALSE, *name="string", noEffect=TRUE | FALSE, stdize=TRUE | FALSE, *vars={"variable-name-1" <, "variable-name-2", ...>}, weight={"variable-name-1" <, "variable-name-2", ...>}}, {...}}, nominals={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, outCP={casOut={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, eps=double, list=TRUE | FALSE}, outEdge={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", 
threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, outputTables={groupByVarsRaw=TRUE | FALSE, includeAll=TRUE | FALSE, names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>}, repeated=TRUE | FALSE, replace=TRUE | FALSE}, outTree={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, outVert={caslib="string", compress=TRUE | FALSE, indexVars={"variable-name-1" <, "variable-name-2", ...>}, label="string", lifetime=64-bit-integer, maxMemSize=64-bit-integer, memoryFormat="DVR" | "INHERIT" | "STANDARD", name="table-name", promote=TRUE | FALSE, replace=TRUE | FALSE, replication=integer, tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE", threadBlockSize=64-bit-integer, timeStamp="string", where={"string-1" <, "string-2", ...>}}, polynomial={{degree=integer, details=TRUE | FALSE, labelStyle={expand=TRUE | FALSE, exponent="string", includeName=TRUE | FALSE, productSymbol="NONE" | "string"}, mDegree=integer, *name="string", noSeparate=TRUE | FALSE, standardize={method="MOMENTS" | "MRANGE" | "WMOMENTS", options="CENTER" | "CENTERSCALE" | "NONE" | "SCALE", prefix="NONE" | "string"}, *vars={"variable-name-1" <, "variable-name-2", ...>}}, {...}}, rho=double, select="ADJBIC" | "CV" | "NONE" | "PENALIZED", stop=64-bit-integer, *table={caslib="string", computedOnDemand=TRUE | FALSE, computedVars={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, computedVarsProgram="string", dataSourceOptions={key-1=any-list-or-data-type-1 <, 
key-2=any-list-or-data-type-2, ...>}, groupBy={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, groupByMode="NOSORT" | "REDISTRIBUTE", importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}, *name="table-name", orderBy={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, singlePass=TRUE | FALSE, vars={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, where="where-expression", whereTable={casLib="string", dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}, importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}, *name="table-name", vars={{format="string", formattedLength=integer, label="string", *name="variable-name", nfd=integer, nfl=integer}, {...}}, where="where-expression"}}, target="string", weight="variable-name", xTol=double ;
Settings
ParameterDescription
attributeschanges the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored. For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).
collectiondefines a set of variables that are treated as a single effect that has multiple degrees of freedom. The collection value can be one or more of the following: details=TRUE | FALSE: when set to True, requests a table that shows additional details that are related to this effect. * name="string": specifies the name of the effect. * vars={"variable-name-1" <, "variable-name-2", ...>}: specifies a set of variables that are treated as a single effect that has multiple degrees of freedom. The columns in the design matrix that are contributed by a collection effect are the design columns of its constituent variables in the order in which they appear in the definition of the collection effect.
diagnosticseyecatcher="string": specifies a quoted string that will be prefixed to any messages that are associated with this action invocation.
displayspecifies a list of results tables to send to the client for display. For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).
exactwhen set to True, performs graphical variable clustering without preprocessing by thresholding the sample covariance into connected components. By default, the preprocessing step is performed.
freqnames the numeric variable that contains the frequency of occurrence for each observation.
inputsspecifies variables to use for analysis. For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).
maxIterspecifies the maximum number of iterations for estimating the sparse precision (inverse covariance) matrix by using coordinate descent.
maxMemberstops the action when the number of members within any cluster is greater than or equal to the specified value.
maxStepsspecifies the maximum number of clustering steps.
minClusterstops the action when the number of clusters is less than or equal to the specified value.
multimemberuses one or more classification variables specified in the vars parameter in such a way that each observation can be associated with one or more levels of the union of the levels of the classification variables. For more information about specifying the multimember parameter, see the common multimember parameter (Appendix A: Common Parameters).
nominalsspecifies nominal variables to use for analysis. For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).
outCPcreates a data set that contains a symmetric matrix that depicts the covariances among variables and also creates a set of statistics about the input data set and variables. casOut: specifies the output table. For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters). eps: specifies an epsilon value such that matrix entries that have an absolute value smaller than epsilon are ignored in the output. You must specify the list parameter when you specify the eps parameter. list: when set to True, outputs the symmetric matrix in the list-of-lists (LIL) format.
outEdgecreates a data set for use with the Hypergroup action in the tkhypgrp action library. This table contains the information that defines the edges in the network: _FROM_, _TO_ and _WEIGHT_. For more information about specifying the outEdge parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).
outputTableslists the names of results tables to save as CAS tables on the server. For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).
outTreecreates a data set that depicts a tree diagram to display the hierarchical clustering results. The tree diagram can be plotted using the DENDROGRAM statement in the Graph Template Language. For more information about specifying the outTree parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).
outVertcreates a data set for use with the Hypergroup action in the tkhypgrp action library. This table contains the vertices in the network and their size. For more information about specifying the outVert parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).
polynomialspecifies a polynomial effect. All specified variables must be numeric. A design matrix column is generated for each term of the specified polynomial. By default, each of these terms is treated as a separate effect for the purpose of model building. For more information about specifying the polynomial parameter, see the common polynomial parameter (Appendix A: Common Parameters).
rhospecifies the value of rho that determines the sequence of regularization parameters (the first power of rho, the second power of rho, and so on) that are used on sequential clustering steps.
selectspecifies the selection criterion to apply. The value can be one of the following: "ADJBIC", "CV", "NONE", or "PENALIZED".
stoprequests that the action stop if the clustering results have not changed over the number of consecutive previous steps that is specified in this parameter.
tablespecifies the settings for an input table. For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).
targetspecifies the target variable to use for analysis.
weightnames the numeric variable to use to perform a weighted analysis of the data.
xTolspecifies the minimal absolute tolerance at which an iteration stops.

Examples

FAQ

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}
collection={{collection-1} <, {collection-2}, ...>}
diagnostics={_diagnostics}
display={displayTables}
exact=TRUE | FALSE
freq="variable-name"
inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}
maxIter=64-bit-integer
maxMember=64-bit-integer
maxSteps=64-bit-integer
minCluster=64-bit-integer
multimember={{multimember-1} <, {multimember-2}, ...>}
nominals={{casinvardesc-1} <, {casinvardesc-2}, ...>}
outCP={OutputCPStatement}
outEdge={casouttable}
outputTables={outputTables}
outTree={casouttable}
outVert={casouttable}
polynomial={{polynomial-1} <, {polynomial-2}, ...>}
rho=double
select="ADJBIC" | "CV" | "NONE" | "PENALIZED"
stop=64-bit-integer
* table={castable}
target="string"
weight="variable-name"
xTol=double
What is the Graphical Variable Clustering Action Set?
What is the purpose of the attributes parameter?
What is the name subparameter in attributes?
What is the purpose of the collection parameter?
What is the details subparameter in collection?
What is the name subparameter in collection?
What is the vars subparameter in collection?
What is the purpose of the diagnostics parameter?
What is the eyecatcher subparameter in diagnostics?
What is the purpose of the display parameter?
What is the exact parameter?
What is the freq parameter?
What is the purpose of the inputs parameter?
What is the maxIter parameter?
What is the maxMember parameter?
What is the maxSteps parameter?
What is the minCluster parameter?
What is the purpose of the multimember parameter?
What is the nominals parameter?
What is the outCP parameter?
What is the casOut subparameter in outCP?
What is the eps subparameter in outCP?
What is the list subparameter in outCP?
What is the outEdge parameter?
What is the outputTables parameter?
What is the outTree parameter?
What is the outVert parameter?
What is the polynomial parameter?
What is the rho parameter?
What is the select parameter?
What is the stop parameter?
What is the table parameter?
What is the target parameter?
What is the weight parameter?
What is the xTol parameter?