Starting off as a muggle that naïve to the Math's and Data Science world.

Sankey Diagram

/* Create a temporary table for sankey diagram */
PROC SQL;
CREATE TABLE WORK.CATE_DS (GROUP char(200),LABEL char(200),HIGH num,LOW num,UPPER num,LOWER num,X num);
QUIT;

/* Create a macro to fillup WORK.CATE_DS for highlow diagram */
OPTION MCOMPILENOTE = ALL;
%MACRO MACRO_FILL_CATE_DS (pcate_vari, x);
  PROC FREQ DATA =  SASHELP.CARS;
    TABLES &pcate_vari / OUT = WORK.CATE_DS_SINGLE_CATE outcum;
  RUN;
  
  DATA WORK.CATE_DS_SINGLE_CATE;
    SET WORK.CATE_DS_SINGLE_CATE;
    CUM_FREQ_LAG = lag(CUM_FREQ);
  RUN;

  PROC SQL;
    INSERT INTO WORK.CATE_DS (GROUP, LABEL, HIGH, LOW, X)
    SELECT 
       CATS(%tslit(&pcate_vari), ':', &pcate_vari) AS GROUP, 
                               %tslit(&pcate_vari) AS LABEL,
                                          CUM_FREQ AS HIGH,
                         COALESCE(CUM_FREQ_LAG, 0) AS LOW,
                                                &x AS X
    FROM WORK.CATE_DS_SINGLE_CATE;
  QUIT;
%MEND MACRO_FILL_CATE_DS;

/* Execute MACRO [MACRO_FILL_CATE_DS] on categorical variable */
%MACRO_FILL_CATE_DS(Type, 1);
%MACRO_FILL_CATE_DS(Origin, 2);
%MACRO_FILL_CATE_DS(DriveTrain, 3);

/* Create a macro to fillup WORK.CATE_DS for band diagram */
OPTION MCOMPILENOTE = ALL;
%MACRO MACRO_FILL_CATE_DS_2 (pcate_vari_from, pcate_vari_to, pcate_vari_order, x);
  PROC SQL;
    CREATE TABLE WORK.CATE_DS_SINGLE_CATE AS
    SELECT 
      CATS(%tslit(&pcate_vari_from), ':', &pcate_vari_from, '-', %tslit(&pcate_vari_to), ':', &pcate_vari_to) AS GROUP, 
                                                                                                     COUNT(*) AS CNT
    FROM SASHELP.CARS
    GROUP BY &pcate_vari_from, &pcate_vari_to
    ORDER BY &pcate_vari_order;
  QUIT;

  DATA WORK.CATE_DS_SINGLE_CATE;
    SET WORK.CATE_DS_SINGLE_CATE;
    UPPER+CNT;
    LOWER = lag(UPPER);
    X = &x;
  RUN;

  PROC SQL;
    INSERT INTO WORK.CATE_DS (GROUP, UPPER, LOWER, X)
    SELECT 
      GROUP AS GROUP, 
      UPPER AS UPPER, 
      COALESCE(LOWER, 0) AS LOWER, 
      X AS X
    FROM WORK.CATE_DS_SINGLE_CATE;
  QUIT;
%MEND MACRO_FILL_CATE_DS_2;

/* Execute MACRO [MACRO_FILL_CATE_DS_2] on 2 categorical variable */

%MACRO_FILL_CATE_DS_2(Type, Origin, Type, 1);
%MACRO_FILL_CATE_DS_2(Type, Origin, Origin, 2);
%MACRO_FILL_CATE_DS_2(Origin, DriveTrain, Origin, 2);
%MACRO_FILL_CATE_DS_2(Origin, DriveTrain, DriveTrain, 3);

/* Create a temporary table to colormap */

data colormap;
   input rownum color $;
   datalines;
1 #8DD3C7 
2 #FFFFB3 
3 #BEBADA 
4 #FB8072 
5 #80B1D3 
6 #FDB462 
7 #B3DE69 
8 #FCCDE5 
9 #D9D9D9 
10 #BC80BD 
11 #CCEBC5 
12 #E41A1C 
13 #377EB8 
14 #4DAF4A 
15 #984EA3 
16 #FF7F00 
17 #FFFF33 
18 #A65628 
19 #F781BF 
20 #999999
;

/* Create a temporary table to colormap with categorical variable value */
PROC SQL;
  CREATE TABLE WORK.AttrMap AS
  SELECT DISTINCT 'GROUP' AS ID, value, c.color AS fillcolor, 'Black' AS linecolor
  FROM (SELECT 
          monotonic() as rownum,
          group_value
        FROM (SELECT DISTINCT
                SCAN(GROUP, 1, '-') as group_value
              FROM WORK.CATE_DS) src)a
  LEFT JOIN (SELECT GROUP as value,
               SCAN(GROUP, 1, '-') as group_value
           FROM WORK.CATE_DS) b
  ON a.group_value = b.group_value

  LEFT JOIN WORK.colormap c
  ON a.rownum = c.rownum;
QUIT;

/* Generate Sankey Diagram */
ODS GRAPHICS ON / WIDTH=16in HEIGHT=8in;
PROC SGPLOT DATA=WORK.CATE_DS DATTRMAP=WORK.AttrMap;
  BAND X=X LOWER=LOWER UPPER=UPPER / GROUP=GROUP ATTRID=GROUP;
  HIGHLOW X=X HIGH=HIGH LOW=LOW / GROUP=GROUP TYPE=BAR BARWIDTH=0.05 ATTRID=GROUP LOWLABEL=GROUP;
  XAXISTABLE LABEL / STAT=FREQ POSITION=TOP;
  XAXIS DISPLAY=NONE;
RUN;

Leave a comment