An Introduction to SAS PROC SQL
[ 참고 :: http://www2.sas.com/proceedings/sugi27/p070-27.pdf ]
ANY QUESTIONS ?? xyxonxyxon@empas.com
가상 서점 고객세분화연습 ::
SQL 1227 버전 (최종 ) ::
02_리비젼_롯데마트CRM데이터분석교육_20141227_SASprocSQLV04_A1.pdf
12월 27일 ...
강남TOZ (^^;) 같은 방입니다.
::::::::::::::::::::::::::::::::::::::::
::::::::::::::::::::::::::::::::::::::::
2014 12월 20일 :: 복습
LIBNAME LM_POS01 'C:\SASTEST_201412';
PROC IMPORT OUT= LM_POS01.SALE_HEAD
DATAFILE= "C:\SASTEST_201412\sale_head_201412.csv"
REPLACE;
GETNAMES=YES;
RUN;
PROC IMPORT OUT= LM_POS01.SALE_PROD
DATAFILE= "C:\SASTEST_201412\sale_prod_201412.csv"
REPLACE;
GETNAMES=YES;
RUN;
PROC IMPORT OUT= LM_POS01.PRODUCT
DATAFILE= "C:\SASTEST_201412\product.csv"
REPLACE;
GETNAMES=YES;
RUN;
PROC IMPORT OUT= LM_POS01.CD_CAT_DEPT
DATAFILE= "C:\SASTEST_201412\cd_cat_dept.csv"
REPLACE;
GETNAMES=YES;
RUN;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP001 AS
SELECT CUST_NO, COUNT(*) AS NTRX
FROM LM_POS01.SALE_HEAD
GROUP BY CUST_CARD_NO;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP002 AS
SELECT CUST_NO, COUNT(*) AS NTRX_PRD
FROM LM_POS01.SALE_PROD
GROUP BY CUST_NO;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP003 AS
SELECT A.*, B.*
FROM LM_POS01.SPTMP002 AS A LEFT JOIN LM_POS01.SPTMP001 AS B
ON A.CUST_NO=B.CUST_NO
ORDER BY A.NTRX_PRD DESC;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP003 AS
SELECT A.*, B.*
FROM LM_POS01.SPTMP002 AS A LEFT JOIN LM_POS01.SPTMP001 AS B
ON A.CUST_NO=B.CUST_NO
ORDER BY A.NTRX_PRD DESC;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP004 AS
SELECT STR_CD, SALE_DY, TRD_NO, CUST_NO, CUST_CARD_NO, PROD_CD, SALE_SALE_AMT
FROM LM_POS01.SALE_PROD
WHERE STR_CD=101
;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP005 AS
SELECT A.*, B.PROD_NM, B.DEPT_CD
FROM LM_POS01.SPTMP004 AS A LEFT JOIN LM_POS01.PRODUCT AS B
ON A.PROD_CD = B.PROD_CD
;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP006 AS
SELECT A.*, B.DEPT_CD, B.DEPT_NM
FROM LM_POS01.SPTMP005 AS A LEFT JOIN LM_POS01.CD_CAT_DEPT AS B
ON A.DEPT_CD = B.DEPT_CD
;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP007 AS
SELECT DISTINCT DEPT_CD, DEPT_NM, SUM(SALE_SALE_AMT) AS SSAMT,
COUNT(DISTINCT CUST_NO) AS CCNT,
SUM(SALE_SALE_AMT) / COUNT(DISTINCT CUST_NO) AS AVG_SSAMT
FROM LM_POS01.SPTMP006
GROUP BY DEPT_CD;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP008 AS
SELECT PROD_NM, SUBSTR(PROD_NM,3,6) AS SPROD_NM
FROM LM_POS01.PRODUCT
;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.SPTMP009 AS
SELECT TRD_NO, CUST_NO, SALE_SALE_AMT,
CASE WHEN SALE_SALE_AMT > 50000 THEN 1
ELSE 0 END
AS IS_BIG
FROM LM_POS01.SALE_HEAD;
QUIT;
내일은 20일 (토요일) ...
TOZ 강남역점 (02-3476-0118 ) 에서 뵙지요.... 10시는 동일
--------------[2014 12 15 ]
Day 2 교재 수정본 + 추가 연습문제 :
리비젼_롯데마트CRM데이터분석교육_20141212_SAS기초V02_A.ppt
연습을 위한 수정된 STR 테이블 :
---------------
SELECT STR_CD, COUNT(DISTINCT CUST_NO) AS CCNT
FROM ....
GROUP BY STR_CD
대/소문자 모두 사용 가능
PROC PRINT DATA=gumjin; RUN;
proc print data=gumjin; run;
모든 명령문은 semicolon(;)으로 마침
한 줄에 여러 개의 명령문 사용
모든 실행 문은 RUN; 에 의해 프로그램 수행 후 종료
주석달기 :: /* */ *
SAS keyword expressions (예: DROP, KEEP, RENAME, WHERE)를 SQL에서 사용 가능함
DATA AAA01 ;
INPUT SERIALNO AGE SEX $ EDUC MARRI;
LABEL SEX='성별';
CARDS;
182 58 F 2 2
7 55 F 2 2
201 55 M 2 2
217 52 F 2 2
815 65 F 2 5
528 49 F 2 2
1083 69 M 2 2
1063 64 F 2 5
595 39 F 2 2
79 80 M 2 2
;
RUN;
데이터 입력시 Tip
- 변수명은 32자 이내
- 숫자로 시작 불가!
- 특수기호 사용 불가! (except “_”)
- 자료의 입력은 되도록 숫자변수로
- 주민등록번호나 젂화번호 등은 문자변수로 입력
- 날짜변수는 mm dd yy로 나누어 입력
- (한 셀에 입력했을 경우에는 SAS에서 구분 가능)
- 문자변수는 되도록 자릿수를 맞춰서 입력
* 라이브러리 생성/지정
* 로컬의 텍스트 파일 데이터 불러오기
* 서버의 데이터 가져오기
* 데이터 내보내기
* 변수 정의
* 간단한 계산
Creating output and new tables
PROC SQL;
SELECT *
FROM VITALS;
QUIT;
PROC SQL;
CREATE TABLE BP AS
SELECT PATIENT, DATE, BPS, BPD
FROM VITALS;
QUIT;
PROC SQL;
CREATE TABLE BP AS
SELECT *
FROM VITALS(DROP=TEMP);
QUIT;
Duplicates and sorting
PROC SQL;
CREATE TABLE PATIDS AS
SELECT DISTINCT PATIENT
FROM VITALS;
QUIT;
PROC SQL;
CREATE TABLE VISITS AS
SELECT PATIENT, DATE
FROM VITALS
ORDER BY PATIENT, DATE DESCENDING;
QUIT;
Sub-setting and Calculating
PROC SQL;
CREATE TABLE BPODD AS
SELECT PATIENT, DATE FORMAT=DATE7., BPS,
BPD, (TEMP-32)/9*5 AS TEMPC
FROM VITALS
WHERE PATIENT IN (101 103)
ORDER BY PATIENT, DATE DESCENDING;
QUIT;
DATE FORMAT=DATE7.
would be replaced with
PUT(DATE,DATE7.) AS CDATE LENGTH=7
PROC SQL;
CREATE TABLE MEANBP AS
SELECT COUNT(*) AS N, ROUND(MEAN(BPS),
0.01) FORMAT=6.2 AS BPSMEAN,
ROUND(MEAN(BPD),0.01) FORMAT=6.2 AS
BPDMEAN
FROM VITALS;
QUIT;
PROC SQL;
CREATE TABLE HIGHBPP1 AS
SELECT PATIENT, COUNT(PATIENT) AS N,
DATE FORMAT=DATE7., BPD,
MAX(BPD) AS BPDHIGH,
ROUND(BPD/(CALCULATED BPDHIGH)*100, 0.01)
FORMAT=6.2 AS BPDPCT
FROM VITALS
GROUP BY PATIENT;
QUIT;
PROC SQL;
CREATE TABLE HIGHBPP2 AS
SELECT PATIENT, COUNT(PATIENT) AS N,
DATE FORMAT=DATE7., MAX(BPD) AS BPDHIGH
FROM VITALS
WHERE PATIENT IN (101 102 103)
GROUP BY PATIENT
HAVING BPD = CALCULATED BPDHIGH
ORDER BY CALCULATED BPDHIGH;
QUIT;
PROC SQL;
CREATE TABLE TESTMED AS
SELECT PATIENT,
CASE ((PATIENT/2 = INT(PATIENT/2)) +
(PATIENT = .))
WHEN 1 THEN 'Med A' WHEN 0 THEN
'Med B' ELSE 'Error' END AS DOSEGRP
LENGTH=5
FROM VITALS
ORDER BY PATIENT;
QUIT;
Joining Tables
PROC SQL;
CREATE TABLE BOTH AS
SELECT A.PATIENT,
A.DATE FORMAT=DATE7. AS DATE, A.PULSE,
B.MED, B.DOSES, B.AMT FORMAT=4.1
FROM VITALS A INNER JOIN DOSING B
ON (A.PATIENT = B.PATIENT) AND
(A.DATE = B.DATE)
ORDER BY PATIENT, DATE;
QUIT;
PROC SQL;
CREATE TABLE LEFT AS
SELECT A.PATIENT,
A.DATE FORMAT=DATE7. AS DATE, A.PULSE,
B.MED, B.DOSES, B.AMT FORMAT=4.1
FROM VITALS A LEFT JOIN DOSING B
ON (A.PATIENT = B.PATIENT) AND
(A.DATE = B.DATE)
ORDER BY PATIENT, DATE;
QUIT;
PROC SQL;
CREATE TABLE RIGHT AS
SELECT A.PATIENT,
A.DATE FORMAT=DATE7. AS DATE, A.PULSE,
B.MED, B.DOSES, B.AMT FORMAT=4.1
FROM VITALS A RIGHT JOIN DOSING B
ON (A.PATIENT = B.PATIENT) AND
(A.DATE = B.DATE)
ORDER BY PATIENT, DATE;
QUIT;
PROC SQL;
CREATE TABLE FULL AS
SELECT A.PATIENT,
A.DATE FORMAT=DATE7. AS DATE, A.PULSE,
B.MED, B.DOSES, B.AMT FORMAT=4.1
FROM VITALS A FULL JOIN DOSING B
ON (A.PATIENT = B.PATIENT) AND
(A.DATE = B.DATE)
ORDER BY PATIENT, DATE;
QUIT;
PROC SQL and SAS Macro language
PROC SQL NOPRINT;
SELECT DISTINCT PATIENT INTO :PAT1- :PAT999
FROM VITALS
ORDER BY PATIENT;
QUIT;
PROC SQL NOPRINT;
SELECT DISTINCT PATIENT INTO :PATLIST
SEPARATED BY ‘,’
FROM VITALS
ORDER BY PATIENT;
QUIT;
Editing Observations
PROC SQL NOPRINT;
INSERT INTO VITALS
VALUES(102 ‘20AUG2001’d 75 98.4 122
90);
INSERT INTO VITALS
SET PATIENT=102, DATE=’27AUG2001’d,
PULSE=77, TEMP=98.8, BPS=129,
BPD=88;
QUIT;
PROC SQL NOPRINT;
DELETE FROM VITALS
WHERE PATIENT = 101;
QUIT;
PROC SQL NOPRINT;
UPDATE VITALS
SET PULSE=77, BPD=74
WHERE PATIENT=103 AND
DATE=’31JUL2001’d;
QUIT;
Data Table Management
PROC SQL NOPRINT;
ALTER TABLE VITALS
MODIFY DATE FORMAT=MMDDYY8.
DROP TEMP;
QUIT;
PROC SQL NOPRINT;
DROP TABLE BP;
QUIT;
PROC SQL;
CREATE TABLE VARINFO AS
SELECT *
FROM DICTIONARY.COLUMNS
WHERE UPCASE(LIBNAME) = ‘WORK’ AND
UPCASE(MEMNAME) = ‘VITALS’;
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.TEMP001 AS
SELECT STR_CD
FROM LM_POS01.AAA02;
QUIT;
--------
PROC IMPORT OUT= LM_POS01.aaa02
DATAFILE= "C:\SASTEST_201412\a01_test.csv"
REPLACE;
GETNAMES=YES;
RUN;
*----------------
PROC SQL;
CREATE TABLE LM_POS01.STR_CD_TB03 AS
SELECT A.*, B.*
FROM LM_POS01.STR_CD_TB01 A INNER JOIN LM_POS01.FULL_STRCD B
ON (A.STR_CD=B.STR_CD)
WHERE A.STR_CD IN (302 322);
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.STR_CD_TB04 AS
SELECT A.*, B.*
FROM LM_POS01.STR_CD_TB01 A LEFT JOIN LM_POS01.FULL_STRCD B
ON (A.STR_CD=B.STR_CD)
WHERE A.STR_CD IN (302 322);
QUIT;
PROC IMPORT OUT= LM_POS01.FULL_STRCD
DATAFILE= "C:\SASTEST_201412\str_cd_mst.CSV"
REPLACE;
GETNAMES=YES;
RUN;
PROC IMPORT OUT= LM_POS01.FULL_STRCD01
DATAFILE= "C:\SASTEST_201412\str_cd_mst01.CSV"
REPLACE;
GETNAMES=YES;
RUN;
PROC SQL;
CREATE TABLE LM_POS01.STR_CD_TB031 AS
SELECT A.*, B.*
FROM LM_POS01.STR_CD_TB01 A INNER JOIN LM_POS01.FULL_STRCD01 B
ON (A.STR_CD=B.STR_CD)
WHERE A.STR_CD IN (302 322);
QUIT;
PROC SQL;
CREATE TABLE LM_POS01.STR_CD_TB041 AS
SELECT A.*, B.*
FROM LM_POS01.STR_CD_TB01 A LEFT JOIN LM_POS01.FULL_STRCD01 B
ON (A.STR_CD=B.STR_CD)
WHERE A.STR_CD IN (302 322);
QUIT;
*-------
PROC SQL; UPDATE LM_POS01.TMP001 SET SSAMT < 0 THEN 0 ELSE SSAMT END;
QUIT;
PROC SQL; UPDATE LM_POS01.TMP001 SET SSAMT IS NULL THEN 0 ELSE SSAMT
END;
QUIT;
* --------------
- data step intro
( http://www2.sas.com/proceedings/sugi30/134-30.pdf )
*------------
** #--------------------------
** DATA TYPE CONVERSION EXample...
** ------------------------------------
*convert a character variable to a numeric variable using Input function;
data char_num;
char1='1234.56';
num1=input(char1,7.);
char2='01/01/1960';
num2=input(char2,mmddyy10.);
char3='123';
*character variable can be automatically converted to numeric variable by using it in a numeric context;
num3=char3 * 1;
num33=char3 + 0;
run;
proc print;
run;
*convert a numeric variable to a character variable using Put function;
data num_char;
id=123456;
id_char=put(id,6.);
run;
proc print;
run;
'R 데이터 분석' 카테고리의 다른 글
[R 데이터분석] SQLDF 에서 특정 문자열 포함 문자열 선택 Like 기능 사용법 (0) | 2015.02.24 |
---|---|
[R 데이터분석] (0) | 2014.12.21 |
[RF] 랜덤포리스트 for an Extremely Small Sample Set (0) | 2014.10.23 |
bikini forecasting case [EDA sample] (0) | 2014.09.03 |
예측모델링 워크샵용 공유 (0) | 2014.08.26 |