1. Data Warehouse
1.1. DW
Vendors
RedShift, BigQuery, Snowflake : cloud only
Azure Synapse Analytics(MS), Autonomous Data Warehouse(Oracle), Db2 Warehouse(IBM), Teradata Vantage, Vertica : on premise & cloud
1.2. Structure
Fact table: business measure
fact table의 각 row: measurement event
Demension: fact table에 필수적을 동반되며 business process measurement event와 연관된 내용 서술
event와 관련된 who, what, when, how, and why
각 dimension들은 하나의 pk로 정의됨
1) Star schema
business-level로 fact table이 생성됨 (sales data..) sales facts table: item, location, branch, time, units, dollars로 구성
2) Snowflake schema Star schema에서 정규화되서 더 확장된 버전
2. Data Mart
2.1. Types
DW에서 주제 지향적 subset
1) Dependent
2) Independent
3) Hybrid
3. Query
3.1. Grouping Sets
select country, category, sum(amount) as totalsales
from factsales
left join dimcontry
on factsales.countryid = dimcontry.countryid
left join dimcategory
on factsales.categoryid=dimcategory.categoryid
group by grouping sets(country, category)
order by country, category
> COUNTRY CATEGORY TOTALSALES
Argentina 21755581
Australia 21522004
Austria 21365726
Azerbaijan 21325766
...
Books 239357597
Electronics 239912568
Software 240289802
Sports 240709913
Toys 240736378
3.2. Rollups
select country, category, sum(amount) as totalsales
from factsales
left join dimcontry
on factsales.countryid = dimcontry.countryid
left join dimcategory
on factsales.categoryid=dimcategory.categoryid
group by rollup(country, category)
order by country, category
> COUNTRY CATEGORY TOTALSALES
Argentina Books 4285010
Argentina Electronics 4338757
Argentina Software 4292153
Argentina Sports 4450354
Argentina Toys 4389307
Argentina 21755581
...
Vietnam Books 4238299
Vietnam Electronics 4339044
Vietnam Software 4221673
Vietnam Sports 4175508
Vietnam Toys 4306048
Vietnam 21280572
1201006258
3.3. Cubes
select country, category, sum(amount) as totalsales
from factsales
left join dimcontry
on factsales.countryid = dimcontry.countryid
left join dimcategory
on factsales.categoryid=dimcategory.categoryid
group by cube(country, category)
order by country, category
> COUNTRY CATEGORY TOTALSALES
Argentina Books 4285010
Argentina Electronics 4338757
Argentina Software 4292153
Argentina Sports 4450354
Argentina Toys 4389307
Argentina 21755581
Vietnam Books 4238299
Vietnam Electronics 4339044
Vietnam Software 4221673
Vietnam Sports 4175508
Vietnam Toys 4306048
Vietnam 21280572
...
Books 239357597
Electronics 239912568
Software 240289802
Sports 240709913
Toys 240736378
1201006258
3.4. Materialized Views
CREATE TABLE total_sales_per_country (country, category, totalsales) AS
(select country, category, sum(amount) as totalsales
from factsales
left join dimcontry
on factsales.countryid = dimcontry.countryid
left join dimcategory
on factsales.categoryid=dimcategory.categoryid
group by country, category)
DATA INITIALLY DEFERRED
REFRESH DEFERRED
MAINTAINED BY SYSTEM;
refresh table total_sales_per_country;
select * from total_sales_per_country where totalsales > 4306048;