星型基准测试

    开始生成数据:

    1. $ ./dbgen -s 1000 -T l
    2. $ ./dbgen -s 1000 -T p
    3. $ ./dbgen -s 1000 -T s
    4. $ ./dbgen -s 1000 -T d

    在ClickHouse中创建表结构:

    1. CREATE TABLE customer
    2. (
    3. C_CUSTKEY UInt32,
    4. C_NAME String,
    5. C_ADDRESS String,
    6. C_CITY LowCardinality(String),
    7. C_NATION LowCardinality(String),
    8. C_REGION LowCardinality(String),
    9. C_PHONE String,
    10. C_MKTSEGMENT LowCardinality(String)
    11. )
    12. ENGINE = MergeTree ORDER BY (C_CUSTKEY);
    13. CREATE TABLE lineorder
    14. (
    15. LO_ORDERKEY UInt32,
    16. LO_LINENUMBER UInt8,
    17. LO_CUSTKEY UInt32,
    18. LO_PARTKEY UInt32,
    19. LO_SUPPKEY UInt32,
    20. LO_ORDERDATE Date,
    21. LO_ORDERPRIORITY LowCardinality(String),
    22. LO_SHIPPRIORITY UInt8,
    23. LO_QUANTITY UInt8,
    24. LO_EXTENDEDPRICE UInt32,
    25. LO_REVENUE UInt32,
    26. LO_SUPPLYCOST UInt32,
    27. LO_TAX UInt8,
    28. LO_COMMITDATE Date,
    29. LO_SHIPMODE LowCardinality(String)
    30. )
    31. ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
    32. CREATE TABLE part
    33. (
    34. P_PARTKEY UInt32,
    35. P_NAME String,
    36. P_MFGR LowCardinality(String),
    37. P_CATEGORY LowCardinality(String),
    38. P_BRAND LowCardinality(String),
    39. P_COLOR LowCardinality(String),
    40. P_TYPE LowCardinality(String),
    41. P_SIZE UInt8,
    42. P_CONTAINER LowCardinality(String)
    43. )
    44. ENGINE = MergeTree ORDER BY P_PARTKEY;
    45. CREATE TABLE supplier
    46. (
    47. S_SUPPKEY UInt32,
    48. S_NAME String,
    49. S_ADDRESS String,
    50. S_CITY LowCardinality(String),
    51. S_NATION LowCardinality(String),
    52. )
    53. ENGINE = MergeTree ORDER BY S_SUPPKEY;

    写入数据:

    1. $ clickhouse-client --query "INSERT INTO customer FORMAT CSV" < customer.tbl
    2. $ clickhouse-client --query "INSERT INTO part FORMAT CSV" < part.tbl
    3. $ clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
    4. $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl

    将«星型模型»转换为非规范化的«平面模型»:

    1. SET max_memory_usage = 20000000000, allow_experimental_multiple_joins_emulation = 1;
    2. CREATE TABLE lineorder_flat
    3. ENGINE = MergeTree
    4. PARTITION BY toYear(LO_ORDERDATE)
    5. ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
    6. SELECT l.*, c.*, s.*, p.*
    7. FROM lineorder l
    8. ANY INNER JOIN customer c ON (c.C_CUSTKEY = l.LO_CUSTKEY)
    9. ANY INNER JOIN supplier s ON (s.S_SUPPKEY = l.LO_SUPPKEY)
    10. ANY INNER JOIN part p ON (p.P_PARTKEY = l.LO_PARTKEY);
    11. ALTER TABLE lineorder_flat DROP COLUMN C_CUSTKEY, DROP COLUMN S_SUPPKEY, DROP COLUMN P_PARTKEY;

    运行查询:

    1. SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;

    Q1.2

    Q1.3

    1. SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toISOWeek(LO_ORDERDATE) = 6 AND toYear(LO_ORDERDATE) = 1994 AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;

    Q2.1

    1. SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;

    Q2.2

    1. SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;

    Q2.3

    1. SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;
    1. SELECT C_NATION, S_NATION, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION, S_NATION, year ORDER BY year asc, revenue desc;

    Q3.2

    Q3.3

    1. SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc;

    Q3.4

    1. SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND toYYYYMM(LO_ORDERDATE) = '199712' GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc;

    Q4.1

    1. SELECT toYear(LO_ORDERDATE) AS year, C_NATION, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year, C_NATION ORDER BY year, C_NATION;

    Q4.2

    1. SELECT toYear(LO_ORDERDATE) AS year, S_NATION, P_CATEGORY, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year, S_NATION, P_CATEGORY ORDER BY year, S_NATION, P_CATEGORY;

      原始文章