Clickhouse¶
Install¶
mkdir -p /docker/clickhouse/config
mkdir -p /docker/clickhouse/data
touch /docker/clickhouse/config/config.xml
touch /docker/clickhouse/config/users.xml
docker run -d --name clickhouse-server \
--ulimit nofile=262144:262144 \
-p 8123:8123 \
-p 9000:9000 \
-v /docker/clickhouse/data:/var/lib/clickhouse \
--restart unless-stopped \
clickhouse/clickhouse-server
docker exec -it clickhouse-server clickhouse-client --query "SELECT name, value FROM system.settings WHERE name IN ('max_memory_usage', 'max_concurrent_queries')"
docker exec -it clickhouse-server clickhouse-client
> create database qbdata_db;
> CREATE USER qbdata_db IDENTIFIED WITH plaintext_password BY '***';
> GRANT ALL ON qbdata_db.* TO qbdata_db;
# verify
SHOW DATABASES;
SHOW USERS;
# example
-- Create a database
CREATE DATABASE my_database;
-- Create a user
CREATE USER my_user IDENTIFIED WITH plaintext_password BY 'my_password';
-- Grant privileges
GRANT ALL ON my_database.* TO my_user;
> EXIT;
Commands¶
Functions¶
- has
-- or/hasany SELECT count(1) from middle_source_plus msp where hasAny(msp.array_brand, ['宝马', '比亚迪']); -- and /hasall SELECT count(1) from middle_source_plus msp where hasall(msp.array_brand, ['宝马', '比亚迪']); -- equeal/ has SELECT count(1) from middle_source_plus msp where has(msp.array_brand, '比亚迪');
Performance¶
-
Limit 小批次更新
-
投影
ALTER TABLE my_table ADD PROJECTION my_projection ( SELECT column1, column2, sum(column3) GROUP BY column1, column2 );
-
查看merge操作
SELECT * FROM system.merges;
-
限制内存
SELECT name, value FROM system.settings WHERE name LIKE '%max_memory%'; SET max_memory_usage = 10000000000; -- 例如设置更高的限制,10 GB
-
内存溢出
-- 查看参数 SELECT name, value FROM system.settings WHERE name LIKE '%max_bytes_before_external%'; -- 在内存不足时将 溢出内存数据写入磁盘; SET max_bytes_before_external_sort = 1000000000; -- 1 GB SET max_bytes_before_external_group_by = 1000000000; -- 1 GB
投影¶
-- 在现有表上添加投影
ALTER TABLE table_name
ADD PROJECTION projection_name
(
SELECT col1, col2, sum(col3) as sum_col3
GROUP BY col1, col2
);
-- 在创建表时定义投影
CREATE TABLE table_name
(
col1 String,
col2 String,
col3 Int32
)
ENGINE = MergeTree()
ORDER BY (col1)
SETTINGS index_granularity = 8192
PROJECTIONS
(
proj1
(
SELECT col1, col2, sum(col3)
GROUP BY col1, col2
)
);
物化视图(MATERIALIZED VIEW)¶
- create data table
CREATE TABLE qbdata_db.test_sales
(
`sale_year` String COMMENT '年份',
`sale_month` String COMMENT '月份',
`sale_group_id` String COMMENT '集团ID',
`sale_country` String COMMENT '销售国家',
`sale_quantity` UInt32 COMMENT '销量'
)
ENGINE = MergeTree()
ORDER BY sale_group_id
SETTINGS index_granularity = 8192;
-
Create Materialized View(Below...)
-
Insert And Select
INSERT INTO test_sales (sale_year, sale_month, sale_group_id, sale_country, sale_quantity)
VALUES (2024, 8, 1, 'China', 100);
INSERT INTO test_sales (sale_year, sale_month, sale_group_id, sale_country, sale_quantity)
VALUES (2024, 8, 1, 'China', 50);
INSERT INTO test_sales (sale_year, sale_month, sale_group_id, sale_country, sale_quantity)
VALUES (2024, 8, 2, 'China', 50);
INSERT INTO test_sales (sale_year, sale_month, sale_group_id, sale_country, sale_quantity)
VALUES (2024, 8, 2, 'China', 20);
MATERIALIZED VIEW(SummingMergeTree)¶
- Create View(SummingMergeTree)
会对order by的相同主键行的
group_quantity
自动求和。
CREATE MATERIALIZED VIEW test_view_sales_ymgc
ENGINE = SummingMergeTree()
ORDER BY (sale_year, sale_month, sale_group_id, sale_country)
POPULATE AS
SELECT
toInt32(sale_year) AS sale_year,
toInt32(sale_month) AS sale_month,
sale_group_id,
sale_country,
sale_quantity AS group_quantity
FROM test_sales;
- Select
SELECT * FROM test_view_sales_ymgc;

-- with group by
SELECT
sale_year,
sale_month,
sale_group_id,
sale_country,
SUM(group_quantity) AS total_quantity
FROM test_view_sales_ymgc
GROUP BY sale_year, sale_month, sale_group_id, sale_country;

- 强制合并后查询
OPTIMIZE TABLE test_view_sales_ymgc FINAL; SELECT * FROM test_view_sales_ymgc;
Hint¶
-
SummingMergeTree
引擎的工作原理:- 该引擎会在后台合并过程中,对具有相同主键的行的数值列进行求和。
- 数据的最终一致性是通过后台合并实现的,可能不会立即反映在查询结果中。
-
避免在物化视图的
SELECT
中使用聚合函数和GROUP BY
: -
让引擎自动处理聚合,否则可能导致数据重复累加或结果不正确。
-
查询时的聚合:
-
如果需要立即查看聚合后的结果,可以在查询时使用
GROUP BY
和SUM
。 - 这样可以避免等待后台合并完成。
MATERIALIZED VIEW(AggregatingMergeTree)¶
- Create View
CREATE MATERIALIZED VIEW test_view_sales_agg
ENGINE = AggregatingMergeTree()
ORDER BY (sale_year, sale_month, sale_group_id, sale_country)
AS
SELECT
sale_year,
sale_month,
sale_group_id,
sale_country,
sumState(sale_quantity) AS sum_quantity_state,
maxState(sale_quantity) AS max_quantity_state,
minState(sale_quantity) AS min_quantity_state
FROM test_sales
GROUP BY sale_year, sale_month, sale_group_id, sale_country;
- Insert History Data(Optional)
INSERT INTO test_view_sales_agg
SELECT
sale_year,
sale_month,
sale_group_id,
sale_country,
sumState(sale_quantity) AS sum_quantity_state,
maxState(sale_quantity) AS max_quantity_state,
minState(sale_quantity) AS min_quantity_state
FROM test_sales
GROUP BY sale_year, sale_month, sale_group_id, sale_country;
- Select
由于
AggregatingMergeTree
存储的是聚合函数的状态,需要在查询时应用...Merge()
函数来获取最终结果。
SELECT
sale_year,
sale_month,
sale_group_id,
sale_country,
sumMerge(sum_quantity_state) AS total_quantity,
maxMerge(max_quantity_state) AS max_quantity,
minMerge(min_quantity_state) AS min_quantity
FROM test_view_sales_agg
GROUP BY sale_year, sale_month, sale_group_id, sale_country;
sumMerge()
、maxMerge()
、minMerge()
:这些函数用于从存储的聚合状态中计算最终结果。GROUP BY
:在查询中需要再次使用GROUP BY
,以确保正确合并聚合状态。
Hint¶
-
聚合函数的状态存储:
- 在使用
AggregatingMergeTree
时,聚合函数的状态(如sumState
)被存储为专门的数据类型AggregateFunction
。 - 在查询时,需要使用对应的
...Merge
函数来计算最终结果。 -
数据类型:
-
确保数值列的数据类型正确,例如
Int32
、Float64
等。 - 聚合函数状态列的数据类型会自动推导为
AggregateFunction
类型。 -
物化视图的更新:
-
物化视图会在源表有新数据插入时自动更新,不需要手动刷新。
- 由于物化视图是异步更新的,可能会有微小的延迟。
-
查询优化:
-
在查询时使用
GROUP BY
和对应的...Merge
函数,可以确保获取准确的聚合结果。
- 在使用