

ClickHouse cluster deployment

Single-node deployment

Install ClickHouse
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754

echo "deb [arch=amd64] https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update

# Install a specific version
apt-get install -y clickhouse-common-static="23.4.2.11" clickhouse-server="23.4.2.11" clickhouse-client="23.4.2.11"


Mount the volume:
mount -t nfs -o nolock,proto=tcp,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport 172.19.35.5:/ossdata /mnt

mkdir -p /mnt/clickhouse/
chown clickhouse:clickhouse -R /mnt/clickhouse
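
To make the NFS mount persist across reboots, an /etc/fstab entry along these lines could be added (a sketch reusing the export and options from the mount command above; verify against your environment):
172.19.35.5:/ossdata /mnt nfs nolock,proto=tcp,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport 0 0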


Modify the ClickHouse configuration file

Change the default data directory /var/lib/clickhouse to /mnt/clickhouse.
In vim, open /etc/clickhouse-server/config.xml and run:
:%s@/var/lib/clickhouse@/mnt/clickhouse@g
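
The same substitution can be done non-interactively with sed (a sketch; consider backing up config.xml first):
cp /etc/clickhouse-server/config.xml /etc/clickhouse-server/config.xml.bak
sed -i 's@/var/lib/clickhouse@/mnt/clickhouse@g' /etc/clickhouse-server/config.xml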


Restart ClickHouse
systemctl restart clickhouse-server
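
A quick check that the server came back up (not from the original notes; standard commands):
systemctl status clickhouse-server --no-pager
clickhouse-client --query "SELECT version()"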


Import test data following the official example dataset:
https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema

Summary docs:

Summary of ClickHouse cluster setup issues (minimum number of nodes for a ClickHouse cluster) - CSDN blog

Routine DBA operations

Common query commands

use bjgsbp20200828

SELECT * FROM GPSContDataH3 WHERE timestamp_column >= toDateTime('2023-01-01 00:00:00') AND timestamp_column < toDateTime('2023-01-02 00:00:00');


SELECT * FROM GPSContDataH3 WHERE timestamp_column >= toDateTime('2022-12-30 00:00:00') AND timestamp_column < toDateTime('2022-12-31 00:00:00');

SELECT * FROM GPSContDataH3 WHERE unix_time= toDateTime('2023-01-01 00:00:00', 'Asia/Istanbul')


select * from GPSContDataH3 order by unix_time desc limit 1 \G


tmux new -s datazip
tmux attach -t datazip

121.36.244.174
123.60.41.178

tar zcvf 202307-202311.tar.gz ./202307new ./202308 ./202309 ./202310 ./202311


nohup bash select.sh 1 > select.log 2>&1 &
nohup bash select.sh 2 > select2.log 2>&1 &

Size of each database, shown as human-readable sizes

SELECT
database,
formatReadableSize(sum(data_compressed_bytes)) AS total_compressed_data_size
FROM system.parts
GROUP BY database
ORDER BY sum(data_compressed_bytes) DESC;

Size of each database, in GiB

SELECT
database,
round(sum(data_compressed_bytes) / pow(1024, 3), 2) AS total_compressed_data_size_gib
FROM system.parts
GROUP BY database
ORDER BY total_compressed_data_size_gib DESC;
select count(1) from bshdq20220123.StressData_local WHERE unix_time<toDateTime('2023-01-01 00:00:00', 'Asia/Istanbul')

Set a password for ClickHouse

<clickhouse>
<users>
<default>
<password remove='1' />
<password_sha256_hex>e588fd15143cece8cde30db368df0fb75c2d6bd1c513c2896406f3193cf6ea75</password_sha256_hex>
</default>
</users>
</clickhouse>
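
The password_sha256_hex value can be generated as below ('MyStrongPassword' is a placeholder); the XML snippet above is typically saved as a file under /etc/clickhouse-server/users.d/:
echo -n 'MyStrongPassword' | sha256sum | awk '{print $1}'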

Total size of a single database

SELECT
database,
round(sum(data_compressed_bytes) / pow(1024, 3), 2) AS total_compressed_data_size_gib
FROM system.parts
WHERE database = 'hthtdqjgjkjc20211203'
GROUP BY database
ORDER BY total_compressed_data_size_gib DESC;


SELECT
sum(rows) AS `总行数`,
formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
round(
(
sum(data_compressed_bytes) / sum(data_uncompressed_bytes)
) * 100,
0
) AS `压缩率`
FROM
system.parts
WHERE
database != 'system' and partition<='202312' and active=1

Size of multiple databases

SELECT
database,
round(sum(data_compressed_bytes) / pow(1024, 3), 2) AS total_compressed_data_size_gib
FROM system.parts
WHERE database IN ('hthtdqjgjkjc20211203', 'qhdxtmg20191124','chktdq20230531','xmgswhhtdqsljc20220526','ztjlskz20230420','tddl1')
GROUP BY database
ORDER BY total_compressed_data_size_gib DESC;

View statements executed on the database

## DELETE queries
select * from system.query_log where query_kind='Delete' order by event_time desc limit 10 \G;

## ALTER ... DELETE (mutations)
select * from system.query_log where query_kind='Alter' order by event_time desc limit 10 \G;

Query table sizes within a single database

SELECT
table AS `表名`,
sum(rows) AS `总行数`,
formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
round(
(
sum(data_compressed_bytes) / sum(data_uncompressed_bytes)
) * 100,
0
) AS `压缩率`
FROM
system.parts
WHERE
database = 'wzskdq20211109' -- table IN ('temp_1')
GROUP BY
table;

Query the size of multiple databases

		
SELECT
database,
formatReadableSize (sum(bytes)) AS bytes_size,
formatReadableSize (sum(primary_key_bytes_in_memory)) AS primary_keys_size,
formatReadableSize (sum(data_uncompressed_bytes)) AS `原始大小`,
formatReadableSize (sum(data_compressed_bytes)) AS `压缩大小`,
round(
(
sum(data_compressed_bytes) / sum(data_uncompressed_bytes)
) * 100,
0
) AS `压缩率`
FROM
system.parts
WHERE
1 = 1 AND database in ('ymgdq20230712')
GROUP BY
database
ORDER BY
sum(bytes) DESC
LIMIT
10;

Size of every table in one database

SELECT 
sum(rows) AS `总行数`,
formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS `压缩率`,
`table` AS `表名`
FROM system.parts where database = 'bzhhdq20220906' group by `table`
# Filter by year (partition)
clickhouse-client --query="SELECT partition,path,database FROM system.parts where database = 'tzgjq20211109' and \`table\`= 'DistanceContData_local' and active and partition < '202301' ;"

Disk usage of each partition and its sub-path

SELECT
partition,
name,
bytes_on_disk / (1024 * 1024 * 1024) AS bytes_on_disk_gb,
path
FROM system.parts
WHERE database = 'yxhdq20211213' AND table = 'StressData_local';

Monthly disk usage of a specified database

(summed across multiple shards)

SELECT
database AS `项目名称`,
partition,
formatReadableSize(bytes_on_disk) AS `磁盘占用`,
formatReadableSize(data_uncompressed_bytes) AS `数据未压缩/字节`,
formatReadableSize(data_compressed_bytes) AS `数据压缩/字节`,
compress_rate AS `压缩率`,
rows AS `数据量/条`
FROM
(
SELECT
database,
partition,
sum(rows) AS rows,
sum(bytes_on_disk) AS bytes_on_disk,
sum(data_uncompressed_bytes) AS data_uncompressed_bytes,
sum(data_compressed_bytes) AS data_compressed_bytes,
round((data_compressed_bytes / data_uncompressed_bytes) * 100, 2) AS compress_rate
FROM data_platform.system_parts
WHERE active AND (database = 'cqhhdgldq20230103')
GROUP BY
database,
partition
)
ORDER BY partition ASC


Total disk space used by a table per partition (per month), in GB

SELECT 
partition,
sum(bytes_on_disk) / (1024 * 1024 * 1024) AS bytes_on_disk_gb
FROM system.parts
WHERE database = 'bzhhdq20220906' AND table = 'TempHumiContData_local' and active and partition <= '202305' and partition >= '202305' GROUP BY partition;


Monthly data volume across all databases

SELECT partition                                   AS "月度",
formatReadableSize(data_uncompressed_bytes) AS "数据未压缩/字节",
formatReadableSize(data_compressed_bytes) AS "数据压缩/字节",
concat(toString(compress_rate), '%') AS "压缩比率",
formatReadableQuantity(rows) AS "数据量/条"
FROM (
SELECT partition,
sum(rows) AS rows,
sum(bytes_on_disk) AS bytes_on_disk,
sum(data_uncompressed_bytes) AS data_uncompressed_bytes,
sum(data_compressed_bytes) AS data_compressed_bytes,
round((data_compressed_bytes / data_uncompressed_bytes) * 100,
2) AS compress_rate
FROM data_platform.system_parts
WHERE active AND partition >= '202401' AND partition <= '202403' group by partition
);


Total disk space used by the ClickHouse instance

SELECT formatReadableSize(sum(bytes_on_disk)) AS total_data_size
FROM system.parts

Check which disk a partition is on

Check the disk where a table's partitions reside:
SELECT partition,path,table,database FROM system.parts where database = 'jhyhdq20230427' and `table`= 'DistanceContData_local' and active ;

Move a partition to a specified disk

Check the disk names:
select policy_name,volume_name,volume_priority,disks,volume_type,max_data_part_size from system.storage_policies;
alter table bzhhdq20220906.StressData_local move partition '202401' to DISK 'cloud_hdd_data4';
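
After the move, the destination disk can be confirmed from system.parts (a sketch reusing the database, table and partition from the command above):
SELECT name, partition, disk_name FROM system.parts WHERE database = 'bzhhdq20220906' AND table = 'StressData_local' AND partition = '202401' AND active;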

File export and import

Export to CSV:
clickhouse-client -q "select * from bzhhdq20220906.TempHumiContData_distributed where unix_time > '2023-04-01 00:00:00' and unix_time < '2023-05-01 00:00:00'" --format CSVWithNames > /root/bzhhdq20220906.TempHumiContData_distributed.csv


clickhouse client -h 10.1.134.81 -u default --query="insert into bzhhdq20220906.TempHumiContData_distributed FORMAT CSV"< /opt/tableA.csv
SELECT  partition,path,table,database  FROM system.parts where database = 'jnhhdegldq20200422' and `table`= 'TopInfoNodeData_local' and active ;

root@clickhouse04:~# clickhouse-client --query="SELECT partition FROM system.parts where database = 'jnhhdegldq20200422' and \`table\`= 'StressData_materialized_local' and active and partition < '202305' and partition > '202301' ;"
202302
202302
202303
202303
202303
202304
202304


alter table system.query_log drop partition '202311';

# Get the query_id of the statement executed above
select * from system.query_log WHERE query_id='be797ab2-7363-4161-9209-c592ca359ea7' \G; # shows which fields are available for the query

SELECT partition,path,table,database FROM system.parts where database = 'system' and `table`= 'query_log' and active ;


SELECT DISTINCT partition from (SELECT partition FROM system.parts where database = 'jnhhdegldq20200422' and table= 'StressData_materialized_local' and active and partition > '202301') ORDER BY partition;

ClickHouse asynchronous processing (mutation) records

select * from system.mutations limit 1
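
A more targeted sketch that lists only mutations still pending, using standard system.mutations columns:
SELECT database, table, mutation_id, command, parts_to_do, is_done, latest_fail_reason FROM system.mutations WHERE is_done = 0;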

View disks

SELECT
name,
path,
formatReadableSize (free_space) AS free,
formatReadableSize (total_space) AS total,
formatReadableSize (keep_free_space) AS reserved
FROM
system.disks
ekc04 :) select * from system.disks;

SELECT *
FROM system.disks

Query id: be502040-6263-4479-b6d4-64ca055ccf89

┌─name────────────┬─path──────────────────────────────┬───free_space─┬───total_space─┬─unreserved_space─┬─keep_free_space─┬─type──┬─is_encrypted─┬─is_read_only─┬─is_write_once─┬─is_remote─┬─is_broken─┬─cache_path─┐
│ cloud_hdd_data2 │ /data2/container/clickhouse/ │ 469926281216 │ 1056758804480 │ 469926281216 │ 0 │ local │ 0 │ 0 │ 0 │ 0 │ 0 │ │
│ cloud_hdd_data3 │ /data3/container/clickhouse/ │ 1548288 │ 2113652535296 │ 1548288 │ 0 │ local │ 0 │ 0 │ 0 │ 0 │ 0 │ │
│ cloud_hdd_data4 │ /data4/container/clickhouse/ │ 519495372800 │ 3170543869952 │ 519495372800 │ 0 │ local │ 0 │ 0 │ 0 │ 0 │ 0 │ │
│ default │ /data1/container/clickhouse/data/ │ 32945684480 │ 211243667456 │ 32945684480 │ 0 │ local │ 0 │ 0 │ 0 │ 0 │ 0 │ │
└─────────────────┴───────────────────────────────────┴──────────────┴───────────────┴──────────────────┴─────────────────┴───────┴──────────────┴──────────────┴───────────────┴───────────┴───────────┴────────────┘

4 rows in set. Elapsed: 0.002 sec.

View storage policies

ekc04 :) select * from system.storage_policies;

SELECT *
FROM system.storage_policies

Query id: 517044a2-d7f0-4e4d-bc1c-52ce3e80f161

┌─policy_name─┬─volume_name─┬─volume_priority─┬─disks───────────────┬─volume_type─┬─max_data_part_size─┬─move_factor─┬─prefer_not_to_merge─┬─perform_ttl_move_on_insert─┬─load_balancing─┐
│ default │ default │ 1 │ ['default'] │ JBOD │ 0 │ 0.1 │ 0 │ 1 │ ROUND_ROBIN │
│ default │ data2 │ 2 │ ['cloud_hdd_data2'] │ JBOD │ 0 │ 0.1 │ 0 │ 1 │ ROUND_ROBIN │
│ default │ data4 │ 3 │ ['cloud_hdd_data4'] │ JBOD │ 0 │ 0.1 │ 0 │ 1 │ ROUND_ROBIN │
│ default │ data3 │ 4 │ ['cloud_hdd_data3'] │ JBOD │ 0 │ 0.1 │ 0 │ 1 │ ROUND_ROBIN │
└─────────────┴─────────────┴─────────────────┴─────────────────────┴─────────────┴────────────────────┴─────────────┴─────────────────────┴────────────────────────────┴────────────────┘

ClickHouse multi-path storage policies - wshenJin - cnblogs.com

Create tables on the cluster


CREATE TABLE iteshell.test_local on cluster cluster_test (
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
StartTime DateTime
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/iteshell.test_local', '{replica}')
PARTITION BY toYYYYMM(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192




create table iteshell.test_all on cluster cluster_test as iteshell.test_local
ENGINE = Distributed(cluster_test,iteshell,test_local,rand())


# Insert data
insert into iteshell.test_local values(1, '2023-12-20 00:11:22', 1, 1, 1, 11, toDateTime('2021-07-08 00:11:22'));
insert into iteshell.test_all values(1, '2023-12-02 00:11:22', 1, 1, 1, 11, toDateTime('2021-07-08 00:11:22'));
insert into iteshell.test_all values(1, '2023-12-03 00:11:22', 1, 1, 1, 11, toDateTime('2021-07-08 00:11:22'));
insert into iteshell.test_all values(1, '2023-12-05 00:11:22', 1, 1, 1, 11, toDateTime('2021-07-08 00:11:22'));
insert into iteshell.test_all values(1, '2023-12-06 00:11:22', 1, 1, 1, 11, toDateTime('2021-07-08 00:11:22'));
insert into iteshell.test_all values(1, '2023-12-08 00:11:22', 1, 1, 1, 11, toDateTime('2021-07-08 00:11:22'));
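
To confirm the cluster topology that ON CLUSTER and the Distributed table rely on, a sketch against system.clusters (cluster_test as defined above):
SELECT cluster, shard_num, replica_num, host_name, port, is_local FROM system.clusters WHERE cluster = 'cluster_test';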

Check receive_timeout

The default is 300 s and can be adjusted (e.g. to 1800 or 900 s; see the sketch after the query below). Check the current value:

SELECT *
FROM system.settings
WHERE name = 'receive_timeout'
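
A sketch of how it could be adjusted. Per session:

SET receive_timeout = 1800;

Or via a profile override (the file name users.d/timeouts.xml is hypothetical; 1800/900 are the values noted above), which takes effect for new sessions once the config is reloaded:

<!-- /etc/clickhouse-server/users.d/timeouts.xml -->
<clickhouse>
<profiles>
<default>
<receive_timeout>1800</receive_timeout>
<send_timeout>900</send_timeout>
</default>
</profiles>
</clickhouse>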

ClickHouse hot/cold storage separation

Tech share | ClickHouse hot/cold storage separation in production - Tencent Cloud Developer Community (tencent.com)

ClickHouse multi-path storage policies - wshenJin - cnblogs.com

ClickHouse multi-volume storage policies (clickhouse storage_policy) - CSDN blog

Change apk mirror (Alpine)

sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories

apk update                  # update the package index
apk add busybox-extras      # install telnet
apk add <pkg>               # install a package
apk del <pkg>               # remove a package
apk upgrade                 # upgrade installed packages
apk info                    # list installed packages
apk search <keyword>        # search packages by name or description
apk fetch <pkg>             # download a .apk package from the repository to the local directory

consul snapshot save --http-addr=http://10.12.142.216:8500 -token=b3a9bca3-6e8e-9678-ea35-ccb8fb272d42 consul_state_$ts.snap

ClickHouse backup and restore

1. Remote

Done via the remote() table function:

clickhouse client  -h 172.x.x.x  --port  9000 -u default  --query="insert into ycssd1.ZHCARadarData_distributed  select *  from remote('172.x.x.x',ycssd1.ZHCARadarData_distributed,'default','')  where  unix_time > '2024-01-01 00:00:00' and unix_time < '2024-01-10 00:00:00';"

2. clickhouse-copier (manual)

/data4/apache-zookeeper-3.6.3-bin/bin/zkCli.sh  -server 127.0.0.1:3181 create  /data/
/data4/apache-zookeeper-3.6.3-bin/bin/zkCli.sh -server 127.0.0.1:3181 create /data/clickhouse/
/data4/apache-zookeeper-3.6.3-bin/bin/zkCli.sh -server 127.0.0.1:3181 create /data/clickhouse/copytasks/
/data4/apache-zookeeper-3.6.3-bin/bin/zkCli.sh -server 127.0.0.1:3181 create /data/clickhouse/copytasks/ztjlskz20230420
/data4/apache-zookeeper-3.6.3-bin/bin/zkCli.sh -server 127.0.0.1:3181 create /data/clickhouse/copytasks/ztjlskz20230420/description "`cat /root/clickhouse-copier/task/task-ztjlskz20230420-VibrationData_local.yaml`"
clickhouse-copier --config zookeeper.xml --task-path /data/clickhouse/copytasks/ztjlskz20230420 --base-dir /root/clickhouse-copier/clickhouse-copier/ztjlskz20230420_VibrationData_local/

# To delete a node's info, use deleteall, for example:
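
A sketch reusing the zkCli path and task node from above:
/data4/apache-zookeeper-3.6.3-bin/bin/zkCli.sh -server 127.0.0.1:3181 deleteall /data/clickhouse/copytasks/ztjlskz20230420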

zookeeper.xml config file

<yandex>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>

<zookeeper>

<node index="1">
<host>172.19.49.39</host>
<port>3181</port>
</node>
<node index="2">
<host>172.19.49.40</host>
<port>3181</port>
</node>
<node index="3">
<host>172.19.49.41</host>
<port>3181</port>
</node>

</zookeeper>
</yandex>

task.yaml config file (the content is XML despite the extension)

root@ekc01:~/clickhouse-copier# cat task/task-tongyong.yaml.bak 
<yandex>
<remote_servers>
<src_cluster>
<shard>
<internal_replication>true</internal_replication>
<weight>1</weight>
<replica>
<host>ekc01</host>
<port>9000</port>
<user>default</user>
</replica>
<replica>
<host>ekc02</host>
<port>9000</port>
<user>default</user>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<weight>1</weight>
<replica>
<host>ekc03</host>
<port>9000</port>
<user>default</user>
</replica>
<replica>
<host>ekc04</host>
<port>9000</port>
<user>default</user>
</replica>
</shard>
</src_cluster>
<dst_cluster>

<shard>
<internal_replication>true</internal_replication>
<weight>1</weight>
<replica>
<host>172.19.49.62</host>
<port>9000</port>
<user>default</user>
</replica>
<replica>
<host>172.19.49.63</host>
<port>9000</port>
<user>default</user>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<weight>1</weight>
<replica>
<host>172.19.49.64</host>
<port>9000</port>
<user>default</user>
</replica>
<replica>
<host>172.19.49.65</host>
<port>9000</port>
<user>default</user>
</replica>
</shard>


</dst_cluster>
</remote_servers>
<!-- Maximum number of workers -->
<max_workers>4</max_workers>
<!-- fetch (pull) data: read-only -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- insert (push) data: read-write -->
<settings_push>
<readonly>0</readonly>
</settings_push>

<settings>
<connect_timeout>999999999</connect_timeout>
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>

<!-- Task definitions; several can be configured and they run sequentially -->
<tables>
<!-- A copy task; the name is user-defined -->
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>src_cluster</cluster_pull>
<database_pull>src_database</database_pull>
<table_pull>src_local</table_pull>

<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>dst_cluster</cluster_push>
<database_push>dst_database</database_push>
<table_push>dst_local</table_push>

<!-- Engine of destination tables.
If destination tables have not been created, workers create them using columns definition from source tables and engine
definition from here.

NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
system.parts table.
-->
<engine>
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{layer}-{shard}/tables/"dst_database".dst_local', '{replica}', ver)
PARTITION BY toYYYYMM(unix_time)
ORDER BY (module_name, sensor_id, node_id, channel, unix_time, microsecond)
SETTINGS index_granularity = 8192
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<!-- <sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key> -->

<!-- Optional expression that filter data while pull them from source servers -->
<!-- <where_condition>CounterID != 0</where_condition> -->

<!-- This section specifies partitions that should be copied, other partition will be ignored.
Partition names should have the same format as
partition column of system.parts table (i.e. a quoted text).
Since partition key of source and destination cluster could be different,
these partition names specify destination partitions.

NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
it is strictly recommended to specify them explicitly.
If you already have some ready partitions on destination cluster they
will be removed at the start of the copying since they will be interpreted
as unfinished data from the previous copying!!!
-->

<sharding_key>cityHash64(node_id)</sharding_key>
<!--
<enabled_partitions>
<partition>'888888'</partition>
</enabled_partitions>
-->
</table_hits>

<!-- Next table to copy. It is not copied until previous table is copying. -->
<!--
<table_visits>
</table_visits>
-->

</tables>
</yandex>

Points to note:

cityHash64(node_id) # adjust according to your actual sharding key

        <cluster_pull>src_cluster</cluster_pull>
<database_pull>src_database</database_pull>
<table_pull>src_local</table_pull>

<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>dst_cluster</cluster_push>
<database_push>dst_database</database_push>
<table_push>dst_local</table_push>
        <enabled_partitions>
<partition>'888888'</partition>
</enabled_partitions>
        <engine>
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{layer}-{shard}/tables/"dst_database".dst_local', '{replica}', ver)
PARTITION BY toYYYYMM(unix_time)
ORDER BY (module_name, sensor_id, node_id, channel, unix_time, microsecond)
SETTINGS index_granularity = 8192
</engine>

3. clickhouse-copier script

Specify the database and tables.

Pull the code:

https://github.com/shouji1128955/bookcode.git

# Create a task
bash copy.sh dbname

cd dbname
bash jilu.sh
nohup bash run.sh &

Note: the number of workers is very important; it determines how long the transfer takes.

After the transfer completes, a DingTalk notification is sent (this needs to be configured).

1. Check nohup.out and the copier logs; confirm the error log is clean.

2. Verify the databases: row counts on source and destination should match (see the sketch below).
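
A minimal count comparison that could be run on the destination side (a sketch; the host, database and table names are placeholders in the same style as the task config):

SELECT
(SELECT count() FROM remote('src_host', src_database.src_local, 'default', '')) AS src_rows,
(SELECT count() FROM dst_database.dst_local) AS dst_rows;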

4. Verify sync consistency in ZooKeeper

First, enter the ZooKeeper CLI.

Check the current sync status. In the output below there are 7 partitions in total, of which 6 have completed:

[zk: localhost:2181(CONNECTED) 81] get   /data/clickhouse/copytasks/chktdq20230531/VibrationData_local/status 
{"table_hits":{"all_partitions_count":7,"processed_partitions_count":6}}

Check whether the sync has finished

This one has finished syncing:

[zk: localhost:2181(CONNECTED) 77] get /data/clickhouse/copytasks/chktdq20230531/VibrationData_local/tables/dst_cluster.chktdq20230531.VibrationData_local/202310/attach_is_done 
1
ekc04#20240203123012_21621

# A return value of 1 means this partition has finished syncing

This one has not finished syncing:

[zk: localhost:2181(CONNECTED) 78] get /data/clickhouse/copytasks/chktdq20230531/VibrationData_local/tables/dst_cluster.chktdq20230531.VibrationData_local/202312/attach_is_done 
Node does not exist: /data/clickhouse/copytasks/chktdq20230531/VibrationData_local/tables/dst_cluster.chktdq20230531.VibrationData_local/202312/attach_is_done

Cluster maintenance

1. Log table retention settings

ALTER table `system`.metric_log MODIFY TTL event_date + toIntervalDay(30);  # set the log retention period for this table
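
The same pattern can be applied to the other system log tables if needed (a sketch; adjust the retention window as required):
ALTER TABLE system.query_log MODIFY TTL event_date + toIntervalDay(30);
ALTER TABLE system.trace_log MODIFY TTL event_date + toIntervalDay(30);
ALTER TABLE system.part_log MODIFY TTL event_date + toIntervalDay(30);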

2. Hot/cold separation approach

Only the general idea is outlined here.

Mount OBS (Huawei Cloud object storage), or OSS plus a cloud storage gateway (v3), onto the server.

-- Specify the storage policy and TTL settings when creating the table
CREATE TABLE my_table (
`date` Date,
`data` String
) ENGINE = MergeTree()
PARTITION BY date
ORDER BY date
TTL date + INTERVAL 90 DAY TO VOLUME 'obs_volume'
SETTINGS storage_policy = 'my_storage_policy';  -- something along these lines
  • Define the storage policy in the open-source ClickHouse configuration, usually in the server config file, specifying the local disk path and the OBS disk path (see the sketch after this list).
  • When creating the ClickHouse table, select that policy with the SETTINGS storage_policy parameter; newly written data lands on the local disk first and is then migrated to OBS according to the TTL rules.
  • Configure TTL so that data is moved from local disk to OBS after a set period, e.g. 90 days (define the hot/cold boundary according to your business logic).
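
A minimal storage-policy config sketch for this setup, assuming the OBS/OSS bucket is already mounted at a local path; the disk names and paths are hypothetical, while the volume name 'obs_volume' and policy name 'my_storage_policy' match the CREATE TABLE example above:

<clickhouse>
<storage_configuration>
<disks>
<local_disk>
<path>/data1/clickhouse/</path>
</local_disk>
<obs_disk>
<path>/mnt/obs/clickhouse/</path> <!-- OBS/OSS mount point -->
</obs_disk>
</disks>
<policies>
<my_storage_policy>
<volumes>
<hot>
<disk>local_disk</disk>
</hot>
<obs_volume>
<disk>obs_disk</disk>
</obs_volume>
</volumes>
<move_factor>0.1</move_factor>
</my_storage_policy>
</policies>
</storage_configuration>
</clickhouse>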

3. ClickHouse data directory size statistics

du -h --max-depth=3 | grep "moving"  |  awk '$1 ~ /G$/ && $1 > 10 {print}'