
K8s Prometheus External Storage Deployment

Dynamic provisioning with a StorageClass backed by NFS

Install NFS

Server: 192.168.3.101

$ yum -y install nfs-utils rpcbind

# Shared directory
$ mkdir -p /data/k8s && chmod 755 /data/k8s

$ echo '/data/k8s *(insecure,rw,sync,no_root_squash)'>>/etc/exports

$ systemctl enable rpcbind && systemctl start rpcbind
$ systemctl enable nfs && systemctl start nfs

Client: the k8s cluster slave (worker) nodes

$ yum -y install nfs-utils rpcbind
$ mkdir /nfsdata
$ mount -t nfs 192.168.3.101:/data/k8s /nfsdata
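
A quick sanity check that the export is visible and the mount succeeded on a slave node (IPs and paths as above):

$ showmount -e 192.168.3.101
$ df -hT /nfsdata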

Prepare a directory for the manifests

mkdir /data/yaml/nfs/ -p

Create the provisioner

value: storage.pri/nfs # the name can be anything, but every later reference must use the same value; in this image the volume mountPath defaults to /persistentvolumes and must not be changed, otherwise the provisioner errors at runtime

[root@k8s-master nfs]# cat provisioner-nfs.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-provisioner
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["watch", "create", "update", "patch"]
  - apiGroups: [""]
    resources: ["services", "endpoints"]
    verbs: ["get", "create", "list", "watch", "update"]
  - apiGroups: ["extensions"]
    resources: ["podsecuritypolicies"]
    resourceNames: ["nfs-provisioner"]
    verbs: ["use"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-provisioner
    namespace: default
roleRef:
  kind: ClusterRole
  name: nfs-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
# vi nfs-deployment.yaml; the nfs-client provisioner Deployment, using the ServiceAccount authorized above
kind: Deployment
apiVersion: apps/v1
metadata:
  name: nfs-client-provisioner
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccount: nfs-provisioner
      containers:
        - name: nfs-client-provisioner
          image: lizhenliang/nfs-client-provisioner
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: storage.pri/nfs
            - name: NFS_SERVER
              value: 192.168.3.101
            - name: NFS_PATH
              value: /data/k8s
      volumes:
        - name: nfs-client-root
          nfs:
            server: 192.168.3.101
            path: /data/k8s

Create the StorageClass

cat > /data/yaml/nfs/storageclass-nfs.yaml <<eof
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: storage-nfs
provisioner: storage.pri/nfs
reclaimPolicy: Delete
eof

Make it the default StorageClass

kubectl apply -f /data/yaml/nfs/
kubectl patch storageclass storage-nfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
kubectl get sc
kubectl get po -o wide
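
As a further check, storage-nfs should now be marked "(default)" and the provisioner pod should be Running (label as defined in the Deployment above):

kubectl get sc storage-nfs
kubectl get po -l app=nfs-client-provisioner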

Verify dynamic provisioning

storageClassName: storage-nfs # note: this must match the StorageClass name exactly

cat > /data/yaml/nfs/pvc-nfs.yaml <<eof
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-nfs
spec:
  storageClassName: storage-nfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
eof
kubectl apply -f /data/yaml/nfs/pvc-nfs.yaml
kubectl get pvc
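
To confirm the claim really lands on the NFS share, a throwaway pod can write through it. This is a minimal sketch; the pod name and busybox image are arbitrary choices:

cat > /data/yaml/nfs/test-pod-nfs.yaml <<eof
apiVersion: v1
kind: Pod
metadata:
  name: test-pod-nfs
spec:
  containers:
    - name: writer
      image: busybox
      command: ["/bin/sh", "-c", "echo hello-nfs > /data/hello && sleep 3600"]
      volumeMounts:
        - name: data
          mountPath: /data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: pvc-nfs
eof
kubectl apply -f /data/yaml/nfs/test-pod-nfs.yaml
# the hello file should appear in the per-PVC subdirectory that the provisioner creates under /data/k8s on the NFS server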

Deploy Prometheus

# The configuration file needs to be prepared first, so it is stored as a ConfigMap
$ cat prometheus.yml
# my global config
global:
  scrape_interval: 30s
  evaluation_interval: 30s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

# kubectl -n monitor create configmap prometheus-config --from-file=prometheus.yml
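
The monitor namespace has to exist before the ConfigMap, PVC and Deployment below can be created in it; assuming it has not been created yet:

$ kubectl create namespace monitor
$ kubectl -n monitor get configmap prometheus-config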

# pvc
$ cat pvc.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: prometheus
  namespace: monitor
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: storage-nfs
  resources:
    requests:
      storage: 200Gi

# Prometheus Deployment manifest
# Prometheus runs its process as the nobody user, while the freshly mounted data directory is owned by root, which causes data-directory permission errors; an initContainer is used to fix the directory ownership:
$ cat prometheus-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      initContainers:
        - name: "change-permission-of-directory"
          image: busybox
          command: ["/bin/sh"]
          args: ["-c", "chown -R 65534:65534 /prometheus"]
          securityContext:
            privileged: true
          volumeMounts:
            - mountPath: "/etc/prometheus"
              name: config-volume
            - mountPath: "/prometheus"
              name: data
      containers:
        - image: prom/prometheus:v2.19.2
          name: prometheus
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus" # 指定tsdb数据路径
- "--web.enable-lifecycle" # 支持热更新,直接执行localhost:9090/-/reload立即生效
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
- "--web.console.templates=/usr/share/prometheus/consoles"
ports:
- containerPort: 9090
name: http
volumeMounts:
- mountPath: "/etc/prometheus"
name: config-volume
- mountPath: "/prometheus"
name: data
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 100m
memory: 512Mi
volumes:
- name: data
persistentVolumeClaim:
claimName: prometheus
- configMap:
name: prometheus-config
name: config-volume
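
Because --web.enable-lifecycle is set, configuration reloads can be triggered without restarting the pod; for example from a workstation with kubectl access (the port-forward is just one convenient way to reach port 9090):

$ kubectl -n monitor port-forward deploy/prometheus 9090:9090 &
$ curl -X POST http://127.0.0.1:9090/-/reload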

# RBAC: Prometheus calls the Kubernetes API for service discovery and for scraping metrics
$ cat prometheus-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - "extensions"
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitor
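
Once applied, kubectl auth can-i can be used as a spot check that the binding works for the prometheus ServiceAccount:

$ kubectl auth can-i list pods --as=system:serviceaccount:monitor:prometheus
$ kubectl auth can-i get /metrics --as=system:serviceaccount:monitor:prometheus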

# Service in front of Prometheus, consumed by the Ingress
$ cat prometheus-svc.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  selector:
    app: prometheus
  type: ClusterIP
  ports:
    - name: web
      port: 9090
      targetPort: http

$ cat prometheus-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus
  namespace: monitor
spec:
  rules:
    - host: prometheus.yuan.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: prometheus
                port:
                  number: 9090
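
Apply all the manifests and check that everything comes up (file names as used above; an Ingress controller must already be running in the cluster for the Ingress to get an address):

$ kubectl apply -f prometheus-rbac.yaml -f pvc.yaml -f prometheus-deployment.yaml -f prometheus-svc.yaml -f prometheus-ingress.yaml
$ kubectl -n monitor rollout status deploy/prometheus
$ kubectl -n monitor get pvc,pod,svc,ingress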

Dynamic provisioning with a StorageClass backed by Ceph

Install Ceph

Install a single-node demo cluster

vi ceph.sh

#!/bin/bash
#one-click Ceph deployment script
#for CentOS 7

#1.disable iptables&selinux

echo -n "Disabling the firewall......"
systemctl stop firewalld > /dev/null 2>&1
systemctl disable firewalld > /dev/null 2>&1
if [ $? -eq 0 ];then
echo -n "Firewall disabled!"
fi

echo -n "Disabling SELinux......"
setenforce 0 > /dev/null 2>&1
sed -i '/^SELINUX=/s/=.*/=disabled/' /etc/selinux/config
if [ $? -eq 0 ];then
echo -n "SELinux disabled!"
fi

#2.set hostname as ceph##

HOSTNAME=ceph
hostnamectl set-hostname ceph
IP=`ip route |grep src|grep metric|awk -F" " '{ print $9 }'`
echo "$IP $HOSTNAME" >>/etc/hosts

#3.install epel.repo#
yum install -y epel-release

cat <<EOF > /etc/yum.repos.d/ceph.repo
[Ceph]
name=Ceph packages for $basearch
baseurl=http://mirrors.163.com/ceph/rpm-jewel/el7/x86_64
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=http://mirrors.163.com/ceph/keys/release.asc
priority=1
[Ceph-noarch]
name=Ceph noarch packages
baseurl=http://mirrors.163.com/ceph/rpm-jewel/el7/noarch
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=http://mirrors.163.com/ceph/keys/release.asc
priority=1
[ceph-source]
name=Ceph source packages
baseurl=http://mirrors.163.com/ceph/rpm-jewel/el7/SRPMS
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=http://mirrors.163.com/ceph/keys/release.asc
priority=1
EOF


#sed -e "s/^metalink=/#metalink=/g" \
# -e "s/^mirrorlist=http/#mirrorlist=http/g" \
# -e "s@^#baseurl=@baseurl=@g" \
# -i /etc/yum.repos.d/*.repo

#echo 192.168.239.241 mirror.centos.org >> /etc/hosts
#echo 192.168.239.241 download.fedoraproject.org >> /etc/hosts


#4.update system & install ceph-deploy##

yum update -y &&yum clean all &&yum -y install ceph-deploy

#5.set up local ssh keys#
#ssh-keygen
#ssh-copy-id ceph

ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#ssh -o stricthostkeychecking=no $HOSTNAME

#####################################################################
#1.initialize the ceph service###
yum clean all &&yum -y install ceph-deploy

mkdir /etc/ceph &&cd /etc/ceph
ceph-deploy new ceph

#ceph-deploy new $HOSTNAME

#2.edit the configuration file ###

cp ceph.conf ceph.conf.bak
#sed -i 's/cephx/none/g' /etc/ceph/ceph.conf
sed -i 's@^$@osd_pool_default_size = 1@g' ceph.conf
echo "mon_pg_warn_max_per_osd = 1000" >> /etc/ceph/ceph.conf


#3.install ceph###

ceph-deploy install ceph

#4.create the monitor service###
ceph-deploy mon create ceph
ceph-deploy gatherkeys ceph
#4.osd###

#prepare the osd ###
mkfs.xfs /dev/sdb
mkdir -p /var/local/osd
mount /dev/sdb /var/local/osd/
chown -R ceph:ceph /var/local/osd*
#create the osd ###
ceph-deploy osd prepare ceph:/var/local/osd
#activate the osd ###
ceph-deploy osd activate ceph:/var/local/osd
#chown -R ceph:ceph /var/local/osd*   # forgetting to set directory ownership is a common cause of osd activation failure
#check status ###
ceph-deploy osd list ceph

#5.fix configuration file permissions###

ceph-deploy admin ceph
chmod +r /etc/ceph/*

#6.deploy the mds service###

ceph-deploy mds create ceph
ceph mds stat

#7.create the ceph filesystem###

ceph fs ls
ceph osd pool create cephfs_data 128
ceph osd pool create cephfs_metadata 128
ceph fs new cephfs cephfs_metadata cephfs_data
ceph fs ls

#8.mount the Ceph filesystem

mkdir /ceph
yum install -y ceph-fuse
IP=`ip route |grep src|grep metric|awk -F" " '{ print $9 }'`
ceph-fuse -m $IP:6789/ /ceph
df -Th

#9.check ceph status

#ceph monitor quorum status: ceph quorum_status --format json-pretty

ceph mon stat
ceph osd stat
#ceph osd tree (shows the CRUSH map)
ceph osd tree
ceph pg stat
#ceph auth list (the cluster's authentication keys)

ssh -o stricthostkeychecking=no $HOSTNAME

Create and mount CephFS

# CephFS needs two pools, one for data and one for metadata
ceph osd pool create cephfs_data 128
ceph osd pool create cephfs_meta 128
ceph osd lspools

# Create a CephFS filesystem
ceph fs new cephfs cephfs_meta cephfs_data

# Check
ceph fs ls

# ceph auth get-key client.admin
client.admin
key: AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==

# mount on a slave node
$ mount -t ceph 172.21.51.55:6789:/ /mnt/cephfs -o name=admin,secret=AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==
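
A quick sanity check that the kernel mount worked and is writable (the mountpoint /mnt/cephfs must have been created beforehand):

$ df -hT /mnt/cephfs
$ touch /mnt/cephfs/mount-test && ls /mnt/cephfs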

Deploy cephfs-provisioner

$ cat external-storage-cephfs-provisioner.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cephfs-provisioner
  namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cephfs-provisioner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "get", "delete"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cephfs-provisioner
subjects:
  - kind: ServiceAccount
    name: cephfs-provisioner
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: cephfs-provisioner
  apiGroup: rbac.authorization.k8s.io

---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cephfs-provisioner
  namespace: kube-system
rules:
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "get", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cephfs-provisioner
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cephfs-provisioner
subjects:
  - kind: ServiceAccount
    name: cephfs-provisioner
    namespace: kube-system

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cephfs-provisioner
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cephfs-provisioner
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: cephfs-provisioner
    spec:
      containers:
        - name: cephfs-provisioner
          image: "quay.io/external_storage/cephfs-provisioner:latest"
          env:
            - name: PROVISIONER_NAME
              value: ceph.com/cephfs
          imagePullPolicy: IfNotPresent
          command:
            - "/usr/local/bin/cephfs-provisioner"
          args:
            - "-id=cephfs-provisioner-1"
            - "-disable-ceph-namespace-isolation=true"
      serviceAccount: cephfs-provisioner
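
Deploy it and make sure the provisioner pod starts (file name as above):

$ kubectl apply -f external-storage-cephfs-provisioner.yaml
$ kubectl -n kube-system get pods -l app=cephfs-provisioner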

Look up the admin account's key on the Ceph monitor node

$ ceph auth list
$ ceph auth get-key client.admin
AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==

Create the Secret

$ echo -n AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==|base64
QVFCUFRzdGdjMDc4TkJBQTc4RDEvS0FCZ2xJWkhLaDcrRzJYOHc9PQ==
$ cat ceph-admin-secret.yaml
apiVersion: v1
data:
  key: QVFCUFRzdGdjMDc4TkJBQTc4RDEvS0FCZ2xJWkhLaDcrRzJYOHc9PQ==
kind: Secret
metadata:
  name: ceph-admin-secret
  namespace: kube-system
type: Opaque
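
Apply the Secret and, as a sanity check, decode it back; it should print the key obtained from ceph auth get-key above:

$ kubectl apply -f ceph-admin-secret.yaml
$ kubectl -n kube-system get secret ceph-admin-secret -o jsonpath='{.data.key}' | base64 -d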

Create the StorageClass

$ cat cephfs-storage-class.yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: dynamic-cephfs
provisioner: ceph.com/cephfs
parameters:
  monitors: 172.21.51.55:6789
  adminId: admin
  adminSecretName: ceph-admin-secret
  adminSecretNamespace: "kube-system"
  claimRoot: /volumes/kubernetes
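
Apply it and confirm the class is registered:

$ kubectl apply -f cephfs-storage-class.yaml
$ kubectl get sc dynamic-cephfs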

Verify dynamic PVC provisioning

Create a PVC that specifies the StorageClass and the requested size, and the storage is provisioned dynamically.

Create a PVC and check that a PV is generated automatically

$ cat cephfs-pvc-test.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: cephfs-claim
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: dynamic-cephfs
  resources:
    requests:
      storage: 2Gi

$ kubectl create -f cephfs-pvc-test.yaml

$ kubectl get pv
pvc-2abe427e-7568-442d-939f-2c273695c3db 2Gi RWO Delete Bound default/cephfs-claim dynamic-cephfs 1s
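
On the node where CephFS is mounted at /mnt/cephfs, the dynamically provisioned volume should also show up as a directory; with the claimRoot configured above it would be under /volumes/kubernetes, though the exact directory name (and, depending on the provisioner version, the layout) is generated by the provisioner:

$ kubectl get pvc cephfs-claim
$ ls /mnt/cephfs/volumes/kubernetes/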

Add a hosts entry on your local machine

sudo vim /etc/hosts
192.168.3.101 prometheus.yuan.com

# open in a browser
prometheus.yuan.com
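
The same host entry can also be checked from the command line using Prometheus' built-in health endpoint, assuming the Ingress controller is listening on port 80 at 192.168.3.101:

$ curl -s http://prometheus.yuan.com/-/healthy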