K8s Prometheus External Storage Deployment

· 10 min read

Dynamic StorageClass provisioning backed by NFS

Install NFS

Server: 192.168.3.101

$ yum -y install nfs-utils rpcbind

# Create the shared directory
$ mkdir -p /data/k8s && chmod 755 /data/k8s

$ echo '/data/k8s *(insecure,rw,sync,no_root_squash)'>>/etc/exports

$ systemctl enable rpcbind && systemctl start rpcbind
$ systemctl enable nfs && systemctl start nfs

Clients: the k8s worker (slave) nodes

$ yum -y install nfs-utils rpcbind
$ mkdir /nfsdata
$ mount -t nfs 192.168.3.101:/data/k8s /nfsdata
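
Optionally, confirm from a worker node that the export is visible and writable before moving on (standard nfs-utils checks; the test file name is arbitrary):

$ showmount -e 192.168.3.101
$ df -h /nfsdata
$ touch /nfsdata/nfs-write-test && ls -l /nfsdata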

Prepare the working directory

mkdir /data/yaml/nfs/ -p

Create the provisioner

Note on `value: storage.pri/nfs`: the provisioner name can be anything, but every later reference must use exactly the same string. In this image the volume mountPath defaults to /persistentvolumes and must not be changed, otherwise the provisioner fails at runtime.

[root@k8s-master nfs]# cat provisioner-nfs.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-provisioner
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["watch", "create", "update", "patch"]
  - apiGroups: [""]
    resources: ["services", "endpoints"]
    verbs: ["get", "create", "list", "watch", "update"]
  - apiGroups: ["extensions"]
    resources: ["podsecuritypolicies"]
    resourceNames: ["nfs-provisioner"]
    verbs: ["use"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-provisioner
    namespace: default
roleRef:
  kind: ClusterRole
  name: nfs-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
# Deployment of the nfs-client provisioner, bound to the ServiceAccount above
kind: Deployment
apiVersion: apps/v1
metadata:
  name: nfs-client-provisioner
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccount: nfs-provisioner
      containers:
        - name: nfs-client-provisioner
          image: lizhenliang/nfs-client-provisioner
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: storage.pri/nfs
            - name: NFS_SERVER
              value: 192.168.3.101
            - name: NFS_PATH
              value: /data/k8s
      volumes:
        - name: nfs-client-root
          nfs:
            server: 192.168.3.101
            path: /data/k8s

Create the StorageClass

cat > /data/yaml/nfs/storageclass-nfs.yaml <<eof
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: storage-nfs
provisioner: storage.pri/nfs
reclaimPolicy: Delete
eof

Make it the default StorageClass

kubectl apply -f /data/yaml/nfs/
kubectl patch storageclass storage-nfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
kubectl get sc
kubectl get po -o wide

Verify dynamic provisioning

Note on `storageClassName: storage-nfs`: this must match the StorageClass name exactly.

cat > /data/yaml/nfs/pvc-nfs.yaml <<eof
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-nfs
spec:
  storageClassName: storage-nfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
eof
kubectl apply -f /data/yaml/nfs/pvc-nfs.yaml
kubectl get pvc
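
As an optional end-to-end check that the provisioned volume is actually usable, a throwaway pod can mount the claim and write to it; the pod name, image and file path below are arbitrary examples, not part of the original setup:

cat > /data/yaml/nfs/test-pod.yaml <<eof
apiVersion: v1
kind: Pod
metadata:
  name: nfs-test-pod
spec:
  containers:
    - name: writer
      image: busybox
      command: ["/bin/sh", "-c", "echo hello > /data/hello.txt && sleep 3600"]
      volumeMounts:
        - name: data
          mountPath: /data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: pvc-nfs
eof
kubectl apply -f /data/yaml/nfs/test-pod.yaml
# A directory named after the generated PV should now appear on the NFS server
ls /data/k8s/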

Deploy Prometheus

# The config file is needed first, so it is stored as a ConfigMap
$ cat prometheus.yml
# my global config
global:
  scrape_interval: 30s
  evaluation_interval: 30s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

# kubectl -n monitor create configmap prometheus-config --from-file=prometheus.yml
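
The commands above assume the monitor namespace already exists; if it does not, create it before creating the ConfigMap:

kubectl create namespace monitor
kubectl -n monitor get configmap prometheus-config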

# pvc
$ cat pvc.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: prometheus
  namespace: monitor
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: storage-nfs
  resources:
    requests:
      storage: 200Gi

# Prometheus Deployment manifest
# Prometheus runs its process as the nobody user, but the mounted data directory is owned by root, which causes permission errors; an initContainer fixes the directory ownership:
$ cat prometheus-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      initContainers:
        - name: "change-permission-of-directory"
          image: busybox
          command: ["/bin/sh"]
          args: ["-c", "chown -R 65534:65534 /prometheus"]
          securityContext:
            privileged: true
          volumeMounts:
            - mountPath: "/etc/prometheus"
              name: config-volume
            - mountPath: "/prometheus"
              name: data
      containers:
        - image: prom/prometheus:v2.19.2
          name: prometheus
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus"   # TSDB data path
            - "--web.enable-lifecycle"            # enable hot reload via POST to localhost:9090/-/reload
            - "--web.console.libraries=/usr/share/prometheus/console_libraries"
            - "--web.console.templates=/usr/share/prometheus/consoles"
          ports:
            - containerPort: 9090
              name: http
          volumeMounts:
            - mountPath: "/etc/prometheus"
              name: config-volume
            - mountPath: "/prometheus"
              name: data
          resources:
            requests:
              cpu: 100m
              memory: 512Mi
            limits:
              cpu: 100m
              memory: 512Mi
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: prometheus
        - name: config-volume
          configMap:
            name: prometheus-config
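
Because --web.enable-lifecycle is enabled, a configuration change can be applied without restarting the pod. One way to trigger the reload from a workstation (assuming kubectl access; the local port is arbitrary) is:

# After updating the ConfigMap, wait for kubelet to sync it into the pod, then:
kubectl -n monitor port-forward deployment/prometheus 9090:9090 &
curl -X POST http://127.0.0.1:9090/-/reload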

# RBAC: Prometheus calls the Kubernetes API for service discovery when scraping metrics
$ cat prometheus-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - "extensions"
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitor

# Service for the Ingress to route to
$ cat prometheus-svc.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  selector:
    app: prometheus
  type: ClusterIP
  ports:
    - name: web
      port: 9090
      targetPort: http

$ cat prometheus-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus
  namespace: monitor
spec:
  rules:
    - host: prometheus.yuan.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: prometheus
                port:
                  number: 9090
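
Before DNS or /etc/hosts is set up, the Ingress can be sanity-checked by sending the Host header directly to the ingress controller; the controller address below is a placeholder that depends on your ingress installation:

kubectl -n monitor get ingress prometheus
curl -I -H "Host: prometheus.yuan.com" http://<ingress-controller-address>/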

Dynamic StorageClass provisioning backed by Ceph

Install Ceph

Install a single-node demo cluster

vi ceph.sh

#!/bin/bash
# One-click Ceph deployment script
# for CentOS 7

#1.disable iptables&selinux

echo -n "正在配置iptables防火墙......"
systemctl stop firewalld > /dev/null 2>&1
systemctl disable firewalld > /dev/null 2>&1
if [ $? -eq 0 ];then
echo -n "Iptables防火墙初始化完毕!"
fi

echo -n "正在关闭SELinux......"
setenforce 0 > /dev/null 2>&1
sed -i '/^SELINUX=/s/=.*/=disabled/' /etc/selinux/config
if [ $? -eq 0 ];then
echo -n "SELinux初始化完毕!"
fi

#2.set hostname as ceph##

HOSTNAME=ceph
hostnamectl set-hostname ceph
IP=`ip route |grep src|grep metric|awk -F" " '{ print $9 }'`
echo "$IP $HOSTNAME" >>/etc/hosts

#3.install epel.repo#
yum install -y epel-release

cat <<EOF > /etc/yum.repos.d/ceph.repo
[Ceph]
name=Ceph packages for $basearch
baseurl=http://mirrors.163.com/ceph/rpm-jewel/el7/x86_64
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=http://mirrors.163.com/ceph/keys/release.asc
priority=1
[Ceph-noarch]
name=Ceph noarch packages
baseurl=http://mirrors.163.com/ceph/rpm-jewel/el7/noarch
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=http://mirrors.163.com/ceph/keys/release.asc
priority=1
[ceph-source]
name=Ceph source packages
baseurl=http://mirrors.163.com/ceph/rpm-jewel/el7/SRPMS
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=http://mirrors.163.com/ceph/keys/release.asc
priority=1
EOF


#sed -e "s/^metalink=/#metalink=/g" \
# -e "s/^mirrorlist=http/#mirrorlist=http/g" \
# -e "s@^#baseurl=@baseurl=@g" \
# -i /etc/yum.repos.d/*.repo

#echo 192.168.239.241 mirror.centos.org >> /etc/hosts
#echo 192.168.239.241 download.fedoraproject.org >> /etc/hosts


#4.update system & install ceph-deploy##

yum update -y &&yum clean all &&yum -y install ceph-deploy

#5.Set up local SSH keys#
#ssh-keygen
#ssh-copy-id ceph

ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#ssh -o stricthostkeychecking=no $HOSTNAME

#####################################################################
#1.Initialize the Ceph cluster###
yum clean all &&yum -y install ceph-deploy

mkdir /etc/ceph &&cd /etc/ceph
ceph-deploy new ceph

#ceph-deploy new $HOSTNAME

#2.Edit the config file ###

cp ceph.conf ceph.conf.bak
#sed -i 's/cephx/none/g' /etc/ceph/ceph.conf
sed -i 's@^$@osd_pool_default_size = 1@g' ceph.conf
echo "mon_pg_warn_max_per_osd = 1000" >> /etc/ceph/ceph.conf


#3.Install Ceph###

ceph-deploy install ceph

#4.Create the monitor service###
ceph-deploy mon create ceph
ceph-deploy gatherkeys ceph
#4.OSD###

#Prepare the OSD ###
mkfs.xfs /dev/sdb
mkdir -p /var/local/osd
mount /dev/sdb /var/local/osd/
chown -R ceph:ceph /var/local/osd*
#Create the OSD ###
ceph-deploy osd prepare ceph:/var/local/osd
#Activate the OSD ###
ceph-deploy osd activate ceph:/var/local/osd
#chown -R ceph:ceph /var/local/osd*  forgetting to set directory ownership is a common cause of OSD activation failures
#Check status: ###
ceph-deploy osd list ceph

#5.Fix config file permissions###

ceph-deploy admin ceph
chmod +r /etc/ceph/*

#6.Deploy the MDS service###

ceph-deploy mds create ceph
ceph mds stat

#7.Create the Ceph filesystem###

ceph fs ls
ceph osd pool create cephfs_data 128
ceph osd pool create cephfs_metadata 128
ceph fs new cephfs cephfs_metadata cephfs_data
ceph fs ls

#8.Mount the Ceph filesystem

mkdir /ceph
yum install -y ceph-fuse
IP=`ip route |grep src|grep metric|awk -F" " '{ print $9 }'`
ceph-fuse -m $IP:6789/ /ceph
df -Th

#9.Check Ceph status

#Ceph monitor quorum status: ceph quorum_status --format json-pretty

ceph mon stat
ceph osd stat
#ceph osd tree (shows the CRUSH map)
ceph osd tree
ceph pg stat
#ceph auth list (cluster authentication keys)

ssh -o stricthostkeychecking=no $HOSTNAME

Create CephFS and mount it
# CephFS needs two pools, one for data and one for metadata
ceph osd pool create cephfs_data 128
ceph osd pool create cephfs_meta 128
ceph osd lspools

# Create a CephFS filesystem
ceph fs new cephfs cephfs_meta cephfs_data

# List filesystems
ceph fs ls

# ceph auth get-key client.admin
client.admin
key: AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==

# Mount on the worker (slave) node
$ mount -t ceph 172.21.51.55:6789:/ /mnt/cephfs -o name=admin,secret=AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==
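
Passing the key on the command line leaves it in the shell history; the kernel CephFS client also accepts a secret file, so an alternative (the file path is just a common convention) is:

echo "AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==" > /etc/ceph/admin.secret
chmod 600 /etc/ceph/admin.secret
mount -t ceph 172.21.51.55:6789:/ /mnt/cephfs -o name=admin,secretfile=/etc/ceph/admin.secret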

Deploy cephfs-provisioner

$ cat external-storage-cephfs-provisioner.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cephfs-provisioner
  namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cephfs-provisioner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "get", "delete"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cephfs-provisioner
subjects:
  - kind: ServiceAccount
    name: cephfs-provisioner
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: cephfs-provisioner
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cephfs-provisioner
  namespace: kube-system
rules:
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "get", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cephfs-provisioner
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cephfs-provisioner
subjects:
  - kind: ServiceAccount
    name: cephfs-provisioner
    namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cephfs-provisioner
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cephfs-provisioner
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: cephfs-provisioner
    spec:
      containers:
        - name: cephfs-provisioner
          image: "quay.io/external_storage/cephfs-provisioner:latest"
          env:
            - name: PROVISIONER_NAME
              value: ceph.com/cephfs
          imagePullPolicy: IfNotPresent
          command:
            - "/usr/local/bin/cephfs-provisioner"
          args:
            - "-id=cephfs-provisioner-1"
            - "-disable-ceph-namespace-isolation=true"
      serviceAccount: cephfs-provisioner

Look up the admin account's key on the Ceph monitor node

$ ceph auth list
$ ceph auth get-key client.admin
AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==

Create the Secret

$ echo -n AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w==|base64
QVFCUFRzdGdjMDc4TkJBQTc4RDEvS0FCZ2xJWkhLaDcrRzJYOHc9PQ==
$ cat ceph-admin-secret.yaml
apiVersion: v1
data:
  key: QVFCUFRzdGdjMDc4TkJBQTc4RDEvS0FCZ2xJWkhLaDcrRzJYOHc9PQ==
kind: Secret
metadata:
  name: ceph-admin-secret
  namespace: kube-system
type: Opaque
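
Equivalently, the Secret can be created in one step with kubectl, which does the base64 encoding itself:

kubectl -n kube-system create secret generic ceph-admin-secret \
  --from-literal=key='AQBPTstgc078NBAA78D1/KABglIZHKh7+G2X8w=='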

Create the StorageClass

$ cat cephfs-storage-class.yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: dynamic-cephfs
provisioner: ceph.com/cephfs
parameters:
  monitors: 172.21.51.55:6789
  adminId: admin
  adminSecretName: ceph-admin-secret
  adminSecretNamespace: "kube-system"
  claimRoot: /volumes/kubernetes

Verify dynamic PVC provisioning

Create a PVC that names the StorageClass and a storage size, and a volume is provisioned dynamically.

Create a PVC and confirm a PV is generated automatically

$ cat cephfs-pvc-test.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: cephfs-claim
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: dynamic-cephfs
  resources:
    requests:
      storage: 2Gi

$ kubectl create -f cephfs-pvc-test.yaml

$ kubectl get pv
pvc-2abe427e-7568-442d-939f-2c273695c3db 2Gi RWO Delete Bound default/cephfs-claim dynamic-cephfs 1s
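
As a further optional check, the claim should report Bound, and the dynamically created subvolume should appear under the claimRoot configured in the StorageClass on any node that has CephFS mounted (the exact layout depends on claimRoot and the provisioner version):

kubectl describe pvc cephfs-claim | grep -E 'Status|Volume'
# On the node with /mnt/cephfs mounted:
ls /mnt/cephfs/volumes/kubernetes/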

Add a hosts entry on your workstation

sudo vim /etc/hosts
192.168.3.101 prometheus.yuan.com

# Open in a browser
prometheus.yuan.com

helm3 chart development

· 5 min read

Install helm3

#wget https://docs.rancher.cn/download/helm/helm-v3.3.0-linux-amd64.tar.gz 
wget https://get.helm.sh/helm-v3.9.2-linux-amd64.tar.gz #download the helm release tarball

tar -zxvf helm-v3.9.2-linux-amd64.tar.gz #unpack the tarball
cp linux-amd64/helm /usr/local/bin/

helm version #verify the installation

![](/img/helm chart开发/1.jpg)

helm shell completion

source <(helm completion bash)
echo "source <(helm completion bash)" >> ~/.bashrc
source ~/.bashrc

Common helm commands

| Command | Usage | Description |
| --- | --- | --- |
| create | helm create NAME [flags] | create a new chart with the given name |
| install | helm install [NAME] [CHART] [flags] | installs a chart |
| pull | helm pull [chart URL \| repo/chartname] [...] [flags] | download a chart from a repository and (optionally) unpack it in local directory |
| repo | helm repo ... | add, list, remove, update, and index chart repositories |
| search | helm search [command] (repo/hub) | search for a keyword in charts |
| uninstall | helm uninstall RELEASE_NAME [...] [flags] | uninstall a release |
| upgrade | helm upgrade [RELEASE] [CHART] [flags] | upgrade a release |

Create a chart package

helm create httpbin  #create the httpbin chart

Inspect the httpbin directory structure

sudo apt install tree #install tree

tree httpbin -a
httpbin
├── charts
├── Chart.yaml
├── .helmignore
├── templates
│   ├── deployment.yaml
│   ├── _helpers.tpl
│   ├── ingress.yaml
│   ├── NOTES.txt
│   ├── serviceaccount.yaml
│   ├── service.yaml
│   └── tests
│       └── test-connection.yaml
└── values.yaml

3 directories, 10 files

Chart package file structure

Helm standardizes the directory and file layout of a chart; every directory and file has a defined purpose.

  • charts/: contains other charts, known as sub-charts or dependency charts.
  • Chart.yaml: describes the chart; values defined here can be accessed from the templates.
  • .helmignore: lists the files that helm package excludes from the packaged .tgz.
  • ci/: absent by default; scripts for continuous integration.
  • templates/: the template files, mainly the Kubernetes resource YAML. When a chart is installed, Helm renders the templates using Chart.yaml, values.yaml and any values supplied on the command line, then submits the rendered resources to Kubernetes.
  • _helpers.tpl: reusable template snippets; definitions in this file can be used from any resource template (see the sketch after this list).
  • NOTES.txt: usage notes printed after chart operations such as install and upgrade.
  • tests/: test cases. A test is a pod resource that runs a given command in a container; the test passes if the container exits successfully (exit 0). The pod definition must carry one of the Helm test hook annotations: helm.sh/hook: test-success or helm.sh/hook: test-failure.
  • values.yaml: the chart's default values; Helm uses them when rendering the templates.
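
As a small illustration of how _helpers.tpl is consumed (simplified relative to what helm create actually generates), a named template is defined once and then included from any resource template:

# templates/_helpers.tpl (simplified sketch)
{{- define "httpbin.fullname" -}}
{{- printf "%s-%s" .Release.Name .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end }}

# templates/service.yaml then reuses it:
metadata:
  name: {{ include "httpbin.fullname" . }}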

Modify values.yaml

Edit values.yaml as follows; Helm uses these values when rendering the templates.

  • Change the image to image.repository=docker.io/kennethreitz/httpbin.
  • Do not create a ServiceAccount: serviceAccount.create=false.
  • So the Service is reachable from outside the Kubernetes cluster, change the type to NodePort and add a parameter that pins a fixed nodePort.
cd httpbin/
vi values.yaml

# Default values for httpbin.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
  repository: docker.io/kennethreitz/httpbin  # changed to the httpbin image
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: latest

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: false  # do not create a ServiceAccount
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

podAnnotations: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: NodePort   # changed from ClusterIP to NodePort
  port: 80
  nodePort: 30080  # fixed NodePort

ingress:
  enabled: false
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths: []
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

nodeSelector: {}

tolerations: []

affinity: {}

Modify service.yaml

Because a fixed nodePort is configured, add that parameter to service.yaml and reference the corresponding value:

vi templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: {{ include "httpbin.fullname" . }}
  labels:
    {{- include "httpbin.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
      nodePort: {{ .Values.service.nodePort }}  # new parameter
  selector:
    {{- include "httpbin.selectorLabels" . | nindent 4 }}

Inspect the rendered output

helm template my-release httpbin

![](/img/helm chart开发/2.jpg)

Install httpbin

Once the rendered output looks correct, install httpbin

helm install my-release httpbin

![](/img/helm chart开发/3.jpg)

Access httpbin

kubectl get pod #check whether httpbin deployed successfully

![](/img/helm chart开发/4.jpg)

Visit http://IP:30080

Package the chart

After a successful deployment, the chart can be packaged

helm package httpbin
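
helm create sets the chart version to 0.1.0 by default, so the archive is httpbin-0.1.0.tgz. The package can be installed directly or published as a simple chart repository; the paths and release name below are arbitrary examples:

helm install my-release-2 ./httpbin-0.1.0.tgz   # install straight from the archive
mkdir -p /data/charts && cp httpbin-0.1.0.tgz /data/charts/
helm repo index /data/charts                    # generate index.yaml for a chart repository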

kubeadm-ubuntu

· 13 min read

Host plan

| Host IP | Hostname | Configuration |
| --- | --- | --- |
| 172.16.10.81 | k8s-master01 | Ubuntu 20.04.1 LTS, 2-core CPU, 8 GB RAM, 40 GB system disk |
| 172.16.10.82 | k8s-master02 | Ubuntu 20.04.1 LTS, 2-core CPU, 8 GB RAM, 40 GB system disk |
| 172.16.10.83 | k8s-master03 | Ubuntu 20.04.1 LTS, 2-core CPU, 8 GB RAM, 40 GB system disk |
| 172.16.10.84 | k8s-node01 | Ubuntu 20.04.1 LTS, 2-core CPU, 8 GB RAM, 40 GB system disk |

Install base packages (on every node)

# Update the apt package index
apt-get update

# Install packages that allow apt to use repositories over HTTPS
apt-get -y install \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common \
ntpdate

Environment preparation (all nodes)

  • Disable swap
swapoff -a  
sed -i 's/.*swap.*/#&/' /etc/fstab
  • Disable the firewall
ufw disable
  • Configure timezone and locale
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'LANG="en_US.UTF-8"' >> /etc/profile
source /etc/profile
  • Configure kernel parameters so iptables can see bridged traffic
cat >> /etc/sysctl.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.conf.all.forwarding = 1
net.ipv4.ip_forward= 1
fs.aio-max-nr = 1048576
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_max_syn_backlog = 8096
EOF

sysctl -p
  • Add entries to /etc/hosts
172.16.10.81 k8s-master01
172.16.10.82 k8s-master02
172.16.10.83 k8s-master03
172.16.10.84 k8s-node01
  • Sync the time
ntpdate cn.pool.ntp.org
  • Add a cron job that re-syncs once per hour
crontab -e
0 */1 * * * /usr/sbin/ntpdate cn.pool.ntp.org
  • Enable ipvs. Without it kube-proxy falls back to iptables, which is less efficient, so loading the ipvs kernel modules is recommended (a persistence sketch follows this list)
#Check whether the ipvs modules are loaded
lsmod|grep ip_vs

#If not, load them with:
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh

  • Allow root login over SSH
vi /etc/ssh/sshd_config
PermitRootLogin yes

systemctl restart sshd
  • Set up passwordless SSH
# Run on master01
ssh-keygen -t rsa
ssh-copy-id root@k8s-master01
ssh-copy-id root@k8s-master02
ssh-copy-id root@k8s-master03
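
The modprobe commands in the ipvs step above do not survive a reboot; a minimal persistence sketch using systemd's modules-load.d (the file name is a convention, not mandated) is:

cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
EOF
systemctl restart systemd-modules-load && lsmod | grep ip_vs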

Install Docker (all nodes)

# Add Docker's official GPG key
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -

# Verify that you now have the key by searching for the last 8 characters of the fingerprint
sudo apt-key fingerprint 0EBFCD88

# Install the add-apt-repository tool
apt-get -y install software-properties-common

# Add the stable repository
add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"

# Update the apt package index
apt-get update

# List available Docker versions
apt-cache madison docker-ce

# Install Docker
apt-get -y install docker-ce=5:19.03.12~3-0~ubuntu-focal docker-ce-cli=5:19.03.12~3-0~ubuntu-focal containerd.io

# Check Docker info
docker info

# Fix: WARNING: No swap limit support (Docker cannot enforce swap limits on this OS)
# This warning does not occur on RPM-based systems, where the feature is enabled by default.
# Edit /etc/default/grub and add the key-value pairs "cgroup_enable=memory swapaccount=1" to the GRUB_CMDLINE_LINUX line, so that it reads:
GRUB_CMDLINE_LINUX="cgroup_enable=memory swapaccount=1 net.ifnames=0 vga=792 console=tty0 console=ttyS0,115200n8 noibrs"

### Update grub and reboot the machine
update-grub
reboot

# Edit the Docker daemon config to add a registry mirror
cat > /etc/docker/daemon.json << EOF
{
  "oom-score-adjust": -1000,
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m",
    "max-file": "3"
  },
  "max-concurrent-downloads": 10,
  "insecure-registries": ["0.0.0.0/0"],
  "max-concurrent-uploads": 10,
  "registry-mirrors": ["https://dockerhub.azk8s.cn"],
  "storage-driver": "overlay2",
  "storage-opts": [
    "overlay2.override_kernel_check=true"
  ]
}
EOF

systemctl daemon-reload && systemctl restart docker && systemctl enable docker

Install a highly available Kubernetes 1.17.3 cluster

Install kubeadm, kubelet and kubectl (all nodes)

#Download the GPG key
curl https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -

#Add the Kubernetes apt source
cat <<EOF >/etc/apt/sources.list.d/kubernetes.list
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF

# Update the apt package index
sudo apt-get update

# List available kubeadm versions
apt-cache madison kubeadm

#Install the matching kubelet, kubeadm and kubectl versions
sudo apt-get install -y kubelet=1.17.3-00 kubeadm=1.17.3-00 kubectl=1.17.3-00

#Check the kubeadm version
kubeadm version

kubeadm command reference

kubeadm config upload from-file: generate the in-cluster ConfigMap from a config file.
kubeadm config upload from-flags: generate the ConfigMap from configuration flags.
kubeadm config view: view the configuration values currently stored in the cluster.
kubeadm config print init-defaults: print the default kubeadm init parameter file.
kubeadm config print join-defaults: print the default kubeadm join parameter file.
kubeadm config migrate: convert the configuration between old and new versions.
kubeadm config images list: list the required images.
kubeadm config images pull: pull the images to the local machine.
kubeadm reset: tear down what kubeadm set up on the node.
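
Since the cluster below is initialized against the aliyun image repository, the control-plane images can be pre-pulled on each master to speed up kubeadm init; a sketch matching the versions used here:

kubeadm config images pull \
  --image-repository registry.cn-hangzhou.aliyuncs.com/google_containers \
  --kubernetes-version v1.17.3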

Set up an HA control plane with haproxy + keepalived

Install keepalived (master nodes)

-> k8s-master01
sudo apt-get install keepalived -y

#Edit the keepalived config
vi /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
}

vrrp_instance VI_1 {
    state BACKUP
    nopreempt
    interface eth0
    virtual_router_id 80
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass just0kk
    }
    virtual_ipaddress {
        192.168.10.88 #VIP address
    }
}

systemctl start keepalived && systemctl enable keepalived

-> k8s-master02
sudo apt-get install keepalived -y

#Edit the keepalived config
vi /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
}

vrrp_instance VI_1 {
    state BACKUP
    nopreempt
    interface eth0
    virtual_router_id 80
    priority 50
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass just0kk
    }
    virtual_ipaddress {
        192.168.10.88 #VIP address
    }
}

systemctl start keepalived && systemctl enable keepalived

-> k8s-master03
sudo apt-get install keepalived -y

#Edit the keepalived config
vi /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
}

vrrp_instance VI_1 {
    state BACKUP
    nopreempt
    interface eth0
    virtual_router_id 80
    priority 30
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass just0kk
    }
    virtual_ipaddress {
        192.168.10.88 #VIP address
    }
}

systemctl start keepalived && systemctl enable keepalived

Install haproxy (HAProxy node)

# Write the haproxy config
cat >> /root/haproxy.cfg <<EOF
global
    log 127.0.0.1 local0
    log 127.0.0.1 local1 notice
    maxconn 4096
    daemon

defaults
    log global
    mode http
    option httplog
    option dontlognull
    retries 3
    option redispatch
    timeout connect 5000
    timeout client 50000
    timeout server 50000

frontend stats-front
    bind *:8081
    mode http
    default_backend stats-back

frontend fe_k8s_6444
    bind *:6444
    mode tcp
    timeout client 1h
    log global
    option tcplog
    default_backend be_k8s_6443
    acl is_websocket hdr(Upgrade) -i WebSocket
    acl is_websocket hdr_beg(Host) -i ws

backend stats-back
    mode http
    balance roundrobin
    stats uri /haproxy/stats
    stats auth pxcstats:secret

backend be_k8s_6443
    mode tcp
    timeout queue 1h
    timeout server 1h
    timeout connect 1h
    log global
    balance roundrobin
    server k8s-master01 192.168.10.81:6443
    server k8s-master02 192.168.10.82:6443
    server k8s-master03 192.168.10.83:6443
EOF

#Start haproxy in Docker
docker run --name haproxy -v /root/haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro --restart=always --net=host -d haproxy

#Verify
docker logs haproxy #should show: New worker #1 forked

Initialize the Kubernetes cluster on master1

cat > kubeadm-config.yaml <<EOF
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
kubernetesVersion: v1.17.3
controlPlaneEndpoint: "192.168.10.88:6444"
apiServer:
  certSANs:
    - 192.168.10.81
    - 192.168.10.82
    - 192.168.10.83
    - 192.168.10.84
    - 192.168.10.88
networking:
  podSubnet: 10.244.0.0/16
imageRepository: "registry.cn-hangzhou.aliyuncs.com/google_containers"
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
EOF

#Initialize the cluster
kubeadm init --config kubeadm-config.yaml

Output like the following means initialization succeeded:

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.10.88:6444 --token 34lypv.r9czddehwscnwrgg \
--discovery-token-ca-cert-hash sha256:44adbf1427b9a034ac1eac131bd7a3a4c868439fe067b158bad68b9336c24607 \
--control-plane

Note: remember the kubeadm join command above; it is used below to join master2, master3 and the worker node to the cluster.
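
If the token expires (the default lifetime is 24 hours) before the remaining nodes are joined, a fresh worker join command can be printed on master1; for the other masters, append --control-plane to it as above:

kubeadm token create --print-join-command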

Run the following on master1 so kubectl has permission to manage the cluster:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Run kubectl get nodes on master1

STATUS is NotReady because no network plugin has been installed yet

NAME        STATUS     ROLES    AGE    VERSION
master1 NotReady master 2m13s v1.17.3

Install the calico network plugin on master1

kubectl apply -f https://docs.projectcalico.org/v3.8/manifests/calico.yaml

After installing calico, kubectl get nodes on master1 shows STATUS Ready, and kubectl get pods -n kube-system shows coredns in the Running state, which means calico is working on master1:

NAME        STATUS     ROLES    AGE    VERSION
master1 Ready master 2m13s v1.17.3

Copy the certificates from master1 to master2 and master3

# Create the target directories on master2 and master3
cd /root && mkdir -p /etc/kubernetes/pki/etcd && mkdir -p ~/.kube/

# Copy the certificates from master1 to master2 and master3 (run on master1)
scp /etc/kubernetes/pki/ca.crt k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/ca.key k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.key k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.pub k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.crt k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.key k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.crt k8s-master02:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/pki/etcd/ca.key k8s-master02:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/pki/ca.crt k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/ca.key k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.key k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.pub k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.crt k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.key k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.crt k8s-master03:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/pki/etcd/ca.key k8s-master03:/etc/kubernetes/pki/etcd/


# After the certificates are copied, run the following on master2 and master3 to join them to the control plane
kubeadm join 192.168.10.88:6444 --token 34lypv.r9czddehwscnwrgg \
--discovery-token-ca-cert-hash sha256:44adbf1427b9a034ac1eac131bd7a3a4c868439fe067b158bad68b9336c24607 \
--control-plane

#--control-plane: this flag makes the node join the cluster as a master (control-plane) node

# Run on master2 and master3:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# kubectl get nodes now shows:
NAME STATUS ROLES AGE VERSION
master1 Ready master 39m v1.17.3
master2 Ready master 5m9s v1.17.3
master3 Ready master 2m33s v1.17.3

Join node1 to the cluster (run on the worker node)

kubeadm join 192.168.10.88:6444 --token 34lypv.r9czddehwscnwrgg \
--discovery-token-ca-cert-hash sha256:44adbf1427b9a034ac1eac131bd7a3a4c868439fe067b158bad68b9336c24607

Check cluster health

# Check whether the component statuses are healthy
kubectl get componentstatuses

# View cluster info
kubectl cluster-info

# Check that the core components are Running
kubectl -n kube-system get pod

Certificate renewal

Check certificate validity

# Check the CA certificate validity
openssl x509 -in /etc/kubernetes/pki/ca.crt -noout -text |grep Not
# The CA certificate is valid for 10 years
# Check the apiserver certificate validity
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep Not
# The apiserver certificate is valid for 1 year
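
On kubeadm 1.17 the expiry of all certificates can also be listed in one command (it still lives under the alpha subcommand in this release):

kubeadm alpha certs check-expiration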

Extend certificate expiry

# Upload update-kubeadm-cert.sh to master1, master2 and master3
git clone https://github.com/judddd/kubernetes1.17.3.git
chmod +x update-kubeadm-cert.sh

# Extend all component certificates to 10 years
./update-kubeadm-cert.sh all

#Verify
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep Not

Upgrade the Kubernetes cluster

Check the current version

kubectl get nodes

Edit kubeadm-config-upgrade.yaml on the master nodes

ssh k8s-master01
cp kubeadm-config.yaml kubeadm-config-upgrade.yaml
vi kubeadm-config-upgrade.yaml
#Change kubernetesVersion to v1.18.2

scp kubeadm-config-upgrade.yaml root@k8s-master02:/root/
scp kubeadm-config-upgrade.yaml root@k8s-master03:/root/

scp kubeadm-config-upgrade.yaml root@k8s-master02:/root/
scp kubeadm-config-upgrade.yaml root@k8s-master03:/root/

Upgrade kubeadm, kubelet and kubectl on all nodes

sudo apt-get install -y kubelet=1.18.2-00 kubeadm=1.18.2-00 kubectl=1.18.2-00

Upgrade the Kubernetes control plane on the master nodes

# Run on all three master nodes
kubeadm upgrade apply --config=kubeadm-config-upgrade.yaml

Upgrade the worker node

#Run on the worker node
kubeadm upgrade node

After all nodes are upgraded, reboot any node whose new version does not show up

kubectl get node

Upgrade complete

ETCD backup and restore

Manual backup

# Copy the etcdctl binary out of the etcd container into /usr/bin on the host
docker cp k8s_etcd_etcd-master01_kube-system_32144e70958a19d4b529ed946b3e2726_1:/usr/local/bin/etcdctl /usr/bin

#Create the backup directory
mkdir /etcd_backup/

#Take the snapshot
ETCDCTL_API=3 etcdctl --endpoints https://192.168.10.81:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
snapshot save /etcd_backup/etcd-snapshot-`date +%Y%m%d`.db

Check the snapshot status

ETCDCTL_API=3 etcdctl \
--endpoints https://192.168.10.81:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--write-out=table snapshot status /etcd_backup/etcd-snapshot-`date +%Y%m%d`.db

Copy etcdctl and the snapshot to the other master nodes

scp /usr/bin/etcdctl root@k8s-master02:/usr/bin/
scp /etcd_backup/etcd-snapshot-`date +%Y%m%d`.db root@k8s-master02:/root/

scp /usr/bin/etcdctl root@k8s-master03:/usr/bin/
scp /etcd_backup/etcd-snapshot-`date +%Y%m%d`.db root@k8s-master03:/root/

Etcd data restore

# Stop the kubelet service on all three master nodes
systemctl stop kubelet

# etcd stores its data in /var/lib/etcd/, which the container mounts from the host; deleting /var/lib/etcd/ on the host wipes the etcd container's data

# Wipe the etcd data on all three master nodes
rm -rf /var/lib/etcd/

# Restore the etcd data
-> k8s-master01
export ETCDCTL_API=3
etcdctl snapshot restore /etcd_backup/etcd-snapshot-`date +%Y%m%d`.db \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--name=master01 \
--data-dir=/var/lib/etcd \
--skip-hash-check \
--initial-advertise-peer-urls=https://192.168.10.81:2380 \
--initial-cluster "master01=https://192.168.10.81:2380,master02=https://192.168.10.82:2380,master03=https://192.168.10.83:2380"

-> k8s-master02
export ETCDCTL_API=3
etcdctl snapshot restore /root/etcd-snapshot-`date +%Y%m%d`.db \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--name=master02 \
--data-dir=/var/lib/etcd \
--skip-hash-check \
--initial-advertise-peer-urls=https://192.168.10.82:2380 \
--initial-cluster "master01=https://192.168.10.81:2380,master02=https://192.168.10.82:2380,master03=https://192.168.10.83:2380"

-> k8s-master03
export ETCDCTL_API=3
etcdctl snapshot restore /root/etcd-snapshot-`date +%Y%m%d`.db \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
--name=master03 \
--data-dir=/var/lib/etcd \
--skip-hash-check \
--initial-advertise-peer-urls=https://192.168.10.83:2380 \
--initial-cluster "master01=https://192.168.10.81:2380,master02=https://192.168.10.82:2380,master03=https://192.168.10.83:2380"

#Start kubelet on the three master nodes
systemctl start kubelet

#Check that etcd is healthy
ETCDCTL_API=3 etcdctl --endpoints https://192.168.10.81:2379,https://192.168.10.82:2379,https://192.168.10.83:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/peer.crt \
--key=/etc/kubernetes/pki/etcd/peer.key \
endpoint health

The cluster is running normally.