包含标签 2021 的文章

nginx配置未生效

我已经遇到过不止一次的愚蠢操作:

新建nginx配置文件(如abc.com),配置好域名等内容后,先执行nginx -t,再执行nginx -s reload,测试时域名地址却打不开……

……

阅读全文

腾讯云tke集群Pod内存使用率告警--Python实现

代码样例:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import json
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.monitor.v20180724 import monitor_client, models
import notify_mem
import logging
import sys
import time
import os
import pickle

# Layout of each log record: timestamp, level, message.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
# Timestamp layout: month/day/year, a literal "T", then the time of day.
DATE_FORMAT = '%m/%d/%YT%H:%M:%S'
logging.basicConfig(
    filename='tke_pod_mem.log',
    level=logging.INFO,
    format=LOG_FORMAT,
    datefmt=DATE_FORMAT,
)

# Workload names passed to the "workload_name" query condition.
workload_name = ['dingtalk-webhook', 'logstash']

# Compute the point in time whose data should be pulled
def pull_time() -> str:
    """Return the minute to query as ``YYYY-MM-DDTHH:MM:00+08:00``.

    The result is the current local time minus ``sys.argv[4]`` minutes,
    truncated to the whole minute.

    The offset is subtracted from the epoch time instead of from the minute
    field alone, so the result stays valid when the subtraction crosses an
    hour or day boundary (e.g. minute 01 minus 2), and the minute is always
    zero-padded.

    NOTE(review): the ``+08:00`` suffix is hard-coded while the host's local
    time is used; presumably the host runs in UTC+8 -- confirm.

    Raises:
        IndexError: if fewer than five command-line arguments are present.
        ValueError: if ``sys.argv[4]`` is not an integer.
    """
    lag_minutes = int(sys.argv[4])
    target = time.localtime(time.time() - lag_minutes * 60)
    return time.strftime("%Y-%m-%dT%H:%M:00+08:00", target)

# Fetch the monitoring data
def main(get_time: str) -> list:
    """Query the ``K8sPodRateMemNoCacheLimit`` metric for one minute.

    Sends a ``DescribeStatisticData`` request with ``get_time`` as both the
    start and the end time, filtered by cluster id and by the module-level
    ``workload_name`` list, and returns the ``Points`` list of the first
    ``Data`` entry of the response.

    Args:
        get_time: timestamp string used as ``StartTime`` and ``EndTime``.

    Returns:
        The list of points, or an empty list when the SDK call fails or the
        response carries no data. The result is always a list, so callers can
        test it for truthiness and retry.
    """
    params = {
        "Module": "monitor",
        "Namespace": "QCE/TKE",
        "MetricNames": ["K8sPodRateMemNoCacheLimit"],
        "Conditions": [
            {
                "Key": "tke_cluster_instance_id",
                "Operator": "=",
                # Left blank in this listing -- presumably redacted; fill in.
                "Value": [""]
            },
            {
                "Key": "workload_name",
                "Operator": "in",
                "Value": workload_name
            }
        ],
        "Period": 60,
        "StartTime": get_time,
        "EndTime": get_time
    }

    try:
        # Credentials and region are blank here -- presumably redacted.
        cred = credential.Credential("", "")
        httpProfile = HttpProfile()
        httpProfile.endpoint = "monitor.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = monitor_client.MonitorClient(cred, "", clientProfile)

        req = models.DescribeStatisticDataRequest()
        req.from_json_string(json.dumps(params))

        resp = client.DescribeStatisticData(req)
        res = resp.to_json_string()
    except TencentCloudSDKException as err:
        # An API failure is an error, not routine information.
        logging.error(err)
        return []

    # The response may carry no data for the requested minute yet; treat that
    # as "nothing to process" rather than crashing on ['Data'][0].
    data = json.loads(res).get('Data') or []
    if not data:
        return []

    dimensions_values = data[0].get('Points') or []
    print(dimensions_values)

    return dimensions_values


def _pod_names(dimensions: list):
    """Yield the value of every dimension entry whose name is ``pod_name``."""
    for dimension in dimensions:
        if dimension['Name'] == 'pod_name':
            yield dimension['Value']


# Process the data points
def handler(data_lists: list, get_time: str) -> None:
    """Send or resolve memory alerts for each data point.

    For every point with a value, each ``pod_name`` dimension is checked
    against the whitelist (``sys.argv[6]``) and the threshold
    (``sys.argv[5]``):

    * Above the threshold: an alert is sent through ``notify_mem.notify``
      when the pod's counter in the module-level ``dict_tmp`` is 0. On later
      runs the counter is incremented until it equals ``sys.argv[2]``, then
      reset to 0 so the next breach alerts again.
    * At or below the threshold: if the pod's counter is positive, a
      resolution is sent through ``notify_mem.resolve`` and the counter is
      reset.

    Processing stops once ``sys.argv[3]`` alerts have been sent in this call.
    The original comparison (``n <= limit``) let ``limit + 1`` alerts through.

    All command-line arguments are read once up front, so a missing or
    malformed argument fails before any alert is sent.

    Args:
        data_lists: points as returned by ``main``; each has ``Dimensions``
            (list of ``Name``/``Value`` dicts) and ``Values``.
        get_time: timestamp string forwarded to the notifier.
    """
    webhook_url = sys.argv[1]
    repeat_after = int(sys.argv[2])
    max_alerts = int(sys.argv[3])
    raw_threshold = sys.argv[5]
    # float() also accepts integer strings, so fractional thresholds work too.
    threshold = float(raw_threshold)
    # NOTE(review): the whitelist is matched by substring, so a pod whose name
    # is contained in any part of this string is skipped -- confirm intended.
    whitelist = str(sys.argv[6])

    sent = 0
    for point in data_lists:
        # Rate limit: stop once the allowed number of alerts has been sent.
        if sent >= max_alerts:
            break

        value = point['Values'][0]['Value']
        # Skip points that carry no monitoring data.
        if value is None:
            continue

        over_threshold = float(value) > threshold
        for pod_name_id in _pod_names(point['Dimensions']):
            if pod_name_id in whitelist:
                if over_threshold:
                    logging.info(f'white pod_name: {pod_name_id}')
                continue

            if over_threshold:
                count = dict_tmp.setdefault(pod_name_id, 0)
                if count == 0:
                    # No alert outstanding for this pod: send one.
                    notify_mem.notify(pod_name_id, value, webhook_url, get_time, raw_threshold)
                    sent += 1
                    dict_tmp[pod_name_id] = 1
                elif count == repeat_after:
                    # Quiet period is over; the next breach alerts again.
                    dict_tmp[pod_name_id] = 0
                else:
                    dict_tmp[pod_name_id] = count + 1
                logging.info(dict_tmp)
                logging.info(f'{pod_name_id} values: {value}')
            elif dict_tmp.get(pod_name_id, 0) > 0:
                # Back under the threshold after an alert: report recovery.
                notify_mem.resolve(pod_name_id, value, webhook_url, get_time, raw_threshold)
                dict_tmp[pod_name_id] = 0
                logging.info('resolved.')


if __name__ == '__main__':
    # Command-line arguments:
    #   argv1: DingTalk webhook URL
    #   argv2: alert interval; handler compares it against a per-pod counter
    #          that advances once per run (the original note said "seconds")
    #   argv3: maximum number of alerts per run (originally described as
    #          "alerts per minute")
    #   argv4: how many minutes back to pull data from, e.g. 2
    #   argv5: alert threshold
    #   argv6: whitelist of pod names

    state_path = 'tmp.dict'
    max_attempts = 5
    retry_delay = 10

    # Per-pod alert counters persisted between runs. pickle is acceptable only
    # because this file is written by this script itself; never point it at
    # untrusted data.
    if os.path.exists(state_path):
        with open(state_path, 'rb') as state_file:
            dict_tmp = pickle.load(state_file)
    else:
        dict_tmp = {}

    try:
        # Bound to a separate name so the pull_time function is not shadowed.
        query_time = pull_time()
        for attempt in range(1, max_attempts + 1):
            get_data = main(query_time)
            if get_data:
                handler(get_data, query_time)
                logging.info('Success.')
                break
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts:
                logging.info('time sleep 10.')
                time.sleep(retry_delay)
        else:
            logging.info('Failed. No value.')
    finally:
        # Persist the counters even if processing failed midway, so alerts
        # that were already sent are not repeated on the next run.
        with open(state_path, 'wb') as state_file:
            pickle.dump(dict_tmp, state_file)

    logging.info('done.')
……

阅读全文

使用helm chart部署skywalking

简易版yaml文件(es7.yml)如下

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
oap:
  name: oap
  dynamicConfigEnabled: false
  image:
    tag: 8.5.2-es7
  replicas: 1
  storageType: elasticsearch7
  resources:
    limits:
      cpu: 2
      memory: 2Gi
    requests:
      cpu: 0.1
      memory: 1Gi
  env:
    SW_NAMESPACE: skywalking
    dayStep: 15

ui:
  image:
    tag: 8.5.0
  service:
    type: NodePort

elasticsearch:
  enabled: false
  config:
    host: 192.168.2.206
    port:
      http: 9200
    user: ""
    password: ""

参考地址:

……

阅读全文

使用k8s configmap管理skywalking配置文件

依赖helm部署后的skywalking

0. 修改oap的yaml配置文件,开启动态配置支持

1
2
3
4
oap:
  name: oap
  dynamicConfigEnabled: true
  ....

1. 修改application.yml文件

1
2
3
4
5
6
7
configuration:
  selector: ${SW_CONFIGURATION:k8s-configmap}
  ......
  k8s-configmap:
    period: ${SW_CONFIG_CONFIGMAP_PERIOD:60}
    namespace: ${SW_CLUSTER_K8S_NAMESPACE:default}
    labelSelector: ${SW_CLUSTER_K8S_LABEL:app=collector,release=skywalking}

主要将selector: ${SW_CONFIGURATION:none}改为selector: ${SW_CONFIGURATION:k8s-configmap}

……

阅读全文

Binlog样例

binlog_row_image: FULL

1. update
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
BEGIN
/*!*/;
# at 5273
#210728  0:53:03 server id 87904  end_log_pos 5346 CRC32 0x1a0ec140     Table_map: `xxl_job`.`xxl_job_registry` mapped to number 725
# at 5346
#210728  0:53:03 server id 87904  end_log_pos 5526 CRC32 0xaf71ae85     Update_rows: table id 725 flags: STMT_END_F
### UPDATE `xxl_job`.`xxl_job_registry`
### WHERE
###   @1=1374
###   @2='EXECUTOR'
###   @3='webapp'
###   @4='http://10.0.0.87:9999/'
###   @5='2021-07-28 00:52:34'
### SET
###   @1=1374
###   @2='EXECUTOR'
###   @3='webapp'
###   @4='http://10.0.0.87:9999/'
###   @5='2021-07-28 00:53:04'
# at 5526
#210728  0:53:03 server id 87903  end_log_pos 5557 CRC32 0xbfb76016     Xid = 657481860
COMMIT/*!*/;

包含如下信息: server id、CRC32、表名、update操作,以及update前后的字段值

……

阅读全文

Docker使用记录

安装(centos7)

# Remove any previously installed Docker packages before installing docker-ce.
yum remove docker \
    docker-client \
    docker-client-latest \
    docker-common \
    docker-latest \
    docker-latest-logrotate \
    docker-logrotate \
    docker-engine

# yum-utils provides the yum-config-manager command used below.
yum install -y yum-utils

# Register the official Docker CE repository.
yum-config-manager \
    --add-repo \
    https://download.docker.com/linux/centos/docker-ce.repo

# List the available versions
# yum list docker-ce --showduplicates | sort -r

# Install the engine, CLI, containerd and the compose plugin.
yum install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin

# Start the daemon now.
# NOTE(review): nothing here enables the service at boot -- confirm whether
# `systemctl enable docker` is wanted.
systemctl start docker

# Install the standalone docker-compose v2.5.0 binary as well; this is in
# addition to the docker-compose-plugin package installed above.
curl -sL https://github.com/docker/compose/releases/download/v2.5.0/docker-compose-linux-x86_64 -o /usr/bin/docker-compose
chmod +x /usr/bin/docker-compose

常用命令,以jenkins/jenkins:2.359-jdk11镜像为例

……

阅读全文

golang程序镜像Dockerfile

# Image that builds and runs a Go program on CentOS 7.
FROM centos:centos7
# EPEL is installed first, then golang and git; --nogpgcheck skips package
# signature verification.
RUN yum install -y epel-release --nogpgcheck
RUN yum install -y golang git --nogpgcheck
# Turn Go module mode off so the `go get` calls below fetch into GOPATH.
RUN go env -w GO111MODULE=off
# NOTE(review): /dumper is created but not referenced again in the lines shown
# here -- presumably used by the program at runtime; confirm.
RUN mkdir /dumper
RUN mkdir /app
COPY main.go /app
# NOTE(review): COPY of a directory copies its contents, so the files under
# src/ and pkg/ land directly in /app, not in /app/src or /app/pkg -- confirm
# this is intended.
COPY src /app
COPY pkg /app
WORKDIR /app
# Fetch the third-party dependencies.
RUN go get github.com/tencentyun/cos-go-sdk-v5
RUN go get github.com/fsnotify/fsnotify
# Build with the netgo tag; with WORKDIR /app the output binary is /app/main.
RUN go build -tags netgo main.go
CMD ["/app/main"]
……

阅读全文