From e6ab950022f7a664d95d242d7e1901cf7a9920bc Mon Sep 17 00:00:00 2001 From: wangzihao Date: Mon, 22 Jul 2024 12:04:49 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97reset-con?= =?UTF-8?q?fig=E6=97=A5=E5=BF=97=E5=88=B7=E5=B1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/common/file_manager.go | 2 +- pkg/device/ascendtolerance.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/common/file_manager.go b/pkg/common/file_manager.go index aa3b0cb5..6d45f6b3 100644 --- a/pkg/common/file_manager.go +++ b/pkg/common/file_manager.go @@ -47,7 +47,6 @@ func WriteToFile(info, path string) error { // RemoveFileAndDir remove file and dir func RemoveFileAndDir(namespace, name string) error { file := GenResetFileName(namespace, name) - hwlog.RunLog.Infof("delete cm(%s) file(%s)", name, file) rmErr := os.Remove(file) if rmErr != nil { return fmt.Errorf("failed to remove file(%s): %v", file, rmErr) @@ -57,6 +56,7 @@ func RemoveFileAndDir(namespace, name string) error { if err != nil { return fmt.Errorf("failed to remove dir(%s): %v", dir, err) } + hwlog.RunLog.Infof("delete cm(%s) file(%s)", name, file) return nil } diff --git a/pkg/device/ascendtolerance.go b/pkg/device/ascendtolerance.go index bcf3b041..e255fd37 100644 --- a/pkg/device/ascendtolerance.go +++ b/pkg/device/ascendtolerance.go @@ -143,7 +143,7 @@ func (hrt *HotResetTools) SyncResetCM(client *kubeclient.ClientK8s) { }), ) cmInformer := cmFactory.Core().V1().ConfigMaps().Informer() - cmInformer.AddEventHandler(client.ResourceEventHandler(kubeclient.CMResource, checkConfigMap)) + cmInformer.AddEventHandlerWithResyncPeriod(client.ResourceEventHandler(kubeclient.CMResource, checkConfigMap), 0) go cmInformer.Run(wait.NeverStop) hrt.queue = client.Queue -- Gitee From 48f74081c006573ed7417223dcff545f862a60b0 Mon Sep 17 00:00:00 2001 From: wangzihao Date: Mon, 22 Jul 2024 17:05:43 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97reset-con?= =?UTF-8?q?fig=E6=97=A5=E5=BF=97=E5=88=B7=E5=B1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/common/common.go | 25 +++++++++++++++++++++++++ pkg/device/ascendtolerance.go | 18 ++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/pkg/common/common.go b/pkg/common/common.go index 33969a9d..48371ab0 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -582,6 +582,31 @@ func IntInList(num int32, list []int32) bool { return false } +// StrInList check if string in list +func StrInList(str string, list []string) bool { + for _, val := range list { + if val == str { + return true + } + } + return false +} + +// DeleteStrFromList delete string from list +func DeleteStrFromList(str string, list []string) { + index := -1 + for i, val := range list { + if val == str { + index = i + break + } + } + if index != -1 { + list = append(list[:index], list[index+1:]...) + return + } +} + // GetJobNameOfPod get job name of pod from annotations or labels func GetJobNameOfPod(pod *v1.Pod) string { taskName, ok := pod.Labels[ResetTaskNameKey] diff --git a/pkg/device/ascendtolerance.go b/pkg/device/ascendtolerance.go index e255fd37..45a8f1f3 100644 --- a/pkg/device/ascendtolerance.go +++ b/pkg/device/ascendtolerance.go @@ -88,6 +88,7 @@ type HotResetTools struct { podIndexer cache.Indexer cmIndexer cache.Indexer jobs map[string]string + cmKeyList []string } // NewHotResetManager create HotResetManager and init data @@ -123,6 +124,7 @@ func NewHotResetManager(devUsage string) HotResetManager { common.ResetError: common.ResetErrorLevel, common.IsolateError: common.IsolateErrorLevel, }, + cmKeyList: []string{}, } } @@ -143,7 +145,7 @@ func (hrt *HotResetTools) SyncResetCM(client *kubeclient.ClientK8s) { }), ) cmInformer := cmFactory.Core().V1().ConfigMaps().Informer() - cmInformer.AddEventHandlerWithResyncPeriod(client.ResourceEventHandler(kubeclient.CMResource, checkConfigMap), 0) + cmInformer.AddEventHandler(client.ResourceEventHandler(kubeclient.CMResource, checkConfigMap)) go cmInformer.Run(wait.NeverStop) hrt.queue = client.Queue @@ -162,7 +164,7 @@ func (hrt *HotResetTools) run() { } func (hrt *HotResetTools) processNextWorkItem() bool { - hwlog.RunLog.Infof("queue length: %d", hrt.queue.Len()) + hwlog.RunLog.Debugf("queue length: %d", hrt.queue.Len()) obj, shutdown := hrt.queue.Get() if shutdown { hwlog.RunLog.Errorf("shutdown, stop processing work queue") @@ -208,7 +210,7 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { hwlog.RunLog.Errorf("get kubeclient event error") return } - hwlog.RunLog.Infof("handle pod(%s) %s event", event.Key, event.Type) + hwlog.RunLog.Debugf("handle pod(%s) %s event", event.Key, event.Type) pod, err := hrt.getPodFromCache(event.Key) if err != nil { hwlog.RunLog.Warn(err) @@ -222,9 +224,15 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { return } hrt.jobs[event.Key] = jobName - cm, err := hrt.GetCMFromCache(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) + cmKey := fmt.Sprintf(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) + cm, err := hrt.GetCMFromCache(cmKey) if err != nil { + if common.StrInList(cmKey, hrt.cmKeyList) { + hrt.queue.AddRateLimited(obj) + return + } hwlog.RunLog.Warn(err) + hrt.cmKeyList = append(hrt.cmKeyList, cmKey) hrt.queue.AddRateLimited(obj) return } @@ -254,6 +262,8 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } namespace := keySlice[0] + cmKey := fmt.Sprintf(namespace + "/" + common.ResetInfoCMNamePrefix + jobName) + common.DeleteStrFromList(cmKey, hrt.cmKeyList) rmErr := common.RemoveFileAndDir(namespace, common.ResetInfoCMNamePrefix+jobName) if rmErr != nil { hwlog.RunLog.Errorf("Failed to remove file: %v", rmErr) -- Gitee From 0c8c88cad8d8dcf1e894f73fcdfe1fdfe1e86d9e Mon Sep 17 00:00:00 2001 From: wangzihao Date: Tue, 23 Jul 2024 16:46:43 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97reset-con?= =?UTF-8?q?fig=E6=97=A5=E5=BF=97=E5=88=B7=E5=B1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/common/common.go | 25 ------------------------- pkg/device/ascendtolerance.go | 24 +++++++++++++++--------- 2 files changed, 15 insertions(+), 34 deletions(-) diff --git a/pkg/common/common.go b/pkg/common/common.go index 48371ab0..33969a9d 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -582,31 +582,6 @@ func IntInList(num int32, list []int32) bool { return false } -// StrInList check if string in list -func StrInList(str string, list []string) bool { - for _, val := range list { - if val == str { - return true - } - } - return false -} - -// DeleteStrFromList delete string from list -func DeleteStrFromList(str string, list []string) { - index := -1 - for i, val := range list { - if val == str { - index = i - break - } - } - if index != -1 { - list = append(list[:index], list[index+1:]...) - return - } -} - // GetJobNameOfPod get job name of pod from annotations or labels func GetJobNameOfPod(pod *v1.Pod) string { taskName, ok := pod.Labels[ResetTaskNameKey] diff --git a/pkg/device/ascendtolerance.go b/pkg/device/ascendtolerance.go index 45a8f1f3..f5f1e156 100644 --- a/pkg/device/ascendtolerance.go +++ b/pkg/device/ascendtolerance.go @@ -88,7 +88,7 @@ type HotResetTools struct { podIndexer cache.Indexer cmIndexer cache.Indexer jobs map[string]string - cmKeyList []string + cmKeys map[string]string } // NewHotResetManager create HotResetManager and init data @@ -124,7 +124,7 @@ func NewHotResetManager(devUsage string) HotResetManager { common.ResetError: common.ResetErrorLevel, common.IsolateError: common.IsolateErrorLevel, }, - cmKeyList: []string{}, + cmKeys: map[string]string{}, } } @@ -227,12 +227,11 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { cmKey := fmt.Sprintf(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) cm, err := hrt.GetCMFromCache(cmKey) if err != nil { - if common.StrInList(cmKey, hrt.cmKeyList) { - hrt.queue.AddRateLimited(obj) - return + _, ok = hrt.cmKeys[string(pod.UID)] + if !ok { + hwlog.RunLog.Warn(err) + hrt.cmKeys[string(pod.UID)] = "" } - hwlog.RunLog.Warn(err) - hrt.cmKeyList = append(hrt.cmKeyList, cmKey) hrt.queue.AddRateLimited(obj) return } @@ -249,6 +248,12 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } hwlog.RunLog.Debugf("handle pod(%s) delete event", event.Key) + pod, err := hrt.getPodFromCache(event.Key) + if err != nil { + hwlog.RunLog.Warn(err) + hrt.queue.AddRateLimited(obj) + return + } jobName, ok := hrt.jobs[event.Key] if !ok { hwlog.RunLog.Errorf("job of pod(%s) not found in cache", event.Key) @@ -262,8 +267,9 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } namespace := keySlice[0] - cmKey := fmt.Sprintf(namespace + "/" + common.ResetInfoCMNamePrefix + jobName) - common.DeleteStrFromList(cmKey, hrt.cmKeyList) + if _, ok = hrt.cmKeys[string(pod.UID)]; ok { + delete(hrt.cmKeys, string(pod.UID)) + } rmErr := common.RemoveFileAndDir(namespace, common.ResetInfoCMNamePrefix+jobName) if rmErr != nil { hwlog.RunLog.Errorf("Failed to remove file: %v", rmErr) -- Gitee From 22c8931ec2bdcf5b7abd5201a32dd9c926477712 Mon Sep 17 00:00:00 2001 From: wangzihao Date: Tue, 23 Jul 2024 16:53:41 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97reset-con?= =?UTF-8?q?fig=E6=97=A5=E5=BF=97=E5=88=B7=E5=B1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/device/ascendtolerance.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/device/ascendtolerance.go b/pkg/device/ascendtolerance.go index f5f1e156..988d6009 100644 --- a/pkg/device/ascendtolerance.go +++ b/pkg/device/ascendtolerance.go @@ -88,7 +88,7 @@ type HotResetTools struct { podIndexer cache.Indexer cmIndexer cache.Indexer jobs map[string]string - cmKeys map[string]string + noResetCmPodUIDs map[string]string } // NewHotResetManager create HotResetManager and init data @@ -124,7 +124,7 @@ func NewHotResetManager(devUsage string) HotResetManager { common.ResetError: common.ResetErrorLevel, common.IsolateError: common.IsolateErrorLevel, }, - cmKeys: map[string]string{}, + noResetCmPodUIDs: map[string]string{}, } } @@ -227,10 +227,10 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { cmKey := fmt.Sprintf(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) cm, err := hrt.GetCMFromCache(cmKey) if err != nil { - _, ok = hrt.cmKeys[string(pod.UID)] + _, ok = hrt.noResetCmPodUIDs[string(pod.UID)] if !ok { hwlog.RunLog.Warn(err) - hrt.cmKeys[string(pod.UID)] = "" + hrt.noResetCmPodUIDs[string(pod.UID)] = "" } hrt.queue.AddRateLimited(obj) return @@ -267,8 +267,8 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } namespace := keySlice[0] - if _, ok = hrt.cmKeys[string(pod.UID)]; ok { - delete(hrt.cmKeys, string(pod.UID)) + if _, ok = hrt.noResetCmPodUIDs[string(pod.UID)]; ok { + delete(hrt.noResetCmPodUIDs, string(pod.UID)) } rmErr := common.RemoveFileAndDir(namespace, common.ResetInfoCMNamePrefix+jobName) if rmErr != nil { -- Gitee From 0f36f8359b27aee0a014dcd9b2285754caada22b Mon Sep 17 00:00:00 2001 From: wangzihao Date: Tue, 23 Jul 2024 17:07:57 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97reset-con?= =?UTF-8?q?fig=E6=97=A5=E5=BF=97=E5=88=B7=E5=B1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/device/ascendtorlerance_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/device/ascendtorlerance_test.go b/pkg/device/ascendtorlerance_test.go index f1a593cf..8fa522ca 100644 --- a/pkg/device/ascendtorlerance_test.go +++ b/pkg/device/ascendtorlerance_test.go @@ -1212,11 +1212,12 @@ func TestHandleCMDeleteEvent(t *testing.T) { func newHotResetTools() *HotResetTools { return &HotResetTools{ - ringNum: common.Ascend910RingsNum, - resetTask: map[string]struct{}{}, - resetDev: map[int32]struct{}{}, - faultDev2PodMap: map[int32]v1.Pod{}, - jobs: map[string]string{}, + ringNum: common.Ascend910RingsNum, + resetTask: map[string]struct{}{}, + resetDev: map[int32]struct{}{}, + faultDev2PodMap: map[int32]v1.Pod{}, + jobs: map[string]string{}, + noResetCmPodUIDs: map[string]string{}, processPolicyTable: map[string]int{ common.EmptyError: common.EmptyErrorLevel, common.IgnoreError: common.IgnoreErrorLevel, -- Gitee From 36f4f68bf5d6aa2f98e70bf915be5b268824aea7 Mon Sep 17 00:00:00 2001 From: wangzihao Date: Wed, 24 Jul 2024 10:18:29 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97reset-con?= =?UTF-8?q?fig=E6=97=A5=E5=BF=97=E5=88=B7=E5=B1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/device/ascendtolerance.go | 28 +++++++++++----------------- pkg/device/ascendtorlerance_test.go | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pkg/device/ascendtolerance.go b/pkg/device/ascendtolerance.go index 988d6009..045f42f0 100644 --- a/pkg/device/ascendtolerance.go +++ b/pkg/device/ascendtolerance.go @@ -88,7 +88,7 @@ type HotResetTools struct { podIndexer cache.Indexer cmIndexer cache.Indexer jobs map[string]string - noResetCmPodUIDs map[string]string + noResetCmPodKeys map[string]string } // NewHotResetManager create HotResetManager and init data @@ -110,11 +110,12 @@ func NewHotResetManager(devUsage string) HotResetManager { return nil } return &HotResetTools{ - ringNum: ringNumber, - resetTask: map[string]struct{}{}, - resetDev: map[int32]struct{}{}, - faultDev2PodMap: map[int32]v1.Pod{}, - jobs: map[string]string{}, + ringNum: ringNumber, + resetTask: map[string]struct{}{}, + resetDev: map[int32]struct{}{}, + faultDev2PodMap: map[int32]v1.Pod{}, + jobs: map[string]string{}, + noResetCmPodKeys: map[string]string{}, processPolicyTable: map[string]int{ common.EmptyError: common.EmptyErrorLevel, common.IgnoreError: common.IgnoreErrorLevel, @@ -124,7 +125,6 @@ func NewHotResetManager(devUsage string) HotResetManager { common.ResetError: common.ResetErrorLevel, common.IsolateError: common.IsolateErrorLevel, }, - noResetCmPodUIDs: map[string]string{}, } } @@ -227,10 +227,10 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { cmKey := fmt.Sprintf(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) cm, err := hrt.GetCMFromCache(cmKey) if err != nil { - _, ok = hrt.noResetCmPodUIDs[string(pod.UID)] + _, ok = hrt.noResetCmPodKeys[event.Key] if !ok { hwlog.RunLog.Warn(err) - hrt.noResetCmPodUIDs[string(pod.UID)] = "" + hrt.noResetCmPodKeys[event.Key] = "" } hrt.queue.AddRateLimited(obj) return @@ -248,11 +248,8 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } hwlog.RunLog.Debugf("handle pod(%s) delete event", event.Key) - pod, err := hrt.getPodFromCache(event.Key) - if err != nil { - hwlog.RunLog.Warn(err) - hrt.queue.AddRateLimited(obj) - return + if _, ok = hrt.noResetCmPodKeys[event.Key]; ok { + delete(hrt.noResetCmPodKeys, event.Key) } jobName, ok := hrt.jobs[event.Key] if !ok { @@ -267,9 +264,6 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } namespace := keySlice[0] - if _, ok = hrt.noResetCmPodUIDs[string(pod.UID)]; ok { - delete(hrt.noResetCmPodUIDs, string(pod.UID)) - } rmErr := common.RemoveFileAndDir(namespace, common.ResetInfoCMNamePrefix+jobName) if rmErr != nil { hwlog.RunLog.Errorf("Failed to remove file: %v", rmErr) diff --git a/pkg/device/ascendtorlerance_test.go b/pkg/device/ascendtorlerance_test.go index 8fa522ca..8932d315 100644 --- a/pkg/device/ascendtorlerance_test.go +++ b/pkg/device/ascendtorlerance_test.go @@ -1217,7 +1217,7 @@ func newHotResetTools() *HotResetTools { resetDev: map[int32]struct{}{}, faultDev2PodMap: map[int32]v1.Pod{}, jobs: map[string]string{}, - noResetCmPodUIDs: map[string]string{}, + noResetCmPodKeys: map[string]string{}, processPolicyTable: map[string]int{ common.EmptyError: common.EmptyErrorLevel, common.IgnoreError: common.IgnoreErrorLevel, -- Gitee