diff --git a/pkg/common/file_manager.go b/pkg/common/file_manager.go index aa3b0cb50823f61e00814978895599de726faa88..6d45f6b393ef770d6dfcfd1633d8f987e6fb6e17 100644 --- a/pkg/common/file_manager.go +++ b/pkg/common/file_manager.go @@ -47,7 +47,6 @@ func WriteToFile(info, path string) error { // RemoveFileAndDir remove file and dir func RemoveFileAndDir(namespace, name string) error { file := GenResetFileName(namespace, name) - hwlog.RunLog.Infof("delete cm(%s) file(%s)", name, file) rmErr := os.Remove(file) if rmErr != nil { return fmt.Errorf("failed to remove file(%s): %v", file, rmErr) @@ -57,6 +56,7 @@ func RemoveFileAndDir(namespace, name string) error { if err != nil { return fmt.Errorf("failed to remove dir(%s): %v", dir, err) } + hwlog.RunLog.Infof("delete cm(%s) file(%s)", name, file) return nil } diff --git a/pkg/device/ascendtolerance.go b/pkg/device/ascendtolerance.go index bcf3b04164b78832a9692758639f810923b46646..045f42f0718be385a913ba7c179201e684b75662 100644 --- a/pkg/device/ascendtolerance.go +++ b/pkg/device/ascendtolerance.go @@ -88,6 +88,7 @@ type HotResetTools struct { podIndexer cache.Indexer cmIndexer cache.Indexer jobs map[string]string + noResetCmPodKeys map[string]string } // NewHotResetManager create HotResetManager and init data @@ -109,11 +110,12 @@ func NewHotResetManager(devUsage string) HotResetManager { return nil } return &HotResetTools{ - ringNum: ringNumber, - resetTask: map[string]struct{}{}, - resetDev: map[int32]struct{}{}, - faultDev2PodMap: map[int32]v1.Pod{}, - jobs: map[string]string{}, + ringNum: ringNumber, + resetTask: map[string]struct{}{}, + resetDev: map[int32]struct{}{}, + faultDev2PodMap: map[int32]v1.Pod{}, + jobs: map[string]string{}, + noResetCmPodKeys: map[string]string{}, processPolicyTable: map[string]int{ common.EmptyError: common.EmptyErrorLevel, common.IgnoreError: common.IgnoreErrorLevel, @@ -162,7 +164,7 @@ func (hrt *HotResetTools) run() { } func (hrt *HotResetTools) processNextWorkItem() bool { - hwlog.RunLog.Infof("queue length: %d", hrt.queue.Len()) + hwlog.RunLog.Debugf("queue length: %d", hrt.queue.Len()) obj, shutdown := hrt.queue.Get() if shutdown { hwlog.RunLog.Errorf("shutdown, stop processing work queue") @@ -208,7 +210,7 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { hwlog.RunLog.Errorf("get kubeclient event error") return } - hwlog.RunLog.Infof("handle pod(%s) %s event", event.Key, event.Type) + hwlog.RunLog.Debugf("handle pod(%s) %s event", event.Key, event.Type) pod, err := hrt.getPodFromCache(event.Key) if err != nil { hwlog.RunLog.Warn(err) @@ -222,9 +224,14 @@ func (hrt *HotResetTools) handlePodAddEvent(obj interface{}) { return } hrt.jobs[event.Key] = jobName - cm, err := hrt.GetCMFromCache(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) + cmKey := fmt.Sprintf(pod.GetNamespace() + "/" + common.ResetInfoCMNamePrefix + jobName) + cm, err := hrt.GetCMFromCache(cmKey) if err != nil { - hwlog.RunLog.Warn(err) + _, ok = hrt.noResetCmPodKeys[event.Key] + if !ok { + hwlog.RunLog.Warn(err) + hrt.noResetCmPodKeys[event.Key] = "" + } hrt.queue.AddRateLimited(obj) return } @@ -241,6 +248,9 @@ func (hrt *HotResetTools) handlePodDeleteEvent(obj interface{}) { return } hwlog.RunLog.Debugf("handle pod(%s) delete event", event.Key) + if _, ok = hrt.noResetCmPodKeys[event.Key]; ok { + delete(hrt.noResetCmPodKeys, event.Key) + } jobName, ok := hrt.jobs[event.Key] if !ok { hwlog.RunLog.Errorf("job of pod(%s) not found in cache", event.Key) diff --git a/pkg/device/ascendtorlerance_test.go b/pkg/device/ascendtorlerance_test.go index f1a593cf5054359fa31549d7e512916be8c3a45e..8932d31530eeb96ffa84b7a4f25a2fdbc54e4799 100644 --- a/pkg/device/ascendtorlerance_test.go +++ b/pkg/device/ascendtorlerance_test.go @@ -1212,11 +1212,12 @@ func TestHandleCMDeleteEvent(t *testing.T) { func newHotResetTools() *HotResetTools { return &HotResetTools{ - ringNum: common.Ascend910RingsNum, - resetTask: map[string]struct{}{}, - resetDev: map[int32]struct{}{}, - faultDev2PodMap: map[int32]v1.Pod{}, - jobs: map[string]string{}, + ringNum: common.Ascend910RingsNum, + resetTask: map[string]struct{}{}, + resetDev: map[int32]struct{}{}, + faultDev2PodMap: map[int32]v1.Pod{}, + jobs: map[string]string{}, + noResetCmPodKeys: map[string]string{}, processPolicyTable: map[string]int{ common.EmptyError: common.EmptyErrorLevel, common.IgnoreError: common.IgnoreErrorLevel,