From 102caa0db4dca4b896084353b6d50b68e5878582 Mon Sep 17 00:00:00 2001 From: Light-Alex <245212467@qq.com> Date: Tue, 16 Jul 2024 10:04:49 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=20Modification=E3=80=91=E6=94=AF=E6=8C=81=E8=87=AA=E5=AE=9A?= =?UTF-8?q?=E4=B9=89=E6=95=85=E9=9A=9C=E8=B6=85=E6=97=B6=E5=8A=A8=E6=80=81?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=20=E3=80=90=E4=BF=AE=E6=94=B9=E4=BA=BA=20Mod?= =?UTF-8?q?ifier=E3=80=91yanchuanxiang?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/common/constants.go | 8 + pkg/common/fault_code.go | 160 ++++++++++---- pkg/common/fault_code_test.go | 382 +++++++++++++++++++++++++++++++++- pkg/server/manager.go | 3 +- 4 files changed, 504 insertions(+), 49 deletions(-) diff --git a/pkg/common/constants.go b/pkg/common/constants.go index 075144a4..f45b6c94 100644 --- a/pkg/common/constants.go +++ b/pkg/common/constants.go @@ -670,6 +670,14 @@ const ( MinLinkDownTimeout = 1 // MaxLinkDownTimeout is the max time for the linkdown event MaxLinkDownTimeout = 30 + // MaxFaultTimeout is the max time(s) for the fault duration time of fault duration + MaxFaultTimeout = 600 + // MinFaultTimeout is the min time(s) for the fault duration time of fault duration + MinFaultTimeout = 0 + // MaxRecoverTimeout is the max time(s) for the fault recover duration time of fault duration + MaxRecoverTimeout = 600 + // MinRecoverTimeout is the min time(s) for the fault recover duration time of fault duration + MinRecoverTimeout = 0 // DefaultSubscribeToPollingTime is the default time from subscribe to polling DefaultSubscribeToPollingTime = 5 // MaxLogicID is the maximum logic ID diff --git a/pkg/common/fault_code.go b/pkg/common/fault_code.go index 7cc4fc21..6837c1df 100644 --- a/pkg/common/fault_code.go +++ b/pkg/common/fault_code.go @@ -106,9 +106,12 @@ var ( manuallySeparateNpuMapLock sync.Mutex // manuallySeparateNpuMap manually separate npu info cache 
manuallySeparateNpuMap = make(map[int32]ManuallyFaultInfo, GeneralMapSize) - // faultTypeSet is a set that contains all the fault level - faultTypeSet = sets.NewString(NotHandleFault, RestartRequest, RestartBusiness, FreeRestartNPU, + // FaultTypeSet is a set that contains all the fault level + FaultTypeSet = sets.NewString(NotHandleFault, RestartRequest, RestartBusiness, FreeRestartNPU, RestartNPU, PreSeparateNPU, SeparateNPU, ManuallySeparateNPU) + // FaultDurationTypeSet is a set that contains all the fault Duration level + FaultDurationTypeSet = sets.NewString(NotHandleFault, RestartRequest, RestartBusiness, FreeRestartNPU, + RestartNPU, PreSeparateNPU, SeparateNPU) ) // fault customization @@ -127,7 +130,11 @@ var ( linkDownTimeoutCustomization = ParamOption.LinkdownTimeout // linkUpTimeoutCustomization is the customized timeout for link up event linkUpTimeoutCustomization = int64(DefaultLinkUpTimeout) - faultSeverityMap = make(map[int64]int8, common.MaxErrorCodeCount) + // faultDurationMap is the cache saving the occur duration of a fault, key is event id + faultDurationMap = make(map[string]*FaultDurationCache, common.MaxErrorCodeCount) + // faultDurationMapLock is the lock of faultDurationMap + faultDurationMapLock sync.Mutex + faultSeverityMap = make(map[int64]int8, common.MaxErrorCodeCount) ) // ManuallyFaultInfo save the info of ManuallySeparateNPU @@ -205,6 +212,21 @@ type FaultDurationCustomization struct { FaultDuration } +// FaultDurationCache is the cache saving the FaultDuration +type FaultDurationCache struct { + // key: logicID, value: fault duration data + Duration map[int32]FaultDurationData + FaultDuration +} + +// FaultDurationData saved data during fault duration statistics +type FaultDurationData struct { + TimeoutStatus bool + FaultEventQueue []common.DevFaultInfo + FaultDurationTime int64 + FaultRecoverDurationTime int64 +} + // FaultDuration is the base info of fault duration type FaultDuration struct { FaultTimeout int64 @@ -247,14 
+269,6 @@ func LoadFaultCodeFromFile() error { return LoadFaultCode(faultCodeBytes) } -// ResetFaultCustomization clears the cache -func ResetFaultCustomization() { - hwlog.RunLog.Debug("reset fault customization, will clear cache") - faultFrequencyMapLock.Lock() - faultFrequencyMap = make(map[string]*FaultFrequencyCache, GeneralMapSize) - faultFrequencyMapLock.Unlock() -} - // LoadFaultCustomizationFromFile load fault customization from faultCustomization.json func LoadFaultCustomizationFromFile() error { faultCodeBytes, err := utils.LoadFile(faultCustomizationFilePath) @@ -267,6 +281,17 @@ func LoadFaultCustomizationFromFile() error { return nil } +// ResetFaultCustomizationCache reset fault customization cache +func ResetFaultCustomizationCache() { + hwlog.RunLog.Debug("reset fault customization, fault customization cache will be cleared") + faultFrequencyMapLock.Lock() + faultFrequencyMap = make(map[string]*FaultFrequencyCache, common.MaxErrorCodeCount) + faultFrequencyMapLock.Unlock() + faultDurationMapLock.Lock() + faultDurationMap = make(map[string]*FaultDurationCache, common.MaxErrorCodeCount) + faultDurationMapLock.Unlock() +} + // LoadFaultCode loads the fault codes func LoadFaultCode(faultCodeBytes []byte) error { var fileInfo faultFileInfo @@ -302,43 +327,60 @@ func LoadFaultCustomization(faultCustomizationByte []byte) error { } func loadFaultDurationCustomization(customization []FaultDurationCustomization) { + handledEventId := make(sets.String, common.MaxErrorCodeCount) for _, cus := range customization { + if !validateFaultDurationCustomization(cus) { + continue + } for _, id := range cus.EventId { - if id != LinkDownFaultCodeStr { - hwlog.RunLog.Warnf("FaultDuration only support network fault(%s) now, skip event id %s", - LinkDownFaultCodeStr, id) + id = strings.ToLower(id) + if handledEventId.Has(id) { + hwlog.RunLog.Warnf("duplicated event id detected when handling FaultDuration, skip, "+ + "event id: %s", id) continue } - if cus.FaultTimeout < 
MinLinkDownTimeout || cus.FaultTimeout > MaxLinkDownTimeout { - linkDownTimeoutCustomization = ParamOption.LinkdownTimeout - hwlog.RunLog.Errorf("LinkDownTimeout exceed limit(%d~%d), use default(%d)", - MinLinkDownTimeout, MaxLinkDownTimeout, ParamOption.LinkdownTimeout) - } else { - linkDownTimeoutCustomization = cus.FaultTimeout - hwlog.RunLog.Debugf("modify LinkDownTimeout success: %d", cus.FaultTimeout) - } - if cus.RecoverTimeout < MinLinkUpTimeout || cus.RecoverTimeout > MaxLinkUpTimeout { - linkUpTimeoutCustomization = DefaultLinkUpTimeout - hwlog.RunLog.Errorf("LinkUpTimeout exceed limit(%d~%d), use default(%d)", - MinLinkUpTimeout, MaxLinkUpTimeout, DefaultLinkUpTimeout) + handledEventId.Insert(id) + if cache, ok := faultDurationMap[id]; ok { + cache.FaultTimeout = cus.FaultTimeout + cache.RecoverTimeout = cus.RecoverTimeout + cache.FaultHandling = cus.FaultHandling + hwlog.RunLog.Debugf("update FaultDuration for event id %s success, FaultTimeout: %d, "+ + "RecoverTimeout: %d, FaultHandling: %s", id, cus.FaultTimeout, cus.RecoverTimeout, + cus.FaultHandling) } else { - linkUpTimeoutCustomization = cus.RecoverTimeout - hwlog.RunLog.Debugf("modify LinkUpTimeout success: %d", cus.RecoverTimeout) + faultDurationMap[id] = &FaultDurationCache{ + Duration: make(map[int32]FaultDurationData, GeneralMapSize), + FaultDuration: FaultDuration{ + FaultTimeout: cus.FaultTimeout, + RecoverTimeout: cus.RecoverTimeout, + FaultHandling: cus.FaultHandling, + }, + } + hwlog.RunLog.Debugf("insert FaultDuration for event id %s success, FaultTimeout: %d, "+ + "RecoverTimeout: %d, FaultHandling: %s", id, cus.FaultTimeout, cus.RecoverTimeout, + cus.FaultHandling) } - return } } - linkUpTimeoutCustomization = DefaultLinkUpTimeout - linkDownTimeoutCustomization = ParamOption.LinkdownTimeout - hwlog.RunLog.Infof("did not find network fault timeout customization, use default LinkDownTimeout: %d, "+ - "LinkupTimeout: %d", ParamOption.LinkdownTimeout, DefaultLinkUpTimeout) + // delete 
event id those in cache but not in CM + cachedEventIds := make([]string, 0, len(faultDurationMap)) + for k := range faultDurationMap { + cachedEventIds = append(cachedEventIds, k) + } + for _, cachedId := range cachedEventIds { + if !handledEventId.Has(cachedId) && len(cachedId) != 0 { + delete(faultDurationMap, cachedId) + hwlog.RunLog.Infof("delete FaultDuration for event id %s", cachedId) + } + } } func loadGraceToleranceCustomization(customization GraceToleranceCustomization) { if customization.WaitDeviceResetTime < MinWaitDeviceResetTime || customization.WaitDeviceResetTime > MaxWaitDeviceResetTime { - hwlog.RunLog.Errorf("WaitDeviceResetTime exceed limit(%d~%d), use default(%d)", - MinWaitDeviceResetTime, MaxWaitDeviceResetTime, DefaultWaitDeviceResetTime) + hwlog.RunLog.Errorf("WaitDeviceResetTime(%d) exceed limit(%d~%d), use default(%d)", + customization.WaitDeviceResetTime, MinWaitDeviceResetTime, + MaxWaitDeviceResetTime, DefaultWaitDeviceResetTime) WaitDeviceResetTime = DefaultWaitDeviceResetTime } else { hwlog.RunLog.Debugf("modify WaitDeviceResetTime(%d) success", customization.WaitDeviceResetTime) @@ -346,8 +388,9 @@ func loadGraceToleranceCustomization(customization GraceToleranceCustomization) } if customization.WaitProcessReadCMTime < MinWaitProcessReadCMTime || customization. 
WaitProcessReadCMTime > MaxWaitProcessReadCMTime { - hwlog.RunLog.Errorf("WaitProcessReadCMTime exceed limit(%d~%d), use default(%d)", - MinWaitProcessReadCMTime, MaxWaitProcessReadCMTime, DefaultProcessReadCMTime) + hwlog.RunLog.Errorf("WaitProcessReadCMTime(%d) exceed limit(%d~%d), use default(%d)", + customization.WaitProcessReadCMTime, MinWaitProcessReadCMTime, + MaxWaitProcessReadCMTime, DefaultProcessReadCMTime) WaitProcessReadCMTime = DefaultProcessReadCMTime } else { hwlog.RunLog.Debugf("modify WaitProcessReadCMTime(%d) success", customization.WaitProcessReadCMTime) @@ -355,8 +398,9 @@ func loadGraceToleranceCustomization(customization GraceToleranceCustomization) } if customization.WaitFaultSelfHealingTime < MinWaitFaultSelfHealingTime || time.Duration(customization.WaitFaultSelfHealingTime) > MaxWaitFaultSelfHealingTime { - hwlog.RunLog.Errorf("WaitFaultSelfHealingTime exceed limit(%d~%d), use default(%d)", - MinWaitFaultSelfHealingTime, MaxWaitFaultSelfHealingTime, DefaultWaitFaultSelfHealingTime) + hwlog.RunLog.Errorf("WaitFaultSelfHealingTime(%d) exceed limit(%d~%d), use default(%d)", + customization.WaitFaultSelfHealingTime, + MinWaitFaultSelfHealingTime, MaxWaitFaultSelfHealingTime, DefaultWaitFaultSelfHealingTime) WaitFaultSelfHealingTime = DefaultWaitFaultSelfHealingTime } else { hwlog.RunLog.Debugf("modify WaitFaultSelfHealingTime(%d) success", customization.WaitFaultSelfHealingTime) @@ -375,7 +419,8 @@ func loadFaultFrequencyCustomization(customizations []FaultFrequencyCustomizatio for _, id := range cus.EventId { id = strings.ToLower(id) if handledEventId.Has(id) { - hwlog.RunLog.Warnf("duplicated event id detected when handling FaultFrequency, skip, id: %s", id) + hwlog.RunLog.Warnf("duplicated event id detected when handling FaultFrequency, "+ + "skip, event id: %s", id) continue } handledEventId.Insert(id) @@ -445,9 +490,36 @@ func validateFaultFrequencyCustomization(customization FaultFrequencyCustomizati customization.Times, 
MinFaultFrequencyTimes, MaxFaultFrequencyTimes) return false } - if !faultTypeSet.Has(customization.FaultHandling) { - hwlog.RunLog.Warnf("FaultHandling(%s) in this FaultFrequency is unrecognized, skip", - customization.FaultHandling) + if !FaultTypeSet.Has(customization.FaultHandling) { + hwlog.RunLog.Warnf("FaultHandling(%s) in this FaultFrequency is unrecognized, skip. "+ + "The supported range of FaultHandling in this FaultFrequency is %v", + customization.FaultHandling, FaultTypeSet.List()) + return false + } + return true +} + +func validateFaultDurationCustomization(faultDurationCustomization FaultDurationCustomization) bool { + if len(faultDurationCustomization.EventId) == 0 { + hwlog.RunLog.Warnf("empty event id in this FaultDuration, skip") + return false + } + if faultDurationCustomization.FaultTimeout > MaxFaultTimeout || + faultDurationCustomization.FaultTimeout < MinFaultTimeout { + hwlog.RunLog.Warnf("FaultTimeout(%d) in this FaultDuration exceeds limit(%d~%d), skip", + faultDurationCustomization.FaultTimeout, MinFaultTimeout, MaxFaultTimeout) + return false + } + if faultDurationCustomization.RecoverTimeout > MaxRecoverTimeout || + faultDurationCustomization.RecoverTimeout < MinRecoverTimeout { + hwlog.RunLog.Warnf("RecoverTimeout(%d) in this FaultDuration exceeds limit(%d~%d), skip", + faultDurationCustomization.RecoverTimeout, MinRecoverTimeout, MaxRecoverTimeout) + return false + } + if !FaultDurationTypeSet.Has(faultDurationCustomization.FaultHandling) { + hwlog.RunLog.Warnf("FaultHandling(%s) in this FaultDuration is unrecognized, skip. 
"+ + "The supported range of FaultHandling in this FaultDuration is %v", + faultDurationCustomization.FaultHandling, FaultDurationTypeSet.List()) return false } return true @@ -478,7 +550,7 @@ func GetNetworkFaultTypeByCode(faultCodes []string) string { // GetFaultType will return the fault type from fault codes, fault frequency and ManuallySeparateNPU cache func GetFaultType(faultCodes []int64, logicId int32) string { - faultTypes := make([]string, 0, len(faultTypeSet)) + faultTypes := make([]string, 0, len(FaultTypeSet)) faultTypes = append(faultTypes, GetFaultTypeByCode(faultCodes)) faultTypes = append(faultTypes, GetFaultTypeFromFaultFrequency(logicId)) if QueryManuallyFaultInfoByLogicID(logicId) { diff --git a/pkg/common/fault_code_test.go b/pkg/common/fault_code_test.go index 20ca49b6..3ba872e2 100644 --- a/pkg/common/fault_code_test.go +++ b/pkg/common/fault_code_test.go @@ -18,7 +18,9 @@ package common import ( "encoding/json" "errors" + "strings" "testing" + "time" "github.com/agiledragon/gomonkey/v2" "github.com/smartystreets/goconvey/convey" @@ -606,12 +608,34 @@ func TestLinkDownTimeoutCheckCase05(t *testing.T) { }) } -// TestResetFaultCustomization for test ResetFaultCustomization -func TestResetFaultCustomization(t *testing.T) { - convey.Convey("test ResetFaultCustomization success", t, func() { +// TestResetFaultCustomizationCache for test ResetFaultCustomizationCache +func TestResetFaultCustomizationCache(t *testing.T) { + convey.Convey("test ResetFaultCustomizationCache success", t, func() { + faultFrequencyMap = map[string]*FaultFrequencyCache{ + "80E18005": { + Frequency: make(map[int32][]int64, GeneralMapSize), + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 2, + FaultHandling: ManuallySeparateNPU, + }, + }, + } + faultDurationMap = map[string]*FaultDurationCache{ + "81078603": { + Duration: make(map[int32]FaultDurationData, GeneralMapSize), + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + 
FaultHandling: PreSeparateNPU, + }, + }, + } + expectVal := 0 - ResetFaultCustomization() + ResetFaultCustomizationCache() convey.So(len(faultFrequencyMap), convey.ShouldEqual, expectVal) + convey.So(len(faultDurationMap), convey.ShouldEqual, expectVal) }) } @@ -650,3 +674,353 @@ func TestDeleteManuallyFaultInfo(t *testing.T) { }) }) } + +// TestLoadGraceToleranceCustomization for test loadGraceToleranceCustomization +func TestLoadGraceToleranceCustomization(t *testing.T) { + convey.Convey("test loadGraceToleranceCustomization success", t, func() { + graceToleranceCustomization := GraceToleranceCustomization{ + WaitDeviceResetTime: 150, + WaitProcessReadCMTime: 30, + WaitFaultSelfHealingTime: 15, + } + WaitDeviceResetTime = time.Duration(0) + WaitProcessReadCMTime = time.Duration(0) + WaitFaultSelfHealingTime = time.Duration(0) + loadGraceToleranceCustomization(graceToleranceCustomization) + convey.So(WaitDeviceResetTime, convey.ShouldEqual, 150) + convey.So(WaitProcessReadCMTime, convey.ShouldEqual, 30) + convey.So(WaitFaultSelfHealingTime, convey.ShouldEqual, 15) + }) + + convey.Convey("test loadGraceToleranceCustomization abnormal condition success", t, func() { + graceToleranceCustomization := GraceToleranceCustomization{ + WaitDeviceResetTime: 59, + WaitProcessReadCMTime: 91, + WaitFaultSelfHealingTime: 0, + } + WaitDeviceResetTime = time.Duration(0) + WaitProcessReadCMTime = time.Duration(0) + WaitFaultSelfHealingTime = time.Duration(0) + loadGraceToleranceCustomization(graceToleranceCustomization) + convey.So(WaitDeviceResetTime, convey.ShouldEqual, 150) + convey.So(WaitProcessReadCMTime, convey.ShouldEqual, 30) + convey.So(WaitFaultSelfHealingTime, convey.ShouldEqual, 15) + }) +} + +// TestValidateFaultFrequencyCustomizationPart1 for test validateFaultFrequencyCustomization +func TestValidateFaultFrequencyCustomizationPart1(t *testing.T) { + convey.Convey("test validateFaultFrequencyCustomization success", t, func() { + faultFrequencyCustomization := 
FaultFrequencyCustomization{ + EventId: []string{"80C98000", "80B78000"}, + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU, + }, + } + result := validateFaultFrequencyCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, true) + }) +} + +// TestValidateFaultFrequencyCustomizationPart2 for test validateFaultFrequencyCustomization +func TestValidateFaultFrequencyCustomizationPart2(t *testing.T) { + convey.Convey("test validateFaultFrequencyCustomization failed case1", t, func() { + faultFrequencyCustomization := FaultFrequencyCustomization{ + EventId: []string{}, + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU, + }, + } + result := validateFaultFrequencyCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) + convey.Convey("test validateFaultFrequencyCustomization failed case2", t, func() { + faultFrequencyCustomization := FaultFrequencyCustomization{ + EventId: []string{"80C98000", "80B78000"}, + FaultFrequency: FaultFrequency{ + TimeWindow: 59, + Times: 3, + FaultHandling: ManuallySeparateNPU, + }, + } + result := validateFaultFrequencyCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) + convey.Convey("test validateFaultFrequencyCustomization failed case3", t, func() { + faultFrequencyCustomization := FaultFrequencyCustomization{ + EventId: []string{"80C98000", "80B78000"}, + FaultFrequency: FaultFrequency{ + TimeWindow: 60, + Times: 0, + FaultHandling: ManuallySeparateNPU, + }, + } + result := validateFaultFrequencyCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) + convey.Convey("test validateFaultFrequencyCustomization failed case4", t, func() { + faultFrequencyCustomization := FaultFrequencyCustomization{ + EventId: []string{"80C98000", "80B78000"}, + FaultFrequency: FaultFrequency{ + TimeWindow: 
60, + Times: 2, + FaultHandling: "separatesNPU", + }, + } + result := validateFaultFrequencyCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) +} + +// TestLoadFaultFrequencyCustomizationCase1 for test loadFaultFrequencyCustomization +func TestLoadFaultFrequencyCustomizationCase1(t *testing.T) { + convey.Convey("test loadFaultFrequencyCustomization success case1", t, func() { + faultCode1 := "80C98000" + faultCode2 := "80E18005" + faultFrequencyCustomizations := []FaultFrequencyCustomization{{ + EventId: []string{faultCode1}, FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 2, + FaultHandling: ManuallySeparateNPU, + }}, { + EventId: []string{faultCode2, faultCode1}, FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU}}} + expectVal := map[string]*FaultFrequencyCache{ + strings.ToLower(faultCode1): {Frequency: make(map[int32][]int64, common.MaxErrorCodeCount), + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 2, + FaultHandling: ManuallySeparateNPU}}, + strings.ToLower(faultCode2): {Frequency: make(map[int32][]int64, common.MaxErrorCodeCount), + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU}}} + faultFrequencyMap = make(map[string]*FaultFrequencyCache, common.MaxErrorCodeCount) + loadFaultFrequencyCustomization(faultFrequencyCustomizations) + convey.So(faultFrequencyMap, convey.ShouldResemble, expectVal) + }) +} + +// TestLoadFaultFrequencyCustomizationCase2 for test loadFaultFrequencyCustomization +func TestLoadFaultFrequencyCustomizationCase2(t *testing.T) { + convey.Convey("test loadFaultFrequencyCustomization success case2", t, func() { + faultCode1 := "80C98000" + faultCode2 := "80E18005" + faultFrequencyCustomizations := []FaultFrequencyCustomization{{ + EventId: []string{faultCode1}, FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 0, + FaultHandling: ManuallySeparateNPU, 
+ }}, { + EventId: []string{faultCode2}, FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU}}} + expectVal := map[string]*FaultFrequencyCache{ + strings.ToLower(faultCode2): {Frequency: make(map[int32][]int64, common.MaxErrorCodeCount), + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU}}} + faultFrequencyMap = map[string]*FaultFrequencyCache{ + strings.ToLower(faultCode1): {Frequency: make(map[int32][]int64, common.MaxErrorCodeCount), + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 2, + FaultHandling: ManuallySeparateNPU}}, + strings.ToLower(faultCode2): {Frequency: make(map[int32][]int64, common.MaxErrorCodeCount), + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU}}} + loadFaultFrequencyCustomization(faultFrequencyCustomizations) + convey.So(faultFrequencyMap, convey.ShouldResemble, expectVal) + }) +} + +// TestValidateFaultDurationCustomizationPart1 for test validateFaultDurationCustomization +func TestValidateFaultDurationCustomizationPart1(t *testing.T) { + convey.Convey("test validateFaultFrequencyCustomization success", t, func() { + faultFrequencyCustomization := FaultDurationCustomization{ + EventId: []string{"81078603"}, + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU, + }, + } + result := validateFaultDurationCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, true) + }) +} + +// TestValidateFaultDurationCustomization for test validateFaultDurationCustomization +func TestValidateFaultDurationCustomizationPart2(t *testing.T) { + convey.Convey("test validateFaultFrequencyCustomization failed case1", t, func() { + faultFrequencyCustomization := FaultDurationCustomization{ + EventId: []string{}, + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU, 
+ }, + } + result := validateFaultDurationCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) + convey.Convey("test validateFaultFrequencyCustomization failed case2", t, func() { + faultFrequencyCustomization := FaultDurationCustomization{ + EventId: []string{"81078603"}, + FaultDuration: FaultDuration{ + FaultTimeout: -1, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU, + }, + } + result := validateFaultDurationCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) + convey.Convey("test validateFaultFrequencyCustomization failed case3", t, func() { + faultFrequencyCustomization := FaultDurationCustomization{ + EventId: []string{"81078603"}, + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: -1, + FaultHandling: PreSeparateNPU, + }, + } + result := validateFaultDurationCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) + convey.Convey("test validateFaultFrequencyCustomization failed case4", t, func() { + faultFrequencyCustomization := FaultDurationCustomization{ + EventId: []string{"81078603"}, + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + FaultHandling: ManuallySeparateNPU, + }, + } + result := validateFaultDurationCustomization(faultFrequencyCustomization) + convey.So(result, convey.ShouldEqual, false) + }) +} + +// TestLoadFaultDurationCustomizationCase1 for test loadFaultDurationCustomizationCase1 +func TestLoadFaultDurationCustomizationCase1(t *testing.T) { + convey.Convey("test loadFaultDurationCustomizationCase1 success case1", t, func() { + faultCode1 := "81078603" + faultCode2 := "80E0180F" + faultDurationCustomization := []FaultDurationCustomization{{ + EventId: []string{faultCode1}, FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU, + }}, { + EventId: []string{faultCode2, faultCode1}, FaultDuration: FaultDuration{ + 
FaultTimeout: 120, + RecoverTimeout: 0, + FaultHandling: RestartBusiness}}} + expectVal := map[string]*FaultDurationCache{ + strings.ToLower(faultCode1): {Duration: make(map[int32]FaultDurationData, common.MaxErrorCodeCount), + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU}}, + strings.ToLower(faultCode2): {Duration: make(map[int32]FaultDurationData, common.MaxErrorCodeCount), + FaultDuration: FaultDuration{ + FaultTimeout: 120, + RecoverTimeout: 0, + FaultHandling: RestartBusiness}}} + faultDurationMap = make(map[string]*FaultDurationCache, common.MaxErrorCodeCount) + loadFaultDurationCustomization(faultDurationCustomization) + convey.So(faultDurationMap, convey.ShouldResemble, expectVal) + }) +} + +// TestLoadFaultDurationCustomizationCase2 for test loadFaultDurationCustomizationCase1 +func TestLoadFaultDurationCustomizationCase2(t *testing.T) { + convey.Convey("test loadFaultDurationCustomizationCase1 success case2", t, func() { + faultCode1 := "81078603" + faultCode2 := "80E0180F" + faultDurationCustomization := []FaultDurationCustomization{{ + EventId: []string{faultCode1}, FaultDuration: FaultDuration{ + FaultTimeout: -1, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU, + }}, { + EventId: []string{faultCode2}, FaultDuration: FaultDuration{ + FaultTimeout: 120, + RecoverTimeout: 0, + FaultHandling: RestartBusiness}}} + expectVal := map[string]*FaultDurationCache{ + strings.ToLower(faultCode2): {Duration: make(map[int32]FaultDurationData, common.MaxErrorCodeCount), + FaultDuration: FaultDuration{ + FaultTimeout: 120, + RecoverTimeout: 0, + FaultHandling: RestartBusiness}}} + faultDurationMap = map[string]*FaultDurationCache{ + strings.ToLower(faultCode1): {Duration: make(map[int32]FaultDurationData, common.MaxErrorCodeCount), + FaultDuration: FaultDuration{ + FaultTimeout: 30, + RecoverTimeout: 60, + FaultHandling: PreSeparateNPU}}, + strings.ToLower(faultCode2): {Duration: 
make(map[int32]FaultDurationData, common.MaxErrorCodeCount), + FaultDuration: FaultDuration{ + FaultTimeout: 120, + RecoverTimeout: 0, + FaultHandling: RestartBusiness}}} + loadFaultDurationCustomization(faultDurationCustomization) + convey.So(faultDurationMap, convey.ShouldResemble, expectVal) + }) +} + +// TestGetFaultTypeFromFaultFrequency for test GetFaultTypeFromFaultFrequency +func TestGetFaultTypeFromFaultFrequency(t *testing.T) { + convey.Convey("test GetFaultTypeFromFaultFrequency success case1", t, func() { + logicId := int32(0) + faultCode := "80E18005" + faultFrequencyMap = map[string]*FaultFrequencyCache{ + strings.ToLower(faultCode): { + Frequency: map[int32][]int64{ + logicId: {time.Now().Unix() - 2}, + }, + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU, + }, + }, + } + convey.So(GetFaultTypeFromFaultFrequency(logicId), convey.ShouldEqual, NormalNPU) + }) + + convey.Convey("test GetFaultTypeFromFaultFrequency success case2", t, func() { + logicId := int32(0) + faultCode := "80E18005" + faultFrequencyMap = map[string]*FaultFrequencyCache{ + strings.ToLower(faultCode): { + Frequency: map[int32][]int64{ + logicId: {time.Now().Unix() - 10, time.Now().Unix() - 8, time.Now().Unix() - 2}, + }, + FaultFrequency: FaultFrequency{ + TimeWindow: 86400, + Times: 3, + FaultHandling: ManuallySeparateNPU, + }, + }, + } + manuallySeparateNpuMap = make(map[int32]ManuallyFaultInfo, GeneralMapSize) + recoverFaultFrequencyMap = make(map[int32]string, GeneralMapSize) + convey.So(GetFaultTypeFromFaultFrequency(logicId), convey.ShouldEqual, ManuallySeparateNPU) + convey.So(manuallySeparateNpuMap[logicId].FirstHandle, convey.ShouldEqual, true) + convey.So(recoverFaultFrequencyMap[logicId], convey.ShouldEqual, strings.ToLower("80E18005")) + }) +} diff --git a/pkg/server/manager.go b/pkg/server/manager.go index 9bfe7250..ea3de8ff 100644 --- a/pkg/server/manager.go +++ b/pkg/server/manager.go @@ -1041,7 +1041,7 @@ func 
loadFaultCustomization(configMap *v1.ConfigMap) { if !ok { hwlog.RunLog.Warnf("did not find key(%s) in configmap, "+ "reset fault customization", common.FaultCustomizationKey) - common.ResetFaultCustomization() + common.ResetFaultCustomizationCache() if err := common.LoadFaultCustomizationFromFile(); err != nil { hwlog.RunLog.Errorf("load fault customization from faultCustomization.json failed, err: %v", err) return @@ -1051,6 +1051,7 @@ func loadFaultCustomization(configMap *v1.ConfigMap) { } if err := common.LoadFaultCustomization([]byte(faultCustomization)); err != nil { hwlog.RunLog.Errorf("load fault customization from cm failed, err: %v", err) + common.ResetFaultCustomizationCache() if err = common.LoadFaultCustomizationFromFile(); err != nil { hwlog.RunLog.Errorf("load fault customization from faultCustomization.json failed, err: %v", err) return -- Gitee