From 3fc73ca1bbf87cc3b7533e0789071b8e2d03315d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=9F=E7=95=85?= <631866165@qq.com> Date: Mon, 20 May 2019 12:52:18 +0800 Subject: [PATCH 1/3] new dir python --- python/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/.keep diff --git a/python/.keep b/python/.keep new file mode 100644 index 0000000..e69de29 -- Gitee From 91343c94cf1d49ade0d5a9cab71b86552d66cb6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=9F=E7=95=85?= <631866165@qq.com> Date: Mon, 20 May 2019 12:55:04 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E9=BB=98=E8=AE=A4=E5=8F=82=E6=95=B0?= =?UTF-8?q?=E8=A6=81=E8=AE=BE=E7=BD=AE=E4=B8=BA=E5=8F=AF=E5=8F=98=E5=AF=B9?= =?UTF-8?q?=E8=B1=A1=E7=9A=84=E5=AF=BC=E8=87=B4representatives=E4=B8=80?= =?UTF-8?q?=E7=9B=B4=E8=A2=AB=E5=BE=AA=E7=8E=AF=E5=88=A9=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/python.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 python/python.md diff --git a/python/python.md b/python/python.md new file mode 100644 index 0000000..36b9321 --- /dev/null +++ b/python/python.md @@ -0,0 +1,54 @@ +```python +def train_knnm(x, labels, erd, representatives=list()): + states = np.zeros(len(x)) # Mark all the instance as 'not covered' + distance_matrix = get_distance_matrix(x) + not_covered = get_not_covered(states) + + # while there are examples to group + while len(not_covered) > 0: + max_neighbourhood = list() + global min_md + tuple_max_neighbourhood = None + for i in not_covered: + # get distance from i to all other tuples + distances = distance_matrix[i] + + # sort distances + sorted_distances = [d for d in sorted(enumerate(distances), key=lambda a: a[1])] + + # filter only those which has not been yet covered + sorted_distances = [d for d in sorted_distances if states[d[0]] == 0] + + # compute neighbourhood + q = 0 + neighbourhood = list() + errors = 0 + while q < len(sorted_distances) and (labels[sorted_distances[q][0]] == labels[i] or errors < erd):#根绝容忍度等判断簇是否继续扩散 + neighbourhood.append(sorted_distances[q][0]) + if labels[sorted_distances[q][0]] != labels[i]: + errors += 1 + q += 1 + + if len(neighbourhood) > len(max_neighbourhood):#找当前最大的簇 + max_neighbourhood = neighbourhood + tuple_max_neighbourhood = i + # add representative + # representatives format (rep(di), all_tuples in neighbourhood, class(di), Sim(di)) + rep = (tuple_max_neighbourhood, x[tuple_max_neighbourhood]) + num = max_neighbourhood + cls = labels[tuple_max_neighbourhood] + sim = distance_matrix[tuple_max_neighbourhood, max_neighbourhood[-1]] + lay = 0 + representatives.append([rep, num, cls, sim, lay])#当前最大的簇形成新簇 + if len(num)/sim Date: Mon, 20 May 2019 13:00:46 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20python.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/python.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/python.md b/python/python.md index 36b9321..1789062 100644 --- a/python/python.md +++ b/python/python.md @@ -1,3 +1,5 @@ +方法的默认参数被设置为可变对象,导致调用方法时参数没有初始化,被反复利用 + ```python def train_knnm(x, labels, erd, representatives=list()): states = np.zeros(len(x)) # Mark all the instance as 'not covered' @@ -23,12 +25,11 @@ def train_knnm(x, labels, erd, representatives=list()): q = 0 neighbourhood = list() errors = 0 - while q < len(sorted_distances) and (labels[sorted_distances[q][0]] == labels[i] or errors < erd):#根绝容忍度等判断簇是否继续扩散 + while q < len(sorted_distances) and (labels[sorted_distances[q][0]] == labels[i] or errors < erd): neighbourhood.append(sorted_distances[q][0]) if labels[sorted_distances[q][0]] != labels[i]: errors += 1 q += 1 - if len(neighbourhood) > len(max_neighbourhood):#找当前最大的簇 max_neighbourhood = neighbourhood tuple_max_neighbourhood = i -- Gitee