Commit 796e014e authored by Laurens D'hooge
Browse files

python final hierarchy construction script

parent e5e52cf1
from pprint import pprint
import numpy as np
import json
import sys
import os
def count_level_votes(feature_pairs, reject_decisions):
    """Tally, per feature, how often it lands on each hierarchy level.

    Args:
        feature_pairs: one flat list per ranking; entries ``2*i`` and
            ``2*i + 1`` are the two features compared by decision ``i``.
        reject_decisions: one list of truthy/falsy values per ranking; a
            truthy decision opens a new hierarchy level after the first
            feature of the pair.

    Returns:
        ``(votes, hierarchies)``. ``votes`` maps each feature to a list of
        per-level counts; ``hierarchies`` holds, per ranking, the list of
        levels built from the first feature of every pair.
    """
    # Size every vote vector to the longest decision list so rankings of
    # different lengths cannot index past the end. Trailing zeros never
    # change the argmax taken later.
    n_slots = max((len(decisions) for decisions in reject_decisions), default=0)
    votes = {}
    hierarchies = []
    for flat_pairs, decisions in zip(feature_pairs, reject_decisions):
        hierarchy = [[]]
        level = 0
        last_idx = len(decisions) - 1
        for pair_idx, rejected in enumerate(decisions):
            first_feature = flat_pairs[2 * pair_idx]
            second_feature = flat_pairs[2 * pair_idx + 1]
            # Register first, then second, to keep the dict insertion order
            # (it decides the order of features inside a level later on).
            first_votes = votes.setdefault(first_feature, [0] * n_slots)
            second_votes = votes.setdefault(second_feature, [0] * n_slots)

            hierarchy[level].append(first_feature)
            first_votes[level] += 1
            # A rejection on the final pair does not open a new level, so the
            # level index never exceeds the number of decisions minus one.
            if rejected and pair_idx != last_idx:
                hierarchy.append([])
                level += 1
            # NOTE(review): the source this was recovered from had lost its
            # indentation; the second feature is counted on the (possibly
            # new) current level for every pair, including a rejected final
            # pair -- confirm against the original script.
            second_votes[level] += 1
        hierarchies.append(hierarchy)
    return votes, hierarchies


def argmax_levels(votes):
    """Map each feature to the index of its most-voted level (first on ties)."""
    return {feature: np.argmax(np.array(tally)) for feature, tally in votes.items()}


def build_argmax_hierarchy(level_of_feature):
    """Group features into a list of levels indexed by their assigned level.

    Features are visited in ascending level order (stable with respect to
    the mapping's insertion order); levels nobody was assigned to stay as
    empty lists. Level 0 always exists.
    """
    levels = [[]]
    for feature, level in sorted(level_of_feature.items(), key=lambda item: item[1]):
        # Pad with empty levels up to and including the requested one.
        levels.extend([] for _ in range(len(levels), level + 1))
        levels[level].append(feature)
    return levels


def convert_file(dataset, tc):
    """Build and write the final hierarchy for one HFBR-calculations file.

    Reads ``tc`` from the dataset's HFBR-calculations directory and the
    matching VIF-removal text file, prints the intermediate results, and
    writes the hierarchy (followed by the zero-variance and VIF-removed
    feature lists) as JSON next to the calculations directory.
    """
    with open(f'{dataset}/HFBRs-feature-selection/HFBR-calculations/{tc}', 'r') as fd:
        hfbr_base = json.load(fd)
    wilcox_descending_feature_pairs = hfbr_base['wilcox_descending_feature_pairs']
    wilcox_p_reject_corrected = hfbr_base['wilcox_p_reject_corrected']
    zero_variance_after_balance = hfbr_base['zero_variance_after_balance']

    vif_name = tc.replace("-HFBR-calculations.json", "-VIF-removal.txt")
    with open(f'{dataset}/HFBRs-feature-selection/VIF-calculations/{vif_name}', 'r') as fd:
        vif_removed = fd.read().rstrip().split(',')

    ctr, hierarchies = count_level_votes(
        wilcox_descending_feature_pairs, wilcox_p_reject_corrected
    )
    pprint(hierarchies)
    print(ctr)
    ctr = argmax_levels(ctr)
    print(ctr)
    for k, v in sorted(ctr.items(), key=lambda item: item[1]):
        print(k, v)

    argmax_hierarchy = build_argmax_hierarchy(ctr)
    # The two trailing entries are not ranked levels: they carry the features
    # dropped for zero variance and by the VIF step, in that order.
    argmax_hierarchy.append(zero_variance_after_balance)
    argmax_hierarchy.append(vif_removed)
    pprint(argmax_hierarchy)

    out_name = tc.replace("-HFBR-calculations.json", "-HFBR.json")
    with open(f'{dataset}/HFBRs-feature-selection/{out_name}', 'w') as fd:
        json.dump(argmax_hierarchy, fd, indent=4)


def main(argv=None):
    """Convert every file in ``<dataset>/HFBRs-feature-selection/HFBR-calculations``.

    ``argv`` defaults to ``sys.argv``; element 1 is the dataset directory.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        raise SystemExit('usage: python <script> <dataset-directory>')
    dataset = args[1]
    to_convert = os.listdir(f'{dataset}/HFBRs-feature-selection/HFBR-calculations')
    for tc in to_convert:
        convert_file(dataset, tc)


if __name__ == '__main__':
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment