Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Laurens D'hooge
clean-ids-collection
Commits
796e014e
Commit
796e014e
authored
Oct 21, 2021
by
Laurens D'hooge
Browse files
python final hierarchy construction script
parent
e5e52cf1
Changes
1
Hide whitespace changes
Inline
Side-by-side
construct_hfbr_from_json_calc.py
0 → 100644
View file @
796e014e
from
pprint
import
pprint
import
numpy
as
np
import
json
import
sys
import
os
dataset
=
sys
.
argv
[
1
]
to_convert
=
os
.
listdir
(
f
'
{
dataset
}
/HFBRs-feature-selection/HFBR-calculations'
)
for
tc
in
to_convert
:
hfbr_base
=
json
.
load
(
fp
=
open
(
f
'
{
dataset
}
/HFBRs-feature-selection/HFBR-calculations/
{
tc
}
'
,
'r'
))
wilcox_descending_feature_pairs
=
hfbr_base
[
'wilcox_descending_feature_pairs'
]
wilcox_p_reject_corrected
=
hfbr_base
[
'wilcox_p_reject_corrected'
]
zero_variance_after_balance
=
hfbr_base
[
'zero_variance_after_balance'
]
vif_removed
=
vif_labels
=
open
(
f
'
{
dataset
}
/HFBRs-feature-selection/VIF-calculations/
{
tc
.
replace
(
"-HFBR-calculations.json"
,
"-VIF-removal.txt"
)
}
'
,
'r'
).
read
().
rstrip
().
split
(
','
)
ctr
=
{}
hierarchies
=
[]
for
combo
in
zip
(
wilcox_descending_feature_pairs
,
wilcox_p_reject_corrected
):
hierarchy
=
[[]]
current_hierarchy_index
=
0
for
pair_idx
,
decision
in
enumerate
(
combo
[
1
]):
first_feature
=
combo
[
0
][
2
*
pair_idx
]
if
first_feature
not
in
ctr
:
ctr
[
first_feature
]
=
[
0
]
*
len
(
combo
[
1
])
second_feature
=
combo
[
0
][
2
*
pair_idx
+
1
]
if
second_feature
not
in
ctr
:
ctr
[
second_feature
]
=
[
0
]
*
len
(
combo
[
1
])
if
decision
:
hierarchy
[
current_hierarchy_index
].
append
(
first_feature
)
ctr
[
first_feature
][
current_hierarchy_index
]
+=
1
if
pair_idx
+
1
!=
len
(
combo
[
1
]):
hierarchy
.
append
([])
current_hierarchy_index
+=
1
ctr
[
second_feature
][
current_hierarchy_index
]
+=
1
else
:
hierarchy
[
current_hierarchy_index
].
append
(
first_feature
)
ctr
[
first_feature
][
current_hierarchy_index
]
+=
1
ctr
[
second_feature
][
current_hierarchy_index
]
+=
1
hierarchies
.
append
(
hierarchy
)
pprint
(
hierarchies
)
print
(
ctr
)
ctr
=
{
k
:
np
.
argmax
(
np
.
array
(
v
))
for
k
,
v
in
ctr
.
items
()}
print
(
ctr
)
argmax_hierarchy
=
[[]]
prev_v
=
0
for
k
,
v
in
sorted
(
ctr
.
items
(),
key
=
lambda
item
:
item
[
1
]):
print
(
k
,
v
)
if
v
>
prev_v
:
for
i
in
range
(
v
-
prev_v
):
argmax_hierarchy
.
append
([])
prev_v
=
v
argmax_hierarchy
[
v
].
append
(
k
)
argmax_hierarchy
.
append
(
zero_variance_after_balance
)
argmax_hierarchy
.
append
(
vif_removed
)
pprint
(
argmax_hierarchy
)
with
open
(
f
'
{
dataset
}
/HFBRs-feature-selection/
{
tc
.
replace
(
"-HFBR-calculations.json"
,
"-HFBR.json"
)
}
'
,
'w'
)
as
fd
:
json
.
dump
(
argmax_hierarchy
,
fd
,
indent
=
4
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment