STRLCPY/451-CorporateRiskMiner

Render subgraphs and sort tables
Peter Zatka-Haas committed 2 years ago

b5666617

1 parent 9be656be

■ ■ ■ ■ ■ ■

.streamlit/config.toml

1 + [theme]
2 + base="light"

All occurrences

■ ■ ■ ■ ■ ■

src/app.py

1	1		import streamlit as st
2		-	import pandas as pd
3	2		from streamlit_agraph import agraph, Config
4		-	from utils import build_agraph_components, get_edges_df, get_subgraph_df, get_nodes_df
	3	+	from utils import (
	4	+	build_agraph_components,
	5	+	get_edges_df,
	6	+	get_subgraph_df,
	7	+	get_nodes_df,
	8	+	get_subgraph_with_risk_score,
	9	+	)
5	10
6	11
7	12		st.set_page_config(layout="wide")
8	13
9	14
10		-	SLIDER_MIN = 1
	15	+	SLIDER_MIN = 0
11	16		SLIDER_MAX = 100
12	17		SLIDER_DEFAULT = 50
	18	+	DEFAULT_NUM_SUBGRAPHS_TO_SHOW = 3
13	19
14	20		nodes = get_nodes_df()
15	21		edges = get_edges_df()
		skipped 2 lines
18	24		with st.sidebar:
19	25		st.title("Corporate risks")
20	26
21		-	weight_chains = st.slider(
22		-	"Long ownership chains",
23		-	min_value=SLIDER_MIN,
24		-	max_value=SLIDER_MAX,
25		-	value=SLIDER_DEFAULT,
	27	+	weight_chains = (
	28	+	st.slider(
	29	+	"Long ownership chains",
	30	+	min_value=SLIDER_MIN,
	31	+	max_value=SLIDER_MAX,
	32	+	value=SLIDER_DEFAULT,
	33	+	)
	34	+	/ SLIDER_MAX
26	35		)
27		-	weight_cyclic = st.slider(
28		-	"Cyclic ownership",
29		-	min_value=SLIDER_MIN,
30		-	max_value=SLIDER_MAX,
31		-	value=SLIDER_DEFAULT,
	36	+	weight_cyclic = (
	37	+	st.slider(
	38	+	"Cyclic ownership",
	39	+	min_value=SLIDER_MIN,
	40	+	max_value=SLIDER_MAX,
	41	+	value=SLIDER_DEFAULT,
	42	+	)
	43	+	/ SLIDER_MAX
32	44		)
33		-	weight_psc_haven = st.slider(
34		-	"Persons of significant control associated with tax havens",
35		-	min_value=SLIDER_MIN,
36		-	max_value=SLIDER_MAX,
37		-	value=SLIDER_DEFAULT,
	45	+	weight_psc_haven = (
	46	+	st.slider(
	47	+	"Persons of significant control associated with tax havens",
	48	+	min_value=SLIDER_MIN,
	49	+	max_value=SLIDER_MAX,
	50	+	value=SLIDER_DEFAULT,
	51	+	)
	52	+	/ SLIDER_MAX
38	53		)
39		-	weight_pep = st.slider(
40		-	"Officers/PSCs are politically exposed",
41		-	min_value=SLIDER_MIN,
42		-	max_value=SLIDER_MAX,
43		-	value=SLIDER_DEFAULT,
	54	+	weight_pep = (
	55	+	st.slider(
	56	+	"Officers/PSCs are politically exposed",
	57	+	min_value=SLIDER_MIN,
	58	+	max_value=SLIDER_MAX,
	59	+	value=SLIDER_DEFAULT,
	60	+	)
	61	+	/ SLIDER_MAX
44	62		)
45		-	weight_sanctions = st.slider(
46		-	"Officers/PSCs/Companies are sanctioned",
47		-	min_value=SLIDER_MIN,
48		-	max_value=SLIDER_MAX,
49		-	value=SLIDER_DEFAULT,
	63	+	weight_sanctions = (
	64	+	st.slider(
	65	+	"Officers/PSCs/Companies are sanctioned",
	66	+	min_value=SLIDER_MIN,
	67	+	max_value=SLIDER_MAX,
	68	+	value=SLIDER_DEFAULT,
	69	+	)
	70	+	/ SLIDER_MAX
50	71		)
51		-	weight_disqualified = st.slider(
52		-	"Officers are disqualified directors",
53		-	min_value=SLIDER_MIN,
54		-	max_value=SLIDER_MAX,
55		-	value=SLIDER_DEFAULT,
	72	+	weight_disqualified = (
	73	+	st.slider(
	74	+	"Officers are disqualified directors",
	75	+	min_value=SLIDER_MIN,
	76	+	max_value=SLIDER_MAX,
	77	+	value=SLIDER_DEFAULT,
	78	+	)
	79	+	/ SLIDER_MAX
56	80		)
57		-
58	81		custom_names_a = st.multiselect(
59	82		label="Custom persons of interest",
60	83		options=nodes["node_id"],
		skipped 5 lines
66	89
67	90
68	91		with st.container():
69		-	st.write(subgraphs)
	92	+
	93	+	subgraph_with_risk_scores = get_subgraph_with_risk_score(
	94	+	subgraphs,
	95	+	weight_chains=weight_chains,
	96	+	weight_cyclic=weight_cyclic,
	97	+	weight_psc_haven=weight_psc_haven,
	98	+	weight_pep=weight_pep,
	99	+	weight_sanctions=weight_sanctions,
	100	+	weight_disqualified=weight_disqualified,
	101	+	)
	102	+
	103	+	st.dataframe(data=subgraph_with_risk_scores, use_container_width=True)
70	104
71		-	selected_subgraph_hash = st.selectbox(
72		-	label="Select subgraph to explore", options=subgraphs.index
	105	+	selected_subgraph_hashes = st.multiselect(
	106	+	label="Select corporate network(s) to explore",
	107	+	options=list(subgraph_with_risk_scores.index),
	108	+	default=list(
	109	+	subgraph_with_risk_scores.head(DEFAULT_NUM_SUBGRAPHS_TO_SHOW).index
	110	+	),
73	111		)
74	112
75		-	nodes_selected = nodes.loc[nodes["subgraph_hash"] == selected_subgraph_hash]
76		-	edges_selected = edges.loc[edges["subgraph_hash"] == selected_subgraph_hash]
77	113
78	114		with st.container():
	115	+	num_subgraphs_to_display = len(selected_subgraph_hashes)
	116	+	cols = st.columns(num_subgraphs_to_display)
79	117
80		-	col1, col2 = st.columns(2)
	118	+	for c, subgraph_hash in enumerate(selected_subgraph_hashes):
	119	+	nodes_selected = nodes.loc[nodes["subgraph_hash"] == subgraph_hash]
	120	+	edges_selected = edges.loc[edges["subgraph_hash"] == subgraph_hash]
81	121
82		-	with col1:
83		-	(node_objects, edge_objects) = build_agraph_components(
84		-	nodes_selected, edges_selected
85		-	)
86		-	agraph(
87		-	nodes=node_objects,
88		-	edges=edge_objects,
89		-	config=Config(
90		-	width=500,
91		-	height=500,
92		-	),
93		-	)
	122	+	with cols[c]:
	123	+	(node_objects, edge_objects) = build_agraph_components(
	124	+	nodes_selected, edges_selected
	125	+	)
	126	+	agraph(
	127	+	nodes=node_objects,
	128	+	edges=edge_objects,
	129	+	config=Config(
	130	+	width=round(1080 / num_subgraphs_to_display),
	131	+	height=200,
	132	+	),
	133	+	)
	134	+
	135	+	st.markdown("People")
	136	+	st.dataframe(
	137	+	nodes_selected.query("is_person == 1"),
	138	+	use_container_width=True,
	139	+	)
94	140
95		-	with col2:
96		-	st.write(nodes_selected)
	141	+	st.markdown("Companies")
	142	+	st.dataframe(
	143	+	nodes_selected.query("is_person == 0"),
	144	+	use_container_width=True,
	145	+	)
97	146

■ ■ ■ ■ ■ ■

src/utils.py

	1	+	from curses import use_default_colors
1	2		import streamlit as st
2	3		from streamlit_agraph import Node, Edge
3	4		import json
4	5		import pandas as pd
5	6
6		-	NODE_COLOUR_PERSON = "#4684B2"
7		-	NODE_COLOUR_COMPANY = "#46B247"
8		-
	7	+	NODE_COLOUR_NON_DODGY = "#72EF77"
	8	+	NODE_COLOUR_DODGY = "#EF7272"
	9	+	NODE_IMAGE_PERSON = "http://i.ibb.co/LrY3tfw/747376.png" # https://www.flaticon.com/free-icon/user_747376
	10	+	NODE_IMAGE_COMPANY = "http://i.ibb.co/fx6r1dZ/4812244.png" # https://www.flaticon.com/free-icon/company_4812244
9	11
10		-	@st.cache()
	12	+	# @st.cache()
11	13		def get_subgraph_df():
12		-	return pd.read_csv("subgraphs.csv", index_col="subgraph_hash")
	14	+	return pd.read_csv("./data/subgraphs.csv", index_col="subgraph_hash")
13	15
14	16
15		-	@st.cache()
	17	+	# @st.cache()
16	18		def get_nodes_df():
17		-	return pd.read_csv("nodes.csv")
	19	+	return pd.read_csv("./data/nodes.csv")
18	20
19	21
20		-	@st.cache()
	22	+	# @st.cache()
21	23		def get_edges_df():
22		-	return pd.read_csv("edges.csv")
	24	+	return pd.read_csv("./data/edges.csv")
	25	+
	26	+
	27	+	def get_subgraph_with_risk_score(
	28	+	subgraph_table,
	29	+	weight_chains,
	30	+	weight_cyclic,
	31	+	weight_psc_haven,
	32	+	weight_pep,
	33	+	weight_sanctions,
	34	+	weight_disqualified,
	35	+	):
	36	+
	37	+	out = subgraph_table.copy()
	38	+	out["total_risk"] = (
	39	+	(out["cyclicity"] * weight_cyclic / out["cyclicity"].max())
	40	+	+ (
	41	+	out["multi_jurisdiction"]
	42	+	* weight_psc_haven
	43	+	/ out["multi_jurisdiction"].max()
	44	+	)
	45	+	+ (out["num_sanctions"] * weight_sanctions / out["num_sanctions"].max())
	46	+	+ (out["num_peps"] * weight_pep / out["num_peps"].max())
	47	+	)
	48	+	return out.sort_values(by="total_risk", ascending=False)
23	49
24	50
25		-	@st.cache()
26	51		def build_agraph_components(
27	52		nodes,
28	53		edges,
29		-	node_colour_person=NODE_COLOUR_PERSON,
30		-	node_colour_company=NODE_COLOUR_COMPANY,
31	54		):
32	55		"""Create agraph object from node and edge list"""
33	56
		skipped 7 lines
41	64		id=row["node_id"],
42	65		label=node_metadata["name"],
43	66		size=25,
44		-	color=node_colour_person
	67	+	color=NODE_COLOUR_DODGY
	68	+	if (row["pep"] > 0 or row["sanction"] > 0)
	69	+	else NODE_COLOUR_NON_DODGY,
	70	+	image=NODE_IMAGE_PERSON
45	71		if row["is_person"] == 1
46		-	else node_colour_company,
	72	+	else NODE_IMAGE_COMPANY,
	73	+	shape="circularImage",
47	74		)
48	75		)
49	76
		skipped 11 lines

	1	+	[theme]
	2	+	base="light"

Render subgraphs and sort tables