🤬
  • ■ ■ ■ ■ ■ ■
    .streamlit/config.toml
     1 +[theme]
     2 +base="light"
  • ■ ■ ■ ■ ■ ■
    src/app.py
    1 1  import streamlit as st
    2  -import pandas as pd
    3 2  from streamlit_agraph import agraph, Config
    4  -from utils import build_agraph_components, get_edges_df, get_subgraph_df, get_nodes_df
     3 +from utils import (
     4 + build_agraph_components,
     5 + get_edges_df,
     6 + get_subgraph_df,
     7 + get_nodes_df,
     8 + get_subgraph_with_risk_score,
     9 +)
    5 10   
    6 11   
    7 12  st.set_page_config(layout="wide")
    8 13   
    9 14   
    10  -SLIDER_MIN = 1
     15 +SLIDER_MIN = 0
    11 16  SLIDER_MAX = 100
    12 17  SLIDER_DEFAULT = 50
     18 +DEFAULT_NUM_SUBGRAPHS_TO_SHOW = 3
    13 19   
    14 20  nodes = get_nodes_df()
    15 21  edges = get_edges_df()
    skipped 2 lines
    18 24  with st.sidebar:
    19 25   st.title("Corporate risks")
    20 26   
    21  - weight_chains = st.slider(
    22  - "Long ownership chains",
    23  - min_value=SLIDER_MIN,
    24  - max_value=SLIDER_MAX,
    25  - value=SLIDER_DEFAULT,
     27 + weight_chains = (
     28 + st.slider(
     29 + "Long ownership chains",
     30 + min_value=SLIDER_MIN,
     31 + max_value=SLIDER_MAX,
     32 + value=SLIDER_DEFAULT,
     33 + )
     34 + / SLIDER_MAX
    26 35   )
    27  - weight_cyclic = st.slider(
    28  - "Cyclic ownership",
    29  - min_value=SLIDER_MIN,
    30  - max_value=SLIDER_MAX,
    31  - value=SLIDER_DEFAULT,
     36 + weight_cyclic = (
     37 + st.slider(
     38 + "Cyclic ownership",
     39 + min_value=SLIDER_MIN,
     40 + max_value=SLIDER_MAX,
     41 + value=SLIDER_DEFAULT,
     42 + )
     43 + / SLIDER_MAX
    32 44   )
    33  - weight_psc_haven = st.slider(
    34  - "Persons of significant control associated with tax havens",
    35  - min_value=SLIDER_MIN,
    36  - max_value=SLIDER_MAX,
    37  - value=SLIDER_DEFAULT,
     45 + weight_psc_haven = (
     46 + st.slider(
     47 + "Persons of significant control associated with tax havens",
     48 + min_value=SLIDER_MIN,
     49 + max_value=SLIDER_MAX,
     50 + value=SLIDER_DEFAULT,
     51 + )
     52 + / SLIDER_MAX
    38 53   )
    39  - weight_pep = st.slider(
    40  - "Officers/PSCs are politically exposed",
    41  - min_value=SLIDER_MIN,
    42  - max_value=SLIDER_MAX,
    43  - value=SLIDER_DEFAULT,
     54 + weight_pep = (
     55 + st.slider(
     56 + "Officers/PSCs are politically exposed",
     57 + min_value=SLIDER_MIN,
     58 + max_value=SLIDER_MAX,
     59 + value=SLIDER_DEFAULT,
     60 + )
     61 + / SLIDER_MAX
    44 62   )
    45  - weight_sanctions = st.slider(
    46  - "Officers/PSCs/Companies are sanctioned",
    47  - min_value=SLIDER_MIN,
    48  - max_value=SLIDER_MAX,
    49  - value=SLIDER_DEFAULT,
     63 + weight_sanctions = (
     64 + st.slider(
     65 + "Officers/PSCs/Companies are sanctioned",
     66 + min_value=SLIDER_MIN,
     67 + max_value=SLIDER_MAX,
     68 + value=SLIDER_DEFAULT,
     69 + )
     70 + / SLIDER_MAX
    50 71   )
    51  - weight_disqualified = st.slider(
    52  - "Officers are disqualified directors",
    53  - min_value=SLIDER_MIN,
    54  - max_value=SLIDER_MAX,
    55  - value=SLIDER_DEFAULT,
     72 + weight_disqualified = (
     73 + st.slider(
     74 + "Officers are disqualified directors",
     75 + min_value=SLIDER_MIN,
     76 + max_value=SLIDER_MAX,
     77 + value=SLIDER_DEFAULT,
     78 + )
     79 + / SLIDER_MAX
    56 80   )
    57  - 
    58 81   custom_names_a = st.multiselect(
    59 82   label="Custom persons of interest",
    60 83   options=nodes["node_id"],
    skipped 5 lines
    66 89   
    67 90   
    68 91  with st.container():
    69  - st.write(subgraphs)
     92 + 
     93 + subgraph_with_risk_scores = get_subgraph_with_risk_score(
     94 + subgraphs,
     95 + weight_chains=weight_chains,
     96 + weight_cyclic=weight_cyclic,
     97 + weight_psc_haven=weight_psc_haven,
     98 + weight_pep=weight_pep,
     99 + weight_sanctions=weight_sanctions,
     100 + weight_disqualified=weight_disqualified,
     101 + )
     102 + 
     103 + st.dataframe(data=subgraph_with_risk_scores, use_container_width=True)
    70 104   
    71  - selected_subgraph_hash = st.selectbox(
    72  - label="Select subgraph to explore", options=subgraphs.index
     105 + selected_subgraph_hashes = st.multiselect(
     106 + label="Select corporate network(s) to explore",
     107 + options=list(subgraph_with_risk_scores.index),
     108 + default=list(
     109 + subgraph_with_risk_scores.head(DEFAULT_NUM_SUBGRAPHS_TO_SHOW).index
     110 + ),
    73 111   )
    74 112   
    75  -nodes_selected = nodes.loc[nodes["subgraph_hash"] == selected_subgraph_hash]
    76  -edges_selected = edges.loc[edges["subgraph_hash"] == selected_subgraph_hash]
    77 113   
    78 114  with st.container():
     115 + num_subgraphs_to_display = len(selected_subgraph_hashes)
     116 + cols = st.columns(num_subgraphs_to_display)
    79 117   
    80  - col1, col2 = st.columns(2)
     118 + for c, subgraph_hash in enumerate(selected_subgraph_hashes):
     119 + nodes_selected = nodes.loc[nodes["subgraph_hash"] == subgraph_hash]
     120 + edges_selected = edges.loc[edges["subgraph_hash"] == subgraph_hash]
    81 121   
    82  - with col1:
    83  - (node_objects, edge_objects) = build_agraph_components(
    84  - nodes_selected, edges_selected
    85  - )
    86  - agraph(
    87  - nodes=node_objects,
    88  - edges=edge_objects,
    89  - config=Config(
    90  - width=500,
    91  - height=500,
    92  - ),
    93  - )
     122 + with cols[c]:
     123 + (node_objects, edge_objects) = build_agraph_components(
     124 + nodes_selected, edges_selected
     125 + )
     126 + agraph(
     127 + nodes=node_objects,
     128 + edges=edge_objects,
     129 + config=Config(
     130 + width=round(1080 / num_subgraphs_to_display),
     131 + height=200,
     132 + ),
     133 + )
     134 + 
     135 + st.markdown("*People*")
     136 + st.dataframe(
     137 + nodes_selected.query("is_person == 1"),
     138 + use_container_width=True,
     139 + )
    94 140   
    95  - with col2:
    96  - st.write(nodes_selected)
     141 + st.markdown("*Companies*")
     142 + st.dataframe(
     143 + nodes_selected.query("is_person == 0"),
     144 + use_container_width=True,
     145 + )
    97 146   
  • ■ ■ ■ ■ ■ ■
    src/utils.py
     1 +from curses import use_default_colors
    1 2  import streamlit as st
    2 3  from streamlit_agraph import Node, Edge
    3 4  import json
    4 5  import pandas as pd
    5 6   
    6  -NODE_COLOUR_PERSON = "#4684B2"
    7  -NODE_COLOUR_COMPANY = "#46B247"
    8  - 
     7 +NODE_COLOUR_NON_DODGY = "#72EF77"
     8 +NODE_COLOUR_DODGY = "#EF7272"
     9 +NODE_IMAGE_PERSON = "http://i.ibb.co/LrY3tfw/747376.png" # https://www.flaticon.com/free-icon/user_747376
     10 +NODE_IMAGE_COMPANY = "http://i.ibb.co/fx6r1dZ/4812244.png" # https://www.flaticon.com/free-icon/company_4812244
    9 11   
    10  -@st.cache()
     12 +# @st.cache()
    11 13  def get_subgraph_df():
    12  - return pd.read_csv("subgraphs.csv", index_col="subgraph_hash")
     14 + return pd.read_csv("./data/subgraphs.csv", index_col="subgraph_hash")
    13 15   
    14 16   
    15  -@st.cache()
     17 +# @st.cache()
    16 18  def get_nodes_df():
    17  - return pd.read_csv("nodes.csv")
     19 + return pd.read_csv("./data/nodes.csv")
    18 20   
    19 21   
    20  -@st.cache()
     22 +# @st.cache()
    21 23  def get_edges_df():
    22  - return pd.read_csv("edges.csv")
     24 + return pd.read_csv("./data/edges.csv")
     25 + 
     26 + 
     27 +def get_subgraph_with_risk_score(
     28 + subgraph_table,
     29 + weight_chains,
     30 + weight_cyclic,
     31 + weight_psc_haven,
     32 + weight_pep,
     33 + weight_sanctions,
     34 + weight_disqualified,
     35 +):
     36 + 
     37 + out = subgraph_table.copy()
     38 + out["total_risk"] = (
     39 + (out["cyclicity"] * weight_cyclic / out["cyclicity"].max())
     40 + + (
     41 + out["multi_jurisdiction"]
     42 + * weight_psc_haven
     43 + / out["multi_jurisdiction"].max()
     44 + )
     45 + + (out["num_sanctions"] * weight_sanctions / out["num_sanctions"].max())
     46 + + (out["num_peps"] * weight_pep / out["num_peps"].max())
     47 + )
     48 + return out.sort_values(by="total_risk", ascending=False)
    23 49   
    24 50   
    25  -@st.cache()
    26 51  def build_agraph_components(
    27 52   nodes,
    28 53   edges,
    29  - node_colour_person=NODE_COLOUR_PERSON,
    30  - node_colour_company=NODE_COLOUR_COMPANY,
    31 54  ):
    32 55   """Create agraph object from node and edge list"""
    33 56   
    skipped 7 lines
    41 64   id=row["node_id"],
    42 65   label=node_metadata["name"],
    43 66   size=25,
    44  - color=node_colour_person
     67 + color=NODE_COLOUR_DODGY
     68 + if (row["pep"] > 0 or row["sanction"] > 0)
     69 + else NODE_COLOUR_NON_DODGY,
     70 + image=NODE_IMAGE_PERSON
    45 71   if row["is_person"] == 1
    46  - else node_colour_company,
     72 + else NODE_IMAGE_COMPANY,
     73 + shape="circularImage",
    47 74   )
    48 75   )
    49 76   
    skipped 11 lines
Please wait...
Page is in error, reload to recover