import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
from matplotlib import style

# Load the user-page interaction table.
# NOTE(review): read_pickle can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
df = pd.read_pickle('DATA/light_data.pkl')
df = df[df["reaction_time"] != 0]               # drop user-page pairs with no interaction
df

# Lookup table from page_id to a human-readable page_name.
page_info = pd.read_csv('DATA/1000-page-info.csv')[["page_id", "page_name"]]
page_info.head(10)

# Sorted week keys; positional lookups such as weeks[0] further down in the
# file rely on this ordering.
weeks = sorted(df.week_start_date.unique())

# One user-page bipartite graph per week, in the same order as `weeks`.
network_by_week = []

for week in weeks:
    df_this_week = df[df['week_start_date'] == week]
    G = nx.Graph()

    # bipartite = 0 tags user nodes, bipartite = 1 tags page nodes.
    # NOTE(review): a user_id equal to a page_id would collapse into a single
    # node tagged as a page -- TODO confirm the two id spaces are disjoint.
    G.add_nodes_from(df_this_week['user_id'].unique(), bipartite = 0)
    G.add_nodes_from(df_this_week['page_id'].unique(), bipartite = 1)
    # Each (user, page) row becomes an edge whose weight is reaction_time.
    G.add_weighted_edges_from(
       zip(df_this_week['user_id'], df_this_week['page_id'], df_this_week['reaction_time'])
    )

    network_by_week.append(G)

from scipy.sparse.linalg import eigsh

def eigenvector_centrality(network: nx.Graph):
    """Return the eigenvector centrality of the page side of a bipartite graph.

    The user x page biadjacency matrix ``A`` is built from ``network`` and the
    eigenvector of ``A.T @ A`` belonging to its largest algebraic eigenvalue
    is computed with ``eigsh``.

    Args:
        network: Graph whose nodes all carry a ``"bipartite"`` attribute,
            ``0`` for user nodes and ``1`` for page nodes. Matrix entries are
            taken by ``biadjacency_matrix`` with its default edge attribute
            (``"weight"`` per the networkx documentation).

    Returns:
        dict mapping each page node to the absolute value of its component in
        that eigenvector.

    Note:
        ``eigsh`` requires ``k`` to be smaller than the matrix dimension, so
        the graph needs at least two page nodes.
    """
    # Graph node labels are already unique, so sorting them directly yields
    # the same deterministic row/column order as sorting a set of them.
    user_nodes = sorted(n for n, d in network.nodes(data=True) if d["bipartite"] == 0)
    page_nodes = sorted(n for n, d in network.nodes(data=True) if d["bipartite"] == 1)

    A = nx.bipartite.biadjacency_matrix(network,
                                        row_order=user_nodes,
                                        column_order=page_nodes)

    # asfptype() is deprecated in SciPy and no longer available on sparse
    # arrays in recent releases; astype(float) performs the same promotion to
    # a floating-point dtype that eigsh needs.
    page_gram = (A.T @ A).astype(float)     # A' A

    _, evector_page = eigsh(
        page_gram,
        k=1,                    # calculate one only
        which='LA')             # get the largest

    # The sign of an eigenvector is arbitrary, so report magnitudes.
    return dict(zip(page_nodes, np.abs(evector_page[:, 0])))

# The .plot(labels=..., title="...<br><sup>...") calls below pass Plotly-only
# arguments, so pandas has to dispatch plotting to the Plotly backend.
pd.options.plotting.backend = "plotly"

# One {page_id: centrality} dict per week, aligned with `weeks`.
centrality_by_week_page = [eigenvector_centrality(G) for G in network_by_week]

first_week_centrality_df = (pd.DataFrame(
     centrality_by_week_page[0].items(),
     columns=["page_id", "eigenvector_centrality"]
     )
     .merge(page_info[["page_id", "page_name"]], on = "page_id")
     .sort_values(by = "eigenvector_centrality", ascending=False))

first_week_centrality_df.head(10)

# Long-format panel: one row per (page, week). The weekly frames are built
# first and concatenated once, instead of growing a DataFrame inside a loop.
centrality_panel_data = pd.concat(
    [
        pd.DataFrame(
            cen.items(),
            columns=["page_id", "eigenvector_centrality"]
        ).assign(week_start_date=week)
        for week, cen in zip(weeks, centrality_by_week_page)
    ]
)

centrality_panel_data = centrality_panel_data.merge(page_info, on = "page_id")

centrality_panel_data

# Ten most central pages in the first week.
top_10_in_first_week = list(centrality_panel_data[centrality_panel_data['week_start_date'] == weeks[0]]
                        .sort_values(by = "eigenvector_centrality", ascending = False)
                        .head(10)["page_id"])

centrality_panel_data[
    centrality_panel_data["page_id"].isin(top_10_in_first_week)
    ].pivot(
        index = "week_start_date", columns="page_name", values = "eigenvector_centrality"
    ).plot(
        labels = {
            "value" : "Centrality",
            "page_name": "Page Name",
            "week_start_date": "Date"
        },
        title = "Eigenvector Centrality of Fan Pages during 2016 Presidential Election <br>"+\
            "<sup>Top 10 in First Week</sup>"
    )

# Ten most central pages in the ninth week (weeks[8]).
top_10_in_9_25 = list(centrality_panel_data[centrality_panel_data['week_start_date'] == weeks[8]]
                        .sort_values(by = "eigenvector_centrality", ascending = False)
                        .head(10)["page_id"])
centrality_panel_data[
    centrality_panel_data["page_id"].isin(top_10_in_9_25)
    ].pivot(
        index = "week_start_date", columns="page_name", values = "eigenvector_centrality"
    ).plot(
        labels = {
            "value" : "Centrality",
            "page_name": "Page Name",
            "week_start_date": "Date"
        },
        title = "Eigenvector Centrality of Fan Pages during 2016 Presidential Election <br>"+\
        "<sup>Top 10 During Debate</sup>"
    )

# Top five of each of the two rankings on a single chart.
centrality_panel_data[
    centrality_panel_data["page_id"].isin(top_10_in_9_25[:5] + top_10_in_first_week[:5])
    ].pivot(
        index = "week_start_date", columns="page_name", values = "eigenvector_centrality"
    ).plot(
        labels = {
            "value" : "Centrality",
            "page_name": "Page Name",
            "week_start_date": "Date"
        },
        title = "Eigenvector Centrality of Fan Pages during 2016 Presidential Election <br>"+\
        "<sup>Wings Combined</sup>"
    )

from scipy.sparse.linalg import eigsh

def unweightet_eigenvector_centrality(network: nx.Graph):
    """Return the unweighted eigenvector centrality of the page nodes.

    The user x page biadjacency matrix is binarised (every positive entry
    becomes 1) before the eigenvector of ``A.T @ A`` belonging to its largest
    algebraic eigenvalue is computed with ``eigsh``, so only the existence of
    a user-page edge matters, not its weight.

    Args:
        network: Graph whose nodes all carry a ``"bipartite"`` attribute,
            ``0`` for user nodes and ``1`` for page nodes.

    Returns:
        dict mapping each page node to the absolute value of its component in
        that eigenvector.

    Note:
        ``eigsh`` requires ``k`` to be smaller than the matrix dimension, so
        the graph needs at least two page nodes.
    """
    # Graph node labels are already unique, so sorting them directly yields
    # the same deterministic row/column order as sorting a set of them.
    user_nodes = sorted(n for n, d in network.nodes(data=True) if d["bipartite"] == 0)
    page_nodes = sorted(n for n, d in network.nodes(data=True) if d["bipartite"] == 1)

    # `> 0` gives a boolean sparse matrix; `* 1` turns it back into 0/1 numbers.
    A = (nx.bipartite.biadjacency_matrix(network,
                                        row_order=user_nodes,
                                        column_order=page_nodes) > 0) * 1

    # asfptype() is deprecated in SciPy and no longer available on sparse
    # arrays in recent releases; astype(float) performs the same promotion to
    # a floating-point dtype that eigsh needs.
    page_gram = (A.T @ A).astype(float)     # A' A

    _, evector_page = eigsh(
        page_gram,
        k=1,                    # calculate one only
        which='LA')             # get the largest

    # The sign of an eigenvector is arbitrary, so report magnitudes.
    return dict(zip(page_nodes, np.abs(evector_page[:, 0])))

# One {page_id: unweighted centrality} dict per week, aligned with `weeks`.
u_centrality_by_week_page = [unweightet_eigenvector_centrality(G) for G in network_by_week]

# Long-format panel: one row per (page, week). The weekly frames are built
# first and concatenated once, instead of growing a DataFrame inside a loop.
u_centrality_panel_data = pd.concat(
    [
        pd.DataFrame(
            cen.items(),
            columns=["page_id", "unweighted_eigenvector_centrality"]
        ).assign(week_start_date=week)
        for week, cen in zip(weeks, u_centrality_by_week_page)
    ]
)

u_centrality_panel_data = u_centrality_panel_data.merge(page_info, on = "page_id")

# Top five pages of the first week plus top five of the ninth week (weeks[8]).
# Both row filters are built from u_centrality_panel_data itself; a boolean
# mask taken from a different DataFrame is only correct if the two frames
# happen to share the same index and row order.
page_from_left_right = \
        list(u_centrality_panel_data[u_centrality_panel_data['week_start_date'] == weeks[0]]
            .sort_values(by = "unweighted_eigenvector_centrality", ascending = False)
            .head(5)["page_id"]) + \
        list(u_centrality_panel_data[u_centrality_panel_data['week_start_date'] == weeks[8]]
            .sort_values(by = "unweighted_eigenvector_centrality", ascending = False)
            .head(5)["page_id"])


u_centrality_panel_data[
    u_centrality_panel_data["page_id"].isin(page_from_left_right)
    ].pivot(
        index = "week_start_date", columns="page_name", values = "unweighted_eigenvector_centrality"
    ).plot(
        labels = {
            "value" : "Centrality",
            "page_name": "Page Name",
            "week_start_date": "Date"
        },
        title = "Unweighted Eigenvector Centrality of Fan Pages during 2016 Presidential Election <br>"+\
            "<sup>Top 5 in First Week and Top 5 During Debate</sup>"
    )

from itertools import count
def gen_page_comembership_graph(G:nx.Graph):
    """Project a user-page bipartite graph onto its page nodes.

    The biadjacency matrix is binarised and multiplied with its transpose, so
    entry (i, j) of the product counts the users connected to both page i and
    page j. That matrix is turned into a graph whose nodes carry the original
    page labels.

    Args:
        G: Graph whose nodes all carry a ``"bipartite"`` attribute, ``0`` for
            user nodes and ``1`` for page nodes.

    Returns:
        A graph built by ``nx.from_scipy_sparse_array`` from the co-membership
        matrix, relabelled with the page node names.
    """
    user_nodes = sorted(n for n, d in G.nodes(data=True) if d["bipartite"] == 0)
    page_nodes = sorted(n for n, d in G.nodes(data=True) if d["bipartite"] == 1)

    # use unweighted: `> 0` gives booleans, `* 1` turns them back into 0/1
    A = (nx.bipartite.biadjacency_matrix(G, row_order=user_nodes, column_order=page_nodes) > 0)* 1

    # asfptype() is deprecated in SciPy and no longer available on sparse
    # arrays in recent releases; astype(float) is the supported equivalent.
    # NOTE(review): the diagonal of A'A (users per page) is non-zero, so the
    # resulting graph presumably contains weighted self-loops -- confirm that
    # this is intended for the community detection that consumes this graph.
    page_comembership_matrix = (A.T @ A).astype(float)

    new_G = nx.from_scipy_sparse_array(page_comembership_matrix)
    # Matrix index i corresponds to page_nodes[i].
    mapping = dict(enumerate(page_nodes))
    return nx.relabel_nodes(new_G, mapping)

page_comembership_graph_by_week = [gen_page_comembership_graph(G)
                                   for G in network_by_week]

# Louvain is a randomised algorithm; a fixed seed keeps the partitions, and
# every community label derived from them below, reproducible between runs.
LOUVAIN_SEED = 0

communities_by_week: list[list[set]] = [
    nx.community.louvain_communities(G, seed=LOUVAIN_SEED)
    for G in page_comembership_graph_by_week]

# Anchor pages used to name a community. The Trump and Clinton ids match the
# page_info rows shown in this file's output.
# NOTE(review): johnson_page_id is presumably Gary Johnson's page -- confirm.
trump_page_id = 153080620724
clinton_page_id = 889307941125736
johnson_page_id = 165297924363

ideology_frames = []

for week, community_sets in zip(weeks, communities_by_week):
    trump_community = []
    clinton_community = []
    johnson_community = []
    other = []

    for s in community_sets:
        # A community is labelled after the first anchor page it contains
        # (checked in the order Trump, Clinton, Johnson); if two anchors share
        # a community, the later one's list stays empty for that week.
        if trump_page_id in s:
            trump_community = list(s)
            continue

        if clinton_page_id in s:
            clinton_community = list(s)
            continue

        if johnson_page_id in s:
            johnson_community = list(s)
            continue

        other += list(s)

    # prepare for dataframe
    temp_ideology_df = pd.DataFrame(
        {
            'page_id': trump_community + clinton_community + johnson_community + other,
            'community': ["Republican"] * len(trump_community) +
                         ["Democrat"] * len(clinton_community) +
                         ["Libertarian"] * len(johnson_community) +
                          ["Others"] * len(other)
        }
    )
    temp_ideology_df['week_start_date'] = week
    ideology_frames.append(temp_ideology_df)

# Concatenate once instead of growing a DataFrame inside the loop.
ideology_panel = pd.concat(ideology_frames)

# Attach the weekly community label to the unweighted centrality panel.
u_centrality_panel_data_community = u_centrality_panel_data.merge(
    ideology_panel, on = ["week_start_date", "page_id"])

u_centrality_panel_data_community[
    u_centrality_panel_data_community.page_id.isin(page_from_left_right)
    ].query("week_start_date == '2016-07-31'").head(10)

test_page_name = ["Fox News", "CNN", "Donald J. Trump", "Hillary Clinton", "Occupy Democrats", 
                  "The Huffington Post", "The Political Insider", "American News"]

# Week-by-page table of the community label assigned to each test page.
u_centrality_panel_data_community[
    u_centrality_panel_data_community.page_name.isin(test_page_name)
].pivot(
    index = "week_start_date",
    columns = "page_name",
    values = "community"
)[test_page_name]

# Mean unweighted centrality per community and week, excluding "Others".
# The string "mean" selects the same aggregation as np.mean without the
# FutureWarning recent pandas versions emit for NumPy callables.
ideology_centrality_mean_panel = (u_centrality_panel_data_community
                        .query("community != 'Others'")
                        .pivot_table(
                            index = "week_start_date",
                            columns = "community",
                            values = "unweighted_eigenvector_centrality",
                            aggfunc= "mean"
                        )
                    )
ideology_centrality_mean_panel

fig, ax = plt.subplots(1,1, figsize = (10,5))
# NOTE(review): unlike the table above, this plot still includes the "Others"
# community -- confirm that is intended.
sns.lineplot(
    u_centrality_panel_data_community, 
    x = "week_start_date", y = "unweighted_eigenvector_centrality", 
    hue = "community", ax = ax)

ax.set_ylabel("Centrality")
ax.set_xlabel("Date")
ax.legend(title = "Ideology",bbox_to_anchor=(1.02, 0.55), loc='upper left', borderaxespad=0)
ax.set_title("Average Unweighted Eigenvector Centrality Based on Ideology", size = 15)
fig = ax.get_figure()

	user_id	page_id	week_start_date	reaction_time
4143404	502548149224	31160214090	2016-07-31	1
629806	502548149224	177486166274	2016-07-31	2
1394501	502548149224	109200595768753	2016-07-31	2
3579378	504535781680	199098633470668	2016-07-31	9
3308861	511651751124	124955570892789	2016-07-31	1
...	...	...	...	...
3758962	10211264967209740	444258668967650	2016-10-30	3
4320618	10211264967209740	889307941125736	2016-10-30	2
1847862	10211474845295627	47689998796	2016-10-30	1
3396153	10211474845295627	138691142964027	2016-10-30	3
4208844	10211474845295627	997108126967413	2016-10-30	1

	page_id	page_name
0	15704546335	Fox News
1	153080620724	Donald J. Trump
2	346937065399354	Occupy Democrats
3	95475020353	Breitbart
4	889307941125736	Hillary Clinton
5	17614953850	Funny Or Die
6	7976226799	The Daily Show
7	18468761129	The Huffington Post
8	123624513983	Western Journalism
9	182919686769	The Daily Caller

	page_id	eigenvector_centrality	page_name
370	153080620724	0.562609	Donald J. Trump
102	15704546335	0.517959	Fox News
575	133961323610549	0.289411	Donald Trump For President
310	95475020353	0.278827	Breitbart
139	22067606728	0.174215	Allen West
509	112723252096438	0.167596	The Political Insider
650	179035672287016	0.150079	American News
343	123624513983	0.133615	Western Journalism
701	226821494115353	0.120344	Nation In Distress
264	69813760388	0.112960	Sean Hannity

	page_id	eigenvector_centrality	week_start_date	page_name
0	5281959998	0.002696	2016-07-31	The New York Times
1	5281959998	0.008039	2016-08-07	The New York Times
2	5281959998	0.002108	2016-08-14	The New York Times
3	5281959998	0.001636	2016-08-21	The New York Times
4	5281959998	0.001855	2016-08-28	The New York Times
...	...	...	...	...
13133	173347701125	0.000455	2016-10-02	Governor Jan Brewer
13134	173347701125	0.000204	2016-10-09	Governor Jan Brewer
13135	173347701125	0.019458	2016-10-16	Governor Jan Brewer
13136	173347701125	0.028357	2016-10-23	Governor Jan Brewer
13137	173347701125	0.033625	2016-10-30	Governor Jan Brewer

民調來源	希拉蕊贏 (%)	川普贏 (%)	無意見 (%)
FiveThirtyEight	V		-
CNN/ORC	62	27	-
Public Policy Polling	51	40	-
YouGov	57	30	-
Politico/Morning Consult	49	26	25
Echelon Insights	48	22	-
Reuters/Ipsos	56	26	-
NBC News/SurveyMonkey	52	21	26
Gallup	61	27	-
Fox News	61	21	-
ABC News/The Washington Post	53	18	-

風向往哪跑？

美國 2016 總統大選時期 Facebook 粉絲專頁與用戶行為的社會網路探討

結論

讀取資料

社會網路分析

中心性

特徵向量中心性 (eigenvector centrality)

中心性的走向

9月25那一週，發生什麼事？

互動次數為加權的影響

粉專政黨傾向 -- 自動分類

Louvain 演算法

結論

	page_id	unweighted_eigenvector_centrality	week_start_date	page_name	community
1421	15704546335	0.463815	2016-07-31	Fox News	Republican
1547	18468761129	0.063269	2016-07-31	The Huffington Post	Democrat
1939	22067606728	0.172206	2016-07-31	Allen West	Republican
5170	153080620724	0.400613	2016-07-31	Donald J. Trump	Republican
7102	112723252096438	0.203428	2016-07-31	The Political Insider	Republican
7256	114517875225866	0.042081	2016-07-31	The Other 98%	Democrat
9071	179035672287016	0.239638	2016-07-31	American News	Republican
11026	346937065399354	0.063586	2016-07-31	Occupy Democrats	Democrat
12862	889307941125736	0.032486	2016-07-31	Hillary Clinton	Democrat

community	Democrat	Libertarian	Republican
week_start_date
2016-07-31	0.005015	0.002686	0.028381
2016-08-07	0.008039	0.003409	0.027957
2016-08-14	0.004319	0.002716	0.030536
2016-08-21	0.003619	0.003457	0.030032
2016-08-28	0.004382	0.003694	0.029484
2016-09-04	0.003438	0.002570	0.025865
2016-09-11	0.005770	0.002940	0.028071
2016-09-18	0.008601	0.003601	0.025698
2016-09-25	0.019583	0.002632	0.013086
2016-10-02	0.006693	0.002565	0.027114
2016-10-09	0.005332	0.002874	0.026586
2016-10-16	0.006705	0.003947	0.027352
2016-10-23	0.003587	0.003563	0.027935
2016-10-30	0.004020	0.003357	0.028640