SELECT
    ps.parkid,
    ps.dow,
    ps.hr,
    AVG(ps.transqty) / pi2.numS AS mean,
    STD(ps.transqty) / pi2.numS AS std_dev,
    pi2.numS AS tot,
    pi2.parkname AS name
FROM parking_space ps
LEFT JOIN parking_info pi2 ON ps.parkid = pi2.parkid
GROUP BY ps.parkid, dow, hr
having tot>10;

import pandas as pd
avg_weekly = pd.read_csv("/Users/ted/Agg_Weekly_park.csv")

avg_weekly

pivot_table = pd.pivot_table(
    avg_weekly,
    values=['mean', 'std_dev'],
    index=['parkid', 'name'],
    columns=['day_of_week', 'hour_of_day'],
    fill_value=None
)

pivot_table

# 重新命名
pivot_table.columns = [f'w{col[1]}_h{col[2]}_t_{col[0]}' for col in pivot_table.columns]
df = pivot_table.reset_index()

threshold = 20

zero_percentages = (df == 0).sum(axis=1) / df.shape[1] * 100
filtered_df = df[zero_percentages < threshold].reset_index(drop=True)

from sklearn.decomposition import PCA

pca = PCA()
X  = filtered_df.iloc[:, 2:].values # 前兩欄分別是 id 與 name
pca.fit(X);

import matplotlib.pyplot as plt
import numpy as np

plt.rcParams["figure.dpi"] = 200

X_after_pca = pca.fit_transform(X)
exp_var_pca = pca.explained_variance_ratio_
cum_exp_var = np.cumsum(exp_var_pca)

plt.bar(range(0,len(exp_var_pca)), exp_var_pca, alpha=0.5, align='center', label='Individual explained variance')
plt.step(range(0,len(cum_exp_var)), cum_exp_var, where='mid',label='Cumulative explained variance')
plt.hlines(1,-1,10,'grey', '--')
plt.xlim(-1,10)
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal component index')
plt.legend();

plt.scatter(X_after_pca[:,0], X_after_pca[:,1], label = "Projected", s = 10)
plt.xlabel("First Principal Axis");
plt.ylabel("Second Principal Axis");

import sklearn
import sklearn.cluster as cl
from sklearn.metrics import silhouette_score

X_after_pca = PCA(5).fit_transform(X)

methods = [
    cl.DBSCAN(eps= round(0.2 * k, 2)) for k in range(4, 20)
] + [cl.HDBSCAN()]


for i, classification in enumerate(methods):

    class_result = classification.fit_predict(X_after_pca)
    print(i, classification, silhouette_score(X, class_result), sep='\t\t')

0		DBSCAN(eps=0.8)		0.030908239724709574
1		DBSCAN(eps=1.0)		0.024005344458985566
2		DBSCAN(eps=1.2)		0.19847945551904503
3		DBSCAN(eps=1.4)		0.07086482219387516
4		DBSCAN(eps=1.6)		0.3022433195220288
5		DBSCAN(eps=1.8)		0.3908591420120785
6		DBSCAN(eps=2.0)		0.3908591420120785
7		DBSCAN(eps=2.2)		0.3908591420120785
8		DBSCAN(eps=2.4)		0.3908591420120785
9		DBSCAN(eps=2.6)		0.3908591420120785
10		DBSCAN(eps=2.8)		0.49431614265243934
11		DBSCAN(eps=3.0)		0.49431614265243934
12		DBSCAN(eps=3.2)		0.49431614265243934
13		DBSCAN(eps=3.4)		0.49431614265243934
14		DBSCAN(eps=3.6)		0.49431614265243934
15		DBSCAN(eps=3.8)		0.49431614265243934
16		HDBSCAN()		0.09261302832965683

def plot_classification(index:int, method_list:list[sklearn.base.ClusterMixin], ax: plt.Axes = None, **kwargs):
    if ax is None:
        fig, ax = plt.subplots(1,1);
    else:
        fig = ax.get_figure();
        
    _lables = method_list[index].labels_
    for i in set(_lables):
        current_class_index = _lables == i
        ax.scatter(X_after_pca[current_class_index,0], X_after_pca[current_class_index,1], label = i, **kwargs);
    return fig

fig, axs = plt.subplots(1,2, figsize=(10,5))

# e = 1.6
plot_classification(5, methods, s = 20, ax=axs[0]);
axs[0].set_title("DBSCAN: epsilon = 1.8");
# e = 2.8
plot_classification(10, methods, s = 20, ax=axs[1]);
axs[1].set_title("DBSCAN: epsilon = 2.8");
fig.legend();

fig = plot_classification(-1, methods, s = 10);
plt.title("HDBSCAN")
fig.legend();

filtered_df[(filtered_df.name.str.contains("大安森林公園", regex=False)) |
            (filtered_df.name.str.contains("峨眉", regex=False))]

print(methods[-1].labels_[9], methods[-1].labels_[44])
ax = fig.get_axes()[0]
ax.annotate("XiMen",(X_after_pca[9, 0],X_after_pca[9,1]));
ax.annotate("Daan", (X_after_pca[44, 0],X_after_pca[44,1]));
fig

-1 3

from sklearn.cluster import KMeans 

kmean_methods = [KMeans(n_clusters=k, n_init='auto') for k in range(2,10)]

for i, method in enumerate(kmean_methods):

    class_result = method.fit_predict(X_after_pca)
    print(i, method, silhouette_score(X_after_pca, class_result), sep='\t')

0	KMeans(n_clusters=2, n_init='auto')	0.4887518770278574
1	KMeans(n_clusters=3, n_init='auto')	0.4188605079244328
2	KMeans(n_clusters=4, n_init='auto')	0.37022545642063287
3	KMeans(n_clusters=5, n_init='auto')	0.34914910570272634
4	KMeans(n_clusters=6, n_init='auto')	0.3001368802686542
5	KMeans(n_clusters=7, n_init='auto')	0.3243873161616326
6	KMeans(n_init='auto')	0.26306554757605705
7	KMeans(n_clusters=9, n_init='auto')	0.26314494388951676

inertias = [m.inertia_ for m in kmean_methods]
plt.plot(range(2,10), inertias);
plt.title("Elbow Method on Determining the Number of Clusters")
plt.xlabel("Num of Cluster");plt.ylabel("Inertia")

Text(0, 0.5, 'Inertia')

fig_km, axs_km = plt.subplots(1,4, figsize=(15,5), sharey=True)

for k in range(4):
    plot_classification(k, kmean_methods, ax = axs_km[k]);
    axs_km[k].set_title(f"K = {k+2}")

	parkid	day_of_week	hour_of_day	mean	std_dev	tot	name
0	1	1	0	0.421106	0.032556	2043	府前廣場地下停車場
1	1	1	1	0.424592	0.028223	2043	府前廣場地下停車場
2	1	1	2	0.431254	0.022614	2043	府前廣場地下停車場
3	1	1	3	0.435719	0.021331	2043	府前廣場地下停車場
4	1	1	4	0.437168	0.021646	2043	府前廣場地下停車場
...	...	...	...	...	...	...	...
76099	844	7	19	0.000000	0.000000	43	南港一停車場
76100	844	7	20	0.000000	0.000000	43	南港一停車場
76101	844	7	21	0.000000	0.000000	43	南港一停車場
76102	844	7	22	0.000000	0.000000	43	南港一停車場
76103	844	7	23	0.000000	0.000000	43	南港一停車場

		mean										...	std_dev
	day_of_week	1										...	7
	hour_of_day	0	1	2	3	4	5	6	7	8	9	...	14	15	16	17	18	19	20	21	22	23
parkid	name
1	府前廣場地下停車場	0.421106	0.424592	0.431254	0.435719	0.437168	0.437143	0.437451	0.434357	0.435869	0.435599	...	0.084597	0.080629	0.080358	0.085321	0.087082	0.082692	0.083183	0.073328	0.050715	0.032881
2	松壽廣場地下停車場	0.759176	0.789066	0.823278	0.847070	0.868077	0.884634	0.887619	0.877473	0.862216	0.825586	...	0.002054	0.002540	0.007033	0.016616	0.034007	0.024046	0.062292	0.099321	0.080670	0.042448
3	臺北市災害應變中心地下停車場	0.139969	0.134263	0.129508	0.124331	0.121478	0.123838	0.132678	0.120245	0.107883	0.079952	...	0.031716	0.037882	0.026565	0.015224	0.013718	0.014450	0.038249	0.034337	0.034251	0.025750
5	立農公園地下停車場	0.158263	0.153653	0.152223	0.149977	0.148897	0.149218	0.152457	0.163953	0.170810	0.157592	...	0.053645	0.068144	0.056192	0.053291	0.042687	0.044983	0.044725	0.039821	0.036238	0.037991
7	萬華國中地下停車場	0.072748	0.069376	0.064593	0.064272	0.065304	0.071601	0.072713	0.083826	0.094596	0.091981	...	0.047514	0.043207	0.035581	0.040952	0.049147	0.039931	0.036365	0.033947	0.028808	0.026149
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
839	林森台大站	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
841	臺北市立明倫高級中學停車場	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
842	弘盛開發股份有限公司停車場	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
843	吉拾停車場台大尊賢場	0.372449	0.372236	0.372236	0.372449	0.372449	0.372449	0.372662	0.374787	0.368835	0.373937	...	0.030039	0.059590	0.073072	0.067825	0.099040	0.101176	0.074218	0.060103	0.074097	0.077558
844	南港一停車場	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000

	parkid	name	w1_h0_t_mean	w1_h1_t_mean	w1_h2_t_mean	w1_h3_t_mean	w1_h4_t_mean	w1_h5_t_mean	w1_h6_t_mean	w1_h7_t_mean	...	w7_h14_t_std_dev	w7_h15_t_std_dev	w7_h16_t_std_dev	w7_h17_t_std_dev	w7_h18_t_std_dev	w7_h19_t_std_dev	w7_h20_t_std_dev	w7_h21_t_std_dev	w7_h22_t_std_dev	w7_h23_t_std_dev
9	15	峨眉立體停車場	0.602972	0.687351	0.731729	0.754734	0.767604	0.773315	0.773878	0.776211	...	0.003967	0.010019	0.028323	0.027535	0.012231	0.002326	0.015601	0.055169	0.088419	0.073061
44	56	大安森林公園地下停車場	0.276852	0.275976	0.275412	0.275333	0.274615	0.274232	0.268632	0.259637	...	0.027648	0.025743	0.029795	0.042588	0.032717	0.031523	0.029337	0.020106	0.016430	0.015290

可否根據停車場空位性質，區分種類？¶

利用自行搜集之橫斷面資料，透過非監督式機器學習探討停車場的種類¶

資料¶

主成分分析 (PCA)¶

Density Based 分群法¶

KMeans 分群法¶

結論¶