import pandas as pd
import calendar
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind

# before congestion pricing dataset
df_before = pd.read_csv("data/violations_before.csv")
df_before.head()

# convert firstdate col to datetime
df_before['first_date_only'] = pd.to_datetime(df_before['first_date_only'])

# Keep only 2024 and later
df_before= df_before[df_before['first_date_only'].dt.year == 2024]

# combine daily violations into month (count)
monthly_counts_before = (
    df_before.groupby(df_before['first_date_only'].dt.to_period('M'))['violation_id']
      .count()
      .reset_index()
)


monthly_counts_before.head(40)

# before congestion pricing dataset
df_after = pd.read_csv("data/violations_after.csv")
df_after.head()

df_after['violation_status'].unique()

array(['EXEMPT - EMERGENCY VEHICLE', 'EXEMPT - BUS/PARATRANSIT',
       'EXEMPT - OTHER', 'TECHNICAL ISSUE/OTHER',
       'DRIVER/VEHICLE INFO MISSING', 'EXEMPT - COMMERCIAL UNDER 20',
       'VIOLATION ISSUED'], dtype=object)

# convert firstdate col to datetime
df_after['first_date_only'] = pd.to_datetime(df_after['first_date_only'])

# combine daily violations into month (count)
monthly_counts_after = (
    df_after.groupby(df_after['first_date_only'].dt.to_period('M'))['violation_id']
      .count()
      .reset_index()
)

monthly_counts_after = monthly_counts_after.iloc[:-1]
monthly_counts_after.head(12)

monthly_counts_before['first_date_only'] = monthly_counts_before['first_date_only'].dt.to_timestamp()
monthly_counts_after['first_date_only'] = monthly_counts_after['first_date_only'].dt.to_timestamp()

plt.figure(figsize=(10,6))
plt.plot(monthly_counts_before['first_date_only'], monthly_counts_before['violation_id'], marker='o', label='2024 (Before)')
plt.plot(monthly_counts_after['first_date_only'], monthly_counts_after['violation_id'], marker='o', label='2025 (After)')

plt.xticks(rotation=45)
plt.title("Monthly Violations: 2024 vs 2025")
plt.xlabel("Month")
plt.ylabel("Number of Violations")
plt.legend()
plt.tight_layout()
plt.show()

#Comparing the monthly violation, it showed that there is more violation after congestion pricing.
#Potential Factors: More exempt violation than issued ones, more cameras are implemented,

#Spliting the violation status from Exempt and Violation Issued

#spliting before congestion pricing year 2024
df_2024_issued = df_before[df_before['violation_status'].str.contains("VIOLATION ISSUED", case=False, na=False)]
df_2024_exempt = df_before[df_before['violation_status'].str.contains("EXEMPT", case=False, na=False)]

#spliting before congestion pricing year 2024
df_2025_issued = df_after[df_after['violation_status'].str.contains("VIOLATION ISSUED", case=False, na=False)]
df_2025_exempt = df_after[df_after['violation_status'].str.contains("EXEMPT", case=False, na=False)]

#Compare the amount of monthly violation issued of 2024 to 2025 (before and after congestion pricing)
monthly_2024_issued = (
    df_2024_issued.groupby(df_2024_issued['first_date_only'].dt.to_period('M'))['violation_id']
                  .count()
                  .reset_index()
)
monthly_2025_issued = (
    df_2025_issued.groupby(df_2025_issued['first_date_only'].dt.to_period('M'))['violation_id']
                  .count()
                  .reset_index()
)

# Convert Period → string for plotting
monthly_2024_issued['first_date_only'] = monthly_2024_issued['first_date_only'].astype(str)
monthly_2025_issued['first_date_only'] = monthly_2025_issued['first_date_only'].astype(str)
monthly_2025_issued  = monthly_2025_issued.iloc[:-1]
monthly_2025_issued.head(12)

plt.figure(figsize=(12,6))

plt.plot(monthly_2024_issued['first_date_only'], monthly_2024_issued['violation_id'],
         marker='o', label="2024 Issued (Before)")

plt.plot(monthly_2025_issued['first_date_only'], monthly_2025_issued['violation_id'],
         marker='o', label="2025 Issued (After)")

plt.xticks(rotation=45)
plt.title("Monthly Violations Issued: 2024 vs 2025")
plt.xlabel("Month")
plt.ylabel("Number of Violations Issued")
plt.legend()
plt.tight_layout()
plt.show()

# After congestion pricing, more violations were tracked: 
# possible factor: increased in enforcement

#select Jan–Jul 2024
jan_jul_2024 = monthly_2024_issued[
    monthly_2024_issued['first_date_only'].astype(str).between("2024-01", "2024-07")
]['violation_id']

#select Jan–Jul 2025
jan_jul_2025 = monthly_2025_issued[
    monthly_2025_issued['first_date_only'].astype(str).between("2025-01", "2025-07")
]['violation_id']

#run independent t-test
t_stat, p_val = ttest_ind(jan_jul_2024, jan_jul_2025, equal_var=False)

print("t-statistic:", t_stat)
print("p-value:", p_val)

t-statistic: -10.041066443697138
p-value: 1.1272212126484257e-05

	violation_id	vehicle_id	first_occurrence	last_occurrence	violation_status	violation_type	bus_route_id	violation_latitude	violation_longitude	stop_id	stop_name	bus_stop_latitude	bus_stop_longitude	violation_georeference	bus_stop_georeference	first_date_only	first_time_only	last_date_only	last_time_only
0	453199364	21944709e605608ed8c5467b85d101f1d35aa589850cc1...	2025-01-04 13:19:59	2025-01-04 14:15:28	EXEMPT - COMMERCIAL UNDER 20	MOBILE DOUBLE PARKED	M101	40.798618	-73.939526	405181	3 AV/E 116 ST	40.797798	-73.940040	POINT (-73.939526 40.798618)	POINT (-73.94004 40.797798)	2025-01-04	13:19:59	2025-01-04	14:15:28
1	453077336	9a80c56e9783ef2cb59276cfdb6b2ec025289cf2d017c9...	2025-01-04 15:18:12	2025-01-04 15:27:06	DRIVER/VEHICLE INFO MISSING	MOBILE BUS STOP	M15+	40.796899	-73.937919	401738	2 AV/E 125 ST	40.802473	-73.933855	POINT (-73.937919 40.796899)	POINT (-73.933855 40.802473)	2025-01-04	15:18:12	2025-01-04	15:27:06
2	453072301	e2a9484b77b3923c4cef2c32a6f1138fbf4ed0ec521d37...	2025-01-04 17:13:07	2025-01-04 17:16:40	VIOLATION ISSUED	MOBILE BUS STOP	M101	40.794568	-73.944642	403416	LEXINGTON AV/E 116 ST	40.798149	-73.942022	POINT (-73.944642 40.794568)	POINT (-73.942022 40.798149)	2025-01-04	17:13:07	2025-01-04	17:16:40
3	453052476	5f00dd05f4d96d72c3f7e24a5e915def23fa82d26366ca...	2025-01-04 14:24:17	2025-01-04 14:27:24	VIOLATION ISSUED	MOBILE BUS STOP	M15+	40.703220	-74.010570	403754	WATER ST/WALL ST	40.705045	-74.007758	POINT (-74.01057 40.70322)	POINT (-74.007758 40.705045)	2025-01-04	14:24:17	2025-01-04	14:27:24
4	452953656	421d84e2d1638a96cad98d531e96b5c9d5406dfc443b63...	2025-01-03 06:53:46	2025-01-03 07:23:53	EXEMPT - EMERGENCY VEHICLE	MOBILE DOUBLE PARKED	M101	40.842347	-73.935115	402740	AMSTERDAM AV/W 171 ST	40.841978	-73.935267	POINT (-73.935115 40.842347)	POINT (-73.935267 40.841978)	2025-01-03	06:53:46	2025-01-03	07:23:53

	first_date_only	violation_id
0	2024-01	8491
1	2024-02	7432
2	2024-03	6995
3	2024-04	7135
4	2024-05	6416
5	2024-06	9502
6	2024-07	17379
7	2024-08	17857
8	2024-09	28495
9	2024-10	57741
10	2024-11	50750
11	2024-12	42271

	violation_id	vehicle_id	first_occurrence	last_occurrence	violation_status	violation_type	bus_route_id	violation_latitude	violation_longitude	stop_id	stop_name	bus_stop_latitude	bus_stop_longitude	violation_georeference	bus_stop_georeference	first_date_only	first_time_only	last_date_only	last_time_only
0	489741940	7feac037b62d591ffb1214e356157f3dd197fc22fee5bb...	2025-08-20 10:52:57	2025-08-20 11:16:57	EXEMPT - EMERGENCY VEHICLE	MOBILE BUS STOP	M101	40.815113	-73.955040	401458	AMSTERDAM AV/W 131 ST	40.816009	-73.954424	POINT (-73.95504 40.815113)	POINT (-73.954424 40.816009)	2025-08-20	10:52:57	2025-08-20	11:16:57
1	489741935	e9122b4e6dac9160ed4fca952a3815fe4c737bfc70cb97...	2025-08-20 23:35:29	2025-08-20 23:39:32	EXEMPT - EMERGENCY VEHICLE	MOBILE BUS STOP	M101	40.795630	-73.941683	402704	3 AV/E 109 ST	40.793765	-73.943005	POINT (-73.941683 40.79563)	POINT (-73.943005 40.793765)	2025-08-20	23:35:29	2025-08-20	23:39:32
2	489741747	de04d0589e26e7da94c192dff0be14d105abcc8342c87f...	2025-08-20 23:18:06	2025-08-20 23:24:10	EXEMPT - EMERGENCY VEHICLE	MOBILE BUS LANE	M101	40.808403	-73.946993	402658	W 125 ST/ADAM C POWELL BLVD	40.808700	-73.947836	POINT (-73.946993 40.808403)	POINT (-73.947836 40.8087)	2025-08-20	23:18:06	2025-08-20	23:24:10
3	489741639	6d7caef8bf15e2bf8d46aa1e6dd8c0da1ae2fbc5b31ace...	2025-08-20 22:58:39	2025-08-20 23:02:18	EXEMPT - EMERGENCY VEHICLE	MOBILE DOUBLE PARKED	M101	40.842800	-73.934798	405069	AMSTERDAM AV/W 175 ST	40.843810	-73.934076	POINT (-73.934798 40.8428)	POINT (-73.934076 40.84381)	2025-08-20	22:58:39	2025-08-20	23:02:18
4	489741496	414bf604b556873481cb6a6fa325cef72d37bf7c168385...	2025-08-21 12:01:22	2025-08-21 14:40:41	EXEMPT - EMERGENCY VEHICLE	MOBILE BUS STOP	M42	40.747781	-73.969323	903046	E 41 ST/1 AV	40.747979	-73.969789	POINT (-73.96932254145938 40.74778115789982)	POINT (-73.969789 40.747979)	2025-08-21	12:01:22	2025-08-21	14:40:41

	first_date_only	violation_id
0	2025-01	41393
1	2025-02	37137
2	2025-03	52274
3	2025-04	59384
4	2025-05	64605
5	2025-06	67020
6	2025-07	64922

	first_date_only	violation_id
0	2025-01	22722
1	2025-02	19610
2	2025-03	27442
3	2025-04	32438
4	2025-05	34093
5	2025-06	34710
6	2025-07	32929