{ "file_info": { "file_path": "./projects_cleaned.csv", "total_rows": 355, "total_columns": 22, "memory_usage_mb": 0.3523597717285156, "total_cells": 7810 }, "structural_info": { "column_names": [ "project_number", "project_en", "developer_number", "developer_en", "start_date", "end_date", "adoption_date", "prj_type_en", "project_value", "escrow_account_number", "project_status", "percent_completed", "inspection_date", "completion_date", "description_en", "area_en", "zone_en", "cnt_land", "cnt_building", "cnt_villa", "cnt_unit", "master_project_en" ], "dtypes": { "project_number": "object", "project_en": "object", "developer_number": "object", "developer_en": "object", "start_date": "object", "end_date": "object", "adoption_date": "object", "prj_type_en": "object", "project_value": "float64", "escrow_account_number": "object", "project_status": "object", "percent_completed": "float64", "inspection_date": "object", "completion_date": "object", "description_en": "object", "area_en": "object", "zone_en": "object", "cnt_land": "float64", "cnt_building": "float64", "cnt_villa": "float64", "cnt_unit": "float64", "master_project_en": "object" }, "index_info": { "type": "", "is_unique": true, "has_duplicates": "False" } }, "columns_analysis": { "project_number": { "dtype": "object", "non_null_count": 355, "null_count": 0, "null_percentage": 0.0, "unique_values": 354, "unique_percentage": 99.72, "categorical_info": { "most_frequent": "3593", "most_frequent_count": 2, "least_frequent": "4167", "least_frequent_count": 1, "top_5_values": { "3593": 2, "807": 1, "986": 1, "2895": 1, "4063": 1 } }, "string_length": { "min": 3, "max": 122, "mean": 5.121126760563381, "median": 4.0 } }, "project_en": { "dtype": "object", "non_null_count": 340, "null_count": 15, "null_percentage": 4.23, "unique_values": 339, "unique_percentage": 99.71, "categorical_info": { "most_frequent": "d villas", "most_frequent_count": 2, "least_frequent": "rockland square", "least_frequent_count": 1, "top_5_values": { "d villas": 2, "samana resorts": 1, "reef 996": 1, "marina place 1": 1, "paradise one": 1 } }, "string_length": { "min": 4, "max": 65, "mean": 18.66764705882353, "median": 16.0 } }, "developer_number": { "dtype": "object", "non_null_count": 341, "null_count": 14, "null_percentage": 3.94, "unique_values": 216, "unique_percentage": 63.34, "categorical_info": { "most_frequent": "1002", "most_frequent_count": 17, "least_frequent": "2233", "least_frequent_count": 1, "top_5_values": { "1002": 17, "1866": 10, "2035": 9, "1051": 7, "1598": 7 } }, "string_length": { "min": 1, "max": 13, "mean": 3.9237536656891496, "median": 4.0 } }, "developer_en": { "dtype": "object", "non_null_count": 341, "null_count": 14, "null_percentage": 3.94, "unique_values": 215, "unique_percentage": 63.05, "categorical_info": { "most_frequent": "azizi developments l.l.c", "most_frequent_count": 17, "least_frequent": "cds real estate development l.l.c", "least_frequent_count": 1, "top_5_values": { "azizi developments l.l.c": 17, "damac mry investment l.l.c": 10, "damac elite investment co. l.l.c": 9, "binghatti developers fze": 7, "samana signature real estate developments l.l.c": 7 } }, "string_length": { "min": 1, "max": 80, "mean": 33.88563049853372, "median": 34.0 } }, "start_date": { "dtype": "object", "non_null_count": 341, "null_count": 14, "null_percentage": 3.94, "unique_values": 91, "unique_percentage": 26.69, "categorical_info": { "most_frequent": "2025-06-01 00:00:00", "most_frequent_count": 29, "least_frequent": "2025-09-22 00:00:00", "least_frequent_count": 1, "top_5_values": { "2025-06-01 00:00:00": 29, "2025-08-01 00:00:00": 26, "2025-09-01 00:00:00": 26, "2025-10-01 00:00:00": 25, "2025-07-01 00:00:00": 22 } }, "string_length": { "min": 1, "max": 147, "mean": 19.21700879765396, "median": 19.0 } }, "end_date": { "dtype": "object", "non_null_count": 340, "null_count": 15, "null_percentage": 4.23, "unique_values": 145, "unique_percentage": 42.65, "categorical_info": { "most_frequent": "2028-12-31 00:00:00", "most_frequent_count": 27, "least_frequent": "2027-03-22 00:00:00", "least_frequent_count": 1, "top_5_values": { "2028-12-31 00:00:00": 27, "2028-06-30 00:00:00": 13, "2027-09-30 00:00:00": 12, "2027-12-31 00:00:00": 12, "2029-03-31 00:00:00": 11 } }, "string_length": { "min": 1, "max": 24, "mean": 18.86764705882353, "median": 19.0 } }, "adoption_date": { "dtype": "object", "non_null_count": 254, "null_count": 101, "null_percentage": 28.45, "unique_values": 247, "unique_percentage": 97.24, "categorical_info": { "most_frequent": "2025-05-12 09:10:22", "most_frequent_count": 4, "least_frequent": "2025-10-07 13:40:37", "least_frequent_count": 1, "top_5_values": { "2025-05-12 09:10:22": 4, "2025-05-02 10:08:15": 2, "2025-05-07 11:23:23": 2, "2025-04-03 00:00:00": 2, "2025-05-05 00:00:00": 2 } }, "string_length": { "min": 19, "max": 23, "mean": 19.015748031496063, "median": 19.0 } }, "prj_type_en": { "dtype": "object", "non_null_count": 337, "null_count": 18, "null_percentage": 5.07, "unique_values": 1, "unique_percentage": 0.3, "categorical_info": { "most_frequent": "normal", "most_frequent_count": 337, "least_frequent": "normal", "least_frequent_count": 337, "top_5_values": { "normal": 337 } }, "string_length": { "min": 6, "max": 6, "mean": 6.0, "median": 6.0 } }, "project_value": { "dtype": "float64", "non_null_count": 336, "null_count": 19, "null_percentage": 5.35, "unique_values": 241, "unique_percentage": 71.73, "statistics": { "mean": 194081395.3690476, "median": 79570929.5, "std": 350877262.29434377, "min": 1000000.0, "max": 3033696240.0, "q25": 39500000.0, "q75": 188019286.75, "skewness": 4.632742026266175, "kurtosis": 27.186612055159774 }, "outliers": { "count": 41, "percentage": 11.55 } }, "escrow_account_number": { "dtype": "object", "non_null_count": 266, "null_count": 89, "null_percentage": 25.07, "unique_values": 265, "unique_percentage": 99.62, "categorical_info": { "most_frequent": "14041024920004", "most_frequent_count": 2, "least_frequent": "0205902532504", "least_frequent_count": 1, "top_5_values": { "14041024920004": 2, "012802437844": 1, "0012405538002": 1, "001582146524301": 1, "0012315743002": 1 } }, "string_length": { "min": 8, "max": 22, "mean": 13.041353383458647, "median": 13.0 } }, "project_status": { "dtype": "object", "non_null_count": 337, "null_count": 18, "null_percentage": 5.07, "unique_values": 3, "unique_percentage": 0.89, "categorical_info": { "most_frequent": "active", "most_frequent_count": 250, "least_frequent": "cancelled", "least_frequent_count": 2, "top_5_values": { "active": 250, "pending": 85, "cancelled": 2 } }, "string_length": { "min": 6, "max": 9, "mean": 6.270029673590504, "median": 6.0 } }, "percent_completed": { "dtype": "float64", "non_null_count": 313, "null_count": 42, "null_percentage": 11.83, "unique_values": 52, "unique_percentage": 16.61, "statistics": { "mean": 1.9477635782747607, "median": 0.0, "std": 8.867299446255698, "min": 0.0, "max": 68.05, "q25": 0.0, "q75": 0.0, "skewness": 5.342075911424932, "kurtosis": 29.34031228392463 }, "outliers": { "count": 55, "percentage": 15.49 } }, "inspection_date": { "dtype": "object", "non_null_count": 314, "null_count": 41, "null_percentage": 11.55, "unique_values": 313, "unique_percentage": 99.68, "categorical_info": { "most_frequent": "2025-02-27 19:48:36", "most_frequent_count": 2, "least_frequent": "2024-10-16 13:10:52", "least_frequent_count": 1, "top_5_values": { "2025-02-27 19:48:36": 2, "2025-10-13 18:21:58": 1, "2019-05-16 12:18:08": 1, "2025-04-24 16:25:26": 1, "2024-04-03 15:20:25": 1 } }, "string_length": { "min": 19, "max": 47, "mean": 19.089171974522294, "median": 19.0 } }, "completion_date": { "dtype": "object", "non_null_count": 2, "null_count": 353, "null_percentage": 99.44, "unique_values": 2, "unique_percentage": 100.0, "categorical_info": { "most_frequent": "2011-05-30 00:00:00", "most_frequent_count": 1, "least_frequent": "co-working spaces and a kids club. facilities in the complex open to members of the public include a selection of fine dining outlets and retail units on the podium level", "least_frequent_count": 1, "top_5_values": { "2011-05-30 00:00:00": 1, "co-working spaces and a kids club. facilities in the complex open to members of the public include a selection of fine dining outlets and retail units on the podium level": 1 } }, "string_length": { "min": 19, "max": 170, "mean": 94.5, "median": 94.5 } }, "description_en": { "dtype": "object", "non_null_count": 337, "null_count": 18, "null_percentage": 5.07, "unique_values": 319, "unique_percentage": 94.66, "categorical_info": { "most_frequent": "project consists of g+3p+14 residential building located in dubai investment park second, dubai. the building is designed to have a concrete structure and block with internal and external finishes.", "most_frequent_count": 4, "least_frequent": "b+g+3p+17", "least_frequent_count": 1, "top_5_values": { "project consists of g+3p+14 residential building located in dubai investment park second, dubai. the building is designed to have a concrete structure and block with internal and external finishes.": 4, "g+2p+12+roof": 3, "project consists of g+2p+8 residential building located in dubai investment park second, dubai. the building is designed to have a concrete structure and block with internal and external finishes.": 3, "3b + ground + m + 3ba + 16 + r": 3, "g+4+roof": 2 } }, "string_length": { "min": 1, "max": 783, "mean": 92.31750741839762, "median": 46.0 } }, "area_en": { "dtype": "object", "non_null_count": 337, "null_count": 18, "null_percentage": 5.07, "unique_values": 54, "unique_percentage": 16.02, "categorical_info": { "most_frequent": "palm deira", "most_frequent_count": 46, "least_frequent": "al hebiah second", "least_frequent_count": 1, "top_5_values": { "palm deira": 46, "madinat al mataar": 26, "wadi al safa 5": 24, "wadi al safa 3": 24, "al barsha south fourth": 24 } }, "string_length": { "min": 6, "max": 130, "mean": 16.13946587537092, "median": 14.0 } }, "zone_en": { "dtype": "object", "non_null_count": 239, "null_count": 116, "null_percentage": 32.68, "unique_values": 8, "unique_percentage": 3.35, "categorical_info": { "most_frequent": "dubai development authority (dda)", "most_frequent_count": 98, "least_frequent": "which is described as follows:", "least_frequent_count": 1, "top_5_values": { "dubai development authority (dda)": 98, "trakhees": 83, "dubai municipality": 41, "dubai south": 11, "dubai silicon oasis authority": 3 } }, "string_length": { "min": 8, "max": 60, "mean": 20.824267782426777, "median": 18.0 } }, "cnt_land": { "dtype": "float64", "non_null_count": 332, "null_count": 23, "null_percentage": 6.48, "unique_values": 7, "unique_percentage": 2.11, "statistics": { "mean": 0.2921686746987952, "median": 0.0, "std": 1.714219576647455, "min": 0.0, "max": 24.0, "q25": 0.0, "q75": 0.0, "skewness": 10.820064131532215, "kurtosis": 131.26326033728665 }, "outliers": { "count": 44, "percentage": 12.39 } }, "cnt_building": { "dtype": "float64", "non_null_count": 332, "null_count": 23, "null_percentage": 6.48, "unique_values": 1, "unique_percentage": 0.3, "statistics": { "mean": 0.0, "median": 0.0, "std": 0.0, "min": 0.0, "max": 0.0, "q25": 0.0, "q75": 0.0, "skewness": 0.0, "kurtosis": 0.0 }, "outliers": { "count": 0, "percentage": 0.0 } }, "cnt_villa": { "dtype": "float64", "non_null_count": 332, "null_count": 23, "null_percentage": 6.48, "unique_values": 29, "unique_percentage": 8.73, "statistics": { "mean": 22.087349397590362, "median": 0.0, "std": 95.07676127394544, "min": 0.0, "max": 1121.0, "q25": 0.0, "q75": 0.0, "skewness": 6.655005677088903, "kurtosis": 59.90362806054476 }, "outliers": { "count": 31, "percentage": 8.73 } }, "cnt_unit": { "dtype": "float64", "non_null_count": 332, "null_count": 23, "null_percentage": 6.48, "unique_values": 191, "unique_percentage": 57.53, "statistics": { "mean": 234.24698795180723, "median": 116.0, "std": 445.42724575376207, "min": 0.0, "max": 4526.0, "q25": 47.0, "q75": 269.75, "skewness": 6.465435974535892, "kurtosis": 54.86574287118425 }, "outliers": { "count": 26, "percentage": 7.32 } }, "master_project_en": { "dtype": "object", "non_null_count": 2, "null_count": 353, "null_percentage": 99.44, "unique_values": 2, "unique_percentage": 100.0, "categorical_info": { "most_frequent": "maison elysee iii by pantheon", "most_frequent_count": 1, "least_frequent": "mohammed bin rashid al maktoum city -district one west - phase 2", "least_frequent_count": 1, "top_5_values": { "maison elysee iii by pantheon": 1, "mohammed bin rashid al maktoum city -district one west - phase 2": 1 } }, "string_length": { "min": 29, "max": 64, "mean": 46.5, "median": 46.5 } } }, "data_quality": { "total_missing_values": 1350, "missing_percentage": 17.29, "columns_with_missing": { "project_en": 15, "developer_number": 14, "developer_en": 14, "start_date": 14, "end_date": 15, "adoption_date": 101, "prj_type_en": 18, "project_value": 19, "escrow_account_number": 89, "project_status": 18, "percent_completed": 42, "inspection_date": 41, "completion_date": 353, "description_en": 18, "area_en": 18, "zone_en": 116, "cnt_land": 23, "cnt_building": 23, "cnt_villa": 23, "cnt_unit": 23, "master_project_en": 353 }, "missing_percentage_by_column": { "project_en": 4.23, "developer_number": 3.94, "developer_en": 3.94, "start_date": 3.94, "end_date": 4.23, "adoption_date": 28.45, "prj_type_en": 5.07, "project_value": 5.35, "escrow_account_number": 25.07, "project_status": 5.07, "percent_completed": 11.83, "inspection_date": 11.55, "completion_date": 99.44, "description_en": 5.07, "area_en": 5.07, "zone_en": 32.68, "cnt_land": 6.48, "cnt_building": 6.48, "cnt_villa": 6.48, "cnt_unit": 6.48, "master_project_en": 99.44 }, "duplicate_rows": 0, "duplicate_percentage": 0.0 }, "statistical_summary": { "numeric_columns_count": 6, "categorical_columns_count": 16, "datetime_columns_count": 0, "data_types_distribution": { "object": 16, "float64": 6 }, "potential_identifier_columns": [] } }