dld_backend/projects_cleaned.json
2025-10-30 12:13:02 +05:30

686 lines
19 KiB
JSON

{
"file_info": {
"file_path": "./projects_cleaned.csv",
"total_rows": 355,
"total_columns": 22,
"memory_usage_mb": 0.3523597717285156,
"total_cells": 7810
},
"structural_info": {
"column_names": [
"project_number",
"project_en",
"developer_number",
"developer_en",
"start_date",
"end_date",
"adoption_date",
"prj_type_en",
"project_value",
"escrow_account_number",
"project_status",
"percent_completed",
"inspection_date",
"completion_date",
"description_en",
"area_en",
"zone_en",
"cnt_land",
"cnt_building",
"cnt_villa",
"cnt_unit",
"master_project_en"
],
"dtypes": {
"project_number": "object",
"project_en": "object",
"developer_number": "object",
"developer_en": "object",
"start_date": "object",
"end_date": "object",
"adoption_date": "object",
"prj_type_en": "object",
"project_value": "float64",
"escrow_account_number": "object",
"project_status": "object",
"percent_completed": "float64",
"inspection_date": "object",
"completion_date": "object",
"description_en": "object",
"area_en": "object",
"zone_en": "object",
"cnt_land": "float64",
"cnt_building": "float64",
"cnt_villa": "float64",
"cnt_unit": "float64",
"master_project_en": "object"
},
"index_info": {
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
"is_unique": true,
"has_duplicates": false
}
},
"columns_analysis": {
"project_number": {
"dtype": "object",
"non_null_count": 355,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 354,
"unique_percentage": 99.72,
"categorical_info": {
"most_frequent": "3593",
"most_frequent_count": 2,
"least_frequent": "4167",
"least_frequent_count": 1,
"top_5_values": {
"3593": 2,
"807": 1,
"986": 1,
"2895": 1,
"4063": 1
}
},
"string_length": {
"min": 3,
"max": 122,
"mean": 5.121126760563381,
"median": 4.0
}
},
"project_en": {
"dtype": "object",
"non_null_count": 340,
"null_count": 15,
"null_percentage": 4.23,
"unique_values": 339,
"unique_percentage": 99.71,
"categorical_info": {
"most_frequent": "d villas",
"most_frequent_count": 2,
"least_frequent": "rockland square",
"least_frequent_count": 1,
"top_5_values": {
"d villas": 2,
"samana resorts": 1,
"reef 996": 1,
"marina place 1": 1,
"paradise one": 1
}
},
"string_length": {
"min": 4,
"max": 65,
"mean": 18.66764705882353,
"median": 16.0
}
},
"developer_number": {
"dtype": "object",
"non_null_count": 341,
"null_count": 14,
"null_percentage": 3.94,
"unique_values": 216,
"unique_percentage": 63.34,
"categorical_info": {
"most_frequent": "1002",
"most_frequent_count": 17,
"least_frequent": "2233",
"least_frequent_count": 1,
"top_5_values": {
"1002": 17,
"1866": 10,
"2035": 9,
"1051": 7,
"1598": 7
}
},
"string_length": {
"min": 1,
"max": 13,
"mean": 3.9237536656891496,
"median": 4.0
}
},
"developer_en": {
"dtype": "object",
"non_null_count": 341,
"null_count": 14,
"null_percentage": 3.94,
"unique_values": 215,
"unique_percentage": 63.05,
"categorical_info": {
"most_frequent": "azizi developments l.l.c",
"most_frequent_count": 17,
"least_frequent": "cds real estate development l.l.c",
"least_frequent_count": 1,
"top_5_values": {
"azizi developments l.l.c": 17,
"damac mry investment l.l.c": 10,
"damac elite investment co. l.l.c": 9,
"binghatti developers fze": 7,
"samana signature real estate developments l.l.c": 7
}
},
"string_length": {
"min": 1,
"max": 80,
"mean": 33.88563049853372,
"median": 34.0
}
},
"start_date": {
"dtype": "object",
"non_null_count": 341,
"null_count": 14,
"null_percentage": 3.94,
"unique_values": 91,
"unique_percentage": 26.69,
"categorical_info": {
"most_frequent": "2025-06-01 00:00:00",
"most_frequent_count": 29,
"least_frequent": "2025-09-22 00:00:00",
"least_frequent_count": 1,
"top_5_values": {
"2025-06-01 00:00:00": 29,
"2025-08-01 00:00:00": 26,
"2025-09-01 00:00:00": 26,
"2025-10-01 00:00:00": 25,
"2025-07-01 00:00:00": 22
}
},
"string_length": {
"min": 1,
"max": 147,
"mean": 19.21700879765396,
"median": 19.0
}
},
"end_date": {
"dtype": "object",
"non_null_count": 340,
"null_count": 15,
"null_percentage": 4.23,
"unique_values": 145,
"unique_percentage": 42.65,
"categorical_info": {
"most_frequent": "2028-12-31 00:00:00",
"most_frequent_count": 27,
"least_frequent": "2027-03-22 00:00:00",
"least_frequent_count": 1,
"top_5_values": {
"2028-12-31 00:00:00": 27,
"2028-06-30 00:00:00": 13,
"2027-09-30 00:00:00": 12,
"2027-12-31 00:00:00": 12,
"2029-03-31 00:00:00": 11
}
},
"string_length": {
"min": 1,
"max": 24,
"mean": 18.86764705882353,
"median": 19.0
}
},
"adoption_date": {
"dtype": "object",
"non_null_count": 254,
"null_count": 101,
"null_percentage": 28.45,
"unique_values": 247,
"unique_percentage": 97.24,
"categorical_info": {
"most_frequent": "2025-05-12 09:10:22",
"most_frequent_count": 4,
"least_frequent": "2025-10-07 13:40:37",
"least_frequent_count": 1,
"top_5_values": {
"2025-05-12 09:10:22": 4,
"2025-05-02 10:08:15": 2,
"2025-05-07 11:23:23": 2,
"2025-04-03 00:00:00": 2,
"2025-05-05 00:00:00": 2
}
},
"string_length": {
"min": 19,
"max": 23,
"mean": 19.015748031496063,
"median": 19.0
}
},
"prj_type_en": {
"dtype": "object",
"non_null_count": 337,
"null_count": 18,
"null_percentage": 5.07,
"unique_values": 1,
"unique_percentage": 0.3,
"categorical_info": {
"most_frequent": "normal",
"most_frequent_count": 337,
"least_frequent": "normal",
"least_frequent_count": 337,
"top_5_values": {
"normal": 337
}
},
"string_length": {
"min": 6,
"max": 6,
"mean": 6.0,
"median": 6.0
}
},
"project_value": {
"dtype": "float64",
"non_null_count": 336,
"null_count": 19,
"null_percentage": 5.35,
"unique_values": 241,
"unique_percentage": 71.73,
"statistics": {
"mean": 194081395.3690476,
"median": 79570929.5,
"std": 350877262.29434377,
"min": 1000000.0,
"max": 3033696240.0,
"q25": 39500000.0,
"q75": 188019286.75,
"skewness": 4.632742026266175,
"kurtosis": 27.186612055159774
},
"outliers": {
"count": 41,
"percentage": 11.55
}
},
"escrow_account_number": {
"dtype": "object",
"non_null_count": 266,
"null_count": 89,
"null_percentage": 25.07,
"unique_values": 265,
"unique_percentage": 99.62,
"categorical_info": {
"most_frequent": "14041024920004",
"most_frequent_count": 2,
"least_frequent": "0205902532504",
"least_frequent_count": 1,
"top_5_values": {
"14041024920004": 2,
"012802437844": 1,
"0012405538002": 1,
"001582146524301": 1,
"0012315743002": 1
}
},
"string_length": {
"min": 8,
"max": 22,
"mean": 13.041353383458647,
"median": 13.0
}
},
"project_status": {
"dtype": "object",
"non_null_count": 337,
"null_count": 18,
"null_percentage": 5.07,
"unique_values": 3,
"unique_percentage": 0.89,
"categorical_info": {
"most_frequent": "active",
"most_frequent_count": 250,
"least_frequent": "cancelled",
"least_frequent_count": 2,
"top_5_values": {
"active": 250,
"pending": 85,
"cancelled": 2
}
},
"string_length": {
"min": 6,
"max": 9,
"mean": 6.270029673590504,
"median": 6.0
}
},
"percent_completed": {
"dtype": "float64",
"non_null_count": 313,
"null_count": 42,
"null_percentage": 11.83,
"unique_values": 52,
"unique_percentage": 16.61,
"statistics": {
"mean": 1.9477635782747607,
"median": 0.0,
"std": 8.867299446255698,
"min": 0.0,
"max": 68.05,
"q25": 0.0,
"q75": 0.0,
"skewness": 5.342075911424932,
"kurtosis": 29.34031228392463
},
"outliers": {
"count": 55,
"percentage": 15.49
}
},
"inspection_date": {
"dtype": "object",
"non_null_count": 314,
"null_count": 41,
"null_percentage": 11.55,
"unique_values": 313,
"unique_percentage": 99.68,
"categorical_info": {
"most_frequent": "2025-02-27 19:48:36",
"most_frequent_count": 2,
"least_frequent": "2024-10-16 13:10:52",
"least_frequent_count": 1,
"top_5_values": {
"2025-02-27 19:48:36": 2,
"2025-10-13 18:21:58": 1,
"2019-05-16 12:18:08": 1,
"2025-04-24 16:25:26": 1,
"2024-04-03 15:20:25": 1
}
},
"string_length": {
"min": 19,
"max": 47,
"mean": 19.089171974522294,
"median": 19.0
}
},
"completion_date": {
"dtype": "object",
"non_null_count": 2,
"null_count": 353,
"null_percentage": 99.44,
"unique_values": 2,
"unique_percentage": 100.0,
"categorical_info": {
"most_frequent": "2011-05-30 00:00:00",
"most_frequent_count": 1,
"least_frequent": "co-working spaces and a kids club. facilities in the complex open to members of the public include a selection of fine dining outlets and retail units on the podium level",
"least_frequent_count": 1,
"top_5_values": {
"2011-05-30 00:00:00": 1,
"co-working spaces and a kids club. facilities in the complex open to members of the public include a selection of fine dining outlets and retail units on the podium level": 1
}
},
"string_length": {
"min": 19,
"max": 170,
"mean": 94.5,
"median": 94.5
}
},
"description_en": {
"dtype": "object",
"non_null_count": 337,
"null_count": 18,
"null_percentage": 5.07,
"unique_values": 319,
"unique_percentage": 94.66,
"categorical_info": {
"most_frequent": "project consists of g+3p+14 residential building located in dubai investment park second, dubai. the building is designed to have a concrete structure and block with internal and external finishes.",
"most_frequent_count": 4,
"least_frequent": "b+g+3p+17",
"least_frequent_count": 1,
"top_5_values": {
"project consists of g+3p+14 residential building located in dubai investment park second, dubai. the building is designed to have a concrete structure and block with internal and external finishes.": 4,
"g+2p+12+roof": 3,
"project consists of g+2p+8 residential building located in dubai investment park second, dubai. the building is designed to have a concrete structure and block with internal and external finishes.": 3,
"3b + ground + m + 3ba + 16 + r": 3,
"g+4+roof": 2
}
},
"string_length": {
"min": 1,
"max": 783,
"mean": 92.31750741839762,
"median": 46.0
}
},
"area_en": {
"dtype": "object",
"non_null_count": 337,
"null_count": 18,
"null_percentage": 5.07,
"unique_values": 54,
"unique_percentage": 16.02,
"categorical_info": {
"most_frequent": "palm deira",
"most_frequent_count": 46,
"least_frequent": "al hebiah second",
"least_frequent_count": 1,
"top_5_values": {
"palm deira": 46,
"madinat al mataar": 26,
"wadi al safa 5": 24,
"wadi al safa 3": 24,
"al barsha south fourth": 24
}
},
"string_length": {
"min": 6,
"max": 130,
"mean": 16.13946587537092,
"median": 14.0
}
},
"zone_en": {
"dtype": "object",
"non_null_count": 239,
"null_count": 116,
"null_percentage": 32.68,
"unique_values": 8,
"unique_percentage": 3.35,
"categorical_info": {
"most_frequent": "dubai development authority (dda)",
"most_frequent_count": 98,
"least_frequent": "which is described as follows:",
"least_frequent_count": 1,
"top_5_values": {
"dubai development authority (dda)": 98,
"trakhees": 83,
"dubai municipality": 41,
"dubai south": 11,
"dubai silicon oasis authority": 3
}
},
"string_length": {
"min": 8,
"max": 60,
"mean": 20.824267782426777,
"median": 18.0
}
},
"cnt_land": {
"dtype": "float64",
"non_null_count": 332,
"null_count": 23,
"null_percentage": 6.48,
"unique_values": 7,
"unique_percentage": 2.11,
"statistics": {
"mean": 0.2921686746987952,
"median": 0.0,
"std": 1.714219576647455,
"min": 0.0,
"max": 24.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 10.820064131532215,
"kurtosis": 131.26326033728665
},
"outliers": {
"count": 44,
"percentage": 12.39
}
},
"cnt_building": {
"dtype": "float64",
"non_null_count": 332,
"null_count": 23,
"null_percentage": 6.48,
"unique_values": 1,
"unique_percentage": 0.3,
"statistics": {
"mean": 0.0,
"median": 0.0,
"std": 0.0,
"min": 0.0,
"max": 0.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 0.0,
"kurtosis": 0.0
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"cnt_villa": {
"dtype": "float64",
"non_null_count": 332,
"null_count": 23,
"null_percentage": 6.48,
"unique_values": 29,
"unique_percentage": 8.73,
"statistics": {
"mean": 22.087349397590362,
"median": 0.0,
"std": 95.07676127394544,
"min": 0.0,
"max": 1121.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 6.655005677088903,
"kurtosis": 59.90362806054476
},
"outliers": {
"count": 31,
"percentage": 8.73
}
},
"cnt_unit": {
"dtype": "float64",
"non_null_count": 332,
"null_count": 23,
"null_percentage": 6.48,
"unique_values": 191,
"unique_percentage": 57.53,
"statistics": {
"mean": 234.24698795180723,
"median": 116.0,
"std": 445.42724575376207,
"min": 0.0,
"max": 4526.0,
"q25": 47.0,
"q75": 269.75,
"skewness": 6.465435974535892,
"kurtosis": 54.86574287118425
},
"outliers": {
"count": 26,
"percentage": 7.32
}
},
"master_project_en": {
"dtype": "object",
"non_null_count": 2,
"null_count": 353,
"null_percentage": 99.44,
"unique_values": 2,
"unique_percentage": 100.0,
"categorical_info": {
"most_frequent": "maison elysee iii by pantheon",
"most_frequent_count": 1,
"least_frequent": "mohammed bin rashid al maktoum city -district one west - phase 2",
"least_frequent_count": 1,
"top_5_values": {
"maison elysee iii by pantheon": 1,
"mohammed bin rashid al maktoum city -district one west - phase 2": 1
}
},
"string_length": {
"min": 29,
"max": 64,
"mean": 46.5,
"median": 46.5
}
}
},
"data_quality": {
"total_missing_values": 1350,
"missing_percentage": 17.29,
"columns_with_missing": {
"project_en": 15,
"developer_number": 14,
"developer_en": 14,
"start_date": 14,
"end_date": 15,
"adoption_date": 101,
"prj_type_en": 18,
"project_value": 19,
"escrow_account_number": 89,
"project_status": 18,
"percent_completed": 42,
"inspection_date": 41,
"completion_date": 353,
"description_en": 18,
"area_en": 18,
"zone_en": 116,
"cnt_land": 23,
"cnt_building": 23,
"cnt_villa": 23,
"cnt_unit": 23,
"master_project_en": 353
},
"missing_percentage_by_column": {
"project_en": 4.23,
"developer_number": 3.94,
"developer_en": 3.94,
"start_date": 3.94,
"end_date": 4.23,
"adoption_date": 28.45,
"prj_type_en": 5.07,
"project_value": 5.35,
"escrow_account_number": 25.07,
"project_status": 5.07,
"percent_completed": 11.83,
"inspection_date": 11.55,
"completion_date": 99.44,
"description_en": 5.07,
"area_en": 5.07,
"zone_en": 32.68,
"cnt_land": 6.48,
"cnt_building": 6.48,
"cnt_villa": 6.48,
"cnt_unit": 6.48,
"master_project_en": 99.44
},
"duplicate_rows": 0,
"duplicate_percentage": 0.0
},
"statistical_summary": {
"numeric_columns_count": 6,
"categorical_columns_count": 16,
"datetime_columns_count": 0,
"data_types_distribution": {
"object": 16,
"float64": 6
},
"potential_identifier_columns": []
}
}