dld_backend/brokers_cleaned.json
2025-10-30 12:13:02 +05:30

327 lines
8.6 KiB
JSON

{
"file_info": {
"file_path": "./brokers_cleaned.csv",
"total_rows": 37332,
"total_columns": 10,
"memory_usage_mb": 16.050750732421875,
"total_cells": 373320
},
"structural_info": {
"column_names": [
"broker_number",
"broker_en",
"gender_en",
"license_start_date",
"license_end_date",
"webpage",
"phone",
"fax",
"real_estate_number",
"real_estate_en"
],
"dtypes": {
"broker_number": "int64",
"broker_en": "object",
"gender_en": "object",
"license_start_date": "object",
"license_end_date": "object",
"webpage": "float64",
"phone": "object",
"fax": "object",
"real_estate_number": "int64",
"real_estate_en": "object"
},
"index_info": {
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
"is_unique": true,
"has_duplicates": "False"
}
},
"columns_analysis": {
"broker_number": {
"dtype": "int64",
"non_null_count": 37332,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 37332,
"unique_percentage": 100.0,
"statistics": {
"mean": 65268.60639665702,
"median": 68863.5,
"std": 18225.086347215525,
"min": 10.0,
"max": 99999.0,
"q25": 53626.75,
"q75": 79976.25,
"skewness": -0.906619112625021,
"kurtosis": 0.5830411491701528
},
"outliers": {
"count": 665,
"percentage": 1.78
}
},
"broker_en": {
"dtype": "object",
"non_null_count": 37331,
"null_count": 1,
"null_percentage": 0.0,
"unique_values": 37219,
"unique_percentage": 99.7,
"categorical_info": {
"most_frequent": "ahmed mahmoud abdelrahman bassyoni",
"most_frequent_count": 5,
"least_frequent": "sally nehad khouly",
"least_frequent_count": 1,
"top_5_values": {
"ahmed mahmoud abdelrahman bassyoni": 5,
"samia metahri": 4,
"hesham ibrahim mohamed saber ibrahim": 3,
"usman liaqat liaqat ali": 3,
"bethelhem seyoum gudisa": 2
}
},
"string_length": {
"min": 3,
"max": 82,
"mean": 25.398757065173715,
"median": 24.0
}
},
"gender_en": {
"dtype": "object",
"non_null_count": 37332,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "male",
"most_frequent_count": 24698,
"least_frequent": "female",
"least_frequent_count": 12634,
"top_5_values": {
"male": 24698,
"female": 12634
}
},
"string_length": {
"min": 4,
"max": 6,
"mean": 4.67684560162863,
"median": 4.0
}
},
"license_start_date": {
"dtype": "object",
"non_null_count": 37327,
"null_count": 5,
"null_percentage": 0.01,
"unique_values": 36292,
"unique_percentage": 97.23,
"categorical_info": {
"most_frequent": "2013-04-17 00:00:00",
"most_frequent_count": 10,
"least_frequent": "2015-07-01 00:00:00",
"least_frequent_count": 1,
"top_5_values": {
"2013-04-17 00:00:00": 10,
"2022-04-21 00:00:00": 8,
"2012-03-18 00:00:00": 8,
"2023-11-02 00:00:00": 8,
"2024-01-24 00:00:00": 7
}
},
"string_length": {
"min": 19,
"max": 19,
"mean": 19.0,
"median": 19.0
}
},
"license_end_date": {
"dtype": "object",
"non_null_count": 37329,
"null_count": 3,
"null_percentage": 0.01,
"unique_values": 5540,
"unique_percentage": 14.84,
"categorical_info": {
"most_frequent": "2026-01-05 00:00:00",
"most_frequent_count": 428,
"least_frequent": "2026-10-03 14:38:07",
"least_frequent_count": 1,
"top_5_values": {
"2026-01-05 00:00:00": 428,
"2025-11-21 00:00:00": 351,
"2026-06-24 00:00:00": 290,
"2026-04-22 00:00:00": 257,
"2025-12-22 00:00:00": 253
}
},
"string_length": {
"min": 19,
"max": 19,
"mean": 19.0,
"median": 19.0
}
},
"webpage": {
"dtype": "float64",
"non_null_count": 0,
"null_count": 37332,
"null_percentage": 100.0,
"unique_values": 0,
"unique_percentage": 0,
"statistics": {
"mean": null,
"median": null,
"std": null,
"min": null,
"max": null,
"q25": null,
"q75": null,
"skewness": null,
"kurtosis": null
}
},
"phone": {
"dtype": "object",
"non_null_count": 22775,
"null_count": 14557,
"null_percentage": 38.99,
"unique_values": 13873,
"unique_percentage": 60.91,
"categorical_info": {
"most_frequent": "044297040",
"most_frequent_count": 262,
"least_frequent": "971|506837100",
"least_frequent_count": 1,
"top_5_values": {
"044297040": 262,
"040000000": 245,
"048762333": 240,
"043233609": 218,
"044223500": 175
}
},
"string_length": {
"min": 1,
"max": 15,
"mean": 10.174577387486279,
"median": 9.0
}
},
"fax": {
"dtype": "object",
"non_null_count": 4387,
"null_count": 32945,
"null_percentage": 88.25,
"unique_values": 1855,
"unique_percentage": 42.28,
"categorical_info": {
"most_frequent": "044279980",
"most_frequent_count": 147,
"least_frequent": "0586370773",
"least_frequent_count": 1,
"top_5_values": {
"044279980": 147,
"0": 99,
"04305291": 90,
"044294400": 74,
"044550200": 67
}
},
"string_length": {
"min": 1,
"max": 28,
"mean": 8.694324139503077,
"median": 9.0
}
},
"real_estate_number": {
"dtype": "int64",
"non_null_count": 37332,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 11432,
"unique_percentage": 30.62,
"statistics": {
"mean": 27756.587592414016,
"median": 29313.0,
"std": 15030.301981507731,
"min": 3.0,
"max": 56170.0,
"q25": 16908.75,
"q75": 39379.0,
"skewness": -0.302721171881687,
"kurtosis": -0.8117807547155373
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"real_estate_en": {
"dtype": "object",
"non_null_count": 37332,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 11397,
"unique_percentage": 30.53,
"categorical_info": {
"most_frequent": "f a m real estate broker l.l.c (branch)",
"most_frequent_count": 385,
"least_frequent": "majid sultan real estate brokerage",
"least_frequent_count": 1,
"top_5_values": {
"f a m real estate broker l.l.c (branch)": 385,
"white and white real estate l.l.c": 374,
"driven properties l.l.c (branch)": 342,
"harbor real estate broker l.l.c": 315,
"metropolitan premium properties l.l.c": 292
}
},
"string_length": {
"min": 6,
"max": 112,
"mean": 30.162434372656165,
"median": 29.0
}
}
},
"data_quality": {
"total_missing_values": 84843,
"missing_percentage": 22.73,
"columns_with_missing": {
"broker_en": 1,
"license_start_date": 5,
"license_end_date": 3,
"webpage": 37332,
"phone": 14557,
"fax": 32945
},
"missing_percentage_by_column": {
"license_start_date": 0.01,
"license_end_date": 0.01,
"webpage": 100.0,
"phone": 38.99,
"fax": 88.25
},
"duplicate_rows": 0,
"duplicate_percentage": 0.0
},
"statistical_summary": {
"numeric_columns_count": 3,
"categorical_columns_count": 7,
"datetime_columns_count": 0,
"data_types_distribution": {
"object": 7,
"int64": 2,
"float64": 1
},
"potential_identifier_columns": [
"broker_number"
]
}
}