327 lines
8.6 KiB
JSON
327 lines
8.6 KiB
JSON
{
|
|
"file_info": {
|
|
"file_path": "./brokers_cleaned.csv",
|
|
"total_rows": 37332,
|
|
"total_columns": 10,
|
|
"memory_usage_mb": 16.050750732421875,
|
|
"total_cells": 373320
|
|
},
|
|
"structural_info": {
|
|
"column_names": [
|
|
"broker_number",
|
|
"broker_en",
|
|
"gender_en",
|
|
"license_start_date",
|
|
"license_end_date",
|
|
"webpage",
|
|
"phone",
|
|
"fax",
|
|
"real_estate_number",
|
|
"real_estate_en"
|
|
],
|
|
"dtypes": {
|
|
"broker_number": "int64",
|
|
"broker_en": "object",
|
|
"gender_en": "object",
|
|
"license_start_date": "object",
|
|
"license_end_date": "object",
|
|
"webpage": "float64",
|
|
"phone": "object",
|
|
"fax": "object",
|
|
"real_estate_number": "int64",
|
|
"real_estate_en": "object"
|
|
},
|
|
"index_info": {
|
|
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
|
|
"is_unique": true,
|
|
"has_duplicates": "False"
|
|
}
|
|
},
|
|
"columns_analysis": {
|
|
"broker_number": {
|
|
"dtype": "int64",
|
|
"non_null_count": 37332,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 37332,
|
|
"unique_percentage": 100.0,
|
|
"statistics": {
|
|
"mean": 65268.60639665702,
|
|
"median": 68863.5,
|
|
"std": 18225.086347215525,
|
|
"min": 10.0,
|
|
"max": 99999.0,
|
|
"q25": 53626.75,
|
|
"q75": 79976.25,
|
|
"skewness": -0.906619112625021,
|
|
"kurtosis": 0.5830411491701528
|
|
},
|
|
"outliers": {
|
|
"count": 665,
|
|
"percentage": 1.78
|
|
}
|
|
},
|
|
"broker_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 37331,
|
|
"null_count": 1,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 37219,
|
|
"unique_percentage": 99.7,
|
|
"categorical_info": {
|
|
"most_frequent": "ahmed mahmoud abdelrahman bassyoni",
|
|
"most_frequent_count": 5,
|
|
"least_frequent": "sally nehad khouly",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"ahmed mahmoud abdelrahman bassyoni": 5,
|
|
"samia metahri": 4,
|
|
"hesham ibrahim mohamed saber ibrahim": 3,
|
|
"usman liaqat liaqat ali": 3,
|
|
"bethelhem seyoum gudisa": 2
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 3,
|
|
"max": 82,
|
|
"mean": 25.398757065173715,
|
|
"median": 24.0
|
|
}
|
|
},
|
|
"gender_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 37332,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 2,
|
|
"unique_percentage": 0.01,
|
|
"categorical_info": {
|
|
"most_frequent": "male",
|
|
"most_frequent_count": 24698,
|
|
"least_frequent": "female",
|
|
"least_frequent_count": 12634,
|
|
"top_5_values": {
|
|
"male": 24698,
|
|
"female": 12634
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 4,
|
|
"max": 6,
|
|
"mean": 4.67684560162863,
|
|
"median": 4.0
|
|
}
|
|
},
|
|
"license_start_date": {
|
|
"dtype": "object",
|
|
"non_null_count": 37327,
|
|
"null_count": 5,
|
|
"null_percentage": 0.01,
|
|
"unique_values": 36292,
|
|
"unique_percentage": 97.23,
|
|
"categorical_info": {
|
|
"most_frequent": "2013-04-17 00:00:00",
|
|
"most_frequent_count": 10,
|
|
"least_frequent": "2015-07-01 00:00:00",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"2013-04-17 00:00:00": 10,
|
|
"2022-04-21 00:00:00": 8,
|
|
"2012-03-18 00:00:00": 8,
|
|
"2023-11-02 00:00:00": 8,
|
|
"2024-01-24 00:00:00": 7
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 19,
|
|
"max": 19,
|
|
"mean": 19.0,
|
|
"median": 19.0
|
|
}
|
|
},
|
|
"license_end_date": {
|
|
"dtype": "object",
|
|
"non_null_count": 37329,
|
|
"null_count": 3,
|
|
"null_percentage": 0.01,
|
|
"unique_values": 5540,
|
|
"unique_percentage": 14.84,
|
|
"categorical_info": {
|
|
"most_frequent": "2026-01-05 00:00:00",
|
|
"most_frequent_count": 428,
|
|
"least_frequent": "2026-10-03 14:38:07",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"2026-01-05 00:00:00": 428,
|
|
"2025-11-21 00:00:00": 351,
|
|
"2026-06-24 00:00:00": 290,
|
|
"2026-04-22 00:00:00": 257,
|
|
"2025-12-22 00:00:00": 253
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 19,
|
|
"max": 19,
|
|
"mean": 19.0,
|
|
"median": 19.0
|
|
}
|
|
},
|
|
"webpage": {
|
|
"dtype": "float64",
|
|
"non_null_count": 0,
|
|
"null_count": 37332,
|
|
"null_percentage": 100.0,
|
|
"unique_values": 0,
|
|
"unique_percentage": 0,
|
|
"statistics": {
|
|
"mean": null,
|
|
"median": null,
|
|
"std": null,
|
|
"min": null,
|
|
"max": null,
|
|
"q25": null,
|
|
"q75": null,
|
|
"skewness": null,
|
|
"kurtosis": null
|
|
}
|
|
},
|
|
"phone": {
|
|
"dtype": "object",
|
|
"non_null_count": 22775,
|
|
"null_count": 14557,
|
|
"null_percentage": 38.99,
|
|
"unique_values": 13873,
|
|
"unique_percentage": 60.91,
|
|
"categorical_info": {
|
|
"most_frequent": "044297040",
|
|
"most_frequent_count": 262,
|
|
"least_frequent": "971|506837100",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"044297040": 262,
|
|
"040000000": 245,
|
|
"048762333": 240,
|
|
"043233609": 218,
|
|
"044223500": 175
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 1,
|
|
"max": 15,
|
|
"mean": 10.174577387486279,
|
|
"median": 9.0
|
|
}
|
|
},
|
|
"fax": {
|
|
"dtype": "object",
|
|
"non_null_count": 4387,
|
|
"null_count": 32945,
|
|
"null_percentage": 88.25,
|
|
"unique_values": 1855,
|
|
"unique_percentage": 42.28,
|
|
"categorical_info": {
|
|
"most_frequent": "044279980",
|
|
"most_frequent_count": 147,
|
|
"least_frequent": "0586370773",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"044279980": 147,
|
|
"0": 99,
|
|
"04305291": 90,
|
|
"044294400": 74,
|
|
"044550200": 67
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 1,
|
|
"max": 28,
|
|
"mean": 8.694324139503077,
|
|
"median": 9.0
|
|
}
|
|
},
|
|
"real_estate_number": {
|
|
"dtype": "int64",
|
|
"non_null_count": 37332,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 11432,
|
|
"unique_percentage": 30.62,
|
|
"statistics": {
|
|
"mean": 27756.587592414016,
|
|
"median": 29313.0,
|
|
"std": 15030.301981507731,
|
|
"min": 3.0,
|
|
"max": 56170.0,
|
|
"q25": 16908.75,
|
|
"q75": 39379.0,
|
|
"skewness": -0.302721171881687,
|
|
"kurtosis": -0.8117807547155373
|
|
},
|
|
"outliers": {
|
|
"count": 0,
|
|
"percentage": 0.0
|
|
}
|
|
},
|
|
"real_estate_en": {
|
|
"dtype": "object",
|
|
"non_null_count": 37332,
|
|
"null_count": 0,
|
|
"null_percentage": 0.0,
|
|
"unique_values": 11397,
|
|
"unique_percentage": 30.53,
|
|
"categorical_info": {
|
|
"most_frequent": "f a m real estate broker l.l.c (branch)",
|
|
"most_frequent_count": 385,
|
|
"least_frequent": "majid sultan real estate brokerage",
|
|
"least_frequent_count": 1,
|
|
"top_5_values": {
|
|
"f a m real estate broker l.l.c (branch)": 385,
|
|
"white and white real estate l.l.c": 374,
|
|
"driven properties l.l.c (branch)": 342,
|
|
"harbor real estate broker l.l.c": 315,
|
|
"metropolitan premium properties l.l.c": 292
|
|
}
|
|
},
|
|
"string_length": {
|
|
"min": 6,
|
|
"max": 112,
|
|
"mean": 30.162434372656165,
|
|
"median": 29.0
|
|
}
|
|
}
|
|
},
|
|
"data_quality": {
|
|
"total_missing_values": 84843,
|
|
"missing_percentage": 22.73,
|
|
"columns_with_missing": {
|
|
"broker_en": 1,
|
|
"license_start_date": 5,
|
|
"license_end_date": 3,
|
|
"webpage": 37332,
|
|
"phone": 14557,
|
|
"fax": 32945
|
|
},
|
|
"missing_percentage_by_column": {
|
|
"license_start_date": 0.01,
|
|
"license_end_date": 0.01,
|
|
"webpage": 100.0,
|
|
"phone": 38.99,
|
|
"fax": 88.25
|
|
},
|
|
"duplicate_rows": 0,
|
|
"duplicate_percentage": 0.0
|
|
},
|
|
"statistical_summary": {
|
|
"numeric_columns_count": 3,
|
|
"categorical_columns_count": 7,
|
|
"datetime_columns_count": 0,
|
|
"data_types_distribution": {
|
|
"object": 7,
|
|
"int64": 2,
|
|
"float64": 1
|
|
},
|
|
"potential_identifier_columns": [
|
|
"broker_number"
|
|
]
|
|
}
|
|
} |