dld_backend/transactions_cleaned.json
2025-10-30 12:13:02 +05:30

662 lines
18 KiB
JSON

{
"file_info": {
"file_path": "./transactions_cleaned.csv",
"total_rows": 218518,
"total_columns": 22,
"memory_usage_mb": 197.48046779632568,
"total_cells": 4807396
},
"structural_info": {
"column_names": [
"transaction_number",
"instance_date",
"group_en",
"procedure_en",
"is_offplan_en",
"is_free_hold_en",
"usage_en",
"area_en",
"prop_type_en",
"prop_sb_type_en",
"trans_value",
"procedure_area",
"actual_area",
"rooms_en",
"parking",
"nearest_metro_en",
"nearest_mall_en",
"nearest_landmark_en",
"total_buyer",
"total_seller",
"master_project_en",
"project_en"
],
"dtypes": {
"transaction_number": "object",
"instance_date": "object",
"group_en": "object",
"procedure_en": "object",
"is_offplan_en": "object",
"is_free_hold_en": "object",
"usage_en": "object",
"area_en": "object",
"prop_type_en": "object",
"prop_sb_type_en": "object",
"trans_value": "float64",
"procedure_area": "float64",
"actual_area": "float64",
"rooms_en": "object",
"parking": "float64",
"nearest_metro_en": "object",
"nearest_mall_en": "object",
"nearest_landmark_en": "object",
"total_buyer": "int64",
"total_seller": "int64",
"master_project_en": "object",
"project_en": "object"
},
"index_info": {
"type": "<class 'pandas.core.indexes.range.RangeIndex'>",
"is_unique": true,
"has_duplicates": false
}
},
"columns_analysis": {
"transaction_number": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 211404,
"unique_percentage": 96.74,
"categorical_info": {
"most_frequent": "43-202-2025",
"most_frequent_count": 311,
"least_frequent": "41-13774-2025",
"least_frequent_count": 1,
"top_5_values": {
"43-202-2025": 311,
"49-57-2025": 273,
"43-21-2025": 217,
"43-234-2025": 190,
"49-83-2025": 189
}
},
"string_length": {
"min": 8,
"max": 15,
"mean": 13.11555112164673,
"median": 13.0
}
},
"instance_date": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 208067,
"unique_percentage": 95.22,
"categorical_info": {
"most_frequent": "2025-07-10 17:03:09",
"most_frequent_count": 311,
"least_frequent": "2025-04-30 17:48:48",
"least_frequent_count": 1,
"top_5_values": {
"2025-07-10 17:03:09": 311,
"2025-05-30 14:37:12": 273,
"2025-01-29 15:49:05": 217,
"2025-08-07 12:45:12": 190,
"2025-08-18 16:12:54": 189
}
},
"string_length": {
"min": 19,
"max": 19,
"mean": 19.0,
"median": 19.0
}
},
"group_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 3,
"unique_percentage": 0.0,
"categorical_info": {
"most_frequent": "sales",
"most_frequent_count": 170352,
"least_frequent": "gifts",
"least_frequent_count": 7735,
"top_5_values": {
"sales": 170352,
"mortgage": 40431,
"gifts": 7735
}
},
"string_length": {
"min": 5,
"max": 8,
"mean": 5.555070978134524,
"median": 5.0
}
},
"procedure_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 37,
"unique_percentage": 0.02,
"categorical_info": {
"most_frequent": "sell - pre registration",
"most_frequent_count": 103781,
"least_frequent": "modify delayed mortgage",
"least_frequent_count": 3,
"top_5_values": {
"sell - pre registration": 103781,
"sale": 42941,
"mortgage registration": 26972,
"delayed sell": 20249,
"grant": 6321
}
},
"string_length": {
"min": 4,
"max": 48,
"mean": 17.5632304890215,
"median": 23.0
}
},
"is_offplan_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.0,
"categorical_info": {
"most_frequent": "ready",
"most_frequent_count": 113341,
"least_frequent": "off-plan",
"least_frequent_count": 105177,
"top_5_values": {
"ready": 113341,
"off-plan": 105177
}
},
"string_length": {
"min": 5,
"max": 8,
"mean": 6.443958850071848,
"median": 5.0
}
},
"is_free_hold_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.0,
"categorical_info": {
"most_frequent": "free hold",
"most_frequent_count": 209097,
"least_frequent": "non free hold",
"least_frequent_count": 9421,
"top_5_values": {
"free hold": 209097,
"non free hold": 9421
}
},
"string_length": {
"min": 9,
"max": 13,
"mean": 9.17245261259942,
"median": 9.0
}
},
"usage_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 2,
"unique_percentage": 0.0,
"categorical_info": {
"most_frequent": "residential",
"most_frequent_count": 213004,
"least_frequent": "commercial",
"least_frequent_count": 5514,
"top_5_values": {
"residential": 213004,
"commercial": 5514
}
},
"string_length": {
"min": 10,
"max": 11,
"mean": 10.974766380801581,
"median": 11.0
}
},
"area_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 268,
"unique_percentage": 0.12,
"categorical_info": {
"most_frequent": "jumeirah village circle",
"most_frequent_count": 18939,
"least_frequent": "dubai international airport",
"least_frequent_count": 1,
"top_5_values": {
"jumeirah village circle": 18939,
"business bay": 14511,
"dubai land residence complex": 7341,
"al yelayiss 1": 5812,
"madinat al mataar": 5787
}
},
"string_length": {
"min": 4,
"max": 33,
"mean": 15.837455038028903,
"median": 14.0
}
},
"prop_type_en": {
"dtype": "object",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 3,
"unique_percentage": 0.0,
"categorical_info": {
"most_frequent": "unit",
"most_frequent_count": 169915,
"least_frequent": "building",
"least_frequent_count": 15967,
"top_5_values": {
"unit": 169915,
"land": 32636,
"building": 15967
}
},
"string_length": {
"min": 4,
"max": 8,
"mean": 4.292277981676567,
"median": 4.0
}
},
"prop_sb_type_en": {
"dtype": "object",
"non_null_count": 208951,
"null_count": 9567,
"null_percentage": 4.38,
"unique_values": 42,
"unique_percentage": 0.02,
"categorical_info": {
"most_frequent": "flat",
"most_frequent_count": 158612,
"least_frequent": "health club",
"least_frequent_count": 1,
"top_5_values": {
"flat": 158612,
"villa": 16313,
"residential": 13429,
"commercial": 5148,
"office": 5020
}
},
"string_length": {
"min": 4,
"max": 50,
"mean": 5.052806638877057,
"median": 4.0
}
},
"trans_value": {
"dtype": "float64",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 73759,
"unique_percentage": 33.75,
"statistics": {
"mean": 3283859.255751517,
"median": 1500000.0,
"std": 15313415.930326678,
"min": 10.77,
"max": 1578530000.0,
"q25": 910000.0,
"q75": 2750000.0,
"skewness": 46.95032216760977,
"kurtosis": 3329.9980178193723
},
"outliers": {
"count": 19301,
"percentage": 8.83
}
},
"procedure_area": {
"dtype": "float64",
"non_null_count": 218466,
"null_count": 52,
"null_percentage": 0.02,
"unique_values": 35074,
"unique_percentage": 16.05,
"statistics": {
"mean": 261.3489334267117,
"median": 90.36,
"std": 5415.7054677162405,
"min": 0.0,
"max": 1715752.11,
"q25": 64.22,
"q75": 152.31,
"skewness": 222.57109352903205,
"kurtosis": 61279.78606538791
},
"outliers": {
"count": 26361,
"percentage": 12.06
}
},
"actual_area": {
"dtype": "float64",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 34584,
"unique_percentage": 15.83,
"statistics": {
"mean": 262.78477356556436,
"median": 91.08,
"std": 5382.863713033483,
"min": 5.17,
"max": 1715752.11,
"q25": 65.06,
"q75": 153.48,
"skewness": 225.535006380266,
"kurtosis": 62535.69611765087
},
"outliers": {
"count": 26523,
"percentage": 12.14
}
},
"rooms_en": {
"dtype": "object",
"non_null_count": 183145,
"null_count": 35373,
"null_percentage": 16.19,
"unique_values": 14,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "1 b/r",
"most_frequent_count": 71525,
"least_frequent": "10 b/r",
"least_frequent_count": 1,
"top_5_values": {
"1 b/r": 71525,
"2 b/r": 42780,
"studio": 37722,
"3 b/r": 18375,
"4 b/r": 7022
}
},
"string_length": {
"min": 3,
"max": 11,
"mean": 5.224144803297933,
"median": 5.0
}
},
"parking": {
"dtype": "float64",
"non_null_count": 95220,
"null_count": 123298,
"null_percentage": 56.42,
"unique_values": 558,
"unique_percentage": 0.59,
"statistics": {
"mean": 22.358372916115005,
"median": 1.0,
"std": 1592.0959438077602,
"min": -1.0,
"max": 297298.0,
"q25": 1.0,
"q75": 1.0,
"skewness": 144.8271736848027,
"kurtosis": 24036.929143027846
},
"outliers": {
"count": 7633,
"percentage": 3.49
}
},
"nearest_metro_en": {
"dtype": "object",
"non_null_count": 120617,
"null_count": 97901,
"null_percentage": 44.8,
"unique_values": 55,
"unique_percentage": 0.05,
"categorical_info": {
"most_frequent": "dubai internet city",
"most_frequent_count": 16919,
"least_frequent": "ggico metro station",
"least_frequent_count": 1,
"top_5_values": {
"dubai internet city": 16919,
"business bay metro station": 13248,
"nakheel metro station": 11501,
"buj khalifa dubai mall metro station": 10814,
"sharaf dg metro station": 7911
}
},
"string_length": {
"min": 9,
"max": 36,
"mean": 23.059717950206025,
"median": 23.0
}
},
"nearest_mall_en": {
"dtype": "object",
"non_null_count": 118121,
"null_count": 100397,
"null_percentage": 45.94,
"unique_values": 5,
"unique_percentage": 0.0,
"categorical_info": {
"most_frequent": "marina mall",
"most_frequent_count": 38565,
"least_frequent": "city centre mirdif",
"least_frequent_count": 8743,
"top_5_values": {
"marina mall": 38565,
"dubai mall": 32510,
"mall of the emirates": 27759,
"ibn-e-battuta mall": 10544,
"city centre mirdif": 8743
}
},
"string_length": {
"min": 10,
"max": 20,
"mean": 13.982788835177487,
"median": 11.0
}
},
"nearest_landmark_en": {
"dtype": "object",
"non_null_count": 145904,
"null_count": 72614,
"null_percentage": 33.23,
"unique_values": 14,
"unique_percentage": 0.01,
"categorical_info": {
"most_frequent": "sports city swimming academy",
"most_frequent_count": 42232,
"least_frequent": "jabel ali",
"least_frequent_count": 3,
"top_5_values": {
"sports city swimming academy": 42232,
"downtown dubai": 22411,
"motor city": 18649,
"img world adventures": 18410,
"burj al arab": 14198
}
},
"string_length": {
"min": 9,
"max": 33,
"mean": 18.984201941002304,
"median": 20.0
}
},
"total_buyer": {
"dtype": "int64",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 1,
"unique_percentage": 0.0,
"statistics": {
"mean": 0.0,
"median": 0.0,
"std": 0.0,
"min": 0.0,
"max": 0.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 0.0,
"kurtosis": 0.0
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"total_seller": {
"dtype": "int64",
"non_null_count": 218518,
"null_count": 0,
"null_percentage": 0.0,
"unique_values": 1,
"unique_percentage": 0.0,
"statistics": {
"mean": 0.0,
"median": 0.0,
"std": 0.0,
"min": 0.0,
"max": 0.0,
"q25": 0.0,
"q75": 0.0,
"skewness": 0.0,
"kurtosis": 0.0
},
"outliers": {
"count": 0,
"percentage": 0.0
}
},
"master_project_en": {
"dtype": "object",
"non_null_count": 502,
"null_count": 218016,
"null_percentage": 99.77,
"unique_values": 8,
"unique_percentage": 1.59,
"categorical_info": {
"most_frequent": "maison elysee ii",
"most_frequent_count": 171,
"least_frequent": "hills park",
"least_frequent_count": 6,
"top_5_values": {
"maison elysee ii": 171,
"elysee heights": 153,
"jebel ali village townhouses- phase 1": 45,
"remraam - al ramth": 45,
"maison elysee iii by pantheon": 37
}
},
"string_length": {
"min": 6,
"max": 37,
"mean": 17.44223107569721,
"median": 16.0
}
},
"project_en": {
"dtype": "object",
"non_null_count": 187284,
"null_count": 31234,
"null_percentage": 14.29,
"unique_values": 2690,
"unique_percentage": 1.44,
"categorical_info": {
"most_frequent": "binghatti skyrise",
"most_frequent_count": 2438,
"least_frequent": "sondos rosa",
"least_frequent_count": 1,
"top_5_values": {
"binghatti skyrise": 2438,
"sobha solis": 1687,
"binghatti elite": 1640,
"skyvue": 1480,
"sobha orbis": 1383
}
},
"string_length": {
"min": 2,
"max": 77,
"mean": 17.992311142436087,
"median": 16.0
}
}
},
"data_quality": {
"total_missing_values": 688452,
"missing_percentage": 14.32,
"columns_with_missing": {
"prop_sb_type_en": 9567,
"procedure_area": 52,
"rooms_en": 35373,
"parking": 123298,
"nearest_metro_en": 97901,
"nearest_mall_en": 100397,
"nearest_landmark_en": 72614,
"master_project_en": 218016,
"project_en": 31234
},
"missing_percentage_by_column": {
"prop_sb_type_en": 4.38,
"procedure_area": 0.02,
"rooms_en": 16.19,
"parking": 56.42,
"nearest_metro_en": 44.8,
"nearest_mall_en": 45.94,
"nearest_landmark_en": 33.23,
"master_project_en": 99.77,
"project_en": 14.29
},
"duplicate_rows": 2704,
"duplicate_percentage": 1.24
},
"statistical_summary": {
"numeric_columns_count": 6,
"categorical_columns_count": 16,
"datetime_columns_count": 0,
"data_types_distribution": {
"object": 16,
"float64": 4,
"int64": 2
},
"potential_identifier_columns": []
}
}