From eca6705b299c5afe2e8ed995529f7afc5559ae0b Mon Sep 17 00:00:00 2001 From: Nemael <100dragons@gmail.com> Date: Sat, 24 Oct 2020 11:14:07 +0200 Subject: Added sanitization of data: puts "" instead of null or "/" data. Also strips leading and trailing whitespace. It only sanitizes phone_number, website and email because these data follow a format. Name, address, etc. are not sanitized. --- doc/import/LocationsParser.rb | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'doc/import') diff --git a/doc/import/LocationsParser.rb b/doc/import/LocationsParser.rb index 3ce79c0..ccb165a 100644 --- a/doc/import/LocationsParser.rb +++ b/doc/import/LocationsParser.rb @@ -10,6 +10,18 @@ class Parser @list_of_locations["type"] = "FeatureCollection" @list_of_locations["features"] = [] end + def sanitize(data) + #Sanitize data. Puts "" instead of null in empty data, and puts "" instead of "/" + if (data == nil) + data = "" + end + data = data.strip + res = data + if (data == "/") + res = "" + end + return res + end def parseDeweyFiles(data_file) #Parses the .json files contained in the Dewey folder data = JSON.parse(File.read(data_file)) @@ -27,9 +39,9 @@ class Parser new_item["properties"]["name"] = data[i]["fields"]["name"] new_item["properties"]["description"] = data[i]["fields"]["comment"] new_item["properties"]["entry_number"] = data[i]["pk"] - new_item["properties"]["phone_number"] = data[i]["fields"]["phone"] - new_item["properties"]["website"] = data[i]["fields"]["web"] - new_item["properties"]["email"] = data[i]["fields"]["email"] + new_item["properties"]["phone_number"] = sanitize(data[i]["fields"]["phone"]) + new_item["properties"]["website"] = sanitize(data[i]["fields"]["web"]) + new_item["properties"]["email"] = sanitize(data[i]["fields"]["email"]) new_item["properties"]["address"] = data[i]["fields"]["address"] new_item["properties"]["city"] = "" #No equivalent in Dewey data new_item["properties"]["postal_code"] = "" #No 
equivalent in Dewey data @@ -55,9 +67,9 @@ class Parser new_item["properties"]["name"] = data[i]["Denomination_FULL"] new_item["properties"]["description"] = data[i]["description"] new_item["properties"]["entry_number"] = data[i]["NumEntr"] - new_item["properties"]["phone_number"] = coord[i]["Tel"] - new_item["properties"]["website"] = coord[i]["Web"] - new_item["properties"]["email"] = coord[i]["Email"] + new_item["properties"]["phone_number"] = sanitize(coord[i]["Tel"]) + new_item["properties"]["website"] = sanitize(coord[i]["Web"]) + new_item["properties"]["email"] = sanitize(coord[i]["Email"]) new_item["properties"]["address"] = coord[i]["Adresse"] new_item["properties"]["city"] = data[i]["INS_COMMUNE"] new_item["properties"]["postal_code"] = coord[i]["Code postal"] -- cgit v1.2.3