# frozen_string_literal: true

require 'json'
require 'csv'
require 'rgeo'
require 'rgeo-geojson'

# Reads a CSV export whose "the_geom" column holds hex-encoded WKB geometries
# and accumulates one GeoJSON feature per row into a FeatureCollection that
# can then be written to disk.
class Parser
  # Raised when a hex WKB string is missing, truncated or has an odd length.
  class WkbFormatError < ArgumentError; end

  # Number of hex characters in a 32-bit integer field.
  HEX_PER_WORD = 8
  # Number of hex characters in a 64-bit coordinate value.
  HEX_PER_DOUBLE = 16

  def initialize
    @list_of_locations = {
      "type" => "FeatureCollection",
      "features" => []
    }
  end

  # Merges two optional values into a single display string.
  #
  # @param entry_1 [String, nil]
  # @param entry_2 [String, nil]
  # @return [String, nil] both values joined with " ou " when they differ,
  #   the shared value when they are equal, whichever one is present when
  #   only one is, or nil when neither is.
  def fuse_two_entries(entry_1, entry_2)
    return entry_1 || entry_2 unless entry_1 && entry_2

    entry_1 == entry_2 ? entry_1 : "#{entry_1} ou #{entry_2}"
  end

  # Reverses the byte order of a hex string, treating every two hex
  # characters as one byte.
  #
  # @param string [String] hex string with an even number of characters
  # @return [String] the same bytes in reverse order
  # @raise [WkbFormatError] if the string has an odd number of characters
  def reverse_endianness(string)
    if string.length.odd?
      raise WkbFormatError, "hex string has an odd number of characters (#{string.length})"
    end

    string.scan(/../m).reverse.join
  end

  # Removes data that stumped the RGeo library: keeps the first 7 characters
  # and everything from index 15 onwards, dropping the 8 characters between.
  # NOTE(review): presumably this drops the SRID flag and SRID of a
  # little-endian EWKB header while reusing the SRID's trailing zeros to
  # complete the type field -- confirm against the input data.
  #
  # @param string [String] hex WKB string
  # @return [String]
  def cut_from_wkb(string)
    string[0..6] + string[15..-1]
  end

  # Converts a hex polygon with an SRID field from little-endian to
  # big-endian and removes the SRID, which stumped the RGeo library.
  #
  # Every ring is walked using its own point count, so polygons with holes
  # are converted correctly. Input whose byte-order marker is not "01" is
  # taken to be big-endian already and only has its SRID removed.
  # Coordinates are assumed to be two 64-bit values per point.
  #
  # @param string [String] hex polygon, header assumed to contain an SRID
  # @return [String] big-endian hex WKB without SRID
  # @raise [WkbFormatError] if the data ends before the declared content
  def reverse_polygon_endianness(string)
    return strip_srid(string) unless string[0..1] == "01"

    type_word = take_swapped(string, 2, HEX_PER_WORD)
    srid_word = take_swapped(string, 2 + HEX_PER_WORD, HEX_PER_WORD)
    out = "00" + type_word + srid_word
    append_rings(string, 2 + (2 * HEX_PER_WORD), out)
    strip_srid(out)
  end

  # Converts a hex multipolygon with an SRID field from little-endian to
  # big-endian and removes the SRID, which stumped the RGeo library.
  #
  # All member polygons are converted, and every count is decoded from its
  # full 32-bit field. Member polygons are assumed to be little-endian and
  # to carry no SRID of their own. Input whose byte-order marker is not "01"
  # is taken to be big-endian already and only has its SRID removed.
  #
  # @param string [String] hex multipolygon, header assumed to contain an SRID
  # @return [String] big-endian hex WKB without SRID
  # @raise [WkbFormatError] if the data ends before the declared content
  def reverse_multipolygon_endianness(string)
    return strip_srid(string) unless string[0..1] == "01"

    type_word = take_swapped(string, 2, HEX_PER_WORD)
    srid_word = take_swapped(string, 2 + HEX_PER_WORD, HEX_PER_WORD)
    count_word = take_swapped(string, 2 + (2 * HEX_PER_WORD), HEX_PER_WORD)
    out = "00" + type_word + srid_word + count_word
    pos = 2 + (3 * HEX_PER_WORD)

    count_word.to_i(16).times do
      # Each member polygon starts with its own byte-order marker and type.
      out << "00"
      pos += 2
      out << take_swapped(string, pos, HEX_PER_WORD)
      pos += HEX_PER_WORD
      pos = append_rings(string, pos, out)
    end

    strip_srid(out)
  end

  # Check if an element is a polygon or a multipolygon (might need different
  # shapes in the future, like a point or a line) and convert it accordingly.
  # Any other shape prints "ALERT" and only goes through cut_from_wkb.
  #
  # @param string [String] hex WKB string
  # @return [String] hex WKB string ready to be handed to RGeo
  def reverse_wkb_endianness(string)
    # Index 3 is the low nibble of the first type byte in little-endian data.
    case string[3]
    when "6"
      reverse_multipolygon_endianness(string)
    when "3"
      reverse_polygon_endianness(string)
    else
      puts "ALERT"
      cut_from_wkb(string)
    end
  end

  # Parses the CSV file and appends one GeoJSON feature per valid row to the
  # collection. Rows whose geometry cannot be converted or parsed are
  # reported on standard output and skipped.
  #
  # @param data_file [String] path of the comma-separated file with headers
  # @return [nil]
  def parsePotagersFiles(data_file)
    data = CSV.parse(File.read(data_file), headers: true, col_sep: ",")
    factory = RGeo::Cartesian.factory

    data.each_with_index do |row, index|
      begin
        @list_of_locations["features"].push(build_feature(row, factory))
      rescue RGeo::Error::InvalidGeometry, RGeo::Error::ParseError, WkbFormatError => e
        puts "---"
        puts "Donnee numero #{index} invalide. Erreur: #{e}"
        puts "valeur: #{row["the_geom"]}"
      end
    end

    nil
  end

  # Writes the accumulated FeatureCollection to a file as pretty-printed JSON.
  #
  # @param write_file [String] destination path
  def writeToFile(write_file)
    File.write(write_file, JSON.pretty_generate(@list_of_locations))
  end

  private

  # Reads +width+ hex characters at +pos+ and returns them byte-swapped.
  # Raises WkbFormatError when fewer than +width+ characters are available.
  def take_swapped(hex, pos, width)
    chunk = hex[pos, width]
    if chunk.nil? || chunk.length < width
      raise WkbFormatError, "WKB data truncated at hex offset #{pos}"
    end

    reverse_endianness(chunk)
  end

  # Appends to +out+ the byte-swapped ring count found at +pos+ followed by
  # every ring (point count, then two values per point). Returns the offset
  # just past the last ring.
  def append_rings(hex, pos, out)
    ring_count_word = take_swapped(hex, pos, HEX_PER_WORD)
    out << ring_count_word
    pos += HEX_PER_WORD

    ring_count_word.to_i(16).times do
      point_count_word = take_swapped(hex, pos, HEX_PER_WORD)
      out << point_count_word
      pos += HEX_PER_WORD

      # Two values per point.
      (point_count_word.to_i(16) * 2).times do
        out << take_swapped(hex, pos, HEX_PER_DOUBLE)
        pos += HEX_PER_DOUBLE
      end
    end

    pos
  end

  # Drops the 8 hex characters that follow the big-endian type field (the
  # SRID) and zeroes the leading hex digit of the type field, where the
  # SRID flag lives.
  def strip_srid(hex)
    stripped = hex[0..9] + hex[18..-1]
    stripped[2] = "0"
    stripped
  end

  # Builds the GeoJSON feature hash for one CSV row.
  # Raises WkbFormatError when the row has no geometry value.
  def build_feature(row, factory)
    raw_geometry = row["the_geom"]
    if raw_geometry.nil? || raw_geometry.empty?
      raise WkbFormatError, "missing the_geom value"
    end

    wkb_hex = reverse_wkb_endianness(raw_geometry)
    geometry = RGeo::GeoJSON.encode(factory.parse_wkb(wkb_hex))

    {
      "type" => "Feature",
      "geometry" => geometry,
      "properties" => build_properties(row),
      "meta" => build_meta(row)
    }
  end

  # Fields shared with the other location sources.
  def build_properties(row)
    {
      "name" => fuse_two_entries(row["nom"], row["name"]),
      "description" => row["descriptio"],
      "entry_number" => row["cartodb_id"],
      "phone_number" => "", # No equivalent in potagers database
      "website" => "", # No equivalent in potagers database
      # There are two email fields, "email" and "email_2"; when both are
      # filled they are joined with "ou".
      "mail" => fuse_two_entries(row["email"], row["email_2"]),
      # There are two address fields, "rue" and "rue_num"; String(nil) is ""
      # so an empty field simply contributes nothing.
      "address" => String(row["rue"]) + " " + String(row["rue_num"]),
      "city" => row["n_commune"],
      "postal_code" => "", # No equivalent in potagers database
      "categories" => [43],
      "source" => row["source"],
      "srid" => "4326"
    }
  end

  # Supplementary fields specific to the potagers data set.
  def build_meta(row)
    {
      "commentaire" => row["comment_2"],
      "visibilite" => row["visibilite"],
      "acces" => row["acces"],
      "accessibilite" => row["accessibil"],
      "horaires" => row["horaires"],
      "utilisation" => row["utilisatio"],
      "type_de_lo" => row["type_de_lo"],
      "pollution" => row["pollution"],
      "pleine_t_1" => row["pleine_t_1"],
      "compost" => row["compost"],
      "g4_analyse" => row["g4_analyse"],
      "g3_autre_a" => row["g3_autre_a"],
      "g3_colla_1" => row["g3_colla_1"],
      "g3_finance" => row["g3_finance"],
      # NOTE(review): reads the same column as "g3_finance" above; this looks
      # like a copy-paste slip -- confirm the intended source column.
      "g3_finance_2" => row["g3_finance"],
      "vandalisme" => row["vandalisme"],
      "g3_accord_2" => row["g3_accor_2"],
      "accord avec" => row["accord_ave"],
      "proprietaire" => fuse_two_entries(row["proprietai"], row["propriet_1"]),
      "forme_juridique" => row["forme_juri"],
      "g2_participants" => row["g2_partici"],
      "gestionnaire" => row["gestionnai"],
      "pleine_ter" => row["pleine_ter"],
      "license" => row["licence"],
      "icon" => row["icon"],
      "code_potager" => row["code_potag"],
      "type" => row["type"],
      "historique" => row["historique"],
      "annee_de_creation" => row["annee_de_c"],
      "adresse_co" => row["adresse_co"],
      "begin" => row["begin"],
      "end" => row["end"],
      "area_sqm" => row["area_sqm"],
      "area_acres" => row["area_acres"]
    }
  end
end

puts "Starting parser"
parser = Parser.new
parser.parsePotagersFiles("potagers_merged_spjoined_ar2_1.csv.diff")
parser.writeToFile("LocationsPotagers.geojson")
puts "Ending parser"