diff options
author | Nemael <100dragons@gmail.com> | 2021-02-05 16:09:19 +0100 |
---|---|---|
committer | Nemael <100dragons@gmail.com> | 2021-02-05 16:09:19 +0100 |
commit | b30fad1d0744ec070335612a46d2a15bac06f1d2 (patch) | |
tree | e3492d32fa962d60b3d800ec01cb995d2b1799c3 /doc/import/ParserPotagers.rb | |
parent | 757decefafb5d82557a8b7fa9691f94f19c3207e (diff) | |
download | incommon-map-b30fad1d0744ec070335612a46d2a15bac06f1d2.tar.gz |
Split parser into three files with three .geojson outputs. Also added a parser for Potagers. Added a dependency on the rgeo and rgeo-geojson gems
Diffstat (limited to 'doc/import/ParserPotagers.rb')
-rw-r--r-- | doc/import/ParserPotagers.rb | 204 |
1 files changed, 204 insertions, 0 deletions
# File: doc/import/ParserPotagers.rb (added as a new file by this commit).
#
# Converts the "potagers" CSV export into a GeoJSON FeatureCollection.
# Each CSV row carries its geometry in the "the_geom" column as a hex string.
# The code below treats that string as WKB with an embedded SRID (this matches
# the PostGIS EWKB layout -- NOTE(review): confirm against the data source),
# rewrites it as big-endian WKB without the SRID, parses it with RGeo and
# encodes the result as GeoJSON.

require 'json'
require 'csv'
require 'rgeo'
require 'rgeo-geojson'

class Parser
  # Bit of the geometry type word that announces a 4-byte SRID after it.
  WKB_SRID_FLAG = 0x20000000
  # Keeps the geometry type code once the top (flag) hex digit is cleared.
  WKB_TYPE_MASK = 0x0FFFFFFF
  WKB_POLYGON = 3
  WKB_MULTIPOLYGON = 6
  # Sizes expressed in hex characters (two per byte).
  HEX_PER_BYTE = 2
  HEX_PER_UINT32 = 8
  HEX_PER_DOUBLE = 16

  # Sequential reader over a hex string. Keeps an offset instead of repeatedly
  # re-slicing the remaining string.
  class HexCursor
    def initialize(hex)
      @hex = hex
      @position = 0
    end

    # Returns the next +count+ hex characters and advances past them.
    # @raise [ArgumentError] when fewer than +count+ characters are left
    def take(count)
      chunk = @hex[@position, count]
      raise ArgumentError, "truncated WKB data" if chunk.nil? || chunk.length < count

      @position += count
      chunk
    end

    # Number of hex characters not consumed yet.
    def remaining
      @hex.length - @position
    end
  end
  private_constant :HexCursor

  # Starts with an empty GeoJSON FeatureCollection; features are appended by
  # #parsePotagersFiles.
  def initialize
    @list_of_locations = {
      "type" => "FeatureCollection",
      "features" => []
    }
  end

  # Merges two optional values into one.
  #
  # @return the only present value, the shared value when both are equal,
  #   "<entry_1> ou <entry_2>" when both are present and differ, or nil when
  #   neither is present
  def fuse_two_entries(entry_1, entry_2)
    return entry_1 || entry_2 unless entry_1 && entry_2
    return entry_1 if entry_1 == entry_2

    "#{entry_1} ou #{entry_2}"
  end

  # Reverses the byte order of a hex string (two hex characters per byte).
  #
  # @param string [String] hex string with an even number of characters
  # @return [String] the same bytes in reverse order
  # @raise [ArgumentError] when the string has an odd number of characters
  def reverse_endianness(string)
    raise ArgumentError, "hex string has an odd length (#{string.length})" if string.length.odd?

    string.scan(/../m).reverse.join
  end

  # Fallback used for geometry types without a dedicated converter: keeps the
  # first 7 characters and everything from index 15 on, i.e. drops 8 characters
  # in the middle of the header. The byte order is left untouched.
  # NOTE(review): this looks like a shortcut to drop the SRID and its flag that
  # only works when the dropped/kept digits happen to be zeros -- confirm.
  #
  # @raise [ArgumentError] when the string is shorter than 15 characters
  def cut_from_wkb(string)
    raise ArgumentError, "WKB string too short (#{string.length} chars)" if string.length < 15

    string[0..6] + string[15..-1]
  end

  # Converts a hex polygon (optionally carrying an SRID) to big-endian WKB
  # without SRID. Polygons with several rings are supported.
  #
  # @param string [String] hex-encoded geometry
  # @return [String] hex-encoded big-endian WKB
  # @raise [ArgumentError] when the input is malformed or of an unsupported type
  def reverse_polygon_endianness(string)
    to_big_endian_wkb(string)
  end

  # Converts a hex multipolygon (optionally carrying an SRID) to big-endian WKB
  # without SRID. Any number of polygons and rings is supported, and element
  # counts are read as full 32-bit values.
  #
  # @param string [String] hex-encoded geometry
  # @return [String] hex-encoded big-endian WKB
  # @raise [ArgumentError] when the input is malformed or of an unsupported type
  def reverse_multipolygon_endianness(string)
    to_big_endian_wkb(string)
  end

  # Picks the converter from the 4th hex character of the input, which is the
  # low digit of the geometry type when the input is little-endian.
  # Any other value prints "ALERT" and falls back to #cut_from_wkb.
  def reverse_wkb_endianness(string)
    case string[3]
    when "6" then reverse_multipolygon_endianness(string)
    when "3" then reverse_polygon_endianness(string)
    else
      puts "ALERT"
      cut_from_wkb(string)
    end
  end

  # Reads the CSV file and appends one GeoJSON feature per row to the
  # collection. Rows whose geometry is missing, malformed or rejected by RGeo
  # are reported on stdout and skipped so that one bad row does not abort the
  # whole import.
  #
  # @param data_file [String] path of the CSV file (comma separated, with headers)
  # @return [nil]
  def parsePotagersFiles(data_file)
    data = CSV.parse(File.read(data_file), headers: true, col_sep: ",")
    factory = RGeo::Cartesian.factory

    data.each_with_index do |row, index|
      begin
        @list_of_locations["features"].push(build_feature(row, factory))
      rescue RGeo::Error::InvalidGeometry, RGeo::Error::ParseError, ArgumentError => e
        puts "---"
        puts "Donnee numero #{index} invalide. Erreur: #{e}"
        puts "valeur: #{row['the_geom']}"
      end
    end
    nil
  end

  # Writes the collected FeatureCollection as pretty-printed JSON.
  #
  # @param write_file [String] path of the output file
  def writeToFile(write_file)
    File.write(write_file, JSON.pretty_generate(@list_of_locations))
  end

  private

  # Builds the GeoJSON feature for one CSV row.
  # @raise [ArgumentError] when the row has no geometry
  def build_feature(row, factory)
    wkb_hex = row["the_geom"]
    raise ArgumentError, "missing the_geom value" if wkb_hex.nil? || wkb_hex.empty?

    geometry = factory.parse_wkb(reverse_wkb_endianness(wkb_hex))
    {
      "type" => "Feature",
      "geometry" => RGeo::GeoJSON.encode(geometry),
      "properties" => build_properties(row),
      "meta" => build_meta(row)
    }
  end

  # "IN COMMON" properties shared by all imported datasets.
  def build_properties(row)
    {
      "name" => fuse_two_entries(row["nom"], row["name"]),
      "description" => row["descriptio"],
      "entry_number" => row["cartodb_id"],
      "phone_number" => "", # No equivalent in potagers database
      "website" => "", # No equivalent in potagers database
      # Two email columns exist; when both are filled they are joined with " ou ".
      "mail" => fuse_two_entries(row["email"], row["email_2"]),
      # Either address column may be empty; nil interpolates as "".
      "address" => "#{row['rue']} #{row['rue_num']}",
      "city" => row["n_commune"], # Or should it always be "brussels"
      "postal_code" => "", # No equivalent in potagers database
      "categories" => [43],
      "source" => row["source"],
      "srid" => "4326"
    }
  end

  # Supplementary, dataset-specific columns copied through under "meta".
  def build_meta(row)
    {
      "commentaire" => row["comment_2"],
      "visibilite" => row["visibilite"],
      "acces" => row["acces"],
      "accessibilite" => row["accessibil"],
      "horaires" => row["horaires"],
      "utilisation" => row["utilisatio"],
      "type_de_lo" => row["type_de_lo"],
      "pollution" => row["pollution"],
      "pleine_t_1" => row["pleine_t_1"],
      "compost" => row["compost"],
      "g4_analyse" => row["g4_analyse"],
      "g3_autre_a" => row["g3_autre_a"],
      "g3_colla_1" => row["g3_colla_1"],
      "g3_finance" => row["g3_finance"],
      # NOTE(review): reads the same column as "g3_finance" above; possibly a
      # copy/paste slip for another column -- confirm against the CSV header.
      "g3_finance_2" => row["g3_finance"],
      "vandalisme" => row["vandalisme"],
      "g3_accord_2" => row["g3_accor_2"],
      "accord avec" => row["accord_ave"],
      "proprietaire" => fuse_two_entries(row["proprietai"], row["propriet_1"]),
      "forme_juridique" => row["forme_juri"],
      "g2_participants" => row["g2_partici"],
      "gestionnaire" => row["gestionnai"],
      "pleine_ter" => row["pleine_ter"],
      "license" => row["licence"],
      "icon" => row["icon"],
      "code_potager" => row["code_potag"],
      "type" => row["type"],
      "historique" => row["historique"],
      "annee_de_creation" => row["annee_de_c"],
      "adresse_co" => row["adresse_co"],
      "begin" => row["begin"],
      "end" => row["end"],
      "area_sqm" => row["area_sqm"],
      "area_acres" => row["area_acres"]
    }
  end

  # Converts a complete hex geometry and makes sure nothing is left over, since
  # leftover data means the structure was not understood.
  def to_big_endian_wkb(string)
    raise ArgumentError, "WKB is not a hex string" unless string.match?(/\A(?:\h\h)+\z/)

    cursor = HexCursor.new(string)
    converted = convert_geometry(cursor)
    unless cursor.remaining.zero?
      raise ArgumentError, "unexpected trailing WKB data (#{cursor.remaining} hex chars)"
    end

    converted
  end

  # Converts the geometry at the cursor (polygon or multipolygon) to big-endian
  # WKB. An SRID announced by the type word is skipped and the top hex digit of
  # the type word is cleared. Coordinates are assumed to be 2D (two doubles per
  # point) -- NOTE(review): Z/M-flagged input is not handled.
  def convert_geometry(cursor)
    little_endian = little_endian?(cursor.take(HEX_PER_BYTE))
    type_hex = read_word(cursor, HEX_PER_UINT32, little_endian)
    type_word = Integer(type_hex, 16)
    cursor.take(HEX_PER_UINT32) if (type_word & WKB_SRID_FLAG).nonzero? # drop the SRID
    type_hex[0] = "0"
    converted = "00" + type_hex

    case type_word & WKB_TYPE_MASK
    when WKB_POLYGON
      read_count(cursor, little_endian, converted).times do
        point_count = read_count(cursor, little_endian, converted)
        (point_count * 2).times do
          converted << read_word(cursor, HEX_PER_DOUBLE, little_endian)
        end
      end
    when WKB_MULTIPOLYGON
      # Every member polygon carries its own byte-order marker and type word.
      read_count(cursor, little_endian, converted).times do
        converted << convert_geometry(cursor)
      end
    else
      raise ArgumentError, "unsupported WKB geometry type #{type_word & WKB_TYPE_MASK}"
    end
    converted
  end

  # Reads a 32-bit element count, appends its big-endian form to +converted+
  # and returns it as an Integer.
  def read_count(cursor, little_endian, converted)
    count_hex = read_word(cursor, HEX_PER_UINT32, little_endian)
    converted << count_hex
    Integer(count_hex, 16)
  end

  # Reads +hex_length+ hex characters and returns them in big-endian order.
  def read_word(cursor, hex_length, little_endian)
    chunk = cursor.take(hex_length)
    little_endian ? reverse_endianness(chunk) : chunk
  end

  # Interprets the WKB byte-order marker: "01" is little-endian, "00" big-endian.
  def little_endian?(marker)
    case marker
    when "01" then true
    when "00" then false
    else raise ArgumentError, "invalid WKB byte-order marker #{marker.inspect}"
    end
  end
end

# BEGIN/END are kept so the start/end messages are printed exactly as before
# (BEGIN runs before anything else in the file, END at interpreter exit).
BEGIN {
  puts "Starting parser"
}

parser = Parser.new
# NOTE(review): the ".csv.diff" suffix is unusual for a CSV input -- confirm
# the intended file name.
parser.parsePotagersFiles("potagers_merged_spjoined_ar2_1.csv.diff")
parser.writeToFile("LocationsPotagers.geojson")

END {
  puts "Ending parser"
}