aboutsummaryrefslogtreecommitdiff
path: root/doc/import/ParserPotagers.rb
diff options
context:
space:
mode:
Diffstat (limited to 'doc/import/ParserPotagers.rb')
-rw-r--r-- doc/import/ParserPotagers.rb 204
1 files changed, 204 insertions, 0 deletions
diff --git a/doc/import/ParserPotagers.rb b/doc/import/ParserPotagers.rb
new file mode 100644
index 0000000..91f9300
--- /dev/null
+++ b/doc/import/ParserPotagers.rb
@@ -0,0 +1,204 @@
+require 'json'
+require 'csv'
+require 'rgeo'
+require 'rgeo-geojson'
+
# Converts the "potagers" CSV export into a GeoJSON FeatureCollection.
#
# The +the_geom+ column is treated as a hex-encoded, little-endian (E)WKB
# geometry. It is rewritten as big-endian WKB without the SRID field before
# being handed to RGeo.
class Parser
  # Raised when a hex (E)WKB string cannot be converted. Subclasses
  # ArgumentError so generic +rescue ArgumentError+ handlers keep working.
  class InvalidWkbError < ArgumentError; end

  # Widths, in hex characters, of the fixed-size WKB fields.
  BYTE_ORDER_HEX_LENGTH = 2
  UINT32_HEX_LENGTH = 8
  DOUBLE_HEX_LENGTH = 16

  # Bits of the most significant nibble of the EWKB geometry type word.
  SRID_FLAG_NIBBLE = 0x2
  Z_OR_M_FLAG_NIBBLE = 0xC

  # WKB geometry type codes handled by the converter.
  POLYGON_TYPE = 3
  MULTIPOLYGON_TYPE = 6

  # Output key in "meta" => source CSV column. A two-element array means the
  # two columns are merged with #fuse_two_entries. Order is the output order.
  META_COLUMNS = {
    "commentaire" => "comment_2",
    "visibilite" => "visibilite",
    "acces" => "acces",
    "accessibilite" => "accessibil",
    "horaires" => "horaires",
    "utilisation" => "utilisatio",
    "type_de_lo" => "type_de_lo",
    "pollution" => "pollution",
    "pleine_t_1" => "pleine_t_1",
    "compost" => "compost",
    "g4_analyse" => "g4_analyse",
    "g3_autre_a" => "g3_autre_a",
    "g3_colla_1" => "g3_colla_1",
    "g3_finance" => "g3_finance",
    # NOTE(review): reads the same column as "g3_finance" above; this looks
    # like a copy/paste slip, but the intended source column is not visible
    # here, so the original behaviour is kept. Confirm against the CSV header.
    "g3_finance_2" => "g3_finance",
    "vandalisme" => "vandalisme",
    "g3_accord_2" => "g3_accor_2",
    "accord avec" => "accord_ave",
    "proprietaire" => %w[proprietai propriet_1],
    "forme_juridique" => "forme_juri",
    "g2_participants" => "g2_partici",
    "gestionnaire" => "gestionnai",
    "pleine_ter" => "pleine_ter",
    "license" => "licence",
    "icon" => "icon",
    "code_potager" => "code_potag",
    "type" => "type",
    "historique" => "historique",
    "annee_de_creation" => "annee_de_c",
    "adresse_co" => "adresse_co",
    "begin" => "begin",
    "end" => "end",
    "area_sqm" => "area_sqm",
    "area_acres" => "area_acres"
  }.freeze

  # Starts with an empty GeoJSON FeatureCollection.
  def initialize
    @list_of_locations = { "type" => "FeatureCollection", "features" => [] }
  end

  # Merges two optional values into one.
  #
  # @param entry_1 [String, nil]
  # @param entry_2 [String, nil]
  # @return [String, nil] "a ou b" when both are present and differ, the
  #   single present (or shared) value otherwise, nil when both are missing
  def fuse_two_entries(entry_1, entry_2)
    return entry_1 || entry_2 unless entry_1 && entry_2

    entry_1 == entry_2 ? entry_1 : "#{entry_1} ou #{entry_2}"
  end

  # Reverses the byte order of a hex string (two characters per byte).
  #
  # @param string [String] hex digits, even length
  # @return [String] the same bytes in reverse order
  # @raise [InvalidWkbError] if the input is nil or has an odd length
  def reverse_endianness(string)
    if string.nil? || string.length.odd?
      raise InvalidWkbError, "hex field must hold whole bytes: #{string.inspect}"
    end

    string.chars.each_slice(2).map(&:join).reverse.join
  end

  # Drops characters 7..14 of the string and returns the rest.
  # Used as the fallback for geometry types the converter does not handle.
  # NOTE(review): which WKB field this span is meant to remove is not evident
  # from the code; verify before relying on the result.
  def cut_from_wkb(string)
    string[0..6] + string[15..-1]
  end

  # Converts a little-endian hex (E)WKB polygon to big-endian WKB without
  # SRID. Any number of rings and any 32-bit point count is supported.
  #
  # @param string [String] hex (E)WKB starting with the "01" byte-order marker
  # @return [String] big-endian hex WKB
  # @raise [InvalidWkbError] on malformed, truncated or unsupported input
  def reverse_polygon_endianness(string)
    convert_wkb_to_big_endian(string)
  end

  # Converts a little-endian hex (E)WKB multipolygon to big-endian WKB
  # without SRID. Every member polygon is converted.
  #
  # @param string [String] hex (E)WKB starting with the "01" byte-order marker
  # @return [String] big-endian hex WKB
  # @raise [InvalidWkbError] on malformed, truncated or unsupported input
  def reverse_multipolygon_endianness(string)
    convert_wkb_to_big_endian(string)
  end

  # Picks the conversion from the fourth hex character, which is the low
  # nibble of the first geometry-type byte in little-endian input.
  #
  # @param string [String] hex (E)WKB
  # @return [String] converted hex WKB
  # @raise [InvalidWkbError] if the geometry is missing or malformed
  def reverse_wkb_endianness(string)
    raise InvalidWkbError, "missing geometry" if string.nil? || string.empty?

    case string[3]
    when "6" then reverse_multipolygon_endianness(string)
    when "3" then reverse_polygon_endianness(string)
    else
      puts "ALERT"
      cut_from_wkb(string)
    end
  end

  # Reads the CSV at +data_file+ and appends one GeoJSON feature per row.
  # Rows whose geometry cannot be converted or parsed are reported on stdout
  # and skipped so that one bad row does not abort the import.
  #
  # @param data_file [String] path to a comma-separated file with a header row
  def parsePotagersFiles(data_file)
    rows = CSV.parse(File.read(data_file), headers: true, col_sep: ",")
    factory = RGeo::Cartesian.factory

    rows.each_with_index do |row, index|
      begin
        @list_of_locations["features"].push(build_feature(row, factory))
      rescue RGeo::Error::RGeoError, InvalidWkbError => e
        puts "---"
        puts "Donnee numero #{index} invalide. Erreur: #{e}"
        puts "valeur: #{row["the_geom"]}"
      end
    end
  end

  # Writes the collected FeatureCollection to +write_file+ as pretty JSON.
  def writeToFile(write_file)
    File.write(write_file, JSON.pretty_generate(@list_of_locations))
  end

  private

  # Builds the GeoJSON feature hash for one CSV row.
  def build_feature(row, factory)
    geometry = factory.parse_wkb(reverse_wkb_endianness(row["the_geom"]))
    {
      "type" => "Feature",
      "geometry" => RGeo::GeoJSON.encode(geometry),
      "properties" => build_properties(row),
      "meta" => build_meta(row)
    }
  end

  # Builds the "properties" hash shared with the other location sources.
  def build_properties(row)
    {
      "name" => fuse_two_entries(row["nom"], row["name"]),
      "description" => row["descriptio"],
      "entry_number" => row["cartodb_id"],
      "phone_number" => "", # No equivalent in potagers database
      "website" => "", # No equivalent in potagers database
      # Two e-mail columns exist; both are kept when they differ.
      "mail" => fuse_two_entries(row["email"], row["email_2"]),
      # Street and number are separate columns and either may be empty;
      # missing parts are skipped so no stray spaces are produced.
      "address" => [row["rue"], row["rue_num"]].compact.join(" "),
      "city" => row["n_commune"], # Or should it always be "brussels"
      "postal_code" => "", # No equivalent in potagers database
      "categories" => [43],
      "source" => row["source"],
      "srid" => "4326"
    }
  end

  # Builds the "meta" hash of source-specific columns from META_COLUMNS.
  def build_meta(row)
    META_COLUMNS.transform_values do |source|
      if source.is_a?(Array)
        fuse_two_entries(row[source[0]], row[source[1]])
      else
        row[source]
      end
    end
  end

  # Converts a complete little-endian hex (E)WKB string and checks that the
  # whole input was consumed.
  def convert_wkb_to_big_endian(hex)
    raise InvalidWkbError, "missing geometry" if hex.nil? || hex.empty?

    out = String.new
    consumed = convert_geometry(hex, 0, out)
    unless consumed == hex.length
      raise InvalidWkbError, "unexpected data after geometry at offset #{consumed}"
    end

    out
  end

  # Converts the geometry starting at +pos+, appends it to +out+ and returns
  # the offset just past it. The SRID field, when flagged, is skipped and the
  # flag nibble of the type word is cleared.
  def convert_geometry(hex, pos, out)
    byte_order = read_hex(hex, pos, BYTE_ORDER_HEX_LENGTH)
    unless byte_order == "01"
      raise InvalidWkbError, "unsupported WKB byte order #{byte_order.inspect}"
    end
    pos += BYTE_ORDER_HEX_LENGTH

    type_hex = reverse_endianness(read_hex(hex, pos, UINT32_HEX_LENGTH))
    pos += UINT32_HEX_LENGTH

    flags = hex_to_integer(type_hex[0])
    unless (flags & Z_OR_M_FLAG_NIBBLE).zero?
      raise InvalidWkbError, "Z/M coordinates are not supported (type #{type_hex})"
    end

    unless (flags & SRID_FLAG_NIBBLE).zero?
      read_hex(hex, pos, UINT32_HEX_LENGTH) # SRID must be present; it is dropped
      pos += UINT32_HEX_LENGTH
    end

    type_hex[0] = "0"
    out << "00" << type_hex

    case hex_to_integer(type_hex)
    when POLYGON_TYPE then convert_rings(hex, pos, out)
    when MULTIPOLYGON_TYPE then convert_members(hex, pos, out)
    else raise InvalidWkbError, "unsupported WKB geometry type #{type_hex}"
    end
  end

  # Converts the member geometries of a collection; returns the new offset.
  def convert_members(hex, pos, out)
    member_count, pos = convert_uint32(hex, pos, out)
    member_count.times { pos = convert_geometry(hex, pos, out) }
    pos
  end

  # Converts the rings of a polygon (two doubles per point); returns the new
  # offset.
  def convert_rings(hex, pos, out)
    ring_count, pos = convert_uint32(hex, pos, out)
    ring_count.times do
      point_count, pos = convert_uint32(hex, pos, out)
      (point_count * 2).times do
        out << reverse_endianness(read_hex(hex, pos, DOUBLE_HEX_LENGTH))
        pos += DOUBLE_HEX_LENGTH
      end
    end
    pos
  end

  # Converts one 32-bit field, appends it to +out+ and returns
  # [decoded value, new offset].
  def convert_uint32(hex, pos, out)
    big_endian = reverse_endianness(read_hex(hex, pos, UINT32_HEX_LENGTH))
    out << big_endian
    [hex_to_integer(big_endian), pos + UINT32_HEX_LENGTH]
  end

  # Returns exactly +length+ characters at +pos+ or raises on truncation.
  def read_hex(hex, pos, length)
    chunk = hex[pos, length]
    if chunk.nil? || chunk.length < length
      raise InvalidWkbError, "truncated WKB: expected #{length} hex characters at offset #{pos}"
    end

    chunk
  end

  # Strict hex-to-integer conversion that reports bad digits as InvalidWkbError.
  def hex_to_integer(digits)
    Integer(digits, 16)
  rescue ArgumentError
    raise InvalidWkbError, "invalid hexadecimal field #{digits.inspect}"
  end
end
+
# Announced before anything else in the file runs (BEGIN semantics).
BEGIN { puts "Starting parser" }

# Convert the potagers CSV export and write the resulting GeoJSON file.
Parser.new.tap do |potagers_parser|
  potagers_parser.parsePotagersFiles("potagers_merged_spjoined_ar2_1.csv.diff")
  potagers_parser.writeToFile("LocationsPotagers.geojson")
end

# Announced when the interpreter exits (END semantics).
END { puts "Ending parser" }