# frozen_string_literal: true

# LocationsParser.rb (doc/import/LocationsParser.rb)
#
# Provenance: introduced by commit 88d6de2, "Improved the parser file and added
# some more comments, created a readme.txt file to explain LocationsParser.rb".
# The usage notes below carry the content of the accompanying readme.txt.
#
# This file parses the data from the "Concertes" and "Dewey" folders.
# It creates a file "Locations.geojson", in the GeoJSON format, to be used with a map.
# The file contains all the points from the Concertes and Dewey databases.
#
# To run it (input paths are relative to the current directory):
#   ruby LocationsParser.rb
#
# The main files it uses are:
#   Concertes/20200312_EntreprisesSignaletique.csv  which contains the name of the points from the Concertes data
#   Concertes/20200312_EntreprisesCoordonnees.csv   which contains the coordinates of the points from the Concertes data
# and
#   Dewey/dewey-maps-markers.json                   which contains the data of the points from the Dewey data

require 'json'
require 'csv'

# Accumulates locations from the Dewey (.json) and Concertes (.csv) exports
# into a single GeoJSON-style "FeatureCollection" hash and writes it to disk.
class Parser
  def initialize
    # Contains all the locations from the Dewey and Concertes data.
    @list_of_locations = { "type" => "FeatureCollection", "features" => [] }
  end

  # Parses a .json file from the Dewey folder and appends one feature per record.
  #
  # The file is read as a JSON array of records, each with a "pk" and a
  # "fields" hash. fields["position"] is parsed as "SRID=<srid>;POINT (<x> <y>)".
  #
  # @param data_file [String] path of the Dewey JSON file
  # @return [void]
  def parseDeweyFiles(data_file)
    JSON.parse(File.read(data_file)).each do |record|
      fields = record["fields"]
      srid_part, point_part = fields["position"].split(";")
      # Every coordinate token becomes a Float (including an optional third
      # value), so the "coordinates" array never mixes numbers and strings.
      coordinates = point_part.gsub("POINT (", "").gsub(")", "").split(" ").map(&:to_f)

      add_feature(
        coordinates,
        "name" => fields["name"],
        "description" => fields["comment"],
        "entry_number" => record["pk"],
        "phone_number" => fields["phone"],
        "website" => fields["web"],
        "email" => fields["email"],
        "address" => fields["address"],
        "city" => "", # No equivalent in Dewey data
        "postal_code" => "", # No equivalent in Dewey data
        "categories" => fields["subcategories"],
        "source" => "Dewey",
        "srid" => srid_part.gsub("SRID=", "")
      )
    end
  end

  # Parses the ";"-separated .csv files from the Concertes folder and appends
  # one feature per row of data_file.
  #
  # Rows of the two files are paired by position: row i of data_file is
  # combined with row i of coordinates_file.
  # NOTE(review): pairing by row index assumes both exports are sorted
  # identically - confirm, or join on a shared key if one exists.
  #
  # @param data_file [String] CSV with the names/descriptions of the entries
  # @param coordinates_file [String] CSV with coordinates and contact details
  # @raise [ArgumentError] if coordinates_file has fewer rows than data_file
  # @return [void]
  def parseConcertesFiles(data_file, coordinates_file)
    data = CSV.parse(File.read(data_file), headers: true, col_sep: ";")
    coord = CSV.parse(File.read(coordinates_file), headers: true, col_sep: ";")

    # Fail before touching @list_of_locations, instead of crashing on a nil
    # row half-way through and leaving a partially filled feature list.
    if coord.size < data.size
      raise ArgumentError,
            "#{coordinates_file} has #{coord.size} rows but #{data_file} has #{data.size}; " \
            "every entry needs a matching coordinates row"
    end

    data.each_with_index do |entry, index|
      location = coord[index]

      add_feature(
        [location["CoordLamb_X"].to_f, location["CoordLamb_Y"].to_f],
        "name" => entry["Denomination_FULL"],
        "description" => entry["description"],
        "entry_number" => entry["NumEntr"],
        "phone_number" => location["Tel"],
        "website" => location["Web"],
        "email" => location["Email"],
        "address" => location["Adresse"],
        "city" => entry["INS_COMMUNE"],
        "postal_code" => location["Code postal"],
        "categories" => [], # The categories from the Concertes data will not be used
        "source" => "Concertes",
        # Not specified in the Concertes data.
        # NOTE(review): the column names (CoordLamb_X / CoordLamb_Y) suggest
        # Lambert coordinates, which would not be SRID 4326 - confirm.
        "srid" => "4326"
      )
    end
  end

  # Writes the list of locations as a pretty-printed JSON file.
  #
  # @param write_file [String] path of the file to create or overwrite
  # @return [Integer] number of bytes written (the result of File.write)
  def writeToFile(write_file)
    File.write(write_file, JSON.pretty_generate(@list_of_locations))
  end

  private

  # Appends one "Point" feature with the given coordinates and properties.
  def add_feature(coordinates, properties)
    @list_of_locations["features"].push(
      "type" => "Feature",
      "geometry" => { "type" => "Point", "coordinates" => coordinates },
      "properties" => properties
    )
  end
end

# Run the import only when this file is executed directly, so that the Parser
# class can be loaded from other code without side effects.
if __FILE__ == $PROGRAM_NAME
  signaletique_csv = "Concertes/20200312_EntreprisesSignaletique.csv"
  coordonnees_csv = "Concertes/20200312_EntreprisesCoordonnees.csv"
  dewey_json = "Dewey/dewey-maps-markers.json"
  output_path = "Locations.geojson"

  puts "Starting parser"

  missing_inputs = [signaletique_csv, coordonnees_csv, dewey_json].reject { |path| File.exist?(path) }
  if missing_inputs.empty?
    parser = Parser.new
    parser.parseConcertesFiles(signaletique_csv, coordonnees_csv)
    parser.parseDeweyFiles(dewey_json)
    parser.writeToFile(output_path)

    # Printed only after a successful write, and with the real output name.
    puts "Ending parser, the data is available in the file '#{output_path}'"
  else
    warn "Parser not run, missing input file(s): #{missing_inputs.join(', ')}"
  end
end