diff --git a/.gitignore b/.gitignore index 18b43c9..97d4e96 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ # Ignore master key for decrypting credentials and more. /config/master.key +database.yml diff --git a/Gemfile b/Gemfile index 33017fd..591c79e 100644 --- a/Gemfile +++ b/Gemfile @@ -8,6 +8,11 @@ gem 'pg', '>= 0.18', '< 2.0' gem 'puma', '~> 3.11' gem 'bootsnap', '>= 1.1.0', require: false +gem 'pghero' +gem 'oj' +gem 'activerecord-import' +gem 'strong_migrations' + group :development, :test do # Call 'byebug' anywhere in the code to stop execution and get a debugger console gem 'byebug', platforms: [:mri, :mingw, :x64_mingw] diff --git a/Gemfile.lock b/Gemfile.lock index eb22e16..3de2b69 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -33,6 +33,8 @@ GEM activemodel (= 5.2.3) activesupport (= 5.2.3) arel (>= 9.0) + activerecord-import (1.0.1) + activerecord (>= 3.2) activestorage (5.2.3) actionpack (= 5.2.3) activerecord (= 5.2.3) @@ -76,7 +78,10 @@ GEM nio4r (2.3.1) nokogiri (1.10.2) mini_portile2 (~> 2.4.0) + oj (3.7.11) pg (1.1.4) + pghero (2.2.0) + activerecord puma (3.12.1) rack (2.0.6) rack-test (1.1.0) @@ -117,6 +122,8 @@ GEM actionpack (>= 4.0) activesupport (>= 4.0) sprockets (>= 3.0.0) + strong_migrations (0.3.1) + activerecord (>= 3.2.0) thor (0.20.3) thread_safe (0.3.6) tzinfo (1.2.5) @@ -134,12 +141,16 @@ PLATFORMS ruby DEPENDENCIES + activerecord-import bootsnap (>= 1.1.0) byebug listen (>= 3.0.5, < 3.2) + oj pg (>= 0.18, < 2.0) + pghero puma (~> 3.11) rails (~> 5.2.3) + strong_migrations tzinfo-data web-console (>= 3.3.0) diff --git a/app/controllers/trips_controller.rb b/app/controllers/trips_controller.rb index acb38be..432551f 100644 --- a/app/controllers/trips_controller.rb +++ b/app/controllers/trips_controller.rb @@ -2,6 +2,6 @@ class TripsController < ApplicationController def index @from = City.find_by_name!(params[:from]) @to = City.find_by_name!(params[:to]) - @trips = Trip.where(from: @from, to: @to).order(:start_time) + 
@trips = Trip.where(from: @from, to: @to).order(:start_time).eager_load(bus: :services) end end diff --git a/app/models/bus.rb b/app/models/bus.rb index 1dcc54c..d97de31 100644 --- a/app/models/bus.rb +++ b/app/models/bus.rb @@ -13,7 +13,9 @@ class Bus < ApplicationRecord ].freeze has_many :trips - has_and_belongs_to_many :services, join_table: :buses_services + # Join rows are deleted together with the bus, as the replaced has_and_belongs_to_many did on destroy. + has_many :buses_services, dependent: :delete_all + has_many :services, through: :buses_services validates :number, presence: true, uniqueness: true validates :model, inclusion: { in: MODELS } diff --git a/app/models/buses_service.rb b/app/models/buses_service.rb new file mode 100644 index 0000000..6219d44 --- /dev/null +++ b/app/models/buses_service.rb @@ -0,0 +1,4 @@ +class BusesService < ApplicationRecord + belongs_to :bus + belongs_to :service +end diff --git a/app/models/service.rb b/app/models/service.rb index 9cbb2a3..1781543 100644 --- a/app/models/service.rb +++ b/app/models/service.rb @@ -12,7 +12,9 @@ class Service < ApplicationRecord 'Можно не печатать билет', ].freeze - has_and_belongs_to_many :buses, join_table: :buses_services + # Join rows are deleted together with the service, as the replaced has_and_belongs_to_many did on destroy. + has_many :buses_services, dependent: :delete_all + has_many :buses, through: :buses_services validates :name, presence: true validates :name, inclusion: { in: SERVICES } diff --git a/app/views/trips/index.html.erb b/app/views/trips/index.html.erb index a60bce4..9db8729 100644 --- a/app/views/trips/index.html.erb +++ b/app/views/trips/index.html.erb @@ -7,10 +7,18 @@ <% @trips.each do |trip| %> - <%= render "delimiter" %> + ==================================================== <% end %> diff --git a/case-study.md b/case-study.md new file mode 100644 index 0000000..7ba0ff4 --- /dev/null +++ b/case-study.md @@ -0,0 +1,132 @@ +## Актуальная проблема +В нашем проекте возникла серьёзная проблема. + +### Импорт данных +При выполнении `bin/setup` в базу данных загружаются данные о рейсах из файла fixtures/small.json +Сама загрузка данных из файла делается очень наивно. 
+ +В комплекте с заданием поставляются файлы +``` +31M large.json +3,2M medium.json +308K small.json +``` + +Нужно оптимизировать механизм перезагрузки расписания из файла так, чтобы он обрабатывал файл large.json в пределах минуты. + +### Отображение расписаний +Сами страницы расписаний тоже формируются неэффективно и при росте объёмов начинают сильно тормозить. + +Нужно найти и устранить проблемы, замедляющие формирование этих страниц. + +## Формирование метрики +Для того, чтобы понимать, дают ли мои изменения положительный эффект на быстродействие программы, буду использовать такую метрику: +- Время выполнения программы на файле: small.json + +Время выполнения исходного кода: +``` +Loading data from fixtures/small.json + 7.531087 0.446847 7.977934 ( 9.376662) +``` + +``` +# ab -n 10 -c 10 http://localhost:3000/автобусы/Самара/Москва + +Concurrency Level: 10 +Time taken for tests: 1.874 seconds +Complete requests: 10 +Failed requests: 0 +Total transferred: 88112 bytes +HTML transferred: 81130 bytes +Requests per second: 5.34 [#/sec] (mean) +Time per request: 1874.175 [ms] (mean) +Time per request: 187.417 [ms] (mean, across all concurrent requests) +Transfer rate: 45.91 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 268 1127 449.1 943 1606 +Waiting: 268 1127 449.1 943 1606 +Total: 268 1127 449.1 943 1606 + +Percentage of the requests served within a certain time (ms) + 50% 943 + 66% 1571 + 75% 1596 + 80% 1603 + 90% 1606 + 95% 1606 + 98% 1606 + 99% 1606 + 100% 1606 (longest request) +``` + +## Feedback-Loop +Для того, чтобы иметь возможность быстро проверять гипотезы, я создал задачу `rails feedback:start`, которая позволит мне получать обратную связь по эффективности сделанных изменений за ~7,5 секунды. + +## Вникаем в детали системы, чтобы найти 20% точек роста +Для того, чтобы найти "точки роста" для оптимизации, я воспользовался библиотекой benchmark и утилитой ab. 
+ +Вот какие проблемы удалось найти и решить. + +## Оптимизация 1 +Импорт данных: замена парсинга на Oj и индексы таблиц БД существенного изменения метрики не дали. +Заметный эффект дало только использование гема activerecord-import: + +``` +Loading data from fixtures/small.json + 0.462224 0.002725 0.464949 ( 0.520389) +``` + +## Оптимизация 2 +Рендеринг: избавляемся от N+1 и рендеринга лишних партиалов. + +``` +# ab -n 10 -c 10 http://localhost:3000/автобусы/Самара/Москва + +Concurrency Level: 10 +Time taken for tests: 0.244 seconds +Complete requests: 10 +Failed requests: 0 +Total transferred: 92284 bytes +HTML transferred: 85340 bytes +Requests per second: 41.03 [#/sec] (mean) +Time per request: 243.738 [ms] (mean) +Time per request: 24.374 [ms] (mean, across all concurrent requests) +Transfer rate: 369.75 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 45 143 49.7 142 198 +Waiting: 45 143 49.8 142 198 +Total: 45 143 49.7 142 198 + +Percentage of the requests served within a certain time (ms) + 50% 142 + 66% 182 + 75% 194 + 80% 197 + 90% 198 + 95% 198 + 98% 198 + 99% 198 + 100% 198 (longest request) +``` + +## Результаты +В результате проделанной оптимизации удалось улучшить метрику системы **с 7.5s до 0.5s** + +``` +#rails asymptotics:start + +Loading data from fixtures/small.json + 0.472344 0.007469 0.479813 ( 0.543996) + +Loading data from fixtures/medium.json + 2.892941 0.012608 2.905549 ( 3.054357) + +Loading data from fixtures/large.json + 28.034505 0.102898 28.137403 ( 29.604921) +``` diff --git a/config/database.yml b/config/database.yml deleted file mode 100644 index e116cfa..0000000 --- a/config/database.yml +++ /dev/null @@ -1,85 +0,0 @@ -# PostgreSQL. Versions 9.1 and up are supported. 
-# -# Install the pg driver: -# gem install pg -# On OS X with Homebrew: -# gem install pg -- --with-pg-config=/usr/local/bin/pg_config -# On OS X with MacPorts: -# gem install pg -- --with-pg-config=/opt/local/lib/postgresql84/bin/pg_config -# On Windows: -# gem install pg -# Choose the win32 build. -# Install PostgreSQL and put its /bin directory on your path. -# -# Configure Using Gemfile -# gem 'pg' -# -default: &default - adapter: postgresql - encoding: unicode - # For details on connection pooling, see Rails configuration guide - # http://guides.rubyonrails.org/configuring.html#database-pooling - pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> - -development: - <<: *default - database: task-4_development - - # The specified database role being used to connect to postgres. - # To create additional roles in postgres see `$ createuser --help`. - # When left blank, postgres will use the default role. This is - # the same name as the operating system user that initialized the database. - #username: task-4 - - # The password associated with the postgres role (username). - #password: - - # Connect on a TCP socket. Omitted by default since the client uses a - # domain socket that doesn't need configuration. Windows does not have - # domain sockets, so uncomment these lines. - #host: localhost - - # The TCP port the server listens on. Defaults to 5432. - # If your server runs on a different port number, change accordingly. - #port: 5432 - - # Schema search path. The server defaults to $user,public - #schema_search_path: myapp,sharedapp,public - - # Minimum log levels, in increasing order: - # debug5, debug4, debug3, debug2, debug1, - # log, notice, warning, error, fatal, and panic - # Defaults to warning. - #min_messages: notice - -# Warning: The database defined as "test" will be erased and -# re-generated from your development database when you run "rake". -# Do not set this db to the same as development or production. 
-test: - <<: *default - database: task-4_test - -# As with config/secrets.yml, you never want to store sensitive information, -# like your database password, in your source code. If your source code is -# ever seen by anyone, they now have access to your database. -# -# Instead, provide the password as a unix environment variable when you boot -# the app. Read http://guides.rubyonrails.org/configuring.html#configuring-a-database -# for a full rundown on how to provide these environment variables in a -# production deployment. -# -# On Heroku and other platform providers, you may have a full connection URL -# available as an environment variable. For example: -# -# DATABASE_URL="postgres://myuser:mypass@localhost/somedatabase" -# -# You can use this database configuration with: -# -# production: -# url: <%= ENV['DATABASE_URL'] %> -# -production: - <<: *default - database: task-4_production - username: task-4 - password: <%= ENV['TASK-4_DATABASE_PASSWORD'] %> diff --git a/config/routes.rb b/config/routes.rb index a2da6a7..089f3c4 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,4 +1,6 @@ Rails.application.routes.draw do + # NOTE(review): this mount has no visible access restriction; confirm the dashboard is protected (e.g. PGHERO_USERNAME/PGHERO_PASSWORD or an authenticated route constraint) before deploying. + mount PgHero::Engine, at: "pghero" # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html get "/" => "statistics#index" get "автобусы/:from/:to" => "trips#index" diff --git a/db/migrate/20190410133341_add_indexes.rb b/db/migrate/20190410133341_add_indexes.rb new file mode 100644 index 0000000..3cf37b7 --- /dev/null +++ b/db/migrate/20190410133341_add_indexes.rb @@ -0,0 +1,9 @@ +class AddIndexes < ActiveRecord::Migration[5.2] + disable_ddl_transaction! 
+ def change + add_index :trips, [:from_id, :to_id], algorithm: :concurrently + add_index :trips, :bus_id, algorithm: :concurrently + add_index :buses_services, :bus_id, algorithm: :concurrently + add_index :buses_services, :service_id, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index f6921e4..b31081b 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,9 +10,10 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_03_30_193044) do +ActiveRecord::Schema.define(version: 2019_04_10_133341) do # These are extensions that must be enabled in order to support this database + enable_extension "pg_stat_statements" enable_extension "plpgsql" create_table "buses", force: :cascade do |t| @@ -23,6 +24,8 @@ create_table "buses_services", force: :cascade do |t| t.integer "bus_id" t.integer "service_id" + t.index ["bus_id"], name: "index_buses_services_on_bus_id" + t.index ["service_id"], name: "index_buses_services_on_service_id" end create_table "cities", force: :cascade do |t| @@ -40,6 +43,8 @@ t.integer "duration_minutes" t.integer "price_cents" t.integer "bus_id" + t.index ["bus_id"], name: "index_trips_on_bus_id" + t.index ["from_id", "to_id"], name: "index_trips_on_from_id_and_to_id" end end diff --git a/lib/tasks/asymptotics.rake b/lib/tasks/asymptotics.rake new file mode 100644 index 0000000..c98d5e0 --- /dev/null +++ b/lib/tasks/asymptotics.rake @@ -0,0 +1,20 @@ +namespace :asymptotics do + # Fixture files to benchmark; a block-local variable rather than a constant, so nothing is defined on Object. + fixture_files = %w[ + small.json + medium.json + large.json + ].freeze + + desc "Asymptotics" + task start: :environment do + fixture_files.each do |file| + result = Benchmark.measure do + puts "\nLoading data from fixtures/#{file}" + Rake::Task["reload_json"].execute({file_name: "fixtures/#{file}"}) + end + + puts result + end + end +end diff --git a/lib/tasks/feedback-loop.rake b/lib/tasks/feedback-loop.rake new file mode 100644 index 0000000..0168f0b --- /dev/null +++ 
b/lib/tasks/feedback-loop.rake @@ -0,0 +1,11 @@ +namespace :feedback do + desc "Feedback loop" + task start: :environment do + result = Benchmark.measure do + puts "\nLoading data from fixtures/small.json" + Rake::Task["reload_json"].execute({file_name: "fixtures/small.json"}) + end + + puts result + end +end diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index 540fe87..313a319 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -1,7 +1,7 @@ -# Наивная загрузка данных из json-файла в БД +# Reloads the schedule from a JSON file into the DB with bulk imports; depends on :environment so it also runs standalone: # rake reload_json[fixtures/small.json] -task :reload_json, [:file_name] => :environment do |_task, args| - json = JSON.parse(File.read(args.file_name)) +task :reload_json, [:file_name] => :environment do |_task, args| + json = Oj.load(File.read(args[:file_name])) ActiveRecord::Base.transaction do City.delete_all @@ -9,26 +9,45 @@ task :reload_json, [:file_name] => :environment do |_task, args| Service.delete_all Trip.delete_all ActiveRecord::Base.connection.execute('delete from buses_services;') + # One unsaved record per distinct name; the code below relies on import assigning ids to these instances. + cities = {} + services = {} json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) - services = [] - trip['bus']['services'].each do |service| - s = Service.find_or_create_by(name: service) - services << s - end - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) - - Trip.create!( - from: from, - to: to, - bus: bus, + cities[trip['from']] ||= City.new(name: trip['from']) if trip['from'] + cities[trip['to']] ||= City.new(name: trip['to']) if trip['to'] + trip['bus']['services'].each { |s| services[s] ||= Service.new(name: s) } + end + + City.import cities.values + Service.import services.values + + # Keyed by bus number; a later trip overwrites an earlier entry for the same number. + buses = {} + json.each do |trip| + bus = Bus.new(number: trip['bus']['number'], model: trip['bus']['model']) + buses.merge!(trip['bus']['number'] => [bus, services.values_at(*trip['bus']['services'])]) + end + + Bus.import buses.values.map(&:first) + + 
buses_services = [] + buses.values.each do |bus_with_service| + bus_with_service.second.each { |s| buses_services << { bus_id: bus_with_service.first.id, service_id: s.id } } + end + BusesService.import buses_services + + trips = json.map! do |trip| + { + from_id: cities.fetch(trip['from']).id, + to_id: cities.fetch(trip['to']).id, + bus_id: buses.fetch(trip['bus']['number']).first.id, start_time: trip['start_time'], duration_minutes: trip['duration_minutes'], - price_cents: trip['price_cents'], - ) + price_cents: trip['price_cents'] + } end + + Trip.import trips end end