Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.1
2.6.2
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

9 changes: 8 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
source 'https://rubygems.org'
git_source(:github) { |repo| "https://github.com/#{repo}.git" }

ruby '2.6.1'
ruby '2.6.2'

gem 'rails', '~> 5.2.3'
gem 'pg', '>= 0.18', '< 2.0'
gem 'puma', '~> 3.11'
gem 'bootsnap', '>= 1.1.0', require: false
gem 'oj'
gem 'bulk_insert'
gem 'pry'

group :development, :test do
# Call 'byebug' anywhere in the code to stop execution and get a debugger console
gem 'byebug', platforms: [:mri, :mingw, :x64_mingw]
gem 'benchmark-ips'
gem 'ruby-prof'
gem 'pghero'
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

gem 'pg_query', '>= 0.9.0'
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

end

group :development do
Expand Down
21 changes: 20 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,15 @@ GEM
minitest (~> 5.1)
tzinfo (~> 1.1)
arel (9.0.0)
benchmark-ips (2.7.2)
bindex (0.6.0)
bootsnap (1.4.2)
msgpack (~> 1.0)
builder (3.2.3)
bulk_insert (1.7.0)
activerecord (>= 3.2.0)
byebug (11.0.1)
coderay (1.1.2)
concurrent-ruby (1.1.5)
crass (1.0.4)
erubi (1.8.0)
Expand Down Expand Up @@ -76,7 +80,14 @@ GEM
nio4r (2.3.1)
nokogiri (1.10.2)
mini_portile2 (~> 2.4.0)
oj (3.7.11)
pg (1.1.4)
pg_query (1.1.0)
pghero (2.2.0)
activerecord
pry (0.12.2)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
puma (3.12.1)
rack (2.0.6)
rack-test (1.1.0)
Expand Down Expand Up @@ -109,6 +120,7 @@ GEM
rb-fsevent (0.10.3)
rb-inotify (0.10.0)
ffi (~> 1.0)
ruby-prof (0.17.0)
ruby_dep (1.5.0)
sprockets (3.7.2)
concurrent-ruby (~> 1.0)
Expand All @@ -134,17 +146,24 @@ PLATFORMS
ruby

DEPENDENCIES
benchmark-ips
bootsnap (>= 1.1.0)
bulk_insert
byebug
listen (>= 3.0.5, < 3.2)
oj
pg (>= 0.18, < 2.0)
pg_query (>= 0.9.0)
pghero
pry
puma (~> 3.11)
rails (~> 5.2.3)
ruby-prof
tzinfo-data
web-console (>= 3.3.0)

RUBY VERSION
ruby 2.6.1p33
ruby 2.6.2p47

BUNDLED WITH
2.0.1
6 changes: 6 additions & 0 deletions app/models/bus_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ActiveRecord model for a row of the `buses_services` table.
# Each record links one bus to one service.
class BusService < ApplicationRecord
# Set explicitly: the table is called `buses_services`, which is not the name
# Rails would derive from the class name `BusService`.
self.table_name = 'buses_services'

# Associations to the two records this row belongs to.
belongs_to :bus
belongs_to :service
end
23 changes: 23 additions & 0 deletions bin/test_benchmark.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Times `rake reload_json[<fixture>]` for each fixture file and prints the
# Benchmark result (user, system, total and real time) for every run.
require 'benchmark'

# Fixture files to import; each one is looked up in <APP_ROOT>/fixtures.
FILES = %w[
  small.json
  medium.json
  large.json
].freeze

# Application root: the parent of the directory this script lives in.
APP_ROOT = File.expand_path('..', __dir__)

FILES.each do |fname|
  fixture = File.join(APP_ROOT, 'fixtures', fname)

  # Printed before the measurement starts so the banner is not part of the timing.
  puts "----------Load data from #{fname}----------"

  succeeded = nil
  result = Benchmark.measure do
    # Argument-list form of `system`: no shell is involved, so the square
    # brackets of the task argument reach rake literally. Running the command
    # from APP_ROOT lets rake find the Rakefile regardless of the directory the
    # script was started from.
    succeeded = system('rake', "reload_json[#{fixture}]", chdir: APP_ROOT)
  end

  puts result

  # `system` returns false on a non-zero exit and nil when the command could
  # not be run at all. A timing for an import that did not complete is
  # meaningless, so flag it instead of reporting it silently.
  warn "rake reload_json failed for #{fname}; the timing above is not valid" unless succeeded
end

100 changes: 100 additions & 0 deletions case-study.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
При изучении utils.rake первым делом в глаза бросается обилие find_or_create_by

Решил с помощью Benchmark.ips посмотреть, как они выполняются последовательно:
```
ActiveRecord::Base.transaction do
City.delete_all
Bus.delete_all
Service.delete_all
Trip.delete_all
ActiveRecord::Base.connection.execute('delete from buses_services;')

Benchmark.ips do |x|
x.report('find_or_create_by City') do
json.each { |trip| City.find_or_create_by(name: trip['from']) }
end

x.report('Find find_or_create_by services') do
json.each do |trip|
trip['bus']['services'].each do |service|
Service.find_or_create_by(name: service)
end
end
end

x.report('find_or_create_by Bus') do
json.each { |trip| Bus.find_or_create_by(number: trip['bus']['number']) }
end
end
end
```

Получается так
```
Warming up --------------------------------------
find_or_create_by City
1.000 i/100ms
Find find_or_create_by services
1.000 i/100ms
find_or_create_by Bus
1.000 i/100ms
Calculating -------------------------------------
find_or_create_by City
2.134 (± 0.0%) i/s - 11.000 in 5.167181s
Find find_or_create_by services
0.491 (± 0.0%) i/s - 3.000 in 6.109831s
find_or_create_by Bus
0.497 (± 0.0%) i/s - 3.000 in 6.090513s
```

при этом общее время импорта small.json
11.723580999998376

Окей, пробуем добавить индексы
Показатель времени стал хуже
13.188413999974728

Показатели бенчмарка не "взлетели в небеса"
```
Warming up --------------------------------------
find_or_create_by City
1.000 i/100ms
Find find_or_create_by services
1.000 i/100ms
find_or_create_by Bus
1.000 i/100ms
Calculating -------------------------------------
find_or_create_by City
2.102 (± 0.0%) i/s - 11.000 in 5.260946s
Find find_or_create_by services
0.510 (± 0.0%) i/s - 3.000 in 5.885058s
find_or_create_by Bus
0.441 (± 0.0%) i/s - 3.000 in 6.919219s
```

Окей, индексы не выход (для импорта данных так уж точно)

Другой день, импорт small.json занимает 18-20 секунд
Пробую использовать gem `oj`

13.56506800011266 s

хмм, после серии тестов среднее время 13-15 секунд. Неплохо, оставляем

Замена AR-методов `delete_all` на raw SQL даёт ещё небольшой выигрыш — пару секунд

Окей, ставим и настраиваем pg_hero

10 000 элементов в small.json генерируют 4,229 запросов `SELECT FROM services` и всего 10 `INSERT INTO services`

Время работы сейчас важнее потребляемой памяти, поэтому попробую не вызывать find_or_create_by на каждую строку, а собрать данные в массив и вставить их с помощью bulk_insert

Стало намного лучше
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

```
----------Load data from small.json----------
0.000284 0.001181 1.671884 ( 2.216141)
----------Load data from medium.json----------
0.000137 0.000838 2.695500 ( 2.925685)
----------Load data from large.json----------
0.000154 0.000889 12.920520 ( 14.627362)
```
3 changes: 3 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Application routes.
Rails.application.routes.draw do
  # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html

  # The pghero gem is bundled only in the :development and :test groups, so the
  # PgHero constant does not exist in other environments. Mounting the engine
  # unconditionally would raise NameError while loading the routes there, so
  # the dashboard is mounted only when the engine is actually loaded.
  mount PgHero::Engine, at: "pghero" if defined?(PgHero::Engine)

  get "/" => "statistics#index"
  get "автобусы/:from/:to" => "trips#index"
end
13 changes: 13 additions & 0 deletions db/migrate/20190403191542_create_pghero_space_stats.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Creates the `pghero_space_stats` table: one row per database / schema /
# relation with a 64-bit size and the time it was captured.
# NOTE(review): presumably the table PgHero's space-stats capture writes to —
# confirm against the PgHero documentation.
class CreatePgheroSpaceStats < ActiveRecord::Migration[5.2]
  def change
    create_table :pghero_space_stats do |t|
      t.text :database
      t.text :schema
      t.text :relation
      # 8-byte integer column (same as `integer` with `limit: 8`).
      t.bigint :size
      t.timestamp :captured_at

      # Composite index on the database name and the capture time.
      t.index %i[database captured_at]
    end
  end
end
11 changes: 10 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2019_03_30_193044) do
ActiveRecord::Schema.define(version: 2019_04_03_191542) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand All @@ -29,6 +29,15 @@
t.string "name"
end

create_table "pghero_space_stats", force: :cascade do |t|
t.text "database"
t.text "schema"
t.text "relation"
t.bigint "size"
t.datetime "captured_at"
t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at"
end

create_table "services", force: :cascade do |t|
t.string "name"
end
Expand Down
94 changes: 74 additions & 20 deletions lib/tasks/utils.rake
Original file line number Diff line number Diff line change
@@ -1,34 +1,88 @@
# Loads trip data from a JSON file into the database, replacing everything
# that is currently stored in cities, buses, services, trips and
# buses_services.
#
# Usage: rake reload_json[fixtures/small.json]
#
# The file must contain an array of trips. Each trip is read through the keys
# 'from', 'to', 'start_time', 'duration_minutes', 'price_cents' and 'bus'
# (itself with 'number', 'model' and 'services', a list of service names).
#
# All rows are collected in memory first and written with bulk inserts, so the
# task issues a handful of INSERT batches instead of one lookup per record.
task :reload_json, [:file_name] => :environment do |_task, args|
  # Without this guard a missing argument surfaces as an opaque TypeError
  # from File.read(nil).
  abort 'Usage: rake reload_json[path/to/file.json]' unless args.file_name

  json = JSON.parse(File.read(args.file_name))

  ActiveRecord::Base.transaction do
    ActiveRecord::Base.connection.execute <<-SQL
      delete from cities;
      delete from buses;
      delete from services;
      delete from trips;
      delete from buses_services;
    SQL

    city_names = Set.new
    service_names = Set.new
    # bus number => model. Keyed by number so a bus is inserted exactly once
    # even if the file lists it with different models; the last one wins.
    bus_models = {}
    # Unique [bus number, service name] pairs.
    bus_service_pairs = Set.new
    # Plain array: every trip in the file becomes a row, identical ones included.
    trips = []

    json.each do |trip|
      bus = trip['bus']
      bus_number = bus['number']

      city_names << trip['from'] << trip['to']
      bus_models[bus_number] = bus['model']

      bus['services'].each do |service_name|
        service_names << service_name
        bus_service_pairs << [bus_number, service_name]
      end

      trips << {
        from_name: trip['from'],
        to_name: trip['to'],
        bus_number: bus_number,
        start_time: trip['start_time'],
        duration_minutes: trip['duration_minutes'],
        price_cents: trip['price_cents']
      }
    end

    City.bulk_insert do |worker|
      city_names.each { |name| worker.add(name: name) }
    end

    Service.bulk_insert do |worker|
      service_names.each { |name| worker.add(name: name) }
    end

    Bus.bulk_insert do |worker|
      bus_models.each { |number, model| worker.add(number: number, model: model) }
    end

    # Natural key => id lookup tables; only the two needed columns are loaded.
    city_ids = City.pluck(:name, :id).to_h
    service_ids = Service.pluck(:name, :id).to_h
    bus_ids = Bus.pluck(:number, :id).to_h

    BusService.bulk_insert do |worker|
      bus_service_pairs.each do |bus_number, service_name|
        # fetch: a missing key means the inserts above and the file disagree,
        # which should fail loudly rather than write a NULL foreign key.
        worker.add(
          bus_id: bus_ids.fetch(bus_number),
          service_id: service_ids.fetch(service_name)
        )
      end
    end

    Trip.bulk_insert do |worker|
      trips.each do |trip|
        worker.add(
          from_id: city_ids.fetch(trip[:from_name]),
          to_id: city_ids.fetch(trip[:to_name]),
          bus_id: bus_ids.fetch(trip[:bus_number]),
          start_time: trip[:start_time],
          duration_minutes: trip[:duration_minutes],
          price_cents: trip[:price_cents]
        )
      end
    end
  end
end