From 4a3a9f28a2ba08b13102b5ba5ba8e6613470d7d7 Mon Sep 17 00:00:00 2001 From: Artsiom Musin Date: Wed, 6 Mar 2019 17:12:19 +0300 Subject: [PATCH 1/4] Split one file to several ones --- .gitignore | 2 + task-1.rb | 178 +------------------------------------------------- task_class.rb | 145 ++++++++++++++++++++++++++++++++++++++++ task_test.rb | 35 ++++++++++ 4 files changed, 184 insertions(+), 176 deletions(-) create mode 100644 .gitignore create mode 100644 task_class.rb create mode 100644 task_test.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..778b3f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +data* +result* diff --git a/task-1.rb b/task-1.rb index 778672d..a72fbe8 100644 --- a/task-1.rb +++ b/task-1.rb @@ -1,176 +1,2 @@ -# Deoptimized version of homework task - -require 'json' -require 'pry' -require 'date' -require 'minitest/autorun' - -class User - attr_reader :attributes, :sessions - - def initialize(attributes:, sessions:) - @attributes = attributes - @sessions = sessions - end -end - -def parse_user(user) - fields = user.split(',') - parsed_result = { - 'id' => fields[1], - 'first_name' => fields[2], - 'last_name' => fields[3], - 'age' => fields[4], - } -end - -def parse_session(session) - fields = session.split(',') - parsed_result = { - 'user_id' => fields[1], - 'session_id' => fields[2], - 'browser' => fields[3], - 'time' => fields[4], - 'date' => fields[5], - } -end - -def collect_stats_from_users(report, users_objects, &block) - users_objects.each do |user| - user_key = "#{user.attributes['first_name']}" + ' ' + "#{user.attributes['last_name']}" - report['usersStats'][user_key] ||= {} - report['usersStats'][user_key] = report['usersStats'][user_key].merge(block.call(user)) - end -end - -def work - file_lines = File.read('data.txt').split("\n") - - users = [] - sessions = [] - - file_lines.each do |line| - cols = line.split(',') - users = users + [parse_user(line)] if cols[0] == 'user' - sessions = sessions + 
[parse_session(line)] if cols[0] == 'session' - end - - # Отчёт в json - # - Сколько всего юзеров + - # - Сколько всего уникальных браузеров + - # - Сколько всего сессий + - # - Перечислить уникальные браузеры в алфавитном порядке через запятую и капсом + - # - # - По каждому пользователю - # - сколько всего сессий + - # - сколько всего времени + - # - самая длинная сессия + - # - браузеры через запятую + - # - Хоть раз использовал IE? + - # - Всегда использовал только Хром? + - # - даты сессий в порядке убывания через запятую + - - report = {} - - report[:totalUsers] = users.count - - # Подсчёт количества уникальных браузеров - uniqueBrowsers = [] - sessions.each do |session| - browser = session['browser'] - uniqueBrowsers += [browser] if uniqueBrowsers.all? { |b| b != browser } - end - - report['uniqueBrowsersCount'] = uniqueBrowsers.count - - report['totalSessions'] = sessions.count - - report['allBrowsers'] = - sessions - .map { |s| s['browser'] } - .map { |b| b.upcase } - .sort - .uniq - .join(',') - - # Статистика по пользователям - users_objects = [] - - users.each do |user| - attributes = user - user_sessions = sessions.select { |session| session['user_id'] == user['id'] } - user_object = User.new(attributes: attributes, sessions: user_sessions) - users_objects = users_objects + [user_object] - end - - report['usersStats'] = {} - - # Собираем количество сессий по пользователям - collect_stats_from_users(report, users_objects) do |user| - { 'sessionsCount' => user.sessions.count } - end - - # Собираем количество времени по пользователям - collect_stats_from_users(report, users_objects) do |user| - { 'totalTime' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.sum.to_s + ' min.' } - end - - # Выбираем самую длинную сессию пользователя - collect_stats_from_users(report, users_objects) do |user| - { 'longestSession' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' 
} - end - - # Браузеры пользователя через запятую - collect_stats_from_users(report, users_objects) do |user| - { 'browsers' => user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') } - end - - # Хоть раз использовал IE? - collect_stats_from_users(report, users_objects) do |user| - { 'usedIE' => user.sessions.map{|s| s['browser']}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } } - end - - # Всегда использовал только Chrome? - collect_stats_from_users(report, users_objects) do |user| - { 'alwaysUsedChrome' => user.sessions.map{|s| s['browser']}.all? { |b| b.upcase =~ /CHROME/ } } - end - - # Даты сессий через запятую в обратном порядке в формате iso8601 - collect_stats_from_users(report, users_objects) do |user| - { 'dates' => user.sessions.map{|s| s['date']}.map {|d| Date.parse(d)}.sort.reverse.map { |d| d.iso8601 } } - end - - File.write('result.json', "#{report.to_json}\n") -end - -class TestMe < Minitest::Test - def setup - File.write('result.json', '') - File.write('data.txt', -'user,0,Leida,Cira,0 -session,0,0,Safari 29,87,2016-10-23 -session,0,1,Firefox 12,118,2017-02-27 -session,0,2,Internet Explorer 28,31,2017-03-28 -session,0,3,Internet Explorer 28,109,2016-09-15 -session,0,4,Safari 39,104,2017-09-27 -session,0,5,Internet Explorer 35,6,2016-09-01 -user,1,Palmer,Katrina,65 -session,1,0,Safari 17,12,2016-10-21 -session,1,1,Firefox 32,3,2016-12-20 -session,1,2,Chrome 6,59,2016-11-11 -session,1,3,Internet Explorer 10,28,2017-04-29 -session,1,4,Chrome 13,116,2016-12-28 -user,2,Gregory,Santos,86 -session,2,0,Chrome 35,6,2018-09-21 -session,2,1,Safari 49,85,2017-05-22 -session,2,2,Firefox 47,17,2018-02-02 -session,2,3,Chrome 20,84,2016-11-25 -') - end - - def test_result - work - expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 
49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n" - assert_equal expected_result, File.read('result.json') - end -end +require_relative 'task_class' +TaskClass.new.work(filename: ARGV[0]) diff --git a/task_class.rb b/task_class.rb new file mode 100644 index 0000000..dc08936 --- /dev/null +++ b/task_class.rb @@ -0,0 +1,145 @@ +# Deoptimized version of homework task + +require 'json' +require 'pry' +require 'date' + +class TaskClass + class User + attr_reader :attributes, :sessions + + def initialize(attributes:, sessions:) + @attributes = attributes + @sessions = sessions + end + end + + def parse_user(user) + fields = user.split(',') + parsed_result = { + 'id' => fields[1], + 'first_name' => fields[2], + 'last_name' => fields[3], + 'age' => fields[4], + } + end + + def parse_session(session) + fields = session.split(',') + parsed_result = { + 'user_id' => fields[1], + 'session_id' => fields[2], + 'browser' => fields[3], + 'time' => fields[4], + 'date' => fields[5], + } + end + + def collect_stats_from_users(report, users_objects, &block) + users_objects.each do |user| + user_key = "#{user.attributes['first_name']}" + ' ' + 
"#{user.attributes['last_name']}" + report['usersStats'][user_key] ||= {} + report['usersStats'][user_key] = report['usersStats'][user_key].merge(block.call(user)) + end + end + + def work(filename:) + file_lines = File.read(filename).split("\n") + + users = [] + sessions = [] + + file_lines.each do |line| + cols = line.split(',') + users = users + [parse_user(line)] if cols[0] == 'user' + sessions = sessions + [parse_session(line)] if cols[0] == 'session' + end + + # Отчёт в json + # - Сколько всего юзеров + + # - Сколько всего уникальных браузеров + + # - Сколько всего сессий + + # - Перечислить уникальные браузеры в алфавитном порядке через запятую и капсом + + # + # - По каждому пользователю + # - сколько всего сессий + + # - сколько всего времени + + # - самая длинная сессия + + # - браузеры через запятую + + # - Хоть раз использовал IE? + + # - Всегда использовал только Хром? + + # - даты сессий в порядке убывания через запятую + + + report = {} + + report[:totalUsers] = users.count + + # Подсчёт количества уникальных браузеров + uniqueBrowsers = [] + sessions.each do |session| + browser = session['browser'] + uniqueBrowsers += [browser] if uniqueBrowsers.all? 
{ |b| b != browser } + end + + report['uniqueBrowsersCount'] = uniqueBrowsers.count + + report['totalSessions'] = sessions.count + + report['allBrowsers'] = + sessions + .map { |s| s['browser'] } + .map { |b| b.upcase } + .sort + .uniq + .join(',') + + # Статистика по пользователям + users_objects = [] + + users.each do |user| + attributes = user + user_sessions = sessions.select { |session| session['user_id'] == user['id'] } + user_object = User.new(attributes: attributes, sessions: user_sessions) + users_objects = users_objects + [user_object] + end + + report['usersStats'] = {} + + # Собираем количество сессий по пользователям + collect_stats_from_users(report, users_objects) do |user| + { 'sessionsCount' => user.sessions.count } + end + + # Собираем количество времени по пользователям + collect_stats_from_users(report, users_objects) do |user| + { 'totalTime' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.sum.to_s + ' min.' } + end + + # Выбираем самую длинную сессию пользователя + collect_stats_from_users(report, users_objects) do |user| + { 'longestSession' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' } + end + + # Браузеры пользователя через запятую + collect_stats_from_users(report, users_objects) do |user| + { 'browsers' => user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') } + end + + # Хоть раз использовал IE? + collect_stats_from_users(report, users_objects) do |user| + { 'usedIE' => user.sessions.map{|s| s['browser']}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } } + end + + # Всегда использовал только Chrome? + collect_stats_from_users(report, users_objects) do |user| + { 'alwaysUsedChrome' => user.sessions.map{|s| s['browser']}.all? 
{ |b| b.upcase =~ /CHROME/ } } + end + + # Даты сессий через запятую в обратном порядке в формате iso8601 + collect_stats_from_users(report, users_objects) do |user| + { 'dates' => user.sessions.map{|s| s['date']}.map {|d| Date.parse(d)}.sort.reverse.map { |d| d.iso8601 } } + end + + File.write('result.json', "#{report.to_json}\n") + end +end diff --git a/task_test.rb b/task_test.rb new file mode 100644 index 0000000..b527bca --- /dev/null +++ b/task_test.rb @@ -0,0 +1,35 @@ +require 'minitest/autorun' +require_relative 'task_class' + +class TestMe < Minitest::Test + def setup + @filename = 'data.txt' + File.write('result.json', '') + File.write(@filename, +'user,0,Leida,Cira,0 +session,0,0,Safari 29,87,2016-10-23 +session,0,1,Firefox 12,118,2017-02-27 +session,0,2,Internet Explorer 28,31,2017-03-28 +session,0,3,Internet Explorer 28,109,2016-09-15 +session,0,4,Safari 39,104,2017-09-27 +session,0,5,Internet Explorer 35,6,2016-09-01 +user,1,Palmer,Katrina,65 +session,1,0,Safari 17,12,2016-10-21 +session,1,1,Firefox 32,3,2016-12-20 +session,1,2,Chrome 6,59,2016-11-11 +session,1,3,Internet Explorer 10,28,2017-04-29 +session,1,4,Chrome 13,116,2016-12-28 +user,2,Gregory,Santos,86 +session,2,0,Chrome 35,6,2018-09-21 +session,2,1,Safari 49,85,2017-05-22 +session,2,2,Firefox 47,17,2018-02-02 +session,2,3,Chrome 20,84,2016-11-25 +') + end + + def test_result + TaskClass.new.work(filename: @filename) + expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 
39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n" + assert_equal expected_result, File.read('result.json') + end +end From 537b39f2b99d228c60c5b853511e8c47e8b3c802 Mon Sep 17 00:00:00 2001 From: Artsiom Musin Date: Sun, 24 Mar 2019 23:20:36 +0300 Subject: [PATCH 2/4] Add fixes to speed up stuff. Part 1 --- .gitignore | 1 + Gemfile | 7 + Gemfile.lock | 20 + case-study-template.md | 19 +- ruby_prof_flat_allocations_profile.txt | 51 + ruby_prof_graph_allocations_profile.html | 2318 ++++++++++++++++++++++ task-1.rb | 27 +- task_class.rb | 102 +- 8 files changed, 2491 insertions(+), 54 deletions(-) create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 ruby_prof_flat_allocations_profile.txt create mode 100644 ruby_prof_graph_allocations_profile.html diff --git a/.gitignore b/.gitignore index 778b3f1..957eb13 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ data* result* +tmp/ diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..f442d98 --- /dev/null +++ b/Gemfile @@ -0,0 +1,7 @@ +source 'https://rubygems.org' + +#ruby '2.6.1' + +gem 'memory_profiler' +gem 'stackprof' +gem 'ruby-prof' diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..1bb1088 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,20 @@ +GEM + remote: https://rubygems.org/ + specs: + memory_profiler (0.9.12) + ruby-prof (0.17.0) + stackprof 
(0.2.12) + +PLATFORMS + ruby + +DEPENDENCIES + memory_profiler + ruby-prof + stackprof + +RUBY VERSION + ruby 2.6.1p33 + +BUNDLED WITH + 1.17.2 diff --git a/case-study-template.md b/case-study-template.md index e0eef00..329710c 100644 --- a/case-study-template.md +++ b/case-study-template.md @@ -18,20 +18,29 @@ Программа поставлялась с тестом. Выполнение этого теста позволяет не допустить изменения логики программы при оптимизации. ## Feedback-Loop -Для того, чтобы иметь возможность быстро проверять гипотезы я выстроил эффективный `feedback-loop`, который позволил мне получать обратную связь по эффективности сделанных изменений за *время, которое у вас получилось* +Для того, чтобы иметь возможность быстро проверять гипотезы, я выстроил эффективный `feedback-loop`, который позволил мне получать обратную связь по эффективности сделанных изменений за 2.5 минуты обработки файла. -Вот как я построил `feedback_loop`: *как вы построили feedback_loop* +Вот как я построил `feedback_loop`: +1. Сократил файл до 50_000 строк +2. Добавил профилировщик памяти +3. Вносил изменения в исходный код +4. Проверял скорость работы обработки файла +5. Если скорость не менялась, переходил к шагу 3. Если ускорялось, к шагу 6. +6. Увеличивал размер файла в 2 раза и переходил к шагу 3. Если код мог обрабатывать весь исходный файл за вменяемое время, переходил к шагу 7. +7. Все работает. Задача завершена. ## Вникаем в детали системы, чтобы найти 20% точек роста -Для того, чтобы найти "точки роста" для оптимизации я воспользовался *инструментами, которыми вы воспользовались* +Для того, чтобы найти "точки роста" для оптимизации, я воспользовался MemoryProfiler и stackprof. Вот какие проблемы удалось найти и решить ### Ваша находка №1 -О вашей находке №1 +.split(',') занимал достаточно много времени, так как повторялся в методах parse_user и parse_session. +Решение: делать split один раз и уже пробрасывать массив полей в parse_user и parse_session. 
### Ваша находка №2 -О вашей находке №2 +Избыточное использование collect_stats_from_users, где идет сбор статистики по всем пользователям каждый раз для нового атрибута статистики. +Решение: вынос блока из collect_stats_from_users в отдельные методы. ### Ваша находка №X О вашей находке №X diff --git a/ruby_prof_flat_allocations_profile.txt b/ruby_prof_flat_allocations_profile.txt new file mode 100644 index 0000000..46be573 --- /dev/null +++ b/ruby_prof_flat_allocations_profile.txt @@ -0,0 +1,51 @@ +Measure Mode: wall_time +Thread ID: 46973231184220 +Fiber ID: 46973235994980 +Total: 134.629748 +Sort by: self_time + + %self total self wait child calls name + 97.25 130.930 130.930 0.000 0.000 7695 Array#select + 1.36 134.419 1.830 0.000 132.589 7699 *Array#each + 0.64 0.864 0.860 0.000 0.004 50000 Array#all? + 0.24 0.326 0.326 0.000 0.000 50001 String#split + 0.12 0.204 0.159 0.000 0.045 69257 Array#map + 0.06 0.099 0.078 0.000 0.021 7696 Class#new + 0.04 0.112 0.057 0.000 0.055 1 JSON::Ext::Generator::GeneratorMethods::Hash#to_json + 0.04 0.049 0.049 0.000 0.000 42305 TaskClass#parse_session + 0.03 0.041 0.041 0.000 0.000 126956 String#encode + 0.03 0.037 0.037 0.000 0.000 116403 String#upcase + 0.02 0.035 0.028 0.000 0.007 7695 Array#any? 
+ 0.02 0.028 0.028 0.000 0.000 7696 Array#sort + 0.02 0.046 0.022 0.000 0.023 7695 Enumerable#sort_by + 0.02 0.021 0.021 0.000 0.000 7695 TaskClass::User#initialize + 0.02 0.020 0.020 0.000 0.000 7695 TaskClass#parse_user + 0.01 0.020 0.020 0.000 0.000 84610 String#to_i + 0.01 0.011 0.011 0.000 0.000 61564 String#to_s + 0.01 0.053 0.010 0.000 0.042 7695 TaskClass#collect_session_longest + 0.01 0.010 0.010 0.000 0.000 7696 Array#join + 0.01 0.008 0.008 0.000 0.000 1 #read + 0.01 0.060 0.008 0.000 0.052 7695 TaskClass#collect_session_time + 0.01 0.008 0.008 0.000 0.000 1 Array#uniq + 0.01 0.007 0.007 0.000 0.000 42305 String#-@ + 0.00 0.070 0.007 0.000 0.064 7695 TaskClass#collect_browsers + 0.00 0.006 0.006 0.000 0.000 15390 Integer#to_s + 0.00 0.064 0.005 0.000 0.059 7695 TaskClass#collect_session_dates + 0.00 0.054 0.005 0.000 0.049 7695 TaskClass#collect_ie_usage + 0.00 0.032 0.005 0.000 0.028 7695 TaskClass#collect_if_only_chrome_used + 0.00 0.006 0.004 0.000 0.002 7695 TaskClass#collect_session_count + 0.00 0.003 0.003 0.000 0.000 7697 Hash#keys + 0.00 0.003 0.003 0.000 0.000 1 #write + 0.00 0.002 0.002 0.000 0.000 7695 Array#max + 0.00 0.002 0.002 0.000 0.000 7698 Array#count + 0.00 0.002 0.002 0.000 0.000 7695 Array#sum + 0.00 134.630 0.001 0.000 134.629 1 TaskClass#work + 0.00 134.630 0.000 0.000 134.630 1 [global]#[no method] + 0.00 0.419 0.000 0.000 0.419 1 TaskClass#collect_stats_from_users + 0.00 0.000 0.000 0.000 0.000 1 Kernel#dup + 0.00 0.000 0.000 0.000 0.000 1 JSON::Ext::Generator::State#initialize_copy + 0.00 0.000 0.000 0.000 0.000 1 Symbol#to_s + 0.00 0.000 0.000 0.000 0.000 1 Kernel#initialize_dup + 0.00 0.000 0.000 0.000 0.000 1 BasicObject#initialize + +* indicates recursively called methods diff --git a/ruby_prof_graph_allocations_profile.html b/ruby_prof_graph_allocations_profile.html new file mode 100644 index 0000000..ba4c1cc --- /dev/null +++ b/ruby_prof_graph_allocations_profile.html @@ -0,0 +1,2318 @@ + + + + + + + +

Profile Report: wall_time

+ + + + + + + + + + + + + + +
Thread IDFiber IDTotal Time
4716616117286047166163365500180.00563621520996
+ + +

Thread 47166161172860, Fiber: 47166163365500

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
%Total%SelfTotalSelfWaitChildCallsNameLine
100.00%0.00%180.010.000.00180.011 + + [global]#[no method] + + 9
  180.010.000.00180.001/1TaskClass#work9
  0.000.000.000.001/7696Class#new9
  180.010.000.00180.001/1[global]#[no method]9
100.00%0.00%180.010.000.00180.001 + + TaskClass#work + + 45
  178.012.250.00175.763/10Array#each97
  1.720.000.001.727/7TaskClass#collect_stats_from_users137
  0.170.090.000.081/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json141
  0.050.040.000.012/84647Array#map86
  0.020.020.000.001/15391Array#sort86
  0.020.020.000.001/50001String#split45
  0.000.000.000.001/1Array#uniq86
  0.000.000.000.001/1<Class::IO>#write141
  0.000.000.000.001/1<Class::IO>#read45
  0.000.000.000.001/7696Array#join86
  0.000.000.000.003/7698Array#count84
  1.720.360.001.367/10TaskClass#collect_stats_from_users37
  178.012.250.00175.763/10TaskClass#work97
99.85%1.45%179.732.600.00177.1210 + + Array#each + +
  174.13174.130.000.007695/7695Array#select
  1.141.130.000.0150000/50000Array#all?
  1.130.320.000.8184645/84647Array#map
  0.270.270.000.0050000/50001String#split
  0.120.090.000.037695/7696Class#new
  0.110.110.000.0042305/42305TaskClass#parse_session
  0.080.050.000.0315390/15391Array#sort
  0.060.060.000.0053865/53865Hash#merge
  0.030.030.000.017695/7695Array#any?
  0.020.020.000.007695/7695TaskClass#parse_user
  0.010.010.000.007695/7696Array#join
  0.010.010.000.0015390/15390Integer#to_s
  0.000.000.000.007695/7695Array#reverse
  0.000.000.000.007695/7695Array#max
  0.000.000.000.007695/7698Array#count
  0.000.000.000.007695/7695Array#sum
  174.13174.130.000.007695/7695Array#each
96.73%96.73%174.13174.130.000.007695 + + Array#select + +
  1.720.000.001.727/7TaskClass#work137
0.95%0.00%1.720.000.001.727 + + TaskClass#collect_stats_from_users + + 37
  1.720.360.001.367/10Array#each37
  0.050.040.000.012/84647TaskClass#work86
  1.130.320.000.8184645/84647Array#each
0.66%0.20%1.180.360.000.8284647 + + Array#map + +
  0.700.350.000.3542305/42305<Class::Date>#parse
  0.060.060.000.0042305/42305Date#iso8601
  0.040.040.000.0084610/116403String#upcase
  0.030.030.000.0084610/84610String#to_i
  1.141.130.000.0150000/50000Array#each
0.63%0.63%1.141.130.000.0150000 + + Array#all? + +
  0.010.010.000.009858/116403String#upcase
  0.700.350.000.3542305/42305Array#map
0.39%0.19%0.700.350.000.3542305 + + <Class::Date>#parse + +
  0.140.140.000.0084610/84610Regexp#match
  0.080.080.000.0042305/42305String#gsub!
  0.070.070.000.0042305/42305MatchData#begin
  0.040.040.000.0042305/42305String#[]=
  0.020.020.000.0042305/42305Integer#div
  0.010.010.000.0042305/42305MatchData#end
  0.020.020.000.001/50001TaskClass#work45
  0.270.270.000.0050000/50001Array#each
0.16%0.16%0.290.290.000.0050001 + + String#split + +
  0.170.090.000.081/1TaskClass#work141
0.10%0.05%0.170.090.000.081 + + JSON::Ext::Generator::GeneratorMethods::Hash#to_json + +
  0.060.060.000.00126956/126956String#encode
  0.020.020.000.0061564/61564String#to_s
  0.000.000.000.007697/7697Hash#keys
  0.000.000.000.001/1Kernel#dup
  0.000.000.000.001/1Symbol#to_s
  0.140.140.000.0084610/84610<Class::Date>#parse
0.08%0.08%0.140.140.000.0084610 + + Regexp#match + +
  0.000.000.000.001/7696[global]#[no method]9
  0.120.090.000.037695/7696Array#each
0.07%0.05%0.120.090.000.037696 + + Class#new + +
  0.030.030.000.007695/7695TaskClass::User#initialize
  0.000.000.000.001/1BasicObject#initialize
  0.110.110.000.0042305/42305Array#each
0.06%0.06%0.110.110.000.0042305 + + TaskClass#parse_session + + 28
  0.020.020.000.001/15391TaskClass#work86
  0.080.050.000.0315390/15391Array#each
0.05%0.04%0.100.070.000.0315391 + + Array#sort + +
  0.030.030.000.0073799/73799Date#<=>
  0.080.080.000.0042305/42305<Class::Date>#parse
0.04%0.04%0.080.080.000.0042305 + + String#gsub! + +
  0.070.070.000.0042305/42305<Class::Date>#parse
0.04%0.04%0.070.070.000.0042305 + + MatchData#begin + +
  0.060.060.000.00126956/126956JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.04%0.04%0.060.060.000.00126956 + + String#encode + +
  0.060.060.000.0053865/53865Array#each
0.03%0.03%0.060.060.000.0053865 + + Hash#merge + +
  0.060.060.000.0042305/42305Array#map
0.03%0.03%0.060.060.000.0042305 + + Date#iso8601 + +
  0.010.010.000.009858/116403Array#all?
  0.010.010.000.0021935/116403Array#any?
  0.040.040.000.0084610/116403Array#map
0.03%0.03%0.050.050.000.00116403 + + String#upcase + +
  0.040.040.000.0042305/42305<Class::Date>#parse
0.02%0.02%0.040.040.000.0042305 + + String#[]= + +
  0.030.030.000.017695/7695Array#each
0.02%0.01%0.030.030.000.017695 + + Array#any? + +
  0.010.010.000.0021935/116403String#upcase
  0.030.030.000.0073799/73799Array#sort
0.02%0.02%0.030.030.000.0073799 + + Date#<=> + +
  0.030.030.000.0084610/84610Array#map
0.02%0.02%0.030.030.000.0084610 + + String#to_i + +
  0.030.030.000.007695/7695Class#new
0.01%0.01%0.030.030.000.007695 + + TaskClass::User#initialize + + 12
  0.020.020.000.0042305/42305<Class::Date>#parse
0.01%0.01%0.020.020.000.0042305 + + Integer#div + +
  0.020.020.000.007695/7695Array#each
0.01%0.01%0.020.020.000.007695 + + TaskClass#parse_user + + 19
  0.020.020.000.0061564/61564JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.01%0.01%0.020.020.000.0061564 + + String#to_s + +
  0.000.000.000.001/7696TaskClass#work86
  0.010.010.000.007695/7696Array#each
0.01%0.01%0.010.010.000.007696 + + Array#join + +
  0.010.010.000.0042305/42305<Class::Date>#parse
0.01%0.01%0.010.010.000.0042305 + + MatchData#end + +
  0.010.010.000.0015390/15390Array#each
0.00%0.00%0.010.010.000.0015390 + + Integer#to_s + +
  0.000.000.000.001/1TaskClass#work86
0.00%0.00%0.000.000.000.001 + + Array#uniq + +
  0.000.000.000.007697/7697JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.007697 + + Hash#keys + +
  0.000.000.000.001/1TaskClass#work141
0.00%0.00%0.000.000.000.001 + + <Class::IO>#write + +
  0.000.000.000.007695/7695Array#each
0.00%0.00%0.000.000.000.007695 + + Array#reverse + +
  0.000.000.000.007695/7695Array#each
0.00%0.00%0.000.000.000.007695 + + Array#max + +
  0.000.000.000.003/7698TaskClass#work84
  0.000.000.000.007695/7698Array#each
0.00%0.00%0.000.000.000.007698 + + Array#count + +
  0.000.000.000.007695/7695Array#each
0.00%0.00%0.000.000.000.007695 + + Array#sum + +
  0.000.000.000.001/1TaskClass#work45
0.00%0.00%0.000.000.000.001 + + <Class::IO>#read + +
  0.000.000.000.001/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.001 + + Kernel#dup + +
  0.000.000.000.001/1Kernel#initialize_dup
  0.000.000.000.001/1Kernel#dup
0.00%0.00%0.000.000.000.001 + + Kernel#initialize_dup + +
  0.000.000.000.001/1JSON::Ext::Generator::State#initialize_copy
  0.000.000.000.001/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.001 + + Symbol#to_s + +
  0.000.000.000.001/1Kernel#initialize_dup
0.00%0.00%0.000.000.000.001 + + JSON::Ext::Generator::State#initialize_copy + +
  0.000.000.000.001/1Class#new
0.00%0.00%0.000.000.000.001 + + BasicObject#initialize + +
* indicates recursively called methods
+ + + diff --git a/task-1.rb b/task-1.rb index a72fbe8..1d9a7ef 100644 --- a/task-1.rb +++ b/task-1.rb @@ -1,2 +1,27 @@ require_relative 'task_class' -TaskClass.new.work(filename: ARGV[0]) +#require 'memory_profiler' +require 'stackprof' +# require 'ruby-prof' +# require 'pry' + +#report = MemoryProfiler.report do +StackProf.run(mode: :object, out: 'tmp/stackprof.dump', raw: true) do +#result = RubyProf.profile do + TaskClass.new.work(filename: ARGV[0]) +end + +#report.pretty_print(scale_bytes: true) +# profile_data = StackProf.run(mode: :object) do +# TaskClass.new.work(filename: ARGV[0]) +# end +# StackProf::Report.new(profile_data).print_graphviz + + +# printer = RubyProf::FlatPrinter.new(result) +# printer.print(File.open("ruby_prof_flat_allocations_profile.txt", "w+")) + +# printer = RubyProf::DotPrinter.new(result) +# printer.print(File.open("ruby_prof_allocations_profile.dot", "w+")) + +# printer = RubyProf::GraphHtmlPrinter.new(result) +# printer.print(File.open("ruby_prof_graph_allocations_profile.html", "w+")) diff --git a/task_class.rb b/task_class.rb index dc08936..65d91d5 100644 --- a/task_class.rb +++ b/task_class.rb @@ -1,8 +1,9 @@ # Deoptimized version of homework task +# frozen_string_literal: true require 'json' -require 'pry' require 'date' +require 'pry' class TaskClass class User @@ -14,9 +15,8 @@ def initialize(attributes:, sessions:) end end - def parse_user(user) - fields = user.split(',') - parsed_result = { + def parse_user(fields) + { 'id' => fields[1], 'first_name' => fields[2], 'last_name' => fields[3], @@ -24,9 +24,8 @@ def parse_user(user) } end - def parse_session(session) - fields = session.split(',') - parsed_result = { + def parse_session(fields) + { 'user_id' => fields[1], 'session_id' => fields[2], 'browser' => fields[3], @@ -35,14 +34,57 @@ def parse_session(session) } end - def collect_stats_from_users(report, users_objects, &block) + def collect_stats_from_users(report, users_objects) + report['usersStats'] = {} 
users_objects.each do |user| user_key = "#{user.attributes['first_name']}" + ' ' + "#{user.attributes['last_name']}" report['usersStats'][user_key] ||= {} - report['usersStats'][user_key] = report['usersStats'][user_key].merge(block.call(user)) + report['usersStats'][user_key]['sessionsCount'] = collect_session_count(user) + report['usersStats'][user_key]['totalTime'] = collect_session_time(user) + report['usersStats'][user_key]['longestSession'] = collect_session_longest(user) + report['usersStats'][user_key]['browsers'] = collect_browsers(user) + report['usersStats'][user_key]['usedIE'] = collect_ie_usage(user) + report['usersStats'][user_key]['alwaysUsedChrome'] = collect_if_only_chrome_used(user) + report['usersStats'][user_key]['dates'] = collect_session_dates(user) end end + # Собираем количество сессий по пользователям + def collect_session_count(user) + user.sessions.count + end + + # Собираем количество времени по пользователям + def collect_session_time(user) + user.sessions.sum {|s| s['time'].to_i }.to_s + ' min.' + end + + # Выбираем самую длинную сессию пользователя + def collect_session_longest(user) + user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' + end + + # Браузеры пользователя через запятую + def collect_browsers(user) + user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') + end + + # Хоть раз использовал IE? + def collect_ie_usage(user) + !!user.sessions.find { |s| s['browser'] == 'INTERNET EXPLORER' } + end + + # Всегда использовал только Chrome? 
+ def collect_if_only_chrome_used(user) + browsers = user.sessions.map {|s| s['browser']}.uniq + browsers.count == 1 && browsers.first == 'CHROME' + end + + # Даты сессий через запятую в обратном порядке в формате iso8601 + def collect_session_dates(user) + user.sessions.map{|s| s['date']}.sort {|a,b| b <=> a} + end + def work(filename:) file_lines = File.read(filename).split("\n") @@ -51,8 +93,8 @@ def work(filename:) file_lines.each do |line| cols = line.split(',') - users = users + [parse_user(line)] if cols[0] == 'user' - sessions = sessions + [parse_session(line)] if cols[0] == 'session' + users = users + [parse_user(cols)] if cols[0] == 'user' + sessions = sessions + [parse_session(cols)] if cols[0] == 'session' end # Отчёт в json @@ -102,43 +144,7 @@ def work(filename:) user_object = User.new(attributes: attributes, sessions: user_sessions) users_objects = users_objects + [user_object] end - - report['usersStats'] = {} - - # Собираем количество сессий по пользователям - collect_stats_from_users(report, users_objects) do |user| - { 'sessionsCount' => user.sessions.count } - end - - # Собираем количество времени по пользователям - collect_stats_from_users(report, users_objects) do |user| - { 'totalTime' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.sum.to_s + ' min.' } - end - - # Выбираем самую длинную сессию пользователя - collect_stats_from_users(report, users_objects) do |user| - { 'longestSession' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' } - end - - # Браузеры пользователя через запятую - collect_stats_from_users(report, users_objects) do |user| - { 'browsers' => user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') } - end - - # Хоть раз использовал IE? - collect_stats_from_users(report, users_objects) do |user| - { 'usedIE' => user.sessions.map{|s| s['browser']}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } } - end - - # Всегда использовал только Chrome? 
- collect_stats_from_users(report, users_objects) do |user| - { 'alwaysUsedChrome' => user.sessions.map{|s| s['browser']}.all? { |b| b.upcase =~ /CHROME/ } } - end - - # Даты сессий через запятую в обратном порядке в формате iso8601 - collect_stats_from_users(report, users_objects) do |user| - { 'dates' => user.sessions.map{|s| s['date']}.map {|d| Date.parse(d)}.sort.reverse.map { |d| d.iso8601 } } - end + collect_stats_from_users(report, users_objects) File.write('result.json', "#{report.to_json}\n") end From 435305f69e527e7a83b871dc28c74922da9163f3 Mon Sep 17 00:00:00 2001 From: Artsiom Musin Date: Mon, 25 Mar 2019 09:05:27 +0300 Subject: [PATCH 3/4] Fix remaining perf issues --- task-1.rb | 14 +++--- task_class.rb | 124 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/task-1.rb b/task-1.rb index 1d9a7ef..518f5ca 100644 --- a/task-1.rb +++ b/task-1.rb @@ -1,12 +1,12 @@ require_relative 'task_class' #require 'memory_profiler' require 'stackprof' -# require 'ruby-prof' +require 'ruby-prof' # require 'pry' #report = MemoryProfiler.report do -StackProf.run(mode: :object, out: 'tmp/stackprof.dump', raw: true) do -#result = RubyProf.profile do +#StackProf.run(mode: :object, out: 'tmp/stackprof.dump', raw: true) do +result = RubyProf.profile do TaskClass.new.work(filename: ARGV[0]) end @@ -20,8 +20,8 @@ # printer = RubyProf::FlatPrinter.new(result) # printer.print(File.open("ruby_prof_flat_allocations_profile.txt", "w+")) -# printer = RubyProf::DotPrinter.new(result) -# printer.print(File.open("ruby_prof_allocations_profile.dot", "w+")) +printer = RubyProf::DotPrinter.new(result) +printer.print(File.open("ruby_prof_allocations_profile.dot", "w+")) -# printer = RubyProf::GraphHtmlPrinter.new(result) -# printer.print(File.open("ruby_prof_graph_allocations_profile.html", "w+")) +printer = RubyProf::GraphHtmlPrinter.new(result) +printer.print(File.open("ruby_prof_graph_allocations_profile.html", "w+")) diff 
--git a/task_class.rb b/task_class.rb index 65d91d5..11dde52 100644 --- a/task_class.rb +++ b/task_class.rb @@ -4,6 +4,7 @@ require 'json' require 'date' require 'pry' +require 'csv' class TaskClass class User @@ -21,6 +22,7 @@ def parse_user(fields) 'first_name' => fields[2], 'last_name' => fields[3], 'age' => fields[4], + 'sessions' => [] } end @@ -37,65 +39,105 @@ def parse_session(fields) def collect_stats_from_users(report, users_objects) report['usersStats'] = {} users_objects.each do |user| - user_key = "#{user.attributes['first_name']}" + ' ' + "#{user.attributes['last_name']}" + user_key = "#{user['first_name']}" + ' ' + "#{user['last_name']}" report['usersStats'][user_key] ||= {} - report['usersStats'][user_key]['sessionsCount'] = collect_session_count(user) - report['usersStats'][user_key]['totalTime'] = collect_session_time(user) - report['usersStats'][user_key]['longestSession'] = collect_session_longest(user) - report['usersStats'][user_key]['browsers'] = collect_browsers(user) - report['usersStats'][user_key]['usedIE'] = collect_ie_usage(user) - report['usersStats'][user_key]['alwaysUsedChrome'] = collect_if_only_chrome_used(user) - report['usersStats'][user_key]['dates'] = collect_session_dates(user) + report['usersStats'][user_key]['sessionsCount'] = collect_session_count(user['sessions']) + report['usersStats'][user_key]['totalTime'] = collect_session_time(user['sessions']) + report['usersStats'][user_key]['longestSession'] = collect_session_longest(user['sessions']) + report['usersStats'][user_key]['browsers'] = collect_browsers(user['sessions']) + report['usersStats'][user_key]['usedIE'] = collect_ie_usage(user['sessions']) + report['usersStats'][user_key]['alwaysUsedChrome'] = collect_if_only_chrome_used(user['sessions']) + report['usersStats'][user_key]['dates'] = collect_session_dates(user['sessions']) end end # Собираем количество сессий по пользователям - def collect_session_count(user) - user.sessions.count + def 
collect_session_count(sessions) + sessions.count end # Собираем количество времени по пользователям - def collect_session_time(user) - user.sessions.sum {|s| s['time'].to_i }.to_s + ' min.' + def collect_session_time(sessions) + sessions.sum {|s| s['time'].to_i }.to_s + ' min.' end # Выбираем самую длинную сессию пользователя - def collect_session_longest(user) - user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' + def collect_session_longest(sessions) + sessions.map {|s| s['time'].to_i}.max.to_s + ' min.' end # Браузеры пользователя через запятую - def collect_browsers(user) - user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') + def collect_browsers(sessions) + sessions.map {|s| s['browser'].upcase}.sort.join(', ') end # Хоть раз использовал IE? - def collect_ie_usage(user) - !!user.sessions.find { |s| s['browser'] == 'INTERNET EXPLORER' } + def collect_ie_usage(sessions) + !!sessions.find {|s| s['browser'] =~ /INTERNET EXPLORER/i } end # Всегда использовал только Chrome? 
- def collect_if_only_chrome_used(user) - browsers = user.sessions.map {|s| s['browser']}.uniq - browsers.count == 1 && browsers.first == 'CHROME' + def collect_if_only_chrome_used(sessions) + browsers = sessions.map {|s| s['browser']}.uniq + browsers.count == 1 && browsers.first =~ /CHROME/i end # Даты сессий через запятую в обратном порядке в формате iso8601 - def collect_session_dates(user) - user.sessions.map{|s| s['date']}.sort {|a,b| b <=> a} + def collect_session_dates(sessions) + sessions.map{|s| s['date']}.sort {|a,b| b <=> a} end - def work(filename:) - file_lines = File.read(filename).split("\n") + def prepare_data(filename, users, sessions) + File.open(filename) do |file| + file.lazy.each_slice(2000) do |lines| + lines.each do |row| + row = row.chomp.split(',') + if row[0] == 'session' + session = parse_session(row) + sessions << session + users[session['user_id'].to_i]['sessions'] << session + else + users << parse_user(row) + end + end + end + end + # file_lines = File.open(filename, "r") + # file_lines.each_line do |line| + # cols = line.chomp.split(',') + # if cols[0] == 'session' + # sessions << parse_session(cols) + # else + # users << parse_user(cols) + # end + # end + # CSV.foreach(filename) do |row| + # if row[0] == 'session' + # sessions << parse_session(row) + # else + # users << parse_user(row) + # end + # end + # File.open(filename, 'r') do |file| + # csv = CSV.new(file, headers: true) + # sum = 0 + # + # while row = csv.shift + # if row[0] == 'session' + # sessions << parse_session(row) + # else + # users << parse_user(row) + # end + # end + # end + end + def work(filename:) users = [] sessions = [] - - file_lines.each do |line| - cols = line.split(',') - users = users + [parse_user(cols)] if cols[0] == 'user' - sessions = sessions + [parse_session(cols)] if cols[0] == 'session' - end + t1=Time.now + prepare_data(filename, users, sessions) + puts Time.now-t1 # Отчёт в json # - Сколько всего юзеров + @@ -120,7 +162,7 @@ def work(filename:) 
uniqueBrowsers = [] sessions.each do |session| browser = session['browser'] - uniqueBrowsers += [browser] if uniqueBrowsers.all? { |b| b != browser } + uniqueBrowsers << browser unless uniqueBrowsers.include?(browser) end report['uniqueBrowsersCount'] = uniqueBrowsers.count @@ -129,22 +171,20 @@ def work(filename:) report['allBrowsers'] = sessions - .map { |s| s['browser'] } - .map { |b| b.upcase } + .map { |s| s['browser'].upcase } .sort .uniq .join(',') # Статистика по пользователям users_objects = [] - - users.each do |user| - attributes = user - user_sessions = sessions.select { |session| session['user_id'] == user['id'] } - user_object = User.new(attributes: attributes, sessions: user_sessions) - users_objects = users_objects + [user_object] - end - collect_stats_from_users(report, users_objects) +t2=Time.now + # users.each do |user| + # user_object = User.new(attributes: user, sessions: user['sessions']) + # users_objects << user_object + # end + puts Time.now - t2 + collect_stats_from_users(report, users) File.write('result.json', "#{report.to_json}\n") end From ad45f6d8719b6769c0efaeeb2209bfa79f684000 Mon Sep 17 00:00:00 2001 From: Artsiom Musin Date: Mon, 25 Mar 2019 13:34:28 +0300 Subject: [PATCH 4/4] Update case study and rafactor/cleanup code --- Gemfile.lock | 3 - case-study-template.md | 13 +- ruby_prof_flat_allocations_profile.txt | 51 - ruby_prof_graph_allocations_profile.html | 2318 ---------------------- task-1.rb | 17 +- task_class.rb | 70 +- task_test.rb | 16 +- 7 files changed, 32 insertions(+), 2456 deletions(-) delete mode 100644 ruby_prof_flat_allocations_profile.txt delete mode 100644 ruby_prof_graph_allocations_profile.html diff --git a/Gemfile.lock b/Gemfile.lock index 1bb1088..c51f98c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -13,8 +13,5 @@ DEPENDENCIES ruby-prof stackprof -RUBY VERSION - ruby 2.6.1p33 - BUNDLED WITH 1.17.2 diff --git a/case-study-template.md b/case-study-template.md index 329710c..50f00ef 100644 --- 
a/case-study-template.md +++ b/case-study-template.md @@ -12,7 +12,7 @@ Я решил исправить эту проблему, оптимизировав эту программу. ## Формирование метрики -Для того, чтобы понимать, дают ли мои изменения положительный эффект на быстродействие программы я придумал использовать такую метрику: *тут ваша метрика* +Для того, чтобы понимать, дают ли мои изменения положительный эффект на быстродействие программы я придумал использовать такую метрику: 20-30% выигрыш по времени после изменений. ## Гарантия корректности работы оптимизированной программы Программа поставлялась с тестом. Выполнение этого теста позволяет не допустить изменения логики программы при оптимизации. @@ -30,7 +30,7 @@ 7. Все работает. Задача завершена. ## Вникаем в детали системы, чтобы найти 20% точек роста -Для того, чтобы найти "точки роста" для оптимизации я воспользовался MemoryProfiler и stackprof. +Для того, чтобы найти "точки роста" для оптимизации я воспользовался stackprof и ruby-prof. Вот какие проблемы удалось найти и решить @@ -43,13 +43,12 @@ Решение: вынос блока из collect_stats_from_users в отдельные методы. ### Ваша находка №X -О вашей находке №X +Вложенный select в each, используемый для поиска сессий для пользователей, занимал 99% времени. +Решение: рефакторинг и добавление сессий к пользователю на этапе чтения файла. ## Результаты В результате проделанной оптимизации наконец удалось обработать файл с данными.
-Удалось улучшить метрику системы с *того, что у вас было в начале, до того, что получилось в конце* - -*Какими ещё результами можете поделиться* +Удалось улучшить метрику системы с 2.5 минут для 50_000 записей до 1.5 минут для полного файла (3_250_940 записей) ## Защита от регресса производительности -Для защиты от потери достигнутого прогресса при дальнейших изменениях программы сделано *то, что вы для этого сделали* +Для защиты от потери достигнутого прогресса при дальнейших изменениях программы добавлен новый юнит-тест, который зафиксировал 1.5 минуты как эталонное значение. Но с погрешностью в еще 1.5 минуты с учетом загруженности системы или других факторов, которые могут повлиять на скорость обработки данных. diff --git a/ruby_prof_flat_allocations_profile.txt b/ruby_prof_flat_allocations_profile.txt deleted file mode 100644 index 46be573..0000000 --- a/ruby_prof_flat_allocations_profile.txt +++ /dev/null @@ -1,51 +0,0 @@ -Measure Mode: wall_time -Thread ID: 46973231184220 -Fiber ID: 46973235994980 -Total: 134.629748 -Sort by: self_time - - %self total self wait child calls name - 97.25 130.930 130.930 0.000 0.000 7695 Array#select - 1.36 134.419 1.830 0.000 132.589 7699 *Array#each - 0.64 0.864 0.860 0.000 0.004 50000 Array#all? - 0.24 0.326 0.326 0.000 0.000 50001 String#split - 0.12 0.204 0.159 0.000 0.045 69257 Array#map - 0.06 0.099 0.078 0.000 0.021 7696 Class#new - 0.04 0.112 0.057 0.000 0.055 1 JSON::Ext::Generator::GeneratorMethods::Hash#to_json - 0.04 0.049 0.049 0.000 0.000 42305 TaskClass#parse_session - 0.03 0.041 0.041 0.000 0.000 126956 String#encode - 0.03 0.037 0.037 0.000 0.000 116403 String#upcase - 0.02 0.035 0.028 0.000 0.007 7695 Array#any?
- 0.02 0.028 0.028 0.000 0.000 7696 Array#sort - 0.02 0.046 0.022 0.000 0.023 7695 Enumerable#sort_by - 0.02 0.021 0.021 0.000 0.000 7695 TaskClass::User#initialize - 0.02 0.020 0.020 0.000 0.000 7695 TaskClass#parse_user - 0.01 0.020 0.020 0.000 0.000 84610 String#to_i - 0.01 0.011 0.011 0.000 0.000 61564 String#to_s - 0.01 0.053 0.010 0.000 0.042 7695 TaskClass#collect_session_longest - 0.01 0.010 0.010 0.000 0.000 7696 Array#join - 0.01 0.008 0.008 0.000 0.000 1 #read - 0.01 0.060 0.008 0.000 0.052 7695 TaskClass#collect_session_time - 0.01 0.008 0.008 0.000 0.000 1 Array#uniq - 0.01 0.007 0.007 0.000 0.000 42305 String#-@ - 0.00 0.070 0.007 0.000 0.064 7695 TaskClass#collect_browsers - 0.00 0.006 0.006 0.000 0.000 15390 Integer#to_s - 0.00 0.064 0.005 0.000 0.059 7695 TaskClass#collect_session_dates - 0.00 0.054 0.005 0.000 0.049 7695 TaskClass#collect_ie_usage - 0.00 0.032 0.005 0.000 0.028 7695 TaskClass#collect_if_only_chrome_used - 0.00 0.006 0.004 0.000 0.002 7695 TaskClass#collect_session_count - 0.00 0.003 0.003 0.000 0.000 7697 Hash#keys - 0.00 0.003 0.003 0.000 0.000 1 #write - 0.00 0.002 0.002 0.000 0.000 7695 Array#max - 0.00 0.002 0.002 0.000 0.000 7698 Array#count - 0.00 0.002 0.002 0.000 0.000 7695 Array#sum - 0.00 134.630 0.001 0.000 134.629 1 TaskClass#work - 0.00 134.630 0.000 0.000 134.630 1 [global]#[no method] - 0.00 0.419 0.000 0.000 0.419 1 TaskClass#collect_stats_from_users - 0.00 0.000 0.000 0.000 0.000 1 Kernel#dup - 0.00 0.000 0.000 0.000 0.000 1 JSON::Ext::Generator::State#initialize_copy - 0.00 0.000 0.000 0.000 0.000 1 Symbol#to_s - 0.00 0.000 0.000 0.000 0.000 1 Kernel#initialize_dup - 0.00 0.000 0.000 0.000 0.000 1 BasicObject#initialize - -* indicates recursively called methods diff --git a/ruby_prof_graph_allocations_profile.html b/ruby_prof_graph_allocations_profile.html deleted file mode 100644 index ba4c1cc..0000000 --- a/ruby_prof_graph_allocations_profile.html +++ /dev/null @@ -1,2318 +0,0 @@ - - - - - - - -

Profile Report: wall_time

- - - - - - - - - - - - - - -
Thread IDFiber IDTotal Time
4716616117286047166163365500180.00563621520996
- - -

Thread 47166161172860, Fiber: 47166163365500

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
%Total%SelfTotalSelfWaitChildCallsNameLine
100.00%0.00%180.010.000.00180.011 - - [global]#[no method] - - 9
  180.010.000.00180.001/1TaskClass#work9
  0.000.000.000.001/7696Class#new9
  180.010.000.00180.001/1[global]#[no method]9
100.00%0.00%180.010.000.00180.001 - - TaskClass#work - - 45
  178.012.250.00175.763/10Array#each97
  1.720.000.001.727/7TaskClass#collect_stats_from_users137
  0.170.090.000.081/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json141
  0.050.040.000.012/84647Array#map86
  0.020.020.000.001/15391Array#sort86
  0.020.020.000.001/50001String#split45
  0.000.000.000.001/1Array#uniq86
  0.000.000.000.001/1<Class::IO>#write141
  0.000.000.000.001/1<Class::IO>#read45
  0.000.000.000.001/7696Array#join86
  0.000.000.000.003/7698Array#count84
  1.720.360.001.367/10TaskClass#collect_stats_from_users37
  178.012.250.00175.763/10TaskClass#work97
99.85%1.45%179.732.600.00177.1210 - - Array#each - -
  174.13174.130.000.007695/7695Array#select
  1.141.130.000.0150000/50000Array#all?
  1.130.320.000.8184645/84647Array#map
  0.270.270.000.0050000/50001String#split
  0.120.090.000.037695/7696Class#new
  0.110.110.000.0042305/42305TaskClass#parse_session
  0.080.050.000.0315390/15391Array#sort
  0.060.060.000.0053865/53865Hash#merge
  0.030.030.000.017695/7695Array#any?
  0.020.020.000.007695/7695TaskClass#parse_user
  0.010.010.000.007695/7696Array#join
  0.010.010.000.0015390/15390Integer#to_s
  0.000.000.000.007695/7695Array#reverse
  0.000.000.000.007695/7695Array#max
  0.000.000.000.007695/7698Array#count
  0.000.000.000.007695/7695Array#sum
  174.13174.130.000.007695/7695Array#each
96.73%96.73%174.13174.130.000.007695 - - Array#select - -
  1.720.000.001.727/7TaskClass#work137
0.95%0.00%1.720.000.001.727 - - TaskClass#collect_stats_from_users - - 37
  1.720.360.001.367/10Array#each37
  0.050.040.000.012/84647TaskClass#work86
  1.130.320.000.8184645/84647Array#each
0.66%0.20%1.180.360.000.8284647 - - Array#map - -
  0.700.350.000.3542305/42305<Class::Date>#parse
  0.060.060.000.0042305/42305Date#iso8601
  0.040.040.000.0084610/116403String#upcase
  0.030.030.000.0084610/84610String#to_i
  1.141.130.000.0150000/50000Array#each
0.63%0.63%1.141.130.000.0150000 - - Array#all? - -
  0.010.010.000.009858/116403String#upcase
  0.700.350.000.3542305/42305Array#map
0.39%0.19%0.700.350.000.3542305 - - <Class::Date>#parse - -
  0.140.140.000.0084610/84610Regexp#match
  0.080.080.000.0042305/42305String#gsub!
  0.070.070.000.0042305/42305MatchData#begin
  0.040.040.000.0042305/42305String#[]=
  0.020.020.000.0042305/42305Integer#div
  0.010.010.000.0042305/42305MatchData#end
  0.020.020.000.001/50001TaskClass#work45
  0.270.270.000.0050000/50001Array#each
0.16%0.16%0.290.290.000.0050001 - - String#split - -
  0.170.090.000.081/1TaskClass#work141
0.10%0.05%0.170.090.000.081 - - JSON::Ext::Generator::GeneratorMethods::Hash#to_json - -
  0.060.060.000.00126956/126956String#encode
  0.020.020.000.0061564/61564String#to_s
  0.000.000.000.007697/7697Hash#keys
  0.000.000.000.001/1Kernel#dup
  0.000.000.000.001/1Symbol#to_s
  0.140.140.000.0084610/84610<Class::Date>#parse
0.08%0.08%0.140.140.000.0084610 - - Regexp#match - -
  0.000.000.000.001/7696[global]#[no method]9
  0.120.090.000.037695/7696Array#each
0.07%0.05%0.120.090.000.037696 - - Class#new - -
  0.030.030.000.007695/7695TaskClass::User#initialize
  0.000.000.000.001/1BasicObject#initialize
  0.110.110.000.0042305/42305Array#each
0.06%0.06%0.110.110.000.0042305 - - TaskClass#parse_session - - 28
  0.020.020.000.001/15391TaskClass#work86
  0.080.050.000.0315390/15391Array#each
0.05%0.04%0.100.070.000.0315391 - - Array#sort - -
  0.030.030.000.0073799/73799Date#<=>
  0.080.080.000.0042305/42305<Class::Date>#parse
0.04%0.04%0.080.080.000.0042305 - - String#gsub! - -
  0.070.070.000.0042305/42305<Class::Date>#parse
0.04%0.04%0.070.070.000.0042305 - - MatchData#begin - -
  0.060.060.000.00126956/126956JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.04%0.04%0.060.060.000.00126956 - - String#encode - -
  0.060.060.000.0053865/53865Array#each
0.03%0.03%0.060.060.000.0053865 - - Hash#merge - -
  0.060.060.000.0042305/42305Array#map
0.03%0.03%0.060.060.000.0042305 - - Date#iso8601 - -
  0.010.010.000.009858/116403Array#all?
  0.010.010.000.0021935/116403Array#any?
  0.040.040.000.0084610/116403Array#map
0.03%0.03%0.050.050.000.00116403 - - String#upcase - -
  0.040.040.000.0042305/42305<Class::Date>#parse
0.02%0.02%0.040.040.000.0042305 - - String#[]= - -
  0.030.030.000.017695/7695Array#each
0.02%0.01%0.030.030.000.017695 - - Array#any? - -
  0.010.010.000.0021935/116403String#upcase
  0.030.030.000.0073799/73799Array#sort
0.02%0.02%0.030.030.000.0073799 - - Date#<=> - -
  0.030.030.000.0084610/84610Array#map
0.02%0.02%0.030.030.000.0084610 - - String#to_i - -
  0.030.030.000.007695/7695Class#new
0.01%0.01%0.030.030.000.007695 - - TaskClass::User#initialize - - 12
  0.020.020.000.0042305/42305<Class::Date>#parse
0.01%0.01%0.020.020.000.0042305 - - Integer#div - -
  0.020.020.000.007695/7695Array#each
0.01%0.01%0.020.020.000.007695 - - TaskClass#parse_user - - 19
  0.020.020.000.0061564/61564JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.01%0.01%0.020.020.000.0061564 - - String#to_s - -
  0.000.000.000.001/7696TaskClass#work86
  0.010.010.000.007695/7696Array#each
0.01%0.01%0.010.010.000.007696 - - Array#join - -
  0.010.010.000.0042305/42305<Class::Date>#parse
0.01%0.01%0.010.010.000.0042305 - - MatchData#end - -
  0.010.010.000.0015390/15390Array#each
0.00%0.00%0.010.010.000.0015390 - - Integer#to_s - -
  0.000.000.000.001/1TaskClass#work86
0.00%0.00%0.000.000.000.001 - - Array#uniq - -
  0.000.000.000.007697/7697JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.007697 - - Hash#keys - -
  0.000.000.000.001/1TaskClass#work141
0.00%0.00%0.000.000.000.001 - - <Class::IO>#write - -
  0.000.000.000.007695/7695Array#each
0.00%0.00%0.000.000.000.007695 - - Array#reverse - -
  0.000.000.000.007695/7695Array#each
0.00%0.00%0.000.000.000.007695 - - Array#max - -
  0.000.000.000.003/7698TaskClass#work84
  0.000.000.000.007695/7698Array#each
0.00%0.00%0.000.000.000.007698 - - Array#count - -
  0.000.000.000.007695/7695Array#each
0.00%0.00%0.000.000.000.007695 - - Array#sum - -
  0.000.000.000.001/1TaskClass#work45
0.00%0.00%0.000.000.000.001 - - <Class::IO>#read - -
  0.000.000.000.001/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.001 - - Kernel#dup - -
  0.000.000.000.001/1Kernel#initialize_dup
  0.000.000.000.001/1Kernel#dup
0.00%0.00%0.000.000.000.001 - - Kernel#initialize_dup - -
  0.000.000.000.001/1JSON::Ext::Generator::State#initialize_copy
  0.000.000.000.001/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.001 - - Symbol#to_s - -
  0.000.000.000.001/1Kernel#initialize_dup
0.00%0.00%0.000.000.000.001 - - JSON::Ext::Generator::State#initialize_copy - -
  0.000.000.000.001/1Class#new
0.00%0.00%0.000.000.000.001 - - BasicObject#initialize - -
* indicates recursively called methods
- - - diff --git a/task-1.rb b/task-1.rb index 518f5ca..bb16e16 100644 --- a/task-1.rb +++ b/task-1.rb @@ -1,27 +1,14 @@ require_relative 'task_class' -#require 'memory_profiler' require 'stackprof' require 'ruby-prof' -# require 'pry' -#report = MemoryProfiler.report do #StackProf.run(mode: :object, out: 'tmp/stackprof.dump', raw: true) do result = RubyProf.profile do TaskClass.new.work(filename: ARGV[0]) end -#report.pretty_print(scale_bytes: true) -# profile_data = StackProf.run(mode: :object) do -# TaskClass.new.work(filename: ARGV[0]) -# end -# StackProf::Report.new(profile_data).print_graphviz - - -# printer = RubyProf::FlatPrinter.new(result) -# printer.print(File.open("ruby_prof_flat_allocations_profile.txt", "w+")) - printer = RubyProf::DotPrinter.new(result) -printer.print(File.open("ruby_prof_allocations_profile.dot", "w+")) +printer.print(File.open("tmp/ruby_prof_allocations_profile.dot", "w+")) printer = RubyProf::GraphHtmlPrinter.new(result) -printer.print(File.open("ruby_prof_graph_allocations_profile.html", "w+")) +printer.print(File.open("tmp/ruby_prof_graph_allocations_profile.html", "w+")) diff --git a/task_class.rb b/task_class.rb index 11dde52..96bde35 100644 --- a/task_class.rb +++ b/task_class.rb @@ -3,19 +3,10 @@ require 'json' require 'date' -require 'pry' +#require 'pry' require 'csv' class TaskClass - class User - attr_reader :attributes, :sessions - - def initialize(attributes:, sessions:) - @attributes = attributes - @sessions = sessions - end - end - def parse_user(fields) { 'id' => fields[1], @@ -88,56 +79,23 @@ def collect_session_dates(sessions) end def prepare_data(filename, users, sessions) - File.open(filename) do |file| - file.lazy.each_slice(2000) do |lines| - lines.each do |row| - row = row.chomp.split(',') - if row[0] == 'session' - session = parse_session(row) - sessions << session - users[session['user_id'].to_i]['sessions'] << session - else - users << parse_user(row) - end - end + file_lines = File.open(filename, "r") + 
file_lines.each_line do |line| + cols = line.chomp("\n").split(',') + if cols[0] == 'session' + session = parse_session(cols) + sessions << session + users[session['user_id'].to_i]['sessions'] << session + else + users << parse_user(cols) end end - # file_lines = File.open(filename, "r") - # file_lines.each_line do |line| - # cols = line.chomp.split(',') - # if cols[0] == 'session' - # sessions << parse_session(cols) - # else - # users << parse_user(cols) - # end - # end - # CSV.foreach(filename) do |row| - # if row[0] == 'session' - # sessions << parse_session(row) - # else - # users << parse_user(row) - # end - # end - # File.open(filename, 'r') do |file| - # csv = CSV.new(file, headers: true) - # sum = 0 - # - # while row = csv.shift - # if row[0] == 'session' - # sessions << parse_session(row) - # else - # users << parse_user(row) - # end - # end - # end end def work(filename:) users = [] sessions = [] - t1=Time.now prepare_data(filename, users, sessions) - puts Time.now-t1 # Отчёт в json # - Сколько всего юзеров + @@ -176,14 +134,6 @@ def work(filename:) .uniq .join(',') - # Статистика по пользователям - users_objects = [] -t2=Time.now - # users.each do |user| - # user_object = User.new(attributes: user, sessions: user['sessions']) - # users_objects << user_object - # end - puts Time.now - t2 collect_stats_from_users(report, users) File.write('result.json', "#{report.to_json}\n") diff --git a/task_test.rb b/task_test.rb index b527bca..08412da 100644 --- a/task_test.rb +++ b/task_test.rb @@ -1,7 +1,9 @@ -require 'minitest/autorun' +require 'test/unit' +require 'timeout' + require_relative 'task_class' -class TestMe < Minitest::Test +class TestMe < Test::Unit::TestCase def setup @filename = 'data.txt' File.write('result.json', '') @@ -32,4 +34,14 @@ def test_result expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET 
EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n" assert_equal expected_result, File.read('result.json') end + + def test_performance_degradation + time_to_process = 90 + + assert_nothing_raised Timeout::Error do + Timeout::timeout(time_to_process * 2) do + TaskClass.new.work(filename: 'data_large.txt') + end + end + end end