Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/files/data/*
/tmp/*

# macOS Finder artifacts
.DS_Store
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2.5.3
9 changes: 9 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
source 'https://rubygems.org'
ruby '2.5.3'

gem 'benchmark-ips'
gem 'get_process_mem'
gem 'memory_profiler'
gem 'oj'
gem 'ruby-prof'
gem 'stackprof'
26 changes: 26 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
GEM
remote: https://rubygems.org/
specs:
benchmark-ips (2.7.2)
get_process_mem (0.2.1)
memory_profiler (0.9.12)
oj (3.7.1)
ruby-prof (0.17.0)
stackprof (0.2.12)

PLATFORMS
ruby

DEPENDENCIES
benchmark-ips
get_process_mem
memory_profiler
oj
ruby-prof
stackprof

RUBY VERSION
ruby 2.5.3p105

BUNDLED WITH
1.17.1
186 changes: 186 additions & 0 deletions case-study-1-memory.md

Large diffs are not rendered by default.

293 changes: 293 additions & 0 deletions case-study-2-cpu.md

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions files/fixtures/data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
user,0,Leida,Cira,0
session,0,0,Safari 29,87,2016-10-23
session,0,1,Firefox 12,118,2017-02-27
session,0,2,Internet Explorer 28,31,2017-03-28
session,0,3,Internet Explorer 28,109,2016-09-15
session,0,4,Safari 39,104,2017-09-27
session,0,5,Internet Explorer 35,6,2016-09-01
user,1,Palmer,Katrina,65
session,1,0,Safari 17,12,2016-10-21
session,1,1,Firefox 32,3,2016-12-20
session,1,2,Chrome 6,59,2016-11-11
session,1,3,Internet Explorer 10,28,2017-04-29
session,1,4,Chrome 13,116,2016-12-28
user,2,Gregory,Santos,86
session,2,0,Chrome 35,6,2018-09-21
session,2,1,Safari 49,85,2017-05-22
session,2,2,Firefox 47,17,2018-02-02
session,2,3,Chrome 20,84,2016-11-25
1 change: 1 addition & 0 deletions files/fixtures/expected_report.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}
1 change: 0 additions & 1 deletion result.json

This file was deleted.

211 changes: 70 additions & 141 deletions task-2.rb
Original file line number Diff line number Diff line change
@@ -1,58 +1,49 @@
# Deoptimized version of homework task

require 'json'
require 'pry'
require 'set'
require 'oj'
require 'date'
require 'minitest/autorun'

class User
attr_reader :attributes, :sessions

def initialize(attributes:, sessions:)
@attributes = attributes
@sessions = sessions
end
end
IE_PATTERN = 'INTERNET EXPLORER'.freeze
CHROME_PATTERN = 'CHROME'.freeze
COMMA = ','.freeze
DELIMITER = ', '.freeze
USER_ROW_MARK = 'user'.freeze
SESSION_ROW_MARK = 'session'.freeze

def parse_user(user)
fields = user.split(',')
parsed_result = {
'id' => fields[1],
'first_name' => fields[2],
'last_name' => fields[3],
'age' => fields[4],
fields = user.split(COMMA)
{
id: fields[1],
name: "#{fields[2]} #{fields[3]}".to_sym
}
end

def parse_session(session)
fields = session.split(',')
parsed_result = {
'user_id' => fields[1],
'session_id' => fields[2],
'browser' => fields[3],
'time' => fields[4],
'date' => fields[5],
fields = session.split(COMMA)
{
user_id: fields[1],
session_id: fields[2],
browser: fields[3],
time: fields[4],
date: fields[5]
}
end

def collect_stats_from_users(report, users_objects, &block)
users_objects.each do |user|
user_key = "#{user.attributes['first_name']}" + ' ' + "#{user.attributes['last_name']}"
report['usersStats'][user_key] ||= {}
report['usersStats'][user_key] = report['usersStats'][user_key].merge(block.call(user))
end
end

def work
file_lines = File.read('data.txt').split("\n")

def create_report(source_file, target_file)
users = []
sessions = []

file_lines.each do |line|
cols = line.split(',')
users = users + [parse_user(line)] if cols[0] == 'user'
sessions = sessions + [parse_session(line)] if cols[0] == 'session'
sessions_by_users = {}
unique_browsers = SortedSet.new
total_sessions = 0

File.open(source_file, 'r').each do |line|
users << parse_user(line) if line.start_with?(USER_ROW_MARK)
next unless line.start_with?(SESSION_ROW_MARK)

session = parse_session(line)
sessions_by_users[session[:user_id]] ||= []
sessions_by_users[session[:user_id]] << session
browser = session[:browser].upcase!
unique_browsers << browser
total_sessions += 1
end

# Отчёт в json
Expand All @@ -73,104 +64,42 @@ def work
report = {}

report[:totalUsers] = users.count

# Подсчёт количества уникальных браузеров
uniqueBrowsers = []
sessions.each do |session|
browser = session['browser']
uniqueBrowsers += [browser] if uniqueBrowsers.all? { |b| b != browser }
end

report['uniqueBrowsersCount'] = uniqueBrowsers.count

report['totalSessions'] = sessions.count

report['allBrowsers'] =
sessions
.map { |s| s['browser'] }
.map { |b| b.upcase }
.sort
.uniq
.join(',')

# Статистика по пользователям
users_objects = []

users.each do |user|
attributes = user
user_sessions = sessions.select { |session| session['user_id'] == user['id'] }
user_object = User.new(attributes: attributes, sessions: user_sessions)
users_objects = users_objects + [user_object]
end

report['usersStats'] = {}

# Собираем количество сессий по пользователям
collect_stats_from_users(report, users_objects) do |user|
{ 'sessionsCount' => user.sessions.count }
end

# Собираем количество времени по пользователям
collect_stats_from_users(report, users_objects) do |user|
{ 'totalTime' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.sum.to_s + ' min.' }
end

# Выбираем самую длинную сессию пользователя
collect_stats_from_users(report, users_objects) do |user|
{ 'longestSession' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' }
end

# Браузеры пользователя через запятую
collect_stats_from_users(report, users_objects) do |user|
{ 'browsers' => user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') }
end

# Хоть раз использовал IE?
collect_stats_from_users(report, users_objects) do |user|
{ 'usedIE' => user.sessions.map{|s| s['browser']}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } }
report[:uniqueBrowsersCount] = unique_browsers.size
report[:totalSessions] = total_sessions
report[:allBrowsers] = unique_browsers.to_a.join(COMMA)
report[:usersStats] = {}

until users.empty?
user = users.shift
user_sessions = sessions_by_users.delete(user[:id]) || []
sessions_stats = {
total_duration: 0,
max_duration: 0,
browsers: [],
dates: [],
length: user_sessions.length
}

until user_sessions.empty?
session = user_sessions.shift
time = session[:time].to_i

sessions_stats[:total_duration] += time
sessions_stats[:max_duration] = time if sessions_stats[:max_duration] < time
sessions_stats[:browsers] << session[:browser]
sessions_stats[:dates] << session[:date].chomp!
end

report[:usersStats][user[:name]] = {
sessionsCount: sessions_stats[:length],
totalTime: "#{sessions_stats[:total_duration]} min.",
longestSession: "#{sessions_stats[:max_duration]} min.",
browsers: sessions_stats[:browsers].sort!.join(DELIMITER),
usedIE: sessions_stats[:browsers].any? { |b| b.start_with?(IE_PATTERN) },
alwaysUsedChrome: sessions_stats[:browsers].all? { |b| b.start_with?(CHROME_PATTERN) },
dates: sessions_stats[:dates].sort!.reverse!
}
end

# Всегда использовал только Chrome?
collect_stats_from_users(report, users_objects) do |user|
{ 'alwaysUsedChrome' => user.sessions.map{|s| s['browser']}.all? { |b| b.upcase =~ /CHROME/ } }
end

# Даты сессий через запятую в обратном порядке в формате iso8601
collect_stats_from_users(report, users_objects) do |user|
{ 'dates' => user.sessions.map{|s| s['date']}.map {|d| Date.parse(d)}.sort.reverse.map { |d| d.iso8601 } }
end

File.write('result.json', "#{report.to_json}\n")
end

class TestMe < Minitest::Test
def setup
File.write('result.json', '')
File.write('data.txt',
'user,0,Leida,Cira,0
session,0,0,Safari 29,87,2016-10-23
session,0,1,Firefox 12,118,2017-02-27
session,0,2,Internet Explorer 28,31,2017-03-28
session,0,3,Internet Explorer 28,109,2016-09-15
session,0,4,Safari 39,104,2017-09-27
session,0,5,Internet Explorer 35,6,2016-09-01
user,1,Palmer,Katrina,65
session,1,0,Safari 17,12,2016-10-21
session,1,1,Firefox 32,3,2016-12-20
session,1,2,Chrome 6,59,2016-11-11
session,1,3,Internet Explorer 10,28,2017-04-29
session,1,4,Chrome 13,116,2016-12-28
user,2,Gregory,Santos,86
session,2,0,Chrome 35,6,2018-09-21
session,2,1,Safari 49,85,2017-05-22
session,2,2,Firefox 47,17,2018-02-02
session,2,3,Chrome 20,84,2016-11-25
')
end

def test_result
work
expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n"
assert_equal expected_result, File.read('result.json')
end
Oj.to_file(target_file, report, mode: :wab)
end
14 changes: 14 additions & 0 deletions utils/asymptotics.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

require 'benchmark/ips'
require_relative 'prepare_data_chunks'
require_relative '../task-2'

# Benchmarks create_report once per entry in SIZE_LIMITS, labelling each run
# with the chunk size in megabytes, then prints a comparison of the runs.
Benchmark.ips do |bench|
  SIZE_LIMITS.each do |chunk_name, bytes|
    megabytes = (bytes / 1_048_576.0).round(1)

    bench.report("Process #{megabytes}Mb") do
      create_report("files/data/#{chunk_name}.txt", "tmp/result.json")
    end
  end

  bench.compare!
end
38 changes: 38 additions & 0 deletions utils/feedback_loop.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true

require 'benchmark'
require 'benchmark/ips'
require 'minitest/autorun'
require_relative '../task-2'

TMP_RESULT_FILE = 'tmp/result.json'

# Regression and time-budget tests for create_report.
class TaskTest < Minitest::Test
  # The report built from the fixture data must match the stored expected
  # JSON file byte for byte.
  def test_result
    report('files/fixtures/data.txt')

    expected_report = File.read('files/fixtures/expected_report.json')
    actual_report = File.read(TMP_RESULT_FILE)

    assert_equal expected_report, actual_report
  end

  # Processing files/data/data_1_0mb.txt must take less than 0.3 seconds of
  # wall-clock time.
  def test_execution_time
    time = Benchmark.realtime { report('files/data/data_1_0mb.txt') }
    # assert_operator prints the measured value when the budget is exceeded.
    assert_operator time, :<, 0.3
  end

  # Removes the generated report after each test. The existence check keeps a
  # failure inside create_report (which may leave no file behind) from being
  # buried under an Errno::ENOENT raised here.
  def teardown
    File.unlink(TMP_RESULT_FILE) if File.exist?(TMP_RESULT_FILE)
  end

  private

  # Runs create_report for +source_file+, writing to TMP_RESULT_FILE.
  def report(source_file)
    create_report(source_file, TMP_RESULT_FILE)
  end
end

# Measures iterations per second of create_report on the 1 MB data file.
Benchmark.ips do |bench|
  bench.report('Process 1Mb') do
    create_report('files/data/data_1_0mb.txt', TMP_RESULT_FILE)
  end
end
38 changes: 38 additions & 0 deletions utils/prepare_data_chunks.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true

# Chunk name => size limit in bytes. The limits grow in steps of 524_288 bytes
# (0.5 MiB), from 0.5 MiB for the first entry up to 5 MiB for the last.
SIZE_LIMITS = %i[
  data_0_5mb data_1_0mb data_1_5mb data_2_0mb data_2_5mb
  data_3_0mb data_3_5mb data_4_0mb data_4_5mb data_5_0mb
].each_with_index.map { |chunk_name, step| [chunk_name, (step + 1) * 524_288] }.to_h.freeze

# Opens +source_file+ for reading and calls slice_data_to_file once per
# SIZE_LIMITS entry, passing the same open handle each time, so every chunk
# is read from wherever the previous call stopped.
def prepare_data_chunks(source_file)
  File.open(source_file, 'r') do |io|
    SIZE_LIMITS.each_pair do |chunk_name, byte_limit|
      slice_data_to_file(chunk_name, io, byte_limit)
    end
  end
end

# Copies lines from +data+ into files/data/<target_filename>.txt, stopping
# before the line that would bring the running total to +limit+ bytes or more.
#
# @param target_filename [#to_s] base name of the chunk file (without ".txt")
# @param data [#each_line] line source; read from its current position
# @param limit [Integer] size budget in bytes
#
# NOTE(review): the line that trips the limit has already been read from
# +data+ and is not written, so a caller that keeps reading the same IO for
# the next chunk never sees that line — confirm this is acceptable.
def slice_data_to_file(target_filename, data, limit)
  size_in_bytes = 0
  File.open("files/data/#{target_filename}.txt", 'w') do |target|
    data.each_line do |line|
      # bytesize rather than length: length counts characters and would
      # under-report lines that contain multibyte text.
      size_in_bytes += line.bytesize
      break if size_in_bytes >= limit

      target.puts line
    end
  end
end


# Generates every chunk file from files/data/data_large.txt as soon as this
# file is loaded.
# NOTE(review): this also runs when another script loads this file only to use
# SIZE_LIMITS (utils/asymptotics.rb does so via require_relative). If
# regenerating the chunks on require is unintended, consider guarding the call
# with `if __FILE__ == $PROGRAM_NAME`.
prepare_data_chunks('files/data/data_large.txt')

Loading