Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization parser to json #144

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions preview-task-1.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Deoptimized version of homework task

require 'json'
require 'pry'
require 'date'
require 'minitest/autorun'
require 'ruby-progressbar'

class User
attr_reader :attributes, :sessions

def initialize(attributes:, sessions:)
@attributes = attributes
@sessions = sessions
end
end

def collect_stats_from_users(report, users_objects, &block)
users_objects.each do |user|
user_key = "#{user.attributes[:first_name]}" + ' ' + "#{user.attributes[:last_name]}"
report['usersStats'][user_key] ||= {}
report['usersStats'][user_key] = report['usersStats'][user_key].merge(block.call(user))
end
end

def work
file_lines = File.read('data.txt').split("\n")

users = []
sessions = {}

file_lines.each do |line|
cols = line.split(',')

if cols[0] == 'user'
users << {
id: cols[1], first_name: cols[2],
last_name: cols[3], age: cols[4],
}

next
end

sessions[cols[1]] ||= []
sessions[cols[1]] << {
session_id: cols[2], browser: cols[3],
time: cols[4], date: cols[5],
}
end

# Отчёт в json
# - Сколько всего юзеров +
# - Сколько всего уникальных браузеров +
# - Сколько всего сессий +
# - Перечислить уникальные браузеры в алфавитном порядке через запятую и капсом +
#
# - По каждому пользователю
# - сколько всего сессий +
# - сколько всего времени +
# - самая длинная сессия +
# - браузеры через запятую +
# - Хоть раз использовал IE? +
# - Всегда использовал только Хром? +
# - даты сессий в порядке убывания через запятую +

report = {}

report[:totalUsers] = users.size

# Подсчёт количества уникальных браузеров
counter = 0
browsers_map = {}
all_sessions = sessions.values.flatten

all_sessions.each do |session|
browsers_map[session[:browser].upcase] ||= 0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

два раза делаем session[:browser].upcase

browsers_map[session[:browser].upcase] += 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

для уникальных браузеров можно Set использовать

counter += 1
end

report['uniqueBrowsersCount'] = browsers_map.size
report['totalSessions'] = counter
report['allBrowsers'] = browsers_map.keys.sort.join(',')

# Статистика по пользователям
users_objects = []
parts_of_work = users.size
progressbar = ProgressBar.create(
total: parts_of_work, format: '%a, %J, %E %B'
)

users.each do |user|
attributes = user
user_object = User.new(attributes: attributes, sessions: sessions[user[:id]])
users_objects << user_object
progressbar.increment
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

это лучше опциональным, тк может тормозить, особенно если увеличивать на каждой итерации (а не раз в 10/100/1000...)

end

report['usersStats'] = {}

# Собираем количество сессий по пользователям
collect_stats_from_users(report, users_objects) do |user|
{ 'sessionsCount' => user.sessions.size }
end

# Собираем количество времени по пользователям
collect_stats_from_users(report, users_objects) do |user|
{ 'totalTime' => user.sessions.map {|s| s[:time]}.map {|t| t.to_i}.sum.to_s + ' min.' }
end

# Выбираем самую длинную сессию пользователя
collect_stats_from_users(report, users_objects) do |user|
{ 'longestSession' => user.sessions.map {|s| s[:time]}.map {|t| t.to_i}.max.to_s + ' min.' }
end

# Браузеры пользователя через запятую
collect_stats_from_users(report, users_objects) do |user|
{ 'browsers' => user.sessions.map {|s| s[:browser]}.map {|b| b.upcase}.sort.join(', ') }
end

# Хоть раз использовал IE?
collect_stats_from_users(report, users_objects) do |user|
{ 'usedIE' => user.sessions.map{|s| s[:browser]}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } }
end

# Всегда использовал только Chrome?
collect_stats_from_users(report, users_objects) do |user|
{ 'alwaysUsedChrome' => user.sessions.map{|s| s[:browser]}.all? { |b| b.upcase =~ /CHROME/ } }
end

# Даты сессий через запятую в обратном порядке в формате iso8601
collect_stats_from_users(report, users_objects) do |user|
{ 'dates' => user.sessions.map{|s| s[:date]}.map {|d| Date.parse(d)}.sort.reverse.map { |d| d.iso8601 } }
end

File.write('result.json', "#{report.to_json}\n")
end

class TestMe < Minitest::Test
def setup
File.write('result.json', '')
File.write('data.txt',
'user,0,Leida,Cira,0
session,0,0,Safari 29,87,2016-10-23
session,0,1,Firefox 12,118,2017-02-27
session,0,2,Internet Explorer 28,31,2017-03-28
session,0,3,Internet Explorer 28,109,2016-09-15
session,0,4,Safari 39,104,2017-09-27
session,0,5,Internet Explorer 35,6,2016-09-01
user,1,Palmer,Katrina,65
session,1,0,Safari 17,12,2016-10-21
session,1,1,Firefox 32,3,2016-12-20
session,1,2,Chrome 6,59,2016-11-11
session,1,3,Internet Explorer 10,28,2017-04-29
session,1,4,Chrome 13,116,2016-12-28
user,2,Gregory,Santos,86
session,2,0,Chrome 35,6,2018-09-21
session,2,1,Safari 49,85,2017-05-22
session,2,2,Firefox 47,17,2018-02-02
session,2,3,Chrome 20,84,2016-11-25
')
end

def test_result
work
expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n"
assert_equal expected_result, File.read('result.json')
end
end
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
result.json
data*
!data_large.txt.gz
!data.txt
Loading