Skip to content

Commit 96f71e9

Browse files
Initial commit
0 parents  commit 96f71e9

File tree

5 files changed

+206
-0
lines changed

5 files changed

+206
-0
lines changed

.gitignore

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Prerequisites
2+
*.d
3+
4+
# Compiled Object files
5+
*.slo
6+
*.lo
7+
*.o
8+
*.obj
9+
10+
# Precompiled Headers
11+
*.gch
12+
*.pch
13+
14+
# Compiled Dynamic libraries
15+
*.so
16+
*.dylib
17+
*.dll
18+
19+
# Fortran module files
20+
*.mod
21+
*.smod
22+
23+
# Compiled Static libraries
24+
*.lai
25+
*.la
26+
*.a
27+
*.lib
28+
29+
# Executables
30+
*.exe
31+
*.out
32+
*.app
33+
34+
!bin

CMakeLists.txt

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# cmake_minimum_required() has to be the first command of the top-level
# CMakeLists.txt: it sets the policy defaults that project() depends on.
cmake_minimum_required(VERSION 3.10)

project(WordCountSorterProject)

# Collect the documentation files and attach them to a target that has no
# build commands, so that they are listed by IDE generators.
file(GLOB DOC_FILES "README.md" "docs/*")
add_custom_target(docs SOURCES ${DOC_FILES})

add_subdirectory(src)

if (MSVC)
    # Select the executable target as the Visual Studio startup project.
    set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT WordCountSorter)
endif()

docs/Problem statement.txt

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Напишите на C++ нехитрую считалку частотного словаря.
2+
Важно: напишите примерно так, как сегодня пишете код для боя, по-взрослому (часть особо редких проверок в стиле “а не кончился ли диск” можно и нужно не писать; но все проверки не писать нельзя). Важно: напишите и/или упакуйте вашу C++ программу так, чтобы мы ее тоже сумели собрать и запустить. (Например, на боевых серверах у нас сегодня Debian 9.) Важно: не надо копипастить простыню кода в docx/pdf документ, человечество недавно таки придумало аттачи.
3+
4+
Словом считается набор латинских букв, a-zA-Z.
5+
Любой другой символ считается пробелом. Регистр нужно привести.
6+
Сортировать нужно сначала по частоте, потом по алфавиту.
7+
Выводить тоже сначала частоту, потом слово.
8+
9+
Вот пример.
10+
$ cat in.txt
11+
The time has come, the Walrus said,
12+
to talk of many things...
13+
$ freq in.txt out.txt
14+
$ cat out.txt
15+
2 the
16+
1 come
17+
1 has
18+
1 many
19+
1 of
20+
1 said
21+
1 talk
22+
1 things
23+
1 time
24+
1 to
25+
1 walrus

src/CMakeLists.txt

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Declare the required CMake version before project() so that policies are
# configured before the project and its languages are set up.
cmake_minimum_required(VERSION 3.10)

project(WordCountSorter)

set(SOURCES
    main.cpp
)

add_executable(${PROJECT_NAME}
    ${SOURCES}
)

# The produced binary is called "freq" regardless of the target name.
set_property(TARGET ${PROJECT_NAME} PROPERTY OUTPUT_NAME "freq")
# main.cpp uses structured bindings, so C++17 is mandatory.
set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 17)
set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD_REQUIRED ON)

src/main.cpp

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#include <iostream>
2+
#include <fstream>
3+
#include <cctype>
4+
#include <cassert>
5+
#include <unordered_map>
6+
#include <vector>
7+
#include <algorithm>
8+
9+
// Kept at file scope because the functions in this file refer to
// standard-library names without the std:: prefix.
using namespace std;

// Expected value of argc: program name + input file + output file.
constexpr auto INPUT_ARGS_COUNT = 3;

// word -> number of occurrences of that word
using WordToCountDictionary = std::unordered_map<std::string, unsigned int>;

// occurrence count -> all words that occur exactly that many times
using CountToWordsDictionary = std::unordered_map<unsigned int, std::vector<std::string>>;

// Sequence of (occurrence count, pointer to the words with that count).
// The pointers are non-owning.
using SortedCountToWordsDictionary = std::vector<std::pair<unsigned int, std::vector<std::string>*>>;
16+
17+
// Counts the occurrences of every word found in the given stream.
//
// A word is a maximal run of Latin letters (a-z, A-Z); every other character
// acts as a separator. Letters are lower-cased before counting, so "The" and
// "the" are counted as the same word.
//
// inputStream - opened input stream; it is consumed up to end of file.
// Returns a dictionary mapping each lower-cased word to its occurrence count.
WordToCountDictionary ReadWords(std::ifstream & inputStream)
{
    WordToCountDictionary wordToCountDictionary;

    std::string wordBuf;
    using buf_iter = std::istreambuf_iterator<char>;
    const buf_iter eof;
    for (buf_iter it{ inputStream }; it != eof; ++it)
    {
        // Letters are recognised by explicit ASCII ranges rather than
        // isalpha()/tolower(): the outcome must not depend on the active
        // C locale, and no implicit int -> char narrowing takes place.
        const char symbol = *it;
        if (symbol >= 'a' && symbol <= 'z')
        {
            wordBuf += symbol;
        }
        else if (symbol >= 'A' && symbol <= 'Z')
        {
            wordBuf += static_cast<char>(symbol - 'A' + 'a');
        }
        else if (!wordBuf.empty())
        {
            ++wordToCountDictionary[wordBuf];
            wordBuf.clear();
        }
    }

    // The input may end right after a letter (no trailing separator or
    // newline); the word collected so far still has to be counted.
    if (!wordBuf.empty())
    {
        ++wordToCountDictionary[wordBuf];
    }

    return wordToCountDictionary;
}
40+
41+
// Inverts a word -> count dictionary into a count -> words dictionary.
//
// wordToCountDictionary - occurrence count of every word.
// Returns a dictionary in which each count maps to all words that have that
// count. The words inside a group are appended in the iteration order of the
// input dictionary, i.e. they are not sorted.
CountToWordsDictionary GetCountToWordsDictionary(const WordToCountDictionary & wordToCountDictionary)
{
    CountToWordsDictionary countToWordsDictionary;
    for (const auto & [word, count] : wordToCountDictionary)
    {
        // operator[] creates the group for this count on first use.
        countToWordsDictionary[count].push_back(word);
    }

    return countToWordsDictionary;
}
51+
52+
// Produces the final output order: groups by descending occurrence count,
// words inside each group in ascending (operator<) order.
//
// countToWordsDictionary - groups of words keyed by count. The word vectors
//     are sorted in place, which is why the parameter is non-const.
// Returns (count, pointer to words) pairs ordered by descending count. The
// pointers refer to the vectors stored in countToWordsDictionary, so that
// dictionary must stay alive and unmodified while the result is in use.
SortedCountToWordsDictionary SortWordsByCount(CountToWordsDictionary & countToWordsDictionary)
{
    SortedCountToWordsDictionary sortedCountToWordsDictionary;
    sortedCountToWordsDictionary.reserve(countToWordsDictionary.size());

    for (auto & [count, words] : countToWordsDictionary)
    {
        std::sort(words.begin(), words.end());
        sortedCountToWordsDictionary.emplace_back(count, &words);
    }

    // Counts are unique map keys, so comparing them alone gives a total order.
    std::sort(sortedCountToWordsDictionary.begin(), sortedCountToWordsDictionary.end(),
        [] (const auto & left, const auto & right)
        {
            return left.first > right.first;
        });

    return sortedCountToWordsDictionary;
}
71+
72+
void PrintResults(ofstream & outputStream, const SortedCountToWordsDictionary & sortedCountToWords)
73+
{
74+
for (const auto & [count, pWords] : sortedCountToWords)
75+
{
76+
assert(pWords != nullptr);
77+
for (const auto & word : *pWords)
78+
{
79+
outputStream << count << " " << word << endl;
80+
}
81+
}
82+
}
83+
84+
int main(int _argc, char * _argv[])
85+
{
86+
if (_argc != INPUT_ARGS_COUNT)
87+
{
88+
cerr << "Usage: " << _argv[0] << " <inputfile> <outputfile>" << endl;
89+
return 1;
90+
}
91+
92+
const string inputFileName(_argv[1]);
93+
ifstream inputStream{ inputFileName };
94+
if (!inputStream)
95+
{
96+
cerr << "Cannot open input file: " << inputFileName << endl;
97+
return 2;
98+
}
99+
100+
const string outputFileName(_argv[2]);
101+
ofstream outputStream{ outputFileName };
102+
if (!outputStream)
103+
{
104+
cerr << "Cannot create output file: " << outputFileName << endl;
105+
return 3;
106+
}
107+
108+
const auto wordToCountDictionary = ReadWords(inputStream);
109+
auto countToWordsDictionary = GetCountToWordsDictionary(wordToCountDictionary);
110+
const auto sortedCountToWordsDictionary = SortWordsByCount(countToWordsDictionary);
111+
112+
PrintResults(outputStream, sortedCountToWordsDictionary);
113+
114+
inputStream.close();
115+
outputStream.close();
116+
117+
cout << "Results were successfully written to file: \"" << outputFileName << "\"" << endl;
118+
119+
return 0;
120+
}

0 commit comments

Comments
 (0)