Skip to content

Commit

Permalink
feat: update self-study contents
Browse files Browse the repository at this point in the history
  • Loading branch information
reudekx committed Jan 10, 2025
1 parent 60bd6e0 commit e68e255
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 21 deletions.
128 changes: 128 additions & 0 deletions self_study/learning_pandas/type_size_test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Original (int64): 7.629520416259766 MB\n",
"Int16: 2.8611488342285156 MB\n",
"Int32: 4.768497467041016 MB\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# 100만 행의 연도 데이터 생성\n",
"years = np.random.randint(1900, 2025, size=1_000_000)\n",
"df = pd.DataFrame({'year': years})\n",
"\n",
"# 메모리 사용량 비교\n",
"print(\"Original (int64):\", df['year'].memory_usage(deep=True) / 1024 / 1024, \"MB\")\n",
"\n",
"df['year_int16'] = df['year'].astype('Int16')\n",
"print(\"Int16:\", df['year_int16'].memory_usage(deep=True) / 1024 / 1024, \"MB\")\n",
"\n",
"df['year_int32'] = df['year'].astype('Int32')\n",
"print(\"Int32:\", df['year_int32'].memory_usage(deep=True) / 1024 / 1024, \"MB\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"1. 초기 상태 (컬럼별):\n",
"text_before: object, 55.313236236572266 MB\n",
"float_before: float64, 7.629520416259766 MB\n",
"year: int64, 7.629520416259766 MB\n",
"float_after: float64, 7.629520416259766 MB\n",
"text_after: object, 56.266910552978516 MB\n",
"Total: 134.46820449829102 MB\n",
"\n",
"2. year를 Int16으로 변경 후 (컬럼별):\n",
"text_before: object, 55.313236236572266 MB\n",
"float_before: float64, 7.629520416259766 MB\n",
"year: Int16, 2.8611488342285156 MB\n",
"float_after: float64, 7.629520416259766 MB\n",
"text_after: object, 56.266910552978516 MB\n",
"Total: 129.69983291625977 MB\n",
"\n",
"3. year를 Int32로 변경 후 (컬럼별):\n",
"text_before: object, 55.313236236572266 MB\n",
"float_before: float64, 7.629520416259766 MB\n",
"year: Int32, 4.768497467041016 MB\n",
"float_after: float64, 7.629520416259766 MB\n",
"text_after: object, 56.266910552978516 MB\n",
"Total: 131.60718154907227 MB\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# 다양한 타입의 컬럼들 사이에 연도 데이터 배치\n",
"df = pd.DataFrame({\n",
" 'text_before': ['some_text'] * 1_000_000, \n",
" 'float_before': np.random.random(1_000_000), \n",
" 'year': np.random.randint(1900, 2025, size=1_000_000),\n",
" 'float_after': np.random.random(1_000_000), \n",
" 'text_after': ['other_text'] * 1_000_000 \n",
"})\n",
"\n",
"# 초기 상태\n",
"print(\"\\n1. 초기 상태 (컬럼별):\")\n",
"for col in df.columns:\n",
" print(f\"{col}: {df[col].dtype},\", df[col].memory_usage(deep=True) / 1024 / 1024, \"MB\")\n",
"print(\"Total:\", df.memory_usage(deep=True).sum() / 1024 / 1024, \"MB\")\n",
"\n",
"# year를 Int16으로 변경\n",
"df['year'] = df['year'].astype('Int16')\n",
"print(\"\\n2. year를 Int16으로 변경 후 (컬럼별):\")\n",
"for col in df.columns:\n",
" print(f\"{col}: {df[col].dtype},\", df[col].memory_usage(deep=True) / 1024 / 1024, \"MB\")\n",
"print(\"Total:\", df.memory_usage(deep=True).sum() / 1024 / 1024, \"MB\")\n",
"\n",
"# year를 Int32로 변경\n",
"df['year'] = df['year'].astype('Int32')\n",
"print(\"\\n3. year를 Int32로 변경 후 (컬럼별):\")\n",
"for col in df.columns:\n",
" print(f\"{col}: {df[col].dtype},\", df[col].memory_usage(deep=True) / 1024 / 1024, \"MB\")\n",
"print(\"Total:\", df.memory_usage(deep=True).sum() / 1024 / 1024, \"MB\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
13 changes: 8 additions & 5 deletions self_study/parallellism/files/pool_test.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
from multiprocessing import Pool
import time
from multiprocessing import Pool


def slow_square(x, delay=1):
    """Return ``x * x`` after sleeping to simulate a slow task.

    Args:
        x: Value to square.
        delay: Seconds to sleep before returning. Defaults to 1, the
            pause that used to be hard-coded.

    Returns:
        The product ``x * x``.
    """
    time.sleep(delay)  # simulate a long-running task
    return x * x


def main():
    """Square a fixed list serially and with a process pool, timing both.

    Prints the results of each approach together with its elapsed
    wall-clock time in seconds.
    """
    numbers = [1, 2, 3, 4, 5]

    print("일반적인 처리 시작")
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    regular_result = [slow_square(x) for x in numbers]
    print(f"일반 처리 결과: {regular_result}")
    print(f"소요 시간: {time.perf_counter() - start:.2f}초\n")

    print("병렬 처리 시작")
    start = time.perf_counter()
    with Pool() as pool:
        # Pool.map blocks until every result is ready and returns a list,
        # so no extra list() conversion is needed.
        parallel_result = pool.map(slow_square, numbers)
    print(f"병렬 처리 결과: {parallel_result}")
    print(f"소요 시간: {time.perf_counter() - start:.2f}초")

# Run the demo only when executed as a script. The guard also keeps the
# module safe to import from multiprocessing worker processes.
if __name__ == "__main__":
    main()
35 changes: 19 additions & 16 deletions self_study/parallellism/files/queue_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import multiprocessing as mp
from queue import Empty
import time
import random
import time
from queue import Empty


def process_task(queue: mp.Queue, process_id: int) -> None:
"""
Expand All @@ -13,43 +14,45 @@ def process_task(queue: mp.Queue, process_id: int) -> None:

if data is None:
# None이면 종료
print(f'프로세스 {process_id}: 작업 완료')
print(f"프로세스 {process_id}: 작업 완료")
break

# 처리 시작을 알림
print(f'프로세스 {process_id}: 값 {data} 처리 시작')
print(f"프로세스 {process_id}: 값 {data} 처리 시작")

# 데이터 값/10 초 만큼 대기
time.sleep(data/10)
time.sleep(data / 10)

# 처리 완료를 알림
print(f'프로세스 {process_id}: 값 {data} 처리 완료')
print(f"프로세스 {process_id}: 값 {data} 처리 완료")


def main():
    """Fill a shared queue with work items and drain it with worker processes.

    Puts the numbers 1-10 in random order on a multiprocessing queue,
    followed by one ``None`` sentinel per worker, then starts the workers
    and waits for all of them to exit.
    """
    # Single source of truth so the number of sentinels always matches
    # the number of workers started below.
    num_workers = 4

    # Queue shared between the processes
    task_queue = mp.Queue()

    # Enqueue 1..10 in random order, then one None sentinel per worker
    numbers = random.sample(range(1, 11), 10)
    numbers.extend([None] * num_workers)
    for num in numbers:
        task_queue.put(num)

    # Create and start the worker processes
    processes = []
    for i in range(num_workers):
        p = mp.Process(target=process_task, args=(task_queue, i))
        processes.append(p)
        print(f"프로세스 {i} 시작")
        p.start()

    # Wait until every worker has exited
    print("모든 프로세스가 종료될 때까지 대기합니다.")
    for p in processes:
        print(f"프로세스 {p.pid} 대기 중")
        p.join()

    print("모든 작업이 완료되었습니다.")

# Run the demo only when executed as a script. The guard also keeps the
# module safe to import from multiprocessing worker processes.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions self_study/parallellism/testing_multiprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"source": [
"from files import pool_test\n",
"\n",
"\n",
"pool_test.main()"
]
}
Expand Down

0 comments on commit e68e255

Please sign in to comment.