Skip to content

Commit

Permalink
[김진아] 11주차 미션 제출 (#119)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikaniz authored Jul 6, 2024
1 parent 17635d2 commit 9b166d2
Showing 1 changed file with 391 additions and 0 deletions.
391 changes: 391 additions & 0 deletions 8th_members/김진아/11주차.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,391 @@
# 11주차

## 원하는 type 정해서 구조체 및 type 코드 분석해보기

### String type

```c
// Include/unicodeobject.h

PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
```
- `PyUnicode_Type` : 기본 문자열 타입, Python 문자열 객체의 모든 메타데이터와 메서드를 포함
- `PyUnicodeIter_Type` : 문자열 이터레이터 타입, 문자열을 순회할 때 사용
### `PyUnicode_Type`
```c
// Objects/unicodeobject.c
PyTypeObject PyUnicode_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"str", /* tp_name */
sizeof(PyUnicodeObject), /* tp_basicsize */
0, /* tp_itemsize */
/* Slots */
(destructor)unicode_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_as_async */
unicode_repr, /* tp_repr */
&unicode_as_number, /* tp_as_number */
&unicode_as_sequence, /* tp_as_sequence */
&unicode_as_mapping, /* tp_as_mapping */
(hashfunc) unicode_hash, /* tp_hash*/
0, /* tp_call*/
(reprfunc) unicode_str, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
unicode_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
PyUnicode_RichCompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
unicode_iter, /* tp_iter */
0, /* tp_iternext */
unicode_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
&PyBaseObject_Type, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
unicode_new, /* tp_new */
PyObject_Del, /* tp_free */
};
```

### `PyUnicodeIter_Type`

```c
// Objects/unicodeobject.c

PyTypeObject PyUnicodeIter_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"str_iterator", /* tp_name */
sizeof(unicodeiterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)unicodeiter_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_as_async */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
0, /* tp_doc */
(traverseproc)unicodeiter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)unicodeiter_next, /* tp_iternext */
unicodeiter_methods, /* tp_methods */
0,
};
```

### `unicode_new`

```c
// Objects/unicodeobject.c

static PyObject *
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *x = NULL;
static char *kwlist[] = {"object", "encoding", "errors", 0};
char *encoding = NULL;
char *errors = NULL;

if (type != &PyUnicode_Type)
return unicode_subtype_new(type, args, kwds);
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
kwlist, &x, &encoding, &errors))
return NULL;
if (x == NULL)
_Py_RETURN_UNICODE_EMPTY();
if (encoding == NULL && errors == NULL)
return PyObject_Str(x);
else
return PyUnicode_FromEncodedObject(x, encoding, errors);
}
```
1. 타입 확인
2. `PyArg_ParseTupleAndKeywords`를 통해 인자 파싱
3. 객체 생성
### `PyUnicode_New`
```c
// Objects/unicodeobject.c
PyObject *
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
{
PyObject *obj;
PyCompactUnicodeObject *unicode;
void *data;
enum PyUnicode_Kind kind;
int is_sharing, is_ascii;
Py_ssize_t char_size;
Py_ssize_t struct_size;
/* Optimization for empty strings */
if (size == 0 && unicode_empty != NULL) {
Py_INCREF(unicode_empty);
return unicode_empty;
}
is_ascii = 0;
is_sharing = 0;
struct_size = sizeof(PyCompactUnicodeObject);
if (maxchar < 128) {
kind = PyUnicode_1BYTE_KIND;
char_size = 1;
is_ascii = 1;
struct_size = sizeof(PyASCIIObject);
}
else if (maxchar < 256) {
kind = PyUnicode_1BYTE_KIND;
char_size = 1;
}
else if (maxchar < 65536) {
kind = PyUnicode_2BYTE_KIND;
char_size = 2;
if (sizeof(wchar_t) == 2)
is_sharing = 1;
}
else {
if (maxchar > MAX_UNICODE) {
PyErr_SetString(PyExc_SystemError,
"invalid maximum character passed to PyUnicode_New");
return NULL;
}
kind = PyUnicode_4BYTE_KIND;
char_size = 4;
if (sizeof(wchar_t) == 4)
is_sharing = 1;
}
/* Ensure we won't overflow the size. */
if (size < 0) {
PyErr_SetString(PyExc_SystemError,
"Negative size passed to PyUnicode_New");
return NULL;
}
if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1))
return PyErr_NoMemory();
/* Duplicated allocation code from _PyObject_New() instead of a call to
* PyObject_New() so we are able to allocate space for the object and
* it's data buffer.
*/
obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size);
if (obj == NULL)
return PyErr_NoMemory();
obj = PyObject_INIT(obj, &PyUnicode_Type);
if (obj == NULL)
return NULL;
unicode = (PyCompactUnicodeObject *)obj;
if (is_ascii)
data = ((PyASCIIObject*)obj) + 1;
else
data = unicode + 1;
_PyUnicode_LENGTH(unicode) = size;
_PyUnicode_HASH(unicode) = -1;
_PyUnicode_STATE(unicode).interned = 0;
_PyUnicode_STATE(unicode).kind = kind;
_PyUnicode_STATE(unicode).compact = 1;
_PyUnicode_STATE(unicode).ready = 1;
_PyUnicode_STATE(unicode).ascii = is_ascii;
if (is_ascii) {
((char*)data)[size] = 0;
_PyUnicode_WSTR(unicode) = NULL;
}
else if (kind == PyUnicode_1BYTE_KIND) {
((char*)data)[size] = 0;
_PyUnicode_WSTR(unicode) = NULL;
_PyUnicode_WSTR_LENGTH(unicode) = 0;
unicode->utf8 = NULL;
unicode->utf8_length = 0;
}
else {
unicode->utf8 = NULL;
unicode->utf8_length = 0;
if (kind == PyUnicode_2BYTE_KIND)
((Py_UCS2*)data)[size] = 0;
else /* kind == PyUnicode_4BYTE_KIND */
((Py_UCS4*)data)[size] = 0;
if (is_sharing) {
_PyUnicode_WSTR_LENGTH(unicode) = size;
_PyUnicode_WSTR(unicode) = (wchar_t *)data;
}
else {
_PyUnicode_WSTR_LENGTH(unicode) = 0;
_PyUnicode_WSTR(unicode) = NULL;
}
}
#ifdef Py_DEBUG
unicode_fill_invalid((PyObject*)unicode, 0);
#endif
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
return obj;
}
```

1. empty string 처리
2. 유니코드 종류 결정
3. 메모리 할당
4. `PyObject_INIT`을 통해 객체 초기화
5. 객체 상태 초기화
6. 데이터 초기화
7. 디버깅 모드일 경우 `unicode_fill_invalid`를 통해 객체 검사

### `_PyUnicode_New`

```c
// Objects/unicodeobject.c

static PyUnicodeObject *
_PyUnicode_New(Py_ssize_t length)
{
PyUnicodeObject *unicode;
size_t new_size;

/* Optimization for empty strings */
if (length == 0 && unicode_empty != NULL) {
Py_INCREF(unicode_empty);
return (PyUnicodeObject*)unicode_empty;
}

/* Ensure we won't overflow the size. */
if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
return (PyUnicodeObject *)PyErr_NoMemory();
}
if (length < 0) {
PyErr_SetString(PyExc_SystemError,
"Negative size passed to _PyUnicode_New");
return NULL;
}

unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
if (unicode == NULL)
return NULL;
new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);

_PyUnicode_WSTR_LENGTH(unicode) = length;
_PyUnicode_HASH(unicode) = -1;
_PyUnicode_STATE(unicode).interned = 0;
_PyUnicode_STATE(unicode).kind = 0;
_PyUnicode_STATE(unicode).compact = 0;
_PyUnicode_STATE(unicode).ready = 0;
_PyUnicode_STATE(unicode).ascii = 0;
_PyUnicode_DATA_ANY(unicode) = NULL;
_PyUnicode_LENGTH(unicode) = 0;
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;

_PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size);
if (!_PyUnicode_WSTR(unicode)) {
Py_DECREF(unicode);
PyErr_NoMemory();
return NULL;
}

/* Initialize the first element to guard against cases where
* the caller fails before initializing str -- unicode_resize()
* reads str[0], and the Keep-Alive optimization can keep memory
* allocated for str alive across a call to unicode_dealloc(unicode).
* We don't want unicode_resize to read uninitialized memory in
* that case.
*/
_PyUnicode_WSTR(unicode)[0] = 0;
_PyUnicode_WSTR(unicode)[length] = 0;

assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
return unicode;
}
```
1. empty string 처리
2. size 유효성 체크
3. `PyObject_New`를 통해 객체 생성
4. 메모리 할당 및 초기화
5. 아래 코드를 통해 메모리 초기화
```c
_PyUnicode_WSTR(unicode)[0] = 0;
_PyUnicode_WSTR(unicode)[length] = 0;
```
6. `_PyUnicode_CheckConsistency`를 통해 객체 검증
### `unicode_fill`
문자열 데이터를 특정 값으로 채우는 기능 수행, 메모리 초기화 및 데이터 채우기 작업에 사용
```c
// Objects/unicodeobject.c
static inline void
unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
Py_ssize_t start, Py_ssize_t length)
{
assert(0 <= start);
assert(kind != PyUnicode_WCHAR_KIND);
switch (kind) {
case PyUnicode_1BYTE_KIND: {
assert(value <= 0xff);
Py_UCS1 ch = (unsigned char)value;
Py_UCS1 *to = (Py_UCS1 *)data + start;
memset(to, ch, length);
break;
}
case PyUnicode_2BYTE_KIND: {
assert(value <= 0xffff);
Py_UCS2 ch = (Py_UCS2)value;
Py_UCS2 *to = (Py_UCS2 *)data + start;
const Py_UCS2 *end = to + length;
for (; to < end; ++to) *to = ch;
break;
}
case PyUnicode_4BYTE_KIND: {
assert(value <= MAX_UNICODE);
Py_UCS4 ch = value;
Py_UCS4 * to = (Py_UCS4 *)data + start;
const Py_UCS4 *end = to + length;
for (; to < end; ++to) *to = ch;
break;
}
default: Py_UNREACHABLE();
}
}
```

- Parameters
- `enum PyUnicode_Kind kind` : 문자열 유니코드 인코딩 종류
- `void *data` : 문자열 데이터
- `Py_UCS4 value` : 채울 유니코드 값
- `Py_ssize_t start` : 문자열 데이터를 채우기 시작할 위치
- `Py_ssize_t length` : 채울 길이

1. 시작 위치 유효성 체크
2. 유니코드 종류에 따른 방식으로 데이터 채우기

0 comments on commit 9b166d2

Please sign in to comment.