generated from Pseudo-Lab/Jupyter-Book-Template
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
391 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,391 @@ | ||
# 11주차 | ||
|
||
## 원하는 type 정해서 구조체 및 type 코드 분석해보기 | ||
|
||
### String type | ||
|
||
```c | ||
// Include/unicodeobject.h | ||
|
||
PyAPI_DATA(PyTypeObject) PyUnicode_Type; | ||
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; | ||
``` | ||
- `PyUnicode_Type` : 기본 문자열 타입, Python 문자열 객체의 모든 메타데이터와 메서드를 포함 | ||
- `PyUnicodeIter_Type` : 문자열 이터레이터 타입, 문자열을 순회할 때 사용 | ||
### `PyUnicode_Type` | ||
```c | ||
// Objects/unicodeobject.c | ||
PyTypeObject PyUnicode_Type = { | ||
PyVarObject_HEAD_INIT(&PyType_Type, 0) | ||
"str", /* tp_name */ | ||
sizeof(PyUnicodeObject), /* tp_basicsize */ | ||
0, /* tp_itemsize */ | ||
/* Slots */ | ||
(destructor)unicode_dealloc, /* tp_dealloc */ | ||
0, /* tp_vectorcall_offset */ | ||
0, /* tp_getattr */ | ||
0, /* tp_setattr */ | ||
0, /* tp_as_async */ | ||
unicode_repr, /* tp_repr */ | ||
&unicode_as_number, /* tp_as_number */ | ||
&unicode_as_sequence, /* tp_as_sequence */ | ||
&unicode_as_mapping, /* tp_as_mapping */ | ||
(hashfunc) unicode_hash, /* tp_hash*/ | ||
0, /* tp_call*/ | ||
(reprfunc) unicode_str, /* tp_str */ | ||
PyObject_GenericGetAttr, /* tp_getattro */ | ||
0, /* tp_setattro */ | ||
0, /* tp_as_buffer */ | ||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | | ||
Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */ | ||
unicode_doc, /* tp_doc */ | ||
0, /* tp_traverse */ | ||
0, /* tp_clear */ | ||
PyUnicode_RichCompare, /* tp_richcompare */ | ||
0, /* tp_weaklistoffset */ | ||
unicode_iter, /* tp_iter */ | ||
0, /* tp_iternext */ | ||
unicode_methods, /* tp_methods */ | ||
0, /* tp_members */ | ||
0, /* tp_getset */ | ||
&PyBaseObject_Type, /* tp_base */ | ||
0, /* tp_dict */ | ||
0, /* tp_descr_get */ | ||
0, /* tp_descr_set */ | ||
0, /* tp_dictoffset */ | ||
0, /* tp_init */ | ||
0, /* tp_alloc */ | ||
unicode_new, /* tp_new */ | ||
PyObject_Del, /* tp_free */ | ||
}; | ||
``` | ||
|
||
### `PyUnicodeIter_Type` | ||
|
||
```c | ||
// Objects/unicodeobject.c | ||
|
||
PyTypeObject PyUnicodeIter_Type = { | ||
PyVarObject_HEAD_INIT(&PyType_Type, 0) | ||
"str_iterator", /* tp_name */ | ||
sizeof(unicodeiterobject), /* tp_basicsize */ | ||
0, /* tp_itemsize */ | ||
/* methods */ | ||
(destructor)unicodeiter_dealloc, /* tp_dealloc */ | ||
0, /* tp_vectorcall_offset */ | ||
0, /* tp_getattr */ | ||
0, /* tp_setattr */ | ||
0, /* tp_as_async */ | ||
0, /* tp_repr */ | ||
0, /* tp_as_number */ | ||
0, /* tp_as_sequence */ | ||
0, /* tp_as_mapping */ | ||
0, /* tp_hash */ | ||
0, /* tp_call */ | ||
0, /* tp_str */ | ||
PyObject_GenericGetAttr, /* tp_getattro */ | ||
0, /* tp_setattro */ | ||
0, /* tp_as_buffer */ | ||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ | ||
0, /* tp_doc */ | ||
(traverseproc)unicodeiter_traverse, /* tp_traverse */ | ||
0, /* tp_clear */ | ||
0, /* tp_richcompare */ | ||
0, /* tp_weaklistoffset */ | ||
PyObject_SelfIter, /* tp_iter */ | ||
(iternextfunc)unicodeiter_next, /* tp_iternext */ | ||
unicodeiter_methods, /* tp_methods */ | ||
0, | ||
}; | ||
``` | ||
|
||
### `unicode_new` | ||
|
||
```c | ||
// Objects/unicodeobject.c | ||
|
||
static PyObject * | ||
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | ||
{ | ||
PyObject *x = NULL; | ||
static char *kwlist[] = {"object", "encoding", "errors", 0}; | ||
char *encoding = NULL; | ||
char *errors = NULL; | ||
|
||
if (type != &PyUnicode_Type) | ||
return unicode_subtype_new(type, args, kwds); | ||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str", | ||
kwlist, &x, &encoding, &errors)) | ||
return NULL; | ||
if (x == NULL) | ||
_Py_RETURN_UNICODE_EMPTY(); | ||
if (encoding == NULL && errors == NULL) | ||
return PyObject_Str(x); | ||
else | ||
return PyUnicode_FromEncodedObject(x, encoding, errors); | ||
} | ||
``` | ||
1. 타입 확인 | ||
2. `PyArg_ParseTupleAndKeywords`를 통해 인자 파싱 | ||
3. 객체 생성 | ||
### `PyUnicode_New` | ||
```c | ||
// Objects/unicodeobject.c | ||
PyObject * | ||
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) | ||
{ | ||
PyObject *obj; | ||
PyCompactUnicodeObject *unicode; | ||
void *data; | ||
enum PyUnicode_Kind kind; | ||
int is_sharing, is_ascii; | ||
Py_ssize_t char_size; | ||
Py_ssize_t struct_size; | ||
/* Optimization for empty strings */ | ||
if (size == 0 && unicode_empty != NULL) { | ||
Py_INCREF(unicode_empty); | ||
return unicode_empty; | ||
} | ||
is_ascii = 0; | ||
is_sharing = 0; | ||
struct_size = sizeof(PyCompactUnicodeObject); | ||
if (maxchar < 128) { | ||
kind = PyUnicode_1BYTE_KIND; | ||
char_size = 1; | ||
is_ascii = 1; | ||
struct_size = sizeof(PyASCIIObject); | ||
} | ||
else if (maxchar < 256) { | ||
kind = PyUnicode_1BYTE_KIND; | ||
char_size = 1; | ||
} | ||
else if (maxchar < 65536) { | ||
kind = PyUnicode_2BYTE_KIND; | ||
char_size = 2; | ||
if (sizeof(wchar_t) == 2) | ||
is_sharing = 1; | ||
} | ||
else { | ||
if (maxchar > MAX_UNICODE) { | ||
PyErr_SetString(PyExc_SystemError, | ||
"invalid maximum character passed to PyUnicode_New"); | ||
return NULL; | ||
} | ||
kind = PyUnicode_4BYTE_KIND; | ||
char_size = 4; | ||
if (sizeof(wchar_t) == 4) | ||
is_sharing = 1; | ||
} | ||
/* Ensure we won't overflow the size. */ | ||
if (size < 0) { | ||
PyErr_SetString(PyExc_SystemError, | ||
"Negative size passed to PyUnicode_New"); | ||
return NULL; | ||
} | ||
if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) | ||
return PyErr_NoMemory(); | ||
/* Duplicated allocation code from _PyObject_New() instead of a call to | ||
* PyObject_New() so we are able to allocate space for the object and | ||
* it's data buffer. | ||
*/ | ||
obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size); | ||
if (obj == NULL) | ||
return PyErr_NoMemory(); | ||
obj = PyObject_INIT(obj, &PyUnicode_Type); | ||
if (obj == NULL) | ||
return NULL; | ||
unicode = (PyCompactUnicodeObject *)obj; | ||
if (is_ascii) | ||
data = ((PyASCIIObject*)obj) + 1; | ||
else | ||
data = unicode + 1; | ||
_PyUnicode_LENGTH(unicode) = size; | ||
_PyUnicode_HASH(unicode) = -1; | ||
_PyUnicode_STATE(unicode).interned = 0; | ||
_PyUnicode_STATE(unicode).kind = kind; | ||
_PyUnicode_STATE(unicode).compact = 1; | ||
_PyUnicode_STATE(unicode).ready = 1; | ||
_PyUnicode_STATE(unicode).ascii = is_ascii; | ||
if (is_ascii) { | ||
((char*)data)[size] = 0; | ||
_PyUnicode_WSTR(unicode) = NULL; | ||
} | ||
else if (kind == PyUnicode_1BYTE_KIND) { | ||
((char*)data)[size] = 0; | ||
_PyUnicode_WSTR(unicode) = NULL; | ||
_PyUnicode_WSTR_LENGTH(unicode) = 0; | ||
unicode->utf8 = NULL; | ||
unicode->utf8_length = 0; | ||
} | ||
else { | ||
unicode->utf8 = NULL; | ||
unicode->utf8_length = 0; | ||
if (kind == PyUnicode_2BYTE_KIND) | ||
((Py_UCS2*)data)[size] = 0; | ||
else /* kind == PyUnicode_4BYTE_KIND */ | ||
((Py_UCS4*)data)[size] = 0; | ||
if (is_sharing) { | ||
_PyUnicode_WSTR_LENGTH(unicode) = size; | ||
_PyUnicode_WSTR(unicode) = (wchar_t *)data; | ||
} | ||
else { | ||
_PyUnicode_WSTR_LENGTH(unicode) = 0; | ||
_PyUnicode_WSTR(unicode) = NULL; | ||
} | ||
} | ||
#ifdef Py_DEBUG | ||
unicode_fill_invalid((PyObject*)unicode, 0); | ||
#endif | ||
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0)); | ||
return obj; | ||
} | ||
``` | ||
|
||
1. empty string 처리 | ||
2. 유니코드 종류 결정 | ||
3. 메모리 할당 | ||
4. `PyObject_INIT`을 통해 객체 초기화 | ||
5. 객체 상태 초기화 | ||
6. 데이터 초기화 | ||
7. 디버깅 모드일 경우 `unicode_fill_invalid`를 통해 객체 검사 | ||
|
||
### `_PyUnicode_New` | ||
|
||
```c | ||
// Objects/unicodeobject.c | ||
|
||
static PyUnicodeObject * | ||
_PyUnicode_New(Py_ssize_t length) | ||
{ | ||
PyUnicodeObject *unicode; | ||
size_t new_size; | ||
|
||
/* Optimization for empty strings */ | ||
if (length == 0 && unicode_empty != NULL) { | ||
Py_INCREF(unicode_empty); | ||
return (PyUnicodeObject*)unicode_empty; | ||
} | ||
|
||
/* Ensure we won't overflow the size. */ | ||
if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { | ||
return (PyUnicodeObject *)PyErr_NoMemory(); | ||
} | ||
if (length < 0) { | ||
PyErr_SetString(PyExc_SystemError, | ||
"Negative size passed to _PyUnicode_New"); | ||
return NULL; | ||
} | ||
|
||
unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); | ||
if (unicode == NULL) | ||
return NULL; | ||
new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); | ||
|
||
_PyUnicode_WSTR_LENGTH(unicode) = length; | ||
_PyUnicode_HASH(unicode) = -1; | ||
_PyUnicode_STATE(unicode).interned = 0; | ||
_PyUnicode_STATE(unicode).kind = 0; | ||
_PyUnicode_STATE(unicode).compact = 0; | ||
_PyUnicode_STATE(unicode).ready = 0; | ||
_PyUnicode_STATE(unicode).ascii = 0; | ||
_PyUnicode_DATA_ANY(unicode) = NULL; | ||
_PyUnicode_LENGTH(unicode) = 0; | ||
_PyUnicode_UTF8(unicode) = NULL; | ||
_PyUnicode_UTF8_LENGTH(unicode) = 0; | ||
|
||
_PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size); | ||
if (!_PyUnicode_WSTR(unicode)) { | ||
Py_DECREF(unicode); | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
|
||
/* Initialize the first element to guard against cases where | ||
* the caller fails before initializing str -- unicode_resize() | ||
* reads str[0], and the Keep-Alive optimization can keep memory | ||
* allocated for str alive across a call to unicode_dealloc(unicode). | ||
* We don't want unicode_resize to read uninitialized memory in | ||
* that case. | ||
*/ | ||
_PyUnicode_WSTR(unicode)[0] = 0; | ||
_PyUnicode_WSTR(unicode)[length] = 0; | ||
|
||
assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); | ||
return unicode; | ||
} | ||
``` | ||
1. empty string 처리 | ||
2. size 유효성 체크 | ||
3. `PyObject_New`를 통해 객체 생성 | ||
4. 메모리 할당 및 초기화 | ||
5. 아래 코드를 통해 메모리 초기화 | ||
```c | ||
_PyUnicode_WSTR(unicode)[0] = 0; | ||
_PyUnicode_WSTR(unicode)[length] = 0; | ||
``` | ||
6. `_PyUnicode_CheckConsistency`를 통해 객체 검증 | ||
### `unicode_fill` | ||
문자열 데이터를 특정 값으로 채우는 기능 수행, 메모리 초기화 및 데이터 채우기 작업에 사용 | ||
```c | ||
// Objects/unicodeobject.c | ||
static inline void | ||
unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value, | ||
Py_ssize_t start, Py_ssize_t length) | ||
{ | ||
assert(0 <= start); | ||
assert(kind != PyUnicode_WCHAR_KIND); | ||
switch (kind) { | ||
case PyUnicode_1BYTE_KIND: { | ||
assert(value <= 0xff); | ||
Py_UCS1 ch = (unsigned char)value; | ||
Py_UCS1 *to = (Py_UCS1 *)data + start; | ||
memset(to, ch, length); | ||
break; | ||
} | ||
case PyUnicode_2BYTE_KIND: { | ||
assert(value <= 0xffff); | ||
Py_UCS2 ch = (Py_UCS2)value; | ||
Py_UCS2 *to = (Py_UCS2 *)data + start; | ||
const Py_UCS2 *end = to + length; | ||
for (; to < end; ++to) *to = ch; | ||
break; | ||
} | ||
case PyUnicode_4BYTE_KIND: { | ||
assert(value <= MAX_UNICODE); | ||
Py_UCS4 ch = value; | ||
Py_UCS4 * to = (Py_UCS4 *)data + start; | ||
const Py_UCS4 *end = to + length; | ||
for (; to < end; ++to) *to = ch; | ||
break; | ||
} | ||
default: Py_UNREACHABLE(); | ||
} | ||
} | ||
``` | ||
|
||
- Parameters | ||
- `enum PyUnicode_Kind kind` : 문자열 유니코드 인코딩 종류 | ||
- `void *data` : 문자열 데이터 | ||
- `Py_UCS4 value` : 채울 유니코드 값 | ||
- `Py_ssize_t start` : 문자열 데이터를 채우기 시작할 위치 | ||
- `Py_ssize_t length` : 채울 길이 | ||
|
||
1. 시작 위치 유효성 체크 | ||
2. 유니코드 종류에 따른 방식으로 데이터 채우기 |