Skip to content

Commit

Permalink
Summary
Browse files: browse the repository at this point in the history
  • Loading branch information
DmitryRyumin committed Oct 10, 2024
1 parent 25c445d commit de0c642
Show file tree
Hide file tree
Showing 7 changed files with 10,320 additions and 5,519 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -60,7 +60,7 @@
{
"data": {
"text/markdown": [
"<span style=\"color:#333\">**[</span><span style=\"color:#1776D2\">2024-10-08 19:49:08</span><span style=\"color:#333\">]</span> <span style=\"color:#333\">OCEANAI - персональные качества личности человека:</span>**<br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Авторы:</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмина Елена [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмин Дмитрий [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Карпов Алексей [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Сопровождающие:</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмина Елена [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмин Дмитрий [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Версия: <u>1.0.0a40</u></span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Лицензия: <u>BSD License</u></span></p>"
"<span style=\"color:#333\">**[</span><span style=\"color:#1776D2\">2024-10-09 16:38:10</span><span style=\"color:#333\">]</span> <span style=\"color:#333\">OCEANAI - персональные качества личности человека:</span>**<br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Авторы:</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмина Елена [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмин Дмитрий [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Карпов Алексей [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Сопровождающие:</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмина Елена [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Рюмин Дмитрий [<u>[email protected]</u>]</span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Версия: <u>1.0.0a40</u></span><br /><span style=\"color:#333\">&nbsp;&nbsp;&nbsp;&nbsp;Лицензия: <u>BSD License</u></span></p>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
Expand Down Expand Up @@ -167,7 +167,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.10.11"
}
},
"nbformat": 4,
Expand Down
4,761 changes: 3,165 additions & 1,596 deletions docs/source/user_guide/notebooks/Pipeline_practical_task_1.ipynb

Large diffs are not rendered by default.

5,445 changes: 3,336 additions & 2,109 deletions docs/source/user_guide/notebooks/Pipeline_practical_task_2.ipynb

Large diffs are not rendered by default.

4,745 changes: 3,394 additions & 1,351 deletions docs/source/user_guide/notebooks/Pipeline_practical_task_3.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

43 changes: 25 additions & 18 deletions oceanai/modules/core/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4161,7 +4161,6 @@ def _professional_match(
df_files (pd.DataFrame): **DataFrame** c данными
correlation_coefficients (pd.DataFrame): **DataFrame** c коэффициентами корреляции
personality_type (str): Персональный тип по версии MBTI
col_name_ocean (str): Столбец с названиями персональных качеств личности человека
threshold (float): Порог для оценок полярности качеств (например, интроверт < 0.55, экстраверт > 0.55)
out (bool): Отображение
Expand Down Expand Up @@ -4204,7 +4203,10 @@ def _professional_match(

name_mbti = correlation_coefficients.columns[1:]

need_type = self.dict_mbti[personality_type]
if len(personality_type) != 4:
need_type = self.dict_mbti[personality_type]
else:
need_type = personality_type

for path in range(len(self._df_files)):
curr_traits = self._df_files.iloc[path].values[1:]
Expand Down Expand Up @@ -4233,9 +4235,9 @@ def _professional_match(
by=["MBTI_Score"], ascending=False
)

self._df_files_MBTI_job_match.index.name = self._keys_id
self._df_files_MBTI_job_match.index += 1
self._df_files_MBTI_job_match.index = self._df_files_MBTI_job_match.index.map(str)
# self._df_files_MBTI_job_match.index.name = self._keys_id
# self._df_files_MBTI_job_match.index += 1
# self._df_files_MBTI_job_match.index = self._df_files_MBTI_job_match.index.map(str)

except Exception:
self._other_error(self._unknown_err, out=out)
Expand All @@ -4261,7 +4263,6 @@ def _colleague_personality_type_match(
df_files (pd.DataFrame): **DataFrame** c данными
correlation_coefficients (pd.DataFrame): **DataFrame** c коэффициентами корреляции
target_scores (List[float]): Список оценок персональных качеств личности целевого человека
col_name_ocean (str): Столбец с названиями персональных качеств личности человека
threshold (float): Порог для оценок полярности качеств (например, интроверт < 0.55, экстраверт > 0.55)
out (bool): Отображение
Expand Down Expand Up @@ -4340,7 +4341,7 @@ def _colleague_personality_type_match(
]
)

match, _ = self._compatibility_percentage(target_personality_type, personality_type)
match, _ = self._compatibility_percentage(target_personality_type, personality_type, curr_weights)

self._df_files_MBTI_colleague_match.loc[
str(path + 1),
Expand All @@ -4351,9 +4352,9 @@ def _colleague_personality_type_match(
by=["Match"], ascending=False
)

self._df_files_MBTI_colleague_match.index.name = self._keys_id
self._df_files_MBTI_colleague_match.index += 1
self._df_files_MBTI_colleague_match.index = self._df_files_MBTI_colleague_match.index.map(str)
# self._df_files_MBTI_colleague_match.index.name = self._keys_id
# self._df_files_MBTI_colleague_match.index += 1
# self._df_files_MBTI_colleague_match.index = self._df_files_MBTI_colleague_match.index.map(str)

except Exception:
self._other_error(self._unknown_err, out=out)
Expand Down Expand Up @@ -4382,7 +4383,6 @@ def _colleague_personality_desorders(
correlation_coefficients_disorders (pd.DataFrame): **DataFrame** c коэффициентами корреляции для расстройств
target_scores (List[float]): Список оценок персональных качеств личности целевого человека
personality_desorder_number (int): Количество приоритетных расстройств
col_name_ocean (str): Столбец с названиями персональных качеств личности человека
threshold (float): Порог для оценок полярности качеств (например, интроверт < 0.55, экстраверт > 0.55)
out (bool): Отображение
Expand Down Expand Up @@ -4441,6 +4441,13 @@ def _colleague_personality_desorders(

curr_weights = np.sum(curr_traits_matrix, axis=0)

personality_type = "".join(
[
(name_mbti[idx_type][1] if curr_weights[idx_type] <= 0 else name_mbti[idx_type][0])
for idx_type in range(len(curr_weights))
]
)

for idx_type in range(len(curr_weights)):
idx_curr_matrix = pd_matrix[:, idx_type]
if curr_weights[idx_type] < 0:
Expand All @@ -4459,19 +4466,19 @@ def _colleague_personality_desorders(
pd_matrix = np.sum(pd_matrix, axis=1)

idx_max_values = np.argsort(-np.asarray(pd_matrix))[:personality_desorder_number]
desorders = name_pd[idx_max_values]
desorders = [name_pd[i] + ' ({})'.format(np.round(pd_matrix[i], 3)) for i in idx_max_values]

self._df_files_MBTI_disorders.loc[
str(path + 1),
name_mbti.tolist()
+ [("Disorder" + " {}").format(i + 1) for i in range(personality_desorder_number)],
["MBTI"]
+ ["Disorder {}".format(i + 1) for i in range(personality_desorder_number)],
] = (
curr_weights.tolist() + desorders.tolist()
[personality_type] + desorders
)

self._df_files_MBTI_disorders.index.name = self._keys_id
self._df_files_MBTI_disorders.index += 1
self._df_files_MBTI_disorders.index = self._df_files_MBTI_disorders.index.map(str)
# self._df_files_MBTI_disorders.index.name = self._keys_id
# self._df_files_MBTI_disorders.index += 1
# self._df_files_MBTI_disorders.index = self._df_files_MBTI_disorders.index.map(str)

except Exception:
self._other_error(self._unknown_err, out=out)
Expand Down
87 changes: 27 additions & 60 deletions oceanai/modules/lab/text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -781,9 +781,7 @@ def __process_audio_and_extract_features(
self._model_transcriptions = WhisperForConditionalGeneration.from_pretrained(self._path_to_transriber).to(
self._device
)

if lang == self.__lang_traslate[0]:
self.__forced_decoder_ids = self._processor.get_decoder_prompt_ids(language=lang, task="transcribe")
self._model_transcriptions.config.forced_decoder_ids = None

path_to_wav = os.path.join(str(Path(path).parent), Path(path).stem + "." + "wav")

Expand All @@ -794,71 +792,40 @@ def __process_audio_and_extract_features(
)
call_audio = subprocess.call(ff_audio, shell=True)

try:
if call_audio == 1:
raise OSError
except OSError:
if call_audio != 0:
self._other_error(self._unknown_err, last=last, out=out)
return np.empty([]), np.empty([])
except Exception:
self._other_error(self._unknown_err, last=last, out=out)
return np.empty([]), np.empty([])
else:
wav, sr = torchaudio.load(path_to_wav)

if wav.size(0) > 1:
wav = wav.mean(dim=0, keepdim=True)

if sr != 16000:
transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
wav = transform(wav)
sr = 16000

wav = wav.squeeze(0)

for start in range(0, len(wav), win):
inputs = self._processor(wav[start : start + win], sampling_rate=16000, return_tensors="pt")
input_features = inputs.input_features.to(self._device)
if lang == self.__lang_traslate[0]:
generated_ids = self._model_transcriptions.generate(
input_features=input_features,
forced_decoder_ids=self.__forced_decoder_ids,
max_new_tokens=448,
)
elif lang == self.__lang_traslate[1]:
generated_ids = self._model_transcriptions.generate(
input_features=input_features, max_new_tokens=448
)
transcription = self._processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
self.__text_pred += transcription

return self.__translate_and_extract_features(self.__text_pred, lang, show_text, last, out)
else:
wav, sr = torchaudio.load(path_to_wav)
wav, sr = torchaudio.load(path_to_wav)

if wav.size(0) > 1:
wav = wav.mean(dim=0, keepdim=True)
if wav.size(0) > 1:
wav = wav.mean(dim=0, keepdim=True)

if sr != 16000:
transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
wav = transform(wav)
sr = 16000
if sr != 16000:
transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
wav = transform(wav)
sr = 16000

wav = wav.squeeze(0)
wav = wav.squeeze(0)

for start in range(0, len(wav), win):
inputs = self._processor(wav[start : start + win], sampling_rate=16000, return_tensors="pt")
input_features = inputs.input_features.to(self._device)
if lang == self.__lang_traslate[0]:
generated_ids = self._model_transcriptions.generate(
input_features=input_features, forced_decoder_ids=self.__forced_decoder_ids
)
elif lang == self.__lang_traslate[1]:
generated_ids = self._model_transcriptions.generate(input_features=input_features)
transcription = self._processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
self.__text_pred += transcription
for start in range(0, len(wav), win):
inputs = self._processor(wav[start : start + win], sampling_rate=16000, return_tensors="pt")
input_features = inputs.input_features.to(self._device)
if lang == self.__lang_traslate[0]:
generated_ids = self._model_transcriptions.generate(
input_features=input_features,
)
elif lang == self.__lang_traslate[1]:
generated_ids = self._model_transcriptions.generate(
input_features=input_features, language="en"
)
transcription = self._processor.batch_decode(generated_ids, skip_special_tokens=False)
transcription = re.findall(r'> ([^<>]+)', transcription[0])
self.__text_pred += transcription[0] + ' '

return self.__translate_and_extract_features(self.__text_pred, lang, show_text, last, out)
self.__text_pred = self.__text_pred.strip()

return self.__translate_and_extract_features(self.__text_pred, lang, show_text, last, out)

def __load_text_model_b5(self, show_summary: bool = False, out: bool = True) -> Optional[nn.Module]:
"""Формирование нейросетевой архитектуры модели для получения оценок персональных качеств
Expand Down

0 comments on commit de0c642

Please sign in to comment.