Revisão | c732afeb3f31febd7da85697da4da6744d08c5a2 (tree) |
---|---|
Hora | 2019-02-15 00:18:14 |
Autor | Starg <starg@user...> |
Committer | Starg |
Import UnkoTim224
@@ -776,8 +776,6 @@ SaveIniFile(SETTING_PLAYER *sp, SETTING_TIMIDITY *st) | ||
776 | 776 | IniPutKeyInt(INI_SEC_TIMIDITY,"opt_modulation_update",&(st->opt_modulation_update)); |
777 | 777 | IniPutKeyInt(INI_SEC_TIMIDITY,"opt_cut_short_time",&st->opt_cut_short_time); |
778 | 778 | IniPutKeyInt(INI_SEC_TIMIDITY,"opt_limiter",&st->opt_limiter); |
779 | - if (st->opt_use_midi_loop_repeat) | |
780 | - st->opt_use_midi_loop_repeat = 1; | |
781 | 779 | IniPutKeyInt(INI_SEC_TIMIDITY, "opt_use_midi_loop_repeat", &st->opt_use_midi_loop_repeat); |
782 | 780 | IniPutKeyInt(INI_SEC_TIMIDITY, "opt_midi_loop_repeat", &st->opt_midi_loop_repeat); |
783 | 781 |
@@ -1443,9 +1443,9 @@ PrefPlayerDialogProc(HWND hwnd, UINT uMess, WPARAM wParam, LPARAM lParam) | ||
1443 | 1443 | CB_SET(IDC_COMBO_SECOND_MODE, CB_FIND(cb_info_IDC_COMBO_SECOND_MODE_num, sp_temp->SecondMode, 0)); |
1444 | 1444 | |
1445 | 1445 | // CC/Mark loop repeat |
1446 | - CH_SET(IDC_CHECKBOX_LOOP_CC111, st_temp->opt_use_midi_loop_repeat & LF_CC111_TO_EOT); | |
1447 | - CH_SET(IDC_CHECKBOX_LOOP_AB_MARK, st_temp->opt_use_midi_loop_repeat & LF_MARK_A_TO_B); | |
1448 | - CH_SET(IDC_CHECKBOX_LOOP_SE_MARK, st_temp->opt_use_midi_loop_repeat & LF_MARK_S_TO_E); | |
1446 | + CH_SET(IDC_CHECKBOX_LOOP_CC111, (st_temp->opt_use_midi_loop_repeat & LF_CC111_TO_EOT) != 0); | |
1447 | + CH_SET(IDC_CHECKBOX_LOOP_AB_MARK, (st_temp->opt_use_midi_loop_repeat & LF_MARK_A_TO_B) != 0); | |
1448 | + CH_SET(IDC_CHECKBOX_LOOP_SE_MARK, (st_temp->opt_use_midi_loop_repeat & LF_MARK_S_TO_E) != 0); | |
1449 | 1449 | CH_SET(IDC_CHECKBOX_LOOP_CC2, (st_temp->opt_use_midi_loop_repeat & LF_CC2_TO_CC4) != 0); |
1450 | 1450 | SendMessage(hwnd, WM_COMMAND, IDC_CHECKBOX_LOOP_CC111, 0); |
1451 | 1451 | EB_SET_INT(IDC_EDIT_LOOP_REPEAT, st_temp->opt_midi_loop_repeat); |
@@ -783,8 +783,10 @@ ApplySettingTiMidity(SETTING_TIMIDITY *st) | ||
783 | 783 | opt_mix_envelope = st->opt_mix_envelope; |
784 | 784 | opt_modulation_update = st->opt_modulation_update; |
785 | 785 | opt_cut_short_time = st->opt_cut_short_time; |
786 | - opt_use_midi_loop_repeat = SetFlag(st->opt_use_midi_loop_repeat); | |
786 | +#ifdef SUPPORT_LOOPEVENT | |
787 | + opt_use_midi_loop_repeat = st->opt_use_midi_loop_repeat; | |
787 | 788 | opt_midi_loop_repeat = SetValue(st->opt_midi_loop_repeat, 0, 99); |
789 | +#endif /* SUPPORT_LOOPEVENT */ | |
788 | 790 | |
789 | 791 | #if defined(WINDRV_SETUP) || defined(WINDRV) |
790 | 792 | syn_ThreadPriority = st->syn_ThreadPriority; |
@@ -1048,8 +1050,10 @@ SaveSettingTiMidity(SETTING_TIMIDITY *st) | ||
1048 | 1050 | st->add_silent_time = add_silent_time; |
1049 | 1051 | st->emu_delay_time = emu_delay_time; |
1050 | 1052 | st->opt_limiter = opt_limiter; |
1051 | - st->opt_use_midi_loop_repeat = SetValue(opt_use_midi_loop_repeat, 0, 1); | |
1053 | +#ifdef SUPPORT_LOOPEVENT | |
1054 | + st->opt_use_midi_loop_repeat = opt_use_midi_loop_repeat; | |
1052 | 1055 | st->opt_midi_loop_repeat = opt_midi_loop_repeat; |
1056 | +#endif /* SUPPORT_LOOPEVENT */ | |
1053 | 1057 | |
1054 | 1058 | st->opt_mix_envelope = opt_mix_envelope; |
1055 | 1059 | st->opt_modulation_update = opt_modulation_update; |
@@ -1086,7 +1090,26 @@ SaveSettingTiMidity(SETTING_TIMIDITY *st) | ||
1086 | 1090 | st->opt_int_synth_update = opt_int_synth_update; |
1087 | 1091 | } |
1088 | 1092 | |
1089 | - | |
1093 | +void | |
1094 | +InitSettingTiMidity(SETTING_TIMIDITY *st) | |
1095 | +{ | |
1096 | + st->voices = voices = DEFAULT_VOICES; | |
1097 | + st->output_rate = opt_output_rate = DEFAULT_RATE; | |
1098 | +#if defined(TWSYNSRV) || defined(TWSYNG32) | |
1099 | + st->audio_buffer_bits = opt_audio_buffer_bits = DEFAULT_AUDIO_BUFFER_BITS; | |
1100 | + st->opt_reverb_control = opt_reverb_control = 0; /* default off */ | |
1101 | + st->opt_chorus_control = opt_chorus_control = 0; /* default off */ | |
1102 | + st->opt_surround_chorus = opt_surround_chorus = 0; /* default off */ | |
1103 | + st->opt_normal_chorus_plus = opt_normal_chorus_plus = 0; /* default off */ | |
1104 | + st->opt_lpf_def = opt_lpf_def = 0; /* default off */ | |
1105 | + st->noise_sharp_type = noise_sharp_type = 0; /* default off */ | |
1106 | + st->opt_resample_type = opt_resample_type = 0; /* default off */ | |
1107 | +#endif /* TWSYNSRV || TWSYNG32 */ | |
1108 | +#ifdef SUPPORT_LOOPEVENT | |
1109 | + st->opt_use_midi_loop_repeat = 0; | |
1110 | + st->opt_midi_loop_repeat = 3; | |
1111 | +#endif /* SUPPORT_LOOPEVENT */ | |
1112 | +} | |
1090 | 1113 | |
1091 | 1114 | |
1092 | 1115 |
@@ -1236,6 +1259,7 @@ void w32g_initialize(void) | ||
1236 | 1259 | |
1237 | 1260 | SaveSettingPlayer(sp_current); |
1238 | 1261 | SaveSettingTiMidity(st_current); |
1262 | + InitSettingTiMidity(st_current); | |
1239 | 1263 | if(IniVersionCheck()) |
1240 | 1264 | { |
1241 | 1265 | LoadIniFile(sp_current, st_current); |
@@ -1453,6 +1477,7 @@ void w32g_initialize(void) | ||
1453 | 1477 | |
1454 | 1478 | SaveSettingPlayer(sp_current); |
1455 | 1479 | SaveSettingTiMidity(st_current); |
1480 | + InitSettingTiMidity(st_current); | |
1456 | 1481 | if(IniVersionCheck()) |
1457 | 1482 | { |
1458 | 1483 | LoadIniFile(sp_current, st_current); |
@@ -67,6 +67,7 @@ inialize_effect | ||
67 | 67 | #include "effect.h" |
68 | 68 | #include "mt19937ar.h" |
69 | 69 | #include "sndfontini.h" |
70 | +#include "fft4g.h" | |
70 | 71 | |
71 | 72 | #if defined(__W32__) |
72 | 73 | #include <windows.h> |
@@ -6529,7 +6530,7 @@ static void do_reverb_ex(DATA_T *buf, int32 count, InfoReverbEX *info) | ||
6529 | 6530 | |
6530 | 6531 | #define REV_EX2_LEVEL (1.0) // total |
6531 | 6532 | #define REV_EX2_ST_CROSS (0.3) |
6532 | -#define REV_EX2_REV_LEVEL (0.5 * (1.0 - REV_EX2_ST_CROSS)) | |
6533 | +#define REV_EX2_REV_LEVEL (0.25 * (1.0 - REV_EX2_ST_CROSS)) | |
6533 | 6534 | |
6534 | 6535 | double ext_reverb_ex2_level = 1.0; |
6535 | 6536 | int ext_reverb_ex2_rsmode = 3; |
@@ -6540,11 +6541,9 @@ int ext_reverb_ex2_fftmode = 0; | ||
6540 | 6541 | static void do_reverb_ex2_thread(int thread_num, void *info2); |
6541 | 6542 | #endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) |
6542 | 6543 | |
6543 | -#if defined(REV_EX2_FFT) | |
6544 | 6544 | static void init_reverb_ex2_fft(InfoReverbEX2 *info); |
6545 | 6545 | static void do_reverb_ex2_fft_thread(int thread_num, void *info2); |
6546 | 6546 | static void do_reverb_ex2_fft(DATA_T *buf, int32 count, InfoReverbEX2 *info); |
6547 | -#endif | |
6548 | 6547 | |
6549 | 6548 | |
6550 | 6549 | #define MYINI_LIBRARY_DEFIND_VAR |
@@ -6975,7 +6974,7 @@ static void do_reverb_ex2_resample_ov2(float *in0, float *in1, float *out0, floa | ||
6975 | 6974 | static void do_reverb_ex2_resample_ds2(float *in0, float *in1, float *out0, float *out1, int32 nframe) |
6976 | 6975 | { |
6977 | 6976 | int32 i, k; |
6978 | - | |
6977 | + | |
6979 | 6978 | for (i = 0, k = 0; i < nframe; i++, k += 2){ |
6980 | 6979 | out0[i] = (in0[k] + in0[k + 1]) * DIV_2; |
6981 | 6980 | out1[i] = (in1[k] + in1[k + 1]) * DIV_2; |
@@ -6986,16 +6985,90 @@ static void do_reverb_ex2_resample_ds4(float *in0, float *in1, float *out0, floa | ||
6986 | 6985 | { |
6987 | 6986 | int32 i, k; |
6988 | 6987 | |
6988 | +#if (USE_X86_EXT_INTRIN >= 2) // 4samples | |
6989 | + const __m128 divn = _mm_set1_ps(DIV_4); | |
6990 | + for (i = 0, k = 0; i < nframe; i += 4, k += 16){ | |
6991 | + __m128 sum1 = _mm_load_ps(&in0[k + 0]); // v0,v1,v2,v3 | |
6992 | + __m128 sum2 = _mm_load_ps(&in0[k + 4]); // v4,v5,v6,v7 | |
6993 | + __m128 sum3 = _mm_load_ps(&in0[k + 8]); // v8,v9,v10,v11 | |
6994 | + __m128 sum4 = _mm_load_ps(&in0[k + 12]); // v12,v13,v14,v15 | |
6995 | + __m128 sum5 = _mm_load_ps(&in1[k + 0]); // v0,v1,v2,v3 | |
6996 | + __m128 sum6 = _mm_load_ps(&in1[k + 4]); // v4,v5,v6,v7 | |
6997 | + __m128 sum7 = _mm_load_ps(&in1[k + 8]); // v8,v9,v10,v11 | |
6998 | + __m128 sum8 = _mm_load_ps(&in1[k + 12]); // v12,v13,v14,v15 | |
6999 | + //_MM_TRANSPOSE4_PS(sum1, sum2, sum3, sum4) | |
7000 | + __m128 tmp0 = _mm_shuffle_ps(sum1, sum2, 0x44); // v0,v1,v4,v5 | |
7001 | + __m128 tmp2 = _mm_shuffle_ps(sum1, sum2, 0xEE); // v2,v3,v6,v7 | |
7002 | + __m128 tmp1 = _mm_shuffle_ps(sum3, sum4, 0x44); // v8,v9,v12,v13 | |
7003 | + __m128 tmp3 = _mm_shuffle_ps(sum3, sum4, 0xEE); // v10,v11,v14,v15 | |
7004 | + sum1 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v0,v4,v8,v12 | |
7005 | + sum2 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v1,v5,v9,v13 | |
7006 | + sum3 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v2,v6,v10,v14 | |
7007 | + sum4 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v3,v7,v11,v15 | |
7008 | + //_MM_TRANSPOSE4_PS(sum5, sum6, sum7, sum8) | |
7009 | + tmp0 = _mm_shuffle_ps(sum5, sum6, 0x44); // v16,.... | |
7010 | + tmp2 = _mm_shuffle_ps(sum5, sum6, 0xEE); // v18,.... | |
7011 | + tmp1 = _mm_shuffle_ps(sum7, sum8, 0x44); // v24,.... | |
7012 | + tmp3 = _mm_shuffle_ps(sum7, sum8, 0xEE); // v26,.... | |
7013 | + sum5 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v16,.... | |
7014 | + sum6 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v17,.... | |
7015 | + sum7 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v18,.... | |
7016 | + sum8 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v19,.... | |
7017 | + sum1 = _mm_add_ps(sum1, sum2); | |
7018 | + sum3 = _mm_add_ps(sum3, sum4); | |
7019 | + sum5 = _mm_add_ps(sum5, sum6); | |
7020 | + sum7 = _mm_add_ps(sum7, sum8); | |
7021 | + sum1 = _mm_add_ps(sum1, sum3); | |
7022 | + sum5 = _mm_add_ps(sum5, sum7); | |
7023 | + sum1 = _mm_mul_ps(sum1, divn); | |
7024 | + sum5 = _mm_mul_ps(sum5, divn); | |
7025 | + _mm_store_ps(&out0[i], sum1); | |
7026 | + _mm_store_ps(&out1[i], sum5); | |
7027 | + } | |
7028 | +#else | |
6989 | 7029 | for (i = 0, k = 0; i < nframe; i++, k += 4){ |
6990 | 7030 | out0[i] = (in0[k] + in0[k + 1] + in0[k + 2] + in0[k + 3]) * DIV_4; |
6991 | 7031 | out1[i] = (in1[k] + in1[k + 1] + in1[k + 1] + in1[k + 3]) * DIV_4; |
6992 | 7032 | } |
7033 | +#endif | |
6993 | 7034 | } |
6994 | 7035 | |
6995 | 7036 | static void do_reverb_ex2_resample_ds8(float *in0, float *in1, float *out0, float *out1, int32 nframe) |
6996 | 7037 | { |
6997 | 7038 | int32 i, k; |
6998 | - | |
7039 | + | |
7040 | +#if (USE_X86_EXT_INTRIN >= 2) // 2samples | |
7041 | + const __m128 divn = _mm_set1_ps(DIV_8); | |
7042 | + for (i = 0, k = 0; i < nframe; i += 2, k += 16){ | |
7043 | + __m128 vin1 = _mm_load_ps(&in0[k + 0]); // v0,v1,v2,v3 | |
7044 | + __m128 vin2 = _mm_load_ps(&in0[k + 4]); // v4,v5,v6,v7 | |
7045 | + __m128 vin3 = _mm_load_ps(&in0[k + 8]); // v8,v9,v10,v11 | |
7046 | + __m128 vin4 = _mm_load_ps(&in0[k + 12]); // v12,v13,v14,v15 | |
7047 | + __m128 vin5 = _mm_load_ps(&in1[k + 0]); // v0,v1,v2,v3 | |
7048 | + __m128 vin6 = _mm_load_ps(&in1[k + 4]); // v4,v5,v6,v7 | |
7049 | + __m128 vin7 = _mm_load_ps(&in1[k + 8]); // v8,v9,v10,v11 | |
7050 | + __m128 vin8 = _mm_load_ps(&in1[k + 12]); // v12,v13,v14,v15 | |
7051 | + __m128 sum1 = _mm_add_ps(vin1, vin2); // v0v4,v1v5,v2v6,v3v7 | |
7052 | + __m128 sum2 = _mm_add_ps(vin3, vin4); // v8v12,v9v13,v10v14,v11v15 | |
7053 | + __m128 sum3 = _mm_add_ps(vin5, vin6); // v0v4,v1v5,v2v6,v3v7 | |
7054 | + __m128 sum4 = _mm_add_ps(vin7, vin8); // v8v12,v9v13,v10v14,v11v15 | |
7055 | + //_MM_TRANSPOSE4_PS(sum1, sum2, sum3, sum4) | |
7056 | + __m128 tmp0 = _mm_shuffle_ps(sum1, sum2, 0x44); // in0: v0v4,v1v5,v8v12,v9v13 | |
7057 | + __m128 tmp2 = _mm_shuffle_ps(sum1, sum2, 0xEE); // in0: v2v6,v3v7,v10v14,v11v15 | |
7058 | + __m128 tmp1 = _mm_shuffle_ps(sum3, sum4, 0x44); // in1: v0v4,v1v5,v8v12,v9v13 | |
7059 | + __m128 tmp3 = _mm_shuffle_ps(sum3, sum4, 0xEE); // in1: v2v6,v3v7,v10v14,v11v15 | |
7060 | + sum1 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v0v4,v8v12,v0v4,v8v12 | |
7061 | + sum2 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v1v5,v9v13,v1v5,v9v13 | |
7062 | + sum3 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v2v6,v10v14,v2v6,v10v14 | |
7063 | + sum4 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v3v7,v11v15,v3v7,v11v15 | |
7064 | + sum1 = _mm_add_ps(sum1, sum2); | |
7065 | + sum3 = _mm_add_ps(sum3, sum4); | |
7066 | + sum1 = _mm_add_ps(sum1, sum3); | |
7067 | + sum1 = _mm_mul_ps(sum1, divn); | |
7068 | + _mm_storel_pi((__m64*)&out0[i], sum1); | |
7069 | + _mm_storeh_pi((__m64*)&out1[i], sum1); | |
7070 | + } | |
7071 | +#else | |
6999 | 7072 | for (i = 0, k = 0; i < nframe; i++, k += 8){ |
7000 | 7073 | out0[i] = ( |
7001 | 7074 | in0[k ] + in0[k + 1] + in0[k + 2] + in0[k + 3] + |
@@ -7004,6 +7077,7 @@ static void do_reverb_ex2_resample_ds8(float *in0, float *in1, float *out0, floa | ||
7004 | 7077 | in1[k ] + in1[k + 1] + in1[k + 1] + in1[k + 3] + |
7005 | 7078 | in1[k + 4] + in1[k + 5] + in1[k + 6] + in1[k + 7]) * DIV_8; |
7006 | 7079 | } |
7080 | +#endif | |
7007 | 7081 | } |
7008 | 7082 | |
7009 | 7083 | void free_reverb_ex2(InfoReverbEX2 *info) |
@@ -7021,8 +7095,6 @@ void free_reverb_ex2(InfoReverbEX2 *info) | ||
7021 | 7095 | if(info->tbuf[i] != NULL){ safe_free(info->tbuf[i]); info->tbuf[i] = NULL; } |
7022 | 7096 | #endif // USE_X86_EXT_INTRIN |
7023 | 7097 | } |
7024 | - | |
7025 | -#if defined(REV_EX2_FFT) | |
7026 | 7098 | for(i = 0; i < 2; i++){ |
7027 | 7099 | #if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) |
7028 | 7100 | if(info->rvs[i] != NULL){ aligned_free(info->rvs[i]); info->rvs[i] = NULL; } |
@@ -7034,6 +7106,8 @@ void free_reverb_ex2(InfoReverbEX2 *info) | ||
7034 | 7106 | if(info->fi[i] != NULL){ aligned_free(info->fi[i]); info->fi[i] = NULL; } |
7035 | 7107 | if(info->bd[i] != NULL){ aligned_free(info->bd[i]); info->bd[i] = NULL; } |
7036 | 7108 | if(info->ios[i] != NULL){ aligned_free(info->ios[i]); info->ios[i] = NULL; } |
7109 | + if(info->fftw[i] != NULL){ aligned_free(info->fftw[i]); info->fftw[i] = NULL; } | |
7110 | + if(info->ffti[i] != NULL){ aligned_free(info->ffti[i]); info->ffti[i] = NULL; } | |
7037 | 7111 | #else |
7038 | 7112 | if(info->rvs[i] != NULL){ safe_freeinfo->rvs[i]); info->rvs[i] = NULL; } |
7039 | 7113 | if(info->rs[i] != NULL){ safe_freeinfo->rs[i]); info->rs[i] = NULL; } |
@@ -7044,15 +7118,10 @@ void free_reverb_ex2(InfoReverbEX2 *info) | ||
7044 | 7118 | if(info->fi[i] != NULL){ safe_freeinfo->fi[i]); info->fi[i] = NULL; } |
7045 | 7119 | if(info->bd[i] != NULL){ safe_freeinfo->bd[i]); info->bd[i] = NULL; } |
7046 | 7120 | if(info->ios[i] != NULL){ safe_freeinfo->ios[i]); info->ios[i] = NULL; } |
7121 | + if(info->fftw[i] != NULL){ safe_freeinfo->fftw[i]); info->fftw[i] = NULL; } | |
7122 | + if(info->ffti[i] != NULL){ safe_freeinfo->ffti[i]); info->ffti[i] = NULL; } | |
7047 | 7123 | #endif // USE_X86_EXT_INTRIN |
7048 | 7124 | } |
7049 | -#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
7050 | - if(info->sint != NULL){ aligned_free(info->sint); info->sint = NULL; } | |
7051 | -#else | |
7052 | - if(info->sint != NULL){ safe_free(info->sint); info->sint = NULL; } | |
7053 | -#endif // USE_X86_EXT_INTRIN | |
7054 | -#endif // defined(REV_EX2_FFT) | |
7055 | - | |
7056 | 7125 | #if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) |
7057 | 7126 | reset_effect_sub_thread(do_reverb_ex2_thread, info); |
7058 | 7127 | info->thread = 0; |
@@ -7072,7 +7141,6 @@ static void init_reverb_ex2(InfoReverbEX2 *info) | ||
7072 | 7141 | int32 amp = 100; |
7073 | 7142 | TCHAR path[FILEPATH_MAX] = {0}; |
7074 | 7143 | |
7075 | -#if defined(REV_EX2_FFT) | |
7076 | 7144 | if(ext_reverb_ex2_fftmode){ |
7077 | 7145 | init_reverb_ex2_fft(info); |
7078 | 7146 | return; |
@@ -7080,7 +7148,7 @@ static void init_reverb_ex2(InfoReverbEX2 *info) | ||
7080 | 7148 | if(info->fftmode) |
7081 | 7149 | free_reverb_ex2(info); |
7082 | 7150 | info->fftmode = 0; |
7083 | -#endif | |
7151 | + | |
7084 | 7152 | if(info->init){ |
7085 | 7153 | if(info->pmr_p != play_mode->rate || info->rt_p != info->revtype) |
7086 | 7154 | free_reverb_ex2(info); |
@@ -7941,12 +8009,10 @@ static void do_reverb_ex2_thread(int thread_num, void *info2) | ||
7941 | 8009 | info = (InfoReverbEX2 *)info2; |
7942 | 8010 | if(!info->init) |
7943 | 8011 | return; |
7944 | -#if defined(REV_EX2_FFT) | |
7945 | 8012 | if(info->fftmode){ |
7946 | 8013 | do_reverb_ex2_fft_thread(thread_num, info2); |
7947 | 8014 | return; |
7948 | 8015 | } |
7949 | -#endif | |
7950 | 8016 | if(thread_num >= (info->thread + info->ithread)) |
7951 | 8017 | return; |
7952 | 8018 | if(info->ithread){ |
@@ -7977,12 +8043,10 @@ static void do_reverb_ex2(DATA_T *buf, int32 count, InfoReverbEX2 *info) | ||
7977 | 8043 | return; |
7978 | 8044 | else if(!info->init) |
7979 | 8045 | return; |
7980 | -#if defined(REV_EX2_FFT) | |
7981 | 8046 | if(info->fftmode){ |
7982 | 8047 | do_reverb_ex2_fft(buf, count, info); |
7983 | 8048 | return; |
7984 | 8049 | } |
7985 | -#endif | |
7986 | 8050 | info->ptr = buf; |
7987 | 8051 | info->count = count; |
7988 | 8052 | info->tcount = count >> (1 + info->rsmode); |
@@ -8001,161 +8065,90 @@ static void do_reverb_ex2(DATA_T *buf, int32 count, InfoReverbEX2 *info) | ||
8001 | 8065 | } |
8002 | 8066 | |
8003 | 8067 | |
8004 | -#if defined(REV_EX2_FFT) | |
8005 | -// freeverb3 irmodel2zl.cpp を参考に書いてみたが・・ | |
8068 | +// REV_EX2_FFT | |
8069 | +// freeverb3 irmodel2zl.cpp irmodel2.cpp を参考に | |
8006 | 8070 | |
8007 | - | |
8008 | -#define REV_EX2_FFT_LEVEL (1. * (1.0 - REV_EX2_ST_CROSS)) | |
8009 | -#define REV_EX2_FRAGBIT (12) // 10 ~ 14 | |
8071 | +#define REV_EX2_FFT_LEVEL (0.25 * (1.0 - REV_EX2_ST_CROSS)) | |
8072 | +#define REV_EX2_FRAGBIT (10) // 10 ~ 14 | |
8010 | 8073 | #define REV_EX2_FRAGSIZE (1 << REV_EX2_FRAGBIT) // 2^REV_EX2_FRAGBIT > synthbuffer size |
8011 | 8074 | #define REV_EX2_FFTSIZE (REV_EX2_FRAGSIZE << 1) |
8012 | 8075 | |
8013 | -static void do_reverb_ex2__fft(float *fft, float *st) | |
8076 | +static void do_reverb_ex2_rdft(float *fft, int d, int *ip, float *w) | |
8014 | 8077 | { |
8015 | - const int32 cosofs = REV_EX2_FRAGSIZE >> 2; | |
8016 | - const uint32 stmask = REV_EX2_FRAGSIZE - 1; | |
8017 | - float *ar = fft; | |
8018 | - float *ai = fft + REV_EX2_FRAGSIZE; | |
8019 | - int i = 0, j, k, m, mh, irev; | |
8020 | - float xr, xi; | |
8021 | - | |
8022 | - for (j = 1; j < (REV_EX2_FRAGSIZE - 1); j++){ | |
8023 | - for(k = (REV_EX2_FRAGSIZE >> 1); k > (i ^= k); k >>= 1){} | |
8024 | - if(j < i){ | |
8025 | - xr = *(ar + j); | |
8026 | - xi = *(ai + j); | |
8027 | - *(ar + j) = *(ar + i); | |
8028 | - *(ai + j) = *(ai + i); | |
8029 | - *(ar + i) = xr; | |
8030 | - *(ai + i) = xi; | |
8031 | - } | |
8032 | - } | |
8033 | - for(mh = 1; (m = mh << 1) <= REV_EX2_FRAGSIZE; mh = m){ | |
8034 | - irev = 0; | |
8035 | - for(i = 0; i < REV_EX2_FRAGSIZE; i += m){ | |
8036 | - float tsin = st[irev & stmask]; | |
8037 | - float tcos = st[(irev + cosofs) & stmask]; | |
8038 | - for(k = (REV_EX2_FRAGSIZE >> 2); k > (irev ^= k); k >>= 1){} | |
8039 | - for(j = i; j < mh + i; j++){ | |
8040 | - k = j + mh; | |
8041 | - xr = *(ar + j) - *(ar + k); | |
8042 | - xi = *(ai + j) - *(ai + k); | |
8043 | - *(ar + j) += *(ar + k); | |
8044 | - *(ai + j) += *(ai + k); | |
8045 | - *(ar + k) = tcos * xr - tsin * xi; | |
8046 | - *(ai + k) = tcos * xi + tsin * xr; | |
8047 | - } | |
8048 | - } | |
8049 | - } | |
8078 | +#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8079 | + rdft_simd(REV_EX2_FFTSIZE, d, fft, ip, w); | |
8080 | +#else | |
8081 | + rdft(REV_EX2_FFTSIZE, d, fft, ip, w); | |
8082 | +#endif | |
8050 | 8083 | } |
8051 | 8084 | |
8052 | -static void do_reverb_ex2_R2HC(float *iL, float *oL, float *st) | |
8085 | +static void do_reverb_ex2_R2HC(float *iL, float *oL, int *ip, float *w) | |
8053 | 8086 | { |
8054 | 8087 | const int32 fbyte = sizeof(float) * REV_EX2_FRAGSIZE; |
8055 | - int32 i = 0; | |
8088 | + const int32 ribyte = sizeof(float) * REV_EX2_FFTSIZE; | |
8089 | + int32 i, k; | |
8056 | 8090 | ALIGN float fo[REV_EX2_FFTSIZE] = {0}; |
8091 | + float *or = oL; | |
8092 | + float *oi = oL + REV_EX2_FRAGSIZE; | |
8057 | 8093 | |
8058 | 8094 | memcpy(fo, iL, fbyte); |
8059 | - do_reverb_ex2__fft(fo, st); | |
8060 | -#if 0 | |
8061 | - for(i = 0; i < REV_EX2_FRAGSIZE; i++){ | |
8062 | - oL[i ] = fo[i]; | |
8063 | - oL[REV_EX2_FFTSIZE - 1 - 1] = fo[REV_EX2_FFTSIZE - 1 - i]; | |
8064 | - } | |
8065 | -#elif 0 | |
8066 | - oL[0] = fo[0]; | |
8067 | - oL[1] = fo[REV_EX2_FRAGSIZE]; | |
8068 | - for(i = 1; i < REV_EX2_FRAGSIZE; i++){ | |
8069 | - oL[2 * i ] = fo[REV_EX2_FFTSIZE - i]; | |
8070 | - oL[2 * i + 1] = fo[REV_EX2_FFTSIZE - i]; | |
8095 | + do_reverb_ex2_rdft(fo, 1, ip, w); | |
8096 | +#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8097 | + for(i = 0, k = 0; i < REV_EX2_FRAGSIZE; i += 4, k += 8){ | |
8098 | + __m128 vin0 = _mm_load_ps(&fo[k]); // [0123] | |
8099 | + __m128 vin1 = _mm_load_ps(&fo[k + 4]); // [4567] | |
8100 | + vin0 = _mm_shuffle_ps(vin0, vin0, 0xD8); // [0213] | |
8101 | + vin1 = _mm_shuffle_ps(vin1, vin1, 0xD8); // [4657] | |
8102 | + _mm_store_ps(&or[i], _mm_shuffle_ps(vin0, vin1, 0x44)); // [0246] | |
8103 | + _mm_store_ps(&oi[i], _mm_shuffle_ps(vin0, vin1, 0xEE)); // [1357] | |
8071 | 8104 | } |
8072 | -#elif 0 | |
8105 | +#else | |
8073 | 8106 | for(i = 0; i < REV_EX2_FRAGSIZE; i++){ |
8074 | - oL[2 * i ] = fo[i]; | |
8075 | - oL[2 * i + 1] = fo[REV_EX2_FFTSIZE - 1 - i]; | |
8107 | + or[i] = fo[2 * i ]; | |
8108 | + oi[i] = fo[2 * i + 1]; | |
8076 | 8109 | } |
8077 | -#else | |
8078 | - memcpy(oL, fo, sizeof(float) * REV_EX2_FFTSIZE); | |
8079 | 8110 | #endif |
8080 | - | |
8081 | 8111 | } |
8082 | 8112 | |
8083 | -static void do_reverb_ex2_HC2R(float *iL, float *oL, float *st) | |
8113 | +static void do_reverb_ex2_HC2R(float *iL, float *oL, int *ip, float *w) | |
8084 | 8114 | { |
8085 | - int32 i = 0; | |
8115 | + const int32 ribyte = sizeof(float) * REV_EX2_FFTSIZE; | |
8116 | + int32 i, k; | |
8086 | 8117 | ALIGN float fo[REV_EX2_FFTSIZE] = {0}; |
8118 | + float *ir = iL; | |
8119 | + float *ii = iL + REV_EX2_FRAGSIZE; | |
8087 | 8120 | |
8088 | -#if 0 | |
8089 | - for(i = 0; i < REV_EX2_FRAGSIZE; i++){ | |
8090 | - fo[i] = iL[i ]; | |
8091 | - fo[REV_EX2_FFTSIZE - 1 - 1] = iL[REV_EX2_FFTSIZE - 1 - i]; | |
8092 | - } | |
8093 | - | |
8094 | -#elif 0 | |
8095 | - fo[0] = iL[0]; | |
8096 | - fo[REV_EX2_FRAGSIZE] = iL[1]; | |
8097 | - for(i = 1; i < REV_EX2_FRAGSIZE; i++){ | |
8098 | - fo[REV_EX2_FFTSIZE - i] = iL[2 * i ]; | |
8099 | - fo[REV_EX2_FFTSIZE - i] = iL[2 * i + 1]; | |
8121 | +#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8122 | + for(i = 0, k = 0; i < REV_EX2_FRAGSIZE; i += 4, k += 8){ | |
8123 | + __m128 vin0 = _mm_load_ps(&ir[i]); // [0246] | |
8124 | + __m128 vin1 = _mm_load_ps(&ii[i]); // [1357] | |
8125 | + __m128 vt0 = _mm_shuffle_ps(vin0, vin1, 0x44); // [0213] | |
8126 | + __m128 vt1 = _mm_shuffle_ps(vin0, vin1, 0xEE); // [4657] | |
8127 | + _mm_store_ps(&fo[k ], _mm_shuffle_ps(vt0, vt0, 0xD8)); // [0123] | |
8128 | + _mm_store_ps(&fo[k + 4], _mm_shuffle_ps(vt1, vt1, 0xD8)); // [4567] | |
8100 | 8129 | } |
8101 | -#elif 0 | |
8130 | +#else | |
8102 | 8131 | for(i = 0; i < REV_EX2_FRAGSIZE; i++){ |
8103 | - fo[i ] = iL[2 * i ]; | |
8104 | - fo[REV_EX2_FFTSIZE - 1 - i] = iL[2 * i + 1]; | |
8132 | + fo[2 * i ] = ir[i]; | |
8133 | + fo[2 * i + 1] = ii[i]; | |
8105 | 8134 | } |
8135 | +#endif | |
8136 | + do_reverb_ex2_rdft(fo, -1, ip, w); | |
8137 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8138 | + for(i = 0; i < REV_EX2_FFTSIZE; i += 8) | |
8139 | + MM256_LS_ADD_PS(&oL[i], _mm256_load_ps(&fo[i])); | |
8140 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8141 | + for(i = 0; i < REV_EX2_FFTSIZE; i += 4) | |
8142 | + MM_LS_ADD_PS(&oL[i], _mm_load_ps(&fo[i])); | |
8106 | 8143 | #else |
8107 | - memcpy(fo, iL, sizeof(float) * REV_EX2_FFTSIZE); | |
8144 | + for(i = 0; i < REV_EX2_FFTSIZE; i++) | |
8145 | + oL[i] += fo[i]; | |
8108 | 8146 | #endif |
8109 | - do_reverb_ex2__fft(fo, st); | |
8110 | - { | |
8111 | - float *pfor = fo; | |
8112 | - float *pfoi = fo + REV_EX2_FRAGSIZE; | |
8113 | - float *or = oL; | |
8114 | - float *oi = oL + REV_EX2_FRAGSIZE; | |
8115 | - const float divfr = 1.0;// / (double)REV_EX2_FRAGSIZE; | |
8116 | - const float divfi = -1.0;// / (double)REV_EX2_FRAGSIZE; | |
8117 | - for(i = 0; i < REV_EX2_FFTSIZE; i++){ | |
8118 | - // or[i] += pfor[i] * divfi; | |
8119 | - or[REV_EX2_FFTSIZE - 1 - i] += pfor[i] * divfr; | |
8120 | - // oi[i] += pfoi[i] * divfi; | |
8121 | - } | |
8122 | - } | |
8123 | 8147 | } |
8124 | 8148 | |
8125 | 8149 | static void do_reverb_ex2_mul(float *iL, float *fL, float *oL) |
8126 | 8150 | { |
8127 | 8151 | int32 i; |
8128 | -#if 0 | |
8129 | - float tL0 = oL[0] + iL[0] * fL[0]; | |
8130 | - float tL1 = oL[1] + iL[1] * fL[1]; | |
8131 | -#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8132 | - const __m128 vm1 = _mm_set_ps(1, -1, 1, -1); | |
8133 | - for(i = 0; i < REV_EX2_FRAGSIZE; i += 2){ | |
8134 | - __m128 vo = _mm_load_ps(&oL[2 * i]); | |
8135 | - __m128 vi = _mm_loadu_ps(&iL[2 * i]); | |
8136 | - __m128 vf0 = _mm_load_ps(&fL[2 * i]); | |
8137 | - __m128 vf1 = _mm_shuffle_ps(vf0, vf0, 0xB1); | |
8138 | - __m128 vi0 = _mm_shuffle_ps(vi, vi, 0xA0); | |
8139 | - __m128 vi1 = _mm_shuffle_ps(vi, vi, 0xF5); | |
8140 | - vf1 = _mm_mul_ps(vf1, vm1); | |
8141 | - vo = MM_FMA_PS(vi0, vf0, vo); | |
8142 | - vo = MM_FMA_PS(vi1, vf1, vo); | |
8143 | - _mm_store_ps(&oL[2 * i], vo); | |
8144 | - } | |
8145 | -#else | |
8146 | - for(i = 0; i < REV_EX2_FRAGSIZE; i++){ | |
8147 | - float i0 = iL[2 * i + 0]; | |
8148 | - float i1 = iL[2 * i + 1]; | |
8149 | - float f0 = fL[2 * i + 0]; | |
8150 | - float f1 = fL[2 * i + 1]; | |
8151 | - oL[2 * i + 0] += i0 * f0 - i1 * f1; | |
8152 | - oL[2 * i + 1] += i0 * f1 + i1 * f0; | |
8153 | - } | |
8154 | -#endif | |
8155 | - oL[0] = tL0; | |
8156 | - oL[1] = tL1; | |
8157 | - | |
8158 | -#else | |
8159 | 8152 | float *ir = iL; |
8160 | 8153 | float *ii = iL + REV_EX2_FRAGSIZE; |
8161 | 8154 | float *fr = fL; |
@@ -8165,7 +8158,24 @@ static void do_reverb_ex2_mul(float *iL, float *fL, float *oL) | ||
8165 | 8158 | float tor = or[0] + ir[0] * fr[0]; |
8166 | 8159 | float toi = oi[0] + ii[0] * fi[0]; |
8167 | 8160 | |
8168 | -#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8161 | +#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8162 | + const __m256 vm1 = _mm256_set1_ps(-1); | |
8163 | + for(i = 0; i < REV_EX2_FRAGSIZE; i += 8){ | |
8164 | + __m256 vir = _mm256_load_ps(&ir[i]); | |
8165 | + __m256 vii = _mm256_load_ps(&ii[i]); | |
8166 | + __m256 vfr = _mm256_load_ps(&fr[i]); | |
8167 | + __m256 vfi = _mm256_load_ps(&fi[i]); | |
8168 | + __m256 vor = _mm256_load_ps(&or[i]); | |
8169 | + __m256 voi = _mm256_load_ps(&oi[i]); | |
8170 | + __m256 vfm = _mm256_mul_ps(vfi, vm1); | |
8171 | + vor = MM256_FMA_PS(vir, vfr , vor); | |
8172 | + vor = MM256_FMA_PS(vii, vfm , vor); | |
8173 | + voi = MM256_FMA_PS(vir, vfi, voi); | |
8174 | + voi = MM256_FMA_PS(vii, vfr, voi); | |
8175 | + _mm256_store_ps(&or[i], vor); | |
8176 | + _mm256_store_ps(&oi[i], voi); | |
8177 | + } | |
8178 | +#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8169 | 8179 | const __m128 vm1 = _mm_set1_ps(-1); |
8170 | 8180 | for(i = 0; i < REV_EX2_FRAGSIZE; i += 4){ |
8171 | 8181 | __m128 vir = _mm_load_ps(&ir[i]); |
@@ -8194,8 +8204,6 @@ static void do_reverb_ex2_mul(float *iL, float *fL, float *oL) | ||
8194 | 8204 | #endif |
8195 | 8205 | or[0] = tor; |
8196 | 8206 | oi[0] = toi; |
8197 | - | |
8198 | -#endif | |
8199 | 8207 | } |
8200 | 8208 | |
8201 | 8209 | static float* do_reverb_ex2_delay(float *in, int32 prev, float *dbuf, int32 *bcount, int32 bnum) |
@@ -8312,7 +8320,7 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info) | ||
8312 | 8320 | #else |
8313 | 8321 | ndata[0] = (float *) safe_large_malloc(nbytes); |
8314 | 8322 | ndata[1] = (float *) safe_large_malloc(nbytes); |
8315 | -#endif | |
8323 | +#endif fnum | |
8316 | 8324 | if(!ndata[0] || !ndata[0]) |
8317 | 8325 | goto error; |
8318 | 8326 | memset(ndata[0], 0, nbytes); |
@@ -8357,7 +8365,7 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info) | ||
8357 | 8365 | if(info->irdata[i] != NULL){ safe_free(info->irdata[i]); info->irdata[i] = NULL; } |
8358 | 8366 | #endif |
8359 | 8367 | } |
8360 | - info->frame = nframe; | |
8368 | + info->frame = tframe; | |
8361 | 8369 | info->srate = rsrate; |
8362 | 8370 | info->irdata[0] = ndata[0]; |
8363 | 8371 | info->irdata[1] = ndata[1]; |
@@ -8396,7 +8404,7 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info) | ||
8396 | 8404 | // create buffers |
8397 | 8405 | fnum = info->frame / REV_EX2_FRAGSIZE; |
8398 | 8406 | bytes = sizeof(float) * (REV_EX2_FRAGSIZE + 8); |
8399 | - ibytes = sizeof(int32) * REV_EX2_FRAGSIZE; | |
8407 | + ibytes = sizeof(int) * (REV_EX2_FRAGSIZE + 8); | |
8400 | 8408 | for(i = 0; i < 2; i++){ |
8401 | 8409 | #if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) |
8402 | 8410 | info->rvs[i] = (float *) aligned_malloc(bytes * 2, ALIGN_SIZE); |
@@ -8408,6 +8416,8 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info) | ||
8408 | 8416 | info->fi[i] = (float *) aligned_malloc(bytes * 2 * fnum, ALIGN_SIZE); |
8409 | 8417 | info->bd[i] = (float *) aligned_malloc(bytes * 2 * fnum, ALIGN_SIZE); |
8410 | 8418 | info->ios[i] = (float *) aligned_malloc(bytes * 3, ALIGN_SIZE); |
8419 | + info->fftw[i] = (float *) aligned_malloc(bytes * 2, ALIGN_SIZE); | |
8420 | + info->ffti[i] = (int *) aligned_malloc(ibytes * 2, ALIGN_SIZE); | |
8411 | 8421 | #else |
8412 | 8422 | info->rvs[i] = (float *) safe_large_malloc(bytes * 2); |
8413 | 8423 | info->rs[i] = (float *) safe_large_malloc(bytes * 2); |
@@ -8418,9 +8428,12 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info) | ||
8418 | 8428 | info->ios[i] = (float *) safe_large_malloc(bytes * 3); |
8419 | 8429 | info->fi[i] = (float *) safe_large_malloc(bytes * 2 * fnum); |
8420 | 8430 | info->bd[i] = (float *) safe_large_malloc(bytes * 2 * fnum); |
8431 | + info->fftw[i] = (float *) safe_large_malloc(bytes * 2); | |
8432 | + info->ffti[i] = (int *) safe_large_malloc(ibytes * 2); | |
8421 | 8433 | #endif |
8422 | 8434 | if(!info->rvs[i] || !info->rs[i] || !info->is[i] || !info->ss[i] || !info->os[i] |
8423 | - || !info->fs[i] || !info->fi[i] || !info->bd[i] || !info->ios[i] | |
8435 | + || !info->fs[i] || !info->fi[i] || !info->bd[i] || !info->ios[i] | |
8436 | + || !info->fftw[i] || !info->ffti[i] | |
8424 | 8437 | ){ |
8425 | 8438 | goto error; |
8426 | 8439 | } |
@@ -8433,26 +8446,15 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info) | ||
8433 | 8446 | memset(info->fi[i], 0, bytes * 2 * fnum); |
8434 | 8447 | memset(info->bd[i], 0, bytes * 2 * fnum); |
8435 | 8448 | memset(info->ios[i], 0, bytes * 3); |
8436 | - } | |
8437 | - // sin table | |
8438 | -#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) | |
8439 | - info->sint = (float *) aligned_malloc(bytes, ALIGN_SIZE); | |
8440 | -#else | |
8441 | - info->sint = (float *) safe_large_malloc(bytes); | |
8442 | -#endif | |
8443 | - if(!info->sint) | |
8444 | - goto error; | |
8445 | - memset(info->sint, 0, bytes); | |
8446 | - for(i = 0; i < REV_EX2_FRAGSIZE; i++){ | |
8447 | - const double rad = M_PI * 2.0 / REV_EX2_FRAGSIZE; | |
8448 | - info->sint[i] = (float)((double)sin(rad * i)); | |
8449 | + memset(info->fftw[i], 0, bytes * 2); | |
8450 | + memset(info->ffti[i], 0, ibytes * 2); | |
8449 | 8451 | } |
8450 | 8452 | // impulse |
8451 | 8453 | for(i = 0; i < fnum; i++){ |
8452 | 8454 | int32 fofs = REV_EX2_FRAGSIZE * i; |
8453 | 8455 | int32 riofs = REV_EX2_FFTSIZE * i; |
8454 | - do_reverb_ex2_R2HC(info->irdata[0] + fofs, info->fi[0] + riofs, info->sint); | |
8455 | - do_reverb_ex2_R2HC(info->irdata[1] + fofs, info->fi[1] + riofs, info->sint); | |
8456 | + do_reverb_ex2_R2HC(info->irdata[0] + fofs, info->fi[0] + riofs, info->ffti[0], info->fftw[0]); | |
8457 | + do_reverb_ex2_R2HC(info->irdata[1] + fofs, info->fi[1] + riofs, info->ffti[1], info->fftw[1]); | |
8456 | 8458 | } |
8457 | 8459 | info->fnum = fnum; |
8458 | 8460 | // create input/output buffers |
@@ -8517,17 +8519,17 @@ static void do_reverb_ex2_fft_process1(int32 ofs, int32 count, int32 ch, InfoRev | ||
8517 | 8519 | memset(info->ss[ch], 0, ribyte); |
8518 | 8520 | memset(info->rvs[ch] + REV_EX2_FRAGSIZE - 1, 0, sizeof(float) * (REV_EX2_FRAGSIZE + 1)); |
8519 | 8521 | for(i = 1; i < info->fnum; i++){ |
8520 | - float *bd0 = do_reverb_ex2_delay(info->is[ch], i - 1, info->bd[ch], &info->bdcount[ch], info->fnum); | |
8521 | - do_reverb_ex2_mul(bd0, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]); | |
8522 | + float *bd = do_reverb_ex2_delay(info->is[ch], i - 1, info->bd[ch], &info->bdcount[ch], info->fnum); | |
8523 | + do_reverb_ex2_mul(bd, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]); | |
8522 | 8524 | } |
8523 | 8525 | } |
8524 | - memset(info->os[ch], 0, fbyte); | |
8526 | + memset(info->os[ch], 0, f2byte); | |
8525 | 8527 | memcpy(info->fs[ch] + info->scount[ch], input, cbyte); |
8526 | 8528 | memcpy(info->os[ch] + info->scount[ch], input, cbyte); |
8527 | - do_reverb_ex2_R2HC(info->os[ch], info->is[ch], info->sint); | |
8529 | + do_reverb_ex2_R2HC(info->os[ch], info->is[ch], info->ffti[ch], info->fftw[ch]); | |
8528 | 8530 | do_reverb_ex2_mul(info->is[ch], info->fi[ch], info->ss[ch]); |
8529 | - memset(info->rvs[ch], 0, fbyte); | |
8530 | - do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->sint); | |
8531 | + memset(info->rvs[ch], 0, f2byte); | |
8532 | + do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->ffti[ch], info->fftw[ch]); | |
8531 | 8533 | rvsc = info->rvs[ch] + info->scount[ch]; |
8532 | 8534 | rsc = info->rs[ch] + info->scount[ch]; |
8533 | 8535 | #if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE) |
@@ -8539,8 +8541,7 @@ static void do_reverb_ex2_fft_process1(int32 ofs, int32 count, int32 ch, InfoRev | ||
8539 | 8541 | #endif |
8540 | 8542 | info->scount[ch] += count; |
8541 | 8543 | if(info->scount[ch] == REV_EX2_FRAGSIZE){ |
8542 | - const int32 fbyte2 = sizeof(float) * (REV_EX2_FRAGSIZE - 1); | |
8543 | - do_reverb_ex2_R2HC(info->fs[ch], info->is[ch], info->sint); | |
8544 | + do_reverb_ex2_R2HC(info->fs[ch], info->is[ch], info->ffti[ch], info->fftw[ch]); | |
8544 | 8545 | memcpy(info->rs[ch], info->rvs[ch] + REV_EX2_FRAGSIZE, sizeof(float) * (REV_EX2_FRAGSIZE - 1)); |
8545 | 8546 | info->scount[ch] = 0; |
8546 | 8547 | } |
@@ -8556,24 +8557,26 @@ static void do_reverb_ex2_fft_process2(int32 count, int32 ch, InfoReverbEX2 *inf | ||
8556 | 8557 | const int32 f2byte = sizeof(float) * REV_EX2_FRAGSIZE * 2; |
8557 | 8558 | const int32 cbyte = sizeof(float) * count; |
8558 | 8559 | const int32 ribyte = sizeof(float) * REV_EX2_FFTSIZE; |
8559 | - | |
8560 | - memcpy(info->ios[ch] + info->scount[ch] + REV_EX2_FRAGSIZE, input, cbyte); | |
8560 | + float *iobuf = info->ios[ch] + REV_EX2_FRAGSIZE; | |
8561 | + | |
8562 | + memcpy(iobuf + info->scount[ch], input, cbyte); | |
8561 | 8563 | if((info->scount[ch] + count) >= REV_EX2_FRAGSIZE) { |
8562 | - do_reverb_ex2_R2HC(info->ios[ch] + REV_EX2_FRAGSIZE, info->is[ch], info->sint); | |
8564 | + do_reverb_ex2_R2HC(iobuf, info->is[ch], info->ffti[ch], info->fftw[ch]); | |
8563 | 8565 | memset(info->ss[ch], 0, ribyte); |
8564 | - for(i = 1; i < info->fnum; i++){ | |
8565 | - float *bd0 = do_reverb_ex2_delay(info->is[ch], i - 1, info->bd[ch], &info->bdcount[ch], info->fnum); | |
8566 | - do_reverb_ex2_mul(bd0, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]); | |
8566 | + for(i = 0; i < info->fnum; i++){ | |
8567 | + float *bd = do_reverb_ex2_delay(info->is[ch], i, info->bd[ch], &info->bdcount[ch], info->fnum); | |
8568 | + do_reverb_ex2_mul(bd, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]); | |
8567 | 8569 | } |
8568 | - do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->sint); | |
8569 | - memcpy(info->ios[ch] + REV_EX2_FRAGSIZE, info->rvs[ch], fbyte); | |
8570 | + do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->ffti[ch], info->fftw[ch]); | |
8571 | + memcpy(iobuf, info->rvs[ch], fbyte); | |
8570 | 8572 | memcpy(info->rvs[ch], info->rvs[ch] + REV_EX2_FRAGSIZE, fbyte); |
8571 | 8573 | memset(info->rvs[ch] + REV_EX2_FRAGSIZE - 1, 0, sizeof(float) * (REV_EX2_FRAGSIZE + 1)); |
8572 | 8574 | } |
8573 | 8575 | memcpy(output, info->ios[ch] + info->scount[ch], cbyte); |
8574 | 8576 | info->scount[ch] += count; |
8575 | 8577 | if(info->scount[ch] >= REV_EX2_FRAGSIZE) { |
8576 | - memcpy(info->ios[ch], info->ios[ch] + REV_EX2_FRAGSIZE, f2byte); | |
8578 | + memcpy(info->ios[ch], iobuf, f2byte); | |
8579 | + memset(iobuf + REV_EX2_FRAGSIZE, 0, fbyte); | |
8577 | 8580 | info->scount[ch] -= REV_EX2_FRAGSIZE; |
8578 | 8581 | } |
8579 | 8582 | return; |
@@ -8819,9 +8822,6 @@ static void do_reverb_ex2_fft(DATA_T *buf, int32 count, InfoReverbEX2 *info) | ||
8819 | 8822 | do_reverb_ex2_post_process(buf, count, info); |
8820 | 8823 | } |
8821 | 8824 | |
8822 | -#endif | |
8823 | - | |
8824 | - | |
8825 | 8825 | |
8826 | 8826 | |
8827 | 8827 | /* */ |
@@ -702,7 +702,6 @@ typedef struct _InfoReverbEX{ | ||
702 | 702 | } InfoReverbEX; |
703 | 703 | |
704 | 704 | |
705 | -//#define REV_EX2_FFT | |
706 | 705 | typedef struct _InfoReverbEX2{ |
707 | 706 | int8 mode; |
708 | 707 | int32 revtype; |
@@ -713,11 +712,10 @@ typedef struct _InfoReverbEX2{ | ||
713 | 712 | float *irdata[2], *buf[2], *tbuf[2]; // buf:delay(in)*2 , tbuf:out*2 |
714 | 713 | FLOAT_T rsfb[2]; |
715 | 714 | DATA_T *ptr; |
716 | -#if defined(REV_EX2_FFT) // fft | |
717 | 715 | int32 fnum, scount[2], bdcount[2]; |
718 | 716 | float *fs[2], *ss[2], *rvs[2], *rs[2], *is[2], *os[2], *fi[2], *bd[2], *ios[2]; |
719 | - float *sint; | |
720 | -#endif | |
717 | + float *fftw[2]; | |
718 | + int *ffti[2]; | |
721 | 719 | } InfoReverbEX2; |
722 | 720 | |
723 | 721 |
@@ -4823,7 +4823,7 @@ static inline void compute_op_null(Info_OP *info){} | ||
4823 | 4823 | |
4824 | 4824 | static inline void compute_op_wave_none(Info_OP *info) |
4825 | 4825 | { |
4826 | - FLOAT_T osc, lfo1, lfo2; | |
4826 | + FLOAT_T osc; | |
4827 | 4827 | |
4828 | 4828 | info->in = 0.0; // clear |
4829 | 4829 | info->rate += info->freq; // +1/sr = 1Hz |
@@ -4835,7 +4835,7 @@ static inline void compute_op_wave_none(Info_OP *info) | ||
4835 | 4835 | |
4836 | 4836 | static inline void compute_op_wave_fm(Info_OP *info) |
4837 | 4837 | { |
4838 | - FLOAT_T osc, rt; | |
4838 | + FLOAT_T osc; | |
4839 | 4839 | FLOAT_T in = info->in; |
4840 | 4840 | |
4841 | 4841 | info->in = 0.0; // clear |
@@ -4922,7 +4922,7 @@ static inline void compute_op_wave_lowbit(Info_OP *info) | ||
4922 | 4922 | |
4923 | 4923 | static inline void compute_op_scc_none(Info_OP *info) |
4924 | 4924 | { |
4925 | - FLOAT_T osc, lfo1, lfo2; | |
4925 | + FLOAT_T osc; | |
4926 | 4926 | |
4927 | 4927 | info->in = 0.0; // clear |
4928 | 4928 | info->rate += info->freq; // +1/sr = 1Hz |
@@ -4983,10 +4983,7 @@ static inline void compute_op_scc_ampm(Info_OP *info) | ||
4983 | 4983 | info->rate += info->freq; // +1/sr = 1Hz |
4984 | 4984 | RESET_OP_RATE |
4985 | 4985 | rt = info->rate + in * info->mod_level; // mod level; |
4986 | - if(rt >= 1.0) | |
4987 | - rt -= floor(rt); | |
4988 | - else if(rt < 0.0) | |
4989 | - rt += floor(rt); | |
4986 | + rt -= floor(rt); | |
4990 | 4987 | osc = info->scc_ptr(calc_op_width(info, rt), info->data_ptr); |
4991 | 4988 | osc *= (1.0 - ((FLOAT_T)in * DIV_2 + 0.5) * info->mod_level); |
4992 | 4989 | op_filter(&info->fc, &osc); |
@@ -5039,7 +5036,7 @@ static inline void compute_op_pcm_fm(Info_OP *info) | ||
5039 | 5036 | FLOAT_T in = info->in; // |
5040 | 5037 | |
5041 | 5038 | info->in = 0.0; // clear |
5042 | - info->rate += info->freq * (1.0 + (FLOAT_T)in * info->mod_level); // +1/sr*pcm_rate/root_freq = 1Hz | |
5039 | + info->pcm_rate = info->rate += info->freq * (1.0 + (FLOAT_T)in * info->mod_level); // +1/sr*pcm_rate/root_freq = 1Hz | |
5043 | 5040 | osc = compute_pcm_linear(info); |
5044 | 5041 | op_filter(&info->fc, &osc); |
5045 | 5042 | compute_op_output(info, osc * info->amp_vol); // include info->op_level |
@@ -306,8 +306,8 @@ | ||
306 | 306 | ext_reverb_ex_mod = MyIni_GetInt32(sec, "Ext_EX_Mod", 0); |
307 | 307 | // reverb ex2 |
308 | 308 | ext_reverb_ex2_level = MyIni_GetFloat32(sec, "Ext_SR_Level", 1.0); |
309 | - ext_reverb_ex2_rsmode = MyIni_GetInt32(sec, "Ext_SR_RS_Mode", 3); | |
310 | - ext_reverb_ex2_fftmode = MyIni_GetInt32(sec, "Ext_SR_FFT_Mode", 0); | |
309 | + ext_reverb_ex2_rsmode = MyIni_GetInt32(sec, "Ext_SR_RS_Mode", 0); | |
310 | + ext_reverb_ex2_fftmode = MyIni_GetInt32(sec, "Ext_SR_FFT_Mode", 1); | |
311 | 311 | // plate reverb |
312 | 312 | ext_plate_reverb_level = MyIni_GetFloat32(sec, "Ext_Plate_Level", 1.0); |
313 | 313 | ext_plate_reverb_level = MyIniParamRange(ext_plate_reverb_level, 0.001, 8.0); |
@@ -5083,6 +5083,7 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp) | ||
5083 | 5083 | int i; |
5084 | 5084 | int32 smf_at_time; |
5085 | 5085 | int note_seen = (! opt_preserve_silence); |
5086 | + int hascc111; | |
5086 | 5087 | |
5087 | 5088 | smf_at_time = readmidi_set_track(trackno, rewindp); |
5088 | 5089 |
@@ -5103,6 +5104,7 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp) | ||
5103 | 5104 | } |
5104 | 5105 | |
5105 | 5106 | lastchan = laststatus = 0; |
5107 | + hascc111 = 0; | |
5106 | 5108 | |
5107 | 5109 | for(;;) |
5108 | 5110 | { |
@@ -5279,6 +5281,8 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp) | ||
5279 | 5281 | break; |
5280 | 5282 | |
5281 | 5283 | case 0x2F: /* End of Track */ |
5284 | + if (hascc111 != 0) | |
5285 | + MIDIEVENT(smf_at_time, ME_NONE, 0, 0, 0); | |
5282 | 5286 | pos = tf_tell(tf); |
5283 | 5287 | if(pos < next_pos) |
5284 | 5288 | tf_seek(tf, next_pos - pos, SEEK_CUR); |
@@ -5408,6 +5412,12 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp) | ||
5408 | 5412 | case 3: /* Control change */ |
5409 | 5413 | b = tf_getc(tf); |
5410 | 5414 | readmidi_add_ctl_event(smf_at_time, lastchan, a, b); |
5415 | + if (a == 111) { | |
5416 | + if (hascc111 == 0) | |
5417 | + ctl->cmsg(CMSG_INFO, VERB_DEBUG, | |
5418 | + "Detection loop start event CC#111"); | |
5419 | + hascc111 = 1; | |
5420 | + } | |
5411 | 5421 | break; |
5412 | 5422 | |
5413 | 5423 | case 4: /* Program change */ |
@@ -2277,13 +2277,15 @@ MAIN_INTERFACE int read_config_file(const char *name, int self, int allow_missin | ||
2277 | 2277 | char *basedir = NULL, *sep = NULL; |
2278 | 2278 | char *onmemory = NULL; |
2279 | 2279 | |
2280 | + if(rcf_count == 0){ | |
2280 | 2281 | #ifdef VOICE_EFFECT |
2281 | - cfg_flg_vfx = 0; | |
2282 | + cfg_flg_vfx = 0; | |
2282 | 2283 | #endif |
2283 | 2284 | #ifdef INT_SYNTH |
2284 | - cfg_flg_int_synth_mms = 0; | |
2285 | - cfg_flg_int_synth_scc = 0; | |
2285 | + cfg_flg_int_synth_mms = 0; | |
2286 | + cfg_flg_int_synth_scc = 0; | |
2286 | 2287 | #endif |
2288 | + } | |
2287 | 2289 | |
2288 | 2290 | if (rcf_count > 50) |
2289 | 2291 | { |
@@ -2821,6 +2821,8 @@ void init_voice_effect(int v) | ||
2821 | 2821 | VoiceEffect *vfx = voice[v].vfx[i]; |
2822 | 2822 | int num = voice[v].sample->vfx[i][0]; // [0] = effect type |
2823 | 2823 | |
2824 | + if(!vfx) | |
2825 | + break; | |
2824 | 2826 | if(num <= VFX_NONE || num >= VFX_LIST_MAX) |
2825 | 2827 | break; |
2826 | 2828 | memcpy(vfx->param, voice[v].sample->vfx[i], sizeof(int) * VOICE_EFFECT_PARAM_NUM); |
@@ -285,6 +285,7 @@ Appendix : | ||
285 | 285 | w[] and ip[] are compatible with all routines. |
286 | 286 | */ |
287 | 287 | |
288 | +#include "optcode.h" | |
288 | 289 | |
289 | 290 | void cdft(int n, int isgn, float *a, int *ip, float *w) |
290 | 291 | { |
@@ -358,6 +359,52 @@ void rdft(int n, int isgn, float *a, int *ip, float *w) | ||
358 | 359 | } |
359 | 360 | |
360 | 361 | |
/*
 * Real discrete Fourier transform driver that routes the complex FFT
 * stage through cftfsub_simd() / cftbsub_simd().
 *
 *   n    : data length (NOTE(review): presumably a power of two >= 2,
 *          as for the other routines in this file -- confirm)
 *   isgn : >= 0 selects the forward path, < 0 the backward path
 *   a    : n floats, transformed in place
 *   ip   : integer work area; ip[0] and ip[1] hold the sizes of the
 *          tables currently stored in w, ip + 2 is handed to bitrv2()
 *   w    : table area; the makewt() table sits at w[0..nw-1] and the
 *          makect() table right behind it at w + nw
 *
 * The tables are (re)built only when n needs more entries than the
 * sizes recorded in ip[0] / ip[1], so repeated calls with the same
 * ip/w reuse them.
 */
void rdft_simd(int n, int isgn, float *a, int *ip, float *w)
{
    void makewt(int nw, int *ip, float *w);
    void makect(int nc, int *ip, float *c);
    void bitrv2(int n, int *ip, float *a);
    /* cftfsub() is defined further down in this file; without this
       prototype the n == 4 calls below are implicit declarations. */
    void cftfsub(int n, float *a, float *w);
    void cftfsub_simd(int n, float *a, float *w);
    void cftbsub_simd(int n, float *a, float *w);
    void rftfsub(int n, float *a, int nc, float *c);
    void rftbsub(int n, float *a, int nc, float *c);
    int nw, nc;
    float xi;

    /* grow the first table if it is too small for this n */
    nw = ip[0];
    if (n > (nw << 2)) {
        nw = n >> 2;
        makewt(nw, ip, w);
    }
    /* grow the second table, stored right after the first one */
    nc = ip[1];
    if (n > (nc << 2)) {
        nc = n >> 2;
        makect(nc, ip, w + nw);
    }
    if (isgn >= 0) {
        if (n > 4) {
            bitrv2(n, ip + 2, a);
            cftfsub_simd(n, a, w);
            rftfsub(n, a, nc, w + nw);
        } else if (n == 4) {
            /* the n == 4 case goes through the scalar cftfsub() */
            cftfsub(n, a, w);
        }
        /* a[0] becomes the sum, a[1] the difference of the first pair */
        xi = a[0] - a[1];
        a[0] += a[1];
        a[1] = xi;
    } else {
        /* undo the sum/difference packing of a[0], a[1] */
        a[1] = 0.5 * (a[0] - a[1]);
        a[0] -= a[1];
        if (n > 4) {
            rftbsub(n, a, nc, w + nw);
            bitrv2(n, ip + 2, a);
            cftbsub_simd(n, a, w);
        } else if (n == 4) {
            /* the n == 4 case goes through the scalar cftfsub() */
            cftfsub(n, a, w);
        }
    }
}
407 | + | |
361 | 408 | void ddct(int n, int isgn, float *a, int *ip, float *w) |
362 | 409 | { |
363 | 410 | void makewt(int nw, int *ip, float *w); |
@@ -976,6 +1023,84 @@ void cftfsub(int n, float *a, float *w) | ||
976 | 1023 | } |
977 | 1024 | } |
978 | 1025 | |
/*
 * Forward complex FFT butterfly stage with an optional SSE final pass.
 *
 *   n : number of floats in a (interleaved re/im pairs)
 *   a : data, transformed in place
 *   w : table passed through to cft1st() / cftmdl()
 *
 * For n > 8 the leading passes are done by cft1st() and cftmdl().  The
 * final pass is a radix-4 butterfly when (l << 2) == n and a radix-2
 * butterfly otherwise.
 *
 * When USE_X86_EXT_INTRIN >= 2 the radix-4 pass handles two complex
 * values per iteration with aligned SSE loads/stores, so a must be
 * 16-byte aligned on that path.  The vector loop is only valid when the
 * quarter stride l is at least 4 floats; for n == 8 (l == 2) the four
 * vectors would overlap and reach past a[n - 1], so that case uses the
 * scalar loop.
 */
void cftfsub_simd(int n, float *a, float *w)
{
    void cft1st(int n, float *a, float *w);
    void cftmdl(int n, int l, float *a, float *w);
    int j, j1, j2, j3, l;
    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

    l = 2;
    if (n > 8) {
        cft1st(n, a, w);
        l = 8;
        while ((l << 2) < n) {
            cftmdl(n, l, a, w);
            l <<= 2;
        }
    }
    if ((l << 2) == n) {
        j = 0;
#if (USE_X86_EXT_INTRIN >= 2)
        if (l >= 4) {
            /* lane order is (re, im, re, im): vma1 = (-1, +1, -1, +1),
               vam1 = (+1, -1, +1, -1) */
            const __m128 vma1 = _mm_set_ps(1, -1, 1, -1);
            const __m128 vam1 = _mm_set_ps(-1, 1, -1, 1);
            for (; j < l; j += 4) {
                __m128 vj0, vj1, vj2, vj3, vx0, vx1, vx2, vx3;
                j1 = j + l;
                j2 = j1 + l;
                j3 = j2 + l;
                vj0 = _mm_load_ps(&a[j]);
                vj1 = _mm_load_ps(&a[j1]);
                vj2 = _mm_load_ps(&a[j2]);
                vj3 = _mm_load_ps(&a[j3]);
                vx0 = _mm_add_ps(vj0, vj1);
                vx1 = _mm_sub_ps(vj0, vj1);
                vx2 = _mm_add_ps(vj2, vj3);
                vx3 = _mm_sub_ps(vj2, vj3);
                vj0 = _mm_add_ps(vx0, vx2);
                vj2 = _mm_sub_ps(vx0, vx2);
                /* swap re/im inside each pair: (x3i, x3r, ...) */
                vx3 = _mm_shuffle_ps(vx3, vx3, 0xB1);
                vj1 = MM_FMA_PS(vx3, vma1, vx1); /* (x1r - x3i, x1i + x3r) */
                vj3 = MM_FMA_PS(vx3, vam1, vx1); /* (x1r + x3i, x1i - x3r) */
                _mm_store_ps(&a[j], vj0);
                _mm_store_ps(&a[j1], vj1);
                _mm_store_ps(&a[j2], vj2);
                _mm_store_ps(&a[j3], vj3);
            }
        }
#endif
        /* scalar radix-4 pass; a no-op when the vector loop above has
           already consumed every column */
        for (; j < l; j += 2) {
            j1 = j + l;
            j2 = j1 + l;
            j3 = j2 + l;
            x0r = a[j] + a[j1];
            x0i = a[j + 1] + a[j1 + 1];
            x1r = a[j] - a[j1];
            x1i = a[j + 1] - a[j1 + 1];
            x2r = a[j2] + a[j3];
            x2i = a[j2 + 1] + a[j3 + 1];
            x3r = a[j2] - a[j3];
            x3i = a[j2 + 1] - a[j3 + 1];
            a[j] = x0r + x2r;
            a[j + 1] = x0i + x2i;
            a[j2] = x0r - x2r;
            a[j2 + 1] = x0i - x2i;
            a[j1] = x1r - x3i;
            a[j1 + 1] = x1i + x3r;
            a[j3] = x1r + x3i;
            a[j3 + 1] = x1i - x3r;
        }
    } else {
        /* radix-2 final pass */
        for (j = 0; j < l; j += 2) {
            j1 = j + l;
            x0r = a[j] - a[j1];
            x0i = a[j + 1] - a[j1 + 1];
            a[j] += a[j1];
            a[j + 1] += a[j1 + 1];
            a[j1] = x0r;
            a[j1 + 1] = x0i;
        }
    }
}
979 | 1104 | |
980 | 1105 | void cftbsub(int n, float *a, float *w) |
981 | 1106 | { |
@@ -1028,6 +1153,85 @@ void cftbsub(int n, float *a, float *w) | ||
1028 | 1153 | } |
1029 | 1154 | } |
1030 | 1155 | |
/*
 * Backward complex FFT butterfly stage with an optional SSE final pass.
 *
 *   n : number of floats in a (interleaved re/im pairs)
 *   a : data, transformed in place
 *   w : table passed through to cft1st() / cftmdl()
 *
 * Same layout as the forward stage, except that the final pass negates
 * the imaginary parts of the first two quarters while combining them.
 *
 * When USE_X86_EXT_INTRIN >= 2 the radix-4 pass handles two complex
 * values per iteration with aligned SSE loads/stores, so a must be
 * 16-byte aligned on that path.  The vector loop is only valid when the
 * quarter stride l is at least 4 floats; for n == 8 (l == 2) the four
 * vectors would overlap and reach past a[n - 1], so that case uses the
 * scalar loop.
 */
void cftbsub_simd(int n, float *a, float *w)
{
    void cft1st(int n, float *a, float *w);
    void cftmdl(int n, int l, float *a, float *w);
    int j, j1, j2, j3, l;
    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

    l = 2;
    if (n > 8) {
        cft1st(n, a, w);
        l = 8;
        while ((l << 2) < n) {
            cftmdl(n, l, a, w);
            l <<= 2;
        }
    }
    if ((l << 2) == n) {
        j = 0;
#if (USE_X86_EXT_INTRIN >= 2)
        if (l >= 4) {
            /* lane order is (re, im, re, im): vma1 = (-1, +1, -1, +1),
               vam1 = (+1, -1, +1, -1) */
            const __m128 vma1 = _mm_set_ps(1, -1, 1, -1);
            const __m128 vam1 = _mm_set_ps(-1, 1, -1, 1);
            for (; j < l; j += 4) {
                __m128 vj0, vj1, vj2, vj3, vx0, vx1, vx2, vx3;
                j1 = j + l;
                j2 = j1 + l;
                j3 = j2 + l;
                vj0 = _mm_load_ps(&a[j]);
                vj1 = _mm_load_ps(&a[j1]);
                vj2 = _mm_load_ps(&a[j2]);
                vj3 = _mm_load_ps(&a[j3]);
                vj0 = _mm_mul_ps(vj0, vam1);     /* (a[j], -a[j+1]) */
                vx0 = MM_FMA_PS(vj1, vam1, vj0); /* (x0r, x0i) */
                vx1 = MM_FMA_PS(vj1, vma1, vj0); /* (x1r, x1i) */
                vx2 = _mm_add_ps(vj2, vj3);
                vx3 = _mm_sub_ps(vj2, vj3);
                vj0 = MM_FMA_PS(vx2, vam1, vx0); /* (x0r + x2r, x0i - x2i) */
                vj2 = MM_FMA_PS(vx2, vma1, vx0); /* (x0r - x2r, x0i + x2i) */
                /* swap re/im inside each pair: (x3i, x3r, ...) */
                vx3 = _mm_shuffle_ps(vx3, vx3, 0xB1);
                vj1 = _mm_sub_ps(vx1, vx3);
                vj3 = _mm_add_ps(vx1, vx3);
                _mm_store_ps(&a[j], vj0);
                _mm_store_ps(&a[j1], vj1);
                _mm_store_ps(&a[j2], vj2);
                _mm_store_ps(&a[j3], vj3);
            }
        }
#endif
        /* scalar radix-4 pass; a no-op when the vector loop above has
           already consumed every column */
        for (; j < l; j += 2) {
            j1 = j + l;
            j2 = j1 + l;
            j3 = j2 + l;
            x0r = a[j] + a[j1];
            x0i = -a[j + 1] - a[j1 + 1];
            x1r = a[j] - a[j1];
            x1i = -a[j + 1] + a[j1 + 1];
            x2r = a[j2] + a[j3];
            x2i = a[j2 + 1] + a[j3 + 1];
            x3r = a[j2] - a[j3];
            x3i = a[j2 + 1] - a[j3 + 1];
            a[j] = x0r + x2r;
            a[j + 1] = x0i - x2i;
            a[j2] = x0r - x2r;
            a[j2 + 1] = x0i + x2i;
            a[j1] = x1r - x3i;
            a[j1 + 1] = x1i - x3r;
            a[j3] = x1r + x3i;
            a[j3 + 1] = x1i + x3r;
        }
    } else {
        /* radix-2 final pass, negating the imaginary parts */
        for (j = 0; j < l; j += 2) {
            j1 = j + l;
            x0r = a[j] - a[j1];
            x0i = -a[j + 1] + a[j1 + 1];
            a[j] += a[j1];
            a[j + 1] = -a[j + 1] - a[j1 + 1];
            a[j1] = x0r;
            a[j1 + 1] = x0i;
        }
    }
}
1031 | 1235 | |
1032 | 1236 | void cft1st(int n, float *a, float *w) |
1033 | 1237 | { |
@@ -1285,7 +1489,6 @@ void rftfsub(int n, float *a, int nc, float *c) | ||
1285 | 1489 | } |
1286 | 1490 | } |
1287 | 1491 | |
1288 | - | |
1289 | 1492 | void rftbsub(int n, float *a, int nc, float *c) |
1290 | 1493 | { |
1291 | 1494 | int j, k, kk, ks, m; |
@@ -7,6 +7,7 @@ | ||
7 | 7 | */ |
8 | 8 | extern void cdft(int, int, float *, int *, float *); |
9 | 9 | extern void rdft(int, int, float *, int *, float *); |
10 | +extern void rdft_simd(int, int, float *, int *, float *); | |
10 | 11 | extern void ddct(int, int, float *, int *, float *); |
11 | 12 | extern void ddst(int, int, float *, int *, float *); |
12 | 13 | extern void dfct(int, float *, float *, int *, float *); |