Wed Oct 28 11:45:36 2009

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * \note This work was motivated by Jeremy McNamara 
00025  * hacked to be configurable by anthm and bkw 9/28/2004
00026  *
00027  * \ingroup codecs
00028  *
00029  * \extref The Speex library - http://www.speex.org
00030  *
00031  */
00032 
00033 /*** MODULEINFO
00034    <depend>speex</depend>
00035    <depend>speex_preprocess</depend>
00036    <use>speexdsp</use>
00037  ***/
00038 
00039 #include "asterisk.h"
00040 
00041 ASTERISK_FILE_VERSION(__FILE__, "$Revision: 211551 $")
00042 
00043 #include <speex/speex.h>
00044 
00045 /* We require a post 1.1.8 version of Speex to enable preprocessing
00046    and better type handling */   
00047 #ifdef _SPEEX_TYPES_H
00048 #include <speex/speex_preprocess.h>
00049 #endif
00050 
00051 #include "asterisk/translate.h"
00052 #include "asterisk/module.h"
00053 #include "asterisk/config.h"
00054 #include "asterisk/utils.h"
00055 
00056 /* Sample frame data */
00057 #include "slin_speex_ex.h"
00058 #include "speex_slin_ex.h"
00059 
00060 /* codec variables */
/*
 * Encoder / decoder tunables.
 *
 * The initialisers are the built-in defaults; parse_config() replaces them
 * with whatever the [speex] section of codecs.conf provides.
 */
static int quality = 3;             /* fixed-rate quality; parse_config() accepts 0-10 */
static int complexity = 2;          /* encoder complexity; parse_config() accepts 0-10 */
static int enhancement = 0;         /* decoder perceptual enhancement (SPEEX_SET_ENH) */
static int vad = 0;                 /* encoder VAD, only applied when neither ABR nor VBR is on */
static int vbr = 0;                 /* variable bit rate on/off */
static float vbr_quality = 4.0f;    /* VBR quality; parse_config() accepts 0-10 */
static int abr = 0;                 /* ABR target bitrate, 0 disables ABR */
static int dtx = 0;                 /* set to 1 to enable silence detection */

/*
 * Preprocessor tunables; they are only consulted when the module is built
 * with _SPEEX_TYPES_H defined and "preprocess" is switched on.
 */
static int preproc = 0;             /* master switch for the preprocessor */
static int pp_vad = 0;              /* SPEEX_PREPROCESS_SET_VAD */
static int pp_agc = 0;              /* SPEEX_PREPROCESS_SET_AGC */
static float pp_agc_level = 8000.0f; /* SPEEX_PREPROCESS_SET_AGC_LEVEL; XXX what is this 8000 ? */
static int pp_denoise = 0;          /* SPEEX_PREPROCESS_SET_DENOISE */
static int pp_dereverb = 0;         /* SPEEX_PREPROCESS_SET_DEREVERB */
static float pp_dereverb_decay = 0.4; /* SPEEX_PREPROCESS_SET_DEREVERB_DECAY */
static float pp_dereverb_level = 0.3; /* SPEEX_PREPROCESS_SET_DEREVERB_LEVEL */
00078 
/* Frame type tags (two-bit field); none of them is referenced by the code visible in this file. */
#define TYPE_HIGH       0x0
#define TYPE_LOW        0x1
#define TYPE_SILENCE    0x2
#define TYPE_MASK       0x3

/* Capacity, in samples, of the per-instance work buffer and of the translator buffers. */
#define BUFFER_SAMPLES  8000
/* Sample count advertised for the example Speex frame (all frames are 20 ms long). */
#define SPEEX_SAMPLES   160
00086 
00087 struct speex_coder_pvt {
00088    void *speex;
00089    SpeexBits bits;
00090    int framesize;
00091    int silent_state;
00092 #ifdef _SPEEX_TYPES_H
00093    SpeexPreprocessState *pp;
00094    spx_int16_t buf[BUFFER_SAMPLES];
00095 #else
00096    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00097 #endif
00098 };
00099 
00100 
00101 static int lintospeex_new(struct ast_trans_pvt *pvt)
00102 {
00103    struct speex_coder_pvt *tmp = pvt->pvt;
00104 
00105    if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
00106       return -1;
00107 
00108    speex_bits_init(&tmp->bits);
00109    speex_bits_reset(&tmp->bits);
00110    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00111    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00112 #ifdef _SPEEX_TYPES_H
00113    if (preproc) {
00114       tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
00115       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00116       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00117       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00118       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00119       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00120       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00121       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00122    }
00123 #endif
00124    if (!abr && !vbr) {
00125       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00126       if (vad)
00127          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00128    }
00129    if (vbr) {
00130       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00131       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00132    }
00133    if (abr)
00134       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00135    if (dtx)
00136       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00137    tmp->silent_state = 0;
00138 
00139    return 0;
00140 }
00141 
00142 static int speextolin_new(struct ast_trans_pvt *pvt)
00143 {
00144    struct speex_coder_pvt *tmp = pvt->pvt;
00145    
00146    if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
00147       return -1;
00148 
00149    speex_bits_init(&tmp->bits);
00150    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00151    if (enhancement)
00152       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00153 
00154    return 0;
00155 }
00156 
00157 static struct ast_frame *lintospeex_sample(void)
00158 {
00159    static struct ast_frame f;
00160    f.frametype = AST_FRAME_VOICE;
00161    f.subclass = AST_FORMAT_SLINEAR;
00162    f.datalen = sizeof(slin_speex_ex);
00163    /* Assume 8000 Hz */
00164    f.samples = sizeof(slin_speex_ex)/2;
00165    f.mallocd = 0;
00166    f.offset = 0;
00167    f.src = __PRETTY_FUNCTION__;
00168    f.data = slin_speex_ex;
00169    return &f;
00170 }
00171 
00172 static struct ast_frame *speextolin_sample(void)
00173 {
00174    static struct ast_frame f;
00175    f.frametype = AST_FRAME_VOICE;
00176    f.subclass = AST_FORMAT_SPEEX;
00177    f.datalen = sizeof(speex_slin_ex);
00178    /* All frames are 20 ms long */
00179    f.samples = SPEEX_SAMPLES;
00180    f.mallocd = 0;
00181    f.offset = 0;
00182    f.src = __PRETTY_FUNCTION__;
00183    f.data = speex_slin_ex;
00184    return &f;
00185 }
00186 
00187 /*! \brief convert and store into outbuf */
00188 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00189 {
00190    struct speex_coder_pvt *tmp = pvt->pvt;
00191 
00192    /* Assuming there's space left, decode into the current buffer at
00193       the tail location.  Read in as many frames as there are */
00194    int x;
00195    int res;
00196    int16_t *dst = (int16_t *)pvt->outbuf;
00197    /* XXX fout is a temporary buffer, may have different types */
00198 #ifdef _SPEEX_TYPES_H
00199    spx_int16_t fout[1024];
00200 #else
00201    float fout[1024];
00202 #endif
00203 
00204    if (f->datalen == 0) {  /* Native PLC interpolation */
00205       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00206          ast_log(LOG_WARNING, "Out of buffer space\n");
00207          return -1;
00208       }
00209 #ifdef _SPEEX_TYPES_H
00210       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00211 #else
00212       speex_decode(tmp->speex, NULL, fout);
00213       for (x=0;x<tmp->framesize;x++) {
00214          dst[pvt->samples + x] = (int16_t)fout[x];
00215       }
00216 #endif
00217       pvt->samples += tmp->framesize;
00218       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00219       return 0;
00220    }
00221 
00222    /* Read in bits */
00223    speex_bits_read_from(&tmp->bits, f->data, f->datalen);
00224    for (;;) {
00225 #ifdef _SPEEX_TYPES_H
00226       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00227 #else
00228       res = speex_decode(tmp->speex, &tmp->bits, fout);
00229 #endif
00230       if (res < 0)
00231          break;
00232       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00233          ast_log(LOG_WARNING, "Out of buffer space\n");
00234          return -1;
00235       }
00236       for (x = 0 ; x < tmp->framesize; x++)
00237          dst[pvt->samples + x] = (int16_t)fout[x];
00238       pvt->samples += tmp->framesize;
00239       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00240    }
00241    return 0;
00242 }
00243 
00244 /*! \brief store input frame in work buffer */
00245 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00246 {
00247    struct speex_coder_pvt *tmp = pvt->pvt;
00248 
00249    /* XXX We should look at how old the rest of our stream is, and if it
00250       is too old, then we should overwrite it entirely, otherwise we can
00251       get artifacts of earlier talk that do not belong */
00252    memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
00253    pvt->samples += f->samples;
00254    return 0;
00255 }
00256 
00257 /*! \brief convert work buffer and produce output frame */
00258 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00259 {
00260    struct speex_coder_pvt *tmp = pvt->pvt;
00261    int is_speech=1;
00262    int datalen = 0;  /* output bytes */
00263    int samples = 0;  /* output samples */
00264 
00265    /* We can't work on anything less than a frame in size */
00266    if (pvt->samples < tmp->framesize)
00267       return NULL;
00268    speex_bits_reset(&tmp->bits);
00269    while (pvt->samples >= tmp->framesize) {
00270 #ifdef _SPEEX_TYPES_H
00271       /* Preprocess audio */
00272       if (preproc)
00273          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00274       /* Encode a frame of data */
00275       if (is_speech) {
00276          /* If DTX enabled speex_encode returns 0 during silence */
00277          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00278       } else {
00279          /* 5 zeros interpreted by Speex as silence (submode 0) */
00280          speex_bits_pack(&tmp->bits, 0, 5);
00281       }
00282 #else
00283       {
00284          float fbuf[1024];
00285          int x;
00286          /* Convert to floating point */
00287          for (x = 0; x < tmp->framesize; x++)
00288             fbuf[x] = tmp->buf[samples + x];
00289          /* Encode a frame of data */
00290          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00291       }
00292 #endif
00293       samples += tmp->framesize;
00294       pvt->samples -= tmp->framesize;
00295    }
00296 
00297    /* Move the data at the end of the buffer to the front */
00298    if (pvt->samples)
00299       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00300 
00301    /* Use AST_FRAME_CNG to signify the start of any silence period */
00302    if (is_speech) {
00303       tmp->silent_state = 0;
00304    } else {
00305       if (tmp->silent_state) {
00306          return NULL;
00307       } else {
00308          tmp->silent_state = 1;
00309          speex_bits_reset(&tmp->bits);
00310          memset(&pvt->f, 0, sizeof(pvt->f));
00311          pvt->f.frametype = AST_FRAME_CNG;
00312          pvt->f.samples = samples;
00313          /* XXX what now ? format etc... */
00314       }
00315    }
00316 
00317    /* Terminate bit stream */
00318    speex_bits_pack(&tmp->bits, 15, 5);
00319    datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
00320    return ast_trans_frameout(pvt, datalen, samples);
00321 }
00322 
00323 static void speextolin_destroy(struct ast_trans_pvt *arg)
00324 {
00325    struct speex_coder_pvt *pvt = arg->pvt;
00326 
00327    speex_decoder_destroy(pvt->speex);
00328    speex_bits_destroy(&pvt->bits);
00329 }
00330 
00331 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00332 {
00333    struct speex_coder_pvt *pvt = arg->pvt;
00334 #ifdef _SPEEX_TYPES_H
00335    if (preproc)
00336       speex_preprocess_state_destroy(pvt->pp);
00337 #endif
00338    speex_encoder_destroy(pvt->speex);
00339    speex_bits_destroy(&pvt->bits);
00340 }
00341 
00342 static struct ast_translator speextolin = {
00343    .name = "speextolin", 
00344    .srcfmt = AST_FORMAT_SPEEX,
00345    .dstfmt =  AST_FORMAT_SLINEAR,
00346    .newpvt = speextolin_new,
00347    .framein = speextolin_framein,
00348    .destroy = speextolin_destroy,
00349    .sample = speextolin_sample,
00350    .desc_size = sizeof(struct speex_coder_pvt),
00351    .buffer_samples = BUFFER_SAMPLES,
00352    .buf_size = BUFFER_SAMPLES * 2,
00353    .native_plc = 1,
00354 };
00355 
00356 static struct ast_translator lintospeex = {
00357    .name = "lintospeex", 
00358    .srcfmt = AST_FORMAT_SLINEAR,
00359    .dstfmt = AST_FORMAT_SPEEX,
00360    .newpvt = lintospeex_new,
00361    .framein = lintospeex_framein,
00362    .frameout = lintospeex_frameout,
00363    .destroy = lintospeex_destroy,
00364    .sample = lintospeex_sample,
00365    .desc_size = sizeof(struct speex_coder_pvt),
00366    .buffer_samples = BUFFER_SAMPLES,
00367    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00368 };
00369 
00370 static int parse_config(int reload) 
00371 {
00372    struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
00373    struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
00374    struct ast_variable *var;
00375    int res;
00376    float res_f;
00377 
00378    if (cfg == NULL)
00379       return 0;
00380    if (cfg == CONFIG_STATUS_FILEUNCHANGED)
00381       return 0;
00382 
00383    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00384       if (!strcasecmp(var->name, "quality")) {
00385          res = abs(atoi(var->value));
00386          if (res > -1 && res < 11) {
00387             ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
00388             quality = res;
00389          } else 
00390             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00391       } else if (!strcasecmp(var->name, "complexity")) {
00392          res = abs(atoi(var->value));
00393          if (res > -1 && res < 11) {
00394             ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
00395             complexity = res;
00396          } else 
00397             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00398       } else if (!strcasecmp(var->name, "vbr_quality")) {
00399          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00400             ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00401             vbr_quality = res_f;
00402          } else
00403             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00404       } else if (!strcasecmp(var->name, "abr_quality")) {
00405          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00406       } else if (!strcasecmp(var->name, "enhancement")) {
00407          enhancement = ast_true(var->value) ? 1 : 0;
00408          ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00409       } else if (!strcasecmp(var->name, "vbr")) {
00410          vbr = ast_true(var->value) ? 1 : 0;
00411          ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00412       } else if (!strcasecmp(var->name, "abr")) {
00413          res = abs(atoi(var->value));
00414          if (res >= 0) {
00415                if (res > 0)
00416                ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00417                else
00418                ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
00419             abr = res;
00420          } else 
00421             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00422       } else if (!strcasecmp(var->name, "vad")) {
00423          vad = ast_true(var->value) ? 1 : 0;
00424          ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00425       } else if (!strcasecmp(var->name, "dtx")) {
00426          dtx = ast_true(var->value) ? 1 : 0;
00427          ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00428       } else if (!strcasecmp(var->name, "preprocess")) {
00429          preproc = ast_true(var->value) ? 1 : 0;
00430          ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00431       } else if (!strcasecmp(var->name, "pp_vad")) {
00432          pp_vad = ast_true(var->value) ? 1 : 0;
00433          ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00434       } else if (!strcasecmp(var->name, "pp_agc")) {
00435          pp_agc = ast_true(var->value) ? 1 : 0;
00436          ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00437       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00438          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00439             ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00440             pp_agc_level = res_f;
00441          } else
00442             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00443       } else if (!strcasecmp(var->name, "pp_denoise")) {
00444          pp_denoise = ast_true(var->value) ? 1 : 0;
00445          ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00446       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00447          pp_dereverb = ast_true(var->value) ? 1 : 0;
00448          ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00449       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00450          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00451             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00452             pp_dereverb_decay = res_f;
00453          } else
00454             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00455       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00456          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00457             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00458             pp_dereverb_level = res_f;
00459          } else
00460             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00461       }
00462    }
00463    ast_config_destroy(cfg);
00464    return 0;
00465 }
00466 
00467 static int reload(void) 
00468 {
00469    if (parse_config(1))
00470       return AST_MODULE_LOAD_DECLINE;
00471    return AST_MODULE_LOAD_SUCCESS;
00472 }
00473 
00474 static int unload_module(void)
00475 {
00476    int res;
00477 
00478    res = ast_unregister_translator(&lintospeex);
00479    res |= ast_unregister_translator(&speextolin);
00480 
00481    return res;
00482 }
00483 
00484 static int load_module(void)
00485 {
00486    int res;
00487 
00488    if (parse_config(0))
00489       return AST_MODULE_LOAD_DECLINE;
00490    res=ast_register_translator(&speextolin);
00491    if (!res) 
00492       res=ast_register_translator(&lintospeex);
00493    else
00494       ast_unregister_translator(&speextolin);
00495    if (res)
00496       return AST_MODULE_LOAD_FAILURE;
00497    return AST_MODULE_LOAD_SUCCESS;
00498 }
00499 
00500 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00501       .load = load_module,
00502       .unload = unload_module,
00503       .reload = reload,
00504           );

Generated on Wed Oct 28 11:45:36 2009 for Asterisk - the Open Source PBX by  doxygen 1.5.6