libcm is a C development framework with an emphasis on audio signal processing applications.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cmFeatFile.h 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. /// \file cmFeatFile.h
  2. /// \brief Audio file acoustic feature analyzer and accompanying file reader.
  3. ///
  4. ///
  5. #ifndef cmFeatFile_h
  6. #define cmFeatFile_h
  7. #ifdef __cplusplus
  8. extern "C" {
  9. #endif
  10. /// Result codes for all functions in cmFeatFile.h
  11. enum
  12. {
  13. kOkFtRC = cmOkRC,
  14. kCfgParseFailFtRC,
  15. kFileSysFailFtRC,
  16. kJsonFailFtRC,
  17. kDspProcFailFtRC,
  18. kDirCreateFailFtRC,
  19. kFileNotFoundFtRC,
  20. kAudioFileOpenFailFtRC,
  21. kFrameFileFailFtRC,
  22. kChIdxInvalidFtRC,
  23. kParamRangeFtRC,
  24. kParamErrorFtRC,
  25. kFrameWriteFailFtRC,
  26. kEofFtRC,
  27. kPlviewFailFtRC,
  28. kSerialFailFtRC,
  29. kInvalidFeatIdFtRC,
  30. kFileFailFtRC,
  31. kInvalidFrmIdxFtRC
  32. };
  /// Feature Id's
  ///
  /// The values are written out explicitly so that the number documented next
  /// to each id is enforced by the compiler rather than implied by position.
  enum
  {
    kInvalidFtId     =  0, ///< 0
    kAmplFtId        =  1, ///< 1 Fourier transform amplitude
    kDbAmplFtId      =  2, ///< 2 Fourier transform decibel
    kPowFtId         =  3, ///< 3 Fourier transform power
    kDbPowFtId       =  4, ///< 4 Fourier transform power decibel
    kPhaseFtId       =  5, ///< 5 Fourier transform phase (not unwrapped)
    kBfccFtId        =  6, ///< 6 Bark Frequency Cepstral Coefficients
    kMfccFtId        =  7, ///< 7 Mel Frequency Cepstral Coefficients
    kCepsFtId        =  8, ///< 8 Cepstral Coefficients
    kConstQFtId      =  9, ///< 9 Constant-Q transform
    kLogConstQFtId   = 10, ///< 10 Log Constant-Q transform
    kRmsFtId         = 11, ///< 11 Root mean square of the audio signal
    kDbRmsFtId       = 12, ///< 12 RMS in decibels
    kD1AmplFtId      = 13, ///< 13 1st order difference over time of the Fourier transform amplitude
    kD1DbAmplFtId    = 14, ///< 14 1st order difference over time of the Fourier transform decibel
    kD1PowFtId       = 15, ///< 15 1st order difference over time of the Fourier transform power
    kD1DbPowFtId     = 16, ///< 16 1st order difference over time of the Fourier transform power decibel
    kD1PhaseFtId     = 17, ///< 17 1st order difference over time of the Fourier transform phase (not unwrapped)
    kD1BfccFtId      = 18, ///< 18 1st order difference over time of the Bark Frequency Cepstral Coefficients
    kD1MfccFtId      = 19, ///< 19 1st order difference over time of the Mel Frequency Cepstral Coefficients
    kD1CepsFtId      = 20, ///< 20 1st order difference over time of the Cepstral Coefficients
    kD1ConstQFtId    = 21, ///< 21 1st order difference over time of the Constant-Q transform
    kD1LogConstQFtId = 22, ///< 22 1st order difference over time of the Log Constant-Q transform
    kD1RmsFtId       = 23, ///< 23 1st order difference over time of the Root mean square of the audio signal
    kD1DbRmsFtId     = 24, ///< 24 1st order difference over time of the RMS in decibels
  };
  /// User defined feature parameters
  typedef struct
  {
    unsigned id;       ///< feature id
    unsigned cnt;      ///< length of feature vector
    bool     normFl;   ///< normalize this feature
    bool     enableFl; ///< true if this feature is enabled
  } cmFtAttr_t;
  /// Skip input audio range record
  typedef struct
  {
    unsigned smpIdx; ///< Index of first sample to skip
    unsigned smpCnt; ///< Count of successive samples to skip.
  } cmFtSkip_t;
  76. /// Analysis parameters
  77. typedef struct
  78. {
  79. const char* audioFn; ///< Audio file name.
  80. const char* featFn; ///< Feature file name.
  81. unsigned chIdx; ///< Audio file channel index
  82. cmReal_t wndMs; ///< Length of the analysis window in milliseconds.
  83. unsigned hopFact; ///< Analysis window overlap factor 1 = 1:1 2=2:1 ...
  84. bool normAudioFl; ///< Normalize the audio over the length of the audio file
  85. cmMidiByte_t constQMinPitch; ///< Used to determine the base const-q octave.
  86. cmMidiByte_t constQMaxPitch; ///< Used to determine the maximum const-q frequency of interest.
  87. unsigned constQBinsPerOctave; ///< Bands per const-q octave.
  88. unsigned onsetMedFiltWndSmpCnt; ///< Complex onset median filter
  89. cmReal_t onsetThreshold; ///< Complex onset threshold
  90. cmReal_t minDb; ///< Fourier Transform magnitude values below minDb are set to minDb.
  91. cmReal_t floorThreshDb; ///< Frames with an RMS below this value will be skipped
  92. cmFtSkip_t* skipArray; ///< skipArray[skipCnt] user defined sample skip ranges
  93. unsigned skipCnt; ///< Count of records in skipArray[].
  94. cmFtAttr_t* attrArray; ///< attrArray[attrCnt] user defined parameter array
  95. unsigned attrCnt; ///< Count of records in attrArray[].
  96. } cmFtParam_t;
  97. /// Feature summary information
  98. typedef struct
  99. {
  100. unsigned id; ///< feature id (same as associated cmFtAttr.id)
  101. unsigned cnt; ///< length of each feature vector (same as associated cmFtAttr.cnt)
  102. /// The raw feature summary values are calculated prior to normalization.
  103. cmReal_t* rawMinV; ///< Vector of min value over time for each feature element.
  104. cmReal_t* rawMaxV; ///< Vector of max value over time for each feature element.
  105. cmReal_t* rawAvgV; ///< Vector of avg value over time for each feature element.
  106. cmReal_t* rawSdvV; ///< Vector of standard deviation values over time for each feature element.
  107. cmReal_t rawMin; ///< Min value of all values for this feature. Equivalent to min(rawMinV).
  108. cmReal_t rawMax; ///< Max value of all values for this feature. Equivalent to max(rawMaxV).
  109. /// normalized feature summary values
  110. cmReal_t* normMinV; ///< Vector of min value over time for each feature element.
  111. cmReal_t* normMaxV; ///< Vector of max value over time for each feature element.
  112. cmReal_t* normAvgV; ///< Vector of avg value over time for each feature element.
  113. cmReal_t* normSdvV; ///< Vector of standard deviation values over time for each feature element.
  114. cmReal_t normMin; ///< Min value of all values for this feature. Equivalent to min(normMinV).
  115. cmReal_t normMax; ///< Max value of all values for this feature. Equivalent to max(rawMaxV).
  116. } cmFtSumm_t;
  117. /// Feature file info record
  118. typedef struct
  119. {
  120. unsigned frmCnt; ///< count of frames in the file
  121. cmReal_t srate; ///< audio sample rate
  122. unsigned smpCnt; ///< audio sample count
  123. unsigned fftSmpCnt; ///< FFT window length (always power of 2)
  124. unsigned hopSmpCnt; ///< audio sample hop count
  125. unsigned binCnt; ///< FFT bin count (always fftSmpCnt/2 + 1)
  126. unsigned skipFrmCnt; ///< count of frames skipped based on user skip array
  127. unsigned floorFrmCnt; ///< count of frames skipped because below floorThreshDb
  128. cmFtParam_t param; ///< analysis parameter record used to form this feature file
  129. cmFtSumm_t* summArray; ///< summArray[ param.attrCnt ] feature summary information
  130. } cmFtInfo_t;
  /// Data structure returned by cmFtReaderAdvance().
  typedef struct
  {
    unsigned smpIdx; ///< The audio signal sample index this frame's information is based on.
    unsigned frmIdx; ///< The frame index relative to other frames in this feature file.
  } cmFtFrameDesc_t;
  137. typedef cmHandle_t cmFtH_t; ///< Analyzer handle
  138. typedef cmHandle_t cmFtFileH_t; ///< Feature file handle.
  139. typedef unsigned cmFtRC_t; ///< Result code type used by all functions in cmFeatFile.h.
  140. extern cmFtH_t cmFtNullHandle; ///< A NULL handle useful for indicating an uninitialized analyzer.
  141. extern cmFtFileH_t cmFtFileNullHandle; ///< A NULL handle useful for indicating an uninitialized feature file.
  /// Given a feature type id return the associated label.
  /// \param featId  Feature type id to look up.
  /// \return        The label associated with featId.
  const char* cmFtFeatIdToLabel( unsigned featId );
  /// Given a feature type label return the associated id.
  /// \param label  Feature type label to look up.
  /// \return       The feature type id associated with label.
  unsigned cmFtFeatLabelToId( const char* label );
  146. /// \name Feature Analyzer Related functions
  147. ///@{
  148. /// Initialize the feature analyzer. The memory manager and file system must
  149. /// be initialized (cmMdInitialize(), cmFsInitialize()) prior to calling this function.
  150. cmFtRC_t cmFtInitialize( cmFtH_t* hp, cmCtx_t* ctx );
  151. /// Finalize a feature analyzer.
  152. cmFtRC_t cmFtFinalize( cmFtH_t* h );
  153. /// Return true if the handle represents an initialized feature analyzer.
  154. bool cmFtIsValid( cmFtH_t h );
  155. /// Parse a JSON file containing a set of analysis parameters.
  156. cmFtRC_t cmFtParse( cmFtH_t h, const char* cfgFn );
  157. /// Run the analyzer.
  158. cmFtRC_t cmFtAnalyze( cmFtH_t h );
  159. /// If cmFtAnalyze() is being run in a seperate thread this function
  160. /// can be used to access the analyzers progress.
  161. const char* cmFtAnalyzeProgress( cmFtH_t h, unsigned* passPtr, cmReal_t* percentPtr );
  162. ///@}
  163. /// \name Feature File Related Functions
  164. ///@{
  165. /// Open a feature file.
  166. /// Note that inforPtrPtr is optional and will be ignored if it is set to NULL.
  167. cmFtRC_t cmFtReaderOpen( cmFtH_t h, cmFtFileH_t* hp, const char* featFn, const cmFtInfo_t** infoPtrPtr );
  168. /// Close a feature file.
  169. cmFtRC_t cmFtReaderClose( cmFtFileH_t* hp );
  170. /// Return true if the handle reprents an open feature file.
  171. bool cmFtReaderIsValid( cmFtFileH_t h );
  172. /// Return the count of features types this file contains.
  173. unsigned cmFtReaderFeatCount( cmFtFileH_t h );
  174. /// Return the feature type id associated with the specified index.
  175. unsigned cmFtReaderFeatId( cmFtFileH_t h, unsigned index );
  176. /// Reset the current file location to the first frame but do not load it.
  177. /// The next call to cmFtReadAdvance() will load the next frame.
  178. cmFtRC_t cmFtReaderRewind( cmFtFileH_t h );
  179. /// Make frmIdx the current file location.
  180. cmFtRC_t cmFtReaderSeek( cmFtFileH_t h, unsigned frmIdx );
  181. /// Load the current frame, advance the current file position, and return
  182. /// a pointer to a cmFtFrameDesc_t record for the loaded frame.
  183. /// Returns kEofFtRC upon reaching end of file.
  184. /// The frameDescPtr is optional.
  185. cmFtRC_t cmFtReaderAdvance( cmFtFileH_t h, cmFtFrameDesc_t* frameDescPtr );
  186. /// Returns a pointer to a data matrix in the feature identified by featId in the current feature frame.
  187. cmReal_t* cmFtReaderData( cmFtFileH_t h, unsigned featId, unsigned* cntPtr );
  188. /// Copy the contents of a given set of frames into buf[frmCnt*elePerFrmCnt].
  189. cmFtRC_t cmFtReaderCopy( cmFtFileH_t h, unsigned featId, unsigned frmIdx, cmReal_t* buf, unsigned frmCnt, unsigned elePerFrmCnt, unsigned* outEleCntPtr );
  /// Data structure used to specify multiple features for use by cmFtReaderMultiSetup().
  typedef struct
  {
    unsigned featId; ///< Feature id of feature to include in the feature vector
    unsigned cnt;    ///< Set to count of feat ele's for this feat. Error if greater than avail. Set to -1 to use all avail ele's.
                     ///< Returned with actual count used.
    unsigned id0;    ///< Ignored on input. Used internally by cmFtReaderXXX()
    unsigned id1;    ///< Ignored on input. Used internally by cmFtReaderXXX()
  } cmFtMulti_t;
  199. /// Setup an array of cmFtMulti_t records. The cmFtMulti_t array
  200. /// used by cmFtReaderMulitData() must be initialized by this function.
  201. cmFtRC_t cmFtReaderMultiSetup( cmFtFileH_t h, cmFtMulti_t* multiArray, unsigned multiCnt, unsigned* featVectEleCntPtr );
  202. /// Fill outV[outN] with a consecutive data from the features specified in the cmFtMulti_t array.
  203. /// Use cmFtReaderMultiSetup() to configure the cmFtMulti_t array prior to calling this function.
  204. cmFtRC_t cmFtReaderMultiData( cmFtFileH_t h, const cmFtMulti_t* multiArray, unsigned multiCnt, cmReal_t* outV, unsigned outN );
  205. /// Report summary information for the specified feature.
  206. cmFtRC_t cmFtReaderReport( cmFtFileH_t h, unsigned featId );
  207. /// Identical to cmFtReaderReport() except the feature file is identified from a file name rather than an open cmFtFileH_t.
  208. cmFtRC_t cmFtReaderReportFn( cmFtH_t h, const cmChar_t* fn, unsigned featId );
  209. /// Report feature data for the specified set of feature frames.
  210. cmFtRC_t cmFtReaderReportFeature( cmFtFileH_t h, unsigned featId, unsigned frmIdx, unsigned frmCnt );
  211. /// Write a feature into a binary file.
  212. /// Set 'frmCnt' to the cmInvalidCnt to include all frames past frmIdx.
  213. /// The first three unsigned values in the output file
  214. /// contain the row count, maximum column count, and the count of bytes in each data element (4=float,8=double).
  215. /// Each row of the file begins with the count of elements in the row and is followed by a data array.
  216. cmFtRC_t cmFtReaderToBinary( cmFtFileH_t h, unsigned featId, unsigned frmIdx, unsigned frmCnt, const cmChar_t* outFn );
  217. /// Identical to cmFtReaderToBinary() except it takes a feature file name instead of a file handle.
  218. cmFtRC_t cmFtReaderToBinaryFn( cmFtH_t h, const cmChar_t* fn, unsigned featId, unsigned frmIdx, unsigned frmCnt, const cmChar_t* outFn );
  219. ///@}
  220. #ifdef __cplusplus
  221. }
  222. #endif
  223. #endif