libcm is a C development framework with an emphasis on audio signal processing applications.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cmFeatFile.h 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. //| Copyright: (C) 2009-2020 Kevin Larke <contact AT larke DOT org>
  2. //| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
  3. //( { file_desc:" Audio file acoustic feature analyzer and accompanying file reader." kw:[audio analysis file]}
  4. //
  5. //
  6. #ifndef cmFeatFile_h
  7. #define cmFeatFile_h
  8. #ifdef __cplusplus
  9. extern "C" {
  10. #endif
  11. // Result codes (cmFtRC_t values) returned by the functions in cmFeatFile.h
  12. enum
  13. {
  14. kOkFtRC = cmOkRC, // takes the value of cmOkRC; each following code is one greater than the previous
  15. kCfgParseFailFtRC,
  16. kFileSysFailFtRC,
  17. kJsonFailFtRC,
  18. kDspProcFailFtRC,
  19. kDirCreateFailFtRC,
  20. kFileNotFoundFtRC,
  21. kAudioFileOpenFailFtRC,
  22. kFrameFileFailFtRC,
  23. kChIdxInvalidFtRC,
  24. kParamRangeFtRC,
  25. kParamErrorFtRC,
  26. kFrameWriteFailFtRC,
  27. kEofFtRC, // returned by cmFtReaderAdvance() upon reaching end of file
  28. kPlviewFailFtRC,
  29. kSerialFailFtRC,
  30. kInvalidFeatIdFtRC,
  31. kFileFailFtRC,
  32. kInvalidFrmIdxFtRC
  33. };
  34. // Feature ids. The number at the start of each comment is the enumerator's value.
  35. enum
  36. {
  37. kInvalidFtId, // 0
  38. kAmplFtId, // 1 Fourier transform amplitude
  39. kDbAmplFtId, // 2 Fourier transform decibel
  40. kPowFtId, // 3 Fourier transform power
  41. kDbPowFtId, // 4 Fourier transform power decibel
  42. kPhaseFtId, // 5 Fourier transform phase (not unwrapped)
  43. kBfccFtId, // 6 Bark Frequency Cepstral Coefficients
  44. kMfccFtId, // 7 Mel Frequency Cepstral Coefficients
  45. kCepsFtId, // 8 Cepstral Coefficients
  46. kConstQFtId, // 9 Constant-Q transform
  47. kLogConstQFtId, // 10 Log Constant-Q transform
  48. kRmsFtId, // 11 Root mean square of the audio signal
  49. kDbRmsFtId, // 12 RMS in decibels
  50. kD1AmplFtId, // 13 1st order difference over time of the Fourier transform amplitude
  51. kD1DbAmplFtId, // 14 1st order difference over time of the Fourier transform decibel
  52. kD1PowFtId, // 15 1st order difference over time of the Fourier transform power
  53. kD1DbPowFtId, // 16 1st order difference over time of the Fourier transform power decibel
  54. kD1PhaseFtId, // 17 1st order difference over time of the Fourier transform phase (not unwrapped)
  55. kD1BfccFtId, // 18 1st order difference over time of the Bark Frequency Cepstral Coefficients
  56. kD1MfccFtId, // 19 1st order difference over time of the Mel Frequency Cepstral Coefficients
  57. kD1CepsFtId, // 20 1st order difference over time of the Cepstral Coefficients
  58. kD1ConstQFtId, // 21 1st order difference over time of the Constant-Q transform
  59. kD1LogConstQFtId, // 22 1st order difference over time of the Log Constant-Q transform
  60. kD1RmsFtId, // 23 1st order difference over time of the root mean square of the audio signal
  61. kD1DbRmsFtId, // 24 1st order difference over time of the RMS in decibels
  62. };
  63. // User defined feature parameters (one record per feature; see cmFtParam_t.attrArray)
  64. typedef struct
  65. {
  66. unsigned id; // feature id (presumably one of the kXXXFtId values - confirm)
  67. unsigned cnt; // length of the feature vector
  68. bool normFl; // true to normalize this feature
  69. bool enableFl; // true if this feature is enabled
  70. } cmFtAttr_t;
  71. // Record describing one range of input audio samples to skip (see cmFtParam_t.skipArray)
  72. typedef struct
  73. {
  74. unsigned smpIdx; // Index of the first sample to skip.
  75. unsigned smpCnt; // Count of successive samples to skip.
  76. } cmFtSkip_t;
  77. // Analysis parameters
  78. typedef struct
  79. {
  80. const char* audioFn; // Audio file name.
  81. const char* featFn; // Feature file name.
  82. unsigned chIdx; // Audio file channel index.
  83. cmReal_t wndMs; // Length of the analysis window in milliseconds.
  84. unsigned hopFact; // Analysis window overlap factor: 1 = 1:1, 2 = 2:1, ...
  85. bool normAudioFl; // Normalize the audio over the length of the audio file.
  86. cmMidiByte_t constQMinPitch; // Used to determine the base const-q octave.
  87. cmMidiByte_t constQMaxPitch; // Used to determine the maximum const-q frequency of interest.
  88. unsigned constQBinsPerOctave; // Bands per const-q octave.
  89. unsigned onsetMedFiltWndSmpCnt; // Complex onset median filter (name suggests a window length in samples - confirm)
  90. cmReal_t onsetThreshold; // Complex onset threshold
  91. cmReal_t minDb; // Fourier Transform magnitude values below minDb are set to minDb.
  92. cmReal_t floorThreshDb; // Frames with an RMS below this value will be skipped.
  93. cmFtSkip_t* skipArray; // skipArray[skipCnt] user defined sample skip ranges
  94. unsigned skipCnt; // Count of records in skipArray[].
  95. cmFtAttr_t* attrArray; // attrArray[attrCnt] user defined parameter array
  96. unsigned attrCnt; // Count of records in attrArray[].
  97. } cmFtParam_t;
  98. // Feature summary information
  99. typedef struct
  100. {
  101. unsigned id; // feature id (same as associated cmFtAttr_t.id)
  102. unsigned cnt; // length of each feature vector (same as associated cmFtAttr_t.cnt)
  103. // The raw feature summary values are calculated prior to normalization.
  104. cmReal_t* rawMinV; // Vector of min value over time for each feature element.
  105. cmReal_t* rawMaxV; // Vector of max value over time for each feature element.
  106. cmReal_t* rawAvgV; // Vector of avg value over time for each feature element.
  107. cmReal_t* rawSdvV; // Vector of standard deviation values over time for each feature element.
  108. cmReal_t rawMin; // Min value of all values for this feature. Equivalent to min(rawMinV).
  109. cmReal_t rawMax; // Max value of all values for this feature. Equivalent to max(rawMaxV).
  110. // Normalized feature summary values.
  111. cmReal_t* normMinV; // Vector of min value over time for each feature element.
  112. cmReal_t* normMaxV; // Vector of max value over time for each feature element.
  113. cmReal_t* normAvgV; // Vector of avg value over time for each feature element.
  114. cmReal_t* normSdvV; // Vector of standard deviation values over time for each feature element.
  115. cmReal_t normMin; // Min value of all values for this feature. Equivalent to min(normMinV).
  116. cmReal_t normMax; // Max value of all values for this feature. Equivalent to max(normMaxV).
  117. } cmFtSumm_t;
  118. // Feature file info record (optionally returned by cmFtReaderOpen() via infoPtrPtr)
  119. typedef struct
  120. {
  121. unsigned frmCnt; // count of frames in the file
  122. cmReal_t srate; // audio sample rate
  123. unsigned smpCnt; // audio sample count
  124. unsigned fftSmpCnt; // FFT window length (always a power of 2)
  125. unsigned hopSmpCnt; // audio sample hop count
  126. unsigned binCnt; // FFT bin count (always fftSmpCnt/2 + 1)
  127. unsigned skipFrmCnt; // count of frames skipped based on the user skip array
  128. unsigned floorFrmCnt; // count of frames skipped because they were below floorThreshDb
  129. cmFtParam_t param; // analysis parameter record used to form this feature file
  130. cmFtSumm_t* summArray; // summArray[ param.attrCnt ] feature summary information
  131. } cmFtInfo_t;
  132. // Data structure filled in by cmFtReaderAdvance() to describe the loaded frame.
  133. typedef struct
  134. {
  135. unsigned smpIdx; // The audio signal sample index this frame's information is based on.
  136. unsigned frmIdx; // The frame index relative to other frames in this feature file.
  137. } cmFtFrameDesc_t;
  138. typedef cmHandle_t cmFtH_t; // Analyzer handle.
  139. typedef cmHandle_t cmFtFileH_t; // Feature file handle.
  140. typedef unsigned cmFtRC_t; // Result code type used by all functions in cmFeatFile.h (see the kXXXFtRC codes).
  141. extern cmFtH_t cmFtNullHandle; // A NULL handle useful for indicating an uninitialized analyzer.
  142. extern cmFtFileH_t cmFtFileNullHandle; // A NULL handle useful for indicating an uninitialized feature file.
  143. // Given a feature type id return the associated label. NOTE(review): the return value for an unrecognized id is not documented here - confirm.
  144. const char* cmFtFeatIdToLabel( unsigned featId );
  145. // Given a feature type label return the associated id. NOTE(review): the return value for an unrecognized label is not documented here (presumably kInvalidFtId) - confirm.
  146. unsigned cmFtFeatLabelToId( const char* label );
  147. // Feature analyzer related functions
  148. // Initialize the feature analyzer. The memory manager and file system must
  149. // be initialized (cmMdInitialize(), cmFsInitialize()) prior to calling this function.
  150. cmFtRC_t cmFtInitialize( cmFtH_t* hp, cmCtx_t* ctx );
  151. // Finalize a feature analyzer.
  152. cmFtRC_t cmFtFinalize( cmFtH_t* h );
  153. // Return true if the handle represents an initialized feature analyzer.
  154. bool cmFtIsValid( cmFtH_t h );
  155. // Parse a JSON file containing a set of analysis parameters.
  156. cmFtRC_t cmFtParse( cmFtH_t h, const char* cfgFn );
  157. // Run the analyzer.
  158. cmFtRC_t cmFtAnalyze( cmFtH_t h );
  159. // If cmFtAnalyze() is being run in a separate thread this function
  160. // can be used to access the analyzer's progress. NOTE(review): the meaning of the returned string is not documented here - confirm.
  161. const char* cmFtAnalyzeProgress( cmFtH_t h, unsigned* passPtr, cmReal_t* percentPtr );
  162. // Feature file related functions
  163. // Open a feature file.
  164. // Note that infoPtrPtr is optional and will be ignored if it is set to NULL.
  165. cmFtRC_t cmFtReaderOpen( cmFtH_t h, cmFtFileH_t* hp, const char* featFn, const cmFtInfo_t** infoPtrPtr );
  166. // Close a feature file.
  167. cmFtRC_t cmFtReaderClose( cmFtFileH_t* hp );
  168. // Return true if the handle represents an open feature file.
  169. bool cmFtReaderIsValid( cmFtFileH_t h );
  170. // Return the count of feature types this file contains.
  171. unsigned cmFtReaderFeatCount( cmFtFileH_t h );
  172. // Return the feature type id associated with the specified index.
  173. unsigned cmFtReaderFeatId( cmFtFileH_t h, unsigned index );
  174. // Reset the current file location to the first frame but do not load it.
  175. // The next call to cmFtReaderAdvance() will load the next frame.
  176. cmFtRC_t cmFtReaderRewind( cmFtFileH_t h );
  177. // Make frmIdx the current file location.
  178. cmFtRC_t cmFtReaderSeek( cmFtFileH_t h, unsigned frmIdx );
  179. // Load the current frame, advance the current file position, and fill in
  180. // the cmFtFrameDesc_t record pointed to by frameDescPtr for the loaded frame.
  181. // Returns kEofFtRC upon reaching end of file.
  182. // The frameDescPtr is optional.
  183. cmFtRC_t cmFtReaderAdvance( cmFtFileH_t h, cmFtFrameDesc_t* frameDescPtr );
  184. // Returns a pointer to a data matrix in the feature identified by featId in the current feature frame. NOTE(review): cntPtr presumably receives the element count - confirm.
  185. cmReal_t* cmFtReaderData( cmFtFileH_t h, unsigned featId, unsigned* cntPtr );
  186. // Copy the contents of a given set of frames into buf[frmCnt*elePerFrmCnt]. NOTE(review): outEleCntPtr presumably receives the count of elements written - confirm.
  187. cmFtRC_t cmFtReaderCopy( cmFtFileH_t h, unsigned featId, unsigned frmIdx, cmReal_t* buf, unsigned frmCnt, unsigned elePerFrmCnt, unsigned* outEleCntPtr );
  188. // Data structure used to specify multiple features for use by cmFtReaderMultiSetup().
  189. typedef struct
  190. {
  191. unsigned featId; // Feature id of the feature to include in the feature vector.
  192. unsigned cnt; // Set to the count of feature elements for this feature. Error if greater than available. Set to -1 to use all available elements.
  193. // Returned with the actual count used.
  194. unsigned id0; // Ignored on input. Used internally by cmFtReaderXXX()
  195. unsigned id1; // Ignored on input. Used internally by cmFtReaderXXX()
  196. } cmFtMulti_t;
  197. // Set up an array of cmFtMulti_t records. The cmFtMulti_t array
  198. // used by cmFtReaderMultiData() must be initialized by this function.
  199. cmFtRC_t cmFtReaderMultiSetup( cmFtFileH_t h, cmFtMulti_t* multiArray, unsigned multiCnt, unsigned* featVectEleCntPtr );
  200. // Fill outV[outN] with consecutive data from the features specified in the cmFtMulti_t array.
  201. // Use cmFtReaderMultiSetup() to configure the cmFtMulti_t array prior to calling this function.
  202. cmFtRC_t cmFtReaderMultiData( cmFtFileH_t h, const cmFtMulti_t* multiArray, unsigned multiCnt, cmReal_t* outV, unsigned outN );
  203. // Report summary information for the specified feature.
  204. cmFtRC_t cmFtReaderReport( cmFtFileH_t h, unsigned featId );
  205. // Identical to cmFtReaderReport() except the feature file is identified by a file name rather than an open cmFtFileH_t.
  206. cmFtRC_t cmFtReaderReportFn( cmFtH_t h, const cmChar_t* fn, unsigned featId );
  207. // Report feature data for the specified set of feature frames.
  208. cmFtRC_t cmFtReaderReportFeature( cmFtFileH_t h, unsigned featId, unsigned frmIdx, unsigned frmCnt );
  209. // Write a feature into a binary file.
  210. // Set 'frmCnt' to cmInvalidCnt to include all frames past frmIdx.
  211. // The first three unsigned values in the output file
  212. // contain the row count, maximum column count, and the count of bytes in each data element (4=float,8=double).
  213. // Each row of the file begins with the count of elements in the row and is followed by a data array.
  214. cmFtRC_t cmFtReaderToBinary( cmFtFileH_t h, unsigned featId, unsigned frmIdx, unsigned frmCnt, const cmChar_t* outFn );
  215. // Identical to cmFtReaderToBinary() except it takes a feature file name instead of a file handle.
  216. cmFtRC_t cmFtReaderToBinaryFn( cmFtH_t h, const cmChar_t* fn, unsigned featId, unsigned frmIdx, unsigned frmCnt, const cmChar_t* outFn );
  217. //)
  218. #ifdef __cplusplus
  219. }
  220. #endif
  221. #endif