Mass++ mzML IO Plugin v2.7.5
 All Classes Namespaces Files Functions Variables Enumerations
MzmlManager.cpp
Go to the documentation of this file.
1 
12 #include "stdafx.h"
13 #include "MzmlManager.h"
14 
15 #include "Accession.h"
16 #include "AccessionManager.h"
17 
18 #include <float.h>
19 
20 #include <vector>
21 #include <set>
22 #include <algorithm>
23 #include <boost/filesystem/path.hpp>
24 #include <boost/filesystem/operations.hpp>
25 
26 
27 #ifdef _MSC_VER
28  #include <Winsock2.h>
29 #else
30  #include <unistd.h>
31 #endif
32 
33 
34 using namespace kome::io::mzml;
35 
36 
37 #include <crtdbg.h>
38 #ifdef _DEBUG
39  #define new new( _NORMAL_BLOCK, __FILE__, __LINE__ )
40  #define malloc( s ) _malloc_dbg( s, _NORMAL_BLOCK, __FILE__, __LINE__ )
41 #endif // _DEBUG
42 
43 
44 
45 #define SOFTWARE_ACC "MS:1000531"
46 #define MACHINE_ACC "MS:1000031"
47 
48 #define DATA_STATUS "data_status"
49 
50 // #define MZML_USE_REFERENCEABLE_PARAM_GROUP
51 
52 
53 // constructor
54 MzmlManager::MzmlManager() : m_reverseFlg( isbigendian() ) {
55  // initialize
56  init();
57 }
58 
59 // destructor
61 }
62 
63 // initialize variables
65  m_specIdxMap.clear();
66  m_chromIdxMap.clear();
67 }
68 
69 // export data
70 bool MzmlManager::exportDataSet(
71  const char* path,
72  kome::objects::DataSet& dataSet,
73  kome::core::Progress& progress,
74  kome::objects::SettingParameterValues* saveSettings // @date 2012/08/21 <Add> FUJITA
75 ) {
76  // initialize
77  init();
78 
79  // check spectra
80  if( dataSet.getNumberOfSpectra() == 0 && dataSet.getNumberOfChromatograms() == 0 ) {
81  LOG_ERROR_CODE( FMT( "There is no data to be exported. Open a data file first." ), ERR_OTHER );
82  return false;
83  }
84 
85  // open the file
86  FILE* fp = fileopen( path, "wb" );
87  if( fp == NULL ) {
88  LOG_ERROR_CODE( FMT( "Failed to open the file for writing. Check file / folder permissions. [%s]", path ), ERR_FILE_OPEN_FAILED );
89  return false;
90  }
91 
92  // export
93  exportDataSet( fp, dataSet, progress, saveSettings );
94 
95  // close
96  fflush( fp );
97  fclose( fp );
98 
99  return true;
100 }
101 
102 // export data
103 void MzmlManager::exportDataSet(
104  FILE* fp,
105  kome::objects::DataSet& dataSet,
106  kome::core::Progress& progress,
107  kome::objects::SettingParameterValues* saveSettings
108 ) {
109  // data array
110  std::vector< kome::objects::Spectrum* > spectra;
111  std::vector< kome::objects::Chromatogram* > chroms;
112  std::vector< kome::objects::Sample* > samples;
113  std::set< kome::objects::Sample* > sampleSet;
114 
115  for( unsigned int i = 0; i < dataSet.getNumberOfSpectra(); i++ ) {
116  kome::objects::Spectrum* spec = dataSet.getSpectrum( i );
117  spectra.push_back( spec );
118 
119  kome::objects::Sample* smpl = spec->getSample();
120  if( smpl != NULL && sampleSet.find( smpl ) == sampleSet.end() ) {
121  sampleSet.insert( smpl );
122  samples.push_back( smpl );
123  }
124  }
125 
126  for( unsigned int i = 0; i < dataSet.getNumberOfChromatograms(); i++ ) {
127  kome::objects::Chromatogram* chrom = dataSet.getChromatogram( i );
128  if( !chrom->isAutoCreated() ) {
129  chroms.push_back( chrom );
130  }
131 
132  kome::objects::Sample* smpl = chrom->getSample();
133  if( smpl != NULL && sampleSet.find( smpl ) == sampleSet.end() ) {
134  sampleSet.insert( smpl );
135  samples.push_back( smpl );
136  }
137  }
138 
139  // add sort @date 2011.10.31 <Add> M.Izumi
140  std::sort( spectra.begin(), spectra.end(), lessSpec );
141 
142  // progress
143  const bool specFlg = ( dataSet.getNumberOfSpectra() > 0 );
144  const bool chromFlg = ( dataSet.getNumberOfChromatograms() > 0 );
145 
146  unsigned int subPrgsNum = 1;
147  if( specFlg ) {
148  subPrgsNum++;
149  }
150  if( chromFlg ) {
151  subPrgsNum++;
152  }
153 
154  progress.createSubProgresses( subPrgsNum );
155 
156  kome::core::Progress* mainPrgs = progress.getSubProgress( 0 );
157  kome::core::Progress* specPrgs = ( specFlg ? progress.getSubProgress( 1 ) : NULL );
158  kome::core::Progress* chromPrgs = ( chromFlg ? progress.getSubProgress( subPrgsNum - 1 ) : NULL );
159 
160  mainPrgs->setRange( 0, 7 );
161 
162  // header
163  mainPrgs->setPosition( 0 );
164  progress.setStatus( "Writing Header..." );
165  writeHeader( fp );
166  mainPrgs->setPosition( 1 );
167 
168  // file description
169  writeFileDescription( fp, samples );
170  mainPrgs->setPosition( 2 );
171 
172  // referenceable param group list
173  writeReferenceableParamGroup( fp, ( spectra.size() > 0 ), ( chroms.size() > 0 ) );
174  mainPrgs->setPosition( 3 );
175 
176  // software listsssss
177  writeSoftwareList( fp, samples );
178  mainPrgs->setPosition( 4 );
179 
180  // instrument list
181  writeInstrumentList( fp, samples );
182  mainPrgs->setPosition( 5 );
183 
184  // processing list
185  writeProcessingList( fp );
186  mainPrgs->setPosition( 6 );
187 
188  // run tag
189  writeRun( fp, samples, spectra, chroms, specPrgs, chromPrgs, saveSettings );
190  if( progress.isStopped() ) {
191  return;
192  }
193 
194  if( specPrgs != NULL ) {
195  specPrgs->fill();
196  }
197  if( chromPrgs != NULL ) {
198  chromPrgs->fill();
199  }
200 
201  // index list
202  writeIndexList( fp, spectra, chroms );
203 
204  mainPrgs->setPosition( 7 );
205  mainPrgs->fill();
206  progress.fill();
207 }
208 
209 // write header
210 void MzmlManager::writeHeader( FILE* fp ) {
211  fprintf( fp, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
212  fprintf( fp, "<indexedmzML xmlns=\"http://psi.hupo.org/ms/mzml\"\n" );
213  fprintf( fp, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" );
214  fprintf( fp, " xsi:schemaLocation=\"http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd\">\n" );
215  fprintf( fp, " <mzML xmlns=\"http://psi.hupo.org/ms/mzml\"\n" );
216  fprintf( fp, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" );
217  fprintf( fp, " xsi:schemaLocation=\"http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd\"\n" );
218  fprintf( fp, " version=\"1.1.0\">\n" );
219  fprintf( fp, " <cvList count=\"2\">\n" );
220  fprintf( fp, " <cv id=\"MS\"\n" );
221  fprintf( fp, " fullName=\"Proteomics Standards Initiative Mass Spectrometry Ontology\"\n" );
222  fprintf( fp, " version=\"1.3.1\"\n" );
223  fprintf( fp, " URI=\"http://psidev.info/ms/mzML/psi-ms.obo\" />\n" );
224  fprintf( fp, " <cv id=\"UO\"\n" );
225  fprintf( fp, " fullName=\"Unit Ontology\"\n" );
226  fprintf( fp, " version=\"1.15\"\n" );
227  fprintf( fp, " URI=\"http://obo.cvs.sourceforge.net/obo/obo/ontology/phenotype/unit.obo\" />\n" );
228  fprintf( fp, " </cvList>\n" );
229 }
230 
231 // write file description
232 void MzmlManager::writeFileDescription( FILE* fp, std::vector< kome::objects::Sample* >& samples ) {
233  // start tag
234  fprintf( fp, " <fileDescription>\n" );
235 
236  writeFileContent( fp, samples );
237  writeSourceFileList( fp, samples );
238 
239  // end tag
240  fprintf( fp, " </fileDescription>\n" );
241 
242 }
243 
244 // write file content
245 void MzmlManager::writeFileContent( FILE* fp, std::vector< kome::objects::Sample* >& samples ) {
246  // start tag
247  fprintf( fp, " <fileContent>\n" );
248 
249  // file content
250  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000580\" name=\"MSn spectrum\" value=\"\"/>\n" );
251 
252  // end tag
253  fprintf( fp, " </fileContent>\n" );
254 }
255 
256 // write source file list
257 void MzmlManager::writeSourceFileList( FILE* fp, std::vector< kome::objects::Sample* >& samples ) {
258  // manager
260 
261  // start tag
262  fprintf( fp, " <sourceFileList>\n" );
263 
264  // each samples
265  std::set< kome::objects::SampleSet* > sampleSets;
266 
267  for( unsigned int i = 0; i < samples.size(); i++ ) {
268  kome::objects::Sample* s = samples[ i ];
269  kome::objects::SampleSet* ss = s->getSampleSet();
270 
271  if( sampleSets.find( ss ) == sampleSets.end() ) {
272  sampleSets.insert( ss );
273 
274  // file name & directory
275  std::string fileName = ss->getFileName();
276  std::string fileDir = getdir( ss->getFilePath() );
277  fileDir = replacestring( fileDir.c_str(), "\\", "/" );
278 
279  fprintf(
280  fp,
281  " <sourceFile id=\"SF%d\" name=\"%s\" location=\"file://%s\" />\n",
282  ( i + 1 ),
283  fileName.c_str(),
284  fileDir.c_str()
285  );
286  }
287  }
288 
289  // end tag
290  fprintf( fp, " </sourceFileList>\n" );
291 }
292 
293 // write referenceable parameters group
294 void MzmlManager::writeReferenceableParamGroup( FILE* fp, const bool specFlg, const bool chromFlg ) {
295 
296 #ifdef MZML_USE_REFERENCEABLE_PARAM_GROUP
297 
298  // count
299  int cnt = 0;
300  if( specFlg && chromFlg ) {
301  cnt = 3;
302  }
303  else if( specFlg || chromFlg ) {
304  cnt = 2;
305  }
306  if( cnt == 0 ) {
307  return;
308  }
309 
310  // start tag
311  fprintf( fp, " <referenceableParamGroupList count=\"%d\">\n", cnt );
312 
313  // m/z
314  if( specFlg ) {
315  fprintf( fp, " <referenceableParamGroup id=\"mz_params\">\n" );
316  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000514\" name=\"m/z array\" value=\"\"\n" );
317  fprintf( fp, " unitCvRef=\"MS\" unitAccession=\"MS:1000040\" unitName=\"m/z\"/>\n" );
318  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
319  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\" value=\"\"/>\n" );
320  fprintf( fp, " </referenceableParamGroup>\n" );
321  }
322 
323  // RT
324  if( chromFlg ) {
325  fprintf( fp, " <referenceableParamGroup id=\"rt_params\">\n" );
326  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000595\" name=\"time array\" value=\"\"\n" );
327  fprintf( fp, " unitCvRef=\"UO\" unitAccession=\"UO:0000031\" unitName=\"minute\"/>\n" );
328  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
329  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\" value=\"\"/>\n" );
330  fprintf( fp, " </referenceableParamGroup>\n" );
331  }
332 
333  // intensity
334  fprintf( fp, " <referenceableParamGroup id=\"int_params\">\n" );
335  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000515\" name=\"intensity array\" value=\"\"/>\n" );
336  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
337  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\ value=\"\"/>\n" );
338  fprintf( fp, " </referenceableParamGroup>\n" );
339 
340  // end tag
341  fprintf( fp, " </referenceableParamGroupList>\n" );
342 
343 #endif // MZML_USE_REFERENCEABLE_PARAM_GROUP
344 }
345 
346 // write software list
347 void MzmlManager::writeSoftwareList( FILE* fp, std::vector< kome::objects::Sample* >& samples ) {
348  // managers
349  kome::core::MsppManager& msppMgr = kome::core::MsppManager::getInstance();
351 
352  // softwares
353  std::vector< std::pair< std::string, std::string > > softwares;
354  std::set< std::string > softwareSet;
355 
356  std::string software = "Mass_plus_plus";
357  std::string version = msppMgr.getVersion();
358 
359  softwareSet.insert( software );
360  softwares.push_back( std::make_pair( software, version ) );
361 
362  for( unsigned int i = 0; i < samples.size(); i++ ) {
363  kome::objects::Sample* smpl = samples[ i ];
364 
365  software = smpl->getSoftwareName();
366  version = smpl->getSoftwareVersion();
367 
368  if( !software.empty() && softwareSet.find( software ) == softwareSet.end() ) {
369  softwareSet.insert( software );
370  softwares.push_back( std::make_pair( software, version ) );
371  }
372  }
373 
374  // start tag
375  fprintf( fp, " <softwareList count=\"%d\">\n", softwares.size() );
376 
377  // software accession
378  Accession* parentAcc = accMgr.getAccession( SOFTWARE_ACC );
379  const double minScore = 3.0;
380 
381  // each softwares
382  for( unsigned int i = 0; i < softwares.size(); i++ ) {
383  // name, version
384  software = softwares[ i ].first;
385  version = softwares[ i ].second;
386 
387  // accession
388  Accession* softAcc = NULL;
389  double score = minScore - 0.01;
390 
391  for( unsigned int j = 0; j < accMgr.getNumberOfAccessions(); j++ ) {
392  Accession* tmp = accMgr.getAccession( j );
393  if( tmp->getOrigin() == parentAcc ) {
394  double tmpScore = getSimilarityScore( tmp->getName(), software.c_str() );
395  if( tmpScore > score ) {
396  softAcc = tmp;
397  score = tmpScore;
398  }
399  }
400  }
401 
402  // tag
403  fprintf( fp, " <software id=\"%s\" version=\"%s\"", software.c_str(), version.c_str() );
404  if( softAcc == NULL ) {
405  fprintf( fp, "/>\n" );
406  }
407  else {
408  fprintf(
409  fp,
410  ">\n <cvParam cvRef=\"MS\" accession=\"%s\" name=\"%s\" value=\"\"/>\n",
411  softAcc->getId(),
412  softAcc->getName()
413  );
414  fprintf( fp, " </software>\n" );
415  }
416  }
417 
418  // end tag
419  fprintf( fp, " </softwareList>\n" );
420 }
421 
422 // write instrument list
423 void MzmlManager::writeInstrumentList( FILE* fp, std::vector< kome::objects::Sample* >& samples ) {
424  // manager
426 
427  // instruments
428  std::vector< std::string > instruments;
429  std::set< std::string > instrumentSet;
430 
431  for( unsigned int i = 0; i < samples.size(); i++ ) {
432  std::string instrument = samples[ i ]->getInstrument();
433  if( instrument.empty() ) {
434  instrument = "undefined";
435  }
436 
437  if( instrumentSet.find( instrument ) == instrumentSet.end() ) {
438  instrumentSet.insert( instrument );
439  instruments.push_back( instrument );
440  }
441  }
442 
443  // start tag
444  fprintf( fp, " <instrumentConfigurationList count=\"%d\">\n", instruments.size() );
445 
446  // each instruments
447  Accession* parentAcc = accMgr.getAccession( MACHINE_ACC );
448  const double minScore = 3.0;
449 
450  for( unsigned int i = 0; i < instruments.size(); i++ ) {
451  // accession
452  std::string instrument = instruments[ i ];
453  Accession* machineAcc = NULL;
454  double score = minScore - 0.01;
455 
456  for( unsigned int j = 0; j < accMgr.getNumberOfAccessions(); j++ ) {
457  Accession* tmp = accMgr.getAccession( j );
458  if( tmp->getOrigin() == parentAcc ) {
459  double tmpScore = getSimilarityScore( tmp->getName(), instrument.c_str() );
460  if( tmpScore > score ) {
461  machineAcc = tmp;
462  score = tmpScore;
463  }
464  }
465  }
466 
467  // tag
468  fprintf( fp, " <instrumentConfiguration id=\"%s\"", instrument.c_str() );
469  if( machineAcc == NULL ) {
470  fprintf( fp, "/>\n" );
471  }
472  else {
473  fprintf(
474  fp,
475  ">\n <cvParam cvRef=\"MS\" accession=\"%s\" name=\"%s\" value=\"\"/>\n",
476  machineAcc->getId(),
477  machineAcc->getName()
478  );
479  fprintf( fp, " </instrumentConfiguration>\n" );
480  }
481  }
482 
483  // end tag
484  fprintf( fp, " </instrumentConfigurationList>\n" );
485 }
486 
487 // write processing list
489  // start tag
490  fprintf( fp, " <dataProcessingList count=\"1\">\n" );
491 
492  fprintf( fp, " <dataProcessing id=\"Mspp_proc\">\n" );
493  fprintf( fp, " <processingMethod softwareRef=\"Mass_plus_plus\" order=\"1\">\n" );
494  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000544\" name=\"Conversion to mzML\" value=\"\"/>\n" );
495  fprintf( fp, " </processingMethod>\n" );
496  fprintf( fp, " </dataProcessing>\n" );
497 
498  // end tag
499  fprintf( fp, " </dataProcessingList>\n" );
500 }
501 
502 // write run tag
504  FILE* fp,
505  std::vector< kome::objects::Sample* >& samples,
506  std::vector< kome::objects::Spectrum* >& spectra,
507  std::vector< kome::objects::Chromatogram* >& chroms,
508  kome::core::Progress* specPrgs,
509  kome::core::Progress* chromPrgs,
510  kome::objects::SettingParameterValues* saveSettings // @date 2012/08/21 <Add> FUJITA
511 ) {
512  // instrument
513  std::string instrument;
514  if( samples.size() > 0 ) {
515  instrument = samples.front()->getInstrument();
516  }
517  if( instrument.empty() ) {
518  instrument = "undefined";
519  }
520 
521  // start tag
522  fprintf( fp, " <run id=\"R1\" defaultInstrumentConfigurationRef=\"%s\">\n", instrument.c_str() );
523 
524  // write spectra
525  fprintf(
526  fp,
527  " <spectrumList defaultDataProcessingRef=\"Mspp_proc\" count=\"%d\"",
528  spectra.size()
529  );
530 
531  if( spectra.size() == 0 ) {
532  fprintf( fp, "/>\n" );
533  }
534  else {
535  fprintf( fp, ">\n" );
536 
537  specPrgs->setRange( 0, spectra.size() );
538  specPrgs->setPosition( 0 );
539 
540  // each spectra
541  for( unsigned int i = 0; i < spectra.size() && !specPrgs->isStopped(); i++ ) {
542  // spectrum
543  kome::objects::Spectrum* spec = spectra[ i ];
544 
545  std::string msg = FMT(
546  "Writing Spectrum [%d/%d] ..... %s",
547  ( i + 1 ),
548  spectra.size(),
549  spec->getName()
550  );
551  specPrgs->setStatus( msg.c_str() );
552 
553  // write spectrum
554  writeSpectrum( fp, *spec, saveSettings );
555  specPrgs->setPosition( i + 1 );
556  }
557 
558  if( specPrgs->isStopped() ) {
559  return;
560  }
561  specPrgs->fill();
562 
563  fprintf( fp, " </spectrumList>\n" );
564  }
565 
566  // write chromatograms
567  if( chroms.size() > 0 ) {
568  fprintf(
569  fp,
570  " <chromatogramList defaultDataProcessingRef=\"Mspp_proc\" count=\"%d\">\n",
571  chroms.size()
572  );
573 
574  chromPrgs->setRange( 0, chroms.size() );
575  chromPrgs->setPosition( 0 );
576 
577  // each chromatograms
578  for( unsigned int i = 0; i < chroms.size() && !chromPrgs->isStopped(); i++ ) {
579  // chromatogram
580  kome::objects::Chromatogram* chrom = chroms[ i ];
581 
582  std::string msg = FMT(
583  "Writing Chromatogram [%d/%d] ..... %s",
584  ( i + 1 ),
585  chroms.size(),
586  chrom->getName()
587  );
588  chromPrgs->setStatus( msg.c_str() );
589 
590  // write chromatogram
591  writeChromatogram( fp, *chrom, saveSettings );
592  chromPrgs->setPosition( i + 1 );
593  }
594 
595  if( chromPrgs->isStopped() ) {
596  return;
597  }
598  chromPrgs->fill();
599 
600  fprintf( fp, " </chromatogramList>\n" );
601  }
602 
603  // end tag
604  fprintf( fp, " </run>\n" );
605 }
606 
607 // write spectrum
609  FILE* fp,
610  kome::objects::Spectrum& spec,
611  kome::objects::SettingParameterValues* saveSettings
612 ) {
613  // get data status
614  bool op = saveSettings->getBoolValue( DATA_STATUS,false );
615 
616  // data points
617  kome::core::DataPoints tmpDps;
618  kome::core::DataPoints dps( kome::core::DataPoints::FLOAT );
619 
620  spec.getXYData( &tmpDps, op );
621  for( unsigned int i = 0; i < tmpDps.getLength(); i++ ) {
622  const double x = tmpDps.getX( i );
623  const double y = tmpDps.getY( i );
624 
625  if( y > 0.0 ) {
626  dps.addPoint( x, y );
627  }
628  }
629 
630  // reverse
631  const unsigned int len = dps.getLength();
632  if( m_reverseFlg ) {
633  float* xArr = (float*)dps.getXData();
634  float* yArr = (float*)dps.getYData();
635 
636  for( unsigned int i = 0; i < len; i++ ) {
637  memreverse( xArr + i, sizeof( float ) );
638  memreverse( yArr + i, sizeof( float ) );
639  }
640  }
641 
642  // index
643  const int idx = (int)m_specIdxMap.size();
644 
645  // start tag
646  fprintf( fp, " " );
647 
648  m_specIdxMap[ &spec ] = filetell( fp );
649 
650  // >>>>>> @Date:2013/08/19 <Modify> A.Ozaki
651  //
652  std::string strSpotId;
653  strSpotId.clear( );
654  if ( (char *)NULL != spec.getSpotId( ) )
655  {
656  strSpotId = FMT( "%s", spec.getSpotId( ) );
657  }
658 
659  if ( 0 == strSpotId.length( ) )
660  {
661  fprintf(
662  fp,
663  "<spectrum id=\"%s\" index=\"%d\" defaultArrayLength=\"%d\">\n",
664  spec.getName(),
665  idx,
666  len
667  );
668  }
669  else
670  {
671  fprintf(
672  fp,
673  "<spectrum id=\"%s\" index=\"%d\" defaultArrayLength=\"%d\" spotID=\"%s\">\n",
674  spec.getName(),
675  idx,
676  len,
677  strSpotId.c_str( )
678  );
679  }
680  //
681  // <<<<<< @Date:2013/08/19 <Modify> A.Ozaki
682 
683  // properties
684  kome::core::Properties& props = spec.getProperties();
685 
686  if( spec.getMsStage() >= 1 ) {
687  fprintf(
688  fp,
689  " <cvParam cvRef=\"MS\" accession=\"MS:1000511\" name=\"ms level\" value=\"%d\"/>\n",
690  spec.getMsStage()
691  );
692  }
693 
694  if( spec.isCentroidMode() ) {
695  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000127\" name=\"centroid spectrum\" value=\"\"/>\n" );
696  }
697  else {
698  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000128\" name=\"profile spectrum\" value=\"\"/>\n" );
699  }
700 
701  if( spec.getPolarity() == kome::objects::Spectrum::POLARITY_NEGATIVE ) {
702  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000129\" name=\"negative scan\" value=\"\"/>\n" );
703  }
704  else if( spec.getPolarity() == kome::objects::Spectrum::POLARITY_POSITIVE ) {
705  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000130\" name=\"positive scan\" value=\"\"/>\n" );
706  }
707 
708  if( spec.getTotalIntensity() >= 0.0 ) {
709  fprintf(
710  fp,
711  " <cvParam cvRef=\"MS\" accession=\"MS:1000285\" name=\"total ion current\" value=\"%f\"/>\n",
712  spec.getTotalIntensity()
713  );
714  }
715 
716  if( spec.getBasePeakMass() >= 0.0 ) {
717  fprintf(
718  fp,
719  " <cvParam cvRef=\"MS\" accession=\"MS:1000504\" name=\"base peak m/z\" value=\"%f\"/>\n",
720  spec.getBasePeakMass()
721  );
722  }
723 
724  if( spec.getMaxIntensity() >= 0.0 ) {
725  fprintf(
726  fp,
727  " <cvParam cvRef=\"MS\" accession=\"MS:1000505\" name=\"base peak intensity\" value=\"%f\"/>\n",
728  spec.getMaxIntensity()
729  );
730  }
731 
732  // scan list
733  fprintf(
734  fp,
735  " <scanList count=\"1\">\n"
736  );
737 
738  std::string instrument = spec.getSample()->getInstrument();
739  if( instrument.empty() ) {
740  instrument = "undefined";
741  }
742 
743  fprintf(
744  fp,
745  " <scan>\n"
746  );
747 
748  std::string filter = props.getStringValue( "Filter", "" );
749  if( !filter.empty() ) {
750  fprintf(
751  fp,
752  " <cvParam cvRef=\"MS\" accession=\"MS:1000512\" name=\"filter string\" value=\"%s\"/>\n",
753  filter.c_str()
754  );
755  }
756 
757  if( spec.getRt() >= 0.0 ) {
758  fprintf(
759  fp,
760  " <cvParam cvRef=\"MS\" accession=\"MS:1000016\" name=\"scan start time\" value=\"%f\"\n",
761  spec.getRt()
762  );
763 
764  fprintf(
765  fp,
766  " unitCvRef=\"UO\" unitAccession=\"UO:0000031\" unitName=\"minute\"/>\n"
767  );
768  }
769 
770  fprintf(
771  fp,
772  " <scanWindowList count=\"1\">\n"
773  );
774 
775  fprintf(
776  fp,
777  " <scanWindow>\n"
778  );
779 
780  if( spec.getMinX() >= 0.0 ) {
781  fprintf(
782  fp,
783  " <cvParam cvRef=\"MS\" accession=\"MS:1000501\" name=\"scan window lower limit\" value=\"%f\"/>\n",
784  spec.getMinX()
785  );
786  }
787  if( spec.getMaxX() >= 0.0 ) {
788  fprintf(
789  fp,
790  " <cvParam cvRef=\"MS\" accession=\"MS:1000500\" name=\"scan window upper limit\" value=\"%f\"/>\n",
791  spec.getMaxX()
792  );
793  }
794 
795  fprintf(
796  fp,
797  " </scanWindow>\n"
798  );
799 
800  fprintf(
801  fp,
802  " </scanWindowList>\n"
803  );
804 
805  fprintf(
806  fp,
807  " </scan>\n"
808  );
809 
810  fprintf(
811  fp,
812  " </scanList>\n"
813  );
814 
815 // precursor
816  const double precursor = spec.getPrecursor();
817  if( precursor >= 0.0 ) {
818  kome::objects::Spectrum* parent = spec.getParentSpectrum();
819  std::string parentId = ( parent == NULL ? "" : parent->getName() );
820 
821  fprintf(
822  fp,
823  " <precursorList count=\"1\">\n"
824  );
825 
826  fprintf(
827  fp,
828  " <precursor spectrumRef=\"%s\">\n",
829  parentId.c_str()
830  );
831 
832  fprintf(
833  fp,
834  " <selectedIonList count=\"1\">\n"
835  );
836 
837  fprintf(
838  fp,
839  " <selectedIon>\n"
840  );
841 
842  fprintf(
843  fp,
844  " <cvParam cvRef=\"MS\" accession=\"MS:1000744\" name=\"selected ion m/z\"\n"
845  );
846 
847  fprintf(
848  fp,
849  " value=\"%f\" unitCvRef=\"MS\" unitAccession=\"MS:1000040\" unitName=\"m/z\"/>\n",
850  precursor
851  );
852 
853  fprintf(
854  fp,
855  " </selectedIon>\n"
856  );
857 
858  fprintf(
859  fp,
860  " </selectedIonList>\n"
861  );
862 
863  fprintf(
864  fp,
865  " </precursor>\n"
866  );
867 
868  fprintf(
869  fp,
870  " </precursorList>\n"
871  );
872  }
873 
874  // data points
875  if( len > 0 ) {
876  // prepare
877  unsigned long buffLength = len * sizeof( float ) * 2;
878  char* base64Buff = new char[ buffLength ];
879 
880  fprintf( fp, " <binaryDataArrayList count=\"2\">\n" );
881 
882  // m/z
883  unsigned long size = kome::core::Base64::encode( dps.getXData(), sizeof( float ) * len, base64Buff, buffLength );
884 
885  fprintf( fp, " <binaryDataArray encodedLength=\"%d\">\n", size );
886 
887 #ifdef MZML_USE_REFERENCEABLE_PARAM_GROUP
888  fprintf( fp, " <referenceableParamGroupRef ref=\"mz_params\"/>\n" );
889 #else
890  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000514\" name=\"m/z array\" value=\"\"\n" );
891  fprintf( fp, " unitCvRef=\"MS\" unitAccession=\"MS:1000040\" unitName=\"m/z\"/>\n" );
892  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
893  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\" value=\"\"/>\n" );
894 #endif // MZML_USE_REFERENCEABLE_PARAM_GROUP
895 
896  fprintf( fp, " <binary>" );
897 
898 
899  char* pos = base64Buff;
900  while( size > 0 ) {
901  int writeSize = MIN( size, 1024 );
902  writeSize = fwrite( pos, 1, writeSize, fp );
903  size -= writeSize;
904  pos += writeSize;
905  }
906 
907  fprintf( fp, "</binary>\n" );
908  fprintf( fp, " </binaryDataArray>\n" );
909 
910  // intensity
911  size = kome::core::Base64::encode( dps.getYData(), sizeof( float ) * len, base64Buff, buffLength );
912 
913  fprintf( fp, " <binaryDataArray encodedLength=\"%d\">\n", size );
914 
915 #ifdef MZML_USE_REFERENCEABLE_PARAM_GROUP
916  fprintf( fp, " <referenceableParamGroupRef ref=\"int_params\"/>\n" );
917 #else
918  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000515\" name=\"intensity array\" value=\"\"/>\n" );
919  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
920  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\" value=\"\"/>\n" );
921 #endif // MZML_USE_REFERENCEABLE_PARAM_GROUP
922 
923  fprintf( fp, " <binary>" );
924 
925  pos = base64Buff;
926  while( size > 0 ) {
927  int writeSize = MIN( size, 1024 );
928  writeSize = fwrite( pos, 1, writeSize, fp );
929  size -= writeSize;
930  pos += writeSize;
931  }
932 
933  fprintf( fp, "</binary>\n" );
934  fprintf( fp, " </binaryDataArray>\n" );
935 
936  fprintf( fp, " </binaryDataArrayList>\n" );
937 
938  delete[] base64Buff;
939  }
940 
941  // end tag
942  fprintf( fp, " </spectrum>\n" );
943 }
944 
945 // write chromatogram
947  FILE* fp,
948  kome::objects::Chromatogram& chrom,
949  kome::objects::SettingParameterValues* saveSettings
950 ) {
951  // get data status
952  bool op = saveSettings->getBoolValue( DATA_STATUS,false ); // @date 2012/08/21 <Add> FUJITA
953 
954  // data points
955  kome::core::DataPoints dps( kome::core::DataPoints::FLOAT );
956 // chrom.getXYData( &dps, false ); // @date 2012/08/21 <Del> FUJITA
957  chrom.getXYData( &dps, op ); // @date 2012/08/21 <Add> FUJITA
958 
959  // reverse
960  const unsigned int len = dps.getLength();
961  if( m_reverseFlg ) {
962  float* xArr = (float*)dps.getXData();
963  float* yArr = (float*)dps.getYData();
964 
965  for( unsigned int i = 0; i < len; i++ ) {
966  memreverse( xArr + i, sizeof( float ) );
967  memreverse( yArr + i, sizeof( float ) );
968  }
969  }
970 
971  // index
972  const int idx = m_chromIdxMap.size();
973 
974  // start tag
975  fprintf( fp, " " );
976 
977  m_chromIdxMap[ &chrom ] = filetell( fp );
978 
979  fprintf(
980  fp,
981  "<chromatogram id=\"%s\" index=\"%d\" defaultArrayLength=\"%d\">\n",
982  chrom.getName(),
983  idx,
984  len
985  );
986 
987  // data points
988  if( len > 0 ) {
989  // prepare
990  unsigned long buffLength = len * sizeof( float ) * 2;
991  char* base64Buff = new char[ buffLength ];
992 
993  fprintf( fp, " <binaryDataArrayList count=\"2\">\n" );
994 
995  // RT
996  unsigned long size = kome::core::Base64::encode( dps.getXData(), sizeof( float ) * len, base64Buff, buffLength );
997 
998  fprintf( fp, " <binaryDataArray encodedLength=\"%d\">\n", size );
999 
1000 #ifdef MZML_USE_REFERENCEABLE_PARAM_GROUP
1001  fprintf( fp, " <referenceableParamGroupRef ref=\"rt_params\"/>\n" );
1002 #else
1003  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000595\" name=\"time array\" value=\"\"\n" );
1004  fprintf( fp, " unitCvRef=\"UO\" unitAccession=\"UO:0000031\" unitName=\"minute\"/>\n" );
1005  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
1006  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\" value=\"\"/>\n" );
1007 #endif // MZML_USE_REFERENCEABLE_PARAM_GROUP
1008 
1009  fprintf( fp, " <binary>" );
1010 
1011  char* pos = base64Buff;
1012  while( size > 0 ) {
1013  int writeSize = MIN( size, 1024 );
1014  writeSize = fwrite( pos, 1, writeSize, fp );
1015  size -= writeSize;
1016  pos += writeSize;
1017  }
1018 
1019  fprintf( fp, "</binary>\n" );
1020  fprintf( fp, " </binaryDataArray>\n" );
1021 
1022  // intensity
1023  size = kome::core::Base64::encode( dps.getYData(), sizeof( float ) * len, base64Buff, buffLength );
1024 
1025  fprintf( fp, " <binaryDataArray encodedLength=\"%d\">\n", size );
1026 
1027 #ifdef MZML_USE_REFERENCEABLE_PARAM_GROUP
1028  fprintf( fp, " <referenceableParamGroupRef ref=\"int_params\"/>\n" );
1029 #else
1030  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000515\" name=\"intensity array\" value=\"\"/>\n" );
1031  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000521\" name=\"32-bit float\" value=\"\"/>\n" );
1032  fprintf( fp, " <cvParam cvRef=\"MS\" accession=\"MS:1000576\" name=\"no compression\" value=\"\"/>\n" );
1033 #endif // MZML_USE_REFERENCEABLE_PARAM_GROUP
1034 
1035  fprintf( fp, " <binary>" );
1036 
1037  pos = base64Buff;
1038  while( size > 0 ) {
1039  int writeSize = MIN( size, 1024 );
1040  writeSize = fwrite( pos, 1, writeSize, fp );
1041  size -= writeSize;
1042  pos += writeSize;
1043  }
1044 
1045  fprintf( fp, "</binary>\n" );
1046  fprintf( fp, " </binaryDataArray>\n" );
1047 
1048  fprintf( fp, " </binaryDataArrayList>\n" );
1049 
1050  delete[] base64Buff;
1051  }
1052 
1053  // end tag
1054  fprintf( fp, " </chromatogram>\n" );
1055 }
1056 
1057 // write index list
1059  FILE* fp,
1060  std::vector< kome::objects::Spectrum* >& spectra,
1061  std::vector< kome::objects::Chromatogram* >& chroms
1062 ) {
1063  // end mzML tag
1064  fprintf( fp, " </mzML>\n" );
1065 
1066  // list count
1067  int listCnt = 0;
1068  if( spectra.size() > 0 ) {
1069  listCnt++;
1070  }
1071  if( chroms.size() > 0 ) {
1072  listCnt++;
1073  }
1074 
1075  // write index list
1076  const long long idxOffset = filetell( fp ) + 2;
1077  if( listCnt > 0 ) {
1078  // start tag
1079  fprintf( fp, " <indexList count=\"%d\">\n", listCnt );
1080 
1081  // spectra
1082  if( spectra.size() > 0 ) {
1083  fprintf( fp, " <index name=\"spectrum\">\n" );
1084 
1085  for( unsigned int i = 0; i < spectra.size(); i++ ) {
1086  kome::objects::Spectrum* spec = spectra[ i ];
1087  if( m_specIdxMap.find( spec ) != m_specIdxMap.end() ) {
1088  const long long offset = m_specIdxMap[ spec ];
1089 
1090  fprintf( fp, " <offset idRef=\"%s\">%lld</offset>\n", spec->getName(), offset );
1091  }
1092  }
1093 
1094  fprintf( fp, " </index>\n" );
1095  }
1096 
1097  // chromatograms
1098  if( chroms.size() > 0 ) {
1099  fprintf( fp, " <index name=\"chromatogram\">\n" );
1100 
1101  for( unsigned int i = 0; i < chroms.size(); i++ ) {
1102  kome::objects::Chromatogram* chrom = chroms[ i ];
1103  if( m_chromIdxMap.find( chrom ) != m_chromIdxMap.end() ) {
1104  const long long offset = m_chromIdxMap[ chrom ];
1105 
1106  fprintf( fp, " <offset idRef=\"%s\">%lld</offset>\n", chrom->getName(), offset );
1107  }
1108  }
1109 
1110  fprintf( fp, " </index>\n" );
1111  }
1112 
1113  // end tag
1114  fprintf( fp, " </indexList>\n" );
1115 
1116  // index offset
1117  fprintf( fp, " " );
1118  fprintf( fp, "<indexListOffset>%lld</indexListOffset>\n", idxOffset );
1119  }
1120 
1121  // end root tag
1122  fprintf( fp, "</indexedmzML>\n" );
1123 }
1124 
1125 // gets similarity score
1126 double MzmlManager::getSimilarityScore( const char* s0, const char* s1 ) {
1127  // check parameters
1128  if( s0 == NULL || s1 == NULL ) {
1129  return ( - FLT_MAX );
1130  }
1131 
1132  // size
1133  unsigned int l0 = strlen( s0 );
1134  unsigned int l1 = strlen( s1 );
1135 
1136  if( l0 == 0 && l1 == 0 ) {
1137  return 0;
1138  }
1139  else if( l0 == 0 || l1 == 0 ) {
1140  return ( - FLT_MAX );
1141  }
1142 
1143  // dynamic programing
1144  kome::numeric::DynamicPrograming dp( l0, l1 );
1145  dp.calculate( boost::bind( getMatchScore, s0, _1, s1, _2 ), 0.1 );
1146 
1147  return dp.getAccumulatedScore( (int)l0 - 1, (int)l1 - 1 );
1148 }
1149 
1150 // get match score
1151 double MzmlManager::getMatchScore( const char* s0, int idx0, const char* s1, int idx1 ) {
1152  // character
1153  char c0 = s0[ idx0 ];
1154  char c1 = s1[ idx1 ];
1155 
1156  bool a0 = ( isalnum( c0 ) != 0 );
1157  bool a1 = ( isalnum( c1 ) != 0 );
1158 
1159  // score
1160  if( a0 && a1 ) {
1161  if( tolower( c0 ) == tolower( c1 ) ) {
1162  return 1.0;
1163  }
1164  else {
1165  return -1.0;
1166  }
1167  }
1168 
1169  if( !a0 && !a1 ) {
1170  return 1.0;
1171  }
1172 
1173  return - 0.1;
1174 }
1175 
1176 // get instance
1178  // create object (This is the only object.)
1179  static MzmlManager mgr;
1180 
1181  return mgr;
1182 }
1183 
1184 
1185 // compare to sort @date 2011.10.31 <Add> M.Izumi
1186 bool MzmlManager::lessSpec( kome::objects::Spectrum* spec0, kome::objects::Spectrum* spec1 ) {
1187  return ( spec0->getRt() < spec1->getRt() );
1188 }
std::map< kome::objects::Chromatogram *, long long > m_chromIdxMap
Definition: MzmlManager.h:45
static double getMatchScore(const char *s0, int idx0, const char *s1, int idx1)
gets match score
static AccessionManager & getInstance()
gets accession manager object (This is the only object.)
static bool lessSpec(kome::objects::Spectrum *spec0, kome::objects::Spectrum *spec1)
compare spectra to sort
void writeRun(FILE *fp, std::vector< kome::objects::Sample * > &samples, std::vector< kome::objects::Spectrum * > &spectra, std::vector< kome::objects::Chromatogram * > &chroms, kome::core::Progress *specPrgs, kome::core::Progress *chromPrgs, kome::objects::SettingParameterValues *saveSettings)
writes run tag
void writeInstrumentList(FILE *fp, std::vector< kome::objects::Sample * > &samples)
writes instrument list
std::map< kome::objects::Spectrum *, long long > m_specIdxMap
Definition: MzmlManager.h:42
void writeChromatogram(FILE *fp, kome::objects::Chromatogram &chrom, kome::objects::SettingParameterValues *saveSettings)
writes chromatogram
const char * getId()
gets accession ID
Definition: Accession.cpp:47
const char * getName()
gets accession name
Definition: Accession.cpp:57
Accession * getOrigin()
gets origin accession
Definition: Accession.cpp:72
void writeSourceFileList(FILE *fp, std::vector< kome::objects::Sample * > &samples)
writes source file list
interfaces of MzmlManager class
common header file
interfaces of Accession class
void writeHeader(FILE *fp)
writes header
Accession * getAccession(const unsigned int idx)
gets accession
void init()
initializes variables
Definition: MzmlManager.cpp:64
void writeFileContent(FILE *fp, std::vector< kome::objects::Sample * > &samples)
writes file content
accession object management class
virtual ~MzmlManager()
destructor
Definition: MzmlManager.cpp:60
void writeIndexList(FILE *fp, std::vector< kome::objects::Spectrum * > &spectra, std::vector< kome::objects::Chromatogram * > &chroms)
writes index list
accession information class
Definition: Accession.h:24
static double getSimilarityScore(const char *s0, const char *s1)
gets similarity score between two character strings
static MzmlManager & getInstance()
gets mzML IO management object
mzML IO management class
Definition: MzmlManager.h:26
void writeFileDescription(FILE *fp, std::vector< kome::objects::Sample * > &samples)
writes file description
void writeReferenceableParamGroup(FILE *fp, const bool specFlg, const bool chromFlg)
writes referenceable parameters group
void writeSoftwareList(FILE *fp, std::vector< kome::objects::Sample * > &samples)
writes software list
void writeProcessingList(FILE *fp)
writes processing list
void writeSpectrum(FILE *fp, kome::objects::Spectrum &spec, kome::objects::SettingParameterValues *saveSettings)
writes spectrum
unsigned int getNumberOfAccessions()
gets the number of accessions