圖像分類

Above All

機器學習的大作業是寫圖像分類。這里我整理一些有用的參考資料,以便后來提交報告的時候邏輯比較清晰。

主要想用的特征還是SIFT和SURF,當然我覺得數據集給我的感覺是顏色直方圖也是可以用的。


一、簡單粗暴的提取SIFT特征

源碼:https://github.com/SimGuo/ImageProcessing/blob/master/main.cpp


二、Bag-of-words方法

作者:Savitch
出處:http://blog.csdn.net/assiduousknight/article/details/16901427
什么是BOW

first step
then

Bag-of-words模型應用三步

接下來,我們通過上述圖像展示如何通過Bag-of-words模型,將圖像表示成數值向量。現在有三個目標類,分別是人臉、自行車和吉他。

  • Bag-of-words模型的第一步是利用SIFT算法,從每類圖像中提取視覺詞匯,將所有的視覺詞匯集合在一起,如下圖所示:

    提取視覺詞匯

  • 第二步是利用K-Means算法構造單詞表。K-Means算法是一種基于樣本間相似性度量的間接聚類方法,此算法以K為參數,把N個對象分為K個簇,以使簇內具有較高的相似度,而簇間相似度較低。SIFT提取的視覺詞匯向量之間根據距離的遠近,可以利用K-Means算法將詞義相近的詞匯合并,作為單詞表中的基礎詞匯,假定我們將K設為4,那么單詞表的構造過程如下圖所示:

kmeans
  • 第三步是利用單詞表中的詞匯表示圖像。利用SIFT算法,可以從每幅圖像中提取很多個特征點,這些特征點都可以用單詞表中的單詞近似代替,通過統計單詞表中每個單詞在圖像中出現的次數,可以將圖像表示成為一個K=4維數值向量。請看下圖:
每張圖根據詞表轉化為一個向量

代碼(還沒看懂)

  1. 配置環境
  2. 創建c++類CSIFTDiscriptor
    為了方便使用,我們將SIFT庫用C++類CSIFTDiscriptor封裝,該類可以計算并獲取指定圖像的特征點向量集合。類的聲明在SIFTDiscriptor.h文件中,內容如下:
    #ifndef _SIFT_DISCRIPTOR_H_  
    #define _SIFT_DISCRIPTOR_H_  
    #include <string>  
    #include <highgui.h>  
    #include <cv.h>  
      
    extern "C"  
    {     
    #include "../sift/sift.h"     
    #include "../sift/imgfeatures.h"      
    #include "../sift/utils.h"    
    };  
      
    /**
     * Wrapper around the SIFT library for a single image: stores the image
     * name, the feature array computed for it and the number of interest
     * points found.
     *
     * The destructor frees the feature array, so each object is the only
     * owner of its array. Copying is disabled because a copy would make two
     * objects free the same pointer.
     */
    class CSIFTDiscriptor
    {
    public:
        /** Creates an empty descriptor: no image name, no features. */
        CSIFTDiscriptor()
            : m_nInterestPointNumber(0), m_pFeatureArray(NULL)
        {
        }

        /** Stores strImgName and immediately runs CalculateSIFT() on it. */
        CSIFTDiscriptor(const std::string &strImgName);

        /** Frees the feature array, if any. */
        ~CSIFTDiscriptor();

        /** Sets the name of the image that CalculateSIFT() will load. */
        void SetImgName(const std::string &strImgName)
        {
            m_strInputImgName = strImgName;
        }

        /**
         * Loads the current image and extracts its SIFT features.
         * @return the interest point count, or -1 if the image cannot be loaded.
         */
        int CalculateSIFT();

        /** Number of interest points stored by the last CalculateSIFT() call. */
        int GetInterestPointNumber() const
        {
            return m_nInterestPointNumber;
        }

        /** Feature array stored by the last CalculateSIFT() call; still owned by this object. */
        struct feature *GetFeatureArray() const
        {
            return m_pFeatureArray;
        }

    private:
        // Declared but intentionally not implemented: the object owns
        // m_pFeatureArray, so a member-wise copy would lead to a double free.
        CSIFTDiscriptor(const CSIFTDiscriptor &);
        CSIFTDiscriptor &operator=(const CSIFTDiscriptor &);

    private:
        std::string m_strInputImgName;    // image file to process
        int m_nInterestPointNumber;       // number of entries in m_pFeatureArray
        struct feature *m_pFeatureArray;  // released with free() in the destructor
    };
    #endif  
    
    成員函數實現在SIFTDiscriptor.cpp文件中,其中,CalculateSIFT函數完成特征點的提取和計算,其主要內部流程如下:
  1. 調用OpenCV函數cvLoadImage加載輸入圖像;
  2. 為了統一輸入圖像的尺寸,CalculateSIFT函數的第二步是調整輸入圖像的尺寸,這通過調用cvResize函數實現;
  3. 如果輸入圖像是彩色圖像,我們需要首先將其轉化成灰度圖,這通過調用cvCvtColor函數實現;
  4. 調用SIFT庫函數sift_features獲取輸入圖像的特征點向量集合和特征點個數。
    #include "SIFTDiscriptor.h"  
    

int CSIFTDiscriptor::CalculateSIFT()
{
IplImage *pInputImg = cvLoadImage(m_strInputImgName.c_str());
if (!pInputImg)
{
return -1;
}
int nImgWidth = 320; //訓(xùn)練用標(biāo)準(zhǔn)圖像大小
double dbScaleFactor = pInputImg->width / 300.0; //縮放因子
IplImage *pTmpImg = cvCreateImage(cvSize(pInputImg->width / dbScaleFactor, pInputImg->height / dbScaleFactor),
pInputImg->depth, pInputImg->nChannels);
cvResize(pInputImg, pTmpImg); //縮放
cvReleaseImage(&pInputImg);

if (pTmpImg->nChannels != 1)    //非灰度圖  
{  
    IplImage *pGrayImg = cvCreateImage(cvSize(pTmpImg->width, pTmpImg->height),  
        pTmpImg->depth, 1);  
    cvCvtColor(pTmpImg, pGrayImg, CV_RGB2GRAY);  
    m_nInterestPointNumber = sift_features(pGrayImg, &m_pFeatureArray);  
    cvReleaseImage(&pGrayImg);  
}  
else  
{  
    m_nInterestPointNumber = sift_features(pTmpImg, &m_pFeatureArray);  
}  
cvReleaseImage(&pTmpImg);  
return m_nInterestPointNumber;  

}
// Builds a descriptor for strImgName and computes its SIFT features right
// away. The result of CalculateSIFT() is not checked here; callers can query
// GetInterestPointNumber() afterwards.
CSIFTDiscriptor::CSIFTDiscriptor(const std::string &strImgName)
    : m_strInputImgName(strImgName),
      m_nInterestPointNumber(0),
      m_pFeatureArray(NULL)
{
    CalculateSIFT();
}
// Releases the feature array held by this object.
CSIFTDiscriptor::~CSIFTDiscriptor()
{
    // free() is a no-op for a null pointer, so no explicit guard is needed.
    free(m_pFeatureArray);
}
```

  1. 創建c++類CImgSet,管理實驗圖像集合
    Bag-of-words模型需要從多個目標類圖像中提取視覺詞匯,不同目標類的圖像存儲在不同子文件夾中,為了方便操作,我們設計了一個專門的類CImgSet用來管理圖像集合,聲明在文件ImgSet.h中:
    #ifndef _IMG_SET_H_  
    #define _IMG_SET_H_  
    #include <vector>  
    #include <string>  
    #pragma comment(lib, "shlwapi.lib")  
    /**
     * Collection of image file names gathered from one root directory.
     *
     * The list is empty after construction; LoadImgsFromDir() fills it.
     */
    class CImgSet
    {
    public:
        /**
         * @param strImgDirName root directory of the image set. A path
         *        separator is appended so file names can be concatenated to it.
         */
        CImgSet(const std::string &strImgDirName)
            : m_nImgNumber(0), m_strImgDirName(strImgDirName + "/")
        {
        }

        /** Number of image names collected by the last LoadImgsFromDir() call. */
        int GetTotalImageNumber() const
        {
            return m_nImgNumber;
        }

        /**
         * Name of the nIndex-th collected image.
         * Uses vector::at(), so an out-of-range index throws std::out_of_range.
         */
        std::string GetImgName(int nIndex) const
        {
            return m_szImgs.at(nIndex);
        }

        /**
         * Collects the image names below the root directory.
         * @return the total number of names stored so far.
         */
        int LoadImgsFromDir()
        {
            // An empty sub-directory name selects the root directory.
            return LoadImgsFromDir("");
        }

    private:
        int LoadImgsFromDir(const std::string &strDirName);

    private:
        typedef std::vector<std::string> IMG_SET;
        IMG_SET m_szImgs;                    // collected image file names
        int m_nImgNumber;                    // cached m_szImgs.size()
        const std::string m_strImgDirName;   // root directory, ends with a separator
    };
    #endif  
    
    //成員函數實現在文件ImgSet.cpp中:  
    #include "ImgSet.h"  
    #include <windows.h>  
    #include <Shlwapi.h>  
    /** 
    strSubDirName:子文件夾名 
    */  
    int CImgSet::LoadImgsFromDir(const std::string &strSubDirName)  
    {  
        WIN32_FIND_DATAA stFD = {0};  
        std::string strDirName;  
        if ("" == strSubDirName)  
        {  
            strDirName = m_strImgDirName;  
        }    
        else  
        {    
            strDirName = strSubDirName;  
        }    
        std::string strFindName = strDirName + "http://*";  
        HANDLE hFile = FindFirstFileA(strFindName.c_str(), &stFD);  
        BOOL bExist = FindNextFileA(hFile, &stFD);  
      
        for (;bExist;)  
        {  
            std::string strTmpName = strDirName + stFD.cFileName;  
            if (strDirName + "." == strTmpName || strDirName + ".." == strTmpName)  
            {  
                bExist = FindNextFileA(hFile, &stFD);  
                continue;  
            }  
            if (PathIsDirectoryA(strTmpName.c_str()))  
            {  
                strTmpName += "http://";  
                LoadImgsFromDir(strTmpName);  
                bExist = FindNextFileA(hFile, &stFD);  
                continue;  
            }     
            std::string strSubImg = strDirName + stFD.cFileName;  
            m_szImgs.push_back(strSubImg);  
            bExist = FindNextFileA(hFile, &stFD);  
        }  
        m_nImgNumber = m_szImgs.size();  
        return m_nImgNumber;  
    }  
    

LoadImgsFromDir遞歸地從圖像文件夾中獲取所有實驗用圖像名,包括子文件夾。該函數內部通過循環調用windows API函數FindFirstFile和FindNextFile來找到文件夾中所有圖像的名稱。

  1. 創建CHistogram,生成圖像的直方圖表示
//ImgHistogram.h  
#ifndef _IMG_HISTOGRAM_H_  
#define _IMG_HISTOGRAM_H_    
#include <string>  
#include "SIFTDiscriptor.h"  
#include "ImgSet.h"  
// Number of k-means clusters requested when the codebook is built; it is also
// the length of each per-image histogram row.
const int cnClusterNumber = 1500;  
// Alias for FEATURE_MAX_D (presumably the descriptor length defined by the
// SIFT library headers - confirm against imgfeatures.h).
const int ciMax_D = FEATURE_MAX_D;  

/**
 * Builds bag-of-words histograms for a set of training images and for single
 * query images.
 *
 * Ownership: the object owns the training histogram array and the observed
 * data matrix. It owns the codebook only when it created the codebook itself;
 * a codebook passed to SetCodebook() stays owned by the caller. Copying is
 * disabled because a member-wise copy would release these resources twice.
 */
class CHistogram
{
public:
    CHistogram()
        : m_bSet(false), m_pCodebook(0), m_pObservedData(0),
          m_pszHistogram(0), m_nImgNumber(0)
    {
    }

    ~CHistogram()
    {
        // Allocated with new[] in FormHistogram(), so delete[] is required.
        delete[] m_pszHistogram;
        m_pszHistogram = 0;

        if (m_pObservedData)
        {
            cvReleaseMat(&m_pObservedData);
            m_pObservedData = 0;
        }
        // A codebook supplied through SetCodebook() belongs to the caller.
        if (m_pCodebook && !m_bSet)
        {
            cvReleaseMat(&m_pCodebook);
            m_pCodebook = 0;
        }
    }

    /** Sets the root directory of the training image set. */
    void SetTrainingImgSetName(const std::string &strTrainingImgSet)
    {
        m_strTrainingImgSetName = strTrainingImgSet;
    }

    /** Computes the histograms of all training images (and the codebook if none is set). */
    int FormHistogram();

    /** Adds the visual-word counts of one image to pszImgHistogram and returns a matrix header over it. */
    CvMat CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[]);

    /** Returns the training histograms as a transposed matrix. */
    CvMat *GetObservedData();

    /** Returns the current codebook, or 0 when none exists yet. */
    CvMat *GetCodebook() const
    {
        return m_pCodebook;
    }

    /**
     * Installs an externally owned codebook. A codebook previously created
     * by this object is released first so that it does not leak.
     */
    void SetCodebook(CvMat *pCodebook)
    {
        if (m_pCodebook && !m_bSet && m_pCodebook != pCodebook)
        {
            cvReleaseMat(&m_pCodebook);
        }
        m_pCodebook = pCodebook;
        m_bSet = true;
    }

private:
    // Declared but intentionally not implemented, see the class comment.
    CHistogram(const CHistogram &);
    CHistogram &operator=(const CHistogram &);

private:
    bool m_bSet;                              // true when the codebook came from SetCodebook()
    CvMat *m_pCodebook;                       // cluster centres (visual words)
    CvMat *m_pObservedData;                   // released in the destructor
    std::string m_strTrainingImgSetName;      // root directory of the training images
    int (*m_pszHistogram)[cnClusterNumber];   // one row of counts per training image
    int m_nImgNumber;                         // number of rows in m_pszHistogram
};
#endif  

#include "ImgHistogram.h"  
int CHistogram::FormHistogram()  
{  
    int nRet = 0;  
    CImgSet iImgSet(m_strTrainingImgSetName);  
    nRet = iImgSet.LoadImgsFromDir();  
      
    const int cnTrainingImgNumber = iImgSet.GetTotalImageNumber();  
    m_nImgNumber = cnTrainingImgNumber;  
    CSIFTDiscriptor *pDiscriptor = new CSIFTDiscriptor[cnTrainingImgNumber];  
    int nIPNumber(0) ;  
    for (int i = 0; i < cnTrainingImgNumber; ++i)  //計(jì)算每一幅訓(xùn)練圖像的SIFT描述符  
    {  
        const string strImgName = iImgSet.GetImgName(i);  
        pDiscriptor[i].SetImgName(strImgName);  
        pDiscriptor[i].CalculateSIFT();  
        nIPNumber += pDiscriptor[i].GetInterestPointNumber();  
    }  
      
    double (*pszDiscriptor)[FEATURE_MAX_D] = new double[nIPNumber][FEATURE_MAX_D];  //存儲(chǔ)所有描述符的數(shù)組。每一行代表一個(gè)IP的描述符  
    ZeroMemory(pszDiscriptor, sizeof(int) * nIPNumber * FEATURE_MAX_D);  
    int nIndex = 0;  
    for (int i = 0; i < cnTrainingImgNumber; ++i)  //遍歷所有圖像  
    {  
        struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();  
        int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();  
        for (int j = 0; j < nFeatureNumber; ++j)  //遍歷一幅圖像中所有的IP(Interesting Point興趣點(diǎn)  
        {  
            for (int k = 0; k < FEATURE_MAX_D; k++)//初始化一個(gè)IP描述符  
            {  
                pszDiscriptor[nIndex][k] = pFeatureArray[j].descr[k];  
            }  
            ++nIndex;  
        }  
    }  
    CvMat *pszLabels = cvCreateMat(nIPNumber, 1, CV_32SC1);  
      
    //對(duì)所有IP的描述符,執(zhí)行KMeans算法,找到cnClusterNumber個(gè)聚類中心,存儲(chǔ)在pszClusterCenters中  
    if (!m_pCodebook)   //構(gòu)造碼元表  
    {  
        CvMat szSamples,   
            *pszClusterCenters = cvCreateMat(cnClusterNumber, FEATURE_MAX_D, CV_32FC1);  
        cvInitMatHeader(&szSamples, nIPNumber, FEATURE_MAX_D, CV_32FC1, pszDiscriptor);  
        cvKMeans2(&szSamples, cnClusterNumber, pszLabels,   
            cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),  
            1, (CvRNG *)0, 0, pszClusterCenters);  //  
        m_pCodebook = pszClusterCenters;  
    }  
      
    m_pszHistogram = new int[cnTrainingImgNumber][cnClusterNumber];  //存儲(chǔ)每幅圖像的直方圖表示,每一行對(duì)應(yīng)一幅圖像  
    ZeroMemory(m_pszHistogram, sizeof(int) * cnTrainingImgNumber * cnClusterNumber);  
      
    //計(jì)算每幅圖像的直方圖  
    nIndex = 0;  
    for (int i = 0; i < cnTrainingImgNumber; ++i)  
    {  
        struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();  
        int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();  
        //      int nIndex = 0;  
        for (int j = 0; j < nFeatureNumber; ++j)  
        {  
            //          CvMat szFeature;  
            //          cvInitMatHeader(&szFeature, 1, FEATURE_MAX_D, CV_32FC1, pszDiscriptor[nIndex++]);  
            //          double dbMinimum = 1.79769e308;  
            //          int nCodebookIndex = 0;  
            //          for (int k = 0; k < m_pCodebook->rows; ++k)//找到距離最小的碼元,用最小碼元代替原//來的詞匯  
            //          {  
            //              CvMat szCode = cvMat(1, m_pCodebook->cols, m_pCodebook->type);  
            //              cvGetRow(m_pCodebook, &szCode, k);  
            //              double dbDistance = cvNorm(&szFeature, &szCode, CV_L2);  
            //              if (dbDistance < dbMinimum)  
            //              {  
            //                  dbMinimum = dbDistance;  
            //                  nCodebookIndex = k;  
            //              }  
            //          }  
            int nCodebookIndex = pszLabels->data.i[nIndex++];   //找到第i幅圖像中第j個(gè)IP在Codebook中的索引值nCodebookIndex  
            ++m_pszHistogram[i][nCodebookIndex];   //0<nCodebookIndex<cnClusterNumber;   
        }  
    }  
      
    //資源清理,函數(shù)返回  
    //  delete []m_pszHistogram;  
    //  m_pszHistogram = 0;  
      
    cvReleaseMat(&pszLabels);     
    //  cvReleaseMat(&pszClusterCenters);  
    delete []pszDiscriptor;  
    delete []pDiscriptor;  
      
    return nRet;  
}  
  
//double descr_dist_sq( struct feature* f1, struct feature* f2 );  
/**
 * Adds the bag-of-words counts of one image to pszImgHistogram.
 *
 * Every SIFT descriptor of the image is assigned to the codeword with the
 * smallest L2 distance and the matching histogram entry is incremented. The
 * buffer is not cleared here, so the caller decides whether counts start at
 * zero or accumulate.
 *
 * @param strImgName       image file to process.
 * @param pszImgHistogram  caller-owned array with cnClusterNumber entries.
 * @return a cnClusterNumber x 1 CV_32SC1 matrix header over pszImgHistogram
 *         (no data is copied), or a zero-initialised CvMat when the name is
 *         empty, no codebook exists or the buffer is null.
 *
 * NOTE(review): the codebook is assumed to be CV_32FC1 with FEATURE_MAX_D
 * columns, as produced by FormHistogram(); confirm for externally supplied
 * codebooks.
 */
CvMat CHistogram::CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[])
{
    if (strImgName.empty() || !m_pCodebook || !pszImgHistogram)
    {
        return CvMat();
    }

    CSIFTDiscriptor iImgDisp;
    iImgDisp.SetImgName(strImgName);
    iImgDisp.CalculateSIFT();
    struct feature *pImgFeature = iImgDisp.GetFeatureArray();
    const int cnIPNumber = pImgFeature ? iImgDisp.GetInterestPointNumber() : 0;

    // Never index past the codebook rows or past the caller's buffer.
    const int cnCodeNumber =
        m_pCodebook->rows < cnClusterNumber ? m_pCodebook->rows : cnClusterNumber;

    // The descriptor is converted into a float row so that its element type
    // really matches the CV_32FC1 header used for the distance computation.
    float szDescriptor[FEATURE_MAX_D];
    CvMat iIP = cvMat(1, FEATURE_MAX_D, CV_32FC1, szDescriptor);

    for (int i = 0; i < cnIPNumber; ++i)
    {
        for (int k = 0; k < FEATURE_MAX_D; ++k)
        {
            szDescriptor[k] = static_cast<float>(pImgFeature[i].descr[k]);
        }

        // Index of the codeword closest to the i-th interest point; on ties
        // the first codeword wins.
        int nCodebookIndex = 0;
        double dbMinDistance = 0.0;
        for (int j = 0; j < cnCodeNumber; ++j)
        {
            CvMat iCode;
            cvGetRow(m_pCodebook, &iCode, j);
            const double dbDistance = cvNorm(&iIP, &iCode, CV_L2);
            if (0 == j || dbDistance < dbMinDistance)
            {
                dbMinDistance = dbDistance;
                nCodebookIndex = j;
            }
        }
        if (cnCodeNumber > 0)
        {
            ++pszImgHistogram[nCodebookIndex];
        }
    }

    return cvMat(cnClusterNumber, 1, CV_32SC1, pszImgHistogram);
}


/**
 * Returns the training histograms as a cnClusterNumber x m_nImgNumber
 * CV_32SC1 matrix, i.e. the transpose of the per-image histogram table (one
 * column per training image).
 *
 * The matrix is stored in m_pObservedData and released by the destructor (or
 * by the next call), so the caller must not release it.
 *
 * @return the matrix, or 0 when FormHistogram() has not produced a histogram.
 */
CvMat *CHistogram::GetObservedData()
{
    if (!m_pszHistogram || m_nImgNumber <= 0)
    {
        return 0;
    }

    // Header over the existing histogram table; no data is copied here.
    CvMat iHistogram;
    cvInitMatHeader(&iHistogram, m_nImgNumber, cnClusterNumber, CV_32SC1, m_pszHistogram);

    // Replace the result of an earlier call instead of leaking it. The
    // original declared a local with the member's name, so the member was
    // never set and every returned matrix leaked.
    if (m_pObservedData)
    {
        cvReleaseMat(&m_pObservedData);
    }
    m_pObservedData = cvCreateMat(iHistogram.cols, iHistogram.rows, CV_32SC1);
    cvTranspose(&iHistogram, m_pObservedData);
    return m_pObservedData;
}
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容