C++编写Python扩展 -- C++类扩展技巧

2017/06/19 Python

导语：现在使用Python做项目的越来越多了，但是以前使用c、c++编写的库，再使用python重写一遍，实在不划算。有没有办法在Python中直接调用c、c++编写的代码呢？将c函数做成Python扩展，还是很容易的，毕竟Python官方文档中的《Extending and Embedding the Python Interpreter》章节对扩展c函数已经有了较详细的描述。但是将c++的类做成Python扩展，并保留继承关系，还是比较麻烦的事情。经过多次试验，终于找到了方法，和大家分享一下。

特别关注：

本文的主要目的是阐述C++的类的扩展方法，对C函数的扩展方法会一笔带过不做详解(具体可见python官方文档《Extending and Embedding the Python Interpreter》)。

本文中的例子，凡是文件名、函数名加上Py后缀的，都是C++的Python扩展文件或函数，凡是不加Py后缀的，都是原始的C++文件或函数

一. C函数扩展四步曲

对C函数进行扩展，只需要四个步骤：

1. 编写在Python中调用的函数，举个例子：

/*
 * Python-callable wrapper around the C function sum().
 * Parses two integer arguments and returns their sum as a Python integer.
 * Returns NULL with an exception set when the arguments cannot be parsed.
 */
static PyObject* sumPy(PyObject* self, PyObject* args)
{
    int lhs = 0;
    int rhs = 0;

    if (PyArg_ParseTuple(args, "ii", &lhs, &rhs))
    {
        return Py_BuildValue("i", sum(lhs, rhs));
    }

    PyErr_SetString(PyExc_Exception, "Parameters invalid");
    return NULL;
}

这个例子假定我们已有一个名为sum的C函数，可以对两个int求和。把这个sum函数扩展到Python的第一步，就是编写一个对应的函数sumPy：在这个函数中，通过PyArg_ParseTuple获取输入参数，调用sum计算得到结果，然后调用Py_BuildValue返回结果。

2. 把Python函数加入到函数表：

// Method table of the module: exposes sumPy to Python under the name "sum".
// The final all-NULL entry marks the end of the table.
static PyMethodDef UtilsMethods[] =
{
    {"sum", sumPy, METH_VARARGS, "Calculate the sum of two integers"},
    {NULL, NULL, 0, NULL}
};

3. 注册函数及模块：

// Module entry point: registers the module "utils" together with its
// method table. The returned module object is not needed here.
PyMODINIT_FUNC initutils()
{
    (void)Py_InitModule("utils", UtilsMethods);
}

注意：这里注册的模块名称为utils，所以注册模块的函数名称必须是initutils

4. 编译和使用：

# Compile the extension source into a position-independent object file
g++ -c -Wall -fPIC -D_GNU_SOURCE  -I./Python/Python-2.7.13 -I./Python/Python-2.7.13/Include CFuncTest.c
# Link the object file into the shared library utils.so
g++ -o utils.so -shared -fPIC CFuncTest.o

注意：编译生成的动态库文件名，必须是步骤3中注册的模块名+.so。不能加上lib前缀。如果这个utils.so还会作为函数库被其他模块编译引用的话，可以用ln命令创建一个链接：

# Create libutils.so as a symbolic link to utils.so
ln -s utils.so libutils.so

在Python中使用：

# Load the extension module and call the wrapped C function
import utils
utils.sum(89, 10)

二、C++类扩展技巧

对C++类进行扩展，比对C函数进行扩展要复杂很多，总结下来，至少需要以下8个步骤：

定义一个与C++类对应的PyObject结构体
实现这个结构体的内存分配函数(对应C++的构造函数)
实现这个结构体的初始化函数(可选)
实现这个结构体的内存释放函数(对应C++的析构函数)
为C++类的函数编写对应的Python扩展函数
把扩展函数加入到函数表
使用PyTypeObject结构体定义一个新的Python类型
将Python类型添加到Python模块中

让我们从扩展一个基类开始，假定这个类结构如下：

// Base.h
#ifndef BASE_H
#define BASE_H

#include <string>
using namespace std;

// Simple named object; serves as the base class of the examples.
class Base
{
public:
    // Constructs the object with an optional name (empty by default).
    Base(const string& sName="") : m_sName(sName)
    {
    }

    virtual ~Base()
    {
    }

    // Replaces the stored name; called without an argument it clears the name.
    void SetName(const string& sName="")
    {
        m_sName = sName;
    }

    // Const access: returns a reference to the stored name.
    virtual const string& GetName() const
    {
        return m_sName;
    }

    // Non-const access: returns a copy of the stored name.
    virtual string GetName()
    {
        return m_sName;
    }

protected:
    string m_sName;
};

#endif

1. 第一步，我们要写一个对应的头文件，在这个头文件中，我们要定义一个与C++类对应的Python结构体，并声明两个函数：一个是将Python类型添加到Python模块的函数，一个是返回Python类型对象的函数：

// BasePy.h
#ifndef BASE_PY_H
#define BASE_PY_H

#include <Python.h>
#include "Base.h"

// 1. Python object layout that corresponds to the C++ class Base
typedef struct tagBasePy
{
    PyObject_HEAD
    Base* pBase;  // wrapped C++ object; created in CreateBasePy, deleted in FreeBasePy
} BasePy;

// Adds the Python type for Base to the given module (step 8).
// Returns 0 on success and -1 on failure.
int InitBaseModule(PyObject* pModule);
// Returns the Python type object of Base; subclasses use it to set up inheritance
PyTypeObject* GetBasePyType();

#endif

2. 第二步，编写一个CPP文件，实现上述8个步骤中第2~8步的功能(包括内存分配、初始化、内存释放、成员函数、类型定义以及添加到模块等)。为了阅读代码方便起见，我将这些小步骤和编号都写到注释中。

// BasePy.cpp
#include "BasePy.h"

// 2. Allocation function: allocates the Python object and the C++ Base
//    instance it wraps. The constructor arguments are ignored here.
//    Returns the new object, or NULL with a Python exception set on failure.
static PyObject* CreateBasePy(PyTypeObject* pTypeObj, PyObject* /*pArgs*/, PyObject* /*pKwds*/)
{
    BasePy* pSelf = (BasePy*)pTypeObj->tp_alloc(pTypeObj, 0);
    if (!pSelf)
    {
        return NULL;
    }

    try
    {
        pSelf->pBase = new Base;
    }
    catch (...)
    {
        // A C++ exception must not propagate into the C interpreter:
        // release the half-built object and report a Python error instead.
        pSelf->pBase = NULL;  // keeps the delete in the deallocator harmless
        Py_DECREF(pSelf);
        PyErr_NoMemory();
        return NULL;
    }

    return (PyObject*)pSelf;
}

// 3. Initialization function (optional): accepts one optional string
//    argument, also usable as the keyword "name", and stores it in the
//    wrapped object. Returns 0 on success, -1 if the arguments cannot be parsed.
static int InitBasePy(BasePy* pSelf, PyObject* pArgs, PyObject* pKwds)
{
    static const char* s_apszKeywords[] = {"name", NULL};
    const char* pszName = NULL;

    const int bParsed = PyArg_ParseTupleAndKeywords(pArgs, pKwds, "|s",
                                                    (char**)s_apszKeywords, &pszName);
    if (!bParsed)
    {
        return -1;
    }

    // Leave the name untouched when the caller did not supply one.
    if (pszName != NULL)
    {
        pSelf->pBase->SetName(pszName);
    }
    return 0;
}

// 4. Deallocation function: destroys the wrapped C++ object first, then
//    releases the Python object's own memory through the type's tp_free.
static void FreeBasePy(BasePy* pSelf)
{
    Base* pWrapped = pSelf->pBase;
    delete pWrapped;

    PyObject* pObj = (PyObject*)pSelf;
    pObj->ob_type->tp_free(pObj);
}

// 5. Python wrapper for Base::SetName. Takes one optional string argument;
//    when it is omitted the name is set to the empty string.
//    Returns None, or NULL with an exception set on invalid arguments.
static PyObject* BaseSetNamePy(BasePy* pSelf, PyObject* pArgs)
{
    const char* szName = NULL;

    if (!PyArg_ParseTuple(pArgs, "|s", &szName))
    {
        PyErr_SetString(PyExc_Exception, "Invalid parameter for Base:setName");
        return NULL;
    }

    pSelf->pBase->SetName(szName ? szName : "");

    // The caller receives a new reference, so None's reference count must be
    // incremented; returning the bare Py_None pointer would eventually drive
    // the count of the None singleton to zero.
    Py_RETURN_NONE;
}

// 5. Python wrapper for Base::GetName: returns the name as a Python string.
static PyObject* BaseGetNamePy(BasePy* pSelf, PyObject* /*pArgs*/)
{
    const string sName = pSelf->pBase->GetName();
    return Py_BuildValue("s", sName.c_str());
}

// 6. Method table of the Python type: maps the Python method names
//    setName/getName to their wrapper functions. The all-NULL entry ends the table.
static PyMethodDef BasePyMethods[] =
{
    {"setName", (PyCFunction)BaseSetNamePy, METH_VARARGS, "Sets the name of the object Base"},
    {"getName", (PyCFunction)BaseGetNamePy, METH_NOARGS, "Gets the name of the object Base"},
    {NULL, NULL, 0, NULL}
};

// 7. Definition of the new Python type by means of a PyTypeObject.
//    The slots are given positionally (Python 2.7 layout); unused slots are 0.
//    tp_init is set to InitBasePy so that constructor arguments such as
//    Base('hello world') actually reach the wrapped object; with an empty
//    tp_init slot the arguments would be accepted but silently ignored.
static PyTypeObject s_stBasePyType =
    {
        PyObject_HEAD_INIT(NULL)
        0,                       /* ob_size */
        "Base",                  /* tp_name: name of the type */
        sizeof(BasePy),          /* tp_basicsize */
        0,                       /* tp_itemsize */
        (destructor)FreeBasePy,  /* tp_dealloc: releases the object */
        0,                       /* tp_print */
        0,                       /* tp_getattr */
        0,                       /* tp_setattr */
        0,                       /* tp_compare */
        0,                       /* tp_repr */
        0,                       /* tp_as_number */
        0,                       /* tp_as_sequence */
        0,                       /* tp_as_mapping */
        0,                       /* tp_hash */
        0,                       /* tp_call */
        0,                       /* tp_str */
        0,                       /* tp_getattro */
        0,                       /* tp_setattro */
        0,                       /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
        "Base objects",          /* tp_doc */
        0,                       /* tp_traverse */
        0,                       /* tp_clear */
        0,                       /* tp_richcompare */
        0,                       /* tp_weaklistoffset */
        0,                       /* tp_iter */
        0,                       /* tp_iternext */
        BasePyMethods,           /* tp_methods: member functions of the type */
        0,                       /* tp_members */
        0,                       /* tp_getset */
        0,                       /* tp_base */
        0,                       /* tp_dict */
        0,                       /* tp_descr_get */
        0,                       /* tp_descr_set */
        0,                       /* tp_dictoffset */
        (initproc)InitBasePy,    /* tp_init: initializes a new object */
        0,                       /* tp_alloc */
        CreateBasePy             /* tp_new: creates a new object */
    };

// 8. Adds the Python type for Base to the given module under the name "Base".
//    Returns 0 on success and -1 on failure (NULL module, type not ready,
//    or the type could not be added to the module).
int InitBaseModule(PyObject* pModule)
{
    if (!pModule) return -1;
    if (PyType_Ready(&s_stBasePyType) < 0) return -1;

    // PyModule_AddObject takes over this reference only when it succeeds,
    // so it has to be given back on failure.
    Py_INCREF(&s_stBasePyType);
    if (PyModule_AddObject(pModule, "Base", (PyObject *)&s_stBasePyType) < 0)
    {
        Py_DECREF(&s_stBasePyType);
        return -1;
    }
    return 0;
}

// Returns the Python type object of Base; used by subclasses to implement
// inheritance and to reach the base type's slots.
PyTypeObject* GetBasePyType()
{
    return &s_stBasePyType;
}

代码说明：

a) Python创建一个对象时，会先调用类型的tp_new函数(函数类型为newfunc，即上面的CreateBasePy)分配内存，再调用tp_init函数(函数类型为initproc)对对象初始化，这有点像C++的构造函数。不过tp_new函数也是可以传入参数的，所以也可以在tp_new中分配内存后，直接初始化对象，但是最好不要这样做，因为当实现类的继承关系时，子类调用父类的initproc函数进行初始化会更方便一点(见下一章)。

b) 在Python的destructor函数释放Python对象占用的内存时，要先释放C++对象占用的内存。

3. 第三步，注册模块，并把新增的Python类型，添加到模块中：

// Main.cpp
#include "BasePy.h"

// Table of module-level functions exported by the module; it contains only
// the terminating entry because the module exports no functions.
static PyMethodDef TestsMethods[] =
{
    {NULL, NULL, 0, NULL}
};

// Module entry point; its name must be "init" + the module name.
PyMODINIT_FUNC inittests()
{
    // Register/initialize the module
    PyObject* pModule = Py_InitModule("tests", TestsMethods);
    if (pModule == NULL)
    {
        // Module creation failed; there is nothing to add types to.
        return;
    }

    // Add the new Python type to the module
    if (InitBaseModule(pModule) != 0) return;
}

4. 第四步，编译

# Compile the type implementation and the module entry point
g++ -c -Wall -fPIC -D_GNU_SOURCE  -I./Python/Python-2.7.13 -I./Python/Python-2.7.13/Include BasePy.cpp
g++ -c -Wall -fPIC -D_GNU_SOURCE  -I./Python/Python-2.7.13 -I./Python/Python-2.7.13/Include Main.cpp
# Link both object files into the shared library tests.so
g++ -o tests.so -shared -fPIC BasePy.o Main.o

5. 第五步，在Python中使用c++扩展类

# Create a wrapped Base object and call its methods
import tests
o = tests.Base('hello world')
o.getName()
o.setName('xy')

代码看起来还是很简单的。

因为代码都大同小异，接下来，我们可以利用C++的宏定义，让编码变得更简单。请大家先记住下面这几个宏，在后续的子类扩展中将直接使用它们：

// PythonExtDef.h

#ifndef PYTHON_EXT_DEF_H
#define PYTHON_EXT_DEF_H

/**
 * Declares the two functions that publish a wrapped type:
 * Init<ModuleName>Module(PyObject*) and Get<ModuleName>PyType().
 * The expansion does not end with a semicolon; the user supplies it.
 * @param ModuleName name token used to form the two function names
 */
#define DECLARE_PY_MODULE_INIT_FUNC(ModuleName) \
    int Init##ModuleName##Module(PyObject* pModule); \
    PyTypeObject* Get##ModuleName##PyType()

/**
 * Defines the type object (via DECLARE_MODULE_PY_TYPE) and implements
 * Init<ModuleName>Module() and Get<ModuleName>PyType() for a type without
 * a wrapped base type (this macro is meant for base classes only).
 * The type is added to the module under the stringized ModuleName.
 * @param ModuleName name token used for the function names and the Python-visible name
 * @param ClassName name of the C++ class wrapped by the Python type
 */
#define IMPLEMENT_PY_BASE_MODULE_INIT_FUNC(ModuleName, ClassName) \
    DECLARE_MODULE_PY_TYPE(ModuleName, ClassName); \
    int Init##ModuleName##Module(PyObject* pModule) \
    { \
        if (PyType_Ready(&s_st##ClassName##PyType) < 0) return -1; \
        Py_INCREF(&s_st##ClassName##PyType); \
        PyModule_AddObject(pModule, #ModuleName, (PyObject *)&s_st##ClassName##PyType); \
        return 0; \
    } \
    PyTypeObject* Get##ModuleName##PyType() \
    { \
        return &s_st##ClassName##PyType; \
    }

/**
 * Same as IMPLEMENT_PY_BASE_MODULE_INIT_FUNC, but for a type that derives
 * from another wrapped type (this macro is meant for subclasses only):
 * before PyType_Ready is called, tp_base is set to the type returned by
 * Get<BaseModuleName>PyType().
 * @param ModuleName name token used for the function names and the Python-visible name
 * @param ClassName name of the C++ class wrapped by the Python type
 * @param BaseModuleName ModuleName token of the wrapped base type
 */
#define IMPLEMENT_PY_MODULE_INIT_FUNC(ModuleName, ClassName, BaseModuleName) \
    DECLARE_MODULE_PY_TYPE(ModuleName, ClassName); \
    int Init##ModuleName##Module(PyObject* pModule) \
    { \
        s_st##ClassName##PyType.tp_base = Get##BaseModuleName##PyType(); \
        if (PyType_Ready(&s_st##ClassName##PyType) < 0) return -1; \
        Py_INCREF(&s_st##ClassName##PyType); \
        PyModule_AddObject(pModule, #ModuleName, (PyObject *)&s_st##ClassName##PyType ); \
        return 0; \
    } \
    PyTypeObject* Get##ModuleName##PyType() \
    { \
        return &s_st##ClassName##PyType; \
    }

/**
 * Defines the PyTypeObject s_st<ClassName>PyType (without `static`).
 * The expansion refers to the following names, which must be visible at the
 * point of use: the struct <ClassName>Py, the method table
 * <ClassName>PyMethods, and the functions Free<ModuleName>Py,
 * Init<ModuleName>Py and Create<ModuleName>Py.
 * The stringized ModuleName is used as the type name and in the doc string.
 * @param ModuleName name token used for the type name and the function names
 * @param ClassName name of the C++ class wrapped by the Python type
 */
#define DECLARE_MODULE_PY_TYPE(ModuleName, ClassName) \
    PyTypeObject s_st##ClassName##PyType = \
    { \
        PyObject_HEAD_INIT(NULL) \
        0, \
        #ModuleName, \
        sizeof(ClassName##Py), \
        0, \
        (destructor)Free##ModuleName##Py, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, \
        #ModuleName " objects", \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        ClassName##PyMethods, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        0, \
        (initproc)Init##ModuleName##Py, \
        0, \
        Create##ModuleName##Py, \
    }

#endif

三、子类扩展

子类扩展和基类扩展的步骤是一样的，只是要保证类之间的继承关系，在结构体定义、内存分配函数、初始化函数、内存释放函数的编写上略有一点不同。

假定有个类SubClass，它的父类是Base：

// SubClass.h

#ifndef SUB_CLASS_H
#define SUB_CLASS_H

#include "Base.h"

// Base extended with an integer value.
class SubClass : public Base
{
public:
    // Both the name and the value are optional ("" and 0 by default).
    SubClass(const string& sName = "", int iValue = 0)
        : Base(sName), m_iValue(iValue)
    {
    }

    ~SubClass()
    {
    }

    void SetValue(int iValue)
    {
        m_iValue = iValue;
    }

    int GetValue() const
    {
        return m_iValue;
    }

protected:
    int m_iValue;
};

#endif

1. 第一步，写一个对应的头文件，结构体需要使用BasePy：

// SubClassPy.h

#ifndef SUB_CLASS_PY_H
#define SUB_CLASS_PY_H

#include "BasePy.h"
// Defines DECLARE_PY_MODULE_INIT_FUNC (used below) and
// IMPLEMENT_PY_MODULE_INIT_FUNC (used by SubClassPy.cpp); BasePy.h does not
// include it, so without this line the macros would be undefined.
#include "PythonExtDef.h"

// Python object layout for SubClass. It consists of a BasePy, so the object
// can also be handled through BasePy* by the functions of the base type.
typedef struct tagSubClassPy
{
    BasePy oBase;
} SubClassPy;

// Declares InitSubClassModule() and GetSubClassPyType() via the helper macro
DECLARE_PY_MODULE_INIT_FUNC(SubClass);

#endif

2. 第二步，编写实现文件，和基类扩展一样，需要实现内存分配、初始化、内存释放、成员函数等功能函数：

// SubClassPy.cpp

#include "SubClassPy.h"
#include "SubClass.h"

// Deallocation function of SubClassPy: delegates to the deallocator of the
// base type obtained from GetBasePyType(), comparable to calling `super` in Java.
static void FreeSubClassPy(SubClassPy* pSelf)
{
    PyTypeObject* pBaseType = GetBasePyType();
    pBaseType->tp_dealloc((PyObject*)pSelf);
}

// Allocation function of SubClassPy: allocates the Python object and the
// C++ SubClass instance it wraps. The constructor arguments are ignored here.
// Returns the new object, or NULL with a Python exception set on failure.
static PyObject* CreateSubClassPy(PyTypeObject* pTypeObj, PyObject* /*pArgs*/, PyObject* /*pKwds*/)
{
    SubClassPy* pSelf = (SubClassPy*)pTypeObj->tp_alloc(pTypeObj, 0);
    if (!pSelf)
    {
        return NULL;
    }

    try
    {
        pSelf->oBase.pBase = new SubClass;
    }
    catch (...)
    {
        // A C++ exception must not propagate into the C interpreter:
        // release the half-built object and report a Python error instead.
        pSelf->oBase.pBase = NULL;  // keeps the delete in the deallocator harmless
        Py_DECREF(pSelf);
        PyErr_NoMemory();
        return NULL;
    }

    return (PyObject*)pSelf;
}

// Initialization function of SubClassPy: accepts an optional string and an
// optional integer, also usable as the keywords "name" and "value".
// Returns 0 on success, -1 if the arguments cannot be parsed.
static int InitSubClassPy(SubClassPy* pSelf, PyObject* pArgs, PyObject* pKwds)
{
    static const char* s_apszKeywords[] = {"name", "value", NULL};
    const char* pszName = NULL;
    int nValue = 0;

    if (!PyArg_ParseTupleAndKeywords(pArgs, pKwds, "|si", (char**)s_apszKeywords,
                                     &pszName, &nValue))
    {
        return -1;
    }

    Base* pWrapped = pSelf->oBase.pBase;

    // Leave the name untouched when the caller did not supply one.
    if (pszName != NULL)
    {
        pWrapped->SetName(pszName);
    }
    ((SubClass*)pWrapped)->SetValue(nValue);

    // Note: the initializer of the base type could be invoked instead, as
    // GetBasePyType()->tp_init((PyObject*)pSelf, pArgs, pKwds). That is the
    // convenient choice when the subclass takes the same constructor
    // parameters as the base class; here the extra parameter "value" makes
    // handling everything in the subclass simpler.
    return 0;
}

// Python wrapper for SubClass::SetValue. Takes one optional integer
// argument; when it is omitted the value is set to 0.
// Returns None, or NULL with an exception set on invalid arguments.
static PyObject* SubClassSetValuePy(SubClassPy* pSelf, PyObject* pArgs)
{
    int iValue = 0;

    if (!PyArg_ParseTuple(pArgs, "|i", &iValue))
    {
        // The message names SubClass (it previously said "Base" by mistake).
        PyErr_SetString(PyExc_Exception, "Invalid parameter for SubClass:setValue");
        return NULL;
    }

    ((SubClass*)pSelf->oBase.pBase)->SetValue(iValue);

    // The caller receives a new reference, so None's reference count must be
    // incremented; returning the bare Py_None pointer would eventually drive
    // the count of the None singleton to zero.
    Py_RETURN_NONE;
}

// Python wrapper for SubClass::GetValue: returns the value as a Python integer.
static PyObject* SubClassGetValuePy(SubClassPy* pSelf, PyObject* /*pArgs*/)
{
    const SubClass* pSub = (SubClass*)pSelf->oBase.pBase;
    return Py_BuildValue("i", pSub->GetValue());
}

// Method table of the SubClass type: maps the Python method names
// setValue/getValue to their wrapper functions. The all-NULL entry ends the table.
static PyMethodDef SubClassPyMethods[] =
{
    {"setValue", (PyCFunction)SubClassSetValuePy, METH_VARARGS, "Sets the value of the object SubClass"},
    {"getValue", (PyCFunction)SubClassGetValuePy, METH_NOARGS, "Gets the value of the object SubClass"},
    {NULL, NULL, 0, NULL}
};

// Use the helper macro to define the SubClass type object and to implement
// InitSubClassModule() / GetSubClassPyType(), with Base as the base type
IMPLEMENT_PY_MODULE_INIT_FUNC(SubClass, SubClass, Base);

3. 第三步，添加到模块中：

// Main.cpp
#include "SubClassPy.h"

// Table of module-level functions exported by the module; it contains only
// the terminating entry because the module exports no functions.
static PyMethodDef TestsMethods[] =
{
    {NULL, NULL, 0, NULL}
};

// Module entry point of "tests": creates the module and registers the
// Python types of Base and SubClass in it.
PyMODINIT_FUNC inittests()
{
    PyObject* pModule = Py_InitModule("tests", TestsMethods);
    if (pModule == NULL)
    {
        // Module creation failed; there is nothing to add types to.
        return;
    }

    // Register the Python type of Base in the module
    if (InitBaseModule(pModule) != 0) return;
    // Register the Python type of SubClass in the module
    if (InitSubClassModule(pModule) != 0) return;
}

4. 第四步，编译：

# Compile both type implementations and the module entry point
g++ -c -Wall -fPIC -D_GNU_SOURCE  -I./Python/Python-2.7.13 -I./Python/Python-2.7.13/Include BasePy.cpp
g++ -c -Wall -fPIC -D_GNU_SOURCE  -I./Python/Python-2.7.13 -I./Python/Python-2.7.13/Include SubClassPy.cpp
g++ -c -Wall -fPIC -D_GNU_SOURCE  -I./Python/Python-2.7.13 -I./Python/Python-2.7.13/Include Main.cpp
# Link all object files into the shared library tests.so
g++ -o tests.so -shared -fPIC BasePy.o SubClassPy.o Main.o

5. 第五步，在Python中使用：

# Create a wrapped SubClass object; it offers the methods of Base as well
import tests

o = tests.SubClass('xy', 45)
o.getName()
o.getValue()
o.setName('xy2')
o.setValue(78)

四、对象参数

接下来，我们看一看函数参数是某个对象、以及函数返回值为对象的情况。

这里再新增一个类Other，并让这个类成为SubClass类的一个成员：

// Other.h
#ifndef OTHER_H
#define OTHER_H

#include <string>
using namespace std;

// Simple named object; used as a member object of SubClass in the examples.
class Other
{
public:
    // Constructs the object with an optional name (empty by default).
    Other(const string& sName="") : m_sName(sName)
    {
    }

    virtual ~Other()
    {
    }

    // Replaces the stored name; called without an argument it clears the name.
    void SetName(const string& sName="")
    {
        m_sName = sName;
    }

    // Const access: returns a reference to the stored name.
    virtual const string& GetName() const
    {
        return m_sName;
    }

    // Non-const access: returns a copy of the stored name.
    virtual string GetName()
    {
        return m_sName;
    }

protected:
    string m_sName;
};

#endif

修改SubClass.h，增加成员属性Other* m_pOther，并增加对应的函数GetOther和SetOther，使得SubClass类拥有参数是对象及返回值是对象的函数：

#ifndef SUB_CLASS_H
#define SUB_CLASS_H

#include "Base.h"
#include "Other.h"

// Base extended with an integer value and a pointer to an Other object.
class SubClass : public Base
{
public:
    // The Other pointer starts out as NULL.
    SubClass(const string& sName = "", int iValue = 0)
        : Base(sName), m_iValue(iValue), m_pOther(NULL)
    {
    }

    ~SubClass()
    {
    }

    void SetValue(int iValue)
    {
        m_iValue = iValue;
    }

    int GetValue() const
    {
        return m_iValue;
    }

    // 2. Function that takes an object as its parameter; only the pointer is stored
    void SetOther(Other* pOther)
    {
        m_pOther = pOther;
    }

    // 3. Function that returns an object
    Other* GetOther()
    {
        return m_pOther;
    }

protected:
    int m_iValue;
    // 1. Member object
    Other* m_pOther;
};

#endif

按第二章介绍的步骤，实现Other对应的Python对象OtherPy。

同时修改SubClassPy的实现：

1. 修改SubClassPy结构体，增加pOtherPy成员，该成员类型必须是PyObject*

#ifndef SUB_CLASS_PY_H
#define SUB_CLASS_PY_H

#include "BasePy.h"
// Defines DECLARE_PY_MODULE_INIT_FUNC (used below) and
// IMPLEMENT_PY_MODULE_INIT_FUNC (used by SubClassPy.cpp); BasePy.h does not
// include it, so without this line the macros would be undefined.
#include "PythonExtDef.h"

// Python object layout for SubClass. It starts with a BasePy, so the object
// can also be handled through BasePy* by the functions of the base type.
typedef struct tagSubClassPy
{
    BasePy oBase;
    // 1. New member: the Python object stored by setOther (must be a PyObject*)
    PyObject* pOtherPy;
} SubClassPy;

// Declares InitSubClassModule() and GetSubClassPyType() via the helper macro
DECLARE_PY_MODULE_INIT_FUNC(SubClass);

#endif

2. 修改析构函数FreeSubClassPy，在释放SubClassPy对象前，释放其成员pOtherPy的内存：

// Deallocation function of SubClassPy: drops the reference to the member
// object first, then lets the deallocator of the base type release the rest.
static void FreeSubClassPy(SubClassPy* pSelf)
{
    // 2. Release the member object (a NULL member is tolerated)
    PyObject* pMember = pSelf->pOtherPy;
    Py_XDECREF(pMember);

    // Release this object through the base type
    PyTypeObject* pBaseType = GetBasePyType();
    pBaseType->tp_dealloc((PyObject*)pSelf);
}

3. 在内存分配函数CreateSubClassPy中，记得将pOtherPy初始化为Py_None

// Allocation function of SubClassPy: allocates the Python object and the
// C++ SubClass instance it wraps, and initializes the member object to None.
// The constructor arguments are ignored here.
// Returns the new object, or NULL with a Python exception set on failure.
static PyObject* CreateSubClassPy(PyTypeObject* pTypeObj, PyObject* /*pArgs*/, PyObject* /*pKwds*/)
{
    SubClassPy* pSelf = (SubClassPy*)pTypeObj->tp_alloc(pTypeObj, 0);
    if (!pSelf)
    {
        return NULL;
    }

    try
    {
        pSelf->oBase.pBase = new SubClass;
    }
    catch (...)
    {
        // A C++ exception must not propagate into the C interpreter:
        // release the half-built object and report a Python error instead.
        pSelf->oBase.pBase = NULL;  // keeps the delete in the deallocator harmless
        pSelf->pOtherPy = NULL;     // nothing for the deallocator to release
        Py_DECREF(pSelf);
        PyErr_NoMemory();
        return NULL;
    }

    // 3. Initialize the member object to None. The object owns this
    //    reference (it is released when the member is replaced or the object
    //    is freed), so None's reference count has to be incremented.
    Py_INCREF(Py_None);
    pSelf->pOtherPy = Py_None;

    return (PyObject*)pSelf;
}

4. 实现setOther和getOther函数。特别注意引用计数的使用，使用不当会导致崩溃。

// 4.1 Python wrapper for SubClass::SetOther. Takes exactly one argument,
//     which must be an Other object (None is rejected). The wrapped C++
//     pointer is handed to SubClass, and a reference to the Python object is
//     kept in pOtherPy.
//     Returns None, or NULL with an exception set on invalid arguments.
static PyObject* SubClassSetOtherPy(SubClassPy* pSelf, PyObject* pArgs)
{
    PyObject* pOtherPy = NULL;

    if (!PyArg_ParseTuple(pArgs, "O", &pOtherPy))
    {
        PyErr_SetString(PyExc_Exception, "Invalid parameter for SubClass:setOther");
        return NULL;
    }

    if (pOtherPy == Py_None)
    {
        PyErr_SetString(PyExc_Exception, "Null parameter for SubClass:setOther");
        return NULL;
    }

    if (!PyObject_TypeCheck(pOtherPy, GetOtherPyType()))
    {
        PyErr_SetString(PyExc_Exception, "Invalid parameter for SubClass:setOther, the parameter is not an Other object");
        return NULL;
    }

    ((SubClass*)pSelf->oBase.pBase)->SetOther((Other*)((OtherPy*)pOtherPy)->pOther);

    // Update the reference counts
    if (pSelf->pOtherPy != pOtherPy)
    {
        // Take the reference to the new object and store it before the old
        // one is released: releasing an object can run arbitrary code, which
        // must never observe a member that points to a released object.
        PyObject* pOld = pSelf->pOtherPy;
        Py_INCREF(pOtherPy);
        pSelf->pOtherPy = pOtherPy;
        Py_XDECREF(pOld);
    }

    // The caller receives a new reference, so None's reference count must be
    // incremented; returning the bare Py_None pointer would eventually drive
    // the count of the None singleton to zero.
    Py_RETURN_NONE;
}

// 4.2 Python wrapper for getOther: returns the stored member object.
//     The reference count is incremented because the caller receives its
//     own reference to the object.
static PyObject* SubClassGetOtherPy(SubClassPy* pSelf, PyObject* /*pArgs*/)
{
    PyObject* pMember = pSelf->pOtherPy;
    Py_XINCREF(pMember);
    return pMember;
}

5. 将新增的函数加入函数表：

// Method table of the SubClass type; the all-NULL entry ends the table.
static PyMethodDef SubClassPyMethods[] =
{
    {"setValue", (PyCFunction)SubClassSetValuePy, METH_VARARGS, "Sets the value of the object SubClass"},
    {"getValue", (PyCFunction)SubClassGetValuePy, METH_NOARGS, "Gets the value of the object SubClass"},
    // 5. setOther and getOther added to the method table
    {"setOther", (PyCFunction)SubClassSetOtherPy, METH_VARARGS, "Sets the other object of the object SubClass"},
    {"getOther", (PyCFunction)SubClassGetOtherPy, METH_NOARGS, "Gets the other object of the object SubClass"},
    {NULL, NULL, 0, NULL}
};

6. 编译后在Python中使用：

# Store an Other object in a SubClass object and access it through getOther
import tests
o = tests.Other('other')
s = tests.SubClass('s', 56)
s.setOther(o)
s.getOther().getName()
s.getOther().setName('xy')

五、总结

本文重点描述了C++类的Python扩展技巧，包括C++基类扩展、子类扩展技巧，以及成员函数的参数为对象、以及成员函数返回对象的扩展技巧。

从上面的例子中，我们可以看到，实现C++类的Python扩展，过程还是比较“结构化”的，非常方便使用工具生成Python扩展代码。SWIG就是一个比较通用的生成工具，大家如果感兴趣的话，可以去研究一下。

文中涉及的代码请到我的git上下载：码也/python-cpp-ext