🫑Python | C++漂移扩散方程和无风险套利公式算法微分

Python | C++ | 漂移扩散 | 方程 | 公式 | 期权 | 套利 | 定价权 | 算法 | 正向累积 | 反向累积 | 回归 | 分类 | 生成对抗网络 | 几何 | 网格

🏈指点迷津 | Brief

🎯要点

🎯漂移扩散方程计算微分 | 🎯期权无风险套利公式计算微分 | 🎯实现图结构算法微分 | 🎯实现简单正向和反向计算微分 | 🎯实现简单回归分类和生成对抗网络计算微分 | 🎯几何网格计算微分

🍇Python和C++计算微分正反向累积

算法微分在机器学习领域尤为重要。例如,它允许人们在神经网络中实现反向传播,而无需手动计算导数。

计算微分的基础是复合函数偏导数链式法则提供的微分分解。简单结构如:

$$\begin{aligned} y & =f(g(h(x)))=f\left(g\left(h\left(w_0\right)\right)\right)=f\left(g\left(w_1\right)\right)=f\left(w_2\right)=w_3 \\ w_0 & =x \\ w_1 & =h\left(w_0\right) \\ w_2 & =g\left(w_1\right) \\ w_3 & =f\left(w_2\right)=y \end{aligned}$$

由链式法则得出:

$$\frac{\partial y}{\partial x}=\frac{\partial y}{\partial w_2} \frac{\partial w_2}{\partial w_1} \frac{\partial w_1}{\partial x}=\frac{\partial f\left(w_2\right)}{\partial w_2} \frac{\partial g\left(w_1\right)}{\partial w_1} \frac{\partial h\left(w_0\right)}{\partial x}$$

通常,存在两种不同的计算微分模式:正向累积和反向累积。

正向累积指定从内到外遍历链式法则(即首先计算 $\partial w_1 / \partial x$,然后计算 $\partial w_2 / \partial w_1$,最后计算 $\partial y / \partial w_2$),而反向累积是从外到内的遍历(首先计算 $\partial y / \partial w_2$,然后计算 $\partial w_2 / \partial w_1$,最后计算 $\partial w_1 / \partial x$)。更简洁地说,

正向累积计算递归关系:$\frac{\partial w_i}{\partial x}=\frac{\partial w_i}{\partial w_{i-1}} \frac{\partial w_{i-1}}{\partial x}$,其中 $w_3=y$

反向累积计算递归关系:$\frac{\partial y}{\partial w_i}=\frac{\partial y}{\partial w_{i+1}} \frac{\partial w_{i+1}}{\partial w_i}$,其中 $w_0=x$

正向累积在一次遍历中同时计算函数值和导数(但每次仅针对一个自变量)。相应的方法调用接收待求导的表达式 Z 以及求导所针对的变量 V,并返回一对结果:函数的求值结果及其导数。该方法递归遍历表达式树,直到到达变量:如果所求的正是相对于该变量的导数,则其导数为 1,否则为 0。随后逐层计算各子表达式的函数值及其偏导数。

伪代码:

 // Forward accumulation: returns the pair {value of Z, derivative of Z with
 // respect to V}, both produced by one recursive walk over the expression Z.
 // Inside the conditions, "=" tests whether Z matches the given form.
 tuple<float,float> evaluateAndDerive(Expression Z, Variable V) {
    // Leaf: the derivative is 1 if Z is the variable V itself, otherwise 0.
    if isVariable(Z)
       if (Z = V) return {valueOf(Z), 1};
       else return {valueOf(Z), 0};
    // Sum rule: the values add and the derivatives add.
    else if (Z = A + B)
       {a, a'} = evaluateAndDerive(A, V);
       {b, b'} = evaluateAndDerive(B, V);
       return {a + b, a' + b'};
    // Difference rule: the values subtract and the derivatives subtract.
    else if (Z = A - B)
       {a, a'} = evaluateAndDerive(A, V);
       {b, b'} = evaluateAndDerive(B, V);
       return {a - b, a' - b'};
    // Product rule: (value of B) * (derivative of A) + (value of A) * (derivative of B).
    else if (Z = A * B)
       {a, a'} = evaluateAndDerive(A, V);
       {b, b'} = evaluateAndDerive(B, V);
       return {a * b, b * a' + a * b'};
 }

Python实现正向累积:

 class ValueAndPartial:
     def __init__(self, value, partial):
         self.value = value
         self.partial = partial
 ​
     def toList(self):
         return [self.value, self.partial]
 ​
 class Expression:
     def __add__(self, other):
         return Plus(self, other)
 ​
     def __mul__(self, other):
         return Multiply(self, other)
 ​
 class Variable(Expression):
     def __init__(self, value):
         self.value = value
 ​
     def evaluateAndDerive(self, variable):
         partial = 1 if self == variable else 0
         return ValueAndPartial(self.value, partial)
 ​
 class Plus(Expression):
     def __init__(self, expressionA, expressionB):
         self.expressionA = expressionA
         self.expressionB = expressionB
 ​
     def evaluateAndDerive(self, variable):
         valueA, partialA = self.expressionA.evaluateAndDerive(variable).toList()
         valueB, partialB = self.expressionB.evaluateAndDerive(variable).toList()
         return ValueAndPartial(valueA + valueB, partialA + partialB)
 ​
 class Multiply(Expression):
     def __init__(self, expressionA, expressionB):
         self.expressionA = expressionA
         self.expressionB = expressionB
 ​
     def evaluateAndDerive(self, variable):
         valueA, partialA = self.expressionA.evaluateAndDerive(variable).toList()
         valueB, partialB = self.expressionB.evaluateAndDerive(variable).toList()
         return ValueAndPartial(valueA * valueB, valueB * partialA + valueA * partialB)
 ​
 # Example: Finding the partials of z = x * (x + y) + y * y at (x, y) = (2, 3)
 x = Variable(2)
 y = Variable(3)
 z = x * (x + y) + y * y
 xPartial = z.evaluateAndDerive(x).partial
 yPartial = z.evaluateAndDerive(y).partial
 print("∂z/∂x =", xPartial)  # Output: ∂z/∂x = 7
 print("∂z/∂y =", yPartial)  # Output: ∂z/∂y = 8

C++实现正向累积:

 #include <iostream>
 struct ValueAndPartial { float value, partial; };
 struct Variable;
 struct Expression {
    virtual ValueAndPartial evaluateAndDerive(Variable *variable) = 0;
 };
 struct Variable: public Expression {
    float value;
    Variable(float value): value(value) {}
    ValueAndPartial evaluateAndDerive(Variable *variable) {
       float partial = (this == variable) ? 1.0f : 0.0f;
       return {value, partial};
    }
 };
 struct Plus: public Expression {
    Expression *a, *b;
    Plus(Expression *a, Expression *b): a(a), b(b) {}
    ValueAndPartial evaluateAndDerive(Variable *variable) {
       auto [valueA, partialA] = a->evaluateAndDerive(variable);
       auto [valueB, partialB] = b->evaluateAndDerive(variable);
       return {valueA + valueB, partialA + partialB};
    }
 };
 struct Multiply: public Expression {
    Expression *a, *b;
    Multiply(Expression *a, Expression *b): a(a), b(b) {}
    ValueAndPartial evaluateAndDerive(Variable *variable) {
       auto [valueA, partialA] = a->evaluateAndDerive(variable);
       auto [valueB, partialB] = b->evaluateAndDerive(variable);
       return {valueA * valueB, valueB * partialA + valueA * partialB};
    }
 };
 int main () {
    // Example: Finding the partials of z = x * (x + y) + y * y at (x, y) = (2, 3)
    Variable x(2), y(3);
    Plus p1(&x, &y); Multiply m1(&x, &p1); Multiply m2(&y, &y); Plus z(&m1, &m2);
    float xPartial = z.evaluateAndDerive(&x).partial;
    float yPartial = z.evaluateAndDerive(&y).partial;
    std::cout << "∂z/∂x = " << xPartial << ", "
              << "∂z/∂y = " << yPartial << std::endl;
    // Output: ∂z/∂x = 7, ∂z/∂y = 8
    return 0;
 }

反向累积需要两次遍历:在正向遍历中,先对函数求值并缓存中间结果;在反向遍历中,计算偏导数并将先前求得的导数值反向传播。相应的方法调用接收待求导的表达式 Z,以及作为种子(seed)的父表达式导数值。对于顶层表达式,即 Z 对 Z 自身求导,种子为 1。该方法递归遍历表达式树,直到到达变量,并将当前种子值累加到该变量的偏导数上。

伪代码:

 // Reverse accumulation: pushes `seed` (the derivative of the top expression
 // with respect to Z) down the tree and accumulates it on the variables.
 // Inside the conditions, "=" tests whether Z matches the given form.
 void derive(Expression Z, float seed) {
    // Leaf: add the incoming seed to the partial derivative of this variable.
    if isVariable(Z)
       partialDerivativeOf(Z) += seed;
    // Sum: both operands receive the seed unchanged.
    else if (Z = A + B)
       derive(A, seed);
       derive(B, seed);
    // Difference: the subtracted operand receives the negated seed.
    else if (Z = A - B)
       derive(A, seed);
       derive(B, -seed);
    // Product: each operand receives the seed scaled by the value of the other operand.
    else if (Z = A * B)
       derive(A, valueOf(B) * seed);
       derive(B, valueOf(A) * seed);
 }

Python实现反向累积:

 class Expression:
     def __add__(self, other):
         return Plus(self, other)
     def __mul__(self, other):
         return Multiply(self, other)
 ​
 class Variable(Expression):
     def __init__(self, value):
         self.value = value
         self.partial = 0
 ​
     def evaluate(self):
         pass
 ​
     def derive(self, seed):
         self.partial += seed
 ​
 class Plus(Expression):
     def __init__(self, expressionA, expressionB):
         self.expressionA = expressionA
         self.expressionB = expressionB
         self.value = None
 ​
     def evaluate(self):
         self.expressionA.evaluate()
         self.expressionB.evaluate()
         self.value = self.expressionA.value + self.expressionB.value
 ​
     def derive(self, seed):
         self.expressionA.derive(seed)
         self.expressionB.derive(seed)
 ​
 class Multiply(Expression):
     def __init__(self, expressionA, expressionB):
         self.expressionA = expressionA
         self.expressionB = expressionB
         self.value = None
 ​
     def evaluate(self):
         self.expressionA.evaluate()
         self.expressionB.evaluate()
         self.value = self.expressionA.value * self.expressionB.value
 ​
     def derive(self, seed):
         self.expressionA.derive(self.expressionB.value * seed)
         self.expressionB.derive(self.expressionA.value * seed)
 ​
 # Example: Finding the partials of z = x * (x + y) + y * y at (x, y) = (2, 3)
 x = Variable(2)
 y = Variable(3)
 z = x * (x + y) + y * y
 z.evaluate()
 print("z =", z.value)        # Output: z = 19
 z.derive(1)
 print("∂z/∂x =", x.partial)  # Output: ∂z/∂x = 7
 print("∂z/∂y =", y.partial)  # Output: ∂z/∂y = 8

C++实现反向累积:

 #include <iostream>
 struct Expression {
    float value;
    virtual void evaluate() = 0;
    virtual void derive(float seed) = 0;
 };
 struct Variable: public Expression {
    float partial;
    Variable(float _value) {
       value = _value;
       partial = 0;
    }
    void evaluate() {}
    void derive(float seed) {
       partial += seed;
    }
 };
 struct Plus: public Expression {
    Expression *a, *b;
    Plus(Expression *a, Expression *b): a(a), b(b) {}
    void evaluate() {
       a->evaluate();
       b->evaluate();
       value = a->value + b->value;
    }
    void derive(float seed) {
       a->derive(seed);
       b->derive(seed);
    }
 };
 struct Multiply: public Expression {
    Expression *a, *b;
    Multiply(Expression *a, Expression *b): a(a), b(b) {}
    void evaluate() {
       a->evaluate();
       b->evaluate();
       value = a->value * b->value;
    }
    void derive(float seed) {
       a->derive(b->value * seed);
       b->derive(a->value * seed);
    }
 };
 int main () {
    // Example: Finding the partials of z = x * (x + y) + y * y at (x, y) = (2, 3)
    Variable x(2), y(3);
    Plus p1(&x, &y); Multiply m1(&x, &p1); Multiply m2(&y, &y); Plus z(&m1, &m2);
    z.evaluate();
    std::cout << "z = " << z.value << std::endl;
    // Output: z = 19
    z.derive(1);
    std::cout << "∂z/∂x = " << x.partial << ", "
              << "∂z/∂y = " << y.partial << std::endl;
    // Output: ∂z/∂x = 7, ∂z/∂y = 8
    return 0;
 }

Last updated