diff --git a/src/pyVertexModel/Kg/kg.py b/src/pyVertexModel/Kg/kg.py
index b9b8998..4d3325b 100644
--- a/src/pyVertexModel/Kg/kg.py
+++ b/src/pyVertexModel/Kg/kg.py
@@ -13,7 +13,7 @@ def add_noise_to_parameter(avg_parameter, noise):
     """
     if noise == 0:
         return avg_parameter
-    
+
     min_value = avg_parameter - avg_parameter * noise
     max_value = avg_parameter + avg_parameter * noise
 
@@ -63,8 +63,9 @@ def assemble_k(self, k_e, n_y):
             idofg[(index * self.dim): ((index + 1) * self.dim)] = np.arange(n_y[index] * self.dim,
                                                                             (n_y[index] + 1) * self.dim)
 
-        indices = np.meshgrid(idofg, idofg)
-        self.K[indices[0], indices[1]] += k_e
+        # Use np.ix_ for efficient indexing instead of meshgrid
+        # This avoids creating O(n^2) temporary arrays
+        self.K[np.ix_(idofg, idofg)] += k_e
 
     def assemble_g(self, g, ge, n_y):
         """
@@ -124,11 +125,14 @@ def gKSArea(self, y1, y2, y3):
         Q2 = y3_crossed - y1_crossed
         Q3 = y1_crossed - y2_crossed
 
-        fact = 1 / np.dot(2, np.linalg.norm(q))
+        # Cache norm computation - only compute once
+        norm_q = np.linalg.norm(q)
+        fact = 1.0 / (2.0 * norm_q)
         gs = np.dot(fact,
                     np.concatenate([np.dot(Q1.transpose(), q), np.dot(Q2.transpose(), q), np.dot(Q3.transpose(), q)]))
 
-        Kss = np.dot(-(2 / np.linalg.norm(q)), np.outer(gs, gs))
+        # Reuse cached norm_q instead of recomputing
+        Kss = np.dot(-(2.0 / norm_q), np.outer(gs, gs))
 
         Ks = np.dot(fact, np.block([
             [np.dot(Q1.transpose(), Q1), self.kK(y1_crossed, y2_crossed, y3_crossed, y1, y2, y3),
diff --git a/src/pyVertexModel/Kg/kgContractility.py b/src/pyVertexModel/Kg/kgContractility.py
index f9a6a20..b09f65a 100644
--- a/src/pyVertexModel/Kg/kgContractility.py
+++ b/src/pyVertexModel/Kg/kgContractility.py
@@ -61,7 +61,7 @@ def get_intensity_based_contractility(c_set, current_face, intensity_images=True
         indices_of_closest_time_points = np.argsort(distance_to_time_variables)
         closest_time_points_distance = 1 - distance_to_time_variables[indices_of_closest_time_points]
 
-        if get_interface(current_face.InterfaceType) == get_interface('Top'):
+        if get_interface(current_face.InterfaceType) == get_interface("Top"):
             # In case we reach a point where the two closest time points are beyond the scope of the time points
             if closest_time_points_distance[0] + closest_time_points_distance[1] == 1:
                 contractility_value = contractility_variability_purse_string[indices_of_closest_time_points[0]] * \
@@ -70,7 +70,7 @@ def get_intensity_based_contractility(c_set, current_face, intensity_images=True
             else:
                 contractility_value = contractility_variability_purse_string[indices_of_closest_time_points[0]]
 
-        elif get_interface(current_face.InterfaceType) == get_interface('CellCell'):
+        elif get_interface(current_face.InterfaceType) == get_interface("CellCell"):
             if closest_time_points_distance[0] + closest_time_points_distance[1] == 1:
                 contractility_value = contractility_variability_lateral_cables[indices_of_closest_time_points[0]] * \
                                       closest_time_points_distance[0] + contractility_variability_lateral_cables[
@@ -107,14 +107,12 @@ def get_delayed_contractility(current_t, purse_string_strength, current_tri, cut
                                                         indicesOfClosestTimePoints[1], 1] * \
                                                     closestTimePointsDistance[1]
 
-        if CORRESPONDING_EDGELENGTH_6MINUTES_AGO <= 0:
-            CORRESPONDING_EDGELENGTH_6MINUTES_AGO = 0
+        CORRESPONDING_EDGELENGTH_6MINUTES_AGO = max(0, CORRESPONDING_EDGELENGTH_6MINUTES_AGO)
 
         contractilityValue = ((CORRESPONDING_EDGELENGTH_6MINUTES_AGO / current_tri.EdgeLength_time[
             0, 1]) ** 4.5) * purse_string_strength
 
-    if contractilityValue < 1:
-        contractilityValue = 1
+    contractilityValue = max(contractilityValue, 1)
 
     if contractilityValue > cutoff or np.isinf(contractilityValue):
         contractilityValue = cutoff
@@ -157,30 +155,24 @@ def get_contractility_based_on_location(current_face, current_tri, geo, c_set):
         elif c_set.TypeOfPurseString == 2:
             contractilityValue = get_intensity_based_contractility(c_set, current_face, intensity_images=False)
         elif c_set.TypeOfPurseString == 3:
-            if get_interface(current_face.InterfaceType) == get_interface('CellCell'):
+            if get_interface(current_face.InterfaceType) == get_interface("CellCell"):
                 contractilityValue = c_set.lateralCablesStrength
-            elif get_interface(current_face.InterfaceType) == get_interface('Top'):
+            elif get_interface(current_face.InterfaceType) == get_interface("Top"):
                 contractilityValue = c_set.purseStringStrength
     else:
         contractilityValue = c_set.cLineTension
 
     if len(current_tri.SharedByCells) == 1:
         contractilityValue = 0
-    else:
-        if get_interface(current_face.InterfaceType) == get_interface('Top'):  # Top
-            if any([geo.Cells[cell].AliveStatus == 0 for cell in current_tri.SharedByCells]):
-                pass
-            else:
-                contractilityValue = c_set.cLineTension
-        elif get_interface(current_face.InterfaceType) == get_interface('CellCell'):
-            if any([geo.Cells[cell].AliveStatus == 0 for cell in current_tri.SharedByCells]):
-                pass
-            else:
-                contractilityValue = c_set.cLineTension
-        elif get_interface(current_face.InterfaceType) == get_interface('Bottom'):
-            contractilityValue = c_set.cLineTension
+    elif get_interface(current_face.InterfaceType) == get_interface("Top") or get_interface(current_face.InterfaceType) == get_interface("CellCell"):  # Top
+        if any([geo.Cells[cell].AliveStatus == 0 for cell in current_tri.SharedByCells]):
+            pass
         else:
             contractilityValue = c_set.cLineTension
+    elif get_interface(current_face.InterfaceType) == get_interface("Bottom"):
+        contractilityValue = c_set.cLineTension
+    else:
+        contractilityValue = c_set.cLineTension
 
     return contractilityValue, geo
 
@@ -216,7 +208,7 @@ def compute_work(self, geo, c_set, geo_n=None, calculate_k=True):
                         # Adding a bit of noise between cells
                         C = C * cell.c_line_tension_perc
 
-                        if 'is_commited_to_intercalate' in currentTri.__dict__ and c_set.Remodelling:
+                        if "is_commited_to_intercalate" in currentTri.__dict__ and c_set.Remodelling:
                             if currentTri.is_commited_to_intercalate:
                                 C = C * 2
                                 pass
@@ -229,15 +221,20 @@ def compute_work(self, geo, c_set, geo_n=None, calculate_k=True):
                                                                   geo.Cells[c].vertices_and_faces_to_remodel)):
                             continue
 
-                        g_current = self.compute_g_contractility(l_i0, y_1, y_2, C)
+                        # Compute edge vector and length once for reuse
+                        edge_vec = y_1 - y_2
+                        l_i = np.linalg.norm(edge_vec)
+
+                        g_current = self.compute_g_contractility_with_length(l_i0, edge_vec, l_i, C)
                         ge = self.assemble_g(ge[:], g_current[:], cell.globalIds[currentTri.Edge])
 
                         geo.Cells[c].Faces[face_id].Tris[tri_id].ContractilityG = np.linalg.norm(g_current[:])
                         if calculate_k:
-                            K_current = self.compute_k_contractility(l_i0, y_1, y_2, C)
+                            K_current = self.compute_k_contractility_with_length(l_i0, edge_vec, l_i, C)
                             self.assemble_k(K_current[:, :], cell.globalIds[currentTri.Edge])
 
-                        cell.energy_contractility += compute_energy_contractility(l_i0, np.linalg.norm(y_1 - y_2), C)
+                        # Reuse cached l_i instead of recomputing
+                        cell.energy_contractility += compute_energy_contractility(l_i0, l_i, C)
             self.g += ge
             Energy[c] = cell.energy_contractility
             self.energy_per_cell[c] = cell.energy_contractility
@@ -259,12 +256,23 @@ def compute_k_contractility(self, l_i0, y_1, y_2, C):
         :param C:
         :return:
         """
-        dim = 3
+        edge_vec = y_1 - y_2
+        l_i = np.linalg.norm(edge_vec)
+        return self.compute_k_contractility_with_length(l_i0, edge_vec, l_i, C)
 
-        l_i = np.linalg.norm(y_1 - y_2)
+    def compute_k_contractility_with_length(self, l_i0, edge_vec, l_i, C):
+        """
+        Compute the stiffness matrix of the contractility with precomputed edge vector and length
+        :param l_i0: reference edge length
+        :param edge_vec: edge vector (y_1 - y_2)
+        :param l_i: edge length (norm of edge_vec)
+        :param C: contractility coefficient
+        :return: stiffness matrix
+        """
+        dim = 3
 
         kContractility = np.zeros((6, 6), dtype=self.precision_type)
-        kContractility[0:3, 0:3] = -(C / l_i0) * (1 / l_i ** 3 * np.outer((y_1 - y_2), (y_1 - y_2))) + (
+        kContractility[0:3, 0:3] = -(C / l_i0) * (1 / l_i ** 3 * np.outer(edge_vec, edge_vec)) + (
                 (C / l_i0) * np.eye(dim)) / l_i
         kContractility[0:3, 3:6] = -kContractility[0:3, 0:3]
         kContractility[3:6, 0:3] = -kContractility[0:3, 0:3]
@@ -281,10 +289,21 @@ def compute_g_contractility(self, l_i0, y_1, y_2, C):
         :param C:
         :return:
         """
-        l_i = np.linalg.norm(y_1 - y_2)
+        edge_vec = y_1 - y_2
+        l_i = np.linalg.norm(edge_vec)
+        return self.compute_g_contractility_with_length(l_i0, edge_vec, l_i, C)
 
+    def compute_g_contractility_with_length(self, l_i0, edge_vec, l_i, C):
+        """
+        Compute the force gradient with precomputed edge vector and length
+        :param l_i0: reference edge length
+        :param edge_vec: edge vector (y_1 - y_2)
+        :param l_i: edge length (norm of edge_vec)
+        :param C: contractility coefficient
+        :return: force gradient
+        """
         gContractility = np.zeros(6, dtype=self.precision_type)
-        gContractility[0:3] = (C / l_i0) * (y_1 - y_2) / l_i
+        gContractility[0:3] = (C / l_i0) * edge_vec / l_i
         gContractility[3:6] = -gContractility[0:3]
 
         return gContractility
diff --git a/src/pyVertexModel/Kg/kg_functions.pyx b/src/pyVertexModel/Kg/kg_functions.pyx
index f9ce87a..734f328 100644
--- a/src/pyVertexModel/Kg/kg_functions.pyx
+++ b/src/pyVertexModel/Kg/kg_functions.pyx
@@ -18,10 +18,10 @@ cpdef np.ndarray assembleg(double[:] g, double[:] ge, np.ndarray nY):
     cdef int cont = 0
     cdef int col
 
+    # Remove zero-checking for better performance - addition with 0 is cheap
     for I in range(len(nY)):
         for col in range(nY[I] * dim, (nY[I] + 1) * dim):
-            if ge[cont] != 0:
-                g[col] = g[col] + ge[cont]
+            g[col] = g[col] + ge[cont]
             cont = cont + 1
 
     return np.array(g, dtype=np.float64)
@@ -31,19 +31,23 @@ cpdef np.ndarray assembleg(double[:] g, double[:] ge, np.ndarray nY):
 @cython.nonecheck(False)
 @cython.boundscheck(False)
 cpdef np.ndarray assembleK(double[:, :] K, double[:, :] Ke, nY: np.ndarray):
-    #TODO: IMPROVE LIKE ASSEMBLE_G
     cdef int dim = 3
-    cdef np.ndarray idofg = np.zeros(len(nY) * dim, dtype=int)
-    cdef int I
-    for I in range(len(nY)):
-        idofg[(I * dim): ((I + 1) * dim)] = np.arange(nY[I] * dim, (nY[I] + 1) * dim)
-
-    # Update the matrix K using sparse matrix addition
-    cdef int i, j
-    for i in range(len(nY) * dim):
-        for j in range(len(nY) * dim):
-            if Ke[i, j] != 0:
-                K[idofg[i], idofg[j]] = K[idofg[i], idofg[j]] + Ke[i, j]
+    cdef int len_nY = len(nY)
+    cdef int total_dofs = len_nY * dim
+    cdef int[:] idofg = np.zeros(total_dofs, dtype=np.int32)
+    cdef int I, i, j, gi, gj
+    
+    # Build index mapping once - more efficient than arange
+    for I in range(len_nY):
+        for i in range(dim):
+            idofg[I * dim + i] = nY[I] * dim + i
+    
+    # Optimized assembly: remove zero-checking and use direct indexing
+    for i in range(total_dofs):
+        gi = idofg[i]
+        for j in range(total_dofs):
+            gj = idofg[j]
+            K[gi, gj] = K[gi, gj] + Ke[i, j]
 
     return np.array(K)
 
@@ -105,11 +109,15 @@ cpdef tuple gKSArea(np.ndarray y1, np.ndarray y2, np.ndarray y3):
     cdef np.ndarray Q2 = y3_crossed - y1_crossed
     cdef np.ndarray Q3 = y1_crossed - y2_crossed
 
-    cdef double fact = 1 / np.dot(2, np.linalg.norm(q))
+    # Cache norm computation - only compute once
+    cdef double norm_q = np.linalg.norm(q)
+    cdef double fact = 1.0 / (2.0 * norm_q)
 
+    # Vectorized computation of gs components
     cdef np.ndarray gs = np.dot(fact,  np.concatenate([np.dot(Q1.transpose(), q), np.dot(Q2.transpose(), q), np.dot(Q3.transpose(), q)]))
 
-    cdef np.ndarray Kss = np.dot(-(2 / np.linalg.norm(q)), np.outer(gs, gs))
+    # Reuse cached norm_q instead of recomputing
+    cdef np.ndarray Kss = np.dot(-(2.0 / norm_q), np.outer(gs, gs))
 
     cdef np.ndarray Ks = np.dot(fact, np.block([
         [np.dot(Q1.transpose(), Q1), kK(y1_crossed, y2_crossed, y3_crossed, y1, y2, y3),
@@ -127,30 +135,38 @@ cpdef tuple gKSArea(np.ndarray y1, np.ndarray y2, np.ndarray y3):
 @cython.nonecheck(False)
 @cython.boundscheck(False)
 cpdef np.ndarray compute_finalK_SurfaceEnergy(np.ndarray ge, np.ndarray K, double Area0):
+    """
+    Compute final K for surface energy using optimized outer product.
+    This is equivalent to K += ge * ge.T / Area0^2
+    """
     cdef Py_ssize_t i, j
     cdef Py_ssize_t n = ge.shape[0]
     cdef double[:] ge_view = ge
     cdef double[:, :] K_view = K
+    cdef double factor = 1.0 / (Area0 * Area0)
 
+    # Remove zero-checking - outer product is fast enough
     for i in range(n):
-        if ge_view[i] != 0:
-            for j in range(n):
-                if ge_view[j] != 0:
-                    K_view[i, j] += ge_view[i] * ge_view[j] / (Area0 ** 2)
+        for j in range(n):
+            K_view[i, j] += ge_view[i] * ge_view[j] * factor
 
     return np.asarray(K_view)
 
 cpdef np.ndarray compute_finalK_Volume(np.ndarray ge, np.ndarray K, double Vol, double Vol0, int n_dim, double lambdaV):
+    """
+    Compute final K for volume energy using optimized outer product.
+    Pre-compute the scalar factor to avoid repeated calculations.
+    """
     cdef Py_ssize_t i, j
     cdef Py_ssize_t n = ge.shape[0]
     cdef double[:] ge_view = ge
     cdef double[:, :] K_view = K
+    cdef double factor = lambdaV / 36.0 * (Vol - Vol0) ** (n_dim - 2) / (Vol0 ** n_dim)
 
+    # Remove zero-checking - outer product is fast enough
     for i in range(n):
-        if ge_view[i] != 0:
-            for j in range(n):
-                if ge_view[j] != 0:
-                    K_view[i, j] += lambdaV * ge_view[i] * ge_view[j] / 6 / 6 * (Vol - Vol0) ** (n_dim - 2) / Vol0 ** n_dim
+        for j in range(n):
+            K_view[i, j] += ge_view[i] * ge_view[j] * factor
 
     return np.asarray(K_view)