Skip to content

Commit 666049d

Browse files
author
Kristinn
authored
Added a helper to extract data from JSON structs inside data frames (#54)
1 parent ee3f6f8 commit 666049d

File tree

5 files changed

+73
-8
lines changed

5 files changed

+73
-8
lines changed

jupyter/docker/docker_build/00-import.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""This is an import file that runs on every startup of the Jupyter runtime."""
2+
# flake8: noqa
23

34
import altair as alt
45
import pandas as pd

jupyter/docker/docker_build/jupyter_notebook_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Configuration file for jupyter-notebook.
2+
# flake8: noqa
23

34
## Use a regular expression for the Access-Control-Allow-Origin header
45
#
Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""A simple loading of helper functions."""
2+
# flake8: noqa
23

3-
from . import cache # noqa: F401
4-
from . import client # noqa: F401
5-
from . import date # noqa: F401
6-
from . import hunt # noqa: F401
7-
from . import policies # noqa: F401
8-
from . import query # noqa: F401
9-
from . import query_builder # noqa: F401
4+
from . import cache
5+
from . import client
6+
from . import date
7+
from . import helper
8+
from . import hunt
9+
from . import policies
10+
from . import query
11+
from . import query_builder
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
File that contains data frame helpers for the Lacework notebook environment.
3+
"""
4+
5+
import json
6+
import logging
7+
8+
import pandas as pd
9+
import numpy as np
10+
11+
from laceworkjupyter import manager
12+
13+
14+
logger = logging.getLogger("lacework_sdk.jupyter.feature.helper")
15+
16+
17+
@manager.register_feature
def deep_extract_field(data_frame, column, field_string, ctx=None):
    """
    Extract a field from a JSON struct inside a DataFrame.

    Usage example:
        df['hostname'] = lw.deep_extract_field(
            df, 'properties', 'host.hostname')

    :param DataFrame data_frame: The data frame to extract from.
    :param str column: The name of the column that contains the JSON struct.
        Each cell may be a dict or a string holding a JSON document.
    :param str field_string: String that contains the field to extract from,
        this is a dot delimited string, eg: key.foo.bar, that will extract
        a value from {'key': {'foo': {'bar': 'value'}}}.
    :param obj ctx: The context object (not used by this function).
    :return: A pandas Series with the extracted value for each row. Rows
        where the value cannot be extracted (invalid JSON, not a dict, or
        a key along the path is missing) contain NaN. An empty Series is
        returned if the column does not exist in the data frame.
    """
    if column not in data_frame:
        logger.error("Column does not exist in the dataframe.")
        # An explicit dtype avoids the pandas 1.x DeprecationWarning and
        # keeps the result dtype the same across pandas versions.
        return pd.Series(dtype="object")

    # The path is the same for every row, so split it only once.
    path = field_string.split(".")

    def _extract_function(json_obj):
        """Walk the path inside a single cell and return the value found."""
        if isinstance(json_obj, str):
            try:
                json_obj = json.loads(json_obj)
            except json.JSONDecodeError:
                logger.error("Unable to decode JSON string: %s", json_obj)
                return np.nan

        if not isinstance(json_obj, dict):
            logger.error("Unable to extract, not a dict: %s", type(json_obj))
            return np.nan

        data = json_obj
        for point in path:
            if not isinstance(data, dict):
                logger.error(
                    "Sub-item %s is not a dict (%s)", point, type(data))
                return np.nan

            if point not in data:
                # A missing key is reported as NaN, like every other
                # failure, instead of leaking None for leaf keys only.
                logger.debug(
                    "Key %s not found while extracting %s",
                    point, field_string)
                return np.nan

            data = data[point]
        return data

    return data_frame[column].apply(_extract_function)

jupyter/laceworkjupyter/features/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def get_query_definition(table_name, filters):
141141
}
142142

143143

144-
def build_lql_query(query_name, query_dict, join_support=True):
144+
def build_lql_query(query_name, query_dict, join_support=True): # noqa: C901
145145
"""
146146
Build a LQL query and return evaluator ID and the query.
147147

0 commit comments

Comments
 (0)