diff --git a/functions/src/auto_trainer/auto_trainer.py b/functions/src/auto_trainer/auto_trainer.py index 7b4764700..4e53e5b7e 100755 --- a/functions/src/auto_trainer/auto_trainer.py +++ b/functions/src/auto_trainer/auto_trainer.py @@ -67,30 +67,14 @@ def _get_dataframe( Classification tasks. :param drop_columns: str/int or a list of strings/ints that represent the column names/indices to drop. """ - store_uri_prefix, _ = mlrun.datastore.parse_store_uri(dataset.artifact_url) - - # Getting the dataset: - if mlrun.utils.StorePrefix.FeatureVector == store_uri_prefix: - label_columns = label_columns or dataset.meta.status.label_column - context.logger.info(f"label columns: {label_columns}") - # FeatureVector case: - try: - fv = mlrun.datastore.get_store_resource(dataset.artifact_url) - dataset = fv.get_offline_features(drop_columns=drop_columns).to_dataframe() - except AttributeError: - # Leave here for backwards compatibility - dataset = fs.get_offline_features( - dataset.meta.uri, drop_columns=drop_columns - ).to_dataframe() - - elif not label_columns: - context.logger.info( - "label_columns not provided, mandatory when dataset is not a FeatureVector" - ) - raise ValueError - - elif isinstance(dataset, (list, dict)): + # Check if dataset is list/dict first (before trying to access artifact_url) + if isinstance(dataset, (list, dict)): # list/dict case: + if not label_columns: + context.logger.info( + "label_columns not provided, mandatory when dataset is not a FeatureVector" + ) + raise ValueError dataset = pd.DataFrame(dataset) # Checking if drop_columns provided by integer type: if drop_columns: @@ -103,17 +87,38 @@ def _get_dataframe( ) raise ValueError dataset.drop(drop_columns, axis=1, inplace=True) - else: - # simple URL case: - dataset = dataset.as_df() - if drop_columns: - if all(col in dataset for col in drop_columns): - dataset = dataset.drop(drop_columns, axis=1) - else: + # Dataset is a DataItem with artifact_url (URI or FeatureVector) + store_uri_prefix, _ = mlrun.datastore.parse_store_uri(dataset.artifact_url) + + # Getting the dataset: + if mlrun.utils.StorePrefix.FeatureVector == store_uri_prefix: + label_columns = label_columns or dataset.meta.status.label_column + context.logger.info(f"label columns: {label_columns}") + # FeatureVector case: + try: + fv = mlrun.datastore.get_store_resource(dataset.artifact_url) + dataset = fv.get_offline_features(drop_columns=drop_columns).to_dataframe() + except AttributeError: + # Leave here for backwards compatibility + dataset = fs.get_offline_features( + dataset.meta.uri, drop_columns=drop_columns + ).to_dataframe() + else: + # simple URL case: + if not label_columns: context.logger.info( - "not all of the columns to drop in the dataset, drop columns process skipped" + "label_columns not provided, mandatory when dataset is not a FeatureVector" ) + raise ValueError + dataset = dataset.as_df() + if drop_columns: + if all(col in dataset for col in drop_columns): + dataset = dataset.drop(drop_columns, axis=1) + else: + context.logger.info( + "not all of the columns to drop in the dataset, drop columns process skipped" + ) return dataset, label_columns diff --git a/functions/src/auto_trainer/function.yaml b/functions/src/auto_trainer/function.yaml index 0920b1033..50a36e750 100644 --- a/functions/src/auto_trainer/function.yaml +++ b/functions/src/auto_trainer/function.yaml @@ -1,22 +1,19 @@ -metadata: - categories: - - machine-learning - - model-training - tag: '' - name: auto-trainer +verbose: false +kind: job spec: - image: mlrun/mlrun - 
build: - origin_filename: '' - functionSourceCode: # Copyright 2019 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import mlrun
import mlrun.datastore
import mlrun.utils
import pandas as pd
from mlrun import feature_store as fs
from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx
from mlrun.frameworks.auto_mlrun import AutoMLRun
from mlrun.utils.helpers import create_class, create_function
from sklearn.model_selection import train_test_split

PathType = Union[str, Path]


class KWArgsPrefixes:
    MODEL_CLASS = "CLASS_"
    FIT = "FIT_"
    TRAIN = "TRAIN_"


def _get_sub_dict_by_prefix(src: Dict, prefix_key: str) -> Dict[str, Any]:
    """
    Collect all the keys from the given dict that starts with the given prefix and creates a new dictionary with these
    keys.

    :param src:         The source dict to extract the values from.
    :param prefix_key:  Only keys with this prefix will be returned. The keys in the result dict will be without this
                        prefix.
    """
    return {
        key.replace(prefix_key, ""): val
        for key, val in src.items()
        if key.startswith(prefix_key)
    }


def _get_dataframe(
    context: MLClientCtx,
    dataset: DataItem,
    label_columns: Optional[Union[str, List[str]]] = None,
    drop_columns: Union[str, List[str], int, List[int]] = None,
) -> Tuple[pd.DataFrame, Optional[Union[str, List[str]]]]:
    """
    Getting the DataFrame of the dataset and drop the columns accordingly.

    :param context:         MLRun context.
    :param dataset:         The dataset to train the model on.
                            Can be either a list of lists, dict, URI or a FeatureVector.
    :param label_columns:   The target label(s) of the column(s) in the dataset. for Regression or
                            Classification tasks.
    :param drop_columns:    str/int or a list of strings/ints that represent the column names/indices to drop.
    """
    store_uri_prefix, _ = mlrun.datastore.parse_store_uri(dataset.artifact_url)

    # Getting the dataset:
    if mlrun.utils.StorePrefix.FeatureVector == store_uri_prefix:
        label_columns = label_columns or dataset.meta.status.label_column
        context.logger.info(f"label columns: {label_columns}")
        # FeatureVector case:
        try:
            fv = mlrun.datastore.get_store_resource(dataset.artifact_url)
            dataset = fv.get_offline_features(drop_columns=drop_columns).to_dataframe()
        except AttributeError:
            # Leave here for backwards compatibility
            dataset = fs.get_offline_features(
                dataset.meta.uri, drop_columns=drop_columns
            ).to_dataframe()

    elif not label_columns:
        context.logger.info(
            "label_columns not provided, mandatory when dataset is not a FeatureVector"
        )
        raise ValueError

    elif isinstance(dataset, (list, dict)):
        # list/dict case:
        dataset = pd.DataFrame(dataset)
        # Checking if drop_columns provided by integer type:
        if drop_columns:
            if isinstance(drop_columns, str) or (
                isinstance(drop_columns, list)
                and any(isinstance(col, str) for col in drop_columns)
            ):
                context.logger.error(
                    "drop_columns must be an integer/list of integers if not provided with a URI/FeatureVector dataset"
                )
                raise ValueError
            dataset.drop(drop_columns, axis=1, inplace=True)

    else:
        # simple URL case:
        dataset = dataset.as_df()
        if drop_columns:
            if all(col in dataset for col in drop_columns):
                dataset = dataset.drop(drop_columns, axis=1)
            else:
                context.logger.info(
                    "not all of the columns to drop in the dataset, drop columns process skipped"
                )

    return dataset, label_columns


def train(
    context: MLClientCtx,
    dataset: DataItem,
    model_class: str,
    label_columns: Optional[Union[str, List[str]]] = None,
    drop_columns: List[str] = None,
    model_name: str = "model",
    tag: str = "",
    sample_set: DataItem = None,
    test_set: DataItem = None,
    train_test_split_size: float = None,
    random_state: int = None,
    labels: dict = None,
    **kwargs,
):
    """
    Training a model with the given dataset.

    example::

        import mlrun
        project = mlrun.get_or_create_project("my-project")
        project.set_function("hub://auto_trainer", "train")
        trainer_run = project.run(
            name="train",
            handler="train",
            inputs={"dataset": "./path/to/dataset.csv"},
            params={
                "model_class": "sklearn.linear_model.LogisticRegression",
                "label_columns": "label",
                "drop_columns": "id",
                "model_name": "my-model",
                "tag": "v1.0.0",
                "sample_set": "./path/to/sample_set.csv",
                "test_set": "./path/to/test_set.csv",
                "CLASS_solver": "liblinear",
            },
        )

    :param context:                 MLRun context
    :param dataset:                 The dataset to train the model on. Can be either a URI or a FeatureVector
    :param model_class:             The class of the model, e.g. `sklearn.linear_model.LogisticRegression`
    :param label_columns:           The target label(s) of the column(s) in the dataset. for Regression or
                                    Classification tasks. Mandatory when dataset is not a FeatureVector.
    :param drop_columns:            str or a list of strings that represent the columns to drop
    :param model_name:              The model's name to use for storing the model artifact, default to 'model'
    :param tag:                     The model's tag to log with
    :param sample_set:              A sample set of inputs for the model for logging its stats along the model in favour
                                    of model monitoring. Can be either a URI or a FeatureVector
    :param test_set:                The test set to train the model with.
    :param train_test_split_size:   if test_set was provided then this argument is ignored.
                                    Should be between 0.0 and 1.0 and represent the proportion of the dataset to include
                                    in the test split. The size of the Training set is set to the complement of this
                                    value. Default = 0.2
    :param random_state:            Relevant only when using train_test_split_size.
                                    A random state seed to shuffle the data. For more information, see:
                                    https://scikit-learn.org/stable/glossary.html#term-random_state
                                    Notice that here we only pass integer values.
    :param labels:                  Labels to log with the model
    :param kwargs:                  Here you can pass keyword arguments with prefixes,
                                    that will be parsed and passed to the relevant function, by the following prefixes:
                                    - `CLASS_` - for the model class arguments
                                    - `FIT_` - for the `fit` function arguments
                                    - `TRAIN_` - for the `train` function (in xgb or lgbm train function - future)

    """
    # Validate inputs:
    # Check if exactly one of them is supplied:
    if test_set is None:
        if train_test_split_size is None:
            context.logger.info(
                "test_set or train_test_split_size are not provided, setting train_test_split_size to 0.2"
            )
            train_test_split_size = 0.2

    elif train_test_split_size:
        context.logger.info(
            "test_set provided, ignoring given train_test_split_size value"
        )
        train_test_split_size = None

    # Get DataFrame by URL or by FeatureVector:
    dataset, label_columns = _get_dataframe(
        context=context,
        dataset=dataset,
        label_columns=label_columns,
        drop_columns=drop_columns,
    )

    # Getting the sample set:
    if sample_set is None:
        context.logger.info(
            f"Sample set not given, using the whole training set as the sample set"
        )
        sample_set = dataset
    else:
        sample_set, _ = _get_dataframe(
            context=context,
            dataset=sample_set,
            label_columns=label_columns,
            drop_columns=drop_columns,
        )

    # Parsing kwargs:
    # TODO: Use in xgb or lgbm train function.
    train_kwargs = _get_sub_dict_by_prefix(src=kwargs, prefix_key=KWArgsPrefixes.TRAIN)
    fit_kwargs = _get_sub_dict_by_prefix(src=kwargs, prefix_key=KWArgsPrefixes.FIT)
    model_class_kwargs = _get_sub_dict_by_prefix(
        src=kwargs, prefix_key=KWArgsPrefixes.MODEL_CLASS
    )

    # Check if model or function:
    if hasattr(model_class, "train"):
        # TODO: Need to call: model(), afterwards to start the train function.
        # model = create_function(f"{model_class}.train")
        raise NotImplementedError
    else:
        # Creating model instance:
        model = create_class(model_class)(**model_class_kwargs)

    x = dataset.drop(label_columns, axis=1)
    y = dataset[label_columns]
    if train_test_split_size:
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=train_test_split_size, random_state=random_state
        )
    else:
        x_train, y_train = x, y

        test_set = test_set.as_df()
        if drop_columns:
            test_set = dataset.drop(drop_columns, axis=1)

        x_test, y_test = test_set.drop(label_columns, axis=1), test_set[label_columns]

    AutoMLRun.apply_mlrun(
        model=model,
        model_name=model_name,
        context=context,
        tag=tag,
        sample_set=sample_set,
        y_columns=label_columns,
        test_set=test_set,
        x_test=x_test,
        y_test=y_test,
        artifacts=context.artifacts,
        labels=labels,
    )
    context.logger.info(f"training '{model_name}'")
    model.fit(x_train, y_train, **fit_kwargs)


def evaluate(
    context: MLClientCtx,
    model: str,
    dataset: mlrun.DataItem,
    drop_columns: List[str] = None,
    label_columns: Optional[Union[str, List[str]]] = None,
    **kwargs,
):
    """
    Evaluating a model. Artifacts generated by the MLHandler.

    :param context:                 MLRun context.
    :param model:                   The model Store path.
    :param dataset:                 The dataset to evaluate the model on. Can be either a URI or a FeatureVector.
    :param drop_columns:            str or a list of strings that represent the columns to drop.
    :param label_columns:           The target label(s) of the column(s) in the dataset. for Regression or
                                    Classification tasks. Mandatory when dataset is not a FeatureVector.
    :param kwargs:                  Here you can pass keyword arguments to the predict function
                                    (PREDICT_ prefix is not required).
    """
    # Get dataset by URL or by FeatureVector:
    dataset, label_columns = _get_dataframe(
        context=context,
        dataset=dataset,
        label_columns=label_columns,
        drop_columns=drop_columns,
    )

    # Parsing label_columns:
    parsed_label_columns = []
    if label_columns:
        label_columns = (
            label_columns if isinstance(label_columns, list) else [label_columns]
        )
        for lc in label_columns:
            if fs.common.feature_separator in lc:
                feature_set_name, label_name, alias = fs.common.parse_feature_string(lc)
                parsed_label_columns.append(alias or label_name)
        if parsed_label_columns:
            label_columns = parsed_label_columns

    x = dataset.drop(label_columns, axis=1)
    y = dataset[label_columns]

    # Loading the model and predicting:
    model_handler = AutoMLRun.load_model(
        model_path=model, context=context, model_name="model_LinearRegression"
    )
    AutoMLRun.apply_mlrun(model_handler.model, y_test=y, model_path=model)

    context.logger.info(f"evaluating '{model_handler.model_name}'")
    model_handler.model.predict(x, **kwargs)


def predict(
    context: MLClientCtx,
    model: str,
    dataset: mlrun.DataItem,
    drop_columns: Union[str, List[str], int, List[int]] = None,
    label_columns: Optional[Union[str, List[str]]] = None,
    result_set: Optional[str] = None,
    **kwargs,
):
    """
    Predicting dataset by a model.

    :param context:                 MLRun context.
    :param model:                   The model Store path.
    :param dataset:                 The dataset to predict the model on. Can be either a URI, a FeatureVector or a
                                    sample in a shape of a list/dict.
                                    When passing a sample, pass the dataset as a field in `params` instead of `inputs`.
    :param drop_columns:            str/int or a list of strings/ints that represent the column names/indices to drop.
                                    When the dataset is a list/dict this parameter should be represented by integers.
    :param label_columns:           The target label(s) of the column(s) in the dataset. for Regression or
                                    Classification tasks. Mandatory when dataset is not a FeatureVector.
    :param result_set:              The db key to set name of the prediction result and the filename.
                                    Default to 'prediction'.
    :param kwargs:                  Here you can pass keyword arguments to the predict function
                                    (PREDICT_ prefix is not required).
    """
    # Get dataset by URL or by FeatureVector:
    dataset, label_columns = _get_dataframe(
        context=context,
        dataset=dataset,
        label_columns=label_columns,
        drop_columns=drop_columns,
    )

    # loading the model, and getting the model handler:
    model_handler = AutoMLRun.load_model(model_path=model, context=context)

    # Dropping label columns if necessary:
    if not label_columns:
        label_columns = []
    elif isinstance(label_columns, str):
        label_columns = [label_columns]

    # Predicting:
    context.logger.info(f"making prediction by '{model_handler.model_name}'")
    y_pred = model_handler.model.predict(dataset, **kwargs)

    # Preparing and validating label columns for the dataframe of the prediction result:
    num_predicted = 1 if len(y_pred.shape) == 1 else y_pred.shape[1]

    if num_predicted > len(label_columns):
        if num_predicted == 1:
            label_columns = ["predicted labels"]
        else:
            label_columns.extend(
                [
                    f"predicted_label_{i + 1 + len(label_columns)}"
                    for i in range(num_predicted - len(label_columns))
                ]
            )
    elif num_predicted < len(label_columns):
        context.logger.error(
            f"number of predicted labels: {num_predicted} is smaller than number of label columns: {len(label_columns)}"
        )
        raise ValueError

    artifact_name = result_set or "prediction"
    labels_inside_df = set(label_columns) & set(dataset.columns.tolist())
    if labels_inside_df:
        context.logger.error(
            f"The labels: {labels_inside_df} are already existed in the dataframe"
        )
        raise ValueError
    pred_df = pd.concat([dataset, pd.DataFrame(y_pred, columns=label_columns)], axis=1)
    context.log_dataset(artifact_name, pred_df, db_key=result_set)
 - code_origin: '' - description: Automatic train, evaluate and predict functions for the ML frameworks - - Scikit-Learn, XGBoost and LightGBM. - disable_auto_mount: false - default_handler: train entry_points: train: - lineno: 121 + doc: "Training a model with the given dataset.\n\nexample::\n\n import mlrun\n\ + \ project = mlrun.get_or_create_project(\"my-project\")\n project.set_function(\"\ + hub://auto_trainer\", \"train\")\n trainer_run = project.run(\n \ + \ name=\"train\",\n handler=\"train\",\n inputs={\"dataset\"\ + : \"./path/to/dataset.csv\"},\n params={\n \"model_class\"\ + : \"sklearn.linear_model.LogisticRegression\",\n \"label_columns\"\ + : \"label\",\n \"drop_columns\": \"id\",\n \"model_name\"\ + : \"my-model\",\n \"tag\": \"v1.0.0\",\n \"sample_set\"\ + : \"./path/to/sample_set.csv\",\n \"test_set\": \"./path/to/test_set.csv\"\ + ,\n \"CLASS_solver\": \"liblinear\",\n },\n )" + has_kwargs: true parameters: - name: context type: MLClientCtx @@ -70,21 +67,12 @@ spec: type: dict doc: Labels to log with the model default: null - has_varargs: false + lineno: 126 name: train - has_kwargs: true - doc: "Training a model with the given dataset.\n\nexample::\n\n import mlrun\n\ - \ project = mlrun.get_or_create_project(\"my-project\")\n project.set_function(\"\ - hub://auto_trainer\", \"train\")\n trainer_run = project.run(\n \ - \ name=\"train\",\n handler=\"train\",\n inputs={\"dataset\"\ - : \"./path/to/dataset.csv\"},\n params={\n \"model_class\"\ - : \"sklearn.linear_model.LogisticRegression\",\n \"label_columns\"\ - : \"label\",\n \"drop_columns\": \"id\",\n \"model_name\"\ - : \"my-model\",\n \"tag\": \"v1.0.0\",\n \"sample_set\"\ - : \"./path/to/sample_set.csv\",\n \"test_set\": \"./path/to/test_set.csv\"\ - ,\n \"CLASS_solver\": \"liblinear\",\n },\n )" + has_varargs: false evaluate: - lineno: 273 + doc: Evaluating a model. Artifacts generated by the MLHandler. + has_kwargs: true parameters: - name: context type: MLClientCtx @@ -104,12 +92,12 @@ spec: doc: The target label(s) of the column(s) in the dataset. for Regression or Classification tasks. Mandatory when dataset is not a FeatureVector. default: null - has_varargs: false + lineno: 278 name: evaluate - has_kwargs: true - doc: Evaluating a model. Artifacts generated by the MLHandler. + has_varargs: false predict: - lineno: 327 + doc: Predicting dataset by a model. + has_kwargs: true parameters: - name: context type: MLClientCtx @@ -138,10 +126,22 @@ spec: doc: The db key to set name of the prediction result and the filename. Default to 'prediction'. default: null - has_varargs: false + lineno: 332 name: predict - has_kwargs: true - doc: Predicting dataset by a model. + has_varargs: false + build: + code_origin: '' + origin_filename: '' + functionSourceCode: # Copyright 2019 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import mlrun
import mlrun.datastore
import mlrun.utils
import pandas as pd
from mlrun import feature_store as fs
from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx
from mlrun.frameworks.auto_mlrun import AutoMLRun
from mlrun.utils.helpers import create_class, create_function
from sklearn.model_selection import train_test_split

PathType = Union[str, Path]


class KWArgsPrefixes:
    MODEL_CLASS = "CLASS_"
    FIT = "FIT_"
    TRAIN = "TRAIN_"


def _get_sub_dict_by_prefix(src: Dict, prefix_key: str) -> Dict[str, Any]:
    """
    Collect all the keys from the given dict that start with the given prefix and create a new dictionary with these&#xA;
    keys.&#xA;

    :param src:         The source dict to extract the values from.
    :param prefix_key:  Only keys with this prefix will be returned. The keys in the result dict will be without this
                        prefix.
    """
    return {
        key.replace(prefix_key, ""): val
        for key, val in src.items()
        if key.startswith(prefix_key)
    }


def _get_dataframe(
    context: MLClientCtx,
    dataset: DataItem,
    label_columns: Optional[Union[str, List[str]]] = None,
    drop_columns: Union[str, List[str], int, List[int]] = None,
) -> Tuple[pd.DataFrame, Optional[Union[str, List[str]]]]:
    """
    Getting the DataFrame of the dataset and dropping the requested columns.&#xA;

    :param context:         MLRun context.
    :param dataset:         The dataset to train the model on.
                            Can be either a list of lists, dict, URI or a FeatureVector.
    :param label_columns:   The target label(s) of the column(s) in the dataset, for Regression or&#xA;
                            Classification tasks.
    :param drop_columns:    str/int or a list of strings/ints that represent the column names/indices to drop.
    """
    # Check if dataset is list/dict first (before trying to access artifact_url)
    if isinstance(dataset, (list, dict)):
        # list/dict case:
        if not label_columns:&#xA;
            context.logger.error(&#xA;
                "label_columns not provided, mandatory when dataset is not a FeatureVector"&#xA;
            )&#xA;
            raise ValueError("label_columns is mandatory when dataset is not a FeatureVector")&#xA;
        dataset = pd.DataFrame(dataset)
        # Checking if drop_columns provided by integer type:
        if drop_columns:
            if isinstance(drop_columns, str) or (
                isinstance(drop_columns, list)
                and any(isinstance(col, str) for col in drop_columns)
            ):
                context.logger.error(&#xA;
                    "drop_columns must be an integer or a list of integers when the dataset is a list/dict"&#xA;
                )&#xA;
                raise ValueError("drop_columns must be integer indices when the dataset is a list/dict")&#xA;
            dataset.drop(drop_columns, axis=1, inplace=True)
    else:
        # Dataset is a DataItem with artifact_url (URI or FeatureVector)
        store_uri_prefix, _ = mlrun.datastore.parse_store_uri(dataset.artifact_url)

        # Getting the dataset:
        if mlrun.utils.StorePrefix.FeatureVector == store_uri_prefix:
            label_columns = label_columns or dataset.meta.status.label_column
            context.logger.info(f"label columns: {label_columns}")
            # FeatureVector case:
            try:
                fv = mlrun.datastore.get_store_resource(dataset.artifact_url)
                dataset = fv.get_offline_features(drop_columns=drop_columns).to_dataframe()
            except AttributeError:
                # Leave here for backwards compatibility
                dataset = fs.get_offline_features(
                    dataset.meta.uri, drop_columns=drop_columns
                ).to_dataframe()
        else:
            # simple URL case:
            if not label_columns:&#xA;
                context.logger.error(&#xA;
                    "label_columns not provided, mandatory when dataset is not a FeatureVector"&#xA;
                )&#xA;
                raise ValueError("label_columns is mandatory when dataset is not a FeatureVector")&#xA;
            dataset = dataset.as_df()
            if drop_columns:&#xA;
                # Wrap a single column name in a list so the membership check&#xA;
                # below tests column names rather than characters:&#xA;
                if isinstance(drop_columns, str):&#xA;
                    drop_columns = [drop_columns]&#xA;
                if all(col in dataset for col in drop_columns):&#xA;
                    dataset = dataset.drop(drop_columns, axis=1)&#xA;
                else:&#xA;
                    context.logger.info(&#xA;
                        "not all of the requested columns exist in the dataset, skipping the column drop"&#xA;
                    )&#xA;

    return dataset, label_columns
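&#xA;
# A rough sketch of the three dataset shapes _get_dataframe accepts (the&#xA;
# values and store URIs below are illustrative placeholders):&#xA;
#&#xA;
#     _get_dataframe(context, dataset=[[1, 2], [3, 4]], label_columns=[1])&#xA;
#     _get_dataframe(context, dataset=&lt;DataItem of "store://feature-vectors/proj/vec"&gt;)&#xA;
#     _get_dataframe(context, dataset=&lt;DataItem of "v3io:///data/train.csv"&gt;,&#xA;
#                    label_columns="label", drop_columns=["id"])&#xA;
#&#xA;
# Only the FeatureVector case may omit label_columns, since the label can be&#xA;
# read from the vector's metadata.&#xA;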


def train(
    context: MLClientCtx,
    dataset: DataItem,
    model_class: str,
    label_columns: Optional[Union[str, List[str]]] = None,
    drop_columns: List[str] = None,
    model_name: str = "model",
    tag: str = "",
    sample_set: DataItem = None,
    test_set: DataItem = None,
    train_test_split_size: float = None,
    random_state: int = None,
    labels: dict = None,
    **kwargs,
):
    """
    Training a model with the given dataset.

    example::

        import mlrun
        project = mlrun.get_or_create_project("my-project")
        project.set_function("hub://auto_trainer", "train")
        trainer_run = project.run_function(&#xA;
            "train",&#xA;
            handler="train",&#xA;
            inputs={"dataset": "./path/to/dataset.csv"},
            params={
                "model_class": "sklearn.linear_model.LogisticRegression",
                "label_columns": "label",
                "drop_columns": "id",
                "model_name": "my-model",
                "tag": "v1.0.0",
                "sample_set": "./path/to/sample_set.csv",
                "test_set": "./path/to/test_set.csv",
                "CLASS_solver": "liblinear",
            },
        )

    :param context:                 MLRun context
    :param dataset:                 The dataset to train the model on. Can be either a URI or a FeatureVector
    :param model_class:             The class of the model, e.g. `sklearn.linear_model.LogisticRegression`
    :param label_columns:           The target label(s) of the column(s) in the dataset, for Regression or&#xA;
                                    Classification tasks. Mandatory when dataset is not a FeatureVector.
    :param drop_columns:            str or a list of strings that represent the columns to drop
    :param model_name:              The model's name to use for storing the model artifact, default to 'model'
    :param tag:                     The model's tag to log with
    :param sample_set:              A sample set of inputs for the model, used to log its stats along with the model&#xA;
                                    in support of model monitoring. Can be either a URI or a FeatureVector&#xA;
    :param test_set:                The test set to evaluate the model with.&#xA;
    :param train_test_split_size:   If test_set was provided then this argument is ignored.&#xA;
                                    Should be between 0.0 and 1.0 and represent the proportion of the dataset to include
                                    in the test split. The size of the Training set is set to the complement of this
                                    value. Default = 0.2
    :param random_state:            Relevant only when using train_test_split_size.
                                    A random state seed to shuffle the data. For more information, see:
                                    https://scikit-learn.org/stable/glossary.html#term-random_state
                                    Notice that here we only pass integer values.
    :param labels:                  Labels to log with the model
    :param kwargs:                  Here you can pass keyword arguments with prefixes,
                                    that will be parsed and passed to the relevant function, by the following prefixes:
                                    - `CLASS_` - for the model class arguments
                                    - `FIT_` - for the `fit` function arguments
                                    - `TRAIN_` - for the `train` function (in xgb or lgbm train function - future)

    """
    # Validate inputs:
    # Resolve the precedence between test_set and train_test_split_size:&#xA;
    if test_set is None:
        if train_test_split_size is None:
            context.logger.info(&#xA;
                "neither test_set nor train_test_split_size was provided, setting train_test_split_size to 0.2"&#xA;
            )&#xA;
            train_test_split_size = 0.2

    elif train_test_split_size:
        context.logger.info(
            "test_set provided, ignoring given train_test_split_size value"
        )
        train_test_split_size = None
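&#xA;
    # Resolution of the two arguments, as implemented above:&#xA;
    #   test_set=None,  train_test_split_size=None -> split defaults to 0.2&#xA;
    #   test_set=None,  train_test_split_size=0.3  -> a 0.7/0.3 split is used&#xA;
    #   test_set given, train_test_split_size=None -> the given test_set is used&#xA;
    #   test_set given, train_test_split_size=0.3  -> the split size is ignored&#xA;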

    # Get DataFrame by URL or by FeatureVector:
    dataset, label_columns = _get_dataframe(
        context=context,
        dataset=dataset,
        label_columns=label_columns,
        drop_columns=drop_columns,
    )

    # Getting the sample set:
    if sample_set is None:
        context.logger.info(&#xA;
            "Sample set not given, using the whole training set as the sample set"&#xA;
        )&#xA;
        sample_set = dataset
    else:
        sample_set, _ = _get_dataframe(
            context=context,
            dataset=sample_set,
            label_columns=label_columns,
            drop_columns=drop_columns,
        )

    # Parsing kwargs:
    # TODO: Use in xgb or lgbm train function.
    train_kwargs = _get_sub_dict_by_prefix(src=kwargs, prefix_key=KWArgsPrefixes.TRAIN)
    fit_kwargs = _get_sub_dict_by_prefix(src=kwargs, prefix_key=KWArgsPrefixes.FIT)
    model_class_kwargs = _get_sub_dict_by_prefix(
        src=kwargs, prefix_key=KWArgsPrefixes.MODEL_CLASS
    )

    # Check if model or function:
    if hasattr(model_class, "train"):
        # TODO: Need to call: model(), afterwards to start the train function.
        # model = create_function(f"{model_class}.train")
        raise NotImplementedError
    else:
        # Creating model instance:
        model = create_class(model_class)(**model_class_kwargs)

    x = dataset.drop(label_columns, axis=1)
    y = dataset[label_columns]
    if train_test_split_size:
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=train_test_split_size, random_state=random_state
        )
    else:
        x_train, y_train = x, y

        test_set = test_set.as_df()
        if drop_columns:
            test_set = test_set.drop(drop_columns, axis=1)&#xA;

        x_test, y_test = test_set.drop(label_columns, axis=1), test_set[label_columns]

    AutoMLRun.apply_mlrun(
        model=model,
        model_name=model_name,
        context=context,
        tag=tag,
        sample_set=sample_set,
        y_columns=label_columns,
        test_set=test_set,
        x_test=x_test,
        y_test=y_test,
        artifacts=context.artifacts,
        labels=labels,
    )
    context.logger.info(f"training '{model_name}'")
    model.fit(x_train, y_train, **fit_kwargs)


def evaluate(
    context: MLClientCtx,
    model: str,
    dataset: mlrun.DataItem,
    drop_columns: List[str] = None,
    label_columns: Optional[Union[str, List[str]]] = None,
    **kwargs,
):
    """
    Evaluating a model. Artifacts are generated by the MLHandler.&#xA;

    :param context:                 MLRun context.
    :param model:                   The model Store path.
    :param dataset:                 The dataset to evaluate the model on. Can be either a URI or a FeatureVector.
    :param drop_columns:            str or a list of strings that represent the columns to drop.
    :param label_columns:           The target label(s) of the column(s) in the dataset, for Regression or&#xA;
                                    Classification tasks. Mandatory when dataset is not a FeatureVector.
    :param kwargs:                  Here you can pass keyword arguments to the predict function
                                    (PREDICT_ prefix is not required).
    """
    # Get dataset by URL or by FeatureVector:
    dataset, label_columns = _get_dataframe(
        context=context,
        dataset=dataset,
        label_columns=label_columns,
        drop_columns=drop_columns,
    )

    # Parsing label_columns:
    parsed_label_columns = []
    if label_columns:
        label_columns = (
            label_columns if isinstance(label_columns, list) else [label_columns]
        )
        for lc in label_columns:
            if fs.common.feature_separator in lc:
                feature_set_name, label_name, alias = fs.common.parse_feature_string(lc)
                parsed_label_columns.append(alias or label_name)
        if parsed_label_columns:
            label_columns = parsed_label_columns

    x = dataset.drop(label_columns, axis=1)
    y = dataset[label_columns]

    # Loading the model and predicting:
    model_handler = AutoMLRun.load_model(
        model_path=model, context=context, model_name="model_LinearRegression"
    )
    AutoMLRun.apply_mlrun(model_handler.model, y_test=y, model_path=model)

    context.logger.info(f"evaluating '{model_handler.model_name}'")
    model_handler.model.predict(x, **kwargs)
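&#xA;
# A hedged usage sketch for evaluate, mirroring the train example above (the&#xA;
# paths, project object and run outputs are placeholders):&#xA;
#&#xA;
#     project.set_function("hub://auto_trainer", "auto_trainer")&#xA;
#     evaluate_run = project.run_function(&#xA;
#         "auto_trainer",&#xA;
#         handler="evaluate",&#xA;
#         inputs={"dataset": "./path/to/test_set.csv"},&#xA;
#         params={&#xA;
#             "model": trainer_run.outputs["model"],&#xA;
#             "label_columns": "label",&#xA;
#         },&#xA;
#     )&#xA;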


def predict(
    context: MLClientCtx,
    model: str,
    dataset: mlrun.DataItem,
    drop_columns: Union[str, List[str], int, List[int]] = None,
    label_columns: Optional[Union[str, List[str]]] = None,
    result_set: Optional[str] = None,
    **kwargs,
):
    """
    Predicting dataset by a model.

    :param context:                 MLRun context.
    :param model:                   The model Store path.
    :param dataset:                 The dataset to predict the model on. Can be either a URI, a FeatureVector or a
                                    sample in the shape of a list/dict.&#xA;
                                    When passing a sample, pass the dataset as a field in `params` instead of `inputs`.
    :param drop_columns:            str/int or a list of strings/ints that represent the column names/indices to drop.
                                    When the dataset is a list/dict this parameter should be represented by integers.
    :param label_columns:           The target label(s) of the column(s) in the dataset, for Regression or&#xA;
                                    Classification tasks. Mandatory when dataset is not a FeatureVector.
    :param result_set:              The db key to use for the name of the prediction result and for the filename.&#xA;
                                    Defaults to 'prediction'.&#xA;
    :param kwargs:                  Here you can pass keyword arguments to the predict function
                                    (PREDICT_ prefix is not required).
    """
    # Get dataset by URL or by FeatureVector:
    dataset, label_columns = _get_dataframe(
        context=context,
        dataset=dataset,
        label_columns=label_columns,
        drop_columns=drop_columns,
    )

    # loading the model, and getting the model handler:
    model_handler = AutoMLRun.load_model(model_path=model, context=context)

    # Fix feature names for models that require them (e.g., XGBoost)
    # When dataset comes from a list, pandas assigns default integer column names
    # but some models expect specific feature names they were trained with
    if hasattr(model_handler.model, "feature_names_in_"):&#xA;
        expected_features = model_handler.model.feature_names_in_
        if len(dataset.columns) == len(expected_features):
            # Only rename if the number of columns matches
            # This handles the case where a list was converted to DataFrame with default column names
            if not all(col == feat for col, feat in zip(dataset.columns, expected_features)):
                context.logger.info(&#xA;
                    "Renaming dataset columns to match the model's expected feature names"&#xA;
                )&#xA;
                dataset.columns = expected_features
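&#xA;
    # For example (illustrative): a list sample becomes a DataFrame with the&#xA;
    # default integer columns [0, 1, ...], while an estimator fitted on a named&#xA;
    # DataFrame exposes feature_names_in_ such as ["sepal_length", ...]; the&#xA;
    # rename above aligns the two so predict() accepts the input.&#xA;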

    # Dropping label columns if necessary:
    if not label_columns:
        label_columns = []
    elif isinstance(label_columns, str):
        label_columns = [label_columns]

    # Predicting:
    context.logger.info(f"making prediction by '{model_handler.model_name}'")
    y_pred = model_handler.model.predict(dataset, **kwargs)

    # Preparing and validating label columns for the dataframe of the prediction result:
    num_predicted = 1 if len(y_pred.shape) == 1 else y_pred.shape[1]

    if num_predicted > len(label_columns):
        if num_predicted == 1:
            label_columns = ["predicted labels"]
        else:
            label_columns.extend(
                [
                    f"predicted_label_{i + 1 + len(label_columns)}"
                    for i in range(num_predicted - len(label_columns))
                ]
            )
    elif num_predicted < len(label_columns):
        context.logger.error(
            f"number of predicted labels: {num_predicted} is smaller than number of label columns: {len(label_columns)}"
        )
        raise ValueError("fewer predicted labels than provided label columns")&#xA;
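&#xA;
    # e.g. (illustrative): y_pred of shape (n, 3) with label_columns=["y"] is&#xA;
    # padded above to ["y", "predicted_label_2", "predicted_label_3"].&#xA;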

    artifact_name = result_set or "prediction"
    labels_inside_df = set(label_columns) & set(dataset.columns.tolist())
    if labels_inside_df:
        context.logger.error(&#xA;
            f"The labels {labels_inside_df} already exist in the dataframe"&#xA;
        )&#xA;
        raise ValueError(f"labels {labels_inside_df} already exist in the dataframe")&#xA;
    pred_df = pd.concat([dataset, pd.DataFrame(y_pred, columns=label_columns)], axis=1)
    context.log_dataset(artifact_name, pred_df, db_key=result_set)
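&#xA;
# A hedged usage sketch for predict with an in-memory sample (values are&#xA;
# placeholders; per the docstring, a list/dict sample must be passed through&#xA;
# params rather than inputs):&#xA;
#&#xA;
#     predict_run = project.run_function(&#xA;
#         "auto_trainer",&#xA;
#         handler="predict",&#xA;
#         params={&#xA;
#             "model": trainer_run.outputs["model"],&#xA;
#             "dataset": [[5.1, 3.5, 1.4, 0.2]],&#xA;
#             "label_columns": "label",&#xA;
#         },&#xA;
#     )&#xA;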
 command: '' -kind: job -verbose: false + default_handler: train + image: mlrun/mlrun + disable_auto_mount: false + description: Automatic train, evaluate and predict functions for the ML frameworks + - Scikit-Learn, XGBoost and LightGBM. +metadata: + categories: + - machine-learning + - model-training + tag: '' + name: auto-trainer diff --git a/functions/src/auto_trainer/item.yaml b/functions/src/auto_trainer/item.yaml index ba33f6a08..d397a79d6 100755 --- a/functions/src/auto_trainer/item.yaml +++ b/functions/src/auto_trainer/item.yaml @@ -13,7 +13,7 @@ labels: author: Iguazio maintainers: [] marketplaceType: '' -mlrunVersion: 1.7.0 +mlrunVersion: 1.10.0 name: auto_trainer platformVersion: 3.5.0 spec: @@ -23,4 +23,4 @@ spec: kind: job requirements: [] url: '' -version: 1.8.0 +version: 1.9.0 diff --git a/functions/src/auto_trainer/requirements.txt b/functions/src/auto_trainer/requirements.txt index b14a0293c..b23f9b9dd 100644 --- a/functions/src/auto_trainer/requirements.txt +++ b/functions/src/auto_trainer/requirements.txt @@ -1,4 +1,5 @@ pandas -scikit-learn<1.4.0 +scikit-learn~=1.5.2 +lightgbm xgboost<2.0.0 plotly diff --git a/functions/src/auto_trainer/test_auto_trainer.py b/functions/src/auto_trainer/test_auto_trainer.py index 9a1ff554c..ac95109f8 100644 --- a/functions/src/auto_trainer/test_auto_trainer.py +++ b/functions/src/auto_trainer/test_auto_trainer.py @@ -29,6 +29,9 @@ ("sklearn.linear_model.LinearRegression", "regression"), ("sklearn.ensemble.RandomForestClassifier", "classification"), ("xgboost.XGBRegressor", "regression"), + ("xgboost.XGBClassifier", "classification"), + ("lightgbm.LGBMRegressor", "regression"), + ("lightgbm.LGBMClassifier", "classification") ] REQUIRED_ENV_VARS = [ @@ -78,11 +81,15 @@ def _assert_train_handler(train_run): @pytest.mark.parametrize("model", MODELS) +@pytest.mark.skipif( + condition=not _validate_environment_variables(), + reason="Project's environment variables are not set", +) def test_train(model: Tuple[str, str]): dataset, label_columns = _get_dataset(model[1]) is_test_passed = True - project = mlrun.new_project("auto-trainer-test", context="./") + project = mlrun.get_or_create_project("auto-trainer-test", context="./") fn = project.set_function("function.yaml", "train", kind="job", image="mlrun/mlrun") train_run = None @@ -119,7 +126,7 @@ def test_train_evaluate(model: Tuple[str, str]): dataset, label_columns = _get_dataset(model[1]) is_test_passed = True # Importing function: - project = mlrun.new_project("auto-trainer-test", context="./") + project = mlrun.get_or_create_project("auto-trainer-test", context="./") fn = project.set_function("function.yaml", "train", kind="job", image="mlrun/mlrun") temp_dir = tempfile.mkdtemp() @@ -172,7 +179,7 @@ def test_train_predict(model: Tuple[str, str]): df = pd.read_csv(dataset) sample = df.head().drop("labels", axis=1).values.tolist() # Importing function: - project = mlrun.new_project("auto-trainer-test", context="./") + project = mlrun.get_or_create_project("auto-trainer-test", context="./") fn = project.set_function("function.yaml", "train", kind="job", image="mlrun/mlrun") temp_dir = tempfile.mkdtemp() diff --git a/functions/src/describe/function.yaml b/functions/src/describe/function.yaml index a11461774..1c254c3c4 100644 --- a/functions/src/describe/function.yaml +++ b/functions/src/describe/function.yaml @@ -1,9 +1,44 @@ +metadata: + tag: '' + categories: + - data-analysis + name: describe +verbose: false +kind: job spec: + command: '' + image: mlrun/mlrun + description: describe and 
visualizes dataset stats + disable_auto_mount: false + default_handler: analyze entry_points: analyze: + doc: 'The function will output the following artifacts per + + column within the data frame (based on data types) + + If the data has more than 500,000 sample we + + sample randomly 500,000 samples: + + + describe csv + + histograms + + scatter-2d + + violin chart + + correlation-matrix chart + + correlation-matrix csv + + imbalance pie chart + + imbalance-weights-vec csv' + has_kwargs: false has_varargs: false - outputs: - - type: None parameters: - name: context type: MLClientCtx @@ -45,46 +80,11 @@ spec: - name: dask_client doc: Dask client object default: null - doc: 'The function will output the following artifacts per - - column within the data frame (based on data types) - - If the data has more than 500,000 sample we - - sample randomly 500,000 samples: - - - describe csv - - histograms - - scatter-2d - - violin chart - - correlation-matrix chart - - correlation-matrix csv - - imbalance pie chart - - imbalance-weights-vec csv' - has_kwargs: false + outputs: + - type: None name: analyze lineno: 46 - image: mlrun/mlrun - command: '' build: - functionSourceCode: # Copyright 2019 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Generated by nuclio.export.NuclioExporter

import warnings
from typing import Union

import mlrun
import numpy as np

warnings.simplefilter(action="ignore", category=FutureWarning)

import mlrun.feature_store as fstore
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from mlrun.artifacts import (
    Artifact,
    DatasetArtifact,
    PlotlyArtifact,
    TableArtifact,
    update_dataset_meta,
)
from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx
from mlrun.feature_store import FeatureSet
from plotly.subplots import make_subplots

pd.set_option("display.float_format", lambda x: "%.2f" % x)
MAX_SIZE_OF_DF = 500000


def analyze(
    context: MLClientCtx,
    name: str = "dataset",
    table: Union[FeatureSet, DataItem] = None,
    label_column: str = None,
    plots_dest: str = "plots",
    random_state: int = 1,
    problem_type: str = "classification",
    dask_key: str = "dask_key",
    dask_function: str = None,
    dask_client=None,
) -> None:
    """
    The function will output the following artifacts per
    column within the data frame (based on data types)
    If the data has more than 500,000 sample we
    sample randomly 500,000 samples:

    describe csv
    histograms
    scatter-2d
    violin chart
    correlation-matrix chart
    correlation-matrix csv
    imbalance pie chart
    imbalance-weights-vec csv

    :param context:                 The function context
    :param name:                    Key of dataset to database ("dataset" for default)
    :param table:                   MLRun input pointing to pandas dataframe (csv/parquet file path) or FeatureSet
                                    as param
    :param label_column:            Ground truth column label
    :param plots_dest:              Destination folder of summary plots (relative to artifact_path)
                                    ("plots" for default)
    :param random_state:            When the table has more than 500,000 samples, we sample randomly 500,000 samples
    :param problem_type             The type of the ML problem the data facing - regression, classification or None
                                    (classification for default)
    :param dask_key:                Key of dataframe in dask client "datasets" attribute
    :param dask_function:           Dask function url (db://..)
    :param dask_client:             Dask client object
    """
    data_item, featureset, creat, update = False, False, False, False
    get_from_table = True
    if dask_function or dask_client:
        data_item, creat = True, True
        if dask_function:
            client = mlrun.import_function(dask_function).client
        elif dask_client:
            client = dask_client
        else:
            raise ValueError("dask client was not provided")

        if dask_key in client.datasets:
            df = client.get_dataset(dask_key)
            data_item, creat, get_from_table = True, True, False
        elif table:
            get_from_table = True
        else:
            context.logger.info(
                f"only these datasets are available {client.datasets} in client {client}"
            )
            raise Exception("dataset not found on dask cluster")

    if get_from_table:
        if type(table) == DataItem:
            if table.meta is None:
                data_item, creat, update = True, True, False
            elif table.meta.kind == "dataset":
                data_item, creat, update = True, False, True
            elif table.meta.kind == "FeatureVector":
                data_item, creat, update = True, False, False
            elif table.meta.kind == "FeatureSet":
                featureset, creat, update = True, False, False

        if data_item:
            df = table.as_df()
        elif featureset:
            project_name, set_name = (
                table._path.split("/")[2],
                table._path.split("/")[4],
            )
            feature_set = fstore.get_feature_set(
                f"store://feature-sets/{project_name}/{set_name}"
            )
            df = feature_set.to_dataframe()
        else:
            context.logger.error(f"Wrong table type.")
            return

    if df.size > MAX_SIZE_OF_DF:
        df = df.sample(n=int(MAX_SIZE_OF_DF / df.shape[1]), random_state=random_state)
    extra_data = {}

    if label_column not in df.columns:
        label_column = None

    extra_data["describe csv"] = context.log_artifact(
        TableArtifact("describe-csv", df=df.describe()),
        local_path=f"{plots_dest}/describe.csv",
    )

    try:
        _create_histogram_mat_artifact(
            context, df, extra_data, label_column, plots_dest
        )
    except Exception as e:
        context.logger.warn(f"Failed to create histogram matrix artifact due to: {e}")
    try:
        _create_features_histogram_artifacts(
            context, df, extra_data, label_column, plots_dest, problem_type
        )
    except Exception as e:
        context.logger.warn(f"Failed to create pairplot histograms due to: {e}")
    try:
        _create_features_2d_scatter_artifacts(
            context, df, extra_data, label_column, plots_dest, problem_type
        )
    except Exception as e:
        context.logger.warn(f"Failed to create pairplot 2d_scatter due to: {e}")
    try:
        _create_violin_artifact(context, df, extra_data, plots_dest)
    except Exception as e:
        context.logger.warn(f"Failed to create violin distribution plots due to: {e}")
    try:
        _create_imbalance_artifact(
            context, df, extra_data, label_column, plots_dest, problem_type
        )
    except Exception as e:
        context.logger.warn(f"Failed to create class imbalance plot due to: {e}")
    try:
        _create_corr_artifact(context, df, extra_data, label_column, plots_dest)
    except Exception as e:
        context.logger.warn(f"Failed to create features correlation plot due to: {e}")

    if not data_item:
        return

    artifact = table.artifact_url
    if creat:  # dataset not stored
        artifact = DatasetArtifact(
            key="dataset", stats=True, df=df, extra_data=extra_data
        )
        artifact = context.log_artifact(artifact, db_key=name)
        context.logger.info(f"The data set is logged to the project under {name} name")

    if update:
        update_dataset_meta(artifact, extra_data=extra_data)
        context.logger.info(f"The data set named {name} is updated")

    # TODO : 3-D plot on on selected features.
    # TODO : Reintegration plot on on selected features.
    # TODO : PCA plot (with options)


def _create_histogram_mat_artifact(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
):
    """
    Create and log a histogram matrix artifact
    """
    context.log_artifact(
        item=Artifact(
            key="hist",
            body=b"<b> Deprecated, see the artifacts scatter-2d "
            b"and histograms instead<b>",
        ),
        local_path=f"{plots_dest}/hist.html",
    )


def _create_features_histogram_artifacts(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
    problem_type: str,
):
    """
    Create and log a histogram artifact for each feature
    """

    figs = dict()
    first_feature_name = ""
    if label_column is not None and problem_type == "classification":
        all_labels = df[label_column].unique()
    visible = True
    for column_name in df.columns:
        if column_name == label_column:
            continue

        if label_column is not None and problem_type == "classification":
            for label in all_labels:
                sub_fig = go.Histogram(
                    histfunc="count",
                    x=df.loc[df[label_column] == label][column_name],
                    name=str(label),
                    visible=visible,
                )
                figs[f"{column_name}@?@{label}"] = sub_fig
        else:
            sub_fig = go.Histogram(histfunc="count", x=df[column_name], visible=visible)
            figs[f"{column_name}@?@{1}"] = sub_fig
        if visible:
            first_feature_name = column_name
        visible = False

    fig = go.Figure()
    for k in figs.keys():
        fig.add_trace(figs[k])

    fig.update_layout(
        updatemenus=[
            {
                "buttons": [
                    {
                        "label": column_name,
                        "method": "update",
                        "args": [
                            {
                                "visible": [
                                    key.split("@?@")[0] == column_name
                                    for key in figs.keys()
                                ],
                                "xaxis": {
                                    "range": [
                                        min(df[column_name]),
                                        max(df[column_name]),
                                    ]
                                },
                            },
                            {"title": f"<i><b>Histogram of {column_name}</b></i>"},
                        ],
                    }
                    for column_name in df.columns
                    if column_name != label_column
                ],
                "direction": "down",
                "pad": {"r": 10, "t": 10},
                "showactive": True,
                "x": 0.25,
                "xanchor": "left",
                "y": 1.1,
                "yanchor": "top",
            }
        ],
        annotations=[
            dict(
                text="Select Feature Name ",
                showarrow=False,
                x=0,
                y=1.05,
                yref="paper",
                xref="paper",
                align="left",
                xanchor="left",
                yanchor="top",
                font={
                    "color": "blue",
                },
            )
        ],
    )

    fig.update_layout(
        width=600,
        height=400,
        autosize=False,
        margin=dict(t=100, b=0, l=0, r=0),
        template="plotly_white",
    )

    fig.update_layout(title_text=f"<i><b>Histograms of {first_feature_name}</b></i>")
    extra_data[f"histograms"] = context.log_artifact(
        PlotlyArtifact(key=f"histograms", figure=fig),
        local_path=f"{plots_dest}/histograms.html",
    )


def _create_features_2d_scatter_artifacts(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
    problem_type: str,
):
    """
    Create and log a scatter-2d artifact for each couple of features
    """
    features = [
        column_name for column_name in df.columns if column_name != label_column
    ]
    max_feature_len = float(max(len(elem) for elem in features))
    if label_column is not None:
        labels = sorted(df[label_column].unique())
    else:
        labels = [None]
    fig = go.Figure()
    if label_column is not None and problem_type == "classification":
        for l in labels:
            fig.add_trace(
                go.Scatter(
                    x=df.loc[df[label_column] == l][features[0]],
                    y=df.loc[df[label_column] == l][features[0]],
                    mode="markers",
                    visible=True,
                    showlegend=True,
                    name=str(l),
                )
            )
    elif label_column is None:
        fig.add_trace(
            go.Scatter(
                x=df[features[0]],
                y=df[features[0]],
                mode="markers",
                visible=True,
            )
        )
    elif problem_type == "regression":
        fig.add_trace(
            go.Scatter(
                x=df[features[0]],
                y=df[features[0]],
                mode="markers",
                marker=dict(
                    color=df[label_column], colorscale="Viridis", showscale=True
                ),
                visible=True,
            )
        )

    x_buttons = []
    y_buttons = []

    for ncol in features:
        if problem_type == "classification" and label_column is not None:
            x_buttons.append(
                dict(
                    method="update",
                    label=ncol,
                    args=[
                        {"x": [df.loc[df[label_column] == l][ncol] for l in labels]},
                        np.arange(len(labels)).tolist(),
                    ],
                )
            )

            y_buttons.append(
                dict(
                    method="update",
                    label=ncol,
                    args=[
                        {"y": [df.loc[df[label_column] == l][ncol] for l in labels]},
                        np.arange(len(labels)).tolist(),
                    ],
                )
            )
        else:
            x_buttons.append(
                dict(method="update", label=ncol, args=[{"x": [df[ncol]]}])
            )

            y_buttons.append(
                dict(method="update", label=ncol, args=[{"y": [df[ncol]]}])
            )

    # Pass buttons to the updatemenus argument
    fig.update_layout(
        updatemenus=[
            dict(buttons=x_buttons, direction="up", x=0.5, y=-0.1),
            dict(buttons=y_buttons, direction="down", x=-max_feature_len / 100, y=0.5),
        ]
    )

    fig.update_layout(
        width=600,
        height=400,
        autosize=False,
        margin=dict(t=100, b=0, l=0, r=0),
        template="plotly_white",
    )

    fig.update_layout(title_text=f"<i><b>Scatter-2d</b></i>")
    extra_data[f"scatter-2d"] = context.log_artifact(
        PlotlyArtifact(key=f"scatter-2d", figure=fig),
        local_path=f"{plots_dest}/scatter-2d.html",
    )


def _create_violin_artifact(
    context: MLClientCtx, df: pd.DataFrame, extra_data: dict, plots_dest: str
):
    """
    Create and log a violin artifact
    """
    cols = 5
    rows = (df.shape[1] // cols) + 1
    fig = make_subplots(rows=rows, cols=cols)

    plot_num = 0

    for column_name in df.columns:
        column_data = df[column_name]
        violin = go.Violin(
            x=[column_name] * column_data.shape[0],
            y=column_data,
            name=column_name,
        )

        fig.add_trace(
            violin,
            row=(plot_num // cols) + 1,
            col=(plot_num % cols) + 1,
        )

        plot_num += 1

    fig["layout"].update(
        height=(rows + 1) * 200,
        width=(cols + 1) * 200,
        title="<i><b>Violin Plots</b></i>",
    )

    fig.update_layout(showlegend=False)
    extra_data["violin"] = context.log_artifact(
        PlotlyArtifact(key="violin", figure=fig),
        local_path=f"{plots_dest}/violin.html",
    )


def _create_imbalance_artifact(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
    problem_type: str,
):
    """
    Create and log an imbalance class artifact (csv + plot)
    """
    if label_column:
        if problem_type == "classification":
            values_column = "count"
            labels_count = df[label_column].value_counts().sort_index()
            df_labels_count = pd.DataFrame(labels_count)
            df_labels_count[label_column] = labels_count.index
            df_labels_count.rename(columns={"": values_column}, inplace=True)
            df_labels_count[values_column] = df_labels_count[values_column] / sum(
                df_labels_count[values_column]
            )
            fig = px.pie(df_labels_count, names=label_column, values=values_column)
        else:
            fig = px.histogram(
                histfunc="count",
                x=df[label_column],
            )
            hist = np.histogram(df[label_column])
            df_labels_count = pd.DataFrame(
                {"min_val": hist[1], "count": hist[0].tolist() + [0]}
            )
        fig.update_layout(title_text="<i><b>Labels Imbalance</b></i>")
        extra_data["imbalance"] = context.log_artifact(
            PlotlyArtifact(key="imbalance", figure=fig),
            local_path=f"{plots_dest}/imbalance.html",
        )
        extra_data["imbalance-csv"] = context.log_artifact(
            TableArtifact("imbalance-weights-vec", df=df_labels_count),
            local_path=f"{plots_dest}/imbalance-weights-vec.csv",
        )


def _create_corr_artifact(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
):
    """
    Create and log an correlation-matrix artifact (csv + plot)
    """
    if label_column is not None:
        df = df.drop([label_column], axis=1)
    tblcorr = df.corr(numeric_only=True)
    extra_data["correlation-matrix-csv"] = context.log_artifact(
        TableArtifact("correlation-matrix-csv", df=tblcorr, visible=True),
        local_path=f"{plots_dest}/correlation-matrix.csv",
    )

    z = tblcorr.values.tolist()
    z_text = [["{:.2f}".format(y) for y in x] for x in z]
    fig = ff.create_annotated_heatmap(
        z,
        x=list(tblcorr.columns),
        y=list(tblcorr.columns),
        annotation_text=z_text,
        colorscale="agsunset",
    )
    fig["layout"]["yaxis"]["autorange"] = "reversed"  # l -> r
    fig.update_layout(title_text="<i><b>Correlation matrix</b></i>")
    fig["data"][0]["showscale"] = True

    extra_data["correlation"] = context.log_artifact(
        PlotlyArtifact(key="correlation", figure=fig),
        local_path=f"{plots_dest}/correlation.html",
    )
-    code_origin: ''
-    origin_filename: ''
-  description: describe and visualizes dataset stats
-  disable_auto_mount: false
-  default_handler: analyze
-verbose: false
-metadata:
-  tag: ''
-  name: describe
-  categories:
-  - data-analysis
-kind: job
+    code_origin: ''
+    functionSourceCode: # Copyright 2019 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Generated by nuclio.export.NuclioExporter

import warnings
from typing import Union

import mlrun
import numpy as np

warnings.simplefilter(action="ignore", category=FutureWarning)

import mlrun.feature_store as fstore
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from mlrun.artifacts import (
    Artifact,
    DatasetArtifact,
    PlotlyArtifact,
    TableArtifact,
    update_dataset_meta,
)
from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx
from mlrun.feature_store import FeatureSet
from plotly.subplots import make_subplots

pd.set_option("display.float_format", lambda x: "%.2f" % x)
MAX_SIZE_OF_DF = 500000


def analyze(
    context: MLClientCtx,
    name: str = "dataset",
    table: Union[FeatureSet, DataItem] = None,
    label_column: str = None,
    plots_dest: str = "plots",
    random_state: int = 1,
    problem_type: str = "classification",
    dask_key: str = "dask_key",
    dask_function: str = None,
    dask_client=None,
) -> None:
    """
    The function will output the following artifacts per
    column within the data frame (based on data types)
    If the data has more than 500,000 samples, we
    randomly sample 500,000 samples:

    describe csv
    histograms
    scatter-2d
    violin chart
    correlation-matrix chart
    correlation-matrix csv
    imbalance pie chart
    imbalance-weights-vec csv

    :param context:                 The function context
    :param name:                    Key of dataset to database ("dataset" for default)
    :param table:                   MLRun input pointing to a pandas dataframe (csv/parquet file path) or a
                                    FeatureSet passed as a parameter
    :param label_column:            Ground truth column label
    :param plots_dest:              Destination folder of summary plots (relative to artifact_path)
                                    ("plots" for default)
    :param random_state:            Random seed used when the table has more than 500,000 samples and is
                                    randomly sampled down to 500,000 samples
    :param problem_type:            The type of ML problem the data is facing - regression, classification or None
                                    ("classification" by default)
    :param dask_key:                Key of dataframe in dask client "datasets" attribute
    :param dask_function:           Dask function url (db://..)
    :param dask_client:             Dask client object
    """
    data_item, featureset, create, update = False, False, False, False
    get_from_table = True
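    # When a dask function/client is given, prefer fetching a dataframe that was
    # already published on the dask cluster; otherwise fall back to reading `table`.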
    if dask_function or dask_client:
        data_item, create = True, True
        if dask_function:
            client = mlrun.import_function(dask_function).client
        elif dask_client:
            client = dask_client
        else:
            raise ValueError("dask client was not provided")

        if dask_key in client.datasets:
            df = client.get_dataset(dask_key)
            data_item, create, get_from_table = True, True, False
        elif table:
            get_from_table = True
        else:
            context.logger.info(
                f"dataset {dask_key} was not found; only these datasets are available in client {client}: {client.datasets}"
            )
            raise Exception("dataset not found on dask cluster")

    if get_from_table:
        if isinstance(table, DataItem):
            if table.meta is None:
                data_item, create, update = True, True, False
            elif table.meta.kind == "dataset":
                data_item, create, update = True, False, True
            elif table.meta.kind == "FeatureVector":
                data_item, create, update = True, False, False
            elif table.meta.kind == "FeatureSet":
                featureset, create, update = True, False, False

        if data_item:
            df = table.as_df()
        elif featureset:
            project_name, set_name = (
                table._path.split("/")[2],
                table._path.split("/")[4],
            )
            feature_set = fstore.get_feature_set(
                f"store://feature-sets/{project_name}/{set_name}"
            )
            df = feature_set.to_dataframe()
        else:
            context.logger.error("Wrong table type.")
            return

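    # Note: df.size counts cells (rows * columns), so the sampling below keeps
    # roughly MAX_SIZE_OF_DF cells by drawing MAX_SIZE_OF_DF / n_columns rows.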
    if df.size > MAX_SIZE_OF_DF:
        df = df.sample(n=int(MAX_SIZE_OF_DF / df.shape[1]), random_state=random_state)
    extra_data = {}

    if label_column not in df.columns:
        label_column = None

    extra_data["describe csv"] = context.log_artifact(
        TableArtifact("describe-csv", df=df.describe()),
        local_path=f"{plots_dest}/describe.csv",
    )

    try:
        _create_histogram_mat_artifact(
            context, df, extra_data, label_column, plots_dest
        )
    except Exception as e:
        context.logger.warn(f"Failed to create histogram matrix artifact due to: {e}")
    try:
        _create_features_histogram_artifacts(
            context, df, extra_data, label_column, plots_dest, problem_type
        )
    except Exception as e:
        context.logger.warn(f"Failed to create pairplot histograms due to: {e}")
    try:
        _create_features_2d_scatter_artifacts(
            context, df, extra_data, label_column, plots_dest, problem_type
        )
    except Exception as e:
        context.logger.warn(f"Failed to create pairplot 2d_scatter due to: {e}")
    try:
        _create_violin_artifact(context, df, extra_data, plots_dest)
    except Exception as e:
        context.logger.warn(f"Failed to create violin distribution plots due to: {e}")
    try:
        _create_imbalance_artifact(
            context, df, extra_data, label_column, plots_dest, problem_type
        )
    except Exception as e:
        context.logger.warn(f"Failed to create class imbalance plot due to: {e}")
    try:
        _create_corr_artifact(context, df, extra_data, label_column, plots_dest)
    except Exception as e:
        context.logger.warn(f"Failed to create features correlation plot due to: {e}")

    if not data_item:
        return

    artifact = table.artifact_url
    if create:  # dataset not stored
        artifact = DatasetArtifact(
            key="dataset", stats=True, df=df, extra_data=extra_data
        )
        artifact = context.log_artifact(artifact, db_key=name)
        context.logger.info(f"The dataset is logged to the project under the name '{name}'")

    if update:
        update_dataset_meta(artifact, extra_data=extra_data)
        context.logger.info(f"The dataset named '{name}' was updated")

    # TODO : 3-D plot on selected features.
    # TODO : Reintegration plot on selected features.
    # TODO : PCA plot (with options)

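# Illustrative usage (a minimal sketch, not part of the function code; it assumes
# this file is saved as "describe.py" and that "data.csv" exists locally):
#
#   import mlrun
#   describe_fn = mlrun.code_to_function(
#       name="describe", kind="job", filename="describe.py", handler="analyze"
#   )
#   describe_fn.run(
#       inputs={"table": "data.csv"},
#       params={"label_column": "label", "problem_type": "classification"},
#       local=True,
#   )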

def _create_histogram_mat_artifact(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
):
    """
    Create and log a histogram matrix artifact
    """
    context.log_artifact(
        item=Artifact(
            key="hist",
            body=b"<b>Deprecated, see the artifacts scatter-2d "
            b"and histograms instead</b>",
        ),
        local_path=f"{plots_dest}/hist.html",
    )


def _create_features_histogram_artifacts(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
    problem_type: str,
):
    """
    Create and log a histogram artifact for each feature
    """

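    # One histogram trace is built per feature (and per class label for
    # classification). Traces are keyed "<feature>@?@<label>" so the dropdown
    # below can match each button's feature name against the key prefix.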
    figs = dict()
    first_feature_name = ""
    if label_column is not None and problem_type == "classification":
        all_labels = df[label_column].unique()
    visible = True
    for column_name in df.columns:
        if column_name == label_column:
            continue

        if label_column is not None and problem_type == "classification":
            for label in all_labels:
                sub_fig = go.Histogram(
                    histfunc="count",
                    x=df.loc[df[label_column] == label][column_name],
                    name=str(label),
                    visible=visible,
                )
                figs[f"{column_name}@?@{label}"] = sub_fig
        else:
            sub_fig = go.Histogram(histfunc="count", x=df[column_name], visible=visible)
            figs[f"{column_name}@?@{1}"] = sub_fig
        if visible:
            first_feature_name = column_name
        visible = False

    fig = go.Figure()
    for k in figs.keys():
        fig.add_trace(figs[k])

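    # Each dropdown button carries a boolean visibility mask with one entry per
    # trace: True only for traces whose key prefix equals the selected feature.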
    fig.update_layout(
        updatemenus=[
            {
                "buttons": [
                    {
                        "label": column_name,
                        "method": "update",
                        "args": [
                            {
                                "visible": [
                                    key.split("@?@")[0] == column_name
                                    for key in figs.keys()
                                ],
                                "xaxis": {
                                    "range": [
                                        min(df[column_name]),
                                        max(df[column_name]),
                                    ]
                                },
                            },
                            {"title": f"<i><b>Histogram of {column_name}</b></i>"},
                        ],
                    }
                    for column_name in df.columns
                    if column_name != label_column
                ],
                "direction": "down",
                "pad": {"r": 10, "t": 10},
                "showactive": True,
                "x": 0.25,
                "xanchor": "left",
                "y": 1.1,
                "yanchor": "top",
            }
        ],
        annotations=[
            dict(
                text="Select Feature Name ",
                showarrow=False,
                x=0,
                y=1.05,
                yref="paper",
                xref="paper",
                align="left",
                xanchor="left",
                yanchor="top",
                font={
                    "color": "blue",
                },
            )
        ],
    )

    fig.update_layout(
        width=600,
        height=400,
        autosize=False,
        margin=dict(t=100, b=0, l=0, r=0),
        template="plotly_white",
    )

    fig.update_layout(title_text=f"<i><b>Histograms of {first_feature_name}</b></i>")
    extra_data["histograms"] = context.log_artifact(
        PlotlyArtifact(key="histograms", figure=fig),
        local_path=f"{plots_dest}/histograms.html",
    )


def _create_features_2d_scatter_artifacts(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
    problem_type: str,
):
    """
    Create and log a scatter-2d artifact for any pair of features (selectable via dropdown menus)
    """
    features = [
        column_name for column_name in df.columns if column_name != label_column
    ]
    max_feature_len = float(max(len(elem) for elem in features))
    if label_column is not None:
        labels = sorted(df[label_column].unique())
    else:
        labels = [None]
    fig = go.Figure()
    if label_column is not None and problem_type == "classification":
        for l in labels:
            fig.add_trace(
                go.Scatter(
                    x=df.loc[df[label_column] == l][features[0]],
                    y=df.loc[df[label_column] == l][features[0]],
                    mode="markers",
                    visible=True,
                    showlegend=True,
                    name=str(l),
                )
            )
    elif label_column is None:
        fig.add_trace(
            go.Scatter(
                x=df[features[0]],
                y=df[features[0]],
                mode="markers",
                visible=True,
            )
        )
    elif problem_type == "regression":
        fig.add_trace(
            go.Scatter(
                x=df[features[0]],
                y=df[features[0]],
                mode="markers",
                marker=dict(
                    color=df[label_column], colorscale="Viridis", showscale=True
                ),
                visible=True,
            )
        )

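    # Two dropdown menus update the x and y data of the existing traces
    # independently; for classification there is one trace per class, so every
    # button supplies one column per label.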
    x_buttons = []
    y_buttons = []

    for ncol in features:
        if problem_type == "classification" and label_column is not None:
            x_buttons.append(
                dict(
                    method="update",
                    label=ncol,
                    args=[
                        {"x": [df.loc[df[label_column] == l][ncol] for l in labels]},
                        np.arange(len(labels)).tolist(),
                    ],
                )
            )

            y_buttons.append(
                dict(
                    method="update",
                    label=ncol,
                    args=[
                        {"y": [df.loc[df[label_column] == l][ncol] for l in labels]},
                        np.arange(len(labels)).tolist(),
                    ],
                )
            )
        else:
            x_buttons.append(
                dict(method="update", label=ncol, args=[{"x": [df[ncol]]}])
            )

            y_buttons.append(
                dict(method="update", label=ncol, args=[{"y": [df[ncol]]}])
            )

    # Pass buttons to the updatemenus argument
    fig.update_layout(
        updatemenus=[
            dict(buttons=x_buttons, direction="up", x=0.5, y=-0.1),
            dict(buttons=y_buttons, direction="down", x=-max_feature_len / 100, y=0.5),
        ]
    )

    fig.update_layout(
        width=600,
        height=400,
        autosize=False,
        margin=dict(t=100, b=0, l=0, r=0),
        template="plotly_white",
    )

    fig.update_layout(title_text="<i><b>Scatter-2d</b></i>")
    extra_data["scatter-2d"] = context.log_artifact(
        PlotlyArtifact(key="scatter-2d", figure=fig),
        local_path=f"{plots_dest}/scatter-2d.html",
    )


def _create_violin_artifact(
    context: MLClientCtx, df: pd.DataFrame, extra_data: dict, plots_dest: str
):
    """
    Create and log a violin artifact
    """
    cols = 5
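    # One violin per dataframe column on a fixed 5-column grid; "// cols + 1"
    # below is a simple ceiling so there are always enough rows.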
    rows = (df.shape[1] // cols) + 1
    fig = make_subplots(rows=rows, cols=cols)

    plot_num = 0

    for column_name in df.columns:
        column_data = df[column_name]
        violin = go.Violin(
            x=[column_name] * column_data.shape[0],
            y=column_data,
            name=column_name,
        )

        fig.add_trace(
            violin,
            row=(plot_num // cols) + 1,
            col=(plot_num % cols) + 1,
        )

        plot_num += 1

    fig["layout"].update(
        height=(rows + 1) * 200,
        width=(cols + 1) * 200,
        title="<i><b>Violin Plots</b></i>",
    )

    fig.update_layout(showlegend=False)
    extra_data["violin"] = context.log_artifact(
        PlotlyArtifact(key="violin", figure=fig),
        local_path=f"{plots_dest}/violin.html",
    )


def _create_imbalance_artifact(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
    problem_type: str,
):
    """
    Create and log a class-imbalance artifact (csv + plot)
    """
    if label_column:
        if problem_type == "classification":
            values_column = "count"
            labels_count = df[label_column].value_counts().sort_index()
            df_labels_count = pd.DataFrame(labels_count)
            df_labels_count[label_column] = labels_count.index
            df_labels_count.rename(columns={"": values_column}, inplace=True)
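            # Normalize the counts to fractions so the logged CSV doubles as a
            # class-weight vector (hence the key "imbalance-weights-vec").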
            df_labels_count[values_column] = df_labels_count[values_column] / sum(
                df_labels_count[values_column]
            )
            fig = px.pie(df_labels_count, names=label_column, values=values_column)
        else:
            fig = px.histogram(
                histfunc="count",
                x=df[label_column],
            )
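            # np.histogram returns (counts, bin_edges); bin_edges has one more
            # element than counts, hence the trailing zero appended below.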
            hist = np.histogram(df[label_column])
            df_labels_count = pd.DataFrame(
                {"min_val": hist[1], "count": hist[0].tolist() + [0]}
            )
        fig.update_layout(title_text="<i><b>Labels Imbalance</b></i>")
        extra_data["imbalance"] = context.log_artifact(
            PlotlyArtifact(key="imbalance", figure=fig),
            local_path=f"{plots_dest}/imbalance.html",
        )
        extra_data["imbalance-csv"] = context.log_artifact(
            TableArtifact("imbalance-weights-vec", df=df_labels_count),
            local_path=f"{plots_dest}/imbalance-weights-vec.csv",
        )


def _create_corr_artifact(
    context: MLClientCtx,
    df: pd.DataFrame,
    extra_data: dict,
    label_column: str,
    plots_dest: str,
):
    """
    Create and log a correlation-matrix artifact (csv + plot)
    """
    if label_column is not None:
        df = df.drop([label_column], axis=1)
    tblcorr = df.corr(numeric_only=True)
    extra_data["correlation-matrix-csv"] = context.log_artifact(
        TableArtifact("correlation-matrix-csv", df=tblcorr, visible=True),
        local_path=f"{plots_dest}/correlation-matrix.csv",
    )

    z = tblcorr.values.tolist()
    z_text = [["{:.2f}".format(y) for y in x] for x in z]
    fig = ff.create_annotated_heatmap(
        z,
        x=list(tblcorr.columns),
        y=list(tblcorr.columns),
        annotation_text=z_text,
        colorscale="agsunset",
    )
    fig["layout"]["yaxis"]["autorange"] = "reversed"  # flip the y-axis so the matrix reads top-to-bottom
    fig.update_layout(title_text="<i><b>Correlation matrix</b></i>")
    fig["data"][0]["showscale"] = True

    extra_data["correlation"] = context.log_artifact(
        PlotlyArtifact(key="correlation", figure=fig),
        local_path=f"{plots_dest}/correlation.html",
    )
diff --git a/functions/src/describe/item.yaml b/functions/src/describe/item.yaml
index da26f1501..a1aa47372 100644
--- a/functions/src/describe/item.yaml
+++ b/functions/src/describe/item.yaml
@@ -11,7 +11,7 @@ labels:
   author: Iguazio
 maintainers: []
 marketplaceType: ''
-mlrunVersion: 1.7.0
+mlrunVersion: 1.10.0
 name: describe
 platformVersion: 3.5.3
 spec:
@@ -21,4 +21,4 @@ spec:
   kind: job
   requirements: []
   url: ''
-version: 1.4.0
+version: 1.5.0
diff --git a/functions/src/describe/requirements.txt b/functions/src/describe/requirements.txt
index 15492b176..ac445e6d6 100644
--- a/functions/src/describe/requirements.txt
+++ b/functions/src/describe/requirements.txt
@@ -1,4 +1,4 @@
-scikit-learn~=1.0.2
+scikit-learn~=1.5.2
 plotly~=5.23
 pytest~=7.0.1
 matplotlib~=3.5.1
diff --git a/functions/src/gen_class_data/function.yaml b/functions/src/gen_class_data/function.yaml
index 1769bec07..fa802964e 100644
--- a/functions/src/gen_class_data/function.yaml
+++ b/functions/src/gen_class_data/function.yaml
@@ -1,13 +1,15 @@
 metadata:
-  categories:
-  - data-generation
   tag: ''
   name: gen-class-data
+  categories:
+  - data-generation
+verbose: false
 spec:
   description: Create a binary classification sample dataset and save.
-  default_handler: gen_class_data
   entry_points:
     gen_class_data:
+      lineno: 22
+      has_varargs: false
       has_kwargs: false
       parameters:
       - name: context
@@ -48,7 +50,6 @@ spec:
       - name: sk_params
         doc: additional parameters for `sklearn.datasets.make_classification`
         default: {}
-      lineno: 22
       doc: 'Create a binary classification sample dataset and save.

         If no filename is given it will default to:
@@ -59,14 +60,13 @@ spec:
         Additional scikit-learn parameters can be set using **sk_params, please see
         https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
         for more details.'
-      has_varargs: false
       name: gen_class_data
-  command: ''
-  disable_auto_mount: false
-  image: mlrun/mlrun
   build:
     origin_filename: ''
     functionSourceCode: IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKaW1wb3J0IHBhbmRhcyBhcyBwZApmcm9tIHR5cGluZyBpbXBvcnQgT3B0aW9uYWwsIExpc3QKZnJvbSBza2xlYXJuLmRhdGFzZXRzIGltcG9ydCBtYWtlX2NsYXNzaWZpY2F0aW9uCgpmcm9tIG1scnVuLmV4ZWN1dGlvbiBpbXBvcnQgTUxDbGllbnRDdHgKCgpkZWYgZ2VuX2NsYXNzX2RhdGEoCiAgICAgICAgY29udGV4dDogTUxDbGllbnRDdHgsCiAgICAgICAgbl9zYW1wbGVzOiBpbnQsCiAgICAgICAgbV9mZWF0dXJlczogaW50LAogICAgICAgIGtfY2xhc3NlczogaW50LAogICAgICAgIGhlYWRlcjogT3B0aW9uYWxbTGlzdFtzdHJdXSwKICAgICAgICBsYWJlbF9jb2x1bW46IE9wdGlvbmFsW3N0cl0gPSAibGFiZWxzIiwKICAgICAgICB3ZWlnaHQ6IGZsb2F0ID0gMC41LAogICAgICAgIHJhbmRvbV9zdGF0ZTogaW50ID0gMSwKICAgICAgICBrZXk6IHN0ciA9ICJjbGFzc2lmaWVyLWRhdGEiLAogICAgICAgIGZpbGVfZXh0OiBzdHIgPSAicGFycXVldCIsCiAgICAgICAgc2tfcGFyYW1zPXt9Cik6CiAgICAiIiJDcmVhdGUgYSBiaW5hcnkgY2xhc3NpZmljYXRpb24gc2FtcGxlIGRhdGFzZXQgYW5kIHNhdmUuCiAgICBJZiBubyBmaWxlbmFtZSBpcyBnaXZlbiBpdCB3aWxsIGRlZmF1bHQgdG86CiAgICAic2ltZGF0YS17bl9zYW1wbGVzfVh7bV9mZWF0dXJlc30ucGFycXVldCIuCgogICAgQWRkaXRpb25hbCBzY2lraXQtbGVhcm4gcGFyYW1ldGVycyBjYW4gYmUgc2V0IHVzaW5nICoqc2tfcGFyYW1zLCBwbGVhc2Ugc2VlIGh0dHBzOi8vc2Npa2l0LWxlYXJuLm9yZy9zdGFibGUvbW9kdWxlcy9nZW5lcmF0ZWQvc2tsZWFybi5kYXRhc2V0cy5tYWtlX2NsYXNzaWZpY2F0aW9uLmh0bWwgZm9yIG1vcmUgZGV0YWlscy4KCiAgICA6cGFyYW0gY29udGV4dDogICAgICAgZnVuY3Rpb24gY29udGV4dAogICAgOnBhcmFtIG5fc2FtcGxlczogICAgIG51bWJlciBvZiByb3dzL3NhbXBsZXMKICAgIDpwYXJhbSBtX2ZlYXR1cmVzOiAgICBudW1iZXIgb2YgY29scy9mZWF0dXJlcwogICAgOnBhcmFtIGtfY2xhc3NlczogICAgIG51bWJlciBvZiBjbGFzc2VzCiAgICA6cGFyYW0gaGVhZGVyOiAgICAgICAgaGVhZGVyIGZvciBmZWF0dXJlcyBhcnJheQogICAgOnBhcmFtIGxhYmVsX2NvbHVtbjogIGNvbHVtbiBuYW1lIG9mIGdyb3VuZC10cnV0aCBzZXJpZXMKICAgIDpwYXJhbSB3ZWlnaHQ6ICAgICAgICBmcmFjdGlvbiBvZiBzYW1wbGUgbmVnYXRpdmUgdmFsdWUgKGdyb3VuZC10cnV0aD0wKQogICAgOnBhcmFtIHJhbmRvbV9zdGF0ZTogIHJuZyBzZWVkIChzZWUgaHR0cHM6Ly9zY2lraXQtbGVhcm4ub3JnL3N0YWJsZS9nbG9zc2FyeS5odG1sI3Rlcm0tcmFuZG9tLXN0YXRlKQogICAgOnBhcmFtIGtleTogICAgICAgICAgIGtleSBvZiBkYXRhIGluIGFydGlmYWN0IHN0b3JlCiAgICA6cGFyYW0gZmlsZV9leHQ6ICAgICAgKHBxdCkgZXh0ZW5zaW9uIGZvciBwYXJxdWV0IGZpbGUKICAgIDpwYXJhbSBza19wYXJhbXM6ICAgICBhZGRpdGlvbmFsIHBhcmFtZXRlcnMgZm9yIGBza2xlYXJuLmRhdGFzZXRzLm1ha2VfY2xhc3NpZmljYXRpb25gCiAgICAiIiIKICAgIGZlYXR1cmVzLCBsYWJlbHMgPSBtYWtlX2NsYXNzaWZpY2F0aW9uKAogICAgICAgIG5fc2FtcGxlcz1uX3NhbXBsZXMsCiAgICAgICAgbl9mZWF0dXJlcz1tX2ZlYXR1cmVzLAogICAgICAgIHdlaWdodHM9d2VpZ2h0LAogICAgICAgIG5fY2xhc3Nlcz1rX2NsYXNzZXMsCiAgICAgICAgcmFuZG9tX3N0YXRlPXJhbmRvbV9zdGF0ZSwKICAgICAgICAqKnNrX3BhcmFtcykKCiAgICAjIG1ha2UgZGF0YWZyYW1lcywgYWRkIGNvbHVtbiBuYW1lcywgY29uY2F0ZW5hdGUgKFgsIHkpCiAgICBYID0gcGQuRGF0YUZyYW1lKGZlYXR1cmVzKQogICAgaWYgbm90IGhlYWRlcjoKICAgICAgICBYLmNvbHVtbnMgPSBbImZlYXRfIiArIHN0cih4KSBmb3IgeCBpbiByYW5nZShtX2ZlYXR1cmVzKV0KICAgIGVsc2U6CiAgICAgICAgWC5jb2x1bW5zID0gaGVhZGVyCgogICAgeSA9IHBkLkRhdGFGcmFtZShsYWJlbHMsIGNvbHVtbnM9W2xhYmVsX2NvbHVtbl0pCiAgICBkYXRhID0gcGQuY29uY2F0KFtYLCB5XSwgYXhpcz0xKQoKICAgIGNvbnRleHQubG9nX2RhdGFzZXQoa2V5LCBkZj1kYXRhLCBmb3JtYXQ9ZmlsZV9leHQsIGluZGV4PUZhbHNlKQo=
     code_origin: ''
+  command: ''
+  image: mlrun/mlrun
+  default_handler: gen_class_data
+  disable_auto_mount: false
 kind: job
-verbose: false
diff --git a/functions/src/gen_class_data/item.yaml b/functions/src/gen_class_data/item.yaml
index 30f5cd21c..082b00305 100644
--- a/functions/src/gen_class_data/item.yaml
+++ b/functions/src/gen_class_data/item.yaml
@@ -11,7 +11,7 @@ labels:
   author: Iguazio
 maintainers: []
 marketplaceType: ''
-mlrunVersion: 1.7.0
+mlrunVersion: 1.10.0
 name: gen_class_data
 platformVersion: 3.5.3
 spec:
@@ -21,4 +21,4 @@ spec:
   kind: job
   requirements: []
   url: ''
-version: 1.3.0
+version: 1.4.0
diff --git a/functions/src/gen_class_data/requirements.txt b/functions/src/gen_class_data/requirements.txt
index d7dbe376b..e265290f6 100644
--- a/functions/src/gen_class_data/requirements.txt
+++ b/functions/src/gen_class_data/requirements.txt
@@ -1,2 +1,2 @@
 pandas
-scikit-learn==1.0.2
\ No newline at end of file
+scikit-learn~=1.5.2
\ No newline at end of file
diff --git a/functions/src/gen_class_data/test_gen_class_data.py b/functions/src/gen_class_data/test_gen_class_data.py
index e06eeb16b..990075dec 100644
--- a/functions/src/gen_class_data/test_gen_class_data.py
+++ b/functions/src/gen_class_data/test_gen_class_data.py
@@ -36,4 +36,7 @@ def test_gen_class_data():
         local=True,
         artifact_path="./artifacts",
     )
-    assert os.path.isfile(run.status.artifacts[0]['spec']['target_path']), 'dataset is not available'
+    # In local mode, artifacts are in function-name/iteration subdirectory
+    # Default key is "classifier-data" (can be overridden in params)
+    dataset_path = "./artifacts/test-gen-class-data-gen-class-data/0/classifier-data.csv"
+    assert os.path.isfile(dataset_path), f'dataset is not available at {dataset_path}'
diff --git a/functions/src/onnx_utils/function.yaml b/functions/src/onnx_utils/function.yaml
index 05a0f0bc2..091002cdc 100644
--- a/functions/src/onnx_utils/function.yaml
+++ b/functions/src/onnx_utils/function.yaml
@@ -1,39 +1,13 @@
-kind: job
 metadata:
+  name: onnx-utils
+  tag: ''
   categories:
   - utilities
   - deep-learning
-  name: onnx-utils
-  tag: ''
-verbose: false
+kind: job
 spec:
-  build:
-    code_origin: ''
-    base_image: mlrun/mlrun
-    origin_filename: ''
-    functionSourceCode: # Copyright 2019 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Any, Callable, Dict, List, Tuple

import mlrun


class _ToONNXConversions:
    """
    An ONNX conversion functions library class.
    """

    @staticmethod
    def tf_keras_to_onnx(
        model_handler,
        onnx_model_name: str = None,
        optimize_model: bool = True,
        input_signature: List[Tuple[Tuple[int], str]] = None,
    ):
        """
        Convert a TF.Keras model to an ONNX model and log it back to MLRun as a new model object.

        :param model_handler:   An initialized TFKerasModelHandler with a loaded model to convert to ONNX.
        :param onnx_model_name: The name to use to log the converted ONNX model. If not given, the given `model_name`
                                will be used with an additional suffix `_onnx`. Defaulted to None.
        :param optimize_model:  Whether or not to optimize the ONNX model using 'onnxoptimizer' before saving the model.
                                Defaulted to True.
        :param input_signature: A list of the input layers shape and data type properties. Expected to receive a list
                                where each element is an input layer tuple. An input layer tuple is a tuple of:
                                [0] = Layer's shape, a tuple of integers.
                                [1] = Layer's data type, a mlrun.data_types.ValueType string.
                                If None, the input signature will be tried to be read from the model artifact. Defaulted
                                to None.
        """
        # Import the framework and handler:
        import tensorflow as tf
        from mlrun.frameworks.tf_keras import TFKerasUtils

        # Check the given 'input_signature' parameter:
        if input_signature is None:
            # Read the inputs from the model:
            try:
                model_handler.read_inputs_from_model()
            except Exception as error:
                raise mlrun.errors.MLRunRuntimeError(
                    f"Please provide the 'input_signature' parameter. The function tried reading the input layers "
                    f"information automatically but failed with the following error: {error}"
                )
        else:
            # Parse the 'input_signature' parameter:
            input_signature = [
                tf.TensorSpec(
                    shape=shape,
                    dtype=TFKerasUtils.convert_value_type_to_tf_dtype(
                        value_type=value_type
                    ),
                )
                for (shape, value_type) in input_signature
            ]

        # Convert to ONNX:
        model_handler.to_onnx(
            model_name=onnx_model_name,
            input_signature=input_signature,
            optimize=optimize_model,
        )

    @staticmethod
    def pytorch_to_onnx(
        model_handler,
        onnx_model_name: str = None,
        optimize_model: bool = True,
        input_signature: List[Tuple[Tuple[int, ...], str]] = None,
        input_layers_names: List[str] = None,
        output_layers_names: List[str] = None,
        dynamic_axes: Dict[str, Dict[int, str]] = None,
        is_batched: bool = True,
    ):
        """
        Convert a PyTorch model to an ONNX model and log it back to MLRun as a new model object.

        :param model_handler:       An initialized PyTorchModelHandler with a loaded model to convert to ONNX.
        :param onnx_model_name:     The name to use to log the converted ONNX model. If not given, the given
                                    `model_name` will be used with an additional suffix `_onnx`. Defaulted to None.
        :param optimize_model:      Whether or not to optimize the ONNX model using 'onnxoptimizer' before saving the
                                    model. Defaulted to True.
        :param input_signature:     A list of the input layers shape and data type properties. Expected to receive a
                                    list where each element is an input layer tuple. An input layer tuple is a tuple of:
                                    [0] = Layer's shape, a tuple of integers.
                                    [1] = Layer's data type, a mlrun.data_types.ValueType string.
                                    If None, the input signature will be tried to be read from the model artifact.
                                    Defaulted to None.
        :param input_layers_names:  List of names to assign to the input nodes of the graph in order. All of the other
                                    parameters (inner layers) can be set as well by passing additional names in the
                                    list. The order is by the order of the parameters in the model. If None, the inputs
                                    will be read from the handler's inputs. If its also None, it is defaulted to:
                                    "input_0", "input_1", ...
        :param output_layers_names: List of names to assign to the output nodes of the graph in order. If None, the
                                    outputs will be read from the handler's outputs. If its also None, it is defaulted
                                    to: "output_0" (for multiple outputs, this parameter must be provided).
        :param dynamic_axes:        If part of the input / output shape is dynamic, like (batch_size, 3, 32, 32) you can
                                    specify it by giving a dynamic axis to the input / output layer by its name as
                                    follows: {
                                        "input layer name": {0: "batch_size"},
                                        "output layer name": {0: "batch_size"},
                                    }
                                    If provided, the 'is_batched' flag will be ignored. Defaulted to None.
        :param is_batched:          Whether to include a batch size as the first axis in every input and output layer.
                                    Defaulted to True. Will be ignored if 'dynamic_axes' is provided.
        """
        # Import the framework and handler:
        import torch
        from mlrun.frameworks.pytorch import PyTorchUtils

        # Parse the 'input_signature' parameter:
        if input_signature is not None:
            input_signature = tuple(
                [
                    torch.zeros(
                        size=shape,
                        dtype=PyTorchUtils.convert_value_type_to_torch_dtype(
                            value_type=value_type
                        ),
                    )
                    for (shape, value_type) in input_signature
                ]
            )

        # Convert to ONNX:
        model_handler.to_onnx(
            model_name=onnx_model_name,
            input_sample=input_signature,
            optimize=optimize_model,
            input_layers_names=input_layers_names,
            output_layers_names=output_layers_names,
            dynamic_axes=dynamic_axes,
            is_batched=is_batched,
        )


# Map for getting the conversion function according to the provided framework:
_CONVERSION_MAP = {
    "tensorflow.keras": _ToONNXConversions.tf_keras_to_onnx,
    "torch": _ToONNXConversions.pytorch_to_onnx,
}  # type: Dict[str, Callable]


def to_onnx(
    context: mlrun.MLClientCtx,
    model_path: str,
    load_model_kwargs: dict = None,
    onnx_model_name: str = None,
    optimize_model: bool = True,
    framework_kwargs: Dict[str, Any] = None,
):
    """
    Convert the given model to an ONNX model.

    :param context:           The MLRun function execution context
    :param model_path:        The model path store object.
    :param load_model_kwargs: Keyword arguments to pass to the `AutoMLRun.load_model` method.
    :param onnx_model_name:   The name to use to log the converted ONNX model. If not given, the given `model_name` will
                              be used with an additional suffix `_onnx`. Defaulted to None.
    :param optimize_model:    Whether to optimize the ONNX model using 'onnxoptimizer' before saving the model.
                              Defaulted to True.
    :param framework_kwargs:  Additional arguments each framework may require to convert to ONNX. To get the doc string
                              of the desired framework onnx conversion function, pass "help".
    """
    from mlrun.frameworks.auto_mlrun.auto_mlrun import AutoMLRun

    # Get a model handler of the required framework:
    load_model_kwargs = load_model_kwargs or {}
    model_handler = AutoMLRun.load_model(
        model_path=model_path, context=context, **load_model_kwargs
    )

    # Get the model's framework:
    framework = model_handler.FRAMEWORK_NAME

    # Use the conversion map to get the specific framework to onnx conversion:
    if framework not in _CONVERSION_MAP:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"The following framework: '{framework}', has no ONNX conversion."
        )
    conversion_function = _CONVERSION_MAP[framework]

    # Check if needed to print the function's doc string ("help" is passed):
    if framework_kwargs == "help":
        print(conversion_function.__doc__)
        return

    # Set the default empty framework kwargs if needed:
    if framework_kwargs is None:
        framework_kwargs = {}

    # Run the conversion:
    try:
        conversion_function(
            model_handler=model_handler,
            onnx_model_name=onnx_model_name,
            optimize_model=optimize_model,
            **framework_kwargs,
        )
    except TypeError as exception:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"ERROR: A TypeError exception was raised during the conversion:\n{exception}. "
            f"Please read the {framework} framework conversion function doc string by passing 'help' in the "
            f"'framework_kwargs' dictionary parameter."
        )


def optimize(
    context: mlrun.MLClientCtx,
    model_path: str,
    handler_init_kwargs: dict = None,
    optimizations: List[str] = None,
    fixed_point: bool = False,
    optimized_model_name: str = None,
):
    """
    Optimize the given ONNX model.

    :param context:              The MLRun function execution context.
    :param model_path:           Path to the ONNX model object.
    :param handler_init_kwargs:  Keyword arguments to pass to the `ONNXModelHandler` init method preloading.
    :param optimizations:        List of possible optimizations. To see what optimizations are available, pass "help".
                                 If None, all the optimizations will be used. Defaulted to None.
    :param fixed_point:          Optimize the weights using fixed point. Defaulted to False.
    :param optimized_model_name: The name of the optimized model. If None, the original model will be overridden.
                                 Defaulted to None.
    """
    # Import the model handler:
    import onnxoptimizer
    from mlrun.frameworks.onnx import ONNXModelHandler

    # Check if needed to print the available optimizations ("help" is passed):
    if optimizations == "help":
        available_passes = "\n* ".join(onnxoptimizer.get_available_passes())
        print(f"The available optimizations are:\n* {available_passes}")
        return

    # Create the model handler:
    handler_init_kwargs = handler_init_kwargs or {}
    model_handler = ONNXModelHandler(
        model_path=model_path, context=context, **handler_init_kwargs
    )

    # Load the ONNX model:
    model_handler.load()

    # Optimize the model using the given configurations:
    model_handler.optimize(optimizations=optimizations, fixed_point=fixed_point)

    # Rename if needed:
    if optimized_model_name is not None:
        model_handler.set_model_name(model_name=optimized_model_name)

    # Log the optimized model:
    model_handler.log()
-    requirements:
-    - tqdm~=4.67.1
-    - tensorflow~=2.19.0
-    - tf_keras~=2.19.0
-    - torch~=2.6.0
-    - torchvision~=0.21.0
-    - onnx~=1.17.0
-    - onnxruntime~=1.19.2
-    - onnxoptimizer~=0.3.13
-    - onnxmltools~=1.13.0
-    - tf2onnx~=1.16.1
-    - plotly~=5.23
-    with_mlrun: false
-    auto_build: true
-  disable_auto_mount: false
-  description: ONNX intigration in MLRun, some utils functions for the ONNX framework,
-    optimizing and converting models from different framework to ONNX using MLRun.
-  image: ''
   entry_points:
     tf_keras_to_onnx:
-      doc: Convert a TF.Keras model to an ONNX model and log it back to MLRun as a
-        new model object.
       name: tf_keras_to_onnx
       parameters:
       - name: model_handler
@@ -58,12 +32,12 @@ spec:
           data type, a mlrun.data_types.ValueType string. If None, the input signature
           will be tried to be read from the model artifact. Defaulted to None.'
         default: null
+      doc: Convert a TF.Keras model to an ONNX model and log it back to MLRun as a
+        new model object.
+      lineno: 26
       has_varargs: false
       has_kwargs: false
-      lineno: 26
     pytorch_to_onnx:
-      doc: Convert a PyTorch model to an ONNX model and log it back to MLRun as a
-        new model object.
       name: pytorch_to_onnx
       parameters:
       - name: model_handler
@@ -116,11 +90,12 @@ spec:
          doc: Whether to include a batch size as the first axis in every input and
            output layer. Defaulted to True. Will be ignored if 'dynamic_axes' is provided.
         default: true
+      doc: Convert a PyTorch model to an ONNX model and log it back to MLRun as a
+        new model object.
+      lineno: 81
       has_varargs: false
       has_kwargs: false
-      lineno: 81
     to_onnx:
-      doc: Convert the given model to an ONNX model.
       name: to_onnx
       parameters:
      - name: context
@@ -150,11 +125,11 @@ spec:
          get the doc string of the desired framework onnx conversion function, pass
          "help".
        default: null
+      doc: Convert the given model to an ONNX model.
+      lineno: 160
       has_varargs: false
       has_kwargs: false
-      lineno: 160
     optimize:
-      doc: Optimize the given ONNX model.
       name: optimize
       parameters:
      - name: context
@@ -181,9 +156,34 @@ spec:
          doc: The name of the optimized model. If None, the original model will be
            overridden. Defaulted to None.
        default: null
+      doc: Optimize the given ONNX model.
+      lineno: 224
       has_varargs: false
       has_kwargs: false
-      lineno: 224
+  image: ''
   default_handler: to_onnx
   allow_empty_resources: true
   command: ''
+  disable_auto_mount: false
+  description: ONNX integration in MLRun, some utility functions for the ONNX framework,
+    optimizing and converting models from different frameworks to ONNX using MLRun.
+  build:
+    functionSourceCode: # Copyright 2019 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Any, Callable, Dict, List, Tuple

import mlrun


class _ToONNXConversions:
    """
    An ONNX conversion functions library class.
    """

    @staticmethod
    def tf_keras_to_onnx(
        model_handler,
        onnx_model_name: str = None,
        optimize_model: bool = True,
        input_signature: List[Tuple[Tuple[int], str]] = None,
    ):
        """
        Convert a TF.Keras model to an ONNX model and log it back to MLRun as a new model object.

        :param model_handler:   An initialized TFKerasModelHandler with a loaded model to convert to ONNX.
        :param onnx_model_name: The name to use to log the converted ONNX model. If not given, the given `model_name`
                                will be used with an additional suffix `_onnx`. Defaulted to None.
        :param optimize_model:  Whether or not to optimize the ONNX model using 'onnxoptimizer' before saving the model.
                                Defaulted to True.
        :param input_signature: A list of the input layers shape and data type properties. Expected to receive a list
                                where each element is an input layer tuple. An input layer tuple is a tuple of:
                                [0] = Layer's shape, a tuple of integers.
                                [1] = Layer's data type, a mlrun.data_types.ValueType string.
                                If None, the function will try to read the input signature from the model artifact.
                                Defaulted to None.
        """
        # Import the framework and handler:
        import tensorflow as tf
        from mlrun.frameworks.tf_keras import TFKerasUtils

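        # Example `input_signature` (hypothetical values): [((1, 28, 28, 1), "float")]
        # is parsed below into [tf.TensorSpec(shape=(1, 28, 28, 1), dtype=...)].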
        # Check the given 'input_signature' parameter:
        if input_signature is None:
            # Read the inputs from the model:
            try:
                model_handler.read_inputs_from_model()
            except Exception as error:
                raise mlrun.errors.MLRunRuntimeError(
                    f"Please provide the 'input_signature' parameter. The function tried reading the input layers "
                    f"information automatically but failed with the following error: {error}"
                )
        else:
            # Parse the 'input_signature' parameter:
            input_signature = [
                tf.TensorSpec(
                    shape=shape,
                    dtype=TFKerasUtils.convert_value_type_to_tf_dtype(
                        value_type=value_type
                    ),
                )
                for (shape, value_type) in input_signature
            ]

        # Convert to ONNX:
        model_handler.to_onnx(
            model_name=onnx_model_name,
            input_signature=input_signature,
            optimize=optimize_model,
        )

    @staticmethod
    def pytorch_to_onnx(
        model_handler,
        onnx_model_name: str = None,
        optimize_model: bool = True,
        input_signature: List[Tuple[Tuple[int, ...], str]] = None,
        input_layers_names: List[str] = None,
        output_layers_names: List[str] = None,
        dynamic_axes: Dict[str, Dict[int, str]] = None,
        is_batched: bool = True,
    ):
        """
        Convert a PyTorch model to an ONNX model and log it back to MLRun as a new model object.

        :param model_handler:       An initialized PyTorchModelHandler with a loaded model to convert to ONNX.
        :param onnx_model_name:     The name to use to log the converted ONNX model. If not given, the given
                                    `model_name` will be used with an additional suffix `_onnx`. Defaulted to None.
        :param optimize_model:      Whether or not to optimize the ONNX model using 'onnxoptimizer' before saving the
                                    model. Defaulted to True.
        :param input_signature:     A list of the input layers shape and data type properties. Expected to receive a
                                    list where each element is an input layer tuple. An input layer tuple is a tuple of:
                                    [0] = Layer's shape, a tuple of integers.
                                    [1] = Layer's data type, a mlrun.data_types.ValueType string.
                                    If None, the function will try to read the input signature from the model
                                    artifact. Defaulted to None.
        :param input_layers_names:  List of names to assign to the input nodes of the graph in order. All of the other
                                    parameters (inner layers) can be set as well by passing additional names in the
                                    list. The order is by the order of the parameters in the model. If None, the inputs
                                    will be read from the handler's inputs. If it's also None, it defaults to:
                                    "input_0", "input_1", ...
        :param output_layers_names: List of names to assign to the output nodes of the graph in order. If None, the
                                    outputs will be read from the handler's outputs. If it's also None, it defaults
                                    to: "output_0" (for multiple outputs, this parameter must be provided).
        :param dynamic_axes:        If part of the input / output shape is dynamic, like (batch_size, 3, 32, 32), you can
                                    specify it by giving a dynamic axis to the input / output layer by its name as
                                    follows: {
                                        "input layer name": {0: "batch_size"},
                                        "output layer name": {0: "batch_size"},
                                    }
                                    If provided, the 'is_batched' flag will be ignored. Defaulted to None.
        :param is_batched:          Whether to include a batch size as the first axis in every input and output layer.
                                    Defaulted to True. Will be ignored if 'dynamic_axes' is provided.
        """
        # Import the framework and handler:
        import torch
        from mlrun.frameworks.pytorch import PyTorchUtils

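        # Example `input_signature` (hypothetical values): [((1, 3, 32, 32), "float")]
        # becomes a tuple of zero tensors used as the ONNX export input sample.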
        # Parse the 'input_signature' parameter:
        if input_signature is not None:
            input_signature = tuple(
                [
                    torch.zeros(
                        size=shape,
                        dtype=PyTorchUtils.convert_value_type_to_torch_dtype(
                            value_type=value_type
                        ),
                    )
                    for (shape, value_type) in input_signature
                ]
            )

        # Convert to ONNX:
        model_handler.to_onnx(
            model_name=onnx_model_name,
            input_sample=input_signature,
            optimize=optimize_model,
            input_layers_names=input_layers_names,
            output_layers_names=output_layers_names,
            dynamic_axes=dynamic_axes,
            is_batched=is_batched,
        )


# Map for getting the conversion function according to the provided framework:
_CONVERSION_MAP = {
    "tensorflow.keras": _ToONNXConversions.tf_keras_to_onnx,
    "torch": _ToONNXConversions.pytorch_to_onnx,
}  # type: Dict[str, Callable]
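# To support another framework, register it here; the key must match the
# FRAMEWORK_NAME of the corresponding MLRun model handler (see `to_onnx` below).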


def to_onnx(
    context: mlrun.MLClientCtx,
    model_path: str,
    load_model_kwargs: dict = None,
    onnx_model_name: str = None,
    optimize_model: bool = True,
    framework_kwargs: Dict[str, Any] = None,
):
    """
    Convert the given model to an ONNX model.

    :param context:           The MLRun function execution context
    :param model_path:        The store path / URI of the model object.
    :param load_model_kwargs: Keyword arguments to pass to the `AutoMLRun.load_model` method.
    :param onnx_model_name:   The name to use to log the converted ONNX model. If not given, the given `model_name` will
                              be used with an additional suffix `_onnx`. Defaulted to None.
    :param optimize_model:    Whether to optimize the ONNX model using 'onnxoptimizer' before saving the model.
                              Defaulted to True.
    :param framework_kwargs:  Additional arguments each framework may require for the ONNX conversion. To print the
                              doc string of the relevant framework's conversion function, pass "help".
    """
    from mlrun.frameworks.auto_mlrun.auto_mlrun import AutoMLRun

    # Get a model handler of the required framework:
    load_model_kwargs = load_model_kwargs or {}
    model_handler = AutoMLRun.load_model(
        model_path=model_path, context=context, **load_model_kwargs
    )

    # Get the model's framework:
    framework = model_handler.FRAMEWORK_NAME

    # Use the conversion map to get the framework-specific ONNX conversion function:
    if framework not in _CONVERSION_MAP:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"The following framework: '{framework}', has no ONNX conversion."
        )
    conversion_function = _CONVERSION_MAP[framework]

    # Print the conversion function's doc string if "help" was passed:
    if framework_kwargs == "help":
        print(conversion_function.__doc__)
        return

    # Set the default empty framework kwargs if needed:
    if framework_kwargs is None:
        framework_kwargs = {}

    # Run the conversion:
    try:
        conversion_function(
            model_handler=model_handler,
            onnx_model_name=onnx_model_name,
            optimize_model=optimize_model,
            **framework_kwargs,
        )
    except TypeError as exception:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"ERROR: A TypeError exception was raised during the conversion:\n{exception}. "
            f"Please read the {framework} framework conversion function doc string by passing 'help' in the "
            f"'framework_kwargs' dictionary parameter."
        )
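
# A minimal usage sketch of running 'to_onnx' through MLRun (the project, model names
# and input shape here are illustrative, not part of this module):
#
#     import mlrun
#
#     fn = mlrun.import_function("hub://onnx_utils")
#     fn.run(
#         handler="to_onnx",
#         params={
#             "model_path": "store://models/my-project/my_model",
#             "onnx_model_name": "my_model_onnx",
#             # PyTorch models need an input signature for the export tracing:
#             "framework_kwargs": {"input_signature": [((1, 3, 224, 224), "float32")]},
#         },
#         local=True,
#     )
#
# Passing framework_kwargs="help" instead prints the doc string of the framework's
# conversion function and returns without converting.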


def optimize(
    context: mlrun.MLClientCtx,
    model_path: str,
    handler_init_kwargs: dict = None,
    optimizations: List[str] = None,
    fixed_point: bool = False,
    optimized_model_name: str = None,
):
    """
    Optimize the given ONNX model.

    :param context:              The MLRun function execution context.
    :param model_path:           Path to the ONNX model object.
    :param handler_init_kwargs:  Keyword arguments to pass to the `ONNXModelHandler` init method before loading.
    :param optimizations:        List of optimizations to apply. To see the available optimizations, pass "help".
                                 If None, all of the available optimizations will be used. Defaulted to None.
    :param fixed_point:          Optimize the weights using fixed point. Defaulted to False.
    :param optimized_model_name: The name of the optimized model. If None, the original model will be overwritten.
                                 Defaulted to None.
    """
    # Import the optimizer and the model handler:
    import onnxoptimizer
    from mlrun.frameworks.onnx import ONNXModelHandler

    # Print the available optimizations if "help" was passed:
    if optimizations == "help":
        available_passes = "\n* ".join(onnxoptimizer.get_available_passes())
        print(f"The available optimizations are:\n* {available_passes}")
        return

    # Create the model handler:
    handler_init_kwargs = handler_init_kwargs or {}
    model_handler = ONNXModelHandler(
        model_path=model_path, context=context, **handler_init_kwargs
    )

    # Load the ONNX model:
    model_handler.load()

    # Optimize the model using the given configurations:
    model_handler.optimize(optimizations=optimizations, fixed_point=fixed_point)

    # Rename if needed:
    if optimized_model_name is not None:
        model_handler.set_model_name(model_name=optimized_model_name)

    # Log the optimized model:
    model_handler.log()
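
# A minimal usage sketch of running 'optimize' (the model path and names are
# illustrative, not part of this module):
#
#     import mlrun
#
#     fn = mlrun.import_function("hub://onnx_utils")
#     fn.run(
#         handler="optimize",
#         params={
#             "model_path": "store://models/my-project/my_model_onnx",
#             "handler_init_kwargs": {"model_name": "my_model_onnx"},
#             "optimized_model_name": "my_model_onnx_optimized",
#         },
#         local=True,
#     )
#
# Passing optimizations="help" instead prints the available onnxoptimizer passes.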
 + base_image: mlrun/mlrun + with_mlrun: false + auto_build: true + requirements: + - tqdm~=4.67.1 + - tensorflow~=2.19.0 + - tf_keras~=2.19.0 + - torch~=2.8.0 + - torchvision~=0.23.0 + - onnx~=1.17.0 + - onnxruntime~=1.19.2 + - onnxoptimizer~=0.3.13 + - onnxmltools~=1.13.0 + - tf2onnx~=1.16.1 + - plotly~=5.23 + origin_filename: '' + code_origin: '' +verbose: false diff --git a/functions/src/onnx_utils/item.yaml b/functions/src/onnx_utils/item.yaml index 803bd2599..5f129389f 100644 --- a/functions/src/onnx_utils/item.yaml +++ b/functions/src/onnx_utils/item.yaml @@ -13,7 +13,7 @@ labels: author: Iguazio maintainers: [] marketplaceType: '' -mlrunVersion: 1.7.2 +mlrunVersion: 1.10.0 name: onnx_utils platformVersion: 3.5.0 spec: @@ -30,8 +30,8 @@ spec: - tqdm~=4.67.1 - tensorflow~=2.19.0 - tf_keras~=2.19.0 - - torch~=2.6.0 - - torchvision~=0.21.0 + - torch~=2.8.0 + - torchvision~=0.23.0 - onnx~=1.17.0 - onnxruntime~=1.19.2 - onnxoptimizer~=0.3.13 @@ -39,4 +39,4 @@ spec: - tf2onnx~=1.16.1 - plotly~=5.23 url: '' -version: 1.3.0 +version: 1.4.0 diff --git a/functions/src/onnx_utils/onnx_utils.ipynb b/functions/src/onnx_utils/onnx_utils.ipynb index 78203a45d..14c810fab 100644 --- a/functions/src/onnx_utils/onnx_utils.ipynb +++ b/functions/src/onnx_utils/onnx_utils.ipynb @@ -77,9 +77,9 @@ "source": [ "### 1.2. Demo\n", "\n", - "We will use the `TF.Keras` framework, a `MobileNetV2` as our model and we will convert it to ONNX using the `to_onnx` handler.\n", + "We will use the `PyTorch` framework, a `MobileNetV2` as our model and we will convert it to ONNX using the `to_onnx` handler.\n", "\n", - "1.2.1. First we will set a temporary artifact path for our model to be saved in and choose the models names:" + "1.2.1. First we will set the artifact path for our model to be saved in and choose the models names:" ] }, { @@ -87,16 +87,21 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:13:28.256582Z", + "start_time": "2026-02-10T14:13:28.250886Z" } }, "source": [ "import os\n", - "os.environ[\"TF_USE_LEGACY_KERAS\"] = \"true\"\n", - "from tempfile import TemporaryDirectory\n", + "import tempfile\n", + "# Use a temporary directory for model artifacts (safe cleanup):\n", + "ARTIFACT_PATH = tempfile.mkdtemp()\n", + "os.environ[\"MLRUN_ARTIFACT_PATH\"] = ARTIFACT_PATH\n", "\n", - "# Create a temporary directory for the model artifact:\n", - "ARTIFACT_PATH = TemporaryDirectory().name\n", - "os.makedirs(ARTIFACT_PATH)\n", + "# Project name:\n", + "PROJECT_NAME = \"onnx-utils\"\n", "\n", "# Choose our model's name:\n", "MODEL_NAME = \"mobilenetv2\"\n", @@ -108,7 +113,7 @@ "OPTIMIZED_ONNX_MODEL_NAME = \"optimized_onnx_mobilenetv2\"" ], "outputs": [], - "execution_count": null + "execution_count": 1 }, { "cell_type": "markdown", @@ -118,87 +123,88 @@ } }, "source": [ - "1.2.2. Download the model from `keras.applications` and log it with MLRun's `TFKerasModelHandler`:" + "1.2.2. 
Download the model from `torchvision.models` and log it with MLRun's `PyTorchModelHandler`:" ] }, { - "cell_type": "code", "metadata": { - "pycharm": { - "name": "#%%\n" + "ExecuteTime": { + "end_time": "2026-02-10T14:00:15.032590Z", + "start_time": "2026-02-10T14:00:15.031196Z" } }, - "source": [ - "# mlrun: start-code" - ], + "cell_type": "code", + "source": "# mlrun: start-code", "outputs": [], - "execution_count": null + "execution_count": 8 }, { + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-10T14:14:00.992001Z", + "start_time": "2026-02-10T14:13:33.115438Z" + } + }, "cell_type": "code", - "metadata": {}, "source": [ - "from tensorflow import keras\n", + "import torchvision\n", "\n", "import mlrun\n", - "import mlrun.frameworks.tf_keras as mlrun_tf_keras\n", + "from mlrun.frameworks.pytorch import PyTorchModelHandler\n", "\n", "\n", "def get_model(context: mlrun.MLClientCtx, model_name: str):\n", " # Download the MobileNetV2 model:\n", - " model = keras.applications.mobilenet_v2.MobileNetV2()\n", + " model = torchvision.models.mobilenet_v2()\n", "\n", " # Initialize a model handler for logging the model:\n", - " model_handler = mlrun_tf_keras.TFKerasModelHandler(\n", + " model_handler = PyTorchModelHandler(\n", " model_name=model_name,\n", " model=model,\n", - " context=context\n", + " model_class=\"mobilenet_v2\",\n", + " modules_map={\"torchvision.models\": \"mobilenet_v2\"},\n", + " context=context,\n", " )\n", "\n", " # Log the model:\n", " model_handler.log()" ], "outputs": [], - "execution_count": null + "execution_count": 2 }, { - "cell_type": "code", "metadata": { - "pycharm": { - "name": "#%%\n" + "ExecuteTime": { + "end_time": "2026-02-10T14:00:15.040221Z", + "start_time": "2026-02-10T14:00:15.038886Z" } }, - "source": [ - "# mlrun: end-code" - ], + "cell_type": "code", + "source": "# mlrun: end-code", "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "1.2.3. 
Create the function using MLRun's `code_to_function` and run it:" - ] + "execution_count": 10 }, { "cell_type": "code", "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:14:34.429194Z", + "start_time": "2026-02-10T14:14:07.906087Z" } }, "source": [ "import mlrun\n", "\n", + "# Create or get the MLRun project:\n", + "project = mlrun.get_or_create_project(PROJECT_NAME, context=\"./\")\n", "\n", "# Create the function parsing this notebook's code using 'code_to_function':\n", "get_model_function = mlrun.code_to_function(\n", " name=\"get_mobilenetv2\",\n", + " project=PROJECT_NAME,\n", " kind=\"job\",\n", " image=\"mlrun/ml-models\"\n", ")\n", @@ -206,15 +212,267 @@ "# Run the function to log the model:\n", "get_model_run = get_model_function.run(\n", " handler=\"get_model\",\n", - " artifact_path=ARTIFACT_PATH,\n", + " output_path=ARTIFACT_PATH,\n", " params={\n", " \"model_name\": MODEL_NAME\n", " },\n", " local=True\n", ")" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-10 16:14:24,932 [info] Created and saved project: {\"context\":\"./\",\"from_template\":null,\"name\":\"onnx-utils\",\"overwrite\":false,\"save\":true}\n", + "> 2026-02-10 16:14:24,933 [info] Project created successfully: {\"project_name\":\"onnx-utils\",\"stored_in_db\":true}\n", + "> 2026-02-10 16:14:31,659 [info] Storing function: {\"db\":null,\"name\":\"get-mobilenetv2-get-model\",\"uid\":\"7b9d1b54375b44e191d73685a382c910\"}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartendstatekindnamelabelsinputsparametersresultsartifact_uris
onnx-utils0Feb 10 14:14:32NaTcompletedrunget-mobilenetv2-get-model
v3io_user=omerm
kind=local
owner=omerm
host=M-KCX16N69X3
model_name=mobilenetv2
mobilenetv2_modules_map.json=store://artifacts/onnx-utils/#0@7b9d1b54375b44e191d73685a382c910
model=store://models/onnx-utils/mobilenetv2#0@7b9d1b54375b44e191d73685a382c910^e0393bc5b070fd55cc57cecb94160ce412498e0f
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-10 16:14:34,427 [info] Run execution finished: {\"name\":\"get-mobilenetv2-get-model\",\"status\":\"completed\"}\n" + ] + } + ], + "execution_count": 3 }, { "cell_type": "markdown", @@ -228,33 +486,271 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:14:53.863947Z", + "start_time": "2026-02-10T14:14:48.088349Z" } }, - "source": [ - "# Import the ONNX function from the marketplace:\n", - "onnx_utils_function = mlrun.import_function(\"hub://onnx_utils\")\n", - "\n", - "# Run the function to convert our model to ONNX:\n", - "to_onnx_run = onnx_utils_function.run(\n", - " handler=\"to_onnx\",\n", - " artifact_path=ARTIFACT_PATH,\n", - " params={\n", - " \"model_name\": MODEL_NAME,\n", - " \"model_path\": get_model_run.outputs[MODEL_NAME], # <- Take the logged model from the previous function.\n", - " \"onnx_model_name\": ONNX_MODEL_NAME,\n", - " \"optimize_model\": False # <- For optimizing it later in the demo, we mark the flag as False\n", - " },\n", - " local=True\n", - ")" + "source": "# Import the ONNX function from the marketplace:\nonnx_utils_function = mlrun.import_function(\"hub://onnx_utils\", project=PROJECT_NAME)\n\n# Construct the model path from the run directory structure:\nmodel_path = os.path.join(ARTIFACT_PATH, \"get-mobilenetv2-get-model\", \"0\", \"model\")\nmodules_map_path = os.path.join(ARTIFACT_PATH, \"get-mobilenetv2-get-model\", \"0\", \"mobilenetv2_modules_map.json.json\")\n\n# Run the function to convert our model to ONNX:\nto_onnx_run = onnx_utils_function.run(\n handler=\"to_onnx\",\n output_path=ARTIFACT_PATH,\n params={\n \"model_name\": MODEL_NAME,\n \"model_path\": model_path,\n \"load_model_kwargs\": {\n \"model_name\": MODEL_NAME,\n \"model_class\": \"mobilenet_v2\",\n \"modules_map\": modules_map_path,\n },\n \"onnx_model_name\": ONNX_MODEL_NAME,\n \"optimize_model\": False, # <- For optimizing it later in the demo, we mark the flag as False\n \"framework_kwargs\": {\"input_signature\": [((32, 3, 224, 224), \"float32\")]},\n },\n local=True\n)", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-10 16:14:48,519 [info] Storing function: {\"db\":null,\"name\":\"onnx-utils-to-onnx\",\"uid\":\"95deb2c7dbf0460291efb25c48eeebd7\"}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartendstatekindnamelabelsinputsparametersresultsartifact_uris
onnx-utils0Feb 10 14:14:49NaTcompletedrunonnx-utils-to-onnx
v3io_user=omerm
kind=local
owner=omerm
host=M-KCX16N69X3
model_name=mobilenetv2
model_path=/var/folders/rn/q8gs952n26982d36y50w_2rw0000gp/T/tmpvs5qvbxr/get-mobilenetv2-get-model/0/model
load_model_kwargs={'model_name': 'mobilenetv2', 'model_class': 'mobilenet_v2', 'modules_map': '/var/folders/rn/q8gs952n26982d36y50w_2rw0000gp/T/tmpvs5qvbxr/get-mobilenetv2-get-model/0/mobilenetv2_modules_map.json.json'}
onnx_model_name=onnx_mobilenetv2
optimize_model=False
framework_kwargs={'input_signature': [((32, 3, 224, 224), 'float32')]}
model=store://models/onnx-utils/onnx_mobilenetv2#0@95deb2c7dbf0460291efb25c48eeebd7^03e4286da44d015cf5465d43e809a504d15f7f63
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-10 16:14:53,862 [info] Run execution finished: {\"name\":\"onnx-utils-to-onnx\",\"status\":\"completed\"}\n" + ] + } ], - "outputs": [], - "execution_count": null + "execution_count": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "1.2.5. Now, listing the artifact directory we will see both our `tf.keras` model and the `onnx` model:" + "1.2.5. Now we verify the ONNX model was created:" ] }, { @@ -262,16 +758,29 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:14:56.820411Z", + "start_time": "2026-02-10T14:14:56.817892Z" } }, "source": [ "import os\n", "\n", - "\n", - "print(os.listdir(ARTIFACT_PATH))" + "onnx_model_file = os.path.join(ARTIFACT_PATH, \"onnx-utils-to-onnx\", \"0\", \"model\", \"onnx_mobilenetv2.onnx\")\n", + "assert os.path.isfile(onnx_model_file), f\"ONNX model not found at {onnx_model_file}\"\n", + "print(f\"ONNX model created at: {onnx_model_file}\")" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ONNX model created at: /var/folders/rn/q8gs952n26982d36y50w_2rw0000gp/T/tmpvs5qvbxr/onnx-utils-to-onnx/0/model/onnx_mobilenetv2.onnx\n" + ] + } + ], + "execution_count": 5 }, { "cell_type": "markdown", @@ -308,28 +817,281 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:15:03.415997Z", + "start_time": "2026-02-10T14:15:00.637332Z" } }, - "source": [ - "onnx_utils_function.run(\n", - " handler=\"optimize\",\n", - " artifact_path=ARTIFACT_PATH,\n", - " params={\n", - " \"model_name\": ONNX_MODEL_NAME,\n", - " \"model_path\": to_onnx_run.output(ONNX_MODEL_NAME), # <- Take the logged model from the previous function.\n", - " \"optimized_model_name\": OPTIMIZED_ONNX_MODEL_NAME,\n", - " },\n", - " local=True\n", - ")" + "source": "# Construct the ONNX model path from the run directory structure:\nonnx_model_path = os.path.join(ARTIFACT_PATH, \"onnx-utils-to-onnx\", \"0\", \"model\")\n\nonnx_utils_function.run(\n handler=\"optimize\",\n output_path=ARTIFACT_PATH,\n params={\n \"model_path\": onnx_model_path,\n \"handler_init_kwargs\": {\"model_name\": ONNX_MODEL_NAME},\n \"optimized_model_name\": OPTIMIZED_ONNX_MODEL_NAME,\n },\n local=True\n)", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-10 16:15:00,639 [info] Storing function: {\"db\":null,\"name\":\"onnx-utils-optimize\",\"uid\":\"0c30d7af94814dcabde8152a1951fb5d\"}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartendstatekindnamelabelsinputsparametersresultsartifact_uris
onnx-utils0Feb 10 14:15:01NaTcompletedrunonnx-utils-optimize
v3io_user=omerm
kind=local
owner=omerm
host=M-KCX16N69X3
model_path=/var/folders/rn/q8gs952n26982d36y50w_2rw0000gp/T/tmpvs5qvbxr/onnx-utils-to-onnx/0/model
handler_init_kwargs={'model_name': 'onnx_mobilenetv2'}
optimized_model_name=optimized_onnx_mobilenetv2
model=store://models/onnx-utils/optimized_onnx_mobilenetv2#0@0c30d7af94814dcabde8152a1951fb5d^599547984e83a664dc1c2708607d06731edb5ac2
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-10 16:15:03,414 [info] Run execution finished: {\"name\":\"onnx-utils-optimize\",\"status\":\"completed\"}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } ], - "outputs": [], - "execution_count": null + "execution_count": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "2.2.2. And now our model was optimized and can be seen under the `ARTIFACT_PATH`:" + "2.2.2. And now our model was optimized. Let us verify:" ] }, { @@ -337,13 +1099,27 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:15:05.748413Z", + "start_time": "2026-02-10T14:15:05.745309Z" } }, "source": [ - "print(os.listdir(ARTIFACT_PATH))" + "optimized_model_file = os.path.join(ARTIFACT_PATH, \"onnx-utils-optimize\", \"0\", \"model\", \"optimized_onnx_mobilenetv2.onnx\")\n", + "assert os.path.isfile(optimized_model_file), f\"Optimized ONNX model not found at {optimized_model_file}\"\n", + "print(f\"Optimized ONNX model created at: {optimized_model_file}\")" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimized ONNX model created at: /var/folders/rn/q8gs952n26982d36y50w_2rw0000gp/T/tmpvs5qvbxr/onnx-utils-optimize/0/model/optimized_onnx_mobilenetv2.onnx\n" + ] + } + ], + "execution_count": 7 }, { "cell_type": "markdown", @@ -353,7 +1129,7 @@ } }, "source": [ - "Lastly, run this code to clean up the models:" + "Lastly, run this code to clean up all generated files and directories:" ] }, { @@ -361,23 +1137,22 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2026-02-10T14:00:28.409998Z", + "start_time": "2026-02-10T13:57:21.679146Z" } }, - "source": [ - "import shutil\n", - "\n", - "\n", - "shutil.rmtree(ARTIFACT_PATH)" - ], + "source": "import shutil\n\n# Clean up the temporary artifact directory:\nif os.path.exists(ARTIFACT_PATH):\n shutil.rmtree(ARTIFACT_PATH)", "outputs": [], "execution_count": null } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "mlrun_functions", "language": "python", - "name": "python3" + "name": "mlrun_functions" }, "language_info": { "codemirror_mode": { diff --git a/functions/src/onnx_utils/requirements.txt b/functions/src/onnx_utils/requirements.txt index d3d7dfd68..912b3d7e5 100644 --- a/functions/src/onnx_utils/requirements.txt +++ b/functions/src/onnx_utils/requirements.txt @@ -1,11 +1,10 @@ tqdm~=4.67.1 tensorflow~=2.19.0 tf_keras~=2.19.0 -torch~=2.6.0 -torchvision~=0.21.0 +torch~=2.8 +torchvision~=0.23.0 onnx~=1.17.0 onnxruntime~=1.19.2 onnxoptimizer~=0.3.13 onnxmltools~=1.13.0 -tf2onnx~=1.16.1 -plotly~=5.23 +plotly~=5.23 \ No newline at end of file diff --git a/functions/src/onnx_utils/test_onnx_utils.py b/functions/src/onnx_utils/test_onnx_utils.py index 2e01782f5..59c6c2b38 100644 --- a/functions/src/onnx_utils/test_onnx_utils.py +++ 
b/functions/src/onnx_utils/test_onnx_utils.py @@ -17,6 +17,9 @@ import tempfile import mlrun +import pytest + +PROJECT_NAME = "onnx-utils" # Choose our model's name: MODEL_NAME = "model" @@ -27,41 +30,67 @@ # Choose our optimized ONNX version model's name: OPTIMIZED_ONNX_MODEL_NAME = f"optimized_{ONNX_MODEL_NAME}" +REQUIRED_ENV_VARS = [ + "MLRUN_DBPATH", + "MLRUN_ARTIFACT_PATH", + "V3IO_USERNAME", + "V3IO_ACCESS_KEY", +] -def _setup_environment() -> str: - """ - Setup the test environment, creating the artifacts path of the test. - :returns: The temporary directory created for the test artifacts path. +def _validate_environment_variables() -> bool: """ - artifact_path = tempfile.TemporaryDirectory().name - os.makedirs(artifact_path) - return artifact_path + Checks that all required Environment variables are set. + """ + environment_keys = os.environ.keys() + return all(key in environment_keys for key in REQUIRED_ENV_VARS) -def _cleanup_environment(artifact_path: str): +def _is_tf2onnx_available() -> bool: """ - Cleanup the test environment, deleting files and artifacts created during the test. - - :param artifact_path: The artifact path to delete. + Check if tf2onnx is installed (required for TensorFlow/Keras ONNX conversion). """ - # Clean the local directory: + try: + import tf2onnx + return True + except ImportError: + return False + + +@pytest.fixture(scope="session") +def onnx_project(): + """Create/get the MLRun project once per test session.""" + return mlrun.get_or_create_project(PROJECT_NAME, context="./") + + +@pytest.fixture(autouse=True) +def test_environment(onnx_project): + """Setup and cleanup test artifacts for each test.""" + artifact_path = tempfile.mkdtemp() + yield artifact_path + # Cleanup - only remove files/dirs from the directory containing this test file, + # never from an arbitrary CWD (which could be the project root). + test_dir = os.path.dirname(os.path.abspath(__file__)) for test_output in [ - *os.listdir(artifact_path), "schedules", "runs", "artifacts", "functions", + "model.pt", + "model.zip", + "model_modules_map.json", + "model_modules_map.json.json", + "onnx_model.onnx", + "optimized_onnx_model.onnx", ]: - test_output_path = os.path.abspath(f"./{test_output}") + test_output_path = os.path.join(test_dir, test_output) if os.path.exists(test_output_path): if os.path.isdir(test_output_path): shutil.rmtree(test_output_path) else: os.remove(test_output_path) - - # Clean the artifacts directory: - shutil.rmtree(artifact_path) + if os.path.exists(artifact_path): + shutil.rmtree(artifact_path) def _log_tf_keras_model(context: mlrun.MLClientCtx, model_name: str): @@ -114,42 +143,55 @@ def _log_pytorch_model(context: mlrun.MLClientCtx, model_name: str): model_handler.log() -def test_to_onnx_help(): +@pytest.mark.skipif( + condition=not _validate_environment_variables(), + reason="Project's environment variables are not set", +) +def test_to_onnx_help(test_environment): """ Test the 'to_onnx' handler, passing "help" in the 'framework_kwargs'. 
""" - # Setup the tests environment: - artifact_path = _setup_environment() + artifact_path = test_environment # Create the function: log_model_function = mlrun.code_to_function( filename="test_onnx_utils.py", name="log_model", + project=PROJECT_NAME, kind="job", image="mlrun/ml-models", ) # Run the function to log the model: - log_model_run = log_model_function.run( - handler="_log_tf_keras_model", - artifact_path=artifact_path, + log_model_function.run( + handler="_log_pytorch_model", + output_path=artifact_path, params={"model_name": MODEL_NAME}, local=True, ) + # Get artifact paths - construct from artifact_path and run structure + run_artifact_dir = os.path.join(artifact_path, "log-model--log-pytorch-model", "0") + model_path = os.path.join(run_artifact_dir, "model") + modules_map_path = os.path.join(run_artifact_dir, "model_modules_map.json.json") + # Import the ONNX Utils function: - onnx_function = mlrun.import_function("function.yaml") + onnx_function = mlrun.import_function("function.yaml", project=PROJECT_NAME) # Run the function, passing "help" in 'framework_kwargs' and see that no exception was raised: is_test_passed = True try: onnx_function.run( handler="to_onnx", - artifact_path=artifact_path, + output_path=artifact_path, params={ # Take the logged model from the previous function. - "model_path": log_model_run.status.artifacts[0]["spec"]["target_path"], - "load_model_kwargs": {"model_name": MODEL_NAME}, + "model_path": model_path, + "load_model_kwargs": { + "model_name": MODEL_NAME, + "model_class": "mobilenet_v2", + "modules_map": modules_map_path, + }, "framework_kwargs": "help", }, local=True, @@ -160,23 +202,28 @@ def test_to_onnx_help(): ) is_test_passed = False - # Cleanup the tests environment: - _cleanup_environment(artifact_path=artifact_path) - assert is_test_passed -def test_tf_keras_to_onnx(): +@pytest.mark.skipif( + condition=not _validate_environment_variables(), + reason="Project's environment variables are not set", +) +@pytest.mark.skipif( + condition=not _is_tf2onnx_available(), + reason="tf2onnx is not installed", +) +def test_tf_keras_to_onnx(test_environment): """ Test the 'to_onnx' handler, giving it a tf.keras model. """ - # Setup the tests environment: - artifact_path = _setup_environment() + artifact_path = test_environment # Create the function: log_model_function = mlrun.code_to_function( filename="test_onnx_utils.py", name="log_model", + project=PROJECT_NAME, kind="job", image="mlrun/ml-models", ) @@ -184,18 +231,18 @@ def test_tf_keras_to_onnx(): # Run the function to log the model: log_model_run = log_model_function.run( handler="_log_tf_keras_model", - artifact_path=artifact_path, + output_path=artifact_path, params={"model_name": MODEL_NAME}, local=True, ) # Import the ONNX Utils function: - onnx_function = mlrun.import_function("function.yaml") + onnx_function = mlrun.import_function("function.yaml", project=PROJECT_NAME) # Run the function to convert our model to ONNX: onnx_function_run = onnx_function.run( handler="to_onnx", - artifact_path=artifact_path, + output_path=artifact_path, params={ # Take the logged model from the previous function. 
"model_path": log_model_run.status.artifacts[0]["spec"]["target_path"], @@ -205,9 +252,6 @@ def test_tf_keras_to_onnx(): local=True, ) - # Cleanup the tests environment: - _cleanup_environment(artifact_path=artifact_path) - # Print the outputs list: print(f"Produced outputs: {onnx_function_run.outputs}") @@ -215,17 +259,21 @@ def test_tf_keras_to_onnx(): assert "model" in onnx_function_run.outputs -def test_pytorch_to_onnx(): +@pytest.mark.skipif( + condition=not _validate_environment_variables(), + reason="Project's environment variables are not set", +) +def test_pytorch_to_onnx(test_environment): """ Test the 'to_onnx' handler, giving it a pytorch model. """ - # Setup the tests environment: - artifact_path = _setup_environment() + artifact_path = test_environment # Create the function: log_model_function = mlrun.code_to_function( filename="test_onnx_utils.py", name="log_model", + project=PROJECT_NAME, kind="job", image="mlrun/ml-models", ) @@ -233,25 +281,30 @@ def test_pytorch_to_onnx(): # Run the function to log the model: log_model_run = log_model_function.run( handler="_log_pytorch_model", - artifact_path=artifact_path, + output_path=artifact_path, params={"model_name": MODEL_NAME}, local=True, ) # Import the ONNX Utils function: - onnx_function = mlrun.import_function("function.yaml") + onnx_function = mlrun.import_function("function.yaml", project=PROJECT_NAME) + + # Get artifact paths - construct from artifact_path and run structure + run_artifact_dir = os.path.join(artifact_path, "log-model--log-pytorch-model", "0") + model_path = os.path.join(run_artifact_dir, "model") + modules_map_path = os.path.join(run_artifact_dir, "model_modules_map.json.json") # Run the function to convert our model to ONNX: onnx_function_run = onnx_function.run( handler="to_onnx", - artifact_path=artifact_path, + output_path=artifact_path, params={ # Take the logged model from the previous function. - "model_path": log_model_run.status.artifacts[1]["spec"]["target_path"], + "model_path": model_path, "load_model_kwargs": { "model_name": MODEL_NAME, "model_class": "mobilenet_v2", - "modules_map": log_model_run.status.artifacts[0]["spec"]["target_path"], + "modules_map": modules_map_path, }, "onnx_model_name": ONNX_MODEL_NAME, "framework_kwargs": {"input_signature": [((32, 3, 224, 224), "float32")]}, @@ -259,9 +312,6 @@ def test_pytorch_to_onnx(): local=True, ) - # Cleanup the tests environment: - _cleanup_environment(artifact_path=artifact_path) - # Print the outputs list: print(f"Produced outputs: {onnx_function_run.outputs}") @@ -269,22 +319,25 @@ def test_pytorch_to_onnx(): assert "model" in onnx_function_run.outputs -def test_optimize_help(): +@pytest.mark.skipif( + condition=not _validate_environment_variables(), + reason="Project's environment variables are not set", +) +def test_optimize_help(test_environment): """ Test the 'optimize' handler, passing "help" in the 'optimizations'. 
""" - # Setup the tests environment: - artifact_path = _setup_environment() + artifact_path = test_environment # Import the ONNX Utils function: - onnx_function = mlrun.import_function("function.yaml") + onnx_function = mlrun.import_function("function.yaml", project=PROJECT_NAME) # Run the function, passing "help" in 'optimizations' and see that no exception was raised: is_test_passed = True try: onnx_function.run( handler="optimize", - artifact_path=artifact_path, + output_path=artifact_path, params={ "model_path": "", "optimizations": "help", @@ -297,69 +350,81 @@ def test_optimize_help(): ) is_test_passed = False - # Cleanup the tests environment: - _cleanup_environment(artifact_path=artifact_path) - assert is_test_passed -def test_optimize(): +@pytest.mark.skipif( + condition=not _validate_environment_variables(), + reason="Project's environment variables are not set", +) +def test_optimize(test_environment): """ - Test the 'optimize' handler, giving it a model from the ONNX zoo git repository. + Test the 'optimize' handler, giving it a pytorch model converted to ONNX. """ - # Setup the tests environment: - artifact_path = _setup_environment() + artifact_path = test_environment # Create the function: log_model_function = mlrun.code_to_function( filename="test_onnx_utils.py", name="log_model", + project=PROJECT_NAME, kind="job", image="mlrun/ml-models", ) # Run the function to log the model: - log_model_run = log_model_function.run( - handler="_log_tf_keras_model", - artifact_path=artifact_path, + log_model_function.run( + handler="_log_pytorch_model", + output_path=artifact_path, params={"model_name": MODEL_NAME}, local=True, ) + # Get artifact paths - construct from artifact_path and run structure + run_artifact_dir = os.path.join(artifact_path, "log-model--log-pytorch-model", "0") + model_path = os.path.join(run_artifact_dir, "model") + modules_map_path = os.path.join(run_artifact_dir, "model_modules_map.json.json") + # Import the ONNX Utils function: - onnx_function = mlrun.import_function("function.yaml") + onnx_function = mlrun.import_function("function.yaml", project=PROJECT_NAME) # Run the function to convert our model to ONNX: - to_onnx_function_run = onnx_function.run( + onnx_function.run( handler="to_onnx", - artifact_path=artifact_path, + output_path=artifact_path, params={ # Take the logged model from the previous function. - "model_path": log_model_run.status.artifacts[0]["spec"]["target_path"], - "load_model_kwargs": {"model_name": MODEL_NAME}, + "model_path": model_path, + "load_model_kwargs": { + "model_name": MODEL_NAME, + "model_class": "mobilenet_v2", + "modules_map": modules_map_path, + }, "onnx_model_name": ONNX_MODEL_NAME, + "framework_kwargs": {"input_signature": [((32, 3, 224, 224), "float32")]}, }, local=True, ) + # Get the ONNX model path from the to_onnx run output + onnx_run_artifact_dir = os.path.join( + artifact_path, "onnx-utils-to-onnx", "0" + ) + onnx_model_path = os.path.join(onnx_run_artifact_dir, "model") + # Run the function to optimize our model: optimize_function_run = onnx_function.run( handler="optimize", - artifact_path=artifact_path, + output_path=artifact_path, params={ # Take the logged model from the previous function. 
- "model_path": to_onnx_function_run.status.artifacts[0]["spec"][ - "target_path" - ], + "model_path": onnx_model_path, "handler_init_kwargs": {"model_name": ONNX_MODEL_NAME}, "optimized_model_name": OPTIMIZED_ONNX_MODEL_NAME, }, local=True, ) - # Cleanup the tests environment: - _cleanup_environment(artifact_path=artifact_path) - # Print the outputs list: print(f"Produced outputs: {optimize_function_run.outputs}") diff --git a/modules/src/langchain_mlrun/item.yaml b/modules/src/langchain_mlrun/item.yaml new file mode 100644 index 000000000..532cb4bd3 --- /dev/null +++ b/modules/src/langchain_mlrun/item.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +categories: +- langchain +- langgraph +- tracing +- monitoring +- llm +description: LangChain x MLRun integration - Orchestrate your LangChain code with MLRun. +example: langchain_mlrun.ipynb +generationDate: 2026-01-08:12-25 +hidden: false +labels: + author: Iguazio +mlrunVersion: 1.10.0 +name: langchain_mlrun +spec: + filename: langchain_mlrun.py + image: mlrun/mlrun + kind: generic + requirements: + - langchain~=1.2 + - pydantic-settings~=2.12 + - kafka-python~=2.3 +version: 0.0.1 \ No newline at end of file diff --git a/modules/src/langchain_mlrun/langchain_mlrun.ipynb b/modules/src/langchain_mlrun/langchain_mlrun.ipynb new file mode 100644 index 000000000..0e5a341e7 --- /dev/null +++ b/modules/src/langchain_mlrun/langchain_mlrun.ipynb @@ -0,0 +1,1046 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7955da79-02cc-42fe-aee0-5456d3e386fd", + "metadata": {}, + "source": [ + "# LangChain ✕ MLRun Integration\n", + "\n", + "`langchain_mlrun` is a hub module that implements LangChain integration with MLRun. Using the module allows MLRun to orchestrate LangChain and LangGraph code, enabling tracing and monitoring batch workflows and realtime deployments.\n", + "___" + ] + }, + { + "cell_type": "markdown", + "id": "8392a3e1-d0a1-409a-ae68-fcc36858d30a", + "metadata": {}, + "source": [ + "## Main Components\n", + "\n", + "This is a short brief of the components available to import from the `langchain_mlrun` module. For full docs, see the documentation page.\n", + "\n", + "### Settings\n", + "\n", + "The module uses Pydantic settings classes that can be configured programmatically or via environment variables. The main class is `MLRunTracerSettings`. It contains two sub-settings:\n", + "* `MLRunTracerClientSettings` - Connection settings (stream path, container, endpoint info). Env prefix: `\"LC_MLRUN_TRACER_CLIENT_\"`\n", + "* `MLRunTracerMonitorSettings` - Controls what/how runs are captured (filters, labels, debug mode). 
Env prefix: `\"LC_MLRUN_TRACER_MONITOR_\"`\n", + "\n", + "For more information about each setting, see the class docstrings.\n", + "\n", + "#### Example - via code configuration\n", + "\n", + "```python\n", + "from langchain_mlrun import MLRunTracerSettings, MLRunTracerClientSettings, MLRunTracerMonitorSettings\n", + "\n", + "settings = MLRunTracerSettings(\n", + " client=MLRunTracerClientSettings(\n", + " stream_path=\"my-project/model-endpoints/stream-v1\",\n", + " container=\"projects\",\n", + " model_endpoint_name=\"my_endpoint\",\n", + " model_endpoint_uid=\"abc123\",\n", + " serving_function=\"my_function\",\n", + " ),\n", + " monitor=MLRunTracerMonitorSettings(\n", + " label=\"production\",\n", + " root_run_only=True, # Only monitor root runs, not child runs\n", + " tags_filter=[\"important\"], # Only monitor runs with this tag\n", + " ),\n", + ")\n", + "```\n", + "\n", + "#### Example - environment variable configuration\n", + "\n", + "```bash\n", + "export LC_MLRUN_TRACER_CLIENT_STREAM_PATH=\"my-project/model-endpoints/stream-v1\"\n", + "export LC_MLRUN_TRACER_CLIENT_CONTAINER=\"projects\"\n", + "export LC_MLRUN_TRACER_MONITOR_LABEL=\"production\"\n", + "export LC_MLRUN_TRACER_MONITOR_ROOT_RUN_ONLY=\"true\"\n", + "```\n", + "\n", + "### MLRun Tracer\n", + "\n", + "`MLRunTracer` is a LangChain-compatible tracer that converts LangChain `Run` objects into MLRun monitoring events and publishes them to a V3IO stream. \n", + "\n", + "Key points:\n", + "* **No inheritance required** - use it directly without subclassing.\n", + "* **Fully customizable via settings** - control filtering, summarization, and output format.\n", + "* **Custom summarizer support** - pass your own `run_summarizer_function` via settings to customize how runs are converted to events.\n", + "\n", + "### Monitoring Setup Utility Function\n", + "\n", + "`setup_langchain_monitoring()` is a utility function that creates the necessary MLRun infrastructure for LangChain monitoring. This is a **temporary workaround** until custom endpoint creation support is added to MLRun.\n", + "\n", + "The function returns a dictionary of environment variables to configure auto-tracing. 
See how to use it in the tutorial section below.\n", + "\n", + "### LangChain Monitoring Application\n", + "\n", + "`LangChainMonitoringApp` is a base class (inheriting from MLRun's `ModelMonitoringApplicationBase`) for building monitoring applications that process events from the MLRun Tracer.\n", + "\n", + "It offers several built-in helper methods and metrics for analyzing LangChain runs:\n", + "\n", + "* Helper methods:\n", + " * `get_structured_runs()` - Parse raw monitoring samples into structured run dictionaries with filtering options\n", + " * `iterate_structured_runs()` - Iterate over all runs including nested child runs\n", + "* Metric methods:\n", + " * `calculate_average_latency()` - Average latency across root runs\n", + " * `calculate_success_rate()` - Percentage of runs without errors\n", + " * `count_token_usage()` - Total input/output tokens from LLM runs\n", + " * `count_run_names()` - Count occurrences of each run name\n", + "\n", + "The base app can be used as-is, but it is recommended to extend it with your own custom monitoring logic.\n", + "___" + ] + }, + { + "cell_type": "markdown", + "id": "7e24e1a5-d80a-4b7e-9b94-57b24e8b39d7", + "metadata": {}, + "source": [ + "## How to Apply MLRun?\n", + "\n", + "### Auto Tracing\n", + "\n", + "Auto tracing automatically instruments all LangChain code by setting the `LC_MLRUN_MONITORING_ENABLED` environment variable and importing the module:\n", + "\n", + "```python\n", + "import os\n", + "os.environ[\"LC_MLRUN_MONITORING_ENABLED\"] = \"1\"\n", + "# Set other LC_MLRUN_TRACER_* environment variables as needed...\n", + "\n", + "# Import the module BEFORE any LangChain code\n", + "langchain_mlrun = mlrun.import_module(\"hub://langchain_mlrun\")\n", + "\n", + "# All LangChain/LangGraph code below will be automatically traced\n", + "chain.invoke(...)\n", + "```\n", + "\n", + "### Manual Tracing\n", + "\n", + "For more control, use the `mlrun_monitoring()` context manager to trace specific code blocks:\n", + "\n", + "```python\n", + "langchain_mlrun = mlrun.import_module(\"hub://langchain_mlrun\")\n", + "mlrun_monitoring = langchain_mlrun.mlrun_monitoring\n", + "MLRunTracerSettings = langchain_mlrun.MLRunTracerSettings\n", + "\n", + "# Optional: customize settings\n", + "settings = MLRunTracerSettings(...)\n", + "\n", + "with mlrun_monitoring(settings=settings) as tracer:\n", + " # Only LangChain code within this block will be traced\n", + " result = chain.invoke({\"topic\": \"MLRun\"})\n", + "```\n", + "___" + ] + }, + { + "cell_type": "markdown", + "id": "68b52d3d-a431-44fb-acd6-ea33fec37a49", + "metadata": {}, + "source": [ + "## Tutorial\n", + "\n", + "In this tutorial we'll show how to orchestrate LangChain based code with MLRun using the `langchain_mlrun` hub module.\n", + "\n", + "### Prerequisites\n", + "\n", + "Install MLRun and the `langchain_mlrun` requirements." 
+ ] + }, + { + "cell_type": "code", + "id": "caf72aa6-06e8-4a04-bfc4-409b39d255fe", + "metadata": {}, + "source": "!pip install mlrun langchain~=1.2 pydantic-settings~=2.12 kafka-python~=2.3", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "vprq8wj4iqh", + "source": [ + "### Local Development Setup (Optional)\n", + "\n", + "> Skip this section if you're running inside a Jupyter instance deployed in the MLRun cluster.\n", + "\n", + "If you're running this notebook from your local machine, follow these steps:\n", + "\n", + "#### Step 1: Set Environment Variables\n", + "\n", + "Run the cell below to set up all required environment variables for local development." + ], + "metadata": {} + }, + { + "cell_type": "code", + "id": "9lc788zu3zi", + "source": [ + "import os\n", + "\n", + "# MLRun API endpoint:\n", + "# os.environ[\"MLRUN_DBPATH\"] = \"http://localhost:30070\"\n", + "\n", + "# Kafka Configuration:\n", + "# os.environ[\"KAFKA_BROKER\"] = \"\"\n", + "\n", + "# TDEngine Configuration:\n", + "# os.environ[\"TDENGINE_HOST\"] = \"\"\n", + "# os.environ[\"TDENGINE_PORT\"] = \"\"\n", + "# os.environ[\"TDENGINE_USER\"] = \"\"\n", + "# os.environ[\"TDENGINE_PASSWORD\"] = \"\"\n", + "\n", + "# MinIO/S3 Configuration:\n", + "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"\"\n", + "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"\"\n", + "# os.environ[\"AWS_ENDPOINT_URL_S3\"] = \"\"" + ], + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### Step 2: Set Up Port Forwarding\n", + "\n", + "Set up port-forwarding to access cluster services. Run these commands in separate terminal windows:\n", + "\n", + "```bash\n", + "# MLRun API\n", + "kubectl port-forward -n mlrun svc/mlrun-api 30070:8080\n", + "```\n", + "\n", + "```bash\n", + "# MinIO (S3-compatible storage)\n", + "kubectl port-forward -n mlrun svc/minio 9000:9000\n", + "```\n", + "\n", + "```bash\n", + "# Kafka (for CE mode) - requires /etc/hosts entry: 127.0.0.1 kafka-stream\n", + "kubectl port-forward -n mlrun svc/kafka-stream 9092:9092\n", + "```\n", + "\n", + "```bash\n", + "# TDEngine (for CE mode) - requires /etc/hosts entry: 127.0.0.1 tdengine-tsdb\n", + "kubectl port-forward -n mlrun svc/tdengine-tsdb 6041:6041\n", + "```" + ], + "id": "6d1d2d3c016ec62c" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Create Project\n", + "\n", + "We'll first create an MLRun project" + ], + "id": "4442f7ad1b0a8ee" + }, + { + "cell_type": "code", + "id": "2664df3e-d9c6-40dd-a215-29d60e4b4208", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:43:18.142870Z", + "start_time": "2026-02-03T19:43:10.068758Z" + } + }, + "source": [ + "import time\n", + "import datetime\n", + "import mlrun\n", + "\n", + "print(f\"MLRun version: {mlrun.__version__}\")\n", + "print(f\"CE Mode: {mlrun.mlconf.is_ce_mode()}\")\n", + "\n", + "project = mlrun.get_or_create_project(\"langchain-mlrun-tutorial\")\n", + "print(f\"Project: {project.name}\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MLRun version: 1.10.0\n", + "CE Mode: True\n", + "> 2026-02-03 21:43:18,053 [info] Loading project from path: {\"path\":\"./\",\"project_name\":\"langchain-mlrun-tutorial\",\"user_project\":false}\n", + "> 2026-02-03 21:43:18,141 [info] Project loaded successfully: {\"path\":\"./\",\"project_name\":\"langchain-mlrun-tutorial\",\"stored_in_db\":true}\n", + "Project: langchain-mlrun-tutorial\n" + ] + } + 
], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "id": "33f28986-c158-47fd-97a6-74f69892b4eb", + "metadata": {}, + "source": "### Enable Monitoring\n\nTo use MLRun's monitoring feature in our project we first need to set up the monitoring infrastructure.\n\n- **MLRun CE**: Uses Kafka for streaming (automatically detected)\n- **MLRun Enterprise**: Uses V3IO for streaming (automatically detected)\n\nThe cell below automatically detects your MLRun mode and sets up the appropriate streaming infrastructure." + }, + { + "cell_type": "code", + "id": "d9d2fa66-0498-445d-ab4a-8370f46aec1e", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:49:22.700332Z", + "start_time": "2026-02-03T19:43:18.148037Z" + } + }, + "source": [ + "# Create datastore profiles (based on CE or Enterprise):\n", + "if mlrun.mlconf.is_ce_mode():\n", + " print(\"Setting up Kafka streaming for MLRun CE...\")\n", + " from mlrun.datastore.datastore_profile import DatastoreProfileKafkaStream, DatastoreProfileTDEngine\n", + " \n", + " stream_profile = DatastoreProfileKafkaStream(\n", + " name=\"kafka-stream-profile\",\n", + " brokers=os.environ[\"KAFKA_BROKER\"],\n", + " topics=[],\n", + " )\n", + " tsdb_profile = DatastoreProfileTDEngine(\n", + " name=\"tsdb-profile\",\n", + " user=os.environ[\"TDENGINE_USER\"],\n", + " password=os.environ[\"TDENGINE_PASSWORD\"],\n", + " host=os.environ[\"TDENGINE_HOST\"],\n", + " port=int(os.environ[\"TDENGINE_PORT\"]),\n", + " )\n", + " project.register_datastore_profile(stream_profile)\n", + " project.register_datastore_profile(tsdb_profile)\n", + "else: # Enterprise\n", + " print(\"Setting up V3IO streaming for MLRun Enterprise...\")\n", + " from mlrun.datastore import DatastoreProfileV3io\n", + " \n", + " stream_profile = DatastoreProfileV3io(name=\"v3io-ds\", v3io_access_key=os.environ[\"V3IO_ACCESS_KEY\"])\n", + " tsdb_profile = stream_profile\n", + " project.register_datastore_profile(stream_profile)\n", + "\n", + "# Enable monitoring in our project:\n", + "project.set_model_monitoring_credentials(\n", + " stream_profile_name=stream_profile.name,\n", + " tsdb_profile_name=tsdb_profile.name,\n", + ")\n", + "project.enable_model_monitoring(\n", + " base_period=1,\n", + " wait_for_deployment=True,\n", + ")\n", + "\n", + "print(\"Monitoring enabled successfully!\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "f23117fa-7b67-470c-80ca-976d14c2120e", + "metadata": {}, + "source": [ + "### Import `langchain_mlrun`\n", + "\n", + "Now we'll import `langchain_mlrun` from the hub." + ] + }, + { + "cell_type": "code", + "id": "2360cd49-b260-4140-bd16-138349e000b3", + "metadata": {}, + "source": [ + "# Import the module from the hub:\n", + "langchain_mlrun = mlrun.import_module(\"hub://langchain_mlrun\")\n", + "\n", + "# Import the utility function and monitoring application from the module:\n", + "setup_langchain_monitoring = langchain_mlrun.setup_langchain_monitoring\n", + "LangChainMonitoringApp = langchain_mlrun.LangChainMonitoringApp" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "de030131-ebaf-48f8-96ed-3c1013b5e260", + "metadata": {}, + "source": [ + "### Create Monitorable Endpoint\n", + "\n", + "Endpoints are the entities being monitored by MLRun. 
We'll use the `setup_langchain_monitoring()` utility function to create the model monitoring endpoint.\n", + "\n", + "For MLRun CE mode, you must pass the `kafka_stream_profile_name` parameter with the name of the registered Kafka stream profile.\n", + "\n", + "By default, the endpoint name will be `\"langchain_mlrun_endpoint\"` but you can change it by using the `model_endpoint_name` parameter." + ] + }, + { + "cell_type": "code", + "id": "0e9baf78-3d38-46bd-89dd-6f83760eaeb0", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:49:23.861085Z", + "start_time": "2026-02-03T19:49:23.412235Z" + } + }, + "source": [ + "# Pass kafka_stream_profile_name for CE mode (required)\n", + "env_vars = setup_langchain_monitoring(\n", + " kafka_stream_profile_name=stream_profile.name if mlrun.mlconf.is_ce_mode() else None\n", + ")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating LangChain model endpoint\n", + "\n", + " [✓] Loading Project......................... Done (0.00s)\u001B[K\n", + " [✓] Creating Model.......................... Done (0.31s) \u001B[K\n", + " [✓] Creating Function....................... Done (0.04s) \u001B[K\n", + " [✓] Creating Model Endpoint................. Done (0.09s) \u001B[K\n", + "\n", + "✨ Done! LangChain monitoring model endpoint created successfully.\n", + "You can now set the following environment variables to enable MLRun tracing in your LangChain code:\n", + "\n", + "{\n", + " \"MLRUN_MONITORING_ENABLED\": \"1\",\n", + " \"MLRUN_TRACER_CLIENT_PROJECT\": \"langchain-mlrun-tutorial\",\n", + " \"MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_NAME\": \"langchain_mlrun_endpoint\",\n", + " \"MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_UID\": \"d1d2b2686772441cacf687b45cd48ffa\",\n", + " \"MLRUN_TRACER_CLIENT_SERVING_FUNCTION\": \"langchain_mlrun_function\",\n", + " \"MLRUN_TRACER_CLIENT_KAFKA_STREAM_PROFILE_NAME\": \"kafka-stream-profile\"\n", + "}\n", + "\n", + "To customize the monitoring behavior, you can also set additional environment variables prefixed with 'MLRUN_TRACER_MONITOR_'. Refer to the MLRun tracer documentation for more details.\n", + "\n" + ] + } + ], + "execution_count": 6 + }, + { + "cell_type": "markdown", + "id": "dd45c94b-ee05-449c-9336-0aa659e66bda", + "metadata": {}, + "source": [ + "### Setup Environment Variables for Auto Tracing\n", + "\n", + "We'll use the environment variables returned from `setup_langchain_monitoring` to setup the environment for auto-tracing. Read the printed outputs for more information." + ] + }, + { + "cell_type": "code", + "id": "1c1988f8-c80a-4bf2-bfb1-d43523fc161f", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:49:23.866556Z", + "start_time": "2026-02-03T19:49:23.864805Z" + } + }, + "source": [ + "os.environ.update(env_vars)" + ], + "outputs": [], + "execution_count": 7 + }, + { + "cell_type": "markdown", + "id": "d3f3b8e5-3538-4153-95da-e6d8776be3ac", + "metadata": {}, + "source": "### Run `langchain` or `langgraph` Code\n\nHere we have 3 functions, each using different method utilizing LLMs with `langchain` and `langgraph`:\n* `run_simple_chain` - Using `langchain`'s chains.\n* `run_simple_agent` - Using `langchain`'s `create_agent` function and `tool`s.\n* `run_langgraph_graph` - Using pure `langgraph`.\n\n> **Notice**: You don't need to set OpenAI API credentials, there is a mock `ChatModel` that will replace it if the credentials are not set in the environment. 
If you wish to use OpenAI models, make sure you `pip install langchain_openai` and set the `OPENAI_API_KEY` environment variable before continue to the next cell.\n\nBecause the auto-tracing environment is set, any run will be automatically traced and monitored!\n\nFeel free to adjust the code as you like.\n\n> **Remember**: To enable auto-tracing you do need to set the environment variables and import the `langchain_mlrun` module before any LangChain code. For batch jobs and realtime functions, make sure you set env vars in the MLRun function and add the import line `langchain_mlrun = mlrun.import_module(\"hub://langchain_mlrun\")` at the top of your code." + }, + { + "cell_type": "code", + "id": "94b4d4b0-8d10-4ad3-8f16-7b1b7daeac11", + "metadata": { + "tags": [], + "ExecuteTime": { + "end_time": "2026-02-03T19:49:24.899991Z", + "start_time": "2026-02-03T19:49:23.869475Z" + } + }, + "source": [ + "import os\n", + "from typing import Literal, TypedDict, Annotated, Sequence, Any, Callable\n", + "from operator import add\n", + "\n", + "from langchain_core.language_models import LanguageModelInput\n", + "from langchain_core.runnables import Runnable, RunnableLambda\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.language_models.fake_chat_models import FakeListChatModel, GenericFakeChatModel\n", + "from langchain.agents import create_agent\n", + "from langchain_core.messages import AIMessage, HumanMessage\n", + "from langchain_core.tools import tool, BaseTool\n", + "\n", + "from langgraph.graph import StateGraph, START, END\n", + "from langchain_core.messages import BaseMessage\n", + "\n", + "\n", + "def _check_openai_credentials() -> bool:\n", + " \"\"\"\n", + " Check if OpenAI API key is set in environment variables.\n", + "\n", + " :return: True if OPENAI_API_KEY is set, False otherwise.\n", + " \"\"\"\n", + " return \"OPENAI_API_KEY\" in os.environ\n", + "\n", + "\n", + "# Import ChatOpenAI only if OpenAI credentials are available (meaning `langchain-openai` must be installed).\n", + "if _check_openai_credentials():\n", + " from langchain_openai import ChatOpenAI\n", + "\n", + " \n", + "class _ToolEnabledFakeModel(GenericFakeChatModel):\n", + " \"\"\"\n", + " A fake chat model that supports tool binding for running agent tracing tests.\n", + " \"\"\"\n", + "\n", + " def bind_tools(\n", + " self,\n", + " tools: Sequence[\n", + " dict[str, Any] | type | Callable | BaseTool # noqa: UP006\n", + " ],\n", + " *,\n", + " tool_choice: str | None = None,\n", + " **kwargs: Any,\n", + " ) -> Runnable[LanguageModelInput, AIMessage]:\n", + " return self\n", + "\n", + "\n", + "#: Tag value for testing tag filtering.\n", + "_dummy_tag = \"dummy_tag\"\n", + "\n", + "\n", + "def run_simple_chain() -> str:\n", + " \"\"\"\n", + " Run a simple LangChain chain that gets a fact about a topic.\n", + " \"\"\"\n", + " # Build a simple chain: prompt -> llm -> str output parser\n", + " llm = ChatOpenAI(\n", + " model=\"gpt-4o-mini\",\n", + " tags=[_dummy_tag]\n", + " ) if _check_openai_credentials() else (\n", + " FakeListChatModel(\n", + " responses=[\n", + " \"MLRun is an open-source orchestrator for machine learning pipelines.\"\n", + " ],\n", + " tags=[_dummy_tag]\n", + " )\n", + " )\n", + " prompt = ChatPromptTemplate.from_template(\"Tell me a short fact about {topic}\")\n", + " chain = prompt | llm | StrOutputParser()\n", + "\n", + " # Run the chain:\n", + " response = chain.invoke({\"topic\": 
\"MLRun\"})\n", + " return response\n", + "\n", + "\n", + "def run_simple_agent():\n", + " \"\"\"\n", + " Run a simple LangChain agent that uses two tools to get weather and stock price.\n", + " \"\"\"\n", + " # Define the tools:\n", + " @tool\n", + " def get_weather(city: str) -> str:\n", + " \"\"\"Get the current weather for a specific city.\"\"\"\n", + " return f\"The weather in {city} is 22°C and sunny.\"\n", + "\n", + " @tool\n", + " def get_stock_price(symbol: str) -> str:\n", + " \"\"\"Get the current stock price for a symbol.\"\"\"\n", + " return f\"The stock price for {symbol} is $150.25.\"\n", + "\n", + " # Define the model:\n", + " model = ChatOpenAI(\n", + " model=\"gpt-4o-mini\",\n", + " tags=[_dummy_tag]\n", + " ) if _check_openai_credentials() else (\n", + " _ToolEnabledFakeModel(\n", + " messages=iter(\n", + " [\n", + " AIMessage(\n", + " content=\"\",\n", + " tool_calls=[\n", + " {\"name\": \"get_weather\", \"args\": {\"city\": \"London\"}, \"id\": \"call_abc123\"},\n", + " {\"name\": \"get_stock_price\", \"args\": {\"symbol\": \"AAPL\"}, \"id\": \"call_def456\"}\n", + " ]\n", + " ),\n", + " AIMessage(content=\"The weather in London is 22°C and AAPL is trading at $150.25.\")\n", + " ]\n", + " ),\n", + " tags=[_dummy_tag]\n", + " )\n", + " )\n", + "\n", + " # Create the agent:\n", + " agent = create_agent(\n", + " model=model,\n", + " tools=[get_weather, get_stock_price],\n", + " system_prompt=\"You are a helpful assistant with access to tools.\"\n", + " )\n", + "\n", + " # Run the agent:\n", + " return agent.invoke({\"messages\": [\"What is the weather in London and the stock price of AAPL?\"]})\n", + "\n", + "\n", + "def run_langgraph_graph():\n", + " \"\"\"\n", + " Run a LangGraph agent that uses reflection to correct its answer.\n", + " \"\"\"\n", + " # Define the graph state:\n", + " class AgentState(TypedDict):\n", + " messages: Annotated[list[BaseMessage], add]\n", + " attempts: int\n", + "\n", + " # Define the model:\n", + " model = ChatOpenAI(model=\"gpt-4o-mini\") if _check_openai_credentials() else (\n", + " _ToolEnabledFakeModel(\n", + " messages=iter(\n", + " [\n", + " AIMessage(content=\"There are 2 'r's in Strawberry.\"), # Mocking the failure\n", + " AIMessage(content=\"I stand corrected. S-t-r-a-w-b-e-r-r-y. There are 3 'r's.\"), # Mocking the fix\n", + " ]\n", + " )\n", + " )\n", + " )\n", + "\n", + " # Define the graph nodes and router:\n", + " def call_model(state: AgentState):\n", + " response = model.invoke(state[\"messages\"])\n", + " return {\"messages\": [response], \"attempts\": state[\"attempts\"] + 1}\n", + "\n", + " def reflect_node(state: AgentState):\n", + " prompt = \"Wait, count the 'r's again slowly, letter by letter. 
Are you sure?\"\n", + " return {\"messages\": [HumanMessage(content=prompt)]}\n", + "\n", + " def router(state: AgentState) -> Literal[\"reflect\", END]:\n", + " # Make sure there are 2 attempts at least for an answer:\n", + " if state[\"attempts\"] == 1:\n", + " return \"reflect\"\n", + " return END\n", + "\n", + " # Build the graph:\n", + " builder = StateGraph(AgentState)\n", + " builder.add_node(\"model\", call_model)\n", + " tagged_reflect_node = RunnableLambda(reflect_node).with_config(tags=[_dummy_tag])\n", + " builder.add_node(\"reflect\", tagged_reflect_node)\n", + " builder.add_edge(START, \"model\")\n", + " builder.add_conditional_edges(\"model\", router)\n", + " builder.add_edge(\"reflect\", \"model\")\n", + " graph = builder.compile()\n", + "\n", + " # Run the graph:\n", + " return graph.invoke({\"messages\": [HumanMessage(content=\"How many 'r's in Strawberry?\")], \"attempts\": 0})" + ], + "outputs": [], + "execution_count": 8 + }, + { + "cell_type": "markdown", + "id": "49964f96-89ba-4f61-8788-38290a877aa2", + "metadata": {}, + "source": "Let's create some traffic, we'll run whatever function you want in a loop to get some events. We take timestamps in order to use them later to run the monitoring application on the data we'll send." + }, + { + "cell_type": "code", + "id": "b7e6418d-76f4-4b18-9ef9-c5bb40b20545", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T22:05:54.601563Z", + "start_time": "2026-02-03T22:05:52.518385Z" + } + }, + "source": [ + "# Run LangChain code and now it should be tracked and monitored in MLRun:\n", + "start_timestamp = datetime.datetime.now() - datetime.timedelta(minutes=1)\n", + "for i in range(20):\n", + " run_simple_agent()\n", + "end_timestamp = datetime.datetime.now() + datetime.timedelta(minutes=5)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-04 00:05:52,553 [info] Project loaded successfully: {\"project_name\":\"langchain-mlrun-tutorial\"}\n" + ] + } + ], + "execution_count": 13 + }, + { + "cell_type": "markdown", + "id": "d9085765-91fd-4d31-84b4-927ecf9cc455", + "metadata": {}, + "source": "> **Note**: Please wait a minute or two until the events are processed." + }, + { + "cell_type": "code", + "id": "85fae3e4-5f1b-4f0c-ba71-81060f10804f", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:50:26.655189Z", + "start_time": "2026-02-03T19:49:26.648461Z" + } + }, + "source": [ + "time.sleep(60)" + ], + "outputs": [], + "execution_count": 10 + }, + { + "cell_type": "markdown", + "id": "2475ebec-fc32-4884-9723-3ca9cfde577f", + "metadata": {}, + "source": [ + "### Test the LangChain Monitoring Application\n", + "\n", + "To test a monitoring application, we use the `evaluate` class method. We'll run an evaluation on the data we just sent. It is a small local job and should run fast.\n", + "\n", + "Keep an eye for the returned metrics from the monitoring application." 
+ ] + }, + { + "cell_type": "code", + "id": "3d046755-9153-497a-a024-5d63316e1f91", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:50:28.003195Z", + "start_time": "2026-02-03T19:50:26.670024Z" + } + }, + "source": [ + "LangChainMonitoringApp.evaluate(\n", + " func_name=\"langchain-monitoring-app-test\",\n", + " func_path=\"langchain_mlrun.py\",\n", + " run_local=True,\n", + " endpoints=[env_vars[\"LC_MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_NAME\"]],\n", + " start=start_timestamp.isoformat(),\n", + " end=end_timestamp.isoformat(),\n", + ")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-03 21:50:26,671 [info] Changing function name - adding `\"-batch\"` suffix: {\"func_name\":\"langchain-monitoring-app-test-batch\"}\n", + "> 2026-02-03 21:50:26,815 [warning] It is recommended to use k8s secret (specify secret_name), specifying aws_access_key/aws_secret_key directly is unsafe.\n", + "> 2026-02-03 21:50:26,829 [info] Storing function: {\"db\":\"http://localhost:30070\",\"name\":\"langchain-monitoring-app-test-batch--handler\",\"uid\":\"f2c3c94681094915beb2c5c1ccc0dac8\"}\n", + "> 2026-02-03 21:50:27,953 [warning] No data was found for any of the specified endpoints. No results were produced: {\"application_name\":\"langchain-monitoring-app-test-batch\",\"end\":\"2026-02-03T21:54:26.640556\",\"endpoints\":[\"langchain_mlrun_endpoint\"],\"start\":\"2026-02-03T21:48:24.904667\"}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartendstatekindnamelabelsinputsparametersresults
langchain-mlrun-tutorial
...c0dac8
0Feb 03 19:50:26NaTcompletedrunlangchain-monitoring-app-test-batch--handler
kind=local
owner=Tomer_Weitzman
host=M-QXN63PHMF9
endpoints=['langchain_mlrun_endpoint']
start=2026-02-03T21:48:24.904667
end=2026-02-03T21:54:26.640556
base_period=None
write_output=False
existing_data_handling=fail_on_overlap
stream_profile=None
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + " > to track results use the .show() or .logs() methods " + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2026-02-03 21:50:28,001 [info] Run execution finished: {\"name\":\"langchain-monitoring-app-test-batch--handler\",\"status\":\"completed\"}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11 + }, + { + "cell_type": "markdown", + "id": "eda724c3-27f3-4d28-a7ba-1e59b9be2a37", + "metadata": {}, + "source": "### Deploy the Monitoring Application\n\nAll that's left to do now is to deploy our monitoring application!" + }, + { + "cell_type": "code", + "id": "652b00d4-070d-4849-9784-4d461cb83eae", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-03T19:52:29.502406Z", + "start_time": "2026-02-03T19:50:28.009318Z" + } + }, + "source": "# Deploy the monitoring app:\nLangChainMonitoringApp.deploy(\n func_name=\"langchain-monitoring-app\",\n func_path=\"langchain_mlrun.py\",\n image=\"mlrun/mlrun\",\n requirements=[\n \"langchain\",\n \"pydantic-settings\",\n ],\n)", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "c23bef7a-cbdb-4b22-a2d9-2edbfde5eb04", + "metadata": {}, + "source": [ + "Once it is deployed, you can run events again and see the monitoring application in MLRun UI in action:\n", + "\n", + "![mlrun ui example](./notebook_images/mlrun_ui.png)" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "fc994d2114a89a25" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/src/langchain_mlrun/langchain_mlrun.py b/modules/src/langchain_mlrun/langchain_mlrun.py new file mode 100644 index 000000000..920354bfb --- /dev/null +++ b/modules/src/langchain_mlrun/langchain_mlrun.py @@ -0,0 +1,1840 @@ +# Copyright 2026 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +MLRun to LangChain integration - a tracer that converts LangChain Run objects into serializable event and send them to +MLRun monitoring. 
+""" + +from abc import ABC, abstractmethod +import copy +import importlib +import orjson +import os +import socket +from uuid import UUID +import threading +from contextlib import contextmanager +from contextvars import ContextVar +import datetime +from typing import Any, Callable, Generator, Optional + +from langchain_core.tracers import BaseTracer, Run +from langchain_core.tracers.context import register_configure_hook + +from pydantic import Field, field_validator, model_validator +from pydantic_settings import BaseSettings, SettingsConfigDict +from uuid_utils import uuid7 + +import mlrun +from mlrun.runtimes import RemoteRuntime +from mlrun.model_monitoring.applications import ( + ModelMonitoringApplicationBase, ModelMonitoringApplicationMetric, + ModelMonitoringApplicationResult, MonitoringApplicationContext, +) +import mlrun.common.schemas.model_monitoring.constants as mm_constants + +#: Environment variable name to use MLRun monitoring tracer via LangChain global tracing system: +mlrun_monitoring_env_var = "LC_MLRUN_MONITORING_ENABLED" + + +class _MLRunEndPointClient(ABC): + """ + An MLRun model endpoint monitoring client base class to connect and send events on a monitoring stream. + """ + + def __init__( + self, + model_endpoint_name: str, + model_endpoint_uid: str, + serving_function: str | RemoteRuntime, + serving_function_tag: str | None = None, + project: str | mlrun.projects.MlrunProject = None, + ): + """ + Initialize an MLRun model endpoint monitoring client. + + :param model_endpoint_name: The monitoring endpoint related model name. + :param model_endpoint_uid: Model endpoint unique identifier. + :param serving_function: Serving function name or ``RemoteRuntime`` object. + :param serving_function_tag: Optional function tag (defaults to 'latest'). + :param project: Project name or ``MlrunProject``. If ``None``, uses the current project. + raise: MLRunInvalidArgumentError: If there is no current active project and no `project` argument was provided. + """ + # Store the provided info: + self._model_endpoint_name = model_endpoint_name + self._model_endpoint_uid = model_endpoint_uid + + # Load project: + if project is None: + try: + self._project_name = mlrun.get_current_project(silent=False).name + except mlrun.errors.MLRunInvalidArgumentError: + raise mlrun.errors.MLRunInvalidArgumentError( + "There is no current active project. Either use `mlrun.get_or_create_project` prior to " + "initializing the monitoring tracer or pass a project name to load. You can also set the " + "environment variable: 'LC_MLRUN_TRACER_CLIENT_PROJECT'." 
+ ) + elif isinstance(project, str): + self._project_name = project + else: + self._project_name = project.name + + # Load function: + if isinstance(serving_function, str): + self._serving_function_name = serving_function + self._serving_function_tag = serving_function_tag or "latest" + else: + self._serving_function_name = serving_function.metadata.name + self._serving_function_tag = ( + serving_function_tag or serving_function.metadata.tag + ) + + # Prepare the sample: + self._event_sample = { + "class": "CustomStream", + "worker": "0", + "model": self._model_endpoint_name, + "host": socket.gethostname(), + "function_uri": f"{self._project_name}/{self._serving_function_name}:{self._serving_function_tag}", + "endpoint_id": self._model_endpoint_uid, + "sampling_percentage": 100, + "request": {"inputs": [], "background_task_state": "succeeded"}, + "op": "infer", + "resp": { + "id": None, + "model_name": self._model_endpoint_name, + "outputs": [], + "timestamp": None, + "model_endpoint_uid": self._model_endpoint_uid, + }, + "when": None, + "microsec": 496, + "effective_sample_count": 1, + } + + @abstractmethod + def monitor( + self, + event_id: str, + label: str, + input_data: dict, + output_data: dict, + request_timestamp: str, + response_timestamp: str, + ): + """ + Monitor the provided event, sending it to the model endpoint monitoring stream. + + :param event_id: Unique event identifier used as the monitored record id. + :param label: Label for the run/event. + :param input_data: Serialized input data for the run. + :param output_data: Serialized output data for the run. + :param request_timestamp: Request/start timestamp in the format of '%Y-%m-%d %H:%M:%S%z'. + :param response_timestamp: Response/end timestamp in the format of '%Y-%m-%d %H:%M:%S%z'. + """ + pass + + def flush(self): + """ + Flush any buffered messages to ensure they are sent to the stream. + + For streaming backends that buffer messages (like Kafka), this ensures delivery. For backends that send + immediately (like V3IO), this may be a no-op. + """ + pass + + def _create_event( + self, + event_id: str, + label: str, + input_data: dict, + output_data: dict, + request_timestamp: str, + response_timestamp: str, + ) -> dict: + """ + Create a new event out of the stored event sample. + + :param event_id: Unique event identifier used as the monitored record id. + :param label: Label for the run/event. + :param input_data: Serialized input data for the run. + :param output_data: Serialized output data for the run. + :param request_timestamp: Request/start timestamp in the format of '%Y-%m-%d %H:%M:%S%z'. + :param response_timestamp: Response/end timestamp in the format of '%Y-%m-%d %H:%M:%S%z'. + + :returns: The event to send to the monitoring stream. + """ + # Copy the sample: + event = copy.deepcopy(self._event_sample) + + # Edit event with given parameters: + event["when"] = request_timestamp + event["request"]["inputs"].append(orjson.dumps({"label": label, "input": input_data}).decode('utf-8')) + event["resp"]["timestamp"] = response_timestamp + event["resp"]["outputs"].append(orjson.dumps(output_data).decode('utf-8')) + event["resp"]["id"] = event_id + + return event + + +class _V3IOMLRunEndPointClient(_MLRunEndPointClient): + """ + An MLRun model endpoint monitoring client to connect and send events on a V3IO stream. 
+ """ + + def __init__( + self, + monitoring_stream_path: str, + monitoring_container: str, + model_endpoint_name: str, + model_endpoint_uid: str, + serving_function: str | RemoteRuntime, + serving_function_tag: str | None = None, + project: str | mlrun.projects.MlrunProject = None, + ): + """ + Initialize an MLRun model endpoint monitoring client. + + :param monitoring_stream_path: V3IO stream path. + :param monitoring_container: V3IO container name. + :param model_endpoint_name: The monitoring endpoint related model name. + :param model_endpoint_uid: Model endpoint unique identifier. + :param serving_function: Serving function name or ``RemoteRuntime`` object. + :param serving_function_tag: Optional function tag (defaults to 'latest'). + :param project: Project name or ``MlrunProject``. If ``None``, uses the current project. + raise: MLRunInvalidArgumentError: If there is no current active project and no `project` argument was provided. + """ + super().__init__( + model_endpoint_name=model_endpoint_name, + model_endpoint_uid=model_endpoint_uid, + serving_function=serving_function, + serving_function_tag=serving_function_tag, + project=project, + ) + + import v3io + + # Store the provided info: + self._monitoring_stream_path = monitoring_stream_path + self._monitoring_container = monitoring_container + + # Initialize a V3IO client: + self._v3io_client = v3io.Client() + + def monitor( + self, + event_id: str, + label: str, + input_data: dict, + output_data: dict, + request_timestamp: str, + response_timestamp: str, + ): + """ + Monitor the provided event, sending it to the model endpoint monitoring stream. + + :param event_id: Unique event identifier used as the monitored record id. + :param label: Label for the run/event. + :param input_data: Serialized input data for the run. + :param output_data: Serialized output data for the run. + :param request_timestamp: Request/start timestamp in the format of '%Y-%m-%d %H:%M:%S%z'. + :param response_timestamp: Response/end timestamp in the format of '%Y-%m-%d %H:%M:%S%z'. + """ + # Copy the sample: + event = self._create_event( + event_id=event_id, + label=label, + input_data=input_data, + output_data=output_data, + request_timestamp=request_timestamp, + response_timestamp=response_timestamp, + ) + + # Push to stream: + self._v3io_client.stream.put_records( + container=self._monitoring_container, + stream_path=self._monitoring_stream_path, + records=[{"data": orjson.dumps(event).decode('utf-8')}], + ) + + +class _KafkaMLRunEndPointClient(_MLRunEndPointClient): + """ + An MLRun model endpoint monitoring client to connect and send events on a Kafka stream. + """ + + def __init__( + self, + kafka_stream_profile_name: str, + model_endpoint_name: str, + model_endpoint_uid: str, + serving_function: str | RemoteRuntime, + serving_function_tag: str | None = None, + project: str | mlrun.projects.MlrunProject = None, + kafka_linger_ms: int = 0, + ): + """ + Initialize an MLRun model endpoint monitoring client for Kafka. + + :param kafka_stream_profile_name: The name of the registered DatastoreProfileKafkaStream to use for Kafka + configuration. This profile should be registered via ``project.register_datastore_profile()`` and + contains all Kafka settings including broker, topic, SASL credentials, SSL config, etc. + :param model_endpoint_name: The monitoring endpoint related model name. + :param model_endpoint_uid: Model endpoint unique identifier. + :param serving_function: Serving function name or ``RemoteRuntime`` object. 
:param serving_function_tag: Optional function tag (defaults to 'latest').
+        :param project: Project name or ``MlrunProject``. If ``None``, uses the current project.
+        :param kafka_linger_ms: Kafka producer linger.ms setting controlling message batching. Messages are
+            accumulated for up to this duration before being sent as a batch. Default: 0 (messages are sent
+            immediately; the tracer settings default to 500ms).
+        raise: MLRunInvalidArgumentError: If there is no current active project and no `project` argument was provided.
+        """
+        super().__init__(
+            model_endpoint_name=model_endpoint_name,
+            model_endpoint_uid=model_endpoint_uid,
+            serving_function=serving_function,
+            serving_function_tag=serving_function_tag,
+            project=project,
+        )
+
+        from kafka import KafkaProducer
+        from mlrun.datastore.utils import KafkaParameters
+        from mlrun.common.model_monitoring.helpers import get_kafka_topic
+
+        # Get project object using resolved project name from parent:
+        project_obj = mlrun.get_or_create_project(self._project_name)
+
+        # Fetch the Kafka stream profile:
+        stream_profile = project_obj.get_datastore_profile(profile=kafka_stream_profile_name)
+
+        # Get profile attributes and convert to producer config:
+        profile_attrs = stream_profile.attributes()
+        kafka_params = KafkaParameters(kwargs=profile_attrs)
+        producer_config = kafka_params.producer()
+
+        # Extract broker and determine topic (use profile's topic if available, otherwise use MLRun's standard naming):
+        self._monitoring_broker = profile_attrs.get("brokers")
+        topics = profile_attrs.get("topics", [])
+        self._monitoring_topic = topics[0] if topics else get_kafka_topic(project=project_obj.name)
+
+        # Remove bootstrap_servers from producer_config to avoid duplicate argument error:
+        producer_config.pop("bootstrap_servers", None)
+
+        # Initialize a Kafka producer with full config from profile:
+        self._kafka_producer = KafkaProducer(
+            bootstrap_servers=self._monitoring_broker,
+            key_serializer=lambda k: k.encode("utf-8") if isinstance(k, str) else k,
+            value_serializer=(
+                lambda v: v if isinstance(v, bytes)
+                else orjson.dumps(v) if isinstance(v, dict)
+                else str(v).encode("utf-8")
+            ),
+            linger_ms=kafka_linger_ms,
+            **producer_config,
+        )
+
+    def monitor(
+        self,
+        event_id: str,
+        label: str,
+        input_data: dict,
+        output_data: dict,
+        request_timestamp: str,
+        response_timestamp: str,
+    ):
+        """
+        Monitor the provided event, sending it to the model endpoint monitoring stream.
+
+        :param event_id: Unique event identifier used as the monitored record id.
+        :param label: Label for the run/event.
+        :param input_data: Serialized input data for the run.
+        :param output_data: Serialized output data for the run.
+        :param request_timestamp: Request/start timestamp in the format of '%Y-%m-%d %H:%M:%S%z'.
+        :param response_timestamp: Response/end timestamp in the format of '%Y-%m-%d %H:%M:%S%z'.
+        """
+        # Create the event from the stored sample:
+        event = self._create_event(
+            event_id=event_id,
+            label=label,
+            input_data=input_data,
+            output_data=output_data,
+            request_timestamp=request_timestamp,
+            response_timestamp=response_timestamp,
+        )
+
+        # Push to stream (async - message is buffered):
+        self._kafka_producer.send(
+            topic=self._monitoring_topic,
+            value=event,  # Will be serialized by the value_serializer
+            key=self._model_endpoint_uid,
+        )
+
+    def flush(self):
+        """
+        Flush all buffered messages to ensure they are sent to Kafka.
+
+        Blocks until all buffered messages are delivered and acknowledged by the broker.
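+
+        Note: the tracer already calls ``flush`` at the end of every root run (see ``_persist_run``), so buffered
+        events are delivered even when ``kafka_linger_ms`` batching is enabled.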
+ """ + self._kafka_producer.flush() + + +class MLRunTracerClientSettings(BaseSettings): + """ + MLRun tracer monitoring client configurations. These are mandatory arguments for allowing MLRun to send monitoring + events to a specific model endpoint stream. + """ + + v3io_stream_path: str | None = None + """ + The V3IO stream path to send the events to. + """ + + v3io_container: str | None = None + """ + The V3IO stream container. + """ + + kafka_stream_profile_name: str | None = None + """ + The name of the registered DatastoreProfileKafkaStream to use for Kafka configuration. This profile should be + registered via ``project.register_datastore_profile()`` and contains all Kafka settings including broker, topic, + SASL credentials, SSL config, etc. + """ + + kafka_linger_ms: int = 500 + """ + The Kafka producer linger.ms setting controlling message batching (in milliseconds). Messages are accumulated for + up to this duration before being sent as a batch, reducing network overhead. + + The tracer always flushes at the end of each root run, guaranteeing delivery regardless of this setting. + Default: 500ms. Set to 0 to disable batching (each message sent immediately). + """ + + model_endpoint_name: str = ... + """ + The model endpoint name. + """ + + model_endpoint_uid: str = ... + """ + The model endpoint UID. + """ + + serving_function: str = ... + """ + The serving function name. + """ + + serving_function_tag: str | None = None + """ + The serving function tag. If not set, it will be 'latest' by default. + """ + + project: str | None = None + """ + The MLRun project name related to the serving function and model endpoint. + """ + + #: Pydantic model configuration to set the environment variable prefix. + model_config = SettingsConfigDict(env_prefix="LC_MLRUN_TRACER_CLIENT_") + + @model_validator(mode='after') + def validate_stream_settings(self) -> 'MLRunTracerClientSettings': + """ + Validate that either V3IO settings or stream profile name is provided, but not both or none. + + :returns: The validated settings instance. + """ + v3io_settings = all([self.v3io_container, self.v3io_stream_path]) + kafka_settings = self.kafka_stream_profile_name is not None + + if v3io_settings and kafka_settings: + raise ValueError("Provide either V3IO settings OR Kafka settings, not both.") + if not v3io_settings and not kafka_settings: + raise ValueError("You must provide either a complete V3IO settings or complete Kafka settings. See docs for more information") + return self + +class MLRunTracerMonitorSettings(BaseSettings): + """ + MLRun tracer monitoring configurations. These are optional arguments to customize the LangChain runs summarization + into monitorable MLRun endpoint events. If needed, a custom summarization can be passed. + """ + + label: str = "default" + """ + Label to use for all monitored runs. Can be used to differentiate between different monitored sources on the same + endpoint. + """ + + tags_filter: list[str] | None = None + """ + Filter runs by tags. Only runs with at least one tag in this list will be monitored. + If None, no tag-based filtering is applied and runs with any tags are considered. + Default: None. + """ + + run_types_filter: list[str] | None = None + """ + Filter runs by run types (e.g. "chain", "llm", "chat", "tool"). + Only runs whose `run_type` appears in this list will be monitored. + If None, no run-type filtering is applied. + Default: None. + """ + + names_filter: list[str] | None = None + """ + Filter runs by class/name. 
Only runs whose `name` appears in this list will be monitored. + If None, no name-based filtering is applied. + Default: None. + """ + + include_full_run: bool = False + """ + If True, include the complete serialized run dict (the output of `run._get_dicts_safe()`) + in the event outputs under the key `full_run`. Useful for debugging or when consumers need + the raw run payload. Default: False. + """ + + include_errors: bool = True + """ + If True, include run error information in the outputs under the `error` key. + If False, runs that contain an error may be skipped by the summarizer filters. + Default: True. + """ + + include_metadata: bool = True + """ + If True, include run metadata (environment, tool metadata, etc.) in the inputs under + the `metadata` key. Default: True. + """ + + include_latency: bool = True + """ + If True, include latency information in the outputs under the `latency` key. + Default: True. + """ + + root_run_only: bool = False + """ + If True, only the root/top-level run will be monitored and any child runs will be + ignored/removed from monitoring. Use when only the top-level run should produce events. + Default: False. + """ + + split_runs: bool = False + """ + If True, child runs are emitted as separate monitoring events (each run summarized and + sent individually). If False, child runs are nested inside the parent/root run event under + `child_runs`. Default: False. + """ + + run_summarizer_function: ( + str + | Callable[ + [Run, Optional[BaseSettings]], + Generator[tuple[dict, dict] | None, None, None], + ] + | None + ) = None + """ + A function to summarize a `Run` object into a tuple of inputs and outputs. Can be passed directly or via a full + module path ("a.b.c.my_summarizer" will be imported as `from a.b.c import my_summarizer`). + + A summarizer is a function that will be used to process a run into monitoring events. The function is expected to be + of type: + `Callable[[Run, Optional[BaseSettings]], Generator[tuple[dict, dict] | None, None, None]]`, meaning + get a run object and optionally a settings object and return a generator yielding tuples of serialized dictionaries, + the (inputs, outputs) to send to MLRun monitoring as events or `None` to skip monitoring this run. + """ + + run_summarizer_settings: str | BaseSettings | None = None + """ + Settings to pass to the run summarizer function. Can be passed directly or via a full module path to be imported + and initialized. If the summarizer function does not require settings, this can be left as None. + """ + + debug: bool = False + """ + If True, disable sending events to MLRun and instead route events to `debug_target_list` + or print them as JSON to stdout. Useful for unit tests and local debugging. Default: False. + """ + + debug_target_list: list[dict] | bool = False + """ + Optional list to which debug events will be appended when `debug` is True. + If set, each generated event dict will be appended to this list. If not set and `debug` is True, + events will be printed to stdout as JSON. Default: False. + """ + + #: Pydantic model configuration to set the environment variable prefix. + model_config = SettingsConfigDict(env_prefix="LC_MLRUN_TRACER_MONITOR_") + + @field_validator('debug_target_list', mode='before') + @classmethod + def convert_bool_to_list(cls, v): + """ + Convert a boolean `True` value to an empty list for `debug_target_list`. + + :param v: The value to validate. + + :returns: An empty list if `v` is True, otherwise the original value. 
+        """
+        if v is True:
+            return []
+        return v
+
+
+class MLRunTracerSettings(BaseSettings):
+    """
+    MLRun tracer settings to configure the tracer. The settings are split into two groups:
+
+    * `client`: settings required to connect and send events to the MLRun monitoring stream.
+    * `monitor`: settings controlling which LangChain runs are summarized and sent and how.
+    """
+
+    client: MLRunTracerClientSettings = Field(default_factory=MLRunTracerClientSettings)
+    """
+    Client configuration group (``MLRunTracerClientSettings``).
+
+    Contains the mandatory connection and endpoint information required to publish monitoring
+    events. Values may be supplied programmatically or via environment variables prefixed with
+    `LC_MLRUN_TRACER_CLIENT_`. See more at ``MLRunTracerClientSettings``.
+    """
+
+    monitor: MLRunTracerMonitorSettings = Field(default_factory=MLRunTracerMonitorSettings)
+    """
+    Monitoring configuration group (``MLRunTracerMonitorSettings``).
+
+    Controls what runs are captured, how they are summarized (including custom summarizer import
+    options), whether child runs are split or nested, and debug behavior. Values may be supplied
+    programmatically or via environment variables prefixed with `LC_MLRUN_TRACER_MONITOR_`.
+    See more at ``MLRunTracerMonitorSettings``.
+    """
+
+    #: Pydantic model configuration to set the environment variable prefix.
+    model_config = SettingsConfigDict(env_prefix="LC_MLRUN_TRACER_")
+
+
+class MLRunTracer(BaseTracer):
+    """
+    MLRun tracer for LangChain runs, allowing LangChain and LangGraph to be monitored in production using MLRun's
+    monitoring.
+
+    There are two usage modes for the MLRun tracer following LangChain tracing best practices:
+
+    1. **Manual Mode** - Using the ``mlrun_monitoring`` context manager::
+
+        from langchain_mlrun import mlrun_monitoring
+
+        with mlrun_monitoring(...) as tracer:
+            # LangChain code here.
+            pass
+
+    2. **Auto Mode** - Setting the `LC_MLRUN_MONITORING_ENABLED="1"` environment variable::
+
+        import langchain_mlrun
+
+        # All LangChain code will be automatically traced and monitored.
+        pass
+
+    To control how runs are summarized into the monitored events, the ``MLRunTracerSettings`` can be set.
+    As it is a Pydantic ``BaseSettings`` class, it can be done in two ways:
+
+    1. Initializing the settings classes and passing them to the context manager::
+
+        from langchain_mlrun import (
+            mlrun_monitoring,
+            MLRunTracerSettings,
+            MLRunTracerClientSettings,
+            MLRunTracerMonitorSettings,
+        )
+
+        my_settings = MLRunTracerSettings(
+            client=MLRunTracerClientSettings(),
+            monitor=MLRunTracerMonitorSettings(root_run_only=True),
+        )
+
+        with mlrun_monitoring(settings=my_settings) as tracer:
+            # LangChain code here.
+            pass
+
+    2. Or via environment variables following the prefix 'LC_MLRUN_TRACER_CLIENT_' for client settings and
+       'LC_MLRUN_TRACER_MONITOR_' for monitoring settings.
+    """
+
+    #: A singleton tracer for when using the tracer via environment variable to activate global tracing.
+    _singleton_tracer: "MLRunTracer | None" = None
+    #: A thread lock for initializing the tracer singleton safely.
+    _lock = threading.Lock()
+    #: A boolean flag to know whether the singleton was initialized.
+    _initialized = False
+
+    def __new__(cls, *args, **kwargs) -> "MLRunTracer":
+        """
+        Create or return an ``MLRunTracer`` instance.
+
+        When ``LC_MLRUN_MONITORING_ENABLED`` is not set to ``"1"``, a normal instance is returned.
+        When the env var is ``"1"``, a process-wide singleton is returned. Creation is thread-safe.
+ + :returns: MLRunTracer instance (singleton if 'auto' mode is active). + """ + # Check if needed to use a singleton as the user is using the MLRun tracer by setting the environment variable + # and not manually (via context manager): + if not cls._check_for_env_var_usage(): + return super(MLRunTracer, cls).__new__(cls) + + # Check if the singleton is set: + if cls._singleton_tracer is None: + # Acquire lock to initialize the singleton: + with cls._lock: + # Double-check after acquiring lock: + if cls._singleton_tracer is None: + cls._singleton_tracer = super(MLRunTracer, cls).__new__(cls) + + return cls._singleton_tracer + + def __init__(self, settings: MLRunTracerSettings = None, **kwargs): + """ + Initialize the tracer. + + :param settings: Settings to use for the tracer. If not passed, defaults are used and environment variables are + applied per Pydantic settings behavior. + :param kwargs: Passed to the base initializer. + """ + # Proceed with initialization only if singleton mode is not required or the singleton was not initialized: + if self._check_for_env_var_usage() and self._initialized: + return + + # Call the base tracer init: + super().__init__(**kwargs) + + # Set a UID for this instance: + self._uid = uuid7() + + # Set the settings: + self._settings = settings or MLRunTracerSettings() + self._client_settings = self._settings.client + self._monitor_settings = self._settings.monitor + + # Initialize the MLRun endpoint client: + self._mlrun_client = ( + self._get_mlrun_client() + if not self._monitor_settings.debug + else None + ) + + # In case the user passed a custom summarizer, import it: + self._custom_run_summarizer_function: ( + Callable[ + [Run, Optional[BaseSettings]], + Generator[tuple[dict, dict] | None, None, None], + ] + | None + ) = None + self._custom_run_summarizer_settings: BaseSettings | None = None + self._import_custom_run_summarizer() + + # Mark the initialization flag (for the singleton case): + self._initialized = True + + @property + def settings(self) -> MLRunTracerSettings: + """ + Access the effective settings. + + :returns: The settings used by this tracer. + """ + return self._settings + + def _get_mlrun_client(self) -> _MLRunEndPointClient: + """ + Create and return an MLRun model endpoint monitoring client based on the MLRun (CE or not) and current + configuration. + + :returns: An MLRun model endpoint monitoring client. + """ + if mlrun.mlconf.is_ce_mode(): + return _KafkaMLRunEndPointClient( + kafka_stream_profile_name=self._client_settings.kafka_stream_profile_name, + model_endpoint_name=self._client_settings.model_endpoint_name, + model_endpoint_uid=self._client_settings.model_endpoint_uid, + serving_function=self._client_settings.serving_function, + serving_function_tag=self._client_settings.serving_function_tag, + project=self._client_settings.project, + kafka_linger_ms=self._client_settings.kafka_linger_ms, + ) + return _V3IOMLRunEndPointClient( + monitoring_stream_path=self._client_settings.v3io_stream_path, + monitoring_container=self._client_settings.v3io_container, + model_endpoint_name=self._client_settings.model_endpoint_name, + model_endpoint_uid=self._client_settings.model_endpoint_uid, + serving_function=self._client_settings.serving_function, + serving_function_tag=self._client_settings.serving_function_tag, + project=self._client_settings.project, + ) + + def _import_custom_run_summarizer(self): + """ + Import or assign a custom run summarizer (and its custom settings) if configured. 
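+
+        A summarizer is a generator taking a run and optional settings and yielding ``(inputs, outputs)`` tuples
+        (or ``None`` to skip the run); a minimal, hypothetical example::
+
+            def my_summarizer(run, settings=None):
+                # Emit one event per run, or yield None to skip monitoring it.
+                yield {"inputs": run.inputs}, {"outputs": run.outputs}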
+ """ + # If the user did not pass a run summarizer function, return: + if not self._monitor_settings.run_summarizer_function: + return + + # Check if the function needs to be imported: + if isinstance(self._monitor_settings.run_summarizer_function, str): + self._custom_run_summarizer_function = self._import_from_module_path( + module_path=self._monitor_settings.run_summarizer_function + ) + else: + self._custom_run_summarizer_function = ( + self._monitor_settings.run_summarizer_function + ) + + # Check if the user passed settings as well: + if self._monitor_settings.run_summarizer_settings: + # Check if the settings need to be imported: + if isinstance(self._monitor_settings.run_summarizer_settings, str): + self._custom_run_summarizer_settings = self._import_from_module_path( + module_path=self._monitor_settings.run_summarizer_settings + )() + else: + self._custom_run_summarizer_settings = ( + self._monitor_settings.run_summarizer_settings + ) + + def _persist_run(self, run: Run, level: int = 0) -> None: + """ + Summarize the run (and its children) into MLRun monitoring events. + + Note: This will use the MLRun tracer's default summarization that can be configured via + ``MLRunTracerMonitorSettings``, unless a custom summarizer was provided (via the same settings). + + :param run: LangChain run object to process holding all the nested tree of runs. + :param level: The nesting level of the run (0 for root runs, incremented for child runs). + """ + try: + # Serialize the run: + serialized_run = self._serialize_run( + run=run, + include_child_runs=not (self._settings.monitor.root_run_only or self._settings.monitor.split_runs) + ) + + # Check for a user custom run summarizer function: + if self._custom_run_summarizer_function: + for summarized_run in self._custom_run_summarizer_function( + run, self._custom_run_summarizer_settings + ): + if summarized_run: + inputs, outputs = summarized_run + self._send_run_event( + event_id=serialized_run["id"], + inputs=inputs, + outputs=outputs, + start_time=run.start_time, + end_time=run.end_time, + ) + return + + # Check how to deal with the child runs, monitor them in separate events or as a single event: + if self._monitor_settings.split_runs and not self._settings.monitor.root_run_only: + # Monitor as separate events: + for child_run in run.child_runs: + self._persist_run(run=child_run, level=level + 1) + summarized_run = self._summarize_run(serialized_run=serialized_run, include_children=False) + if summarized_run: + inputs, outputs = summarized_run + inputs["child_level"] = level + self._send_run_event( + event_id=serialized_run["id"], + inputs=inputs, + outputs=outputs, + start_time=run.start_time, + end_time=run.end_time, + ) + return + + # Monitor the root event (include child runs if `root_run_only` is False): + summarized_run = self._summarize_run( + serialized_run=serialized_run, + include_children=not self._monitor_settings.root_run_only + ) + if not summarized_run: + return + inputs, outputs = summarized_run + inputs["child_level"] = level + self._send_run_event( + event_id=serialized_run["id"], + inputs=inputs, + outputs=outputs, + start_time=run.start_time, + end_time=run.end_time, + ) + finally: + # Flush buffered messages after root run completion to ensure delivery: + if level == 0 and self._mlrun_client: + self._mlrun_client.flush() + + def _serialize_run(self, run: Run, include_child_runs: bool) -> dict: + """ + Serialize a LangChain run into a dictionary. + + :param run: The run to serialize. 
+ :param include_child_runs: Whether to include child runs in the serialization. + + :returns: The serialized run dictionary. + """ + # In LangChain 1.2.3+, the Run model uses Pydantic v2 with child_runs marked as Field(exclude=True), so we + # must manually serialize child runs. Still excluding manually for future compatibility. In previous + # LangChain versions, Run was Pydantic v1, so we use dict. + serialized_run = ( + run.model_dump(exclude={"child_runs"}) + if hasattr(run, "model_dump") + else run.dict(exclude={"child_runs"}) + ) + + # Manually serialize child runs if needed: + if include_child_runs and run.child_runs: + serialized_run["child_runs"] = [ + self._serialize_run(child_run, include_child_runs=True) + for child_run in run.child_runs + ] + + return orjson.loads(orjson.dumps(serialized_run, default=self._serialize_default)) + + def _serialize_default(self, obj: Any): + """ + Default serializer for objects present in LangChain run that are not serializable by default JSON encoder. It + includes handling Pydantic v1 and v2 models, UUIDs, and datetimes. + + :param obj: The object to serialize. + + :returns: The serialized object. + """ + if isinstance(obj, UUID): + return str(obj) + if isinstance(obj, datetime.datetime): + return obj.isoformat() + if hasattr(obj, "model_dump"): + return orjson.loads(orjson.dumps(obj.model_dump(), default=self._serialize_default)) + if hasattr(obj, "dict"): + return orjson.loads(orjson.dumps(obj.dict(), default=self._serialize_default)) + return str(obj) + + def _filter_by_tags(self, serialized_run: dict) -> bool: + """ + Apply tag-based filtering. + + :param serialized_run: Serialized run dictionary. + + :returns: True if the run passes tag filters or if no tag filter is configured. + """ + # Check if the user enabled filtering by tags: + if not self._monitor_settings.tags_filter: + return True + + # Filter the run: + return not set(self._monitor_settings.tags_filter).isdisjoint( + serialized_run["tags"] + ) + + def _filter_by_run_types(self, serialized_run: dict) -> bool: + """ + Apply run-type filtering. + + :param serialized_run: Serialized run dictionary. + + :returns: True if the run's ``run_type`` is allowed or if no run-type filter is configured. + """ + # Check if the user enabled filtering by run types: + if not self._monitor_settings.run_types_filter: + return True + + # Filter the run: + return serialized_run["run_type"] in self._monitor_settings.run_types_filter + + def _filter_by_names(self, serialized_run: dict) -> bool: + """ + Apply class/name filtering. + + :param serialized_run: Serialized run dictionary. + + :returns: True if the run's ``name`` is allowed or if no name filter is configured. + """ + # Check if the user enabled filtering by class names: + if not self._monitor_settings.names_filter: + return True + + # Filter the run: + return serialized_run["name"] in self._monitor_settings.names_filter + + def _get_run_inputs(self, serialized_run: dict) -> dict[str, Any]: + """ + Build the inputs dictionary for a monitoring event. + + :param serialized_run: Serialized run dictionary. + + :returns: A dictionary containing inputs, run metadata and (optionally) additional metadata. 
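+            Roughly: ``{"inputs": ..., "run_type": ..., "run_name": ..., "tags": [...], "run_id": ...,
+            "start_timestamp": ..., "parent_run_id": ..., "metadata": {...}}`` (the last two keys are optional).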
+ """ + inputs = { + "inputs": serialized_run["inputs"], + "run_type": serialized_run["run_type"], + "run_name": serialized_run["name"], + "tags": serialized_run["tags"], + "run_id": serialized_run["id"], + "start_timestamp": serialized_run["start_time"], + } + if "parent_run_id" in serialized_run: + # Parent run ID is excluded when child runs are joined in the same event. When child runs are split, it is + # included and can be used to reconstruct the run tree if needed. + inputs = {**inputs, "parent_run_id": serialized_run["parent_run_id"]} + if self._monitor_settings.include_metadata and "metadata" in serialized_run: + inputs = {**inputs, "metadata": serialized_run["metadata"]} + + return inputs + + def _get_run_outputs(self, serialized_run: dict) -> dict[str, Any]: + """ + Build the outputs dictionary for a monitoring event. + + :param serialized_run: Serialized run dictionary. + + :returns: A dictionary with outputs and optional other collected info depending on monitor settings. + """ + outputs = {"outputs": serialized_run["outputs"], "end_timestamp": serialized_run["end_time"]} + if self._monitor_settings.include_latency and "latency" in serialized_run: + outputs = {**outputs, "latency": serialized_run["latency"]} + if self._monitor_settings.include_errors: + outputs = {**outputs, "error": serialized_run["error"]} + if self._monitor_settings.include_full_run: + outputs = {**outputs, "full_run": serialized_run} + + return outputs + + def _summarize_run(self, serialized_run: dict, include_children: bool) -> tuple[dict, dict] | None: + """ + Summarize a single run into (inputs, outputs) if it passes filters. + + :param serialized_run: Serialized run dictionary. + :param include_children: Whether to include child runs. + + :returns: The summarized run (inputs, outputs) tuple if the run should be monitored, otherwise ``None``. + """ + # Pass filters: + if not ( + self._filter_by_tags(serialized_run=serialized_run) + and self._filter_by_run_types(serialized_run=serialized_run) + and self._filter_by_names(serialized_run=serialized_run) + ): + return None + + # Check if needed to include errors: + if serialized_run["error"] and not self._monitor_settings.include_errors: + return None + + # Prepare the inputs and outputs: + inputs = self._get_run_inputs(serialized_run=serialized_run) + outputs = self._get_run_outputs(serialized_run=serialized_run) + + # Check if needed to include child runs: + if include_children: + outputs["child_runs"] = [] + for child_run in serialized_run.get("child_runs", []): + # Recursively summarize the child run: + summarized_child_run = self._summarize_run(serialized_run=child_run, include_children=True) + if summarized_child_run: + inputs_child, outputs_child = summarized_child_run + outputs["child_runs"].append( + { + "input_data": inputs_child, + "output_data": outputs_child, + } + ) + + return inputs, outputs + + def _send_run_event( + self, event_id: str, inputs: dict, outputs: dict, start_time: datetime.datetime, end_time: datetime.datetime + ): + """ + Send a monitoring event for a single run. + + Note: If monitor debug mode is enabled, appends to ``debug_target_list`` or prints JSON. + + :param event_id: Unique event identifier. + :param inputs: Inputs dictionary for the event. + :param outputs: Outputs dictionary for the event. + :param start_time: Request/start timestamp. + :param end_time: Response/end timestamp. 
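+
+        The emitted event is shaped as::
+
+            {"event_id": ..., "label": ..., "input_data": {"input_data": {...}},
+             "output_data": {"output_data": {...}},
+             "request_timestamp": "...", "response_timestamp": "..."}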
+        """
+        event = {
+            "event_id": event_id,
+            "label": self._monitor_settings.label,
+            "input_data": {"input_data": inputs},  # So it will be a single "input feature" in MLRun monitoring.
+            "output_data": {"output_data": outputs},  # So it will be a single "output feature" in MLRun monitoring.
+            "request_timestamp": start_time.strftime("%Y-%m-%d %H:%M:%S%z"),
+            "response_timestamp": end_time.strftime("%Y-%m-%d %H:%M:%S%z"),
+        }
+        if self._monitor_settings.debug:
+            if isinstance(self._monitor_settings.debug_target_list, list):
+                self._monitor_settings.debug_target_list.append(event)
+            else:
+                # Decode for printing, as `orjson.dumps` returns bytes:
+                print(orjson.dumps(event, option=orjson.OPT_INDENT_2 | orjson.OPT_APPEND_NEWLINE).decode())
+            return
+
+        self._mlrun_client.monitor(**event)
+
+    @staticmethod
+    def _check_for_env_var_usage() -> bool:
+        """
+        Check whether global env-var activated tracing is requested.
+
+        :returns: True when ``LC_MLRUN_MONITORING_ENABLED`` environment variable equals ``"1"``.
+        """
+        return os.environ.get(mlrun_monitoring_env_var, "0") == "1"
+
+    @staticmethod
+    def _import_from_module_path(module_path: str) -> Any:
+        """
+        Import an object from a full module path string.
+
+        :param module_path: Full dotted path, e.g. ``a.b.module.object``.
+
+        :returns: The imported object.
+        """
+        try:
+            module_name, object_name = module_path.rsplit(".", 1)
+            module = importlib.import_module(module_name)
+            obj = getattr(module, object_name)
+        except ValueError as value_error:
+            raise ValueError(
+                f"The provided '{module_path}' is not valid: it must have at least one '.'. "
+                f"If the class is locally defined, please add '__main__.MyObject' to the path."
+            ) from value_error
+        except ImportError as import_error:
+            raise ImportError(
+                f"Could not import '{module_path}'. Tried to import '{module_name}' and failed with the following "
+                f"error: {import_error}."
+            ) from import_error
+        except AttributeError as attribute_error:
+            raise AttributeError(
+                f"Could not import '{object_name}'. Tried to run 'from {module_name} import {object_name}' and could "
+                f"not find it: {attribute_error}"
+            ) from attribute_error
+
+        return obj
+
+
+#: MLRun monitoring context variable to set when the user wraps their code with `mlrun_monitoring`. From this context
+#  variable LangChain will get the tracer in a thread-safe way.
+mlrun_monitoring_var: ContextVar[MLRunTracer | None] = ContextVar(
+    "mlrun_monitoring", default=None
+)
+
+
+@contextmanager
+def mlrun_monitoring(settings: MLRunTracerSettings | None = None):
+    """
+    Context manager that enables MLRun tracing for LangChain code, monitoring the LangChain runs within its scope.
+
+    Example usage::
+
+        from langchain_mlrun import mlrun_monitoring, MLRunTracerSettings
+
+        settings = MLRunTracerSettings(...)
+        with mlrun_monitoring(settings=settings) as tracer:
+            # LangChain execution within this block will be traced by `tracer`.
+            ...
+
+    :param settings: The settings to use to configure the tracer.
+    """
+    mlrun_tracer = MLRunTracer(settings=settings)
+    token = mlrun_monitoring_var.set(mlrun_tracer)
+    try:
+        yield mlrun_tracer
+    finally:
+        mlrun_monitoring_var.reset(token)
+
+
+# Register a hook for LangChain to apply the MLRun tracer:
+register_configure_hook(
+    context_var=mlrun_monitoring_var,
+    inheritable=True,  # To allow inner runs (agent that uses a tool that uses an LLM...) to be traced.
+    env_var=mlrun_monitoring_env_var,
+    handle_class=MLRunTracer,
+)
+
+# Temporary convenience function to set up the monitoring infrastructure required for the tracer.
+def setup_langchain_monitoring( + project: str | mlrun.MlrunProject = None, + function_name: str = "langchain_mlrun_function", + model_name: str = "langchain_mlrun_model", + model_endpoint_name: str = "langchain_mlrun_endpoint", + v3io_container: str = "projects", + v3io_stream_path: str = None, + kafka_stream_profile_name: str = None, + kafka_linger_ms: int = 500, +) -> dict: + """ + Create a model endpoint in the given project to be used for LangChain monitoring with MLRun and returns the + necessary environment variables to configure the MLRun tracer client. The project should already exist and have + monitoring enabled:: + + project.set_model_monitoring_credentials( + stream_profile_name=..., + tsdb_profile_name=... + ) + + This function creates and logs dummy model and function in the specified project in order to create the model + endpoint for monitoring. It is a temporary workaround and will be added as a feature in a future MLRun version. + + :param project: The MLRun project name or object where to create the model endpoint. If None, the current active + project will be used. + :param function_name: The name of the serving function to create. + :param model_name: The name of the model to create. + :param model_endpoint_name: The name of the model endpoint to create. + :param v3io_container: The V3IO container where the monitoring stream is located (for MLRun Enterprise). + :param v3io_stream_path: The V3IO stream path for monitoring (for MLRun Enterprise). If None, + ``/model-endpoints/stream-v1`` will be used. + :param kafka_stream_profile_name: The name of the registered ``DatastoreProfileKafkaStream`` to use for Kafka + configuration (required for MLRun CE). This profile should be registered via + ``project.register_datastore_profile()`` and contains all Kafka settings including broker, topic, + SASL credentials, SSL config, etc. + :param kafka_linger_ms: Kafka producer linger.ms setting controlling message batching (default: 500ms). + Messages are accumulated for up to this duration before being sent as a batch, reducing network overhead. + The tracer always flushes at the end of each root run, guaranteeing delivery. Set to 0 to disable batching. + + :returns: A dictionary with the necessary environment variables to configure the MLRun tracer client. + raise: MLRunInvalidArgumentError: If no project is provided and there is no current active project. + """ + import io + import time + import sys + from contextlib import redirect_stdout, redirect_stderr + import tempfile + import pickle + import json + + from mlrun.features import Feature + + class ProgressStep: + """ + A context manager to display progress of a code block with timing and optional output suppression. + """ + + def __init__(self, label: str, indent: int = 2, width: int = 40, clean: bool = True): + """ + Initialize the ProgressStep context manager. + + :param label: The label to display for the progress step. + :param indent: The number of spaces to indent the label. + :param width: The width to pad the label for alignment. + :param clean: Whether to suppress stdout and stderr during the block execution. + """ + # Store parameters: + self._label = label + self._indent = indent + self._width = width + self._clean = clean + + # Internal state: + self._start_time = None + self._sink = io.StringIO() + self._stdout_redirect = None + self._stderr_redirect = None + self._last_line_length = 0 # To track the line printed when terminals don't support '\033[K'. 
+ + # Capture the stream currently in use (before and if clean is true and we redirect it): + self._terminal = sys.stdout + + def __enter__(self): + """ + Enter the context manager, starting the timer and printing the initial status. + """ + # Start timer: + self._start_time = time.perf_counter() + + # Print without newline (using \r to allow overwriting): + self._write(icon=" ", status="Running", new_line=False) + + # Silence all internal noise: + if self._clean: + self._stdout_redirect = redirect_stdout(self._sink) + self._stderr_redirect = redirect_stderr(self._sink) + self._stdout_redirect.__enter__() + self._stderr_redirect.__enter__() + + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit the context manager, stopping the timer and printing the final status. + + :param exc_type: The exception type, if any. + :param exc_val: The exception value, if any. + :param exc_tb: The exception traceback, if any. + """ + # Restore stdout/stderr: + if self._clean: + self._stdout_redirect.__exit__(exc_type, exc_val, exc_tb) + self._stderr_redirect.__exit__(exc_type, exc_val, exc_tb) + + # Calculate elapsed time: + elapsed = time.perf_counter() - self._start_time + + # Move cursor back to start of line ('\r') and overwrite ('\033[K' clears the line to the right): + if exc_type is None: + self._write(icon="✓", status=f"Done ({elapsed:.2f}s)", new_line=True) + else: + self._write(icon="✕", status="Failed", new_line=True) + + def update(self, status: str): + """ + Update the status message displayed for the progress step. + + :param status: The new status message to display. + """ + self._write(icon=" ", status=status, new_line=False) + + def _write(self, icon: str, status: str, new_line: bool): + """ + Write the progress line to the terminal, handling line clearing for terminals that do not support it. + + :param icon: The icon to display (e.g., checkmark, cross, space). + :param status: The status message to display. + :param new_line: Whether to end the line with a newline character. + """ + # Construct the basic line + line = f"\r{' ' * self._indent}[{icon}] {self._label.ljust(self._width, '.')} {status}" + + # Calculate if we need to pad with spaces to clear the old, longer line: + padding = max(0, self._last_line_length - len(line)) + + # Add spaces to clear old text (add the ANSI clear for terminals that support it): + line = f"{line}{' ' * padding}\033[K" + + # Add newline if needed: + if new_line: + line += "\n" + + # Write to terminal: + self._terminal.write(line) + self._terminal.flush() + + # Update the max length seen so far: + self._last_line_length = len(line) + + print("Creating LangChain model endpoint\n") + + # Get the project: + with ProgressStep("Loading Project"): + if project is None: + try: + project = mlrun.get_current_project(silent=False) + except mlrun.errors.MLRunInvalidArgumentError: + raise mlrun.errors.MLRunInvalidArgumentError( + "There is no current active project. Either use `mlrun.get_or_create_project` prior to " + "creating the monitoring endpoint or pass a project name to load." 
+                )
+        if isinstance(project, str):
+            project = mlrun.load_project(name=project)
+
+    # Create and log the dummy model:
+    with ProgressStep("Creating Model") as progress_step:
+        # Check if the model already exists:
+        progress_step.update("Checking if model exists")
+        try:
+            dummy_model = project.get_artifact(key=model_name)
+        except mlrun.MLRunNotFoundError:
+            dummy_model = None
+        # If not, create and log it:
+        if not dummy_model:
+            progress_step.update(f"Logging model '{model_name}'")
+            with tempfile.TemporaryDirectory() as tmpdir:
+                # Create a dummy model file:
+                dummy_model_path = os.path.join(tmpdir, "for_langchain_mlrun_tracer.pkl")
+                with open(dummy_model_path, "wb") as f:
+                    pickle.dump({"dummy": "model"}, f)
+                # Log the model:
+                dummy_model = project.log_model(
+                    key=model_name,
+                    model_file=dummy_model_path,
+                    inputs=[Feature(value_type="str", name="input")],
+                    outputs=[Feature(value_type="str", name="output")]
+                )
+
+    # Create and set the dummy function:
+    with ProgressStep("Creating Function") as progress_step:
+        # Check if the function already exists:
+        progress_step.update("Checking if function exists")
+        try:
+            dummy_function = project.get_function(key=function_name)
+        except mlrun.MLRunNotFoundError:
+            dummy_function = None
+        # If not, create and save it:
+        if not dummy_function:
+            progress_step.update(f"Setting function '{function_name}'")
+            with tempfile.TemporaryDirectory() as tmpdir:
+                # Create a dummy function file:
+                dummy_function_code = """
+def handler(context, event):
+    return "ok"
+"""
+                dummy_function_path = os.path.join(tmpdir, "dummy_function.py")
+                with open(dummy_function_path, "w") as f:
+                    f.write(dummy_function_code)
+                # Set the function in the project:
+                dummy_function = project.set_function(
+                    func=dummy_function_path, name=function_name, image="mlrun/mlrun", kind="nuclio"
+                )
+                dummy_function.save()
+
+    # Create the model endpoint:
+    with ProgressStep("Creating Model Endpoint") as progress_step:
+        # Get the MLRun DB:
+        progress_step.update("Getting MLRun DB")
+        db = mlrun.get_run_db()
+        # Check if the model endpoint already exists:
+        progress_step.update("Checking if endpoint exists")
+        model_endpoint = project.list_model_endpoints(names=[model_endpoint_name]).endpoints
+        if model_endpoint:
+            model_endpoint = model_endpoint[0]
+        else:
+            progress_step.update("Creating model endpoint")
+            model_endpoint = mlrun.common.schemas.ModelEndpoint(
+                metadata=mlrun.common.schemas.ModelEndpointMetadata(
+                    project=project.name,
+                    name=model_endpoint_name,
+                    endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP,
+                ),
+                spec=mlrun.common.schemas.ModelEndpointSpec(
+                    function_name=dummy_function.metadata.name,
+                    function_tag="latest",
+                    model_path=dummy_model.uri,
+                    model_class="CustomStream",
+                ),
+                status=mlrun.common.schemas.ModelEndpointStatus(
+                    monitoring_mode=mm_constants.ModelMonitoringMode.enabled,
+                ),
+            )
+            db.create_model_endpoint(model_endpoint=model_endpoint)
+        # Wait for the model endpoint UID to be set:
+        progress_step.update("Waiting for model endpoint")
+        uid_exist_flag = False
+        while not uid_exist_flag:
+            model_endpoint = project.list_model_endpoints(names=[model_endpoint_name])
+            model_endpoint = model_endpoint.endpoints[0]
+            if model_endpoint.metadata.uid:
+                uid_exist_flag = True
+            else:
+                # Avoid busy-waiting while the endpoint is being registered:
+                time.sleep(1)
+
+    # Set parameters defaults:
+    v3io_stream_path = v3io_stream_path or f"{project.name}/model-endpoints/stream-v1"
+
+    if mlrun.mlconf.is_ce_mode():
+        if kafka_stream_profile_name is None:
+            raise ValueError(
+                "kafka_stream_profile_name is required for MLRun CE mode. Register a DatastoreProfileKafkaStream and "
+                "pass its name."
+            )
+        client_env_vars = {
+            "LC_MLRUN_TRACER_CLIENT_KAFKA_STREAM_PROFILE_NAME": kafka_stream_profile_name,
+            "LC_MLRUN_TRACER_CLIENT_KAFKA_LINGER_MS": str(kafka_linger_ms),
+        }
+    else:
+        client_env_vars = {
+            "LC_MLRUN_TRACER_CLIENT_V3IO_STREAM_PATH": v3io_stream_path,
+            "LC_MLRUN_TRACER_CLIENT_V3IO_CONTAINER": v3io_container,
+        }
+
+    # Prepare the environment variables:
+    env_vars = {
+        "LC_MLRUN_MONITORING_ENABLED": "1",
+        "LC_MLRUN_TRACER_CLIENT_PROJECT": project.name,
+        "LC_MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_NAME": model_endpoint.metadata.name,
+        "LC_MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_UID": model_endpoint.metadata.uid,
+        "LC_MLRUN_TRACER_CLIENT_SERVING_FUNCTION": function_name,
+        **client_env_vars
+    }
+    print("\n✨ Done! LangChain monitoring model endpoint created successfully.")
+    print("You can now set the following environment variables to enable MLRun tracing in your LangChain code:\n")
+    print(json.dumps(env_vars, indent=4))
+    print(
+        "\nTo customize the monitoring behavior, you can also set additional environment variables prefixed with "
+        "'LC_MLRUN_TRACER_MONITOR_'. Refer to the MLRun tracer documentation for more details.\n"
+    )
+
+    return env_vars
+
+
+class LangChainMonitoringApp(ModelMonitoringApplicationBase):
+    """
+    A base monitoring application for LangChain that calculates common metrics on LangChain runs traced with the MLRun
+    tracer.
+
+    The class can be inherited and extended to add custom metrics or override existing ones. It provides methods to
+    extract structured runs from the monitoring context and calculate metrics such as average latency, success rate,
+    token usage, and run name counts.
+
+    If inheriting, the main method to override is `do_tracking`, which performs the tracking on the monitoring context.
+    """
+
+    def do_tracking(self, monitoring_context: MonitoringApplicationContext) -> (
+        ModelMonitoringApplicationResult |
+        list[ModelMonitoringApplicationResult | ModelMonitoringApplicationMetric] |
+        dict[str, Any]
+    ):
+        """
+        The main function that performs tracking on the monitoring context. The LangChain monitoring app by default
+        will calculate all the provided metrics on the structured runs extracted from the monitoring context sample
+        dataframe.
+
+        :param monitoring_context: The monitoring context containing the sample dataframe.
+
+        :returns: The monitoring artifacts, metrics and results.
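+
+        Example of overriding this method in a subclass (an illustrative sketch that reuses the class's own helpers;
+        the subclass name is hypothetical)::
+
+            class MyLangChainApp(LangChainMonitoringApp):
+                def do_tracking(self, monitoring_context):
+                    structured_runs, _ = self.get_structured_runs(monitoring_context=monitoring_context)
+                    return [
+                        ModelMonitoringApplicationMetric(
+                            name="success_rate",
+                            value=self.calculate_success_rate(structured_runs=structured_runs),
+                        ),
+                    ]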
+ """ + # Get the structured runs from the monitoring context: + structured_runs, _ = self.get_structured_runs(monitoring_context=monitoring_context) + + # Calculate the metrics: + average_latency = self.calculate_average_latency(structured_runs=structured_runs) + success_rate = self.calculate_success_rate(structured_runs=structured_runs) + token_usage = self.count_token_usage(structured_runs=structured_runs) + run_name_counts = self.count_run_names(structured_runs=structured_runs) + + return [ + ModelMonitoringApplicationMetric( + name="average_latency", + value=average_latency, + ), + ModelMonitoringApplicationMetric( + name="success_rate", + value=success_rate, + ), + ModelMonitoringApplicationMetric( + name="total_input_tokens", + value=token_usage["total_input_tokens"], + ), + ModelMonitoringApplicationMetric( + name="total_output_tokens", + value=token_usage["total_output_tokens"], + ), + ModelMonitoringApplicationMetric( + name="combined_total_tokens", + value=token_usage["combined_total"], + ), + *[ModelMonitoringApplicationMetric( + name=f"run_name_counts_{run_name}", + value=count, + ) for run_name, count in run_name_counts.items()], + ] + + @staticmethod + def get_structured_runs( + monitoring_context: MonitoringApplicationContext, + labels_filter: list[str] = None, + tags_filter: list[str] = None, + run_name_filter: list[str] = None, + run_type_filter: list[str] = None, + flatten_child_runs: bool = False, + ignore_child_runs: bool = False, + ignore_errored_runs: bool = False, + ) -> tuple[list[dict], list[dict]]: + """ + Get the structured runs from the monitoring context sample dataframe. The sample dataframe contains the raw + input and output data as JSON strings - the way the MLRun tracer sends them as events to MLRun monitoring. This + function parses the JSON strings into structured dictionaries that can be used for further metrics calculations + and analysis. + + :param monitoring_context: The monitoring context containing the sample dataframe. + :param labels_filter: List of labels to filter the runs. Only runs with a label appearing in this list will + remain. If None, no filtering is applied. + :param tags_filter: List of tags to filter the runs. Only runs containing at least one tag from this list will + remain. If None, no filtering is applied. + :param run_name_filter: List of run names to filter the runs. Only runs with a name appearing in this list will + remain. If None, no filtering is applied. + :param run_type_filter: List of run types to filter the runs. Only runs with a type appearing in this list will + remain. If None, no filtering is applied. + :param flatten_child_runs: Whether to flatten child runs into the main runs list. If True, all child runs will + be extracted and added to the main runs list. If False, child runs will be kept nested within their parent + runs. + :param ignore_child_runs: Whether to ignore child runs completely. If True, child runs will be removed from the + output. If False, child runs will be processed according to the other parameters. + :param ignore_errored_runs: Whether to ignore runs that resulted in errors. If True, runs with errors will be + excluded from the output. If False, errored runs will be included. + + :returns: A list of structured run dictionaries that passed the filters and a list of samples that could not be + parsed due to errors. 
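+
+        Example (an illustrative sketch; keeps only LLM runs, flattened out of their parents, skipping errored ones)::
+
+            structured_runs, errored_samples = LangChainMonitoringApp.get_structured_runs(
+                monitoring_context=monitoring_context,
+                run_type_filter=["llm"],
+                flatten_child_runs=True,
+                ignore_errored_runs=True,
+            )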
+ """ + # Retrieve the input and output samples from the monitoring context: + samples = monitoring_context.sample_df[['input', 'output']].to_dict('records') + + # Prepare to collect structured samples: + structured_samples = [] + errored_samples = [] + + # Go over all samples: + for sample in samples: + try: + # Parse the input data into structured format: + parsed_input = orjson.loads(sample['input']) + label = parsed_input['label'] + parsed_input = parsed_input["input"]["input_data"] + # Parse the output data into structured format: + parsed_output = orjson.loads(sample['output'])["output_data"] + structured_samples.extend( + LangChainMonitoringApp._collect_run( + structured_input=parsed_input, + structured_output=parsed_output, + label=label, + labels_filter=labels_filter, + tags_filter=tags_filter, + run_name_filter=run_name_filter, + run_type_filter=run_type_filter, + flatten_child_runs=flatten_child_runs, + ignore_child_runs=ignore_child_runs, + ignore_errored_runs=ignore_errored_runs, + ) + ) + except Exception: + errored_samples.append(sample) + + return structured_samples, errored_samples + + @staticmethod + def _collect_run( + structured_input: dict, + structured_output: dict, + label: str, + child_level: int = 0, + labels_filter: list[str] = None, + tags_filter: list[str] = None, + run_name_filter: list[str] = None, + run_type_filter: list[str] = None, + flatten_child_runs: bool = False, + ignore_child_runs: bool = False, + ignore_errored_runs: bool = False, + ) -> list[dict]: + """ + Recursively collect runs from the structured input and output data, applying filters as specified. + + :param structured_input: The structured input data of the run. + :param structured_output: The structured output data of the run. + :param label: The label of the run. + :param child_level: The current child level of the run (0 for root runs). + :param labels_filter: Label filter as described in `get_structured_runs`. + :param tags_filter: Tag filter as described in `get_structured_runs`. + :param run_name_filter: Run name filter as described in `get_structured_runs`. + :param run_type_filter: Run type filter as described in `get_structured_runs`. + :param flatten_child_runs: Flag to flatten child runs as described in `get_structured_runs`. + :param ignore_child_runs: Flag to ignore child runs as described in `get_structured_runs`. + :param ignore_errored_runs: Flag to ignore errored runs as described in `get_structured_runs`. + + :returns: A list of structured run dictionaries that passed the filters. 
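+
+        Each collected entry follows the shape appended below (shown as an illustrative sketch)::
+
+            {
+                "label": label,
+                "input_data": structured_input,
+                "output_data": structured_output,
+                "child_level": child_level,
+            }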
+        """
+        # Prepare to collect runs:
+        runs = []
+
+        # Filter by label:
+        if labels_filter and label not in labels_filter:
+            return runs
+
+        # Handle child runs:
+        if "child_runs" in structured_output:
+            # Check if we need to ignore or flatten child runs:
+            if ignore_child_runs:
+                structured_output.pop("child_runs")
+            elif flatten_child_runs:
+                # Recursively collect child runs:
+                child_runs = structured_output.pop("child_runs")
+                flattened_runs = []
+                for child_run in child_runs:
+                    flattened_runs.extend(
+                        LangChainMonitoringApp._collect_run(
+                            structured_input=child_run["input_data"],
+                            structured_output=child_run["output_data"],
+                            label=label,
+                            child_level=child_level + 1,
+                            tags_filter=tags_filter,
+                            run_name_filter=run_name_filter,
+                            run_type_filter=run_type_filter,
+                            flatten_child_runs=flatten_child_runs,
+                            ignore_child_runs=ignore_child_runs,
+                            ignore_errored_runs=ignore_errored_runs,
+                        )
+                    )
+                runs.extend(flattened_runs)
+
+        # Filter by tags, run name, run type, and errors (a run is dropped when none of its tags appear in the filter):
+        if tags_filter and set(structured_input["tags"]).isdisjoint(tags_filter):
+            return runs
+        if run_name_filter and structured_input["run_name"] not in run_name_filter:
+            return runs
+        if run_type_filter and structured_input["run_type"] not in run_type_filter:
+            return runs
+        if ignore_errored_runs and structured_output.get("error", None):
+            return runs
+
+        # Collect the current run:
+        runs.append({"label": label, "input_data": structured_input, "output_data": structured_output,
+                     "child_level": child_level})
+        return runs
+
+    @staticmethod
+    def iterate_structured_runs(structured_runs: list[dict]) -> Generator[dict, None, None]:
+        """
+        Iterates over all runs in the structured samples, including child runs.
+
+        :param structured_runs: List of structured run samples.
+
+        :returns: A generator yielding each run structure.
+        """
+        # TODO: Add an option to stop at a certain child level.
+        for structured_run in structured_runs:
+            if "child_runs" in structured_run['output_data']:
+                for child_run in structured_run['output_data']['child_runs']:
+                    yield from LangChainMonitoringApp.iterate_structured_runs([{
+                        "label": structured_run['label'],
+                        "input_data": child_run['input_data'],
+                        "output_data": child_run['output_data'],
+                        "child_level": structured_run['child_level'] + 1
+                    }])
+            yield structured_run
+
+    @staticmethod
+    def count_run_names(structured_runs: list[dict]) -> dict[str, int]:
+        """
+        Counts occurrences of each run name in the structured samples.
+
+        :param structured_runs: List of structured run samples.
+
+        :returns: A dictionary with run names as keys and their counts as values.
+        """
+        # TODO: Add a nice plot artifact that will draw the bar chart for what is being used the most.
+        # Prepare to count run names:
+        run_name_counts = {}
+
+        # Go over all the runs:
+        for structured_run in LangChainMonitoringApp.iterate_structured_runs(structured_runs):
+            run_name = structured_run['input_data']['run_name']
+            if run_name in run_name_counts:
+                run_name_counts[run_name] += 1
+            else:
+                run_name_counts[run_name] = 1
+
+        return run_name_counts
+
+    @staticmethod
+    def count_token_usage(structured_runs: list[dict]) -> dict:
+        """
+        Calculates total tokens by only counting unique 'llm' type runs.
+
+        :param structured_runs: List of structured run samples.
+
+        :returns: A dictionary with total input tokens, total output tokens, and combined total tokens.
+        """
+        # TODO: Add a token count per model breakdown (a dictionary of model name to token counts). Pay attention that
+        #  different providers use different keys in the response metadata. We should implement a mapping for that so
+        #  each provider will have its own handler that will know how to extract the relevant info out of a run.
+        # Prepare to count tokens:
+        total_input_tokens = 0
+        total_output_tokens = 0
+
+        # Go over all the LLM typed runs:
+        for structured_run in LangChainMonitoringApp.iterate_structured_runs(structured_runs):
+            # Count only LLM type runs as chain runs may include duplicative information as they accumulate the tokens
+            # from the child runs:
+            if structured_run['input_data']['run_type'] != 'llm':
+                continue
+            # Look for the token count information:
+            outputs = structured_run['output_data']["outputs"]
+            # Newer implementations should have the metadata in the `AIMessage` kwargs under generations:
+            if "generations" in outputs:
+                for generation in outputs["generations"]:  # Iterate over generations.
+                    for sample in generation:  # Iterate over the generation batch.
+                        token_usage = sample.get("message", {}).get("kwargs", {}).get("usage_metadata", {})
+                        if token_usage:
+                            total_input_tokens += (
+                                token_usage.get('input_tokens', 0)
+                                or token_usage.get('prompt_tokens', 0)
+                            )
+                            total_output_tokens += (
+                                token_usage.get('output_tokens', 0) or
+                                token_usage.get('completion_tokens', 0)
+                            )
+            # Older implementations may have the metadata under `llm_output` (checked only when no generations exist,
+            # so the same usage is not counted twice):
+            elif "llm_output" in outputs:
+                token_usage = outputs["llm_output"].get("token_usage", {})
+                if token_usage:
+                    total_input_tokens += token_usage.get('input_tokens', 0) or token_usage.get('prompt_tokens', 0)
+                    total_output_tokens += (
+                        token_usage.get('output_tokens', 0) or
+                        token_usage.get('completion_tokens', 0)
+                    )
+
+        return {
+            "total_input_tokens": total_input_tokens,
+            "total_output_tokens": total_output_tokens,
+            "combined_total": total_input_tokens + total_output_tokens
+        }
+
+    @staticmethod
+    def calculate_success_rate(structured_runs: list[dict]) -> float:
+        """
+        Calculates the success rate across all runs.
+
+        :param structured_runs: List of structured run samples.
+
+        :returns: Success rate as a float ratio between 0 and 1.
+        """
+        # TODO: Add an option to see an errors breakdown by kind of error and maybe an option to show which run name
+        #  yielded most of the errors with artifacts showcasing it.
+        successful_count = 0
+        for structured_run in structured_runs:
+            if 'error' not in structured_run['output_data'] or structured_run['output_data']['error'] is None:
+                successful_count += 1
+        return successful_count / len(structured_runs) if structured_runs else 0.0
+
+    @staticmethod
+    def calculate_average_latency(structured_runs: list[dict]) -> float:
+        """
+        Calculates the average latency across all root runs.
+
+        :param structured_runs: List of structured run samples.
+
+        :returns: Average latency in milliseconds.
+        """
+        # TODO: Add an option to calculate latency per run name (to know which runs are slower/faster) and then return
+        #  an artifact showcasing it.
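+        # For example (illustrative): a root run starting at 10:00:00.000 and ending at 10:00:01.000 contributes
+        # 1000.0 milliseconds to the average, while its child runs are skipped to avoid double counting.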
+ # Prepare to calculate average latency: + total_latency = 0.0 + count = 0 + + # Go over all the root runs: + for structured_run in structured_runs: + # Skip child runs: + if structured_run["child_level"] > 0: + continue + # Check if latency is already provided: + if "latency" in structured_run['output_data']: + total_latency += structured_run['output_data']['latency'] + count += 1 + continue + # Calculate latency from timestamps: + start_time = datetime.datetime.fromisoformat(structured_run['input_data']['start_timestamp']) + end_time = datetime.datetime.fromisoformat(structured_run['output_data']['end_timestamp']) + total_latency += (end_time - start_time).total_seconds() * 1000 # Convert to milliseconds + count += 1 + + return total_latency / count if count > 0 else 0.0 diff --git a/modules/src/langchain_mlrun/notebook_images/mlrun_ui.png b/modules/src/langchain_mlrun/notebook_images/mlrun_ui.png new file mode 100644 index 000000000..9785eeae3 Binary files /dev/null and b/modules/src/langchain_mlrun/notebook_images/mlrun_ui.png differ diff --git a/modules/src/langchain_mlrun/requirements.txt b/modules/src/langchain_mlrun/requirements.txt new file mode 100644 index 000000000..2597fbe12 --- /dev/null +++ b/modules/src/langchain_mlrun/requirements.txt @@ -0,0 +1,4 @@ +pytest +langchain~=1.2 +pydantic-settings~=2.12 +kafka-python~=2.3 \ No newline at end of file diff --git a/modules/src/langchain_mlrun/test_langchain_mlrun.py b/modules/src/langchain_mlrun/test_langchain_mlrun.py new file mode 100644 index 000000000..cb6ca184c --- /dev/null +++ b/modules/src/langchain_mlrun/test_langchain_mlrun.py @@ -0,0 +1,1025 @@ +# Copyright 2026 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Literal, TypedDict, Annotated, Sequence, Any, Callable +from concurrent.futures import ThreadPoolExecutor +from operator import add + +import pytest +from langchain_core.language_models import LanguageModelInput +from langchain_core.runnables import Runnable, RunnableLambda +from pydantic import ValidationError + +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from langchain_core.tracers import Run +from langchain_core.language_models.fake_chat_models import FakeListChatModel, GenericFakeChatModel +from langchain.agents import create_agent +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.tools import tool, BaseTool + +from langgraph.graph import StateGraph, START, END +from langchain_core.messages import BaseMessage +from pydantic_settings import BaseSettings, SettingsConfigDict + +from langchain_mlrun import ( + mlrun_monitoring, + MLRunTracer, + MLRunTracerSettings, + MLRunTracerClientSettings, + MLRunTracerMonitorSettings, + mlrun_monitoring_env_var, + LangChainMonitoringApp, +) + + +def _check_openai_credentials() -> bool: + """ + Check if OpenAI API key is set in environment variables. + + :return: True if OPENAI_API_KEY is set, False otherwise. 
+ """ + return "OPENAI_API_KEY" in os.environ + + +# Import ChatOpenAI only if OpenAI credentials are available (meaning `langchain-openai` must be installed). +if _check_openai_credentials(): + from langchain_openai import ChatOpenAI + + +class _ToolEnabledFakeModel(GenericFakeChatModel): + """ + A fake chat model that supports tool binding for running agent tracing tests. + """ + + def bind_tools( + self, + tools: Sequence[ + dict[str, Any] | type | Callable | BaseTool # noqa: UP006 + ], + *, + tool_choice: str | None = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, AIMessage]: + return self + + +#: Tag value for testing tag filtering. +_dummy_tag = "dummy_tag" + + +def _run_simple_chain() -> str: + """ + Run a simple LangChain chain that gets a fact about a topic. + """ + # Build a simple chain: prompt -> llm -> str output parser + llm = ChatOpenAI( + model="gpt-4o-mini", + tags=[_dummy_tag] + ) if _check_openai_credentials() else ( + FakeListChatModel( + responses=[ + "MLRun is an open-source orchestrator for machine learning pipelines." + ], + tags=[_dummy_tag] + ) + ) + prompt = ChatPromptTemplate.from_template("Tell me a short fact about {topic}") + chain = prompt | llm | StrOutputParser() + + # Run the chain: + response = chain.invoke({"topic": "MLRun"}) + return response + + +def _run_simple_agent(): + """ + Run a simple LangChain agent that uses two tools to get weather and stock price. + """ + # Define the tools: + @tool + def get_weather(city: str) -> str: + """Get the current weather for a specific city.""" + return f"The weather in {city} is 22°C and sunny." + + @tool + def get_stock_price(symbol: str) -> str: + """Get the current stock price for a symbol.""" + return f"The stock price for {symbol} is $150.25." + + # Define the model: + model = ChatOpenAI( + model="gpt-4o-mini", + tags=[_dummy_tag] + ) if _check_openai_credentials() else ( + _ToolEnabledFakeModel( + messages=iter( + [ + AIMessage( + content="", + tool_calls=[ + {"name": "get_weather", "args": {"city": "London"}, "id": "call_abc123"}, + {"name": "get_stock_price", "args": {"symbol": "AAPL"}, "id": "call_def456"} + ] + ), + AIMessage(content="The weather in London is 22°C and AAPL is trading at $150.25.") + ] + ), + tags=[_dummy_tag] + ) + ) + + # Create the agent: + agent = create_agent( + model=model, + tools=[get_weather, get_stock_price], + system_prompt="You are a helpful assistant with access to tools." + ) + + # Run the agent: + return agent.invoke({"messages": ["What is the weather in London and the stock price of AAPL?"]}) + + +def _run_langgraph_graph(): + """ + Run a LangGraph agent that uses reflection to correct its answer. + """ + + # Define the graph state: + class AgentState(TypedDict): + messages: Annotated[list[BaseMessage], add] + attempts: int + + # Define the model: + model = ChatOpenAI(model="gpt-4o-mini") if _check_openai_credentials() else ( + _ToolEnabledFakeModel( + messages=iter( + [ + AIMessage(content="There are 2 'r's in Strawberry."), # Mocking the failure + AIMessage(content="I stand corrected. S-t-r-a-w-b-e-r-r-y. There are 3 'r's."), # Mocking the fix + ] + ) + ) + ) + + # Define the graph nodes and router: + def call_model(state: AgentState): + response = model.invoke(state["messages"]) + return {"messages": [response], "attempts": state["attempts"] + 1} + + def reflect_node(state: AgentState): + prompt = "Wait, count the 'r's again slowly, letter by letter. Are you sure?" 
+        return {"messages": [HumanMessage(content=prompt)]}
+
+    def router(state: AgentState) -> Literal["reflect", END]:
+        # Make sure there are at least 2 attempts at an answer:
+        if state["attempts"] == 1:
+            return "reflect"
+        return END
+
+    # Build the graph:
+    builder = StateGraph(AgentState)
+    builder.add_node("model", call_model)
+    tagged_reflect_node = RunnableLambda(reflect_node).with_config(tags=[_dummy_tag])
+    builder.add_node("reflect", tagged_reflect_node)
+    builder.add_edge(START, "model")
+    builder.add_conditional_edges("model", router)
+    builder.add_edge("reflect", "model")
+    graph = builder.compile()
+
+    # Run the graph:
+    return graph.invoke({"messages": [HumanMessage(content="How many 'r's in Strawberry?")], "attempts": 0})
+
+
+#: List of example functions to run in tests, along with the expected number of monitored events when split runs are
+#: enabled.
+_run_suites: list[tuple[Callable, int]] = [
+    (_run_simple_chain, 4),
+    (_run_simple_agent, 9),
+    (_run_langgraph_graph, 9),
+]
+
+
+#: Dummy environment variables for testing.
+_dummy_environment_variables = {
+    "LC_MLRUN_TRACER_CLIENT_V3IO_STREAM_PATH": "dummy_stream_path",
+    "LC_MLRUN_TRACER_CLIENT_V3IO_CONTAINER": "dummy_container",
+    "LC_MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_NAME": "dummy_model_name",
+    "LC_MLRUN_TRACER_CLIENT_MODEL_ENDPOINT_UID": "dummy_model_endpoint_uid",
+    "LC_MLRUN_TRACER_CLIENT_SERVING_FUNCTION": "dummy_serving_function",
+    "LC_MLRUN_TRACER_MONITOR_DEBUG": "true",
+    "LC_MLRUN_TRACER_MONITOR_DEBUG_TARGET_LIST": "true",
+    "LC_MLRUN_TRACER_MONITOR_SPLIT_RUNS": "true",
+}
+
+
+@pytest.fixture()
+def auto_mode_settings(monkeypatch):
+    """
+    Sets the environment variables to enable mlrun monitoring in 'auto' mode.
+    """
+    # Set environment variables for the duration of the test:
+    monkeypatch.setenv(mlrun_monitoring_env_var, "1")
+    for key, value in _dummy_environment_variables.items():
+        monkeypatch.setenv(key, value)
+
+    # Reset the singleton tracer to ensure fresh initialization:
+    MLRunTracer._singleton_tracer = None
+    MLRunTracer._initialized = False
+
+    yield
+
+    # Reset the singleton tracer after the test:
+    MLRunTracer._singleton_tracer = None
+    MLRunTracer._initialized = False
+
+
+@pytest.fixture
+def manual_mode_settings():
+    """
+    Sets the mandatory client settings and debug flag for the tests.
+    """
+    settings = MLRunTracerSettings(
+        client=MLRunTracerClientSettings(
+            v3io_stream_path="dummy_stream_path",
+            v3io_container="dummy_container",
+            model_endpoint_name="dummy_model_name",
+            model_endpoint_uid="dummy_model_endpoint_uid",
+            serving_function="dummy_serving_function",
+        ),
+        monitor=MLRunTracerMonitorSettings(
+            debug=True,
+            debug_target_list=[],
+            split_runs=True,  # Easier to test with split runs (filters can filter per run instead of inner events)
+        ),
+    )
+
+    yield settings
+
+
+def test_settings_init_via_env_vars(monkeypatch):
+    """
+    Test that settings are correctly initialized from environment variables.
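+
+    Example of the mapping being exercised (illustrative; the value matches `_dummy_environment_variables` above)::
+
+        os.environ["LC_MLRUN_TRACER_CLIENT_V3IO_STREAM_PATH"] = "dummy_stream_path"
+        settings = MLRunTracerSettings()
+        assert settings.client.v3io_stream_path == "dummy_stream_path"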
+ """ + #: First, ensure that without env vars, validation fails due to missing required fields: + with pytest.raises(ValidationError): + MLRunTracerSettings() + + # Now, set the environment variables for the client settings and debug flag: + for key, value in _dummy_environment_variables.items(): + monkeypatch.setenv(key, value) + + # Ensure that settings are now correctly initialized from env vars: + settings = MLRunTracerSettings() + assert settings.client.v3io_stream_path == "dummy_stream_path" + assert settings.client.v3io_container == "dummy_container" + assert settings.client.model_endpoint_name == "dummy_model_name" + assert settings.client.model_endpoint_uid == "dummy_model_endpoint_uid" + assert settings.client.serving_function == "dummy_serving_function" + assert settings.monitor.debug is True + + +@pytest.mark.parametrize( + "test_suite", [ + # Valid case: only v3io settings provided + ( + { + "v3io_stream_path": "dummy_stream_path", + "v3io_container": "dummy_container", + "model_endpoint_name": "dummy_model_name", + "model_endpoint_uid": "dummy_model_endpoint_uid", + "serving_function": "dummy_serving_function", + }, + True, + ), + # Invalid case: partial v3io settings provided + ( + { + "v3io_stream_path": "dummy_stream_path", + "model_endpoint_name": "dummy_model_name", + "model_endpoint_uid": "dummy_model_endpoint_uid", + "serving_function": "dummy_serving_function", + }, + False, + ), + # Valid case: only kafka settings provided + ( + { + "kafka_stream_profile_name": "dummy_stream_profile_name", + "model_endpoint_name": "dummy_model_name", + "model_endpoint_uid": "dummy_model_endpoint_uid", + "serving_function": "dummy_serving_function", + }, + True, + ), + # Invalid case: partial kafka settings provided + ( + { + "kafka_linger_ms": "1000", + "model_endpoint_name": "dummy_model_name", + "model_endpoint_uid": "dummy_model_endpoint_uid", + "serving_function": "dummy_serving_function", + }, + False, + ), + # Invalid case: both v3io and kafka settings provided + ( + { + "v3io_stream_path": "dummy_stream_path", + "v3io_container": "dummy_container", + "kafka_stream_profile_name": "dummy_stream_profile_name", + "model_endpoint_name": "dummy_model_name", + "model_endpoint_uid": "dummy_model_endpoint_uid", + "serving_function": "dummy_serving_function", + }, + False, + ), + # Invalid case: both v3io and kafka settings provided (partial) + ( + { + "v3io_container": "dummy_container", + "kafka_linger_ms": "1000", + "model_endpoint_name": "dummy_model_name", + "model_endpoint_uid": "dummy_model_endpoint_uid", + "serving_function": "dummy_serving_function", + }, + False, + ), + ] +) +def test_settings_v3io_kafka_combination(test_suite: tuple[dict[str, str], bool]): + """ + Test that settings validation enforces mutual exclusivity between v3io and kafka configurations. + + :param test_suite: A tuple containing environment variable overrides and a flag indicating + whether validation should pass. + """ + settings, should_pass = test_suite + + if should_pass: + MLRunTracerClientSettings(**settings) + else: + with pytest.raises(ValidationError): + MLRunTracerClientSettings(**settings) + + +def test_auto_mode_singleton_thread_safety(auto_mode_settings): + """ + Test that MLRunTracer singleton initialization is thread-safe in 'auto' mode. + + :param auto_mode_settings: Fixture to set up 'auto' mode environment and settings. 
+    """
+    # Initialize a list to hold tracer instances created in different threads:
+    tracer_instances = []
+
+    # Function to initialize the tracer in a thread:
+    def _init_tracer():
+        tracer = MLRunTracer()
+        return tracer
+
+    # Use ThreadPoolExecutor to simulate concurrent tracer initialization:
+    num_threads = 50
+    with ThreadPoolExecutor(max_workers=num_threads) as executor:
+        futures = [executor.submit(_init_tracer) for _ in range(num_threads)]
+        tracer_instances = [f.result() for f in futures]
+
+    # Check that every tracer in the list is the exact same instance (compared by UID):
+    unique_instances = set(tracer._uid for tracer in tracer_instances)
+
+    assert len(tracer_instances) == num_threads, "Not all threads returned a tracer instance. Test cannot proceed."
+    assert len(unique_instances) == 1, (
+        f"Thread-safety failure! {len(unique_instances)} different instances were created under high concurrency."
+    )
+    assert tracer_instances[0] is MLRunTracer(), "The global access point should return the same singleton."
+
+
+def test_manual_mode_multi_instances(manual_mode_settings: MLRunTracerSettings):
+    """
+    Test that MLRunTracer allows multiple instances in 'manual' mode.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    """
+    # Initialize a list to hold tracer instances created in different iterations:
+    tracer_instances = []
+
+    # Create multiple tracer instances:
+    num_instances = 50
+    for _ in range(num_instances):
+        tracer = MLRunTracer(settings=manual_mode_settings)
+        tracer_instances.append(tracer)
+
+    # Check that every tracer in the list is a different instance (compared by UID):
+    unique_instances = set(tracer._uid for tracer in tracer_instances)
+
+    assert len(tracer_instances) == num_instances, "Not all instances were created. Test cannot proceed."
+    assert len(unique_instances) == num_instances, (
+        f"Manual mode failure! {len(unique_instances)} unique instances were created instead of {num_instances}."
+    )
+
+
+@pytest.mark.parametrize("run_suites", _run_suites)
+def test_auto_mode(auto_mode_settings, run_suites: tuple[Callable, int]):
+    """
+    Test that MLRunTracer in 'auto' mode captures the debug target list after running a LangChain / LangGraph example
+    code.
+
+    :param auto_mode_settings: Fixture to set up 'auto' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events.
+    """
+    run_func, expected_events = run_suites
+
+    tracer = MLRunTracer()
+    assert len(tracer.settings.monitor.debug_target_list) == 0
+
+    print(run_func())
+    assert len(tracer.settings.monitor.debug_target_list) == expected_events
+
+
+@pytest.mark.parametrize("run_suites", _run_suites)
+def test_manual_mode(manual_mode_settings: MLRunTracerSettings, run_suites: tuple[Callable, int]):
+    """
+    Test that MLRunTracer in 'manual' mode captures the debug target list after running a LangChain / LangGraph
+    example code.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events.
+    """
+    run_func, expected_events = run_suites
+
+    with mlrun_monitoring(settings=manual_mode_settings) as tracer:
+        print(run_func())
+        assert len(tracer.settings.monitor.debug_target_list) == expected_events
+
+
+def test_labeling(manual_mode_settings: MLRunTracerSettings):
+    """
+    Test that MLRunTracer attaches the configured label to every monitored event across the example runs.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    """
+    for i, (run_func, expected_events) in enumerate(_run_suites):
+        label = f"label_{i}"
+        manual_mode_settings.monitor.label = label
+        manual_mode_settings.monitor.debug_target_list.clear()
+        with mlrun_monitoring(settings=manual_mode_settings) as tracer:
+            print(run_func())
+            assert len(tracer.settings.monitor.debug_target_list) == expected_events
+            for event in tracer.settings.monitor.debug_target_list:
+                assert event["label"] == label
+
+
+@pytest.mark.parametrize(
+    "run_suites", [
+        run_suite + (filtered_events,)
+        for run_suite, filtered_events in zip(_run_suites, [1, 2, 1])
+    ]
+)
+def test_monitor_settings_tags_filter(
+    manual_mode_settings: MLRunTracerSettings,
+    run_suites: tuple[Callable, int, int],
+):
+    """
+    Test the `tags_filter` setting of MLRunTracer.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events and filtered events.
+    """
+    run_func, expected_events, filtered_events = run_suites
+
+    manual_mode_settings.monitor.tags_filter = [_dummy_tag]
+
+    with mlrun_monitoring(settings=manual_mode_settings) as tracer:
+        print(run_func())
+        assert len(tracer.settings.monitor.debug_target_list) == filtered_events
+        for event in tracer.settings.monitor.debug_target_list:
+            assert not set(manual_mode_settings.monitor.tags_filter).isdisjoint(event["input_data"]["input_data"]["tags"])
+
+
+@pytest.mark.parametrize(
+    "run_suites", [
+        run_suite + (filtered_events,)
+        for run_suite, filtered_events in zip(_run_suites, [1, 3, 4])
+    ]
+)
+def test_monitor_settings_name_filter(
+    manual_mode_settings: MLRunTracerSettings,
+    run_suites: tuple[Callable, int, int],
+):
+    """
+    Test the `names_filter` setting of MLRunTracer.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events and filtered events.
+    """
+    run_func, expected_events, filtered_events = run_suites
+
+    manual_mode_settings.monitor.names_filter = ["StrOutputParser", "get_weather", "model", "router"]
+
+    with mlrun_monitoring(settings=manual_mode_settings) as tracer:
+        print(run_func())
+        assert len(tracer.settings.monitor.debug_target_list) == filtered_events
+        for event in tracer.settings.monitor.debug_target_list:
+            assert event["input_data"]["input_data"]["run_name"] in manual_mode_settings.monitor.names_filter
+
+
+@pytest.mark.parametrize(
+    "run_suites", [
+        run_suite + (filtered_events,)
+        for run_suite, filtered_events in zip(_run_suites, [2, 7, 9])
+    ]
+)
+@pytest.mark.parametrize("split_runs", [True, False])
+def test_monitor_settings_run_type_filter(
+    manual_mode_settings: MLRunTracerSettings,
+    run_suites: tuple[Callable, int, int],
+    split_runs: bool
+):
+    """
+    Test the `run_types_filter` setting of MLRunTracer. The test also runs with split runs enabled and disabled:
+    when disabled, filtering out a parent run filters out all of its child runs by default. The test ensures the root
+    run always passes the filter, hence the expectation of a single event when split runs are disabled.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events and filtered events.
+    :param split_runs: Whether to enable split runs in the monitor settings.
+    """
+    run_func, expected_events, filtered_events = run_suites
+    filtered_events = filtered_events if split_runs else 1
+
+    manual_mode_settings.monitor.run_types_filter = ["llm", "chain"]
+    manual_mode_settings.monitor.split_runs = split_runs
+
+    def recursive_check_run_types(run: dict):
+        assert run["input_data"]["run_type"] in manual_mode_settings.monitor.run_types_filter
+        if "child_runs" in run["output_data"]:
+            for child_run in run["output_data"]["child_runs"]:
+                recursive_check_run_types(child_run)
+
+    with mlrun_monitoring(settings=manual_mode_settings) as tracer:
+        print(run_func())
+        assert len(tracer.settings.monitor.debug_target_list) == filtered_events
+
+    for event in tracer.settings.monitor.debug_target_list:
+        event_run = {
+            "input_data": event["input_data"]["input_data"],
+            "output_data": event["output_data"]["output_data"],
+        }
+        recursive_check_run_types(run=event_run)
+
+
+@pytest.mark.parametrize("run_suites", _run_suites)
+@pytest.mark.parametrize("split_runs", [True, False])
+def test_monitor_settings_full_filter(
+    manual_mode_settings: MLRunTracerSettings,
+    run_suites: tuple[Callable, int],
+    split_runs: bool
+):
+    """
+    Test that a complete filter (not allowing any events to pass) won't fail the tracer.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events.
+    :param split_runs: Whether to enable split runs in the monitor settings.
+    """
+    run_func, _ = run_suites
+
+    manual_mode_settings.monitor.run_types_filter = ["dummy_run_type"]
+    manual_mode_settings.monitor.split_runs = split_runs
+
+    with mlrun_monitoring(settings=manual_mode_settings) as tracer:
+        print(run_func())
+        assert len(tracer.settings.monitor.debug_target_list) == 0
+
+
+@pytest.mark.parametrize("run_suites", _run_suites)
+@pytest.mark.parametrize("split_runs", [True, False])
+@pytest.mark.parametrize("root_run_only", [True, False])
+def test_monitor_settings_split_runs_and_root_run_only(
+    manual_mode_settings: MLRunTracerSettings,
+    run_suites: tuple[Callable, int],
+    split_runs: bool,
+    root_run_only: bool,
+):
+    """
+    Test the `split_runs` and `root_run_only` settings of MLRunTracer.
+
+    :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings.
+    :param run_suites: The function to run with the expected monitored events.
+    :param split_runs: Whether to enable split runs in the monitor settings.
+    :param root_run_only: Whether to enable `root_run_only` in the monitor settings.
+ """ + run_func, expected_events = run_suites + + manual_mode_settings.monitor.split_runs = split_runs + manual_mode_settings.monitor.root_run_only = root_run_only + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + for run_iteration in range(1, 3): + print(run_func()) + if root_run_only: + assert len(tracer.settings.monitor.debug_target_list) == 1 * run_iteration + assert "child_runs" not in tracer.settings.monitor.debug_target_list[-1]["output_data"]["output_data"] + elif split_runs: + assert len(tracer.settings.monitor.debug_target_list) == expected_events * run_iteration + assert "child_runs" not in tracer.settings.monitor.debug_target_list[-1]["output_data"]["output_data"] + else: # split_runs disabled + assert len(tracer.settings.monitor.debug_target_list) == 1 * run_iteration + assert len(tracer.settings.monitor.debug_target_list[-1]["output_data"]["output_data"]["child_runs"]) != 0 + + +class _CustomRunSummarizerSettings(BaseSettings): + """ + Settings for the custom summarizer function. + """ + dummy_value: int = 21 + + model_config = SettingsConfigDict(env_prefix="TEST_CUSTOM_SUMMARIZER_SETTINGS_") + + +def _custom_run_summarizer(run: Run, settings: _CustomRunSummarizerSettings = None): + """ + A custom summarizer function for testing. + + :param run: The LangChain / LangGraph run to summarize. + :param settings: Optional settings for the summarizer. + """ + inputs = { + "run_id": run.id, + "input": run.inputs, + "from_settings": settings.dummy_value if settings else 0, + } + + def count_llm_calls(r: Run) -> int: + if not r.child_runs: + return 1 if r.run_type == "llm" else 0 + return sum(count_llm_calls(child) for child in r.child_runs) + + def count_tool_calls(r: Run) -> int: + if not r.child_runs: + return 1 if r.run_type == "tool" else 0 + return sum(count_tool_calls(child) for child in r.child_runs) + + outputs = { + "llm_calls": count_llm_calls(run), + "tool_calls": count_tool_calls(run), + "output": run.outputs + } + + yield inputs, outputs + + +@pytest.mark.parametrize("run_suites", _run_suites) +@pytest.mark.parametrize("run_summarizer_function", [ + _custom_run_summarizer, + "test_langchain_mlrun._custom_run_summarizer", +]) +@pytest.mark.parametrize("run_summarizer_settings", [ + _CustomRunSummarizerSettings(dummy_value=12), + "test_langchain_mlrun._CustomRunSummarizerSettings", + None, +]) +def test_monitor_settings_custom_run_summarizer( + manual_mode_settings: MLRunTracerSettings, + run_suites: tuple[Callable, int], + run_summarizer_function: Callable | str, + run_summarizer_settings: BaseSettings | str | None, +): + """ + Test the custom run summarizer that can be passed to MLRunTracer. + + :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings. + :param run_suites: The function to run with the expected monitored events. + :param run_summarizer_function: The custom summarizer function or its import path. + :param run_summarizer_settings: The settings for the custom summarizer or its import path. 
+ """ + run_func, _ = run_suites + manual_mode_settings.monitor.run_summarizer_function = run_summarizer_function + manual_mode_settings.monitor.run_summarizer_settings = run_summarizer_settings + dummy_value_for_settings_from_env = 26 + os.environ["TEST_CUSTOM_SUMMARIZER_SETTINGS_DUMMY_VALUE"] = str(dummy_value_for_settings_from_env) + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + print(run_func()) + assert len(tracer.settings.monitor.debug_target_list) == 1 + + event = tracer.settings.monitor.debug_target_list[0] + if run_summarizer_settings: + if isinstance(run_summarizer_settings, str): + assert event["input_data"]["input_data"]["from_settings"] == dummy_value_for_settings_from_env + else: + assert event["input_data"]["input_data"]["from_settings"] == run_summarizer_settings.dummy_value + else: + assert event["input_data"]["input_data"]["from_settings"] == 0 + + +def test_monitor_settings_include_errors_field_presence(manual_mode_settings: MLRunTracerSettings): + """ + Test that when `include_errors` is True, the error field is present in outputs. + When `include_errors` is False, the error field is not added to outputs. + + :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings. + """ + # Run with include_errors=True (default) and verify error field is present: + manual_mode_settings.monitor.include_errors = True + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + _run_simple_chain() + assert len(tracer.settings.monitor.debug_target_list) > 0 + + for event in tracer.settings.monitor.debug_target_list: + output_data = event["output_data"]["output_data"] + assert "error" in output_data, "error field should be present when include_errors is True" + + # Now run with include_errors=False and verify error field is excluded: + manual_mode_settings.monitor.include_errors = False + manual_mode_settings.monitor.debug_target_list.clear() + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + _run_simple_chain() + assert len(tracer.settings.monitor.debug_target_list) > 0 + + for event in tracer.settings.monitor.debug_target_list: + output_data = event["output_data"]["output_data"] + assert "error" not in output_data, "error field should be excluded when include_errors is False" + + +def test_monitor_settings_include_full_run(manual_mode_settings: MLRunTracerSettings): + """ + Test that when `include_full_run` is True, the complete serialized run is included in outputs. + + :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings. + """ + manual_mode_settings.monitor.include_full_run = True + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + _run_simple_chain() + + assert len(tracer.settings.monitor.debug_target_list) > 0 + + for event in tracer.settings.monitor.debug_target_list: + output_data = event["output_data"]["output_data"] + assert "full_run" in output_data, "full_run should be included in outputs when include_full_run is True" + # Verify the full_run contains expected run structure: + assert "inputs" in output_data["full_run"] + assert "outputs" in output_data["full_run"] + + +def test_monitor_settings_include_metadata(manual_mode_settings: MLRunTracerSettings): + """ + Test that when `include_metadata` is False, metadata is excluded from inputs. + + Note: The fake models used in tests don't produce runs with metadata, so we can only + verify the "exclude" behavior. The code only adds metadata if the run actually contains it. 
+ + :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings. + """ + # Run with include_metadata=False and verify metadata is excluded: + manual_mode_settings.monitor.include_metadata = False + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + _run_simple_chain() + assert len(tracer.settings.monitor.debug_target_list) > 0 + + # Check that metadata is not present in inputs: + for event in tracer.settings.monitor.debug_target_list: + input_data = event["input_data"]["input_data"] + assert "metadata" not in input_data, "metadata should be excluded when include_metadata is False" + + +def test_monitor_settings_include_latency(manual_mode_settings: MLRunTracerSettings): + """ + Test that when `include_latency` is False, latency is excluded from outputs. + + :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings. + """ + manual_mode_settings.monitor.include_latency = False + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + _run_simple_chain() + assert len(tracer.settings.monitor.debug_target_list) > 0 + + for event in tracer.settings.monitor.debug_target_list: + assert "latency" not in event["output_data"]["output_data"], \ + "latency should be excluded when include_latency is False" + + +def test_import_from_module_path_errors(): + """ + Test that `_import_from_module_path` raises appropriate errors for invalid paths. + """ + # Test ValueError for path without a dot: + with pytest.raises(ValueError) as exc_info: + MLRunTracer._import_from_module_path("no_dot_path") + assert "must have at least one '.'" in str(exc_info.value) + + # Test ImportError for non-existent module: + with pytest.raises(ImportError) as exc_info: + MLRunTracer._import_from_module_path("nonexistent_module_xyz.SomeClass") + assert "Could not import" in str(exc_info.value) + + # Test AttributeError for non-existent attribute in existing module: + with pytest.raises(AttributeError) as exc_info: + MLRunTracer._import_from_module_path("os.nonexistent_attribute_xyz") + assert "Could not import" in str(exc_info.value) + + +#: Sample structured runs for testing LangChainMonitoringApp methods. 
+_sample_structured_runs = [ + { + "label": "test_label", + "child_level": 0, + "input_data": { + "run_name": "RunnableSequence", + "run_type": "chain", + "tags": ["tag1"], + "inputs": {"topic": "MLRun"}, + "start_timestamp": "2024-01-01T10:00:00+00:00", + }, + "output_data": { + "outputs": {"result": "test output"}, + "end_timestamp": "2024-01-01T10:00:01+00:00", + "error": None, + "child_runs": [ + { + "input_data": { + "run_name": "FakeListChatModel", + "run_type": "llm", + "tags": ["tag2"], + "inputs": {"prompt": "test"}, + "start_timestamp": "2024-01-01T10:00:00.100+00:00", + }, + "output_data": { + "outputs": { + "generations": [[{ + "message": { + "kwargs": { + "usage_metadata": { + "input_tokens": 10, + "output_tokens": 20, + } + } + } + }]] + }, + "end_timestamp": "2024-01-01T10:00:00.500+00:00", + "error": None, + }, + }, + ], + }, + }, + { + "label": "test_label", + "child_level": 0, + "input_data": { + "run_name": "SimpleAgent", + "run_type": "chain", + "tags": ["tag1"], + "inputs": {"query": "test query"}, + "start_timestamp": "2024-01-01T10:00:02+00:00", + }, + "output_data": { + "outputs": {"result": "agent output"}, + "end_timestamp": "2024-01-01T10:00:04+00:00", + "error": "SomeError: something went wrong", + }, + }, +] + + +def test_langchain_monitoring_app_iterate_structured_runs(): + """ + Test that `iterate_structured_runs` yields all runs including nested child runs. + """ + # Iterate over all runs: + all_runs = list(LangChainMonitoringApp.iterate_structured_runs(_sample_structured_runs)) + + # Should yield parent runs and child runs: + # - First sample: 1 parent + 1 child = 2 runs + # - Second sample: 1 parent = 1 run + # Total: 3 runs + assert len(all_runs) == 3 + + # Verify run names are as expected: + run_names = [r["input_data"]["run_name"] for r in all_runs] + assert "RunnableSequence" in run_names + assert "FakeListChatModel" in run_names + assert "SimpleAgent" in run_names + + +def test_langchain_monitoring_app_count_run_names(): + """ + Test that `count_run_names` correctly counts occurrences of each run name. + """ + counts = LangChainMonitoringApp.count_run_names(_sample_structured_runs) + + assert counts["RunnableSequence"] == 1 + assert counts["FakeListChatModel"] == 1 + assert counts["SimpleAgent"] == 1 + + +def test_langchain_monitoring_app_count_token_usage(): + """ + Test that `count_token_usage` correctly calculates total tokens from LLM runs. + """ + token_usage = LangChainMonitoringApp.count_token_usage(_sample_structured_runs) + + assert token_usage["total_input_tokens"] == 10 + assert token_usage["total_output_tokens"] == 20 + assert token_usage["combined_total"] == 30 + + +def test_langchain_monitoring_app_calculate_success_rate(): + """ + Test that `calculate_success_rate` returns the correct percentage of successful runs. 
+ """ + success_rate = LangChainMonitoringApp.calculate_success_rate(_sample_structured_runs) + + # First run has no error, second run has error: + # Success rate should be 1/2 = 0.5 + assert success_rate == 0.5 + + # Test with empty list: + empty_rate = LangChainMonitoringApp.calculate_success_rate([]) + assert empty_rate == 0.0 + + # Test with all successful runs: + successful_runs = [_sample_structured_runs[0]] # Only the first run which has no error + all_success_rate = LangChainMonitoringApp.calculate_success_rate(successful_runs) + assert all_success_rate == 1.0 + + +def test_langchain_monitoring_app_calculate_average_latency(): + """ + Test that `calculate_average_latency` returns the correct average latency across root runs. + """ + # Calculate average latency: + avg_latency = LangChainMonitoringApp.calculate_average_latency(_sample_structured_runs) + + # First run: 10:00:00 to 10:00:01 = 1000ms + # Second run: 10:00:02 to 10:00:04 = 2000ms + # Average: (1000 + 2000) / 2 = 1500ms + assert avg_latency == 1500.0 + + # Test with empty list: + empty_latency = LangChainMonitoringApp.calculate_average_latency([]) + assert empty_latency == 0.0 + + +def test_langchain_monitoring_app_calculate_average_latency_skips_child_runs(): + """ + Test that `calculate_average_latency` skips child runs (only calculates for root runs). + """ + # Create a sample with a child run that has child_level > 0: + runs_with_child = [ + { + "label": "test", + "child_level": 0, + "input_data": {"start_timestamp": "2024-01-01T10:00:00+00:00"}, + "output_data": {"end_timestamp": "2024-01-01T10:00:01+00:00"}, + }, + { + "label": "test", + "child_level": 1, # This is a child run, should be skipped + "input_data": {"start_timestamp": "2024-01-01T10:00:00+00:00"}, + "output_data": {"end_timestamp": "2024-01-01T10:00:10+00:00"}, # 10 seconds - would skew average + }, + ] + + # Calculate average latency: + avg_latency = LangChainMonitoringApp.calculate_average_latency(runs_with_child) + + # Should only consider the root run (1000ms), not the child run: + assert avg_latency == 1000.0 + + +def test_debug_mode_stdout(manual_mode_settings: MLRunTracerSettings, capsys): + """ + Test that debug mode prints to stdout when `debug_target_list` is not set (is False). + + :param manual_mode_settings: Fixture to set up 'manual' mode environment and settings. + :param capsys: Pytest fixture to capture stdout/stderr. + """ + # Set debug mode with debug_target_list=False (should print to stdout): + manual_mode_settings.monitor.debug = True + manual_mode_settings.monitor.debug_target_list = False + + with mlrun_monitoring(settings=manual_mode_settings) as tracer: + _run_simple_chain() + + # Capture stdout: + captured = capsys.readouterr() + + # Verify that JSON output was printed to stdout: + assert "event_id" in captured.out, "Event should be printed to stdout when debug_target_list is False" + assert "input_data" in captured.out + assert "output_data" in captured.out