improve: generalize transformations and scripts of runner and preloads into TemplateTransformer (#4487)

This commit is contained in:
Bowen Liang 2024-05-20 15:56:26 +08:00 committed by GitHub
parent c255a20d7c
commit 5f4df34829
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 210 additions and 231 deletions

View File

@ -12,7 +12,7 @@ from config import get_env
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES, Python3TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
logger = logging.getLogger(__name__)
@ -187,7 +187,8 @@ class CodeExecutor:
response = response.json()
dependencies = response.get('data', {}).get('dependencies', [])
return [
CodeDependency(**dependency) for dependency in dependencies if dependency.get('name') not in PYTHON_STANDARD_PACKAGES
CodeDependency(**dependency) for dependency in dependencies
if dependency.get('name') not in Python3TemplateTransformer.get_standard_packages()
]
except Exception as e:
logger.exception(f'Failed to list dependencies: {e}')

View File

@ -1,58 +1,25 @@
import json
import re
from typing import Optional
from textwrap import dedent
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.template_transformer import TemplateTransformer
NODEJS_RUNNER = """// declare main function here
{{code}}
// execute main function, and return the result
// inputs is a dict, unstructured inputs
output = main({{inputs}})
// convert output to json and print
output = JSON.stringify(output)
result = `<<RESULT>>${output}<<RESULT>>`
console.log(result)
"""
NODEJS_PRELOAD = """"""
class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
# transform inputs to json string
inputs_str = json.dumps(inputs, indent=4, ensure_ascii=False)
# replace code and inputs
runner = NODEJS_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
return runner, NODEJS_PRELOAD, []
@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)
def get_runner_script(cls) -> str:
runner_script = dedent(
f"""
// declare main function
{cls._code_placeholder}
// decode and prepare input object
var inputs_obj = JSON.parse(atob('{cls._inputs_placeholder}'))
// execute main function
var output_obj = main(inputs_obj)
// convert output to json and print
var output_json = JSON.stringify(output_obj)
var result = `<<RESULT>>${{output_json}}<<RESULT>>`
console.log(result)
""")
return runner_script

View File

@ -1,94 +1,13 @@
import json
import re
from base64 import b64encode
from typing import Optional
from textwrap import dedent
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
PYTHON_RUNNER = """
import jinja2
from json import loads
from base64 import b64decode
template = jinja2.Template('''{{code}}''')
def main(**inputs):
return template.render(**inputs)
# execute main function, and return the result
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**loads(inputs))
result = f'''<<RESULT>>{output}<<RESULT>>'''
print(result)
"""
JINJA2_PRELOAD_TEMPLATE = """{% set fruits = ['Apple'] %}
{{ 'a' }}
{% for fruit in fruits %}
<li>{{ fruit }}</li>
{% endfor %}
{% if fruits|length > 1 %}
1
{% endif %}
{% for i in range(5) %}
{% if i == 3 %}{{ i }}{% else %}{% endif %}
{% endfor %}
{% for i in range(3) %}
{{ i + 1 }}
{% endfor %}
{% macro say_hello() %}a{{ 'b' }}{% endmacro %}
{{ s }}{{ say_hello() }}"""
JINJA2_PRELOAD = f"""
import jinja2
from base64 import b64decode
def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('''{JINJA2_PRELOAD_TEMPLATE}''')
template.render(s='a')
if __name__ == '__main__':
_jinja2_preload_()
"""
class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
# transform jinja2 template to python code
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
if not dependencies:
dependencies = []
# add native packages and jinja2
for package in PYTHON_STANDARD_PACKAGES.union(['jinja2']):
dependencies.append(CodeDependency(name=package, version=''))
# deduplicate
dependencies = list({
dep.name: dep for dep in dependencies if dep.name
}.values())
return runner, JINJA2_PRELOAD, dependencies
def get_standard_packages(cls) -> set[str]:
return {'jinja2'} | Python3TemplateTransformer.get_standard_packages()
@classmethod
def transform_response(cls, response: str) -> dict:
@ -97,12 +16,49 @@ class Jinja2TemplateTransformer(TemplateTransformer):
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return {
'result': result
'result': cls.extract_result_str_from_response(response)
}
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
def main(**inputs):
import jinja2
template = jinja2.Template('''{cls._code_placeholder}''')
return template.render(**inputs)
import json
from base64 import b64decode
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function
output = main(**inputs_obj)
# convert output and print
result = f'''<<RESULT>>{{output}}<<RESULT>>'''
print(result)
""")
return runner_script
@classmethod
def get_preload_script(cls) -> str:
preload_script = dedent("""
import jinja2
from base64 import b64decode
def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('{{s}}')
template.render(s='a')
if __name__ == '__main__':
_jinja2_preload_()
""")
return preload_script

View File

@ -1,83 +1,51 @@
import json
import re
from base64 import b64encode
from textwrap import dedent
from typing import Optional
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.template_transformer import TemplateTransformer
PYTHON_RUNNER = dedent("""
# declare main function here
{{code}}
from json import loads, dumps
from base64 import b64decode
# execute main function, and return the result
# inputs is a dict, and it
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**json.loads(inputs))
# convert output to json and print
output = dumps(output, indent=4)
result = f'''<<RESULT>>
{output}
<<RESULT>>'''
print(result)
""")
PYTHON_PRELOAD = """"""
PYTHON_STANDARD_PACKAGES = {
'json', 'datetime', 'math', 'random', 're', 'string', 'sys', 'time', 'traceback', 'uuid', 'os', 'base64',
'hashlib', 'hmac', 'binascii', 'collections', 'functools', 'operator', 'itertools', 'uuid',
}
class Python3TemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
# transform inputs to json string
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
def get_standard_packages(cls) -> set[str]:
return {
'base64',
'binascii',
'collections',
'datetime',
'functools',
'hashlib',
'hmac',
'itertools',
'json',
'math',
'operator',
'os',
'random',
're',
'string',
'sys',
'time',
'traceback',
'uuid',
}
# replace code and inputs
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
# add standard packages
if dependencies is None:
dependencies = []
for package in PYTHON_STANDARD_PACKAGES:
if package not in dependencies:
dependencies.append(CodeDependency(name=package, version=''))
# deduplicate
dependencies = list({dep.name: dep for dep in dependencies if dep.name}.values())
return runner, PYTHON_PRELOAD, dependencies
@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*?)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
{cls._code_placeholder}
import json
from base64 import b64decode
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function
output_obj = main(**inputs_obj)
# convert output to json and print
output_json = json.dumps(output_obj, indent=4)
result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
print(result)
""")
return runner_script

View File

@ -1,13 +1,25 @@
import json
import re
from abc import ABC, abstractmethod
from base64 import b64encode
from typing import Optional
from pydantic import BaseModel
from core.helper.code_executor.entities import CodeDependency
class TemplateTransformer(ABC):
class TemplateTransformer(ABC, BaseModel):
_code_placeholder: str = '{{code}}'
_inputs_placeholder: str = '{{inputs}}'
_result_tag: str = '<<RESULT>>'
@classmethod
@abstractmethod
def transform_caller(cls, code: str, inputs: dict,
def get_standard_packages(cls) -> set[str]:
return set()
@classmethod
def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
"""
Transform code to python runner
@ -15,14 +27,61 @@ class TemplateTransformer(ABC):
:param inputs: inputs
:return: runner, preload
"""
pass
runner_script = cls.assemble_runner_script(code, inputs)
preload_script = cls.get_preload_script()
packages = dependencies or []
standard_packages = cls.get_standard_packages()
for package in standard_packages:
if package not in packages:
packages.append(CodeDependency(name=package, version=''))
packages = list({dep.name: dep for dep in packages if dep.name}.values())
return runner_script, preload_script, packages
@classmethod
def extract_result_str_from_response(cls, response: str) -> str:
result = re.search(rf'{cls._result_tag}(.*){cls._result_tag}', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return result
@classmethod
@abstractmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
pass
return json.loads(cls.extract_result_str_from_response(response))
@classmethod
@abstractmethod
def get_runner_script(cls) -> str:
"""
Get runner script
"""
pass
@classmethod
def serialize_inputs(cls, inputs: dict) -> str:
inputs_json_str = json.dumps(inputs, ensure_ascii=False).encode()
input_base64_encoded = b64encode(inputs_json_str).decode('utf-8')
return input_base64_encoded
@classmethod
def assemble_runner_script(cls, code: str, inputs: dict) -> str:
# assemble runner script
script = cls.get_runner_script()
script = script.replace(cls._code_placeholder, code)
inputs_str = cls.serialize_inputs(inputs)
script = script.replace(cls._inputs_placeholder, inputs_str)
return script
@classmethod
def get_preload_script(cls) -> str:
"""
Get preload script
"""
return ''

View File

@ -2,6 +2,7 @@ from textwrap import dedent
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
CODE_LANGUAGE = CodeLanguage.JAVASCRIPT
@ -23,7 +24,8 @@ def test_javascript_json():
def test_javascript_with_code_template():
result = CodeExecutor.execute_workflow_code_template(
language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(), inputs={'arg1': 'Hello', 'arg2': 'World'})
language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(),
inputs={'arg1': 'Hello', 'arg2': 'World'})
assert result == {'result': 'HelloWorld'}
@ -32,3 +34,10 @@ def test_javascript_list_default_available_packages():
# no default packages available for javascript
assert len(packages) == 0
def test_javascript_get_runner_script():
runner_script = NodeJsTemplateTransformer.get_runner_script()
assert runner_script.count(NodeJsTemplateTransformer._code_placeholder) == 1
assert runner_script.count(NodeJsTemplateTransformer._inputs_placeholder) == 1
assert runner_script.count(NodeJsTemplateTransformer._result_tag) == 2

View File

@ -1,7 +1,7 @@
import base64
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.jinja2.jinja2_transformer import JINJA2_PRELOAD, PYTHON_RUNNER
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
CODE_LANGUAGE = CodeLanguage.JINJA2
@ -9,8 +9,12 @@ CODE_LANGUAGE = CodeLanguage.JINJA2
def test_jinja2():
template = 'Hello {{template}}'
inputs = base64.b64encode(b'{"template": "World"}').decode('utf-8')
code = PYTHON_RUNNER.replace('{{code}}', template).replace('{{inputs}}', inputs)
result = CodeExecutor.execute_code(language=CODE_LANGUAGE, preload=JINJA2_PRELOAD, code=code)
code = (Jinja2TemplateTransformer.get_runner_script()
.replace(Jinja2TemplateTransformer._code_placeholder, template)
.replace(Jinja2TemplateTransformer._inputs_placeholder, inputs))
result = CodeExecutor.execute_code(language=CODE_LANGUAGE,
preload=Jinja2TemplateTransformer.get_preload_script(),
code=code)
assert result == '<<RESULT>>Hello World<<RESULT>>\n'
@ -18,3 +22,10 @@ def test_jinja2_with_code_template():
result = CodeExecutor.execute_workflow_code_template(
language=CODE_LANGUAGE, code='Hello {{template}}', inputs={'template': 'World'})
assert result == {'result': 'Hello World'}
def test_jinja2_get_runner_script():
runner_script = Jinja2TemplateTransformer.get_runner_script()
assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2

View File

@ -3,6 +3,7 @@ from textwrap import dedent
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
CODE_LANGUAGE = CodeLanguage.PYTHON3
@ -35,3 +36,10 @@ def test_python3_list_default_available_packages():
# check JSON serializable
assert len(str(json.dumps(packages))) > 0
def test_python3_get_runner_script():
runner_script = Python3TemplateTransformer.get_runner_script()
assert runner_script.count(Python3TemplateTransformer._code_placeholder) == 1
assert runner_script.count(Python3TemplateTransformer._inputs_placeholder) == 1
assert runner_script.count(Python3TemplateTransformer._result_tag) == 2