Source code for retdec.decompiler

#
# Project:   retdec-python
# Copyright: (c) 2015 by Petr Zemek <s3rvac@gmail.com> and contributors
# License:   MIT, see the LICENSE file for more details
#

"""Access to the decompiler (decompilation of files)."""

from retdec.decompilation import Decompilation
from retdec.exceptions import MissingParameterError
from retdec.file import File
from retdec.service import Service


class Decompiler(Service):
    """Access to the decompilation service."""

    def start_decompilation(self, **kwargs):
        """Starts a decompilation with the given parameters.

        :param input_file: File to be analyzed (**required**).
        :type input_file: str or file-like object
        :param pdb_file: A PDB file associated with `input_file` containing
                         debugging information.
        :type pdb_file: str or file-like object
        :param mode: Decompilation mode.
        :type mode: str
        :param target_language: Target high-level language.
        :type target_language: str
        :param graph_format: Format of the generated call and control-flow
                             graphs.
        :type graph_format: str
        :param decomp_var_names: Naming style for variables.
        :type decomp_var_names: str
        :param decomp_optimizations: Level of optimizations performed by the
                                     decompiler.
        :type decomp_optimizations: str
        :param decomp_unreach_funcs: Should all functions be decompiled, even
                                     if they are not reachable from the main
                                     function?
        :type decomp_unreach_funcs: bool
        :param decomp_emit_addresses: Should addresses in comments be emitted
                                      in the generated code?
        :type decomp_emit_addresses: bool
        :param architecture: Architecture. The precise meaning depends on the
                             used `mode`.
        :type architecture: str
        :param file_format: File format. File format to be used when compiling
                            input C source files.
        :type file_format: str
        :param comp_compiler: Compiler to be used when compiling input C
                              source files.
        :type comp_compiler: str
        :param comp_optimizations: Compiler optimizations to be used when
                                   compiling input C source files.
        :type comp_optimizations: str
        :param comp_debug: Should the input C source file be compiled with
                           debugging information?
        :type comp_debug: bool
        :param comp_strip: Should the compiled input C source file be
                           stripped?
        :type comp_strip: bool
        :param sel_decomp_funcs: Decompile only the selected functions. It can
                                 be either an iterable of function names (e.g.
                                 ``['func1', 'func2']``) or a string with
                                 comma-separated function names (e.g.
                                 ``'func1, func2'``).
        :type sel_decomp_funcs: str/iterable
        :param sel_decomp_ranges: Decompile only the selected address ranges.
                                  It can be either an iterable of ranges (e.g.
                                  ``[(0x100, 0x200), (0x400, 0x500)]``) or a
                                  string with comma-separated ranges (e.g.
                                  ``'0x100-0x200,0x400-0x500'``).
        :type sel_decomp_ranges: str/iterable
        :param sel_decomp_decoding: What instructions should be decoded when
                                    either `sel_decomp_funcs` or
                                    `sel_decomp_ranges` is given?
        :type sel_decomp_decoding: str
        :param endian: Endianness of the machine code (``'little'`` or
                       ``'big'``). Only for the ``raw`` `mode`. The legacy
                       parameter name ``raw_endian`` is also accepted and is
                       used when `endian` is not given.
        :type endian: str
        :param raw_entry_point: Virtual memory address where execution flow
                                should start in the raw machine code. Only for
                                the ``raw`` `mode`.
        :type raw_entry_point: str
        :param raw_section_vma: Address where the section created from the raw
                                machine code will be placed in virtual memory.
                                Only for the ``raw`` `mode`.
        :type raw_section_vma: str
        :param ar_index: Index of the object file in the input archive to be
                         decompiled when decompiling an archive.
        :type ar_index: int/str
        :param ar_name: Name of the object file in the input archive to be
                        decompiled when decompiling an archive.
        :type ar_name: str
        :param generate_cg: Should a call graph be generated?
        :type generate_cg: bool
        :param generate_cfgs: Should control-flow graphs for all functions be
                              generated?
        :type generate_cfgs: bool
        :param generate_archive: Should an archive containing all outputs from
                                 the decompilation be generated?
        :type generate_archive: bool

        :returns: Started decompilation
                  (:class:`~retdec.decompilation.Decompilation`).

        :raises retdec.exceptions.MissingParameterError: When `input_file` is
            not given.

        If `mode` is not given, it is automatically determined based on the
        name of ``input_file``. If the input file ends with ``.c`` or ``.C``,
        the `mode` is set to ``c``. Otherwise, the `mode` is set to ``bin``.

        See the `official documentation
        <https://retdec.com/api/docs/decompiler.html#decompilation-parameters>`_
        for more information about the parameters.
        """
        conn = self._create_new_api_connection('/decompiler/decompilations')
        decompilation_id = self._start_decompilation(conn, kwargs)
        return Decompilation(decompilation_id, conn)

    def _start_decompilation(self, conn, kwargs):
        """Starts a decompilation with the given parameters.

        :param retdec.conn.APIConnection conn: Connection to the API to be
                                               used for sending API requests.
        :param dict kwargs: Parameters for the decompilation.

        :returns: Unique identifier of the decompilation.
        """
        # Files are uploaded separately from the ordinary request parameters.
        files = {
            'input': self._get_input_file(kwargs)
        }
        self._add_pdb_file_when_given(files, kwargs)

        # The mode is always sent; when not given, it is derived from the
        # name of the input file.
        params = {
            'mode': self._get_mode_param(files['input'], kwargs)
        }
        self._add_param_when_given('target_language', params, kwargs)
        self._add_param_when_given('graph_format', params, kwargs)
        self._add_param_when_given('decomp_var_names', params, kwargs)
        self._add_param_when_given('decomp_optimizations', params, kwargs)
        self._add_param_when_given('decomp_unreach_funcs', params, kwargs)
        self._add_param_when_given('decomp_emit_addresses', params, kwargs)
        self._add_param_when_given('architecture', params, kwargs)
        self._add_param_when_given('file_format', params, kwargs)
        self._add_param_when_given('comp_compiler', params, kwargs)
        self._add_param_when_given('comp_debug', params, kwargs)
        self._add_param_when_given('comp_strip', params, kwargs)
        self._add_param_when_given('comp_optimizations', params, kwargs)
        self._add_sel_decomp_funcs_param_when_given(params, kwargs)
        self._add_sel_decomp_ranges_param_when_given(params, kwargs)
        self._add_param_when_given('sel_decomp_decoding', params, kwargs)
        self._add_endian_param_when_given(params, kwargs)
        self._add_param_when_given('raw_entry_point', params, kwargs)
        self._add_param_when_given('raw_section_vma', params, kwargs)
        self._add_param_when_given('ar_index', params, kwargs)
        self._add_param_when_given('ar_name', params, kwargs)
        self._add_param_when_given('generate_archive', params, kwargs)
        self._add_param_when_given('generate_cg', params, kwargs)
        self._add_param_when_given('generate_cfgs', params, kwargs)

        response = conn.send_post_request(files=files, params=params)
        return response['id']

    def _get_input_file(self, kwargs):
        """Returns the input file to be decompiled.

        :raises retdec.exceptions.MissingParameterError: When ``input_file``
            is not present in `kwargs`.
        """
        # Guard only the parameter lookup. If the File construction was also
        # inside the try block, a KeyError raised from within it would be
        # misreported as a missing 'input_file' parameter.
        try:
            input_file = kwargs['input_file']
        except KeyError:
            raise MissingParameterError('input_file')
        return File(input_file)

    def _add_pdb_file_when_given(self, files, kwargs):
        """Adds a PDB file to `files` when it was given."""
        pdb_file = kwargs.get('pdb_file')
        if pdb_file is not None:
            files['pdb'] = File(pdb_file)

    def _get_mode_param(self, input_file, kwargs):
        """Returns a decompilation mode to be used."""
        return self._get_param(
            'mode',
            kwargs,
            choices={'c', 'bin', 'raw'},
            default=self._get_default_mode(input_file)
        )

    def _get_default_mode(self, input_file):
        """Returns a default decompilation mode to be used based on the given
        input file's name.

        The mode is ``'c'`` when the name ends with ``.c`` (compared
        case-insensitively) and ``'bin'`` otherwise.
        """
        return 'c' if input_file.name.lower().endswith('.c') else 'bin'

    def _add_sel_decomp_funcs_param_when_given(self, params, kwargs):
        """Adds the ``sel_decomp_funcs`` parameter to `params` when given in
        `kwargs`.

        A string is passed through unchanged; any other iterable is joined
        into a comma-separated string.
        """
        value = kwargs.get('sel_decomp_funcs')
        if value is not None:
            if not isinstance(value, str):
                value = ','.join(value)
            params['sel_decomp_funcs'] = value

    def _add_sel_decomp_ranges_param_when_given(self, params, kwargs):
        """Adds the ``sel_decomp_ranges`` parameter to `params` when given in
        `kwargs`.

        A string is passed through unchanged; any other iterable is converted
        into a comma-separated string of ranges. Tuples are formatted as
        ``start-end`` (integer addresses are emitted in hexadecimal); other
        items are converted with ``str()``.

        :raises AssertionError: When a range given as a tuple does not have
            exactly two items.
        """
        def ranges2str(ranges):
            return ','.join(range2str(rng) for rng in ranges)

        def range2str(rng):
            if isinstance(rng, tuple):
                # Raised explicitly (instead of using an assert statement) so
                # that the check is not stripped when running under -O. The
                # exception type and message are the same as before.
                if len(rng) != 2:
                    raise AssertionError('invalid range: {}'.format(rng))
                return '{}-{}'.format(
                    address2str(rng[0]), address2str(rng[1])
                )
            return str(rng)

        def address2str(address):
            if isinstance(address, int):
                return hex(address)
            return str(address)

        value = kwargs.get('sel_decomp_ranges')
        if value is not None:
            if not isinstance(value, str):
                value = ranges2str(value)
            params['sel_decomp_ranges'] = value

    def _add_endian_param_when_given(self, params, kwargs):
        """Adds the ``endian`` parameter to `params` when given in `kwargs`.

        The value is taken from ``endian`` or, when that is not given (or is
        ``None``), from the legacy ``raw_endian`` parameter.
        """
        # Since RetDec 2.2, the 'raw_endian' parameter has been renamed to
        # 'endian'. However, the original name should still be supported.
        endian = kwargs.get('endian')
        if endian is None:
            # Also covers an explicit endian=None, which would otherwise
            # shadow a value supplied under the legacy name.
            endian = kwargs.get('raw_endian')
        if endian is not None:
            params['endian'] = endian

    def __repr__(self):
        return '<{} api_url={!r}>'.format(
            __name__ + '.' + self.__class__.__name__,
            self.api_url
        )