"""The STIX shifter data source package provides access to data sources via
`stix-shifter`_.
The STIX Shifter interface can reach multiple data sources. The user needs to
provide one *profile* per data source. The profile name (case insensitive) will
be used in the ``FROM`` clause of the Kestrel ``GET`` command, e.g., ``newvar =
GET entity-type FROM stixshifter://profilename WHERE ...``. Kestrel runtime
will load profiles from 3 places (the later will override the former):
#. stix-shifter interface config file (only when a Kestrel session starts):
Put your profiles in the stix-shifter interface config file (YAML):
- Default path: ``~/.config/kestrel/stixshifter.yaml``.
- A customized path specified in the environment variable ``KESTREL_STIXSHIFTER_CONFIG``.
Example of stix-shifter interface config file containing profiles:
.. code-block:: yaml
profiles:
host101:
connector: elastic_ecs
connection:
host: elastic.securitylog.company.com
port: 9200
selfSignedCert: false # this means do NOT check cert
indices: host101
config:
auth:
id: VuaCfGcBCdbkQm-e5aOx
api_key: ui2lp2axTNmsyakw9tvNnw
host102:
connector: qradar
connection:
host: qradar.securitylog.company.com
port: 443
config:
auth:
SEC: 123e4567-e89b-12d3-a456-426614174000
host103:
connector: cbcloud
connection:
host: cbcloud.securitylog.company.com
port: 443
config:
auth:
org-key: D5DQRHQP
token: HT8EMI32DSIMAQ7DJM
#. environment variables (only when a Kestrel session starts):
Three environment variables are required for each profile:
- ``STIXSHIFTER_PROFILENAME_CONNECTOR``: the STIX Shifter connector name,
e.g., ``elastic_ecs``.
- ``STIXSHIFTER_PROFILENAME_CONNECTION``: the STIX Shifter `connection
<https://github.com/opencybersecurityalliance/stix-shifter/blob/master/OVERVIEW.md#connection>`_
object in JSON string.
- ``STIXSHIFTER_PROFILENAME_CONFIG``: the STIX Shifter `configuration
<https://github.com/opencybersecurityalliance/stix-shifter/blob/master/OVERVIEW.md#configuration>`_
object in JSON string.
Example of environment variables for a profile:
.. code-block:: console
$ export STIXSHIFTER_HOST101_CONNECTOR=elastic_ecs
$ export STIXSHIFTER_HOST101_CONNECTION='{"host":"elastic.securitylog.company.com", "port":9200, "indices":"host101"}'
$ export STIXSHIFTER_HOST101_CONFIG='{"auth":{"id":"VuaCfGcBCdbkQm-e5aOx", "api_key":"ui2lp2axTNmsyakw9tvNnw"}}'
#. any in-session edit through the ``CONFIG`` command.
If you launch Kestrel in debug mode, stix-shifter debug mode is still not
enabled by default. To record debug level logs of stix-shifter, create
environment variable ``KESTREL_STIXSHIFTER_DEBUG`` with any value.
.. _stix-shifter: https://github.com/opencybersecurityalliance/stix-shifter
"""
import sys
import json
import time
import copy
import logging
import importlib
import subprocess
import requests
from lxml import html
from stix_shifter.stix_translation import stix_translation
from stix_shifter.stix_transmission import stix_transmission
from kestrel.utils import mkdtemp
from kestrel.datasource import AbstractDataSourceInterface
from kestrel.datasource import ReturnFromFile
from kestrel.exceptions import DataSourceError, DataSourceManagerInternalError
from kestrel_datasource_stixshifter.config import (
RETRIEVAL_BATCH_SIZE,
get_datasource_from_profiles,
load_profiles,
set_stixshifter_logging_level,
)
_logger = logging.getLogger(__name__)
XPATH_PYPI_PKG_HOME = "/html/body/main/div[4]/div/div/div[1]/div[2]/ul/li[1]/a/@href"
XPATH_PYPI_PKG_SOURCE = "/html/body/main/div[4]/div/div/div[1]/div[2]/ul/li[2]/a/@href"
STIX_SHIFTER_HOMEPAGE = "https://github.com/opencybersecurityalliance/stix-shifter"
def check_module_availability(connector_name):
try:
importlib.import_module(
"stix_shifter_modules." + connector_name + ".entry_point"
)
except:
_logger.info(f'miss stix-shifter connector "{connector_name}"')
package_name = "stix-shifter-modules-" + connector_name.replace("_", "-")
_logger.debug(f"guess the connector package name: {package_name}")
_logger.debug(
"go to PyPI to verify whether the package is genuine from stix-shifter project"
)
try:
pypi_response = requests.get(f"https://pypi.org/project/{package_name}")
pypi_etree = html.fromstring(pypi_response.content)
except:
raise DataSourceError(
f'STIX shifter connector for "{connector_name}" is not installed '
f'and Kestrel guessed Python package name "{package_name}" but could not connect to PyPI to verify its genuineness',
"please manually install the correct stix-shifter connector Python package.",
)
try:
p_homepage = pypi_etree.xpath(XPATH_PYPI_PKG_HOME)[0]
p_source = pypi_etree.xpath(XPATH_PYPI_PKG_SOURCE)[0]
except:
raise DataSourceError(
f'STIX shifter connector for "{connector_name}" is not installed '
f'and Kestrel guessed Python package name "{package_name}" but could not verify its genuineness due to PyPI design change',
"please find the correct stix-shifter connector Python package to install. "
"And report to Kestrel developers about this package verification failure",
)
if p_homepage != STIX_SHIFTER_HOMEPAGE or p_source != STIX_SHIFTER_HOMEPAGE:
raise DataSourceError(
f'STIX shifter connector for "{connector_name}" is not installed '
f'and Kestrel found Python package "{package_name}" is not a genuine stix-shifter package',
"please find the correct stix-shifter connector Python package to install. "
f"And report to Kestrel developers about this malicious package",
)
_logger.info(
f'find Python package "{package_name}" for the missing stix-shifter connector "{connector_name}".'
)
_logger.info(f'install Python package "{package_name}".')
try:
subprocess.check_call(
[sys.executable, "-m", "pip", "install", package_name]
)
except:
_logger.info("package installation with 'pip' failed.")
try:
importlib.import_module(
"stix_shifter_modules." + connector_name + ".entry_point"
)
except:
raise DataSourceError(
f'STIX shifter connector for "{connector_name}" is not installed '
f'and Kestrel failed to install the possible Python package "{package_name}"',
"please manually install the corresponding STIX shifter connector Python package.",
)
[docs]class StixShifterInterface(AbstractDataSourceInterface):
[docs] @staticmethod
def schemes():
"""STIX Shifter data source interface only supports ``stixshifter://`` scheme."""
return ["stixshifter"]
[docs] @staticmethod
def list_data_sources(config):
"""Get configured data sources from environment variable profiles."""
if not config:
config["profiles"] = load_profiles()
data_sources = list(config["profiles"].keys())
data_sources.sort()
return data_sources
[docs] @staticmethod
def query(uri, pattern, session_id, config):
"""Query a stixshifter data source."""
scheme, _, profile = uri.rpartition("://")
profiles = profile.split(",")
if not config:
config["profiles"] = load_profiles()
if scheme != "stixshifter":
raise DataSourceManagerInternalError(
f"interface {__package__} should not process scheme {scheme}"
)
set_stixshifter_logging_level()
ingestdir = mkdtemp()
query_id = ingestdir.name
bundles = []
_logger.debug(f"prepare query with ID: {query_id}")
for i, profile in enumerate(profiles):
# STIX-shifter will alter the config objects, thus making them not reusable.
# So only give stix-shifter a copy of the configs.
# Check `modernize` functions in the `stix_shifter_utils` for details.
(connector_name, connection_dict, configuration_dict,) = map(
copy.deepcopy, get_datasource_from_profiles(profile, config["profiles"])
)
check_module_availability(connector_name)
data_path_striped = "".join(filter(str.isalnum, profile))
ingestfile = ingestdir / f"{i}_{data_path_striped}.json"
query_metadata = json.dumps(
{"id": "identity--" + query_id, "name": connector_name}
)
translation = stix_translation.StixTranslation()
transmission = stix_transmission.StixTransmission(
connector_name, connection_dict, configuration_dict
)
dsl = translation.translate(
connector_name, "query", query_metadata, pattern, {}
)
if "error" in dsl:
raise DataSourceError(
f"STIX-shifter translation failed with message: {dsl['error']}"
)
_logger.debug(f"STIX pattern to query: {pattern}")
_logger.debug(f"translate results: {dsl}")
# query results should be put together; when translated to STIX, the relation between them will remain
connector_results = []
for query in dsl["queries"]:
search_meta_result = transmission.query(query)
if search_meta_result["success"]:
search_id = search_meta_result["search_id"]
if transmission.is_async():
time.sleep(1)
status = transmission.status(search_id)
if status["success"]:
while (
status["progress"] < 100
and status["status"] == "RUNNING"
):
status = transmission.status(search_id)
else:
stix_shifter_error_msg = (
status["error"]
if "error" in status
else "details not avaliable"
)
raise DataSourceError(
f"STIX-shifter transmission.status() failed with message: {stix_shifter_error_msg}"
)
result_retrieval_offset = 0
has_remaining_results = True
while has_remaining_results:
result_batch = transmission.results(
search_id, result_retrieval_offset, RETRIEVAL_BATCH_SIZE
)
if result_batch["success"]:
new_entries = result_batch["data"]
if new_entries:
connector_results += new_entries
result_retrieval_offset += RETRIEVAL_BATCH_SIZE
if len(new_entries) < RETRIEVAL_BATCH_SIZE:
has_remaining_results = False
else:
has_remaining_results = False
else:
stix_shifter_error_msg = (
result_batch["error"]
if "error" in result_batch
else "details not avaliable"
)
raise DataSourceError(
f"STIX-shifter transmission.results() failed with message: {stix_shifter_error_msg}"
)
else:
stix_shifter_error_msg = (
search_meta_result["error"]
if "error" in search_meta_result
else "details not avaliable"
)
raise DataSourceError(
f"STIX-shifter transmission.query() failed with message: {stix_shifter_error_msg}"
)
_logger.debug("transmission succeeded, start translate back to STIX")
stixbundle = translation.translate(
connector_name,
"results",
query_metadata,
json.dumps(connector_results),
{},
)
_logger.debug(f"dumping STIX bundles into file: {ingestfile}")
with ingestfile.open("w") as ingest:
json.dump(stixbundle, ingest, indent=4)
bundles.append(str(ingestfile.expanduser().resolve()))
return ReturnFromFile(query_id, bundles)