Files
mesa/.gitlab-ci/lava/utils/log_section.py
Valentine Burley f6dce6dee1 ci: Add a minimal Alpine container for running LAVA jobs
Compared to the existing Debian-based x86_64_pyutils container, this
Alpine-based variant reduces the image size by approximately 83%.

Include all the necessary python artifacts, including lava_job_submitter
in the container to avoid having to download them at the start of each
test job.

Signed-off-by: Valentine Burley <valentine.burley@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34980>
2025-05-26 17:25:40 +00:00

150 lines
5.5 KiB
Python

# When changing this file, you need to bump the following
# .gitlab-ci/image-tags.yml tags:
# ALPINE_X86_64_LAVA_TRIGGER_TAG
import re
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum, auto
from os import getenv
from typing import Optional, Pattern, Union
from lava.utils.gitlab_section import GitlabSection
class LogSectionType(Enum):
    """Kinds of log section a LAVA job log can be split into.

    Members are numbered automatically in declaration order; only their
    identity is meant to be used, not their integer value.
    """

    # Section whose kind could not be determined.
    UNKNOWN = auto()

    # LAVA-side phases.
    LAVA_SUBMIT = auto()
    LAVA_QUEUE = auto()
    LAVA_DEPLOY = auto()
    LAVA_BOOT = auto()

    # Test execution phases.
    TEST_SUITE = auto()
    TEST_CASE = auto()

    # Teardown / transition after a test case.
    LAVA_POST_PROCESSING = auto()
# All timeouts below are expressed in whole minutes and may be overridden
# through the environment variable of the same name.

# How long to wait whilst we try to submit a job; make it fairly short,
# since the job will be retried.
LAVA_SUBMIT_TIMEOUT = int(getenv("LAVA_SUBMIT_TIMEOUT", 5))

# How long should we wait for a device to become available?
# For post-merge jobs, this should be ~infinite, but we can fail more
# aggressively for pre-merge.
LAVA_QUEUE_TIMEOUT = int(getenv("LAVA_QUEUE_TIMEOUT", 60))

# How long should we wait for a device to be deployed?
# The deploy involves downloading and decompressing the kernel, modules, dtb
# and the overlays.
# We should retry, to overcome network issues.
LAVA_DEPLOY_TIMEOUT = int(getenv("LAVA_DEPLOY_TIMEOUT", 5))

# Empirically, successful device deploy+boot in LAVA time takes less than 3
# minutes.
# LAVA itself is configured to attempt `failure_retry` times
# (NUMBER_OF_ATTEMPTS_LAVA_BOOT) to boot the device.
# It is better to retry the boot than cancel the job and re-submit to avoid
# the enqueue delay.
LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 5))

# Estimated overhead in minutes for a job from GitLab to reach the test phase,
# including LAVA scheduling and boot duration
LAVA_TEST_OVERHEAD_MIN = int(getenv("LAVA_TEST_OVERHEAD_MIN", 5))

# CI_JOB_TIMEOUT in full minutes, no reason to use seconds here.
# The variable holds seconds (hence the division by 60). When it is not set,
# e.g. when this module is imported outside a GitLab CI job, fall back to one
# hour instead of failing with an opaque TypeError from int(None).
CI_JOB_TIMEOUT_MIN = int(getenv("CI_JOB_TIMEOUT", 3600)) // 60

# Sanity check: we need more job time than the LAVA estimated overhead.
# Raised explicitly (rather than via `assert`) so the check is still enforced
# when Python runs with -O; the exception type and message are unchanged.
if CI_JOB_TIMEOUT_MIN <= LAVA_TEST_OVERHEAD_MIN:
    raise AssertionError(
        f"CI_JOB_TIMEOUT in full minutes ({CI_JOB_TIMEOUT_MIN}) must be greater than LAVA_TEST_OVERHEAD ({LAVA_TEST_OVERHEAD_MIN})"
    )

# Test suite phase is where initialization occurs on both the DUT and the
# Docker container.
# The device will be listening to the SSH session until the end of the job.
LAVA_TEST_SUITE_TIMEOUT = CI_JOB_TIMEOUT_MIN - LAVA_TEST_OVERHEAD_MIN

# Test cases may take a long time, this script has no right to interrupt
# them. But if the test case takes almost 1h, it will never succeed due to
# Gitlab job timeout.
LAVA_TEST_CASE_TIMEOUT = CI_JOB_TIMEOUT_MIN - LAVA_TEST_OVERHEAD_MIN

# LAVA post processing may refer to a test suite teardown, or the
# adjustments to start the next test_case
LAVA_POST_PROCESSING_TIMEOUT = int(getenv("LAVA_POST_PROCESSING_TIMEOUT", 5))

# Timeout applied when a section type has no dedicated entry.
FALLBACK_GITLAB_SECTION_TIMEOUT = timedelta(minutes=10)
# Per-section-type timeout, built from the minute values configured above.
# LogSectionType.UNKNOWN deliberately has no entry here.
DEFAULT_GITLAB_SECTION_TIMEOUTS = {
    kind: timedelta(minutes=minutes)
    for kind, minutes in (
        (LogSectionType.LAVA_SUBMIT, LAVA_SUBMIT_TIMEOUT),
        (LogSectionType.LAVA_QUEUE, LAVA_QUEUE_TIMEOUT),
        (LogSectionType.LAVA_DEPLOY, LAVA_DEPLOY_TIMEOUT),
        (LogSectionType.LAVA_BOOT, LAVA_BOOT_TIMEOUT),
        (LogSectionType.TEST_SUITE, LAVA_TEST_SUITE_TIMEOUT),
        (LogSectionType.TEST_CASE, LAVA_TEST_CASE_TIMEOUT),
        (LogSectionType.LAVA_POST_PROCESSING, LAVA_POST_PROCESSING_TIMEOUT),
    )
}
@dataclass(frozen=True)
class LogSection:
    """Rule describing how a LAVA log line opens a GitLab log section.

    A rule applies to a log line when the line's ``lvl`` is one of
    ``levels`` and ``regex`` is found in the line's ``msg``. The regex
    capture groups are then substituted positionally into the
    ``section_id`` and ``section_header`` format strings.
    """

    # Pattern searched (not anchored) in the log message.
    regex: Union[Pattern, str]
    # Log levels this rule applies to; any number of entries is allowed.
    levels: tuple[str, ...]
    # str.format template for the section id, fed with the regex groups.
    section_id: str
    # str.format template for the section header, fed with the regex groups.
    section_header: str
    section_type: LogSectionType
    # Forwarded to GitlabSection as ``start_collapsed``.
    collapsed: bool = False

    def __post_init__(self) -> None:
        # `levels=("info")` is just the string "info": the membership test
        # below would then silently become a substring test (e.g. "inf" or ""
        # would match). Normalise a lone string to a 1-tuple. The dataclass is
        # frozen, hence object.__setattr__.
        if isinstance(self.levels, str):
            object.__setattr__(self, "levels", (self.levels,))

    def from_log_line_to_section(
        self,
        lava_log_line: dict[str, str],
        main_test_case: Optional[str],
        timestamp_relative_to: Optional[datetime],
    ) -> Optional[GitlabSection]:
        """Build the GitlabSection opened by ``lava_log_line``, if any.

        Args:
            lava_log_line: Log entry; its ``"lvl"`` and ``"msg"`` keys are read.
            main_test_case: Section id of the main test case, or None. A
                section whose id equals it is created with both
                ``suppress_start`` and ``suppress_end`` set.
            timestamp_relative_to: Passed through unchanged to GitlabSection.

        Returns:
            A new GitlabSection when the level and regex both match,
            otherwise None.

        Raises:
            KeyError: if ``lava_log_line`` lacks the ``"lvl"`` key, or the
                ``"msg"`` key when the level matches.
        """
        if lava_log_line["lvl"] not in self.levels:
            return None

        match = re.search(self.regex, lava_log_line["msg"])
        if match is None:
            return None

        groups = match.groups()
        section_id = self.section_id.format(*groups)
        section_header = self.section_header.format(*groups)
        is_main_test_case = section_id == main_test_case

        return GitlabSection(
            id=section_id,
            header=section_header,
            type=self.section_type,
            start_collapsed=self.collapsed,
            suppress_start=is_main_test_case,
            suppress_end=is_main_test_case,
            timestamp_relative_to=timestamp_relative_to,
        )
# Rules used to recognise section starts in a LAVA log.
# NOTE: every `levels` value must be a real tuple. A single-element tuple needs
# the trailing comma; `("info")` is just the string "info" and would turn the
# level membership test into a substring test.
LOG_SECTIONS = (
    # "start: 2 <name> (timeout <value>)..." at info level: boot section.
    # Only the first capture group (<name>) is used by the "{}" templates.
    LogSection(
        regex=re.compile(r"start: 2 (\S+) \(timeout ([^)]+)\).*"),
        levels=("info",),
        section_id="{}",
        section_header="Booting via {}",
        section_type=LogSectionType.LAVA_BOOT,
        collapsed=True,
    ),
    # STARTTC marker (optionally wrapped in <...>): a test case begins.
    LogSection(
        regex=re.compile(r"<?STARTTC>? ([^>]*)"),
        levels=("target", "debug"),
        section_id="{}",
        section_header="test_case {}",
        section_type=LogSectionType.TEST_CASE,
        collapsed=True,
    ),
    # STARTRUN marker whose payload mentions "ssh" followed by "server".
    LogSection(
        regex=re.compile(r"<?STARTRUN>? ([^>]*ssh.*server.*)"),
        levels=("debug",),
        section_id="{}",
        section_header="Setting up hardware device for remote control",
        section_type=LogSectionType.TEST_SUITE,
        collapsed=True,
    ),
    # ENDTC marker: whatever follows a test case is grouped as post-processing.
    LogSection(
        regex=re.compile(r"ENDTC>? ([^>]+)"),
        levels=("target", "debug"),
        section_id="post-{}",
        section_header="Post test_case {}",
        section_type=LogSectionType.LAVA_POST_PROCESSING,
        collapsed=True,
    ),
)